vp9,read_inter_block_mode_info: quiet msan warning

best_sub8x8[1] won't be used meaningfully when is_compound is false, but may trigger an msan warning as the value is copied around and later clamped.

BUG=667044

Change-Id: Icc24c3b72cdb550bebea44d4aaa4ff8bf3fbab56
(cherry picked from commit cb22359d027bc44cf84fa53a3ffd81c098816cc8)
2017-01-06 14:40:13 -08:00
624 changed files with 61504 additions and 101204 deletions
--- a/.clang-format
+++ b/.clang-format
@ -1,12 +1,12 @@
 ---
 Language:        Cpp
 # BasedOnStyle:  Google
-# Generated with clang-format 5.0.0
+# Generated with clang-format 3.8.1
 AccessModifierOffset: -1
 AlignAfterOpenBracket: Align
 AlignConsecutiveAssignments: false
 AlignConsecutiveDeclarations: false
-AlignEscapedNewlines: Left
+AlignEscapedNewlinesLeft: true
 AlignOperands:   true
 AlignTrailingComments: true
 AllowAllParametersOfDeclarationOnNextLine: true
@ -33,20 +33,12 @@ BraceWrapping:
  BeforeCatch:     false
  BeforeElse:      false
  IndentBraces:    false
  SplitEmptyFunction: true
  SplitEmptyRecord: true
  SplitEmptyNamespace: true
 BreakBeforeBinaryOperators: None
 BreakBeforeBraces: Attach
 BreakBeforeInheritanceComma: false
 BreakBeforeTernaryOperators: true
 BreakConstructorInitializersBeforeComma: false
 BreakConstructorInitializers: BeforeColon
 BreakAfterJavaFieldAnnotations: false
 BreakStringLiterals: true
 ColumnLimit:     80
 CommentPragmas:  '^ IWYU pragma:'
 CompactNamespaces: false
 ConstructorInitializerAllOnOneLineOrOnePerLine: false
 ConstructorInitializerIndentWidth: 4
 ContinuationIndentWidth: 4
@ -54,11 +46,7 @@ Cpp11BracedListStyle: false
 DerivePointerAlignment: false
 DisableFormat:   false
 ExperimentalAutoDetectBinPacking: false
-FixNamespaceComments: true
+ForEachMacros:   [ foreach, Q_FOREACH, BOOST_FOREACH ]
 ForEachMacros:
  - foreach
  - Q_FOREACH
  - BOOST_FOREACH
 IncludeCategories:
  - Regex:           '^<.*\.h>'
    Priority:        1
@ -66,12 +54,9 @@ IncludeCategories:
    Priority:        2
  - Regex:           '.*'
    Priority:        3
 IncludeIsMainRegex: '([-_](test|unittest))?$'
 IndentCaseLabels: true
 IndentWidth:     2
 IndentWrappedFunctionNames: false
 JavaScriptQuotes: Leave
 JavaScriptWrapImports: true
 KeepEmptyLinesAtTheStartOfBlocks: false
 MacroBlockBegin: ''
 MacroBlockEnd:   ''
@ -80,7 +65,6 @@ NamespaceIndentation: None
 ObjCBlockIndentWidth: 2
 ObjCSpaceAfterProperty: false
 ObjCSpaceBeforeProtocolList: false
 PenaltyBreakAssignment: 2
 PenaltyBreakBeforeFirstCallParameter: 1
 PenaltyBreakComment: 300
 PenaltyBreakFirstLessLess: 120
@ -90,9 +74,7 @@ PenaltyReturnTypeOnItsOwnLine: 200
 PointerAlignment: Right
 ReflowComments:  true
 SortIncludes:    false
 SortUsingDeclarations: true
 SpaceAfterCStyleCast: false
 SpaceAfterTemplateKeyword: true
 SpaceBeforeAssignmentOperators: true
 SpaceBeforeParens: ControlStatements
 SpaceInEmptyParentheses: false
--- a/.mailmap
+++ b/.mailmap
@ -3,7 +3,6 @@ Aℓex Converse <aconverse@google.com>
 Aℓex Converse <aconverse@google.com> <alex.converse@gmail.com>
 Alexis Ballier <aballier@gentoo.org> <alexis.ballier@gmail.com>
 Alpha Lam <hclam@google.com> <hclam@chromium.org>
 Chris Cunningham <chcunningham@chromium.org>
 Daniele Castagna <dcastagna@chromium.org> <dcastagna@google.com>
 Deb Mukherjee <debargha@google.com>
 Erik Niemeyer <erik.a.niemeyer@intel.com> <erik.a.niemeyer@gmail.com>
@ -22,21 +21,17 @@ Marco Paniconi <marpan@google.com>
 Marco Paniconi <marpan@google.com> <marpan@chromium.org>
 Pascal Massimino <pascal.massimino@gmail.com>
 Paul Wilkins <paulwilkins@google.com>
 Peter Boström <pbos@chromium.org> <pbos@google.com>
 Peter de Rivaz <peter.derivaz@gmail.com>
 Peter de Rivaz <peter.derivaz@gmail.com> <peter.derivaz@argondesign.com>
 Ralph Giles <giles@xiph.org> <giles@entropywave.com>
 Ralph Giles <giles@xiph.org> <giles@mozilla.com>
 Ronald S. Bultje <rsbultje@gmail.com> <rbultje@google.com>
 Sami Pietilä <samipietila@google.com>
 Shiyou Yin <yinshiyou-hf@loongson.cn>
 Tamar Levy <tamar.levy@intel.com>
 Tamar Levy <tamar.levy@intel.com> <levytamar82@gmail.com>
 Tero Rintaluoma <teror@google.com> <tero.rintaluoma@on2.com>
 Timothy B. Terriberry <tterribe@xiph.org> <tterriberry@mozilla.com>
 Tom Finegan <tomfinegan@google.com>
 Tom Finegan <tomfinegan@google.com> <tomfinegan@chromium.org>
 Urvang Joshi <urvang@google.com> <urvang@chromium.org>
 Yaowu Xu <yaowu@google.com> <adam@xuyaowu.com>
 Yaowu Xu <yaowu@google.com> <yaowu@xuyaowu.com>
 Yaowu Xu <yaowu@google.com> <Yaowu Xu>
--- a/29
+++ b/29
@ -3,13 +3,11 @@
 Aaron Watry <awatry@gmail.com>
 Abo Talib Mahfoodh <ab.mahfoodh@gmail.com>
 Adam Xu <adam@xuyaowu.com>
 Adrian Grange <agrange@google.com>
 Aℓex Converse <aconverse@google.com>
 Ahmad Sharif <asharif@google.com>
 Aleksey Vasenev <margtu-fivt@ya.ru>
 Alexander Potapenko <glider@google.com>
 Alexander Voronov <avoronov@graphics.cs.msu.ru>
 Alexandra Hájková <alexandra.khirnova@gmail.com>
 Alexis Ballier <aballier@gentoo.org>
 Alok Ahuja <waveletcoeff@gmail.com>
 Alpha Lam <hclam@google.com>
@ -17,7 +15,6 @@ A.Mahfoodh <ab.mahfoodh@gmail.com>
 Ami Fischman <fischman@chromium.org>
 Andoni Morales Alastruey <ylatuya@gmail.com>
 Andres Mejia <mcitadel@gmail.com>
 Andrew Lewis <andrewlewis@google.com>
 Andrew Russell <anrussell@google.com>
 Angie Chiang <angiebird@google.com>
 Aron Rosenberg <arosenberg@logitech.com>
@ -25,14 +22,11 @@ Attila Nagy <attilanagy@google.com>
 Brion Vibber <bvibber@wikimedia.org>
 changjun.yang <changjun.yang@intel.com>
 Charles 'Buck' Krasic <ckrasic@google.com>
 Cheng Chen <chengchen@google.com>
 chm <chm@rock-chips.com>
 Chris Cunningham <chcunningham@chromium.org>
 Christian Duvivier <cduvivier@google.com>
 Daniele Castagna <dcastagna@chromium.org>
 Daniel Kang <ddkang@google.com>
 Deb Mukherjee <debargha@google.com>
 Deepa K G <deepa.kg@ittiam.com>
 Dim Temp <dimtemp0@gmail.com>
 Dmitry Kovalev <dkovalev@google.com>
 Dragan Mrdjan <dmrdjan@mips.com>
@ -43,21 +37,17 @@ Fabio Pedretti <fabio.ped@libero.it>
 Frank Galligan <fgalligan@google.com>
 Fredrik Söderquist <fs@opera.com>
 Fritz Koenig <frkoenig@google.com>
 Gabriel Marin <gmx@chromium.org>
 Gaute Strokkenes <gaute.strokkenes@broadcom.com>
 Geza Lore <gezalore@gmail.com>
 Ghislain MARY <ghislainmary2@gmail.com>
 Giuseppe Scrivano <gscrivano@gnu.org>
 Gordana Cmiljanovic <gordana.cmiljanovic@imgtec.com>
 Gregor Jasny <gjasny@gmail.com>
 Guillaume Martres <gmartres@google.com>
 Guillermo Ballester Valor <gbvalor@gmail.com>
 Hangyu Kuang <hkuang@google.com>
 Hanno Böck <hanno@hboeck.de>
 Han Shen <shenhan@google.com>
 Henrik Lundin <hlundin@google.com>
 Hui Su <huisu@google.com>
 Ivan Krasin <krasin@chromium.org>
 Ivan Maltz <ivanmaltz@google.com>
 Jacek Caban <cjacek@gmail.com>
 Jacky Chen <jackychen@google.com>
@ -71,7 +61,6 @@ Jean-Yves Avenard <jyavenard@mozilla.com>
 Jeff Faust <jfaust@google.com>
 Jeff Muizelaar <jmuizelaar@mozilla.com>
 Jeff Petkau <jpet@chromium.org>
 Jerome Jiang <jianj@google.com>
 Jia Jia <jia.jia@linaro.org>
 Jian Zhou <zhoujian@google.com>
 Jim Bankoski <jimbankoski@google.com>
@ -88,7 +77,6 @@ Justin Clift <justin@salasaga.org>
 Justin Lebar <justin.lebar@gmail.com>
 Kaustubh Raste <kaustubh.raste@imgtec.com>
 KO Myung-Hun <komh@chollian.net>
 Kyle Siefring <kylesiefring@gmail.com>
 Lawrence Velázquez <larryv@macports.org>
 Linfeng Zhang <linfengz@google.com>
 Lou Quillio <louquillio@google.com>
@ -104,12 +92,8 @@ Michael Kohler <michaelkohler@live.com>
 Mike Frysinger <vapier@chromium.org>
 Mike Hommey <mhommey@mozilla.com>
 Mikhal Shemer <mikhal@google.com>
 Min Chen <chenm003@gmail.com>
 Minghai Shang <minghai@google.com>
 Min Ye <yeemmi@google.com>
 Moriyoshi Koizumi <mozo@mozo.jp>
 Morton Jonuschat <yabawock@gmail.com>
 Nathan E. Egge <negge@mozilla.com>
 Nico Weber <thakis@chromium.org>
 Parag Salasakar <img.mips1@gmail.com>
 Pascal Massimino <pascal.massimino@gmail.com>
@ -118,22 +102,16 @@ Paul Wilkins <paulwilkins@google.com>
 Pavol Rusnak <stick@gk2.sk>
 Paweł Hajdan <phajdan@google.com>
 Pengchong Jin <pengchong@google.com>
 Peter Boström <pbos@chromium.org>
 Peter Collingbourne <pcc@chromium.org>
 Peter de Rivaz <peter.derivaz@gmail.com>
 Philip Jägenstedt <philipj@opera.com>
 Priit Laes <plaes@plaes.org>
 Rafael Ávila de Espíndola <rafael.espindola@gmail.com>
 Rafaël Carré <funman@videolan.org>
 Rafael de Lucena Valle <rafaeldelucena@gmail.com>
 Rahul Chaudhry <rahulchaudhry@google.com>
 Ralph Giles <giles@xiph.org>
 Ranjit Kumar Tulabandu <ranjit.tulabandu@ittiam.com>
 Rob Bradford <rob@linux.intel.com>
 Ronald S. Bultje <rsbultje@gmail.com>
 Rui Ueyama <ruiu@google.com>
 Sami Pietilä <samipietila@google.com>
 Sarah Parker <sarahparker@google.com>
 Sasi Inguva <isasi@google.com>
 Scott Graham <scottmg@chromium.org>
 Scott LaVarnway <slavarnway@google.com>
@ -141,11 +119,9 @@ Sean McGovern <gseanmcg@gmail.com>
 Sergey Kolomenkin <kolomenkin@gmail.com>
 Sergey Ulanov <sergeyu@chromium.org>
 Shimon Doodkin <helpmepro1@gmail.com>
 Shiyou Yin <yinshiyou-hf@loongson.cn>
 Shunyao Li <shunyaoli@google.com>
 Stefan Holmer <holmer@google.com>
 Suman Sunkara <sunkaras@google.com>
 Sylvestre Ledru <sylvestre@mozilla.com>
 Taekhyun Kim <takim@nvidia.com>
 Takanori MATSUURA <t.matsuu@gmail.com>
 Tamar Levy <tamar.levy@intel.com>
@ -155,10 +131,7 @@ Thijs Vermeir <thijsvermeir@gmail.com>
 Tim Kopp <tkopp@google.com>
 Timothy B. Terriberry <tterribe@xiph.org>
 Tom Finegan <tomfinegan@google.com>
 Tristan Matthews <le.businessman@gmail.com>
 Urvang Joshi <urvang@google.com>
 Vignesh Venkatasubramanian <vigneshv@google.com>
 Vlad Tsyrklevich <vtsyrklevich@chromium.org>
 Yaowu Xu <yaowu@google.com>
 Yi Luo <luoyi@google.com>
 Yongzhe Wang <yongzhe@google.com>
--- a/41
+++ b/41
@ -1,44 +1,3 @@
 2017-01-04 v1.7.0 "Mandarin Duck"
  This release focused on high bit depth performance (10/12 bit) and vp9
  encoding improvements.
  - Upgrading:
    This release is ABI incompatible due to new vp9 encoder features.
    Frame parallel decoding for vp9 has been removed.
  - Enhancements:
    vp9 encoding supports additional threads with --row-mt. This can be greater
    than the number of tiles.
    Two new vp9 encoder options have been added:
      --corpus-complexity
      --tune-content=film
    Additional tooling for respecting the vp9 "level" profiles has been added.
  - Bug fixes:
    A variety of fuzzing issues.
    vp8 threading fix for ARM.
    Codec control VP9_SET_SKIP_LOOP_FILTER fixed.
    Reject invalid multi resolution configurations.
 2017-01-09 v1.6.1 "Long Tailed Duck"
  This release improves upon the VP9 encoder and speeds up the encoding and
  decoding processes.
  - Upgrading:
    This release is ABI compatible with 1.6.0.
  - Enhancements:
    Faster VP9 encoding and decoding.
    High bit depth builds now provide similar speed for 8 bit encode and decode
    for x86 targets. Other platforms and higher bit depth improvements are in
    progress.
  - Bug Fixes:
    A variety of fuzzing issues.
 2016-07-20 v1.6.0 "Khaki Campbell Duck"
  This release improves upon the VP9 encoder and speeds up the encoding and
  decoding processes.
--- a/45
+++ b/45
@ -1,4 +1,4 @@
-README - 24 January 2018
+README - 20 July 2016
 Welcome to the WebM VP8/VP9 Codec SDK!
@ -9,26 +9,22 @@ COMPILING THE APPLICATIONS/LIBRARIES:
  1. Prerequisites
-    * All x86 targets require the Yasm[1] assembler be installed[2].
+    * All x86 targets require the Yasm[1] assembler be installed.
-    * All Windows builds require that Cygwin[3] be installed.
+    * All Windows builds require that Cygwin[2] be installed.
-    * Building the documentation requires Doxygen[4]. If you do not
+    * Building the documentation requires Doxygen[3]. If you do not
      have this package, the install-docs option will be disabled.
-    * Downloading the data for the unit tests requires curl[5] and sha1sum.
+    * Downloading the data for the unit tests requires curl[4] and sha1sum.
      sha1sum is provided via the GNU coreutils, installed by default on
      many *nix platforms, as well as MinGW and Cygwin. If coreutils is not
      available, a compatible version of sha1sum can be built from
-      source[6]. These requirements are optional if not running the unit
+      source[5]. These requirements are optional if not running the unit
      tests.
    [1]: http://www.tortall.net/projects/yasm
-    [2]: For Visual Studio the base yasm binary (not vsyasm) should be in the
+    [2]: http://www.cygwin.com
-         PATH for Visual Studio. For VS2017 it is sufficient to rename
+    [3]: http://www.doxygen.org
-         yasm-<version>-<arch>.exe to yasm.exe and place it in:
+    [4]: http://curl.haxx.se
-         Program Files (x86)/Microsoft Visual Studio/2017/<level>/Common7/Tools/
+    [5]: http://www.microbrew.org/tools/md5sha1sum/
    [3]: http://www.cygwin.com
    [4]: http://www.doxygen.org
    [5]: http://curl.haxx.se
    [6]: http://www.microbrew.org/tools/md5sha1sum/
  2. Out-of-tree builds
  Out of tree builds are a supported method of building the application. For
@ -45,22 +41,12 @@ COMPILING THE APPLICATIONS/LIBRARIES:
  used to get a list of supported options:
    $ ../libvpx/configure --help
-  4. Compiler analyzers
+  4. Cross development
  Compilers have added sanitizers which instrument binaries with information
  about address calculation, memory usage, threading, undefined behavior, and
  other common errors. To simplify building libvpx with some of these features
  use tools/set_analyzer_env.sh before running configure. It will set the
  compiler and necessary flags for building as well as environment variables
  read by the analyzer when testing the binaries.
    $ source ../libvpx/tools/set_analyzer_env.sh address
  5. Cross development
  For cross development, the most notable option is the --target option. The
  most up-to-date list of supported targets can be found at the bottom of the
  --help output of the configure script. As of this writing, the list of
  available targets is:
    arm64-android-gcc
    arm64-darwin-gcc
    arm64-linux-gcc
    armv7-android-gcc
@ -71,13 +57,10 @@ COMPILING THE APPLICATIONS/LIBRARIES:
    armv7-win32-vs11
    armv7-win32-vs12
    armv7-win32-vs14
    armv7-win32-vs15
    armv7s-darwin-gcc
    armv8-linux-gcc
    mips32-linux-gcc
    mips64-linux-gcc
    ppc64-linux-gcc
    ppc64le-linux-gcc
    sparc-solaris-gcc
    x86-android-gcc
    x86-darwin8-gcc
@ -90,7 +73,6 @@ COMPILING THE APPLICATIONS/LIBRARIES:
    x86-darwin13-gcc
    x86-darwin14-gcc
    x86-darwin15-gcc
    x86-darwin16-gcc
    x86-iphonesimulator-gcc
    x86-linux-gcc
    x86-linux-icc
@ -101,7 +83,6 @@ COMPILING THE APPLICATIONS/LIBRARIES:
    x86-win32-vs11
    x86-win32-vs12
    x86-win32-vs14
    x86-win32-vs15
    x86_64-android-gcc
    x86_64-darwin9-gcc
    x86_64-darwin10-gcc
@ -110,7 +91,6 @@ COMPILING THE APPLICATIONS/LIBRARIES:
    x86_64-darwin13-gcc
    x86_64-darwin14-gcc
    x86_64-darwin15-gcc
    x86_64-darwin16-gcc
    x86_64-iphonesimulator-gcc
    x86_64-linux-gcc
    x86_64-linux-icc
@ -120,7 +100,6 @@ COMPILING THE APPLICATIONS/LIBRARIES:
    x86_64-win64-vs11
    x86_64-win64-vs12
    x86_64-win64-vs14
    x86_64-win64-vs15
    generic-gnu
  The generic-gnu target, in conjunction with the CROSS environment variable,
@ -136,7 +115,7 @@ COMPILING THE APPLICATIONS/LIBRARIES:
  environment variables: CC, AR, LD, AS, STRIP, NM. Additional flags can be
  passed to these executables with CFLAGS, LDFLAGS, and ASFLAGS.
-  6. Configuration errors
+  5. Configuration errors
  If the configuration step fails, the first step is to look in the error log.
  This defaults to config.log. This should give a good indication of what went
  wrong. If not, contact us for support.
--- a/build/.gitattributes
+++ b/build/.gitattributes
@ -0,0 +1,2 @@
 *-vs8/*.rules -crlf
 *-msvs/*.rules -crlf
--- a/build/.gitignore
+++ b/build/.gitignore
@ -0,0 +1 @@
 x86*-win32-vs*
--- a/build/make/Android.mk
+++ b/build/make/Android.mk
@ -64,9 +64,6 @@ CONFIG_DIR := $(LOCAL_PATH)/
 LIBVPX_PATH := $(LOCAL_PATH)/libvpx
 ASM_CNV_PATH_LOCAL := $(TARGET_ARCH_ABI)/ads2gas
 ASM_CNV_PATH := $(LOCAL_PATH)/$(ASM_CNV_PATH_LOCAL)
 ifneq ($(V),1)
  qexec := @
 endif
 # Use the makefiles generated by upstream configure to determine which files to
 # build. Also set any architecture-specific flags.
@ -106,8 +103,8 @@ LOCAL_ASMFLAGS := -I$(LIBVPX_PATH)
 .PRECIOUS: %.asm.S
 $(ASM_CNV_PATH)/libvpx/%.asm.S: $(LIBVPX_PATH)/%.asm
-	$(qexec)mkdir -p $(dir $@)
+	@mkdir -p $(dir $@)
-	$(qexec)$(CONFIG_DIR)$(ASM_CONVERSION) <$< > $@
+	@$(CONFIG_DIR)$(ASM_CONVERSION) <$< > $@
 # For building *_rtcd.h, which have rules in libs.mk
 TGT_ISA:=$(word 1, $(subst -, ,$(TOOLCHAIN)))
@ -153,27 +150,15 @@ CODEC_SRCS_ASM_ADS2GAS = $(patsubst %.S, \
 LOCAL_SRC_FILES += $(CODEC_SRCS_ASM_ADS2GAS)
 ifeq ($(TARGET_ARCH_ABI),armeabi-v7a)
  ASM_INCLUDES := vpx_dsp/arm/idct_neon.asm.S
  CODEC_SRCS_ASM_NEON = $(foreach v, \
                        $(CODEC_SRCS_ASM_ARM_ALL),\
                        $(if $(findstring neon,$(v)),$(v),))
  CODEC_SRCS_ASM_NEON := $(filter-out $(addprefix %, $(ASM_INCLUDES)), \
                         $(CODEC_SRCS_ASM_NEON))
  CODEC_SRCS_ASM_NEON_ADS2GAS = $(patsubst %.S, \
                                $(ASM_CNV_PATH_LOCAL)/libvpx/%.S, \
                                $(CODEC_SRCS_ASM_NEON))
  LOCAL_SRC_FILES += $(patsubst %.S, \
                     %.S.neon, \
                     $(CODEC_SRCS_ASM_NEON_ADS2GAS))
  NEON_ASM_TARGETS = $(patsubst %.S, \
                     $(ASM_CNV_PATH)/libvpx/%.S, \
                     $(CODEC_SRCS_ASM_NEON))
 # add a dependency to the full path to the ads2gas output to ensure the
 # includes are converted first.
 ifneq ($(strip $(NEON_ASM_TARGETS)),)
 $(NEON_ASM_TARGETS): $(addprefix $(ASM_CNV_PATH)/libvpx/, $(ASM_INCLUDES))
 endif
 endif
 LOCAL_CFLAGS += \
@ -202,7 +187,7 @@ $$(rtcd_dep_template_SRCS): vpx_scale_rtcd.h
 $$(rtcd_dep_template_SRCS): vpx_dsp_rtcd.h
 rtcd_dep_template_CONFIG_ASM_ABIS := x86 x86_64 armeabi-v7a
-ifneq ($$(findstring $(TARGET_ARCH_ABI),$$(rtcd_dep_template_CONFIG_ASM_ABIS)),)
+ifneq ($(findstring $(TARGET_ARCH_ABI),$(rtcd_dep_template_CONFIG_ASM_ABIS)),)
 $$(rtcd_dep_template_SRCS): vpx_config.asm
 endif
 endef
@ -212,17 +197,16 @@ $(eval $(call rtcd_dep_template))
 .PHONY: clean
 clean:
 	@echo "Clean: ads2gas files [$(TARGET_ARCH_ABI)]"
-	$(qexec)$(RM) $(CODEC_SRCS_ASM_ADS2GAS) $(CODEC_SRCS_ASM_NEON_ADS2GAS)
+	@$(RM) $(CODEC_SRCS_ASM_ADS2GAS) $(CODEC_SRCS_ASM_NEON_ADS2GAS)
-	$(qexec)$(RM) -r $(ASM_CNV_PATH)
+	@$(RM) -r $(ASM_CNV_PATH)
-	$(qexec)$(RM) $(CLEAN-OBJS)
+	@$(RM) $(CLEAN-OBJS)
 ifeq ($(ENABLE_SHARED),1)
  LOCAL_CFLAGS += -fPIC
  include $(BUILD_SHARED_LIBRARY)
 else
  include $(BUILD_STATIC_LIBRARY)
 endif
 ifeq ($(CONFIG_RUNTIME_CPU_DETECT),yes)
-$(call import-module,android/cpufeatures)
+$(call import-module,cpufeatures)
 endif
--- a/build/make/Makefile
+++ b/build/make/Makefile
@ -124,7 +124,6 @@ ifeq ($(TOOLCHAIN), x86-os2-gcc)
 CFLAGS += -mstackrealign
 endif
 # x86[_64]
 $(BUILD_PFX)%_mmx.c.d: CFLAGS += -mmmx
 $(BUILD_PFX)%_mmx.c.o: CFLAGS += -mmmx
 $(BUILD_PFX)%_sse2.c.d: CFLAGS += -msse2
@ -139,12 +138,6 @@ $(BUILD_PFX)%_avx.c.d: CFLAGS += -mavx
 $(BUILD_PFX)%_avx.c.o: CFLAGS += -mavx
 $(BUILD_PFX)%_avx2.c.d: CFLAGS += -mavx2
 $(BUILD_PFX)%_avx2.c.o: CFLAGS += -mavx2
 $(BUILD_PFX)%_avx512.c.d: CFLAGS += -mavx512f -mavx512cd -mavx512bw -mavx512dq -mavx512vl
 $(BUILD_PFX)%_avx512.c.o: CFLAGS += -mavx512f -mavx512cd -mavx512bw -mavx512dq -mavx512vl
 # POWER
 $(BUILD_PFX)%_vsx.c.d: CFLAGS += -maltivec -mvsx
 $(BUILD_PFX)%_vsx.c.o: CFLAGS += -maltivec -mvsx
 $(BUILD_PFX)%.c.d: %.c
 	$(if $(quiet),@echo "    [DEP] $@")
--- a/build/make/ads2gas.pl
+++ b/build/make/ads2gas.pl
@ -23,11 +23,9 @@ use lib $FindBin::Bin;
 use thumb;
 my $thumb = 0;
 my $elf = 1;
 foreach my $arg (@ARGV) {
    $thumb = 1 if ($arg eq "-thumb");
    $elf = 0 if ($arg eq "-noelf");
 }
 print "@ This file was created from a .asm file\n";
@ -142,11 +140,7 @@ while (<STDIN>)
    # Make function visible to linker, and make additional symbol with
    # prepended underscore
-    if ($elf) {
+    s/EXPORT\s+\|([\$\w]*)\|/.global $1 \n\t.type $1, function/;
        s/EXPORT\s+\|([\$\w]*)\|/.global $1 \n\t.type $1, function/;
    } else {
        s/EXPORT\s+\|([\$\w]*)\|/.global $1/;
    }
    s/IMPORT\s+\|([\$\w]*)\|/.global $1/;
    s/EXPORT\s+([\$\w]*)/.global $1/;
@ -187,16 +181,11 @@ while (<STDIN>)
    # eabi_attributes numerical equivalents can be found in the
    # "ARM IHI 0045C" document.
-    if ($elf) {
+    # REQUIRE8 Stack is required to be 8-byte aligned
-        # REQUIRE8 Stack is required to be 8-byte aligned
+    s/\sREQUIRE8/.eabi_attribute 24, 1 \@Tag_ABI_align_needed/g;
        s/\sREQUIRE8/.eabi_attribute 24, 1 \@Tag_ABI_align_needed/g;
-        # PRESERVE8 Stack 8-byte align is preserved
+    # PRESERVE8 Stack 8-byte align is preserved
-        s/\sPRESERVE8/.eabi_attribute 25, 1 \@Tag_ABI_align_preserved/g;
+    s/\sPRESERVE8/.eabi_attribute 25, 1 \@Tag_ABI_align_preserved/g;
    } else {
        s/\sREQUIRE8//;
        s/\sPRESERVE8//;
    }
    # Use PROC and ENDP to give the symbols a .size directive.
    # This makes them show up properly in debugging tools like gdb and valgrind.
@ -213,7 +202,7 @@ while (<STDIN>)
        my $proc;
        s/\bENDP\b/@ $&/;
        $proc = pop(@proc_stack);
-        $_ = "\t.size $proc, .-$proc".$_ if ($proc and $elf);
+        $_ = "\t.size $proc, .-$proc".$_ if ($proc);
    }
    # EQU directive
@ -236,4 +225,4 @@ while (<STDIN>)
 }
 # Mark that this object doesn't need an executable stack.
-printf ("\t.section\t.note.GNU-stack,\"\",\%\%progbits\n") if $elf;
+printf ("\t.section\t.note.GNU-stack,\"\",\%\%progbits\n");
--- a/build/make/configure.sh
+++ b/build/make/configure.sh
@ -403,23 +403,6 @@ check_gcc_machine_option() {
  fi
 }
 # tests for -m$2, -m$3, -m$4... toggling the feature given in $1.
 check_gcc_machine_options() {
  feature="$1"
  shift
  flags="-m$1"
  shift
  for opt in $*; do
    flags="$flags -m$opt"
  done
  if enabled gcc && ! disabled "$feature" && ! check_cflags $flags; then
    RTCD_OPTIONS="${RTCD_OPTIONS}--disable-$feature "
  else
    soft_enable "$feature"
  fi
 }
 write_common_config_banner() {
  print_webm_license config.mk "##" ""
  echo '# This file automatically generated by configure. Do not edit!' >> config.mk
@ -691,6 +674,7 @@ check_xcode_minimum_version() {
 process_common_toolchain() {
  if [ -z "$toolchain" ]; then
    gcctarget="${CHOST:-$(gcc -dumpmachine 2> /dev/null)}"
    # detect tgt_isa
    case "$gcctarget" in
      aarch64*)
@ -713,18 +697,6 @@ process_common_toolchain() {
      *sparc*)
        tgt_isa=sparc
        ;;
      power*64*-*)
        tgt_isa=ppc64
        ;;
      power*)
        tgt_isa=ppc
        ;;
      *mips64el*)
        tgt_isa=mips64
        ;;
      *mips32el*)
        tgt_isa=mips32
        ;;
    esac
    # detect tgt_os
@ -753,20 +725,9 @@ process_common_toolchain() {
        tgt_isa=x86_64
        tgt_os=darwin15
        ;;
      *darwin16*)
        tgt_isa=x86_64
        tgt_os=darwin16
        ;;
      *darwin17*)
        tgt_isa=x86_64
        tgt_os=darwin17
        ;;
      x86_64*mingw32*)
        tgt_os=win64
        ;;
      x86_64*cygwin*)
        tgt_os=win64
        ;;
      *mingw32*|*cygwin*)
        [ -z "$tgt_isa" ] && tgt_isa=x86
        tgt_os=win32
@ -814,9 +775,6 @@ process_common_toolchain() {
    mips*)
      enable_feature mips
      ;;
    ppc*)
      enable_feature ppc
      ;;
  esac
  # PIC is probably what we want when building shared libs
@ -885,14 +843,6 @@ process_common_toolchain() {
      add_cflags  "-mmacosx-version-min=10.11"
      add_ldflags "-mmacosx-version-min=10.11"
      ;;
    *-darwin16-*)
      add_cflags  "-mmacosx-version-min=10.12"
      add_ldflags "-mmacosx-version-min=10.12"
      ;;
    *-darwin17-*)
      add_cflags  "-mmacosx-version-min=10.13"
      add_ldflags "-mmacosx-version-min=10.13"
      ;;
    *-iphonesimulator-*)
      add_cflags  "-miphoneos-version-min=${IOS_VERSION_MIN}"
      add_ldflags "-miphoneos-version-min=${IOS_VERSION_MIN}"
@ -941,6 +891,7 @@ process_common_toolchain() {
          setup_gnu_toolchain
          arch_int=${tgt_isa##armv}
          arch_int=${arch_int%%te}
          check_add_asflags --defsym ARCHITECTURE=${arch_int}
          tune_cflags="-mtune="
          if [ ${tgt_isa} = "armv7" ] || [ ${tgt_isa} = "armv7s" ]; then
            if [ -z "${float_abi}" ]; then
@ -967,19 +918,6 @@ EOF
          enabled debug && add_asflags -g
          asm_conversion_cmd="${source_path}/build/make/ads2gas.pl"
          case ${tgt_os} in
            win*)
              asm_conversion_cmd="$asm_conversion_cmd -noelf"
              AS="$CC -c"
              EXE_SFX=.exe
              enable_feature thumb
              ;;
            *)
              check_add_asflags --defsym ARCHITECTURE=${arch_int}
              ;;
          esac
          if enabled thumb; then
            asm_conversion_cmd="$asm_conversion_cmd -thumb"
            check_add_cflags -mthumb
@ -1206,25 +1144,12 @@ EOF
        fi
      fi
      if enabled mmi; then
        tgt_isa=loongson3a
        check_add_ldflags -march=loongson3a
      fi
      check_add_cflags -march=${tgt_isa}
      check_add_asflags -march=${tgt_isa}
      check_add_asflags -KPIC
      ;;
    ppc*)
      link_with_cc=gcc
      setup_gnu_toolchain
      check_gcc_machine_option "vsx"
      ;;
    x86*)
      case  ${tgt_os} in
        android)
          soft_enable realtime_only
          ;;
        win*)
          enabled gcc && add_cflags -fno-common
          ;;
@ -1277,13 +1202,6 @@ EOF
          AS=msvs
          msvs_arch_dir=x86-msvs
          vc_version=${tgt_cc##vs}
          case $vc_version in
            7|8|9|10|11|12|13|14)
              echo "${tgt_cc} does not support avx512, disabling....."
              RTCD_OPTIONS="${RTCD_OPTIONS}--disable-avx512 "
              soft_disable avx512
              ;;
          esac
          case $vc_version in
            7|8|9|10)
              echo "${tgt_cc} does not support avx/avx2, disabling....."
@ -1328,12 +1246,8 @@ EOF
        elif disabled $ext; then
          disable_exts="yes"
        else
-          if [ "$ext" = "avx512" ]; then
+          # use the shortened version for the flag: sse4_1 -> sse4
-            check_gcc_machine_options $ext avx512f avx512cd avx512bw avx512dq avx512vl
+          check_gcc_machine_option ${ext%_*} $ext
          else
            # use the shortened version for the flag: sse4_1 -> sse4
            check_gcc_machine_option ${ext%_*} $ext
          fi
        fi
      done
@ -1359,6 +1273,7 @@ EOF
        esac
        log_echo "  using $AS"
      fi
      [ "${AS##*/}" = nasm ] && add_asflags -Ox
      AS_SFX=.asm
      case  ${tgt_os} in
        win32)
@ -1367,7 +1282,7 @@ EOF
          EXE_SFX=.exe
          ;;
        win64)
-          add_asflags -f win64
+          add_asflags -f x64
          enabled debug && add_asflags -g cv8
          EXE_SFX=.exe
          ;;
@ -1394,8 +1309,7 @@ EOF
          add_cflags  ${sim_arch}
          add_ldflags ${sim_arch}
-          if [ "$(disabled external_build)" ] &&
+          if [ "$(show_darwin_sdk_major_version iphonesimulator)" -gt 8 ]; then
              [ "$(show_darwin_sdk_major_version iphonesimulator)" -gt 8 ]; then
            # yasm v1.3.0 doesn't know what -fembed-bitcode means, so turning it
            # on is pointless (unless building a C-only lib). Warn the user, but
            # do nothing here.
@ -1502,10 +1416,6 @@ EOF
          echo "msa optimizations are available only for little endian platforms"
          disable_feature msa
        fi
        if enabled mmi; then
          echo "mmi optimizations are available only for little endian platforms"
          disable_feature mmi
        fi
      fi
      ;;
  esac
--- a/build/make/gen_msvs_sln.sh
+++ b/build/make/gen_msvs_sln.sh
@ -25,7 +25,7 @@ files.
 Options:
    --help                      Print this message
    --out=outfile               Redirect output to a file
-    --ver=version               Version (7,8,9,10,11,12,14,15) of visual studio to generate for
+    --ver=version               Version (7,8,9,10,11,12,14) of visual studio to generate for
    --target=isa-os-cc          Target specifier
 EOF
    exit 1
@ -215,7 +215,7 @@ for opt in "$@"; do
    ;;
    --ver=*) vs_ver="$optval"
             case $optval in
-             10|11|12|14|15)
+             10|11|12|14)
             ;;
             *) die Unrecognized Visual Studio Version in $opt
             ;;
@ -240,12 +240,9 @@ case "${vs_ver:-10}" in
    12) sln_vers="12.00"
       sln_vers_str="Visual Studio 2013"
    ;;
-    14) sln_vers="12.00"
+    14) sln_vers="14.00"
       sln_vers_str="Visual Studio 2015"
    ;;
    15) sln_vers="12.00"
       sln_vers_str="Visual Studio 2017"
    ;;
 esac
 sfx=vcxproj
--- a/build/make/gen_msvs_vcxproj.sh
+++ b/build/make/gen_msvs_vcxproj.sh
@ -34,7 +34,7 @@ Options:
    --name=project_name         Name of the project (required)
    --proj-guid=GUID            GUID to use for the project
    --module-def=filename       File containing export definitions (for DLLs)
-    --ver=version               Version (10,11,12,14,15) of visual studio to generate for
+    --ver=version               Version (10,11,12,14) of visual studio to generate for
    --src-path-bare=dir         Path to root of source tree
    -Ipath/to/include           Additional include directories
    -DFLAG[=value]              Preprocessor macros to define
@ -168,7 +168,7 @@ for opt in "$@"; do
        --ver=*)
            vs_ver="$optval"
            case "$optval" in
-                10|11|12|14|15)
+                10|11|12|14)
                ;;
                *) die Unrecognized Visual Studio Version in $opt
                ;;
@ -218,7 +218,7 @@ guid=${guid:-`generate_uuid`}
 asm_use_custom_step=false
 uses_asm=${uses_asm:-false}
 case "${vs_ver:-11}" in
-    10|11|12|14|15)
+    10|11|12|14)
       asm_use_custom_step=$uses_asm
    ;;
 esac
@ -347,9 +347,6 @@ generate_vcxproj() {
            if [ "$vs_ver" = "14" ]; then
                tag_content PlatformToolset v140
            fi
            if [ "$vs_ver" = "15" ]; then
                tag_content PlatformToolset v141
            fi
            tag_content CharacterSet Unicode
            if [ "$config" = "Release" ]; then
                tag_content WholeProgramOptimization true
--- a/build/make/iosbuild.sh
+++ b/build/make/iosbuild.sh
@ -35,8 +35,8 @@ ARM_TARGETS="arm64-darwin-gcc
             armv7s-darwin-gcc"
 SIM_TARGETS="x86-iphonesimulator-gcc
             x86_64-iphonesimulator-gcc"
-OSX_TARGETS="x86-darwin16-gcc
+OSX_TARGETS="x86-darwin15-gcc
-             x86_64-darwin16-gcc"
+             x86_64-darwin15-gcc"
 TARGETS="${ARM_TARGETS} ${SIM_TARGETS}"
 # Configures for the target specified by $1, and invokes make with the dist
@ -271,7 +271,7 @@ cat << EOF
    --help: Display this message and exit.
    --enable-shared: Build a dynamic framework for use on iOS 8 or later.
    --extra-configure-args <args>: Extra args to pass when configuring libvpx.
-    --macosx: Uses darwin16 targets instead of iphonesimulator targets for x86
+    --macosx: Uses darwin15 targets instead of iphonesimulator targets for x86
              and x86_64. Allows linking to framework when builds target MacOSX
              instead of iOS.
    --preserve-build-output: Do not delete the build directory.
--- a/build/make/msvs_common.sh
+++ b/build/make/msvs_common.sh
@ -41,15 +41,6 @@ fix_path() {
 # Corrects the paths in file_list in one pass for efficiency.
 # $1 is the name of the array to be modified.
 fix_file_list() {
    if [ "${FIXPATH}" = "echo_path" ] ; then
      # When used with echo_path, fix_file_list is a no-op. Avoid warning about
      # unsupported 'declare -n' when it is not important.
      return 0
    elif [ "${BASH_VERSINFO}" -lt 4 ] ; then
      echo "Cygwin path conversion has failed. Please use a version of bash"
      echo "which supports nameref (-n), introduced in bash 4.3"
      return 1
    fi
    declare -n array_ref=$1
    files=$(fix_path "${array_ref[@]}")
    local IFS=$'\n'
--- a/build/make/rtcd.pl
+++ b/build/make/rtcd.pl
@ -1,13 +1,4 @@
 #!/usr/bin/env perl
 ##
 ##  Copyright (c) 2017 The WebM project authors. All Rights Reserved.
 ##
 ##  Use of this source code is governed by a BSD-style license
 ##  that can be found in the LICENSE file in the root of the source
 ##  tree. An additional intellectual property rights grant can be found
 ##  in the file PATENTS.  All contributing project authors may
 ##  be found in the AUTHORS file in the root of the source tree.
 ##
 no strict 'refs';
 use warnings;
@ -209,7 +200,6 @@ sub filter {
 sub common_top() {
  my $include_guard = uc($opts{sym})."_H_";
  print <<EOF;
 // This file is generated. Do not edit.
 #ifndef ${include_guard}
 #define ${include_guard}
@ -345,36 +335,6 @@ EOF
  common_bottom;
 }
 sub ppc() {
  determine_indirection("c", @ALL_ARCHS);
  # Assign the helper variable for each enabled extension
  foreach my $opt (@ALL_ARCHS) {
    my $opt_uc = uc $opt;
    eval "\$have_${opt}=\"flags & HAS_${opt_uc}\"";
  }
  common_top;
  print <<EOF;
 #include "vpx_config.h"
 #ifdef RTCD_C
 #include "vpx_ports/ppc.h"
 static void setup_rtcd_internal(void)
 {
    int flags = ppc_simd_caps();
    (void)flags;
 EOF
  set_function_pointers("c", @ALL_ARCHS);
  print <<EOF;
 }
 #endif
 EOF
  common_bottom;
 }
 sub unoptimized() {
  determine_indirection "c";
  common_top;
@ -401,10 +361,10 @@ EOF
 &require("c");
 if ($opts{arch} eq 'x86') {
-  @ALL_ARCHS = filter(qw/mmx sse sse2 sse3 ssse3 sse4_1 avx avx2 avx512/);
+  @ALL_ARCHS = filter(qw/mmx sse sse2 sse3 ssse3 sse4_1 avx avx2/);
  x86;
 } elsif ($opts{arch} eq 'x86_64') {
-  @ALL_ARCHS = filter(qw/mmx sse sse2 sse3 ssse3 sse4_1 avx avx2 avx512/);
+  @ALL_ARCHS = filter(qw/mmx sse sse2 sse3 ssse3 sse4_1 avx avx2/);
  @REQUIRES = filter(keys %required ? keys %required : qw/mmx sse sse2/);
  &require(@REQUIRES);
  x86;
@ -421,10 +381,6 @@ if ($opts{arch} eq 'x86') {
      @ALL_ARCHS = filter("$opts{arch}", qw/msa/);
      last;
    }
    if (/HAVE_MMI=yes/) {
      @ALL_ARCHS = filter("$opts{arch}", qw/mmi/);
      last;
    }
  }
  close CONFIG_FILE;
  mips;
@ -434,9 +390,6 @@ if ($opts{arch} eq 'x86') {
 } elsif ($opts{arch} eq 'armv8' || $opts{arch} eq 'arm64' ) {
  @ALL_ARCHS = filter(qw/neon/);
  arm;
 } elsif ($opts{arch} =~ /^ppc/ ) {
  @ALL_ARCHS = filter(qw/vsx/);
  ppc;
 } else {
  unoptimized;
 }
--- a/build/make/thumb.pm
+++ b/build/make/thumb.pm
@ -54,6 +54,13 @@ sub FixThumbInstructions($$)
    # "addne r0, r0, r2".
    s/^(\s*)((ldr|str)(ne)?[bhd]?)(\s+)(\w+),(\s*\w+,)?\s*\[(\w+)\],\s*(\w+)/$1$2$5$6,$7 [$8]\n$1add$4$5$8, $8, $9/g;
    # Convert a conditional addition to the pc register into a series of
    # instructions. This converts "addlt pc, pc, r3, lsl #2" into
    # "itttt lt", "movlt.n r12, pc", "addlt.w r12, #12",
    # "addlt.w r12, r12, r3, lsl #2", "movlt.n pc, r12".
    # This assumes that r12 is free at this point.
    s/^(\s*)addlt(\s+)pc,\s*pc,\s*(\w+),\s*lsl\s*#(\d+)/$1itttt$2lt\n$1movlt.n$2r12, pc\n$1addlt.w$2r12, #12\n$1addlt.w$2r12, r12, $3, lsl #($4-$branch_shift_offset)\n$1movlt.n$2pc, r12/g;
    # Convert "mov pc, lr" into "bx lr", since the former only works
    # for switching from arm to thumb (and only in armv7), but not
    # from thumb to arm.
--- a/build/make/version.sh
+++ b/build/make/version.sh
@ -60,7 +60,6 @@ if [ ${bare} ]; then
    echo "${changelog_version}${git_version_id}" > $$.tmp
 else
    cat<<EOF>$$.tmp
 // This file is generated. Do not edit.
 #define VERSION_MAJOR  $major_version
 #define VERSION_MINOR  $minor_version
 #define VERSION_PATCH  $patch_version
--- a/codereview.settings
+++ b/codereview.settings
@ -1,4 +1,5 @@
-# This file is used by git cl to get repository specific information.
+# This file is used by gcl to get repository specific information.
-GERRIT_HOST: True
+GERRIT_HOST: chromium-review.googlesource.com
 GERRIT_PORT: 29418
 CODE_REVIEW_SERVER: chromium-review.googlesource.com
 GERRIT_SQUASH_UPLOADS: False
--- a/46
+++ b/46
@ -101,23 +101,18 @@ EOF
 all_platforms="${all_platforms} arm64-android-gcc"
 all_platforms="${all_platforms} arm64-darwin-gcc"
 all_platforms="${all_platforms} arm64-linux-gcc"
 all_platforms="${all_platforms} arm64-win64-gcc"
 all_platforms="${all_platforms} armv7-android-gcc"   #neon Cortex-A8
 all_platforms="${all_platforms} armv7-darwin-gcc"    #neon Cortex-A8
 all_platforms="${all_platforms} armv7-linux-rvct"    #neon Cortex-A8
 all_platforms="${all_platforms} armv7-linux-gcc"     #neon Cortex-A8
 all_platforms="${all_platforms} armv7-none-rvct"     #neon Cortex-A8
 all_platforms="${all_platforms} armv7-win32-gcc"
 all_platforms="${all_platforms} armv7-win32-vs11"
 all_platforms="${all_platforms} armv7-win32-vs12"
 all_platforms="${all_platforms} armv7-win32-vs14"
 all_platforms="${all_platforms} armv7-win32-vs15"
 all_platforms="${all_platforms} armv7s-darwin-gcc"
 all_platforms="${all_platforms} armv8-linux-gcc"
 all_platforms="${all_platforms} mips32-linux-gcc"
 all_platforms="${all_platforms} mips64-linux-gcc"
 all_platforms="${all_platforms} ppc64-linux-gcc"
 all_platforms="${all_platforms} ppc64le-linux-gcc"
 all_platforms="${all_platforms} sparc-solaris-gcc"
 all_platforms="${all_platforms} x86-android-gcc"
 all_platforms="${all_platforms} x86-darwin8-gcc"
@ -130,8 +125,6 @@ all_platforms="${all_platforms} x86-darwin12-gcc"
 all_platforms="${all_platforms} x86-darwin13-gcc"
 all_platforms="${all_platforms} x86-darwin14-gcc"
 all_platforms="${all_platforms} x86-darwin15-gcc"
 all_platforms="${all_platforms} x86-darwin16-gcc"
 all_platforms="${all_platforms} x86-darwin17-gcc"
 all_platforms="${all_platforms} x86-iphonesimulator-gcc"
 all_platforms="${all_platforms} x86-linux-gcc"
 all_platforms="${all_platforms} x86-linux-icc"
@ -142,7 +135,6 @@ all_platforms="${all_platforms} x86-win32-vs10"
 all_platforms="${all_platforms} x86-win32-vs11"
 all_platforms="${all_platforms} x86-win32-vs12"
 all_platforms="${all_platforms} x86-win32-vs14"
 all_platforms="${all_platforms} x86-win32-vs15"
 all_platforms="${all_platforms} x86_64-android-gcc"
 all_platforms="${all_platforms} x86_64-darwin9-gcc"
 all_platforms="${all_platforms} x86_64-darwin10-gcc"
@ -151,8 +143,6 @@ all_platforms="${all_platforms} x86_64-darwin12-gcc"
 all_platforms="${all_platforms} x86_64-darwin13-gcc"
 all_platforms="${all_platforms} x86_64-darwin14-gcc"
 all_platforms="${all_platforms} x86_64-darwin15-gcc"
 all_platforms="${all_platforms} x86_64-darwin16-gcc"
 all_platforms="${all_platforms} x86_64-darwin17-gcc"
 all_platforms="${all_platforms} x86_64-iphonesimulator-gcc"
 all_platforms="${all_platforms} x86_64-linux-gcc"
 all_platforms="${all_platforms} x86_64-linux-icc"
@ -162,7 +152,6 @@ all_platforms="${all_platforms} x86_64-win64-vs10"
 all_platforms="${all_platforms} x86_64-win64-vs11"
 all_platforms="${all_platforms} x86_64-win64-vs12"
 all_platforms="${all_platforms} x86_64-win64-vs14"
 all_platforms="${all_platforms} x86_64-win64-vs15"
 all_platforms="${all_platforms} generic-gnu"
 # all_targets is a list of all targets that can be configured
@ -174,14 +163,11 @@ for t in ${all_targets}; do
    [ -f "${source_path}/${t}.mk" ] && enable_feature ${t}
 done
 if ! diff --version >/dev/null; then
  die "diff missing: Try installing diffutils via your package manager."
 fi
 if ! perl --version >/dev/null; then
    die "Perl is required to build"
 fi
 if [ "`cd \"${source_path}\" && pwd`" != "`pwd`" ]; then
  # test to see if source_path already configured
  if [ -f "${source_path}/vpx_config.h" ]; then
@ -237,7 +223,6 @@ ARCH_LIST="
    mips
    x86
    x86_64
    ppc
 "
 ARCH_EXT_LIST_X86="
    mmx
@ -248,13 +233,7 @@ ARCH_EXT_LIST_X86="
    sse4_1
    avx
    avx2
    avx512
 "
 ARCH_EXT_LIST_LOONGSON="
    mmi
 "
 ARCH_EXT_LIST="
    neon
    neon_asm
@ -265,10 +244,6 @@ ARCH_EXT_LIST="
    mips64
    ${ARCH_EXT_LIST_X86}
    vsx
    ${ARCH_EXT_LIST_LOONGSON}
 "
 HAVE_LIST="
    ${ARCH_EXT_LIST}
@ -277,8 +252,10 @@ HAVE_LIST="
    unistd_h
 "
 EXPERIMENT_LIST="
    spatial_svc
    fp_mb_stats
    emulate_hardware
    misc_fixes
 "
 CONFIG_LIST="
    dependency_tracking
@ -333,7 +310,6 @@ CONFIG_LIST="
    better_hw_compatibility
    experimental
    size_limit
    always_adjust_bpm
    ${EXPERIMENT_LIST}
 "
 CMDLINE_SELECT="
@ -393,7 +369,6 @@ CMDLINE_SELECT="
    better_hw_compatibility
    vp9_highbitdepth
    experimental
    always_adjust_bpm
 "
 process_cmdline() {
@ -595,7 +570,6 @@ process_toolchain() {
        check_add_cflags -Wdeclaration-after-statement
        check_add_cflags -Wdisabled-optimization
        check_add_cflags -Wfloat-conversion
        check_add_cflags -Wparentheses-equality
        check_add_cflags -Wpointer-arith
        check_add_cflags -Wtype-limits
        check_add_cflags -Wcast-qual
@ -609,16 +583,16 @@ process_toolchain() {
        # https://bugs.chromium.org/p/webm/issues/detail?id=1069
        check_add_cflags -Wextra
        # check_add_cflags also adds to cxxflags. gtest does not do well with
-        # these flags so add them explicitly to CFLAGS only.
+        # -Wundef so add it explicitly to CFLAGS only.
        check_cflags -Wundef && add_cflags_only -Wundef
        check_cflags -Wframe-larger-than=52000 && \
          add_cflags_only -Wframe-larger-than=52000
        if enabled mips || [ -z "${INLINE}" ]; then
          enabled extra_warnings || check_add_cflags -Wno-unused-function
        fi
-        # Avoid this warning for third_party C++ sources. Some reorganization
+        if ! enabled vp9_highbitdepth; then
-        # would be needed to apply this only to test/*.cc.
+          # Avoid this warning for third_party C++ sources. Some reorganization
-        check_cflags -Wshorten-64-to-32 && add_cflags_only -Wshorten-64-to-32
+          # would be needed to apply this only to test/*.cc.
          check_cflags -Wshorten-64-to-32 && add_cflags_only -Wshorten-64-to-32
        fi
    fi
    if enabled icc; then
@ -670,7 +644,7 @@ process_toolchain() {
             gen_vcproj_cmd=${source_path}/build/make/gen_msvs_vcxproj.sh
             enabled werror && gen_vcproj_cmd="${gen_vcproj_cmd} --enable-werror"
             all_targets="${all_targets} solution"
-             INLINE="__inline"
+             INLINE="__forceinline"
        ;;
    esac
--- a/examples.mk
+++ b/examples.mk
@ -109,17 +109,18 @@ ifeq ($(CONFIG_WEBM_IO),yes)
 endif
 vpxenc.GUID                  = 548DEC74-7A15-4B2B-AFC3-AA102E7C25C1
 vpxenc.DESCRIPTION           = Full featured encoder
-
+ifeq ($(CONFIG_SPATIAL_SVC),yes)
-EXAMPLES-$(CONFIG_VP9_ENCODER)      += vp9_spatial_svc_encoder.c
+  EXAMPLES-$(CONFIG_VP9_ENCODER)      += vp9_spatial_svc_encoder.c
-vp9_spatial_svc_encoder.SRCS        += args.c args.h
+  vp9_spatial_svc_encoder.SRCS        += args.c args.h
-vp9_spatial_svc_encoder.SRCS        += ivfenc.c ivfenc.h
+  vp9_spatial_svc_encoder.SRCS        += ivfenc.c ivfenc.h
-vp9_spatial_svc_encoder.SRCS        += tools_common.c tools_common.h
+  vp9_spatial_svc_encoder.SRCS        += tools_common.c tools_common.h
-vp9_spatial_svc_encoder.SRCS        += video_common.h
+  vp9_spatial_svc_encoder.SRCS        += video_common.h
-vp9_spatial_svc_encoder.SRCS        += video_writer.h video_writer.c
+  vp9_spatial_svc_encoder.SRCS        += video_writer.h video_writer.c
-vp9_spatial_svc_encoder.SRCS        += vpx_ports/msvc.h
+  vp9_spatial_svc_encoder.SRCS        += vpx_ports/msvc.h
-vp9_spatial_svc_encoder.SRCS        += vpxstats.c vpxstats.h
+  vp9_spatial_svc_encoder.SRCS        += vpxstats.c vpxstats.h
-vp9_spatial_svc_encoder.GUID        = 4A38598D-627D-4505-9C7B-D4020C84100D
+  vp9_spatial_svc_encoder.GUID        = 4A38598D-627D-4505-9C7B-D4020C84100D
-vp9_spatial_svc_encoder.DESCRIPTION = VP9 Spatial SVC Encoder
+  vp9_spatial_svc_encoder.DESCRIPTION = VP9 Spatial SVC Encoder
 endif
 ifneq ($(CONFIG_SHARED),yes)
 EXAMPLES-$(CONFIG_VP9_ENCODER)    += resize_util.c
--- a/examples/vp8_multi_resolution_encoder.c
+++ b/examples/vp8_multi_resolution_encoder.c
@ -151,7 +151,7 @@ static void write_ivf_frame_header(FILE *outfile,
  if (pkt->kind != VPX_CODEC_CX_FRAME_PKT) return;
  pts = pkt->data.frame.pts;
-  mem_put_le32(header, (int)pkt->data.frame.sz);
+  mem_put_le32(header, pkt->data.frame.sz);
  mem_put_le32(header + 4, pts & 0xFFFFFFFF);
  mem_put_le32(header + 8, pts >> 32);
@ -190,7 +190,7 @@ static void set_temporal_layer_pattern(int num_temporal_layers,
      cfg->ts_layer_id[0] = 0;
      cfg->ts_layer_id[1] = 1;
      // Use 60/40 bit allocation as example.
-      cfg->ts_target_bitrate[0] = (int)(0.6f * bitrate);
+      cfg->ts_target_bitrate[0] = 0.6f * bitrate;
      cfg->ts_target_bitrate[1] = bitrate;
      /* 0=L, 1=GF */
@ -240,9 +240,9 @@ static void set_temporal_layer_pattern(int num_temporal_layers,
      cfg->ts_layer_id[1] = 2;
      cfg->ts_layer_id[2] = 1;
      cfg->ts_layer_id[3] = 2;
-      // Use 45/20/35 bit allocation as example.
+      // Use 40/20/40 bit allocation as example.
-      cfg->ts_target_bitrate[0] = (int)(0.45f * bitrate);
+      cfg->ts_target_bitrate[0] = 0.4f * bitrate;
-      cfg->ts_target_bitrate[1] = (int)(0.65f * bitrate);
+      cfg->ts_target_bitrate[1] = 0.6f * bitrate;
      cfg->ts_target_bitrate[2] = bitrate;
      /* 0=L, 1=GF, 2=ARF */
@ -294,8 +294,8 @@ int main(int argc, char **argv) {
  vpx_codec_err_t res[NUM_ENCODERS];
  int i;
-  int width;
+  long width;
-  int height;
+  long height;
  int length_frame;
  int frame_avail;
  int got_data;
@ -347,9 +347,9 @@ int main(int argc, char **argv) {
  printf("Using %s\n", vpx_codec_iface_name(interface));
-  width = (int)strtol(argv[1], NULL, 0);
+  width = strtol(argv[1], NULL, 0);
-  height = (int)strtol(argv[2], NULL, 0);
+  height = strtol(argv[2], NULL, 0);
-  framerate = (int)strtol(argv[3], NULL, 0);
+  framerate = strtol(argv[3], NULL, 0);
  if (width < 16 || width % 2 || height < 16 || height % 2)
    die("Invalid resolution: %ldx%ld", width, height);
@ -371,13 +371,12 @@ int main(int argc, char **argv) {
  // Bitrates per spatial layer: overwrite default rates above.
  for (i = 0; i < NUM_ENCODERS; i++) {
-    target_bitrate[i] = (int)strtol(argv[NUM_ENCODERS + 5 + i], NULL, 0);
+    target_bitrate[i] = strtol(argv[NUM_ENCODERS + 5 + i], NULL, 0);
  }
  // Temporal layers per spatial layers: overwrite default settings above.
  for (i = 0; i < NUM_ENCODERS; i++) {
-    num_temporal_layers[i] =
+    num_temporal_layers[i] = strtol(argv[2 * NUM_ENCODERS + 5 + i], NULL, 0);
        (int)strtol(argv[2 * NUM_ENCODERS + 5 + i], NULL, 0);
    if (num_temporal_layers[i] < 1 || num_temporal_layers[i] > 3)
      die("Invalid temporal layers: %d, Must be 1, 2, or 3. \n",
          num_temporal_layers);
@ -392,9 +391,9 @@ int main(int argc, char **argv) {
    downsampled_input[i] = fopen(filename, "wb");
  }
-  key_frame_insert = (int)strtol(argv[3 * NUM_ENCODERS + 5], NULL, 0);
+  key_frame_insert = strtol(argv[3 * NUM_ENCODERS + 5], NULL, 0);
-  show_psnr = (int)strtol(argv[3 * NUM_ENCODERS + 6], NULL, 0);
+  show_psnr = strtol(argv[3 * NUM_ENCODERS + 6], NULL, 0);
  /* Populate default encoder configuration */
  for (i = 0; i < NUM_ENCODERS; i++) {
@ -461,7 +460,7 @@ int main(int argc, char **argv) {
  // Set the number of threads per encode/spatial layer.
  // (1, 1, 1) means no encoder threading.
-  cfg[0].g_threads = 1;
+  cfg[0].g_threads = 2;
  cfg[1].g_threads = 1;
  cfg[2].g_threads = 1;
@ -470,7 +469,7 @@ int main(int argc, char **argv) {
    if (!vpx_img_alloc(&raw[i], VPX_IMG_FMT_I420, cfg[i].g_w, cfg[i].g_h, 32))
      die("Failed to allocate image", cfg[i].g_w, cfg[i].g_h);
-  if (raw[0].stride[VPX_PLANE_Y] == (int)raw[0].d_w)
+  if (raw[0].stride[VPX_PLANE_Y] == raw[0].d_w)
    read_frame_p = read_frame;
  else
    read_frame_p = read_frame_by_row;
@ -508,11 +507,9 @@ int main(int argc, char **argv) {
  /* Set NOISE_SENSITIVITY to do TEMPORAL_DENOISING */
  /* Enable denoising for the highest-resolution encoder. */
-  if (vpx_codec_control(&codec[0], VP8E_SET_NOISE_SENSITIVITY, 1))
+  if (vpx_codec_control(&codec[0], VP8E_SET_NOISE_SENSITIVITY, 4))
    die_codec(&codec[0], "Failed to set noise_sensitivity");
-  if (vpx_codec_control(&codec[1], VP8E_SET_NOISE_SENSITIVITY, 1))
+  for (i = 1; i < NUM_ENCODERS; i++) {
    die_codec(&codec[1], "Failed to set noise_sensitivity");
  for (i = 2; i < NUM_ENCODERS; i++) {
    if (vpx_codec_control(&codec[i], VP8E_SET_NOISE_SENSITIVITY, 0))
      die_codec(&codec[i], "Failed to set noise_sensitivity");
  }
@ -559,8 +556,7 @@ int main(int argc, char **argv) {
        /* Write out down-sampled input. */
        length_frame = cfg[i].g_w * cfg[i].g_h * 3 / 2;
        if (fwrite(raw[i].planes[0], 1, length_frame,
-                   downsampled_input[NUM_ENCODERS - i - 1]) !=
+                   downsampled_input[NUM_ENCODERS - i - 1]) != length_frame) {
            (unsigned int)length_frame) {
          return EXIT_FAILURE;
        }
      }
@ -621,6 +617,10 @@ int main(int argc, char **argv) {
            break;
          default: break;
        }
        printf(pkt[i]->kind == VPX_CODEC_CX_FRAME_PKT &&
                       (pkt[i]->data.frame.flags & VPX_FRAME_IS_KEY)
                   ? "K"
                   : "");
        fflush(stdout);
      }
    }
@ -661,6 +661,7 @@ int main(int argc, char **argv) {
      write_ivf_file_header(outfile[i], &cfg[i], frame_cnt - 1);
    fclose(outfile[i]);
  }
  printf("\n");
  return EXIT_SUCCESS;
 }
--- a/examples/vp9_spatial_svc_encoder.c
+++ b/examples/vp9_spatial_svc_encoder.c
@ -168,7 +168,7 @@ void usage_exit(void) {
 static void parse_command_line(int argc, const char **argv_,
                               AppInput *app_input, SvcContext *svc_ctx,
                               vpx_codec_enc_cfg_t *enc_cfg) {
-  struct arg arg;
+  struct arg arg = { 0 };
  char **argv = NULL;
  char **argi = NULL;
  char **argj = NULL;
@ -429,9 +429,8 @@ static void set_rate_control_stats(struct RateControlStats *rc,
        rc->layer_framerate[layer] = framerate / cfg->ts_rate_decimator[tl];
      if (tl > 0) {
        rc->layer_pfb[layer] =
-            1000.0 *
+            1000.0 * (cfg->layer_target_bitrate[layer] -
-            (cfg->layer_target_bitrate[layer] -
+                      cfg->layer_target_bitrate[layer - 1]) /
             cfg->layer_target_bitrate[layer - 1]) /
            (rc->layer_framerate[layer] - rc->layer_framerate[layer - 1]);
      } else {
        rc->layer_pfb[layer] = 1000.0 * cfg->layer_target_bitrate[layer] /
@ -503,12 +502,14 @@ static void printout_rate_control_summary(struct RateControlStats *rc,
  printf("Average, rms-variance, and percent-fluct: %f %f %f \n",
         rc->avg_st_encoding_bitrate, sqrt(rc->variance_st_encoding_bitrate),
         perc_fluctuation);
-  printf("Num of input, num of encoded (super) frames: %d %d \n", frame_cnt,
+  if (frame_cnt != tot_num_frames)
-         tot_num_frames);
+    die("Error: Number of input frames not equal to output encoded frames != "
        "%d tot_num_frames = %d\n",
        frame_cnt, tot_num_frames);
 }
 vpx_codec_err_t parse_superframe_index(const uint8_t *data, size_t data_sz,
-                                       uint64_t sizes[8], int *count) {
+                                       uint32_t sizes[8], int *count) {
  // A chunk ending with a byte matching 0xc0 is an invalid chunk unless
  // it is a super frame index. If the last byte of real video compression
  // data is 0xc0 the encoder must add a 0 byte. If we have the marker but
@ -560,10 +561,9 @@ vpx_codec_err_t parse_superframe_index(const uint8_t *data, size_t data_sz,
 // bypass/flexible mode. The pattern corresponds to the pattern
 // VP9E_TEMPORAL_LAYERING_MODE_0101 (temporal_layering_mode == 2) used in
 // non-flexible mode.
-void set_frame_flags_bypass_mode(int tl, int num_spatial_layers,
+void set_frame_flags_bypass_mode(int sl, int tl, int num_spatial_layers,
                                 int is_key_frame,
                                 vpx_svc_ref_frame_config_t *ref_frame_config) {
  int sl;
  for (sl = 0; sl < num_spatial_layers; ++sl) {
    if (!tl) {
      if (!sl) {
@ -573,8 +573,8 @@ void set_frame_flags_bypass_mode(int tl, int num_spatial_layers,
      } else {
        if (is_key_frame) {
          ref_frame_config->frame_flags[sl] =
-              VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF |
+              VP8_EFLAG_NO_REF_LAST | VP8_EFLAG_NO_REF_ARF |
-              VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_ARF;
+              VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF;
        } else {
          ref_frame_config->frame_flags[sl] =
              VP8_EFLAG_NO_REF_ARF | VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF;
@ -588,24 +588,14 @@ void set_frame_flags_bypass_mode(int tl, int num_spatial_layers,
      } else {
        ref_frame_config->frame_flags[sl] =
            VP8_EFLAG_NO_REF_ARF | VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_GF;
        if (sl == num_spatial_layers - 1)
          ref_frame_config->frame_flags[sl] =
              VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_REF_ARF |
              VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_GF;
      }
    }
    if (tl == 0) {
      ref_frame_config->lst_fb_idx[sl] = sl;
-      if (sl) {
+      if (sl)
-        if (is_key_frame) {
+        ref_frame_config->gld_fb_idx[sl] = sl - 1;
-          ref_frame_config->lst_fb_idx[sl] = sl - 1;
+      else
          ref_frame_config->gld_fb_idx[sl] = sl;
        } else {
          ref_frame_config->gld_fb_idx[sl] = sl - 1;
        }
      } else {
        ref_frame_config->gld_fb_idx[sl] = 0;
      }
      ref_frame_config->alt_fb_idx[sl] = 0;
    } else if (tl == 1) {
      ref_frame_config->lst_fb_idx[sl] = sl;
@ -616,9 +606,9 @@ void set_frame_flags_bypass_mode(int tl, int num_spatial_layers,
 }
 int main(int argc, const char **argv) {
-  AppInput app_input;
+  AppInput app_input = { 0 };
  VpxVideoWriter *writer = NULL;
-  VpxVideoInfo info;
+  VpxVideoInfo info = { 0 };
  vpx_codec_ctx_t codec;
  vpx_codec_enc_cfg_t enc_cfg;
  SvcContext svc_ctx;
@ -632,7 +622,7 @@ int main(int argc, const char **argv) {
  int end_of_stream = 0;
  int frames_received = 0;
 #if OUTPUT_RC_STATS
-  VpxVideoWriter *outfile[VPX_SS_MAX_LAYERS] = { NULL };
+  VpxVideoWriter *outfile[VPX_TS_MAX_LAYERS] = { NULL };
  struct RateControlStats rc;
  vpx_svc_layer_id_t layer_id;
  vpx_svc_ref_frame_config_t ref_frame_config;
@ -644,16 +634,14 @@ int main(int argc, const char **argv) {
  struct vpx_usec_timer timer;
  int64_t cx_time = 0;
  memset(&svc_ctx, 0, sizeof(svc_ctx));
-  memset(&app_input, 0, sizeof(AppInput));
+  svc_ctx.log_print = 1;
  memset(&info, 0, sizeof(VpxVideoInfo));
  exec_name = argv[0];
  parse_command_line(argc, argv, &app_input, &svc_ctx, &enc_cfg);
 // Allocate image buffer
 #if CONFIG_VP9_HIGHBITDEPTH
-  if (!vpx_img_alloc(&raw,
+  if (!vpx_img_alloc(&raw, enc_cfg.g_input_bit_depth == 8 ? VPX_IMG_FMT_I420
-                     enc_cfg.g_input_bit_depth == 8 ? VPX_IMG_FMT_I420
+                                                          : VPX_IMG_FMT_I42016,
                                                    : VPX_IMG_FMT_I42016,
                     enc_cfg.g_w, enc_cfg.g_h, 32)) {
    die("Failed to allocate image %dx%d\n", enc_cfg.g_w, enc_cfg.g_h);
  }
@ -672,10 +660,6 @@ int main(int argc, const char **argv) {
    die("Failed to initialize encoder\n");
 #if OUTPUT_RC_STATS
  rc.window_count = 1;
  rc.window_size = 15;  // Silence a static analysis warning.
  rc.avg_st_encoding_bitrate = 0.0;
  rc.variance_st_encoding_bitrate = 0.0;
  if (svc_ctx.output_rc_stat) {
    set_rate_control_stats(&rc, &enc_cfg);
    framerate = enc_cfg.g_timebase.den / enc_cfg.g_timebase.num;
@ -694,16 +678,16 @@ int main(int argc, const char **argv) {
      die("Failed to open %s for writing\n", app_input.output_filename);
  }
 #if OUTPUT_RC_STATS
-  // Write out spatial layer stream.
+  // For now, just write temporal layer streams.
-  // TODO(marpan/jianj): allow for writing each spatial and temporal stream.
+  // TODO(wonkap): do spatial by re-writing superframe.
  if (svc_ctx.output_rc_stat) {
-    for (sl = 0; sl < enc_cfg.ss_number_layers; ++sl) {
+    for (tl = 0; tl < enc_cfg.ts_number_layers; ++tl) {
      char file_name[PATH_MAX];
-      snprintf(file_name, sizeof(file_name), "%s_s%d.ivf",
+      snprintf(file_name, sizeof(file_name), "%s_t%d.ivf",
-               app_input.output_filename, sl);
+               app_input.output_filename, tl);
-      outfile[sl] = vpx_video_writer_open(file_name, kContainerIVF, &info);
+      outfile[tl] = vpx_video_writer_open(file_name, kContainerIVF, &info);
-      if (!outfile[sl]) die("Failed to open %s for writing", file_name);
+      if (!outfile[tl]) die("Failed to open %s for writing", file_name);
    }
  }
 #endif
@ -713,22 +697,12 @@ int main(int argc, const char **argv) {
  if (svc_ctx.speed != -1)
    vpx_codec_control(&codec, VP8E_SET_CPUUSED, svc_ctx.speed);
-  if (svc_ctx.threads) {
+  if (svc_ctx.threads)
-    vpx_codec_control(&codec, VP9E_SET_TILE_COLUMNS, get_msb(svc_ctx.threads));
+    vpx_codec_control(&codec, VP9E_SET_TILE_COLUMNS, (svc_ctx.threads >> 1));
    if (svc_ctx.threads > 1)
      vpx_codec_control(&codec, VP9E_SET_ROW_MT, 1);
    else
      vpx_codec_control(&codec, VP9E_SET_ROW_MT, 0);
  }
  if (svc_ctx.speed >= 5 && svc_ctx.aqmode == 1)
    vpx_codec_control(&codec, VP9E_SET_AQ_MODE, 3);
  if (svc_ctx.speed >= 5)
    vpx_codec_control(&codec, VP8E_SET_STATIC_THRESHOLD, 1);
  vpx_codec_control(&codec, VP8E_SET_MAX_INTRA_BITRATE_PCT, 900);
  vpx_codec_control(&codec, VP9E_SET_SVC_INTER_LAYER_PRED, 0);
  vpx_codec_control(&codec, VP9E_SET_NOISE_SENSITIVITY, 0);
  // Encode frames
  while (!end_of_stream) {
@ -757,9 +731,7 @@ int main(int argc, const char **argv) {
      // the encode for the whole superframe. The encoder will internally loop
      // over all the spatial layers for the current superframe.
      vpx_codec_control(&codec, VP9E_SET_SVC_LAYER_ID, &layer_id);
-      // TODO(jianj): Fix the parameter passing for "is_key_frame" in
+      set_frame_flags_bypass_mode(sl, layer_id.temporal_layer_id,
      // set_frame_flags_bypass_model() for case of periodic key frames.
      set_frame_flags_bypass_mode(layer_id.temporal_layer_id,
                                  svc_ctx.spatial_layers, frame_cnt == 0,
                                  &ref_frame_config);
      vpx_codec_control(&codec, VP9E_SET_SVC_REF_FRAME_CONFIG,
@ -770,17 +742,6 @@ int main(int argc, const char **argv) {
        ++rc.layer_input_frames[sl * enc_cfg.ts_number_layers +
                                layer_id.temporal_layer_id];
      }
    } else {
      // For the fixed pattern SVC, temporal layer is given by superframe count.
      unsigned int tl = 0;
      if (enc_cfg.ts_number_layers == 2)
        tl = (frame_cnt % 2 != 0);
      else if (enc_cfg.ts_number_layers == 3) {
        if (frame_cnt % 2 != 0) tl = 2;
        if ((frame_cnt > 1) && ((frame_cnt - 2) % 4 == 0)) tl = 1;
      }
      for (sl = 0; sl < enc_cfg.ss_number_layers; ++sl)
        ++rc.layer_input_frames[sl * enc_cfg.ts_number_layers + tl];
    }
    vpx_usec_timer_start(&timer);
@ -790,6 +751,7 @@ int main(int argc, const char **argv) {
    vpx_usec_timer_mark(&timer);
    cx_time += vpx_usec_timer_elapsed(&timer);
    printf("%s", vpx_svc_get_message(&svc_ctx));
    fflush(stdout);
    if (res != VPX_CODEC_OK) {
      die_codec(&codec, "Failed to encode frame");
@ -801,64 +763,51 @@ int main(int argc, const char **argv) {
          SvcInternal_t *const si = (SvcInternal_t *)svc_ctx.internal;
          if (cx_pkt->data.frame.sz > 0) {
 #if OUTPUT_RC_STATS
-            uint64_t sizes[8];
+            uint32_t sizes[8];
            uint64_t sizes_parsed[8];
            int count = 0;
            vp9_zero(sizes);
            vp9_zero(sizes_parsed);
 #endif
            vpx_video_writer_write_frame(writer, cx_pkt->data.frame.buf,
                                         cx_pkt->data.frame.sz,
                                         cx_pkt->data.frame.pts);
 #if OUTPUT_RC_STATS
-            // TODO(marpan): Put this (to line728) in separate function.
+            // TODO(marpan/wonkap): Put this (to line728) in separate function.
            if (svc_ctx.output_rc_stat) {
              vpx_codec_control(&codec, VP9E_GET_SVC_LAYER_ID, &layer_id);
              parse_superframe_index(cx_pkt->data.frame.buf,
-                                     cx_pkt->data.frame.sz, sizes_parsed,
+                                     cx_pkt->data.frame.sz, sizes, &count);
-                                     &count);
+              // Note computing input_layer_frames here won't account for frame
-              if (enc_cfg.ss_number_layers == 1)
+              // drops in rate control stats.
-                sizes[0] = cx_pkt->data.frame.sz;
+              // TODO(marpan): Fix this for non-bypass mode so we can get stats
              // for dropped frames.
              if (svc_ctx.temporal_layering_mode !=
                  VP9E_TEMPORAL_LAYERING_MODE_BYPASS) {
                int num_layers_encoded = 0;
                for (sl = 0; sl < enc_cfg.ss_number_layers; ++sl) {
-                  sizes[sl] = 0;
+                  ++rc.layer_input_frames[sl * enc_cfg.ts_number_layers +
-                  if (cx_pkt->data.frame.spatial_layer_encoded[sl]) {
+                                          layer_id.temporal_layer_id];
                    sizes[sl] = sizes_parsed[num_layers_encoded];
                    num_layers_encoded++;
                  }
                }
                for (sl = 0; sl < enc_cfg.ss_number_layers; ++sl) {
                  unsigned int sl2;
                  uint64_t tot_size = 0;
                  for (sl2 = 0; sl2 <= sl; ++sl2) {
                    if (cx_pkt->data.frame.spatial_layer_encoded[sl2])
                      tot_size += sizes[sl2];
                  }
                  if (tot_size > 0)
                    vpx_video_writer_write_frame(
                        outfile[sl], cx_pkt->data.frame.buf, (size_t)(tot_size),
                        cx_pkt->data.frame.pts);
                }
              }
              for (tl = layer_id.temporal_layer_id;
                   tl < enc_cfg.ts_number_layers; ++tl) {
                vpx_video_writer_write_frame(
                    outfile[tl], cx_pkt->data.frame.buf, cx_pkt->data.frame.sz,
                    cx_pkt->data.frame.pts);
              }
              for (sl = 0; sl < enc_cfg.ss_number_layers; ++sl) {
-                if (cx_pkt->data.frame.spatial_layer_encoded[sl]) {
+                for (tl = layer_id.temporal_layer_id;
-                  for (tl = layer_id.temporal_layer_id;
+                     tl < enc_cfg.ts_number_layers; ++tl) {
-                       tl < enc_cfg.ts_number_layers; ++tl) {
+                  const int layer = sl * enc_cfg.ts_number_layers + tl;
-                    const int layer = sl * enc_cfg.ts_number_layers + tl;
+                  ++rc.layer_tot_enc_frames[layer];
-                    ++rc.layer_tot_enc_frames[layer];
+                  rc.layer_encoding_bitrate[layer] += 8.0 * sizes[sl];
-                    rc.layer_encoding_bitrate[layer] += 8.0 * sizes[sl];
+                  // Keep count of rate control stats per layer, for non-key
-                    // Keep count of rate control stats per layer, for non-key
+                  // frames.
-                    // frames.
+                  if (tl == (unsigned int)layer_id.temporal_layer_id &&
-                    if (tl == (unsigned int)layer_id.temporal_layer_id &&
+                      !(cx_pkt->data.frame.flags & VPX_FRAME_IS_KEY)) {
-                        !(cx_pkt->data.frame.flags & VPX_FRAME_IS_KEY)) {
+                    rc.layer_avg_frame_size[layer] += 8.0 * sizes[sl];
-                      rc.layer_avg_frame_size[layer] += 8.0 * sizes[sl];
+                    rc.layer_avg_rate_mismatch[layer] +=
-                      rc.layer_avg_rate_mismatch[layer] +=
+                        fabs(8.0 * sizes[sl] - rc.layer_pfb[layer]) /
-                          fabs(8.0 * sizes[sl] - rc.layer_pfb[layer]) /
+                        rc.layer_pfb[layer];
-                          rc.layer_pfb[layer];
+                    ++rc.layer_enc_frames[layer];
                      ++rc.layer_enc_frames[layer];
                    }
                  }
                }
              }
@ -867,9 +816,9 @@ int main(int argc, const char **argv) {
              // window of size rc->window, shifted by rc->window / 2.
              // Ignore first window segment, due to key frame.
              if (frame_cnt > (unsigned int)rc.window_size) {
                tl = layer_id.temporal_layer_id;
                for (sl = 0; sl < enc_cfg.ss_number_layers; ++sl) {
-                  if (cx_pkt->data.frame.spatial_layer_encoded[sl])
+                  sum_bitrate += 0.001 * 8.0 * sizes[sl] * framerate;
                    sum_bitrate += 0.001 * 8.0 * sizes[sl] * framerate;
                }
                if (frame_cnt % rc.window_size == 0) {
                  rc.window_count += 1;
@ -884,6 +833,7 @@ int main(int argc, const char **argv) {
              // Second shifted window.
              if (frame_cnt >
                  (unsigned int)(rc.window_size + rc.window_size / 2)) {
                tl = layer_id.temporal_layer_id;
                for (sl = 0; sl < enc_cfg.ss_number_layers; ++sl) {
                  sum_bitrate2 += 0.001 * 8.0 * sizes[sl] * framerate;
                }
@ -950,8 +900,8 @@ int main(int argc, const char **argv) {
  }
 #if OUTPUT_RC_STATS
  if (svc_ctx.output_rc_stat) {
-    for (sl = 0; sl < enc_cfg.ss_number_layers; ++sl) {
+    for (tl = 0; tl < enc_cfg.ts_number_layers; ++tl) {
-      vpx_video_writer_close(outfile[sl]);
+      vpx_video_writer_close(outfile[tl]);
    }
  }
 #endif
@ -960,7 +910,7 @@ int main(int argc, const char **argv) {
         1000000 * (double)frame_cnt / (double)cx_time);
  vpx_img_free(&raw);
  // display average size, psnr
-  vpx_svc_dump_statistics(&svc_ctx);
+  printf("%s", vpx_svc_dump_statistics(&svc_ctx));
  vpx_svc_release(&svc_ctx);
  return EXIT_SUCCESS;
 }
--- a/examples/vpx_temporal_svc_encoder.c
+++ b/examples/vpx_temporal_svc_encoder.c
@ -22,34 +22,21 @@
 #include "../vpx_ports/vpx_timer.h"
 #include "vpx/vp8cx.h"
 #include "vpx/vpx_encoder.h"
 #include "vpx_ports/bitops.h"
 #include "../tools_common.h"
 #include "../video_writer.h"
 #define ROI_MAP 0
 #define zero(Dest) memset(&Dest, 0, sizeof(Dest));
 static const char *exec_name;
 void usage_exit(void) { exit(EXIT_FAILURE); }
-// Denoiser states for vp8, for temporal denoising.
+// Denoiser states, for temporal denoising.
-enum denoiserStateVp8 {
+enum denoiserState {
-  kVp8DenoiserOff,
+  kDenoiserOff,
-  kVp8DenoiserOnYOnly,
+  kDenoiserOnYOnly,
-  kVp8DenoiserOnYUV,
+  kDenoiserOnYUV,
-  kVp8DenoiserOnYUVAggressive,
+  kDenoiserOnYUVAggressive,
-  kVp8DenoiserOnAdaptive
+  kDenoiserOnAdaptive
 };
 // Denoiser states for vp9, for temporal denoising.
 enum denoiserStateVp9 {
  kVp9DenoiserOff,
  kVp9DenoiserOnYOnly,
  // For SVC: denoise the top two spatial layers.
  kVp9DenoiserOnYTwoSpatialLayers
 };
 static int mode_to_num_layers[13] = { 1, 2, 2, 3, 3, 3, 3, 5, 2, 3, 3, 3, 3 };
@ -102,10 +89,9 @@ static void set_rate_control_metrics(struct RateControlMetrics *rc,
  for (i = 0; i < cfg->ts_number_layers; ++i) {
    if (i > 0) {
      rc->layer_framerate[i] = framerate / cfg->ts_rate_decimator[i];
-      rc->layer_pfb[i] =
+      rc->layer_pfb[i] = 1000.0 * (rc->layer_target_bitrate[i] -
-          1000.0 *
+                                   rc->layer_target_bitrate[i - 1]) /
-          (rc->layer_target_bitrate[i] - rc->layer_target_bitrate[i - 1]) /
+                         (rc->layer_framerate[i] - rc->layer_framerate[i - 1]);
          (rc->layer_framerate[i] - rc->layer_framerate[i - 1]);
    }
    rc->layer_input_frames[i] = 0;
    rc->layer_enc_frames[i] = 0;
@ -168,75 +154,6 @@ static void printout_rate_control_summary(struct RateControlMetrics *rc,
    die("Error: Number of input frames not equal to output! \n");
 }
 #if ROI_MAP
 // Fills |roi| with a demonstration region-of-interest configuration for the
 // encoder named by |enc_name|.
 //   enc_name: encoder name; must start with "vp8" or "vp9", otherwise the
 //             function calls die("unsupported codec.").
 //   cfg:      encoder config; only g_w and g_h are read, to size the map.
 //   roi:      output. It is cleared first, then populated. roi->roi_map is
 //             allocated here with calloc() and is not freed by this function,
 //             so the caller is responsible for free()ing it.
 // The resulting map uses two segment ids: 1 for the central part of the frame
 // and 0 for everything else.
 static void set_roi_map(const char *enc_name, vpx_codec_enc_cfg_t *cfg,
                        vpx_roi_map_t *roi) {
  unsigned int i, j;
  int block_size = 0;
  uint8_t is_vp8 = strncmp(enc_name, "vp8", 3) == 0 ? 1 : 0;
  uint8_t is_vp9 = strncmp(enc_name, "vp9", 3) == 0 ? 1 : 0;
  if (!is_vp8 && !is_vp9) {
    die("unsupported codec.");
  }
  zero(*roi);
  // 8x8 map units for vp9, 16x16 for vp8.
  block_size = is_vp9 && !is_vp8 ? 8 : 16;
  // ROI is based on the segments (4 for vp8, 8 for vp9), smallest unit for
  // segment is 16x16 for vp8, 8x8 for vp9.
  // Round up so that partial blocks at the right/bottom edges are covered.
  roi->rows = (cfg->g_h + block_size - 1) / block_size;
  roi->cols = (cfg->g_w + block_size - 1) / block_size;
  // Applies delta QP on the segment blocks, varies from -63 to 63.
  // Setting to negative means lower QP (better quality).
  // Below we set delta_q to the extreme (-63) to show strong effect.
  // VP8 uses the first 4 segments. VP9 uses all 8 segments.
  zero(roi->delta_q);
  roi->delta_q[1] = -63;
  // Applies delta loopfilter strength on the segment blocks, varies from -63 to
  // 63. Setting to positive means stronger loopfilter. VP8 uses the first 4
  // segments. VP9 uses all 8 segments.
  zero(roi->delta_lf);
  if (is_vp8) {
    // Applies skip encoding threshold on the segment blocks, varies from 0 to
    // UINT_MAX. Larger value means more skipping of encoding is possible.
    // This skip threshold only applies on delta frames.
    zero(roi->static_threshold);
  }
  if (is_vp9) {
    // Apply skip segment. Setting to 1 means this block will be copied from
    // previous frame.
    zero(roi->skip);
  }
  if (is_vp9) {
    // Apply ref frame segment.
    // -1 : Do not apply this segment.
    //  0 : Force using intra.
    //  1 : Force using last.
    //  2 : Force using golden.
    //  3 : Force using altref but not used in non-rd pickmode for 0 lag.
    memset(roi->ref_frame, -1, sizeof(roi->ref_frame));
    roi->ref_frame[1] = 1;
  }
  // Use 2 states: 1 is center square, 0 is the rest.
  // NOTE(review): the calloc() result is not checked for NULL before the loop
  // below writes through it.
  roi->roi_map =
      (uint8_t *)calloc(roi->rows * roi->cols, sizeof(*roi->roi_map));
  for (i = 0; i < roi->rows; ++i) {
    for (j = 0; j < roi->cols; ++j) {
      // Mark units strictly inside the middle half of the map (between 1/4
      // and 3/4 of the rows and columns) as segment 1.
      if (i > (roi->rows >> 2) && i < ((roi->rows * 3) >> 2) &&
          j > (roi->cols >> 2) && j < ((roi->cols * 3) >> 2)) {
        roi->roi_map[i * roi->cols + j] = 1;
      }
    }
  }
 }
 #endif
 // Temporal scaling parameters:
 // NOTE: The 3 prediction frames cannot be used interchangeably due to
 // differences in the way they are handled throughout the code. The
@ -578,7 +495,6 @@ int main(int argc, char **argv) {
  vpx_codec_err_t res;
  unsigned int width;
  unsigned int height;
  uint32_t error_resilient = 0;
  int speed;
  int frame_avail;
  int got_data;
@ -589,15 +505,16 @@ int main(int argc, char **argv) {
  int layering_mode = 0;
  int layer_flags[VPX_TS_MAX_PERIODICITY] = { 0 };
  int flag_periodicity = 1;
-#if ROI_MAP
+#if VPX_ENCODER_ABI_VERSION > (4 + VPX_CODEC_ABI_VERSION)
  vpx_roi_map_t roi;
 #endif
  vpx_svc_layer_id_t layer_id = { 0, 0 };
 #else
  vpx_svc_layer_id_t layer_id = { 0 };
 #endif
  const VpxInterface *encoder = NULL;
  FILE *infile = NULL;
  struct RateControlMetrics rc;
  int64_t cx_time = 0;
-  const int min_args_base = 13;
+  const int min_args_base = 12;
 #if CONFIG_VP9_HIGHBITDEPTH
  vpx_bit_depth_t bit_depth = VPX_BITS_8;
  int input_bit_depth = 8;
@ -609,21 +526,17 @@ int main(int argc, char **argv) {
  double sum_bitrate2 = 0.0;
  double framerate = 30.0;
  zero(rc.layer_target_bitrate);
  exec_name = argv[0];
  // Check usage and arguments.
  if (argc < min_args) {
 #if CONFIG_VP9_HIGHBITDEPTH
    die("Usage: %s <infile> <outfile> <codec_type(vp8/vp9)> <width> <height> "
-        "<rate_num> <rate_den> <speed> <frame_drop_threshold> "
+        "<rate_num> <rate_den> <speed> <frame_drop_threshold> <threads> <mode> "
        "<error_resilient> <threads> <mode> "
        "<Rate_0> ... <Rate_nlayers-1> <bit-depth> \n",
        argv[0]);
 #else
    die("Usage: %s <infile> <outfile> <codec_type(vp8/vp9)> <width> <height> "
-        "<rate_num> <rate_den> <speed> <frame_drop_threshold> "
+        "<rate_num> <rate_den> <speed> <frame_drop_threshold> <threads> <mode> "
        "<error_resilient> <threads> <mode> "
        "<Rate_0> ... <Rate_nlayers-1> \n",
        argv[0]);
 #endif  // CONFIG_VP9_HIGHBITDEPTH
@ -640,9 +553,9 @@ int main(int argc, char **argv) {
    die("Invalid resolution: %d x %d", width, height);
  }
-  layering_mode = (int)strtol(argv[12], NULL, 0);
+  layering_mode = (int)strtol(argv[11], NULL, 0);
  if (layering_mode < 0 || layering_mode > 13) {
-    die("Invalid layering mode (0..12) %s", argv[12]);
+    die("Invalid layering mode (0..12) %s", argv[11]);
  }
  if (argc != min_args + mode_to_num_layers[layering_mode]) {
@ -706,11 +619,11 @@ int main(int argc, char **argv) {
  for (i = min_args_base;
       (int)i < min_args_base + mode_to_num_layers[layering_mode]; ++i) {
-    rc.layer_target_bitrate[i - 13] = (int)strtol(argv[i], NULL, 0);
+    rc.layer_target_bitrate[i - 12] = (int)strtol(argv[i], NULL, 0);
    if (strncmp(encoder->name, "vp8", 3) == 0)
-      cfg.ts_target_bitrate[i - 13] = rc.layer_target_bitrate[i - 13];
+      cfg.ts_target_bitrate[i - 12] = rc.layer_target_bitrate[i - 12];
    else if (strncmp(encoder->name, "vp9", 3) == 0)
-      cfg.layer_target_bitrate[i - 13] = rc.layer_target_bitrate[i - 13];
+      cfg.layer_target_bitrate[i - 12] = rc.layer_target_bitrate[i - 12];
  }
  // Real time parameters.
@ -721,7 +634,7 @@ int main(int argc, char **argv) {
  if (strncmp(encoder->name, "vp9", 3) == 0) cfg.rc_max_quantizer = 52;
  cfg.rc_undershoot_pct = 50;
  cfg.rc_overshoot_pct = 50;
-  cfg.rc_buf_initial_sz = 600;
+  cfg.rc_buf_initial_sz = 500;
  cfg.rc_buf_optimal_sz = 600;
  cfg.rc_buf_sz = 1000;
@ -729,14 +642,10 @@ int main(int argc, char **argv) {
  cfg.rc_resize_allowed = 0;
  // Use 1 thread as default.
-  cfg.g_threads = (unsigned int)strtoul(argv[11], NULL, 0);
+  cfg.g_threads = (unsigned int)strtoul(argv[10], NULL, 0);
  error_resilient = (uint32_t)strtoul(argv[10], NULL, 0);
  if (error_resilient != 0 && error_resilient != 1) {
    die("Invalid value for error resilient (0, 1): %d.", error_resilient);
  }
  // Enable error resilient mode.
-  cfg.g_error_resilient = error_resilient;
+  cfg.g_error_resilient = 1;
  cfg.g_lag_in_frames = 0;
  cfg.kf_mode = VPX_KF_AUTO;
@ -791,39 +700,18 @@ int main(int argc, char **argv) {
  if (strncmp(encoder->name, "vp8", 3) == 0) {
    vpx_codec_control(&codec, VP8E_SET_CPUUSED, -speed);
-    vpx_codec_control(&codec, VP8E_SET_NOISE_SENSITIVITY, kVp8DenoiserOff);
+    vpx_codec_control(&codec, VP8E_SET_NOISE_SENSITIVITY, kDenoiserOff);
    vpx_codec_control(&codec, VP8E_SET_STATIC_THRESHOLD, 1);
    vpx_codec_control(&codec, VP8E_SET_GF_CBR_BOOST_PCT, 0);
 #if ROI_MAP
    set_roi_map(encoder->name, &cfg, &roi);
    if (vpx_codec_control(&codec, VP8E_SET_ROI_MAP, &roi))
      die_codec(&codec, "Failed to set ROI map");
 #endif
  } else if (strncmp(encoder->name, "vp9", 3) == 0) {
    vpx_svc_extra_cfg_t svc_params;
    memset(&svc_params, 0, sizeof(svc_params));
    vpx_codec_control(&codec, VP8E_SET_CPUUSED, speed);
    vpx_codec_control(&codec, VP9E_SET_AQ_MODE, 3);
    vpx_codec_control(&codec, VP9E_SET_GF_CBR_BOOST_PCT, 0);
    vpx_codec_control(&codec, VP9E_SET_FRAME_PARALLEL_DECODING, 0);
    vpx_codec_control(&codec, VP9E_SET_FRAME_PERIODIC_BOOST, 0);
-    vpx_codec_control(&codec, VP9E_SET_NOISE_SENSITIVITY, kVp9DenoiserOff);
+    vpx_codec_control(&codec, VP9E_SET_NOISE_SENSITIVITY, kDenoiserOff);
    vpx_codec_control(&codec, VP8E_SET_STATIC_THRESHOLD, 1);
    vpx_codec_control(&codec, VP9E_SET_TUNE_CONTENT, 0);
-    vpx_codec_control(&codec, VP9E_SET_TILE_COLUMNS, get_msb(cfg.g_threads));
+    vpx_codec_control(&codec, VP9E_SET_TILE_COLUMNS, (cfg.g_threads >> 1));
 #if ROI_MAP
    set_roi_map(encoder->name, &cfg, &roi);
    if (vpx_codec_control(&codec, VP9E_SET_ROI_MAP, &roi))
      die_codec(&codec, "Failed to set ROI map");
    vpx_codec_control(&codec, VP9E_SET_AQ_MODE, 0);
 #endif
    // TODO(marpan/jianj): There is an issue with row-mt for low resolutions at
    // high speed settings, disable its use for those cases for now.
    if (cfg.g_threads > 1 && ((cfg.g_w > 320 && cfg.g_h > 240) || speed < 7))
      vpx_codec_control(&codec, VP9E_SET_ROW_MT, 1);
    else
      vpx_codec_control(&codec, VP9E_SET_ROW_MT, 0);
    if (vpx_codec_control(&codec, VP9E_SET_SVC, layering_mode > 0 ? 1 : 0))
      die_codec(&codec, "Failed to set SVC");
    for (i = 0; i < cfg.ts_number_layers; ++i) {
@ -842,7 +730,7 @@ int main(int argc, char **argv) {
  // For generating smaller key frames, use a smaller max_intra_size_pct
  // value, like 100 or 200.
  {
-    const int max_intra_size_pct = 1000;
+    const int max_intra_size_pct = 900;
    vpx_codec_control(&codec, VP8E_SET_MAX_INTRA_BITRATE_PCT,
                      max_intra_size_pct);
  }
@ -852,8 +740,10 @@ int main(int argc, char **argv) {
    struct vpx_usec_timer timer;
    vpx_codec_iter_t iter = NULL;
    const vpx_codec_cx_pkt_t *pkt;
 #if VPX_ENCODER_ABI_VERSION > (4 + VPX_CODEC_ABI_VERSION)
    // Update the temporal layer_id. No spatial layers in this test.
    layer_id.spatial_layer_id = 0;
 #endif
    layer_id.temporal_layer_id =
        cfg.ts_layer_id[frame_cnt % cfg.ts_periodicity];
    if (strncmp(encoder->name, "vp9", 3) == 0) {
@ -945,8 +835,5 @@ int main(int argc, char **argv) {
  for (i = 0; i < cfg.ts_number_layers; ++i) vpx_video_writer_close(outfile[i]);
  vpx_img_free(&raw);
 #if ROI_MAP
  free(roi.roi_map);
 #endif
  return EXIT_SUCCESS;
 }
--- a/libs.doxy_template
+++ b/libs.doxy_template
@ -943,6 +943,18 @@ GENERATE_XML           = NO
 XML_OUTPUT             = xml
 # The XML_SCHEMA tag can be used to specify an XML schema,
 # which can be used by a validating XML parser to check the
 # syntax of the XML files.
 XML_SCHEMA             =
 # The XML_DTD tag can be used to specify an XML DTD,
 # which can be used by a validating XML parser to check the
 # syntax of the XML files.
 XML_DTD                =
 # If the XML_PROGRAMLISTING tag is set to YES Doxygen will
 # dump the program listings (including syntax highlighting
 # and cross-referencing information) to the XML output. Note that
--- a/libs.mk
+++ b/libs.mk
@ -88,7 +88,7 @@ ifeq ($(CONFIG_VP9_ENCODER),yes)
  CODEC_EXPORTS-yes += $(addprefix $(VP9_PREFIX),$(VP9_CX_EXPORTS))
  CODEC_SRCS-yes += $(VP9_PREFIX)vp9cx.mk vpx/vp8.h vpx/vp8cx.h
  INSTALL-LIBS-yes += include/vpx/vp8.h include/vpx/vp8cx.h
-  INSTALL-LIBS-yes += include/vpx/svc_context.h
+  INSTALL-LIBS-$(CONFIG_SPATIAL_SVC) += include/vpx/svc_context.h
  INSTALL_MAPS += include/vpx/% $(SRC_PATH_BARE)/$(VP9_PREFIX)/%
  CODEC_DOC_SRCS += vpx/vp8.h vpx/vp8cx.h
  CODEC_DOC_SECTIONS += vp9 vp9_encoder
@ -149,11 +149,12 @@ CODEC_SRCS-yes += $(BUILD_PFX)vpx_config.c
 INSTALL-SRCS-no += $(BUILD_PFX)vpx_config.c
 ifeq ($(ARCH_X86)$(ARCH_X86_64),yes)
 INSTALL-SRCS-$(CONFIG_CODEC_SRCS) += third_party/x86inc/x86inc.asm
 INSTALL-SRCS-$(CONFIG_CODEC_SRCS) += vpx_dsp/x86/bitdepth_conversion_sse2.asm
 endif
 CODEC_EXPORTS-yes += vpx/exports_com
 CODEC_EXPORTS-$(CONFIG_ENCODERS) += vpx/exports_enc
-CODEC_EXPORTS-$(CONFIG_VP9_ENCODER) += vpx/exports_spatial_svc
+ifeq ($(CONFIG_SPATIAL_SVC),yes)
 CODEC_EXPORTS-$(CONFIG_ENCODERS) += vpx/exports_spatial_svc
 endif
 CODEC_EXPORTS-$(CONFIG_DECODERS) += vpx/exports_dec
 INSTALL-LIBS-yes += include/vpx/vpx_codec.h
@ -186,13 +187,6 @@ libvpx_srcs.txt:
 	@echo $(CODEC_SRCS) | xargs -n1 echo | LC_ALL=C sort -u > $@
 CLEAN-OBJS += libvpx_srcs.txt
 # Assembly files that are included, but don't define symbols themselves.
 # Filtered out to avoid Windows build warnings.
 ASM_INCLUDES := \
    third_party/x86inc/x86inc.asm \
    vpx_config.asm \
    vpx_ports/x86_abi_support.asm \
    vpx_dsp/x86/bitdepth_conversion_sse2.asm \
 ifeq ($(CONFIG_EXTERNAL_BUILD),yes)
 ifeq ($(CONFIG_MSVS),yes)
@ -204,7 +198,12 @@ vpx.def: $(call enabled,CODEC_EXPORTS)
            --out=$@ $^
 CLEAN-OBJS += vpx.def
-vpx.$(VCPROJ_SFX): VCPROJ_SRCS=$(filter-out $(addprefix %, $(ASM_INCLUDES)), $^)
+# Assembly files that are included, but don't define symbols themselves.
 # Filtered out to avoid Visual Studio build warnings.
 ASM_INCLUDES := \
    third_party/x86inc/x86inc.asm \
    vpx_config.asm \
    vpx_ports/x86_abi_support.asm \
 vpx.$(VCPROJ_SFX): $(CODEC_SRCS) vpx.def
 	@echo "    [CREATE] $@"
@ -218,15 +217,7 @@ vpx.$(VCPROJ_SFX): $(CODEC_SRCS) vpx.def
            --ver=$(CONFIG_VS_VERSION) \
            --src-path-bare="$(SRC_PATH_BARE)" \
            --out=$@ $(CFLAGS) \
-            $(filter $(SRC_PATH_BARE)/vp8/%.c, $(VCPROJ_SRCS)) \
+            $(filter-out $(addprefix %, $(ASM_INCLUDES)), $^) \
            $(filter $(SRC_PATH_BARE)/vp8/%.h, $(VCPROJ_SRCS)) \
            $(filter $(SRC_PATH_BARE)/vp9/%.c, $(VCPROJ_SRCS)) \
            $(filter $(SRC_PATH_BARE)/vp9/%.h, $(VCPROJ_SRCS)) \
            $(filter $(SRC_PATH_BARE)/vpx/%, $(VCPROJ_SRCS)) \
            $(filter $(SRC_PATH_BARE)/vpx_dsp/%, $(VCPROJ_SRCS)) \
            $(filter-out $(addprefix $(SRC_PATH_BARE)/, \
                           vp8/%.c vp8/%.h vp9/%.c vp9/%.h vpx/% vpx_dsp/%), \
              $(VCPROJ_SRCS)) \
            --src-path-bare="$(SRC_PATH_BARE)" \
 PROJECTS-yes += vpx.$(VCPROJ_SFX)
@ -236,12 +227,12 @@ vpx.$(VCPROJ_SFX): $(RTCD)
 endif
 else
-LIBVPX_OBJS=$(call objs, $(filter-out $(ASM_INCLUDES), $(CODEC_SRCS)))
+LIBVPX_OBJS=$(call objs,$(CODEC_SRCS))
 OBJS-yes += $(LIBVPX_OBJS)
 LIBS-$(if yes,$(CONFIG_STATIC)) += $(BUILD_PFX)libvpx.a $(BUILD_PFX)libvpx_g.a
 $(BUILD_PFX)libvpx_g.a: $(LIBVPX_OBJS)
-SO_VERSION_MAJOR := 5
+SO_VERSION_MAJOR := 4
 SO_VERSION_MINOR := 0
 SO_VERSION_PATCH := 0
 ifeq ($(filter darwin%,$(TGT_OS)),$(TGT_OS))
@ -400,7 +391,7 @@ LIBVPX_TEST_SRCS=$(addprefix test/,$(call enabled,LIBVPX_TEST_SRCS))
 LIBVPX_TEST_BIN=./test_libvpx$(EXE_SFX)
 LIBVPX_TEST_DATA=$(addprefix $(LIBVPX_TEST_DATA_PATH)/,\
                     $(call enabled,LIBVPX_TEST_DATA))
-libvpx_test_data_url=https://storage.googleapis.com/downloads.webmproject.org/test_data/libvpx/$(1)
+libvpx_test_data_url=http://downloads.webmproject.org/test_data/libvpx/$(1)
 TEST_INTRA_PRED_SPEED_BIN=./test_intra_pred_speed$(EXE_SFX)
 TEST_INTRA_PRED_SPEED_SRCS=$(addprefix test/,$(call enabled,TEST_INTRA_PRED_SPEED_SRCS))
@ -413,16 +404,8 @@ CLEAN-OBJS += libvpx_test_srcs.txt
 $(LIBVPX_TEST_DATA): $(SRC_PATH_BARE)/test/test-data.sha1
 	@echo "    [DOWNLOAD] $@"
-	# Attempt to download the file using curl, retrying once if it fails for a
+	$(qexec)trap 'rm -f $@' INT TERM &&\
-	# partial file (18).
+            curl -L -o $@ $(call libvpx_test_data_url,$(@F))
 	$(qexec)( \
 	  trap 'rm -f $@' INT TERM; \
 	  curl="curl --retry 1 -L -o $@ $(call libvpx_test_data_url,$(@F))"; \
 	  $$curl; \
 	  case "$$?" in \
 	    18) $$curl -C -;; \
 	  esac \
 	)
 testdata:: $(LIBVPX_TEST_DATA)
 	$(qexec)[ -x "$$(which sha1sum)" ] && sha1sum=sha1sum;\
--- a/rate_hist.c
+++ b/rate_hist.c
@ -37,13 +37,7 @@ struct rate_hist {
 struct rate_hist *init_rate_histogram(const vpx_codec_enc_cfg_t *cfg,
                                      const vpx_rational_t *fps) {
  int i;
-  struct rate_hist *hist = calloc(1, sizeof(*hist));
+  struct rate_hist *hist = malloc(sizeof(*hist));
  if (hist == NULL || cfg == NULL || fps == NULL || fps->num == 0 ||
      fps->den == 0) {
    destroy_rate_histogram(hist);
    return NULL;
  }
  // Determine the number of samples in the buffer. Use the file's framerate
  // to determine the number of frames in rc_buf_sz milliseconds, with an
@ -86,11 +80,7 @@ void update_rate_histogram(struct rate_hist *hist,
                      (uint64_t)cfg->g_timebase.num /
                      (uint64_t)cfg->g_timebase.den;
-  int idx;
+  int idx = hist->frames++ % hist->samples;
  if (hist == NULL || cfg == NULL || pkt == NULL) return;
  idx = hist->frames++ % hist->samples;
  hist->pts[idx] = now;
  hist->sz[idx] = (int)pkt->data.frame.sz;
@ -126,14 +116,9 @@ void update_rate_histogram(struct rate_hist *hist,
 static int merge_hist_buckets(struct hist_bucket *bucket, int max_buckets,
                              int *num_buckets) {
  int small_bucket = 0, merge_bucket = INT_MAX, big_bucket = 0;
-  int buckets;
+  int buckets = *num_buckets;
  int i;
  assert(bucket != NULL);
  assert(num_buckets != NULL);
  buckets = *num_buckets;
  /* Find the extrema for this list of buckets */
  big_bucket = small_bucket = 0;
  for (i = 0; i < buckets; i++) {
@ -196,8 +181,6 @@ static void show_histogram(const struct hist_bucket *bucket, int buckets,
  const char *pat1, *pat2;
  int i;
  assert(bucket != NULL);
  switch ((int)(log(bucket[buckets - 1].high) / log(10)) + 1) {
    case 1:
    case 2:
@ -276,8 +259,6 @@ void show_rate_histogram(struct rate_hist *hist, const vpx_codec_enc_cfg_t *cfg,
  int i, scale;
  int buckets = 0;
  if (hist == NULL || cfg == NULL) return;
  for (i = 0; i < RATE_BINS; i++) {
    if (hist->bucket[i].low == INT_MAX) continue;
    hist->bucket[buckets++] = hist->bucket[i];
--- a/test/acm_random.h
+++ b/test/acm_random.h
@ -11,10 +11,6 @@
 #ifndef TEST_ACM_RANDOM_H_
 #define TEST_ACM_RANDOM_H_
 #include <assert.h>
 #include <limits>
 #include "third_party/googletest/src/include/gtest/gtest.h"
 #include "vpx/vpx_integer.h"
@ -54,13 +50,6 @@ class ACMRandom {
    return r < 128 ? r << 4 : r >> 4;
  }
  // Returns the value produced by random_.Generate(range).
  // |range| must not exceed testing::internal::Random::kMaxRange; this is
  // enforced with assert(), so it is only checked when assertions are enabled.
  // NOTE(review): presumably the result lies in [0, range), per gtest's
  // Random::Generate contract -- confirm against the bundled googletest.
  uint32_t RandRange(const uint32_t range) {
    // testing::internal::Random::Generate only supports ranges up to
    // testing::internal::Random::kMaxRange.
    assert(range <= testing::internal::Random::kMaxRange);
    return random_.Generate(range);
  }
  int PseudoUniform(int range) { return random_.Generate(range); }
  int operator()(int n) { return PseudoUniform(n); }
--- a/test/android/Android.mk
+++ b/test/android/Android.mk
@ -32,7 +32,6 @@ LOCAL_CPP_EXTENSION := .cc
 LOCAL_MODULE := gtest
 LOCAL_C_INCLUDES := $(LOCAL_PATH)/third_party/googletest/src/
 LOCAL_C_INCLUDES += $(LOCAL_PATH)/third_party/googletest/src/include/
 LOCAL_EXPORT_C_INCLUDES := $(LOCAL_PATH)/third_party/googletest/src/include/
 LOCAL_SRC_FILES := ./third_party/googletest/src/src/gtest-all.cc
 include $(BUILD_STATIC_LIBRARY)
--- a/test/avg_test.cc
+++ b/test/avg_test.cc
@ -14,7 +14,6 @@
 #include "third_party/googletest/src/include/gtest/gtest.h"
 #include "./vp9_rtcd.h"
 #include "./vpx_config.h"
 #include "./vpx_dsp_rtcd.h"
@ -23,7 +22,6 @@
 #include "test/register_state_check.h"
 #include "test/util.h"
 #include "vpx_mem/vpx_mem.h"
 #include "vpx_ports/vpx_timer.h"
 using libvpx_test::ACMRandom;
@ -91,7 +89,7 @@ class AverageTestBase : public ::testing::Test {
 };
 typedef unsigned int (*AverageFunction)(const uint8_t *s, int pitch);
-typedef ::testing::tuple<int, int, int, int, AverageFunction> AvgFunc;
+typedef std::tr1::tuple<int, int, int, int, AverageFunction> AvgFunc;
 class AverageTest : public AverageTestBase,
                    public ::testing::WithParamInterface<AvgFunc> {
@ -122,7 +120,7 @@ class AverageTest : public AverageTestBase,
 typedef void (*IntProRowFunc)(int16_t hbuf[16], uint8_t const *ref,
                              const int ref_stride, const int height);
-typedef ::testing::tuple<int, IntProRowFunc, IntProRowFunc> IntProRowParam;
+typedef std::tr1::tuple<int, IntProRowFunc, IntProRowFunc> IntProRowParam;
 class IntProRowTest : public AverageTestBase,
                      public ::testing::WithParamInterface<IntProRowParam> {
@ -164,7 +162,7 @@ class IntProRowTest : public AverageTestBase,
 typedef int16_t (*IntProColFunc)(uint8_t const *ref, const int width);
-typedef ::testing::tuple<int, IntProColFunc, IntProColFunc> IntProColParam;
+typedef std::tr1::tuple<int, IntProColFunc, IntProColFunc> IntProColParam;
 class IntProColTest : public AverageTestBase,
                      public ::testing::WithParamInterface<IntProColParam> {
@ -188,8 +186,8 @@ class IntProColTest : public AverageTestBase,
  int16_t sum_c_;
 };
-typedef int (*SatdFunc)(const tran_low_t *coeffs, int length);
+typedef int (*SatdFunc)(const int16_t *coeffs, int length);
-typedef ::testing::tuple<int, SatdFunc> SatdTestParam;
+typedef std::tr1::tuple<int, SatdFunc> SatdTestParam;
 class SatdTest : public ::testing::Test,
                 public ::testing::WithParamInterface<SatdTestParam> {
@ -198,7 +196,7 @@ class SatdTest : public ::testing::Test,
    satd_size_ = GET_PARAM(0);
    satd_func_ = GET_PARAM(1);
    rnd_.Reset(ACMRandom::DeterministicSeed());
-    src_ = reinterpret_cast<tran_low_t *>(
+    src_ = reinterpret_cast<int16_t *>(
        vpx_memalign(16, sizeof(*src_) * satd_size_));
    ASSERT_TRUE(src_ != NULL);
  }
@ -208,15 +206,12 @@ class SatdTest : public ::testing::Test,
    vpx_free(src_);
  }
-  void FillConstant(const tran_low_t val) {
+  void FillConstant(const int16_t val) {
    for (int i = 0; i < satd_size_; ++i) src_[i] = val;
  }
  void FillRandom() {
-    for (int i = 0; i < satd_size_; ++i) {
+    for (int i = 0; i < satd_size_; ++i) src_[i] = rnd_.Rand16();
      const int16_t tmp = rnd_.Rand16();
      src_[i] = (tran_low_t)tmp;
    }
  }
  void Check(const int expected) {
@ -228,66 +223,11 @@ class SatdTest : public ::testing::Test,
  int satd_size_;
 private:
-  tran_low_t *src_;
+  int16_t *src_;
  SatdFunc satd_func_;
  ACMRandom rnd_;
 };
 // Signature of the block-error functions under test: takes two coefficient
 // buffers and a block size, and returns a 64-bit total.
 typedef int64_t (*BlockErrorFunc)(const tran_low_t *coeff,
                                  const tran_low_t *dqcoeff, int block_size);
 // Test parameter: (number of coefficients, function under test).
 typedef ::testing::tuple<int, BlockErrorFunc> BlockErrorTestFPParam;
 // Parameterized fixture that owns a coeff/dqcoeff buffer pair, fills them with
 // constant or pseudo-random data, and compares the function's return value
 // against an expected total.
 class BlockErrorTestFP
    : public ::testing::Test,
      public ::testing::WithParamInterface<BlockErrorTestFPParam> {
 protected:
  // Reads the test parameters and allocates both buffers with
  // vpx_memalign(16, ...), txfm_size_ elements each.
  virtual void SetUp() {
    txfm_size_ = GET_PARAM(0);
    block_error_func_ = GET_PARAM(1);
    rnd_.Reset(ACMRandom::DeterministicSeed());
    coeff_ = reinterpret_cast<tran_low_t *>(
        vpx_memalign(16, sizeof(*coeff_) * txfm_size_));
    dqcoeff_ = reinterpret_cast<tran_low_t *>(
        vpx_memalign(16, sizeof(*dqcoeff_) * txfm_size_));
    ASSERT_TRUE(coeff_ != NULL);
    ASSERT_TRUE(dqcoeff_ != NULL);
  }
  // Releases the buffers allocated in SetUp().
  virtual void TearDown() {
    libvpx_test::ClearSystemState();
    vpx_free(coeff_);
    vpx_free(dqcoeff_);
  }
  // Sets every coeff_ element to |coeff_val| and every dqcoeff_ element to
  // |dqcoeff_val|.
  void FillConstant(const tran_low_t coeff_val, const tran_low_t dqcoeff_val) {
    for (int i = 0; i < txfm_size_; ++i) coeff_[i] = coeff_val;
    for (int i = 0; i < txfm_size_; ++i) dqcoeff_[i] = dqcoeff_val;
  }
  // Fills both buffers from the RNG, re-seeding with a fixed seed before each
  // buffer so that the contents are reproducible from run to run.
  void FillRandom() {
    // Just two fixed seeds
    rnd_.Reset(0xb0b9);
    for (int i = 0; i < txfm_size_; ++i) coeff_[i] = rnd_.Rand16() >> 1;
    rnd_.Reset(0xb0c8);
    for (int i = 0; i < txfm_size_; ++i) dqcoeff_[i] = rnd_.Rand16() >> 1;
  }
  // Runs the function under test on the current buffers (wrapped in
  // ASM_REGISTER_STATE_CHECK) and expects its return value to equal
  // |expected|.
  void Check(const int64_t expected) {
    int64_t total;
    ASM_REGISTER_STATE_CHECK(
        total = block_error_func_(coeff_, dqcoeff_, txfm_size_));
    EXPECT_EQ(expected, total);
  }
  // Number of coefficients in each buffer (test parameter 0).
  int txfm_size_;
 private:
  tran_low_t *coeff_;     // Allocated in SetUp(), freed in TearDown().
  tran_low_t *dqcoeff_;   // Allocated in SetUp(), freed in TearDown().
  BlockErrorFunc block_error_func_;  // Function under test (test parameter 1).
  ACMRandom rnd_;
 };
 uint8_t *AverageTestBase::source_data_ = NULL;
 TEST_P(AverageTest, MinValue) {
@ -368,67 +308,7 @@ TEST_P(SatdTest, Random) {
  Check(expected);
 }
-TEST_P(SatdTest, DISABLED_Speed) {
+using std::tr1::make_tuple;
  const int kCountSpeedTestBlock = 20000;
  vpx_usec_timer timer;
  DECLARE_ALIGNED(16, tran_low_t, coeff[1024]);
  const int blocksize = GET_PARAM(0);
  vpx_usec_timer_start(&timer);
  for (int i = 0; i < kCountSpeedTestBlock; ++i) {
    GET_PARAM(1)(coeff, blocksize);
  }
  vpx_usec_timer_mark(&timer);
  const int elapsed_time = static_cast<int>(vpx_usec_timer_elapsed(&timer));
  printf("blocksize: %4d time: %4d us\n", blocksize, elapsed_time);
 }
 TEST_P(BlockErrorTestFP, MinValue) {
  const int64_t kMin = -32640;
  const int64_t expected = kMin * kMin * txfm_size_;
  FillConstant(kMin, 0);
  Check(expected);
 }
 TEST_P(BlockErrorTestFP, MaxValue) {
  const int64_t kMax = 32640;
  const int64_t expected = kMax * kMax * txfm_size_;
  FillConstant(kMax, 0);
  Check(expected);
 }
 TEST_P(BlockErrorTestFP, Random) {
  int64_t expected;
  switch (txfm_size_) {
    case 16: expected = 2051681432; break;
    case 64: expected = 11075114379; break;
    case 256: expected = 44386271116; break;
    case 1024: expected = 184774996089; break;
    default:
      FAIL() << "Invalid satd size (" << txfm_size_
             << ") valid: 16/64/256/1024";
  }
  FillRandom();
  Check(expected);
 }
 TEST_P(BlockErrorTestFP, DISABLED_Speed) {
  const int kCountSpeedTestBlock = 20000;
  vpx_usec_timer timer;
  DECLARE_ALIGNED(16, tran_low_t, coeff[1024]);
  DECLARE_ALIGNED(16, tran_low_t, dqcoeff[1024]);
  const int blocksize = GET_PARAM(0);
  vpx_usec_timer_start(&timer);
  for (int i = 0; i < kCountSpeedTestBlock; ++i) {
    GET_PARAM(1)(coeff, dqcoeff, blocksize);
  }
  vpx_usec_timer_mark(&timer);
  const int elapsed_time = static_cast<int>(vpx_usec_timer_elapsed(&timer));
  printf("blocksize: %4d time: %4d us\n", blocksize, elapsed_time);
 }
 using ::testing::make_tuple;
 INSTANTIATE_TEST_CASE_P(
    C, AverageTest,
@ -441,13 +321,6 @@ INSTANTIATE_TEST_CASE_P(C, SatdTest,
                                          make_tuple(256, &vpx_satd_c),
                                          make_tuple(1024, &vpx_satd_c)));
 INSTANTIATE_TEST_CASE_P(
    C, BlockErrorTestFP,
    ::testing::Values(make_tuple(16, &vp9_block_error_fp_c),
                      make_tuple(64, &vp9_block_error_fp_c),
                      make_tuple(256, &vp9_block_error_fp_c),
                      make_tuple(1024, &vp9_block_error_fp_c)));
 #if HAVE_SSE2
 INSTANTIATE_TEST_CASE_P(
    SSE2, AverageTest,
@ -477,28 +350,6 @@ INSTANTIATE_TEST_CASE_P(SSE2, SatdTest,
                                          make_tuple(64, &vpx_satd_sse2),
                                          make_tuple(256, &vpx_satd_sse2),
                                          make_tuple(1024, &vpx_satd_sse2)));
 INSTANTIATE_TEST_CASE_P(
    SSE2, BlockErrorTestFP,
    ::testing::Values(make_tuple(16, &vp9_block_error_fp_sse2),
                      make_tuple(64, &vp9_block_error_fp_sse2),
                      make_tuple(256, &vp9_block_error_fp_sse2),
                      make_tuple(1024, &vp9_block_error_fp_sse2)));
 #endif  // HAVE_SSE2
 #if HAVE_AVX2
 INSTANTIATE_TEST_CASE_P(AVX2, SatdTest,
                        ::testing::Values(make_tuple(16, &vpx_satd_avx2),
                                          make_tuple(64, &vpx_satd_avx2),
                                          make_tuple(256, &vpx_satd_avx2),
                                          make_tuple(1024, &vpx_satd_avx2)));
 INSTANTIATE_TEST_CASE_P(
    AVX2, BlockErrorTestFP,
    ::testing::Values(make_tuple(16, &vp9_block_error_fp_avx2),
                      make_tuple(64, &vp9_block_error_fp_avx2),
                      make_tuple(256, &vp9_block_error_fp_avx2),
                      make_tuple(1024, &vp9_block_error_fp_avx2)));
 #endif
 #if HAVE_NEON
@ -530,18 +381,7 @@ INSTANTIATE_TEST_CASE_P(NEON, SatdTest,
                                          make_tuple(64, &vpx_satd_neon),
                                          make_tuple(256, &vpx_satd_neon),
                                          make_tuple(1024, &vpx_satd_neon)));
-
+#endif
 // TODO(jianj): Remove the highbitdepth flag once the SIMD functions are
 // in place.
 #if !CONFIG_VP9_HIGHBITDEPTH
 INSTANTIATE_TEST_CASE_P(
    NEON, BlockErrorTestFP,
    ::testing::Values(make_tuple(16, &vp9_block_error_fp_neon),
                      make_tuple(64, &vp9_block_error_fp_neon),
                      make_tuple(256, &vp9_block_error_fp_neon),
                      make_tuple(1024, &vp9_block_error_fp_neon)));
 #endif  // !CONFIG_VP9_HIGHBITDEPTH
 #endif  // HAVE_NEON
 #if HAVE_MSA
 INSTANTIATE_TEST_CASE_P(
@ -552,30 +392,6 @@ INSTANTIATE_TEST_CASE_P(
                      make_tuple(16, 16, 0, 4, &vpx_avg_4x4_msa),
                      make_tuple(16, 16, 5, 4, &vpx_avg_4x4_msa),
                      make_tuple(32, 32, 15, 4, &vpx_avg_4x4_msa)));
-
+#endif
 INSTANTIATE_TEST_CASE_P(
    MSA, IntProRowTest,
    ::testing::Values(make_tuple(16, &vpx_int_pro_row_msa, &vpx_int_pro_row_c),
                      make_tuple(32, &vpx_int_pro_row_msa, &vpx_int_pro_row_c),
                      make_tuple(64, &vpx_int_pro_row_msa,
                                 &vpx_int_pro_row_c)));
 INSTANTIATE_TEST_CASE_P(
    MSA, IntProColTest,
    ::testing::Values(make_tuple(16, &vpx_int_pro_col_msa, &vpx_int_pro_col_c),
                      make_tuple(32, &vpx_int_pro_col_msa, &vpx_int_pro_col_c),
                      make_tuple(64, &vpx_int_pro_col_msa,
                                 &vpx_int_pro_col_c)));
 // TODO(jingning): Remove the highbitdepth flag once the SIMD functions are
 // in place.
 #if !CONFIG_VP9_HIGHBITDEPTH
 INSTANTIATE_TEST_CASE_P(MSA, SatdTest,
                        ::testing::Values(make_tuple(16, &vpx_satd_msa),
                                          make_tuple(64, &vpx_satd_msa),
                                          make_tuple(256, &vpx_satd_msa),
                                          make_tuple(1024, &vpx_satd_msa)));
 #endif  // !CONFIG_VP9_HIGHBITDEPTH
 #endif  // HAVE_MSA
 }  // namespace
--- a/test/blockiness_test.cc
+++ b/test/blockiness_test.cc
@ -141,7 +141,7 @@ class BlockinessTestBase : public ::testing::Test {
 };
 #if CONFIG_VP9_ENCODER
-typedef ::testing::tuple<int, int> BlockinessParam;
+typedef std::tr1::tuple<int, int> BlockinessParam;
 class BlockinessVP9Test
    : public BlockinessTestBase,
      public ::testing::WithParamInterface<BlockinessParam> {
@ -208,14 +208,14 @@ TEST_P(BlockinessVP9Test, WorstCaseBlockiness) {
 }
 #endif  // CONFIG_VP9_ENCODER
-using ::testing::make_tuple;
+using std::tr1::make_tuple;
 //------------------------------------------------------------------------------
 // C functions
 #if CONFIG_VP9_ENCODER
 const BlockinessParam c_vp9_tests[] = {
-  make_tuple(320, 240), make_tuple(318, 242), make_tuple(318, 238)
+  make_tuple(320, 240), make_tuple(318, 242), make_tuple(318, 238),
 };
 INSTANTIATE_TEST_CASE_P(C, BlockinessVP9Test, ::testing::ValuesIn(c_vp9_tests));
 #endif
--- a/test/buffer.h
+++ b/test/buffer.h
@ -1,382 +0,0 @@
 /*
 *  Copyright (c) 2016 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
 #ifndef TEST_BUFFER_H_
 #define TEST_BUFFER_H_
 #include <stdio.h>
 #include <limits>
 #include "third_party/googletest/src/include/gtest/gtest.h"
 #include "test/acm_random.h"
 #include "vpx/vpx_integer.h"
 #include "vpx_mem/vpx_mem.h"
 namespace libvpx_test {
 // Test helper that owns a width x height block of T values surrounded by
 // optional padding on each of the four sides. Construction only records the
 // geometry; storage is allocated by Init(). The out-of-class member functions
 // below treat the storage as row-major with 'stride_' elements per row.
 template <typename T>
 class Buffer {
 public:
  // Per-side padding, no alignment request (alignment_ == 0): Init() allocates
  // with new[].
  Buffer(int width, int height, int top_padding, int left_padding,
         int right_padding, int bottom_padding)
      : width_(width), height_(height), top_padding_(top_padding),
        left_padding_(left_padding), right_padding_(right_padding),
        bottom_padding_(bottom_padding), alignment_(0), padding_value_(0),
        stride_(0), raw_size_(0), num_elements_(0), raw_buffer_(NULL) {}
  // Per-side padding with an alignment request: a nonzero 'alignment' makes
  // Init() allocate with vpx_memalign().
  Buffer(int width, int height, int top_padding, int left_padding,
         int right_padding, int bottom_padding, unsigned int alignment)
      : width_(width), height_(height), top_padding_(top_padding),
        left_padding_(left_padding), right_padding_(right_padding),
        bottom_padding_(bottom_padding), alignment_(alignment),
        padding_value_(0), stride_(0), raw_size_(0), num_elements_(0),
        raw_buffer_(NULL) {}
  // The same 'padding' on all four sides, no alignment request.
  Buffer(int width, int height, int padding)
      : width_(width), height_(height), top_padding_(padding),
        left_padding_(padding), right_padding_(padding),
        bottom_padding_(padding), alignment_(0), padding_value_(0), stride_(0),
        raw_size_(0), num_elements_(0), raw_buffer_(NULL) {}
  // The same 'padding' on all four sides with an alignment request.
  Buffer(int width, int height, int padding, unsigned int alignment)
      : width_(width), height_(height), top_padding_(padding),
        left_padding_(padding), right_padding_(padding),
        bottom_padding_(padding), alignment_(alignment), padding_value_(0),
        stride_(0), raw_size_(0), num_elements_(0), raw_buffer_(NULL) {}
  // Releases the storage with the deallocator matching the allocator chosen in
  // Init(): vpx_free() for aligned storage, delete[] otherwise.
  ~Buffer() {
    if (alignment_) {
      vpx_free(raw_buffer_);
    } else {
      delete[] raw_buffer_;
    }
  }
  // Pointer to the first non-padding element, or NULL before Init().
  T *TopLeftPixel() const;
  // Number of elements per row including left and right padding. This is 0
  // until Init() has computed it.
  int stride() const { return stride_; }
  // Set the buffer (excluding padding) to 'value'.
  void Set(const T value);
  // Set the buffer (excluding padding) to the output of ACMRandom function
  // 'rand_func'.
  void Set(ACMRandom *rand_class, T (ACMRandom::*rand_func)());
  // Set the buffer (excluding padding) to the output of ACMRandom function
  // 'RandRange' with range 'low' to 'high' which typically must be within
  // testing::internal::Random::kMaxRange (1u << 31). However, because we want
  // to allow negative low (and high) values, it is restricted to INT32_MAX
  // here.
  void Set(ACMRandom *rand_class, const T low, const T high);
  // Copy the contents of Buffer 'a' (excluding padding).
  void CopyFrom(const Buffer<T> &a);
  // Print the buffer contents to stdout.
  void DumpBuffer() const;
  // Highlight the differences between two buffers if they are the same size.
  void PrintDifference(const Buffer<T> &a) const;
  // True when storage is allocated and at least one side has nonzero padding.
  bool HasPadding() const;
  // Sets all the values in the buffer (padding and non-padding alike) to
  // 'padding_value' and remembers it for CheckPadding().
  void SetPadding(const T padding_value);
  // Checks if all the values (excluding padding) are equal to 'value'.
  bool CheckValues(const T value) const;
  // Check that padding matches the expected value or there is no padding.
  bool CheckPadding() const;
  // Compare the non-padding portion of two buffers if they are the same size.
  bool CheckValues(const Buffer<T> &a) const;
  // Allocates the storage and fills every element with
  // std::numeric_limits<T>::max(). Returns false if storage already exists.
  // Otherwise the result is !::testing::Test::HasFailure(), so it reflects
  // the geometry/alignment/allocation expectations checked here as well as any
  // failure gtest has already recorded.
  bool Init() {
    if (raw_buffer_ != NULL) return false;
    EXPECT_GT(width_, 0);
    EXPECT_GT(height_, 0);
    EXPECT_GE(top_padding_, 0);
    EXPECT_GE(left_padding_, 0);
    EXPECT_GE(right_padding_, 0);
    EXPECT_GE(bottom_padding_, 0);
    stride_ = left_padding_ + width_ + right_padding_;
    num_elements_ = stride_ * (top_padding_ + height_ + bottom_padding_);
    raw_size_ = num_elements_ * sizeof(T);
    if (alignment_) {
      EXPECT_GE(alignment_, sizeof(T));
      // Ensure alignment of the first value will be preserved.
      EXPECT_EQ((left_padding_ * sizeof(T)) % alignment_, 0u);
      // Ensure alignment of the subsequent rows will be preserved when there is
      // a stride.
      if (stride_ != width_) {
        EXPECT_EQ((stride_ * sizeof(T)) % alignment_, 0u);
      }
      raw_buffer_ = reinterpret_cast<T *>(vpx_memalign(alignment_, raw_size_));
    } else {
      // Non-throwing new so an allocation failure is reported through the
      // EXPECT below rather than an exception.
      raw_buffer_ = new (std::nothrow) T[num_elements_];
    }
    EXPECT_TRUE(raw_buffer_ != NULL);
    // SetPadding() is a no-op when the allocation failed.
    SetPadding(std::numeric_limits<T>::max());
    return !::testing::Test::HasFailure();
  }
 private:
  // True when this buffer is allocated and 'a' has the same width and height.
  bool BufferSizesMatch(const Buffer<T> &a) const;
  // Dimensions of the non-padding area, in elements.
  const int width_;
  const int height_;
  // Padding on each side, in elements.
  const int top_padding_;
  const int left_padding_;
  const int right_padding_;
  const int bottom_padding_;
  // 0 selects new[]/delete[]; nonzero selects vpx_memalign()/vpx_free().
  const unsigned int alignment_;
  // Value most recently written by SetPadding().
  T padding_value_;
  // Elements per row including left/right padding; computed in Init().
  int stride_;
  // Allocation size in bytes; computed in Init().
  int raw_size_;
  // Total element count including all padding; computed in Init().
  int num_elements_;
  // Start of the allocation (top-left corner of the padding), or NULL.
  T *raw_buffer_;
 };
 // Returns the address of the first element inside the padding, or NULL when
 // no storage has been allocated yet.
 template <typename T>
 T *Buffer<T>::TopLeftPixel() const {
  if (raw_buffer_ == NULL) return NULL;
  // Skip the padded rows above, then the padded columns to the left.
  T *const first_visible_row = raw_buffer_ + (top_padding_ * stride_);
  return first_visible_row + left_padding_;
 }
 // Writes 'value' into every non-padding element. Does nothing when the
 // buffer has no storage.
 template <typename T>
 void Buffer<T>::Set(const T value) {
  if (raw_buffer_ == NULL) return;
  T *row = TopLeftPixel();
  for (int y = 0; y < height_; ++y, row += stride_) {
    for (int x = 0; x < width_; ++x) row[x] = value;
  }
 }
 // Fills every non-padding element, in row-major order, with successive
 // results of calling 'rand_func' on 'rand_class'. Does nothing when the
 // buffer has no storage.
 template <typename T>
 void Buffer<T>::Set(ACMRandom *rand_class, T (ACMRandom::*rand_func)()) {
  if (raw_buffer_ == NULL) return;
  T *row = TopLeftPixel();
  for (int y = 0; y < height_; ++y, row += stride_) {
    for (int x = 0; x < width_; ++x) {
      row[x] = (rand_class->*rand_func)();
    }
  }
 }
 // Fills every non-padding element with 'low' plus a value drawn from
 // rand_class->RandRange(high - low). Records a gtest failure when low > high
 // or when the span does not fit in an int32_t. Does nothing when the buffer
 // has no storage.
 template <typename T>
 void Buffer<T>::Set(ACMRandom *rand_class, const T low, const T high) {
  if (raw_buffer_ == NULL) return;
  EXPECT_LE(low, high);
  EXPECT_LE(static_cast<int64_t>(high) - low,
            std::numeric_limits<int32_t>::max());
  T *row = TopLeftPixel();
  for (int y = 0; y < height_; ++y, row += stride_) {
    for (int x = 0; x < width_; ++x) {
      // Keep the draw in a signed int before adding 'low': RandRange's return
      // type would otherwise promote a negative 'low' to unsigned.
      const int32_t offset =
          static_cast<int32_t>(rand_class->RandRange(high - low));
      row[x] = static_cast<T>(offset + low);
    }
  }
 }
 // Copies the non-padding area of 'a' into this buffer. Does nothing when this
 // buffer has no storage or the two buffers differ in width or height.
 template <typename T>
 void Buffer<T>::CopyFrom(const Buffer<T> &a) {
  if (raw_buffer_ == NULL || !BufferSizesMatch(a)) return;
  const T *from = a.TopLeftPixel();
  T *to = TopLeftPixel();
  const int from_stride = a.stride();
  for (int y = 0; y < height_; ++y) {
    for (int x = 0; x < width_; ++x) to[x] = from[x];
    // The two buffers may use different strides.
    from += from_stride;
    to += stride_;
  }
 }
 // Prints the whole allocation, padding included, one row per output line.
 // Does nothing when the buffer has no storage.
 //
 // Storage is row-major with 'stride_' elements per row, so element
 // (row, col) lives at raw_buffer_[row * stride_ + col]. The previous
 // indexing (row + col * stride_) was transposed: it printed the wrong
 // elements and read past the end of the allocation whenever stride_ exceeded
 // the number of rows.
 template <typename T>
 void Buffer<T>::DumpBuffer() const {
  if (!raw_buffer_) return;
  const int rows = top_padding_ + height_ + bottom_padding_;
  for (int row = 0; row < rows; ++row) {
    const T *const line = raw_buffer_ + row * stride_;
    for (int col = 0; col < stride_; ++col) {
      printf("%4d", line[col]);
    }
    printf("\n");
  }
 }
 // Reports whether any side has nonzero padding. Always false before storage
 // has been allocated.
 template <typename T>
 bool Buffer<T>::HasPadding() const {
  if (raw_buffer_ == NULL) return false;
  const bool vertical = top_padding_ != 0 || bottom_padding_ != 0;
  const bool horizontal = left_padding_ != 0 || right_padding_ != 0;
  return vertical || horizontal;
 }
 // Prints the non-padding area of this buffer and then of 'a'. Elements that
 // differ between the two are prefixed with '*'. Does nothing when this buffer
 // has no storage or the sizes differ.
 template <typename T>
 void Buffer<T>::PrintDifference(const Buffer<T> &a) const {
  if (raw_buffer_ == NULL || !BufferSizesMatch(a)) return;
  const int ref_stride = a.stride();
  const T *ref_row = a.TopLeftPixel();
  const T *own_row = TopLeftPixel();
  printf("This buffer:\n");
  for (int y = 0; y < height_; ++y) {
    for (int x = 0; x < width_; ++x) {
      if (ref_row[x] == own_row[x]) {
        printf("%4d", own_row[x]);
      } else {
        printf("*%3d", own_row[x]);
      }
    }
    printf("\n");
    ref_row += ref_stride;
    own_row += stride_;
  }
  // Rewind both cursors for the second pass over the reference values.
  ref_row = a.TopLeftPixel();
  own_row = TopLeftPixel();
  printf("Reference buffer:\n");
  for (int y = 0; y < height_; ++y) {
    for (int x = 0; x < width_; ++x) {
      if (ref_row[x] == own_row[x]) {
        printf("%4d", ref_row[x]);
      } else {
        printf("*%3d", ref_row[x]);
      }
    }
    printf("\n");
    ref_row += ref_stride;
    own_row += stride_;
  }
 }
 // Records 'padding_value' as the expected padding and writes it to every
 // element of the allocation, non-padding area included. Does nothing when the
 // buffer has no storage.
 template <typename T>
 void Buffer<T>::SetPadding(const T padding_value) {
  if (raw_buffer_ == NULL) return;
  padding_value_ = padding_value;
  T *dst = raw_buffer_;
  int remaining = num_elements_;
  while (remaining-- > 0) *dst++ = padding_value;
 }
 // Returns true when every non-padding element equals 'value'. Returns false
 // when the buffer has no storage.
 template <typename T>
 bool Buffer<T>::CheckValues(const T value) const {
  if (raw_buffer_ == NULL) return false;
  const T *row = TopLeftPixel();
  for (int y = 0; y < height_; ++y, row += stride_) {
    for (int x = 0; x < width_; ++x) {
      if (row[x] != value) return false;
    }
  }
  return true;
 }
 // Verifies that every padding element still holds the value last written by
 // SetPadding(). Returns false when the buffer has no storage, and true when
 // there is no padding at all.
 template <typename T>
 bool Buffer<T>::CheckPadding() const {
  if (raw_buffer_ == NULL) return false;
  if (!HasPadding()) return true;
  // Full-stride rows above the visible area.
  const int top_count = stride_ * top_padding_;
  for (int i = 0; i < top_count; ++i) {
    if (raw_buffer_[i] != padding_value_) return false;
  }
  // Columns on either side of each visible row.
  const T *row = TopLeftPixel();
  for (int y = 0; y < height_; ++y, row += stride_) {
    const T *const left = row - left_padding_;
    for (int x = 0; x < left_padding_; ++x) {
      if (left[x] != padding_value_) return false;
    }
    const T *const right = row + width_;
    for (int x = 0; x < right_padding_; ++x) {
      if (right[x] != padding_value_) return false;
    }
  }
  // Full-stride rows below the visible area.
  const T *const bottom = raw_buffer_ + (top_padding_ + height_) * stride_;
  const int bottom_count = stride_ * bottom_padding_;
  for (int i = 0; i < bottom_count; ++i) {
    if (bottom[i] != padding_value_) return false;
  }
  return true;
 }
 // Returns true when the non-padding areas of this buffer and 'a' hold the
 // same values. Returns false when this buffer has no storage or the sizes
 // differ.
 template <typename T>
 bool Buffer<T>::CheckValues(const Buffer<T> &a) const {
  if (raw_buffer_ == NULL || !BufferSizesMatch(a)) return false;
  const T *theirs = a.TopLeftPixel();
  const T *ours = TopLeftPixel();
  const int their_stride = a.stride();
  for (int y = 0; y < height_; ++y) {
    for (int x = 0; x < width_; ++x) {
      if (theirs[x] != ours[x]) return false;
    }
    theirs += their_stride;
    ours += stride_;
  }
  return true;
 }
 // Returns true when this buffer has storage and 'a' has the same width and
 // height (padding is not compared). Prints a diagnostic on a size mismatch.
 template <typename T>
 bool Buffer<T>::BufferSizesMatch(const Buffer<T> &a) const {
  if (raw_buffer_ == NULL) return false;
  const bool same_size = (a.width_ == width_) && (a.height_ == height_);
  if (!same_size) {
    printf(
        "Reference buffer of size %dx%d does not match this buffer which is "
        "size %dx%d\n",
        a.width_, a.height_, width_, height_);
  }
  return same_size;
 }
 }  // namespace libvpx_test
 #endif  // TEST_BUFFER_H_
--- a/test/byte_alignment_test.cc
+++ b/test/byte_alignment_test.cc
@ -128,8 +128,8 @@ class ByteAlignmentTest
  // TODO(fgalligan): Move the MD5 testing code into another class.
  void OpenMd5File(const std::string &md5_file_name_) {
    md5_file_ = libvpx_test::OpenTestDataFile(md5_file_name_);
-    ASSERT_TRUE(md5_file_ != NULL)
+    ASSERT_TRUE(md5_file_ != NULL) << "MD5 file open failed. Filename: "
-        << "MD5 file open failed. Filename: " << md5_file_name_;
+                                   << md5_file_name_;
  }
  void CheckMd5(const vpx_image_t &img) {
@ -171,9 +171,8 @@ TEST_F(ByteAlignmentTest, SwitchByteAlignment) {
 TEST_P(ByteAlignmentTest, TestAlignment) {
  const ByteAlignmentTestParam t = GetParam();
  SetByteAlignment(t.byte_alignment, t.expected_value);
-  if (t.decode_remaining) {
+  if (t.decode_remaining)
    ASSERT_EQ(VPX_CODEC_OK, DecodeRemainingFrames(t.byte_alignment));
  }
 }
 INSTANTIATE_TEST_CASE_P(Alignments, ByteAlignmentTest,
--- a/test/clear_system_state.h
+++ b/test/clear_system_state.h
@ -11,13 +11,19 @@
 #define TEST_CLEAR_SYSTEM_STATE_H_
 #include "./vpx_config.h"
-#include "vpx_ports/system_state.h"
+#if ARCH_X86 || ARCH_X86_64
 #include "vpx_ports/x86.h"
 #endif
 namespace libvpx_test {
 // Reset system to a known state. This function should be used for all non-API
 // test cases.
-inline void ClearSystemState() { vpx_clear_system_state(); }
+inline void ClearSystemState() {
 #if ARCH_X86 || ARCH_X86_64
  vpx_reset_mmx_state();
 #endif
 }
 }  // namespace libvpx_test
 #endif  // TEST_CLEAR_SYSTEM_STATE_H_
--- a/test/codec_factory.h
+++ b/test/codec_factory.h
@ -53,22 +53,23 @@ class CodecFactory {
 template <class T1>
 class CodecTestWithParam
    : public ::testing::TestWithParam<
-          ::testing::tuple<const libvpx_test::CodecFactory *, T1> > {};
+          std::tr1::tuple<const libvpx_test::CodecFactory *, T1> > {};
 template <class T1, class T2>
 class CodecTestWith2Params
    : public ::testing::TestWithParam<
-          ::testing::tuple<const libvpx_test::CodecFactory *, T1, T2> > {};
+          std::tr1::tuple<const libvpx_test::CodecFactory *, T1, T2> > {};
 template <class T1, class T2, class T3>
 class CodecTestWith3Params
    : public ::testing::TestWithParam<
-          ::testing::tuple<const libvpx_test::CodecFactory *, T1, T2, T3> > {};
+          std::tr1::tuple<const libvpx_test::CodecFactory *, T1, T2, T3> > {};
 template <class T1, class T2, class T3, class T4>
 class CodecTestWith4Params
-    : public ::testing::TestWithParam< ::testing::tuple<
+    : public ::testing::TestWithParam<
-          const libvpx_test::CodecFactory *, T1, T2, T3, T4> > {};
+          std::tr1::tuple<const libvpx_test::CodecFactory *, T1, T2, T3, T4> > {
 };
 /*
 * VP8 Codec Definitions
--- a/test/comp_avg_pred_test.cc
+++ b/test/comp_avg_pred_test.cc
@ -1,182 +0,0 @@
 /*
 *  Copyright (c) 2017 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
 #include "third_party/googletest/src/include/gtest/gtest.h"
 #include "./vpx_dsp_rtcd.h"
 #include "test/acm_random.h"
 #include "test/buffer.h"
 #include "test/register_state_check.h"
 #include "vpx_ports/vpx_timer.h"
 namespace {
 using ::libvpx_test::ACMRandom;
 using ::libvpx_test::Buffer;
 typedef void (*AvgPredFunc)(uint8_t *a, const uint8_t *b, int w, int h,
                            const uint8_t *c, int c_stride);
 // Average of two 8-bit samples with halves rounded up.
 uint8_t avg_with_rounding(uint8_t a, uint8_t b) {
  // The operands promote to int, so the sum (at most 511) cannot overflow.
  const int biased_sum = a + b + 1;
  return static_cast<uint8_t>(biased_sum / 2);
 }
 // Reference implementation: writes the rounded average of the co-located
 // samples of 'pred' and 'ref' into the top-left width x height region of
 // 'avg'. Each buffer is addressed with its own stride.
 void reference_pred(const Buffer<uint8_t> &pred, const Buffer<uint8_t> &ref,
                    int width, int height, Buffer<uint8_t> *avg) {
  for (int y = 0; y < height; ++y) {
    const int pred_row = y * pred.stride();
    const int ref_row = y * ref.stride();
    const int avg_row = y * avg->stride();
    for (int x = 0; x < width; ++x) {
      const uint8_t p = pred.TopLeftPixel()[pred_row + x];
      const uint8_t r = ref.TopLeftPixel()[ref_row + x];
      avg->TopLeftPixel()[avg_row + x] = avg_with_rounding(p, r);
    }
  }
 }
 // Fixture parameterized on the averaging function under test.
 class AvgPredTest : public ::testing::TestWithParam<AvgPredFunc> {
 public:
  // Reseeds the generator with ACMRandom::DeterministicSeed() and latches the
  // function pointer supplied as the test parameter.
  virtual void SetUp() {
    rnd_.Reset(ACMRandom::DeterministicSeed());
    avg_pred_func_ = GetParam();
  }
 protected:
  // Function under test.
  AvgPredFunc avg_pred_func_;
  // Source of input samples for the tests.
  ACMRandom rnd_;
 };
 // Compares the function under test with reference_pred() for each block size
 // visited below, once with an unpadded reference buffer and once with a
 // padded one. Stops at the first mismatch after printing the difference.
 TEST_P(AvgPredTest, SizeCombinations) {
  // This is called as part of the sub pixel variance. As such it must be one of
  // the variance block sizes.
  for (int width_pow = 2; width_pow <= 6; ++width_pow) {
    const int width = 1 << width_pow;
    for (int height_pow = width_pow - 1; height_pow <= width_pow + 1;
         ++height_pow) {
      // Don't test 4x2 or 64x128
      if (height_pow == 1 || height_pow == 7) continue;
      const int height = 1 << height_pow;
      // The sse2 special-cases when ref width == stride, so make sure to test
      // it.
      for (int padded = 0; padded < 2; ++padded) {
        // Only the reference buffer may have a stride not equal to width.
        Buffer<uint8_t> ref(width, height, padded ? 8 : 0);
        ASSERT_TRUE(ref.Init());
        Buffer<uint8_t> pred(width, height, 0, 16);
        ASSERT_TRUE(pred.Init());
        Buffer<uint8_t> avg_ref(width, height, 0, 16);
        ASSERT_TRUE(avg_ref.Init());
        Buffer<uint8_t> avg_chk(width, height, 0, 16);
        ASSERT_TRUE(avg_chk.Init());
        // Draw order matters for reproducibility: reference first, then pred.
        ref.Set(&rnd_, &ACMRandom::Rand8);
        pred.Set(&rnd_, &ACMRandom::Rand8);
        reference_pred(pred, ref, width, height, &avg_ref);
        ASM_REGISTER_STATE_CHECK(
            avg_pred_func_(avg_chk.TopLeftPixel(), pred.TopLeftPixel(), width,
                           height, ref.TopLeftPixel(), ref.stride()));
        EXPECT_TRUE(avg_chk.CheckValues(avg_ref));
        if (!HasFailure()) continue;
        printf("Width: %d Height: %d\n", width, height);
        avg_chk.PrintDifference(avg_ref);
        return;
      }
    }
  }
 }
 // Runs 500 rounds of random 64x32 input through the function under test and
 // compares each result with reference_pred(). The reference buffer carries 8
 // elements of padding. Stops at the first mismatch after printing it.
 TEST_P(AvgPredTest, CompareReferenceRandom) {
  const int width = 64;
  const int height = 32;
  Buffer<uint8_t> ref(width, height, 8);
  ASSERT_TRUE(ref.Init());
  Buffer<uint8_t> pred(width, height, 0, 16);
  ASSERT_TRUE(pred.Init());
  Buffer<uint8_t> avg_ref(width, height, 0, 16);
  ASSERT_TRUE(avg_ref.Init());
  Buffer<uint8_t> avg_chk(width, height, 0, 16);
  ASSERT_TRUE(avg_chk.Init());
  for (int round = 0; round < 500; ++round) {
    ref.Set(&rnd_, &ACMRandom::Rand8);
    pred.Set(&rnd_, &ACMRandom::Rand8);
    reference_pred(pred, ref, width, height, &avg_ref);
    ASM_REGISTER_STATE_CHECK(avg_pred_func_(avg_chk.TopLeftPixel(),
                                            pred.TopLeftPixel(), width, height,
                                            ref.TopLeftPixel(), ref.stride()));
    EXPECT_TRUE(avg_chk.CheckValues(avg_ref));
    if (!HasFailure()) continue;
    printf("Width: %d Height: %d\n", width, height);
    avg_chk.PrintDifference(avg_ref);
    return;
  }
 }
 // Disabled benchmark: for each block size, with and without reference
 // padding, times 10000000 / (width * height) calls of the function under test
 // and prints the elapsed microseconds.
 TEST_P(AvgPredTest, DISABLED_Speed) {
  for (int width_pow = 2; width_pow <= 6; ++width_pow) {
    const int width = 1 << width_pow;
    for (int height_pow = width_pow - 1; height_pow <= width_pow + 1;
         ++height_pow) {
      // Don't test 4x2 or 64x128
      if (height_pow == 1 || height_pow == 7) continue;
      const int height = 1 << height_pow;
      // Scale the call count so every size processes about the same number of
      // samples.
      const int calls = 10000000 / (width * height);
      for (int ref_padding = 0; ref_padding < 2; ref_padding++) {
        Buffer<uint8_t> ref(width, height, ref_padding ? 8 : 0);
        ASSERT_TRUE(ref.Init());
        Buffer<uint8_t> pred(width, height, 0, 16);
        ASSERT_TRUE(pred.Init());
        Buffer<uint8_t> avg(width, height, 0, 16);
        ASSERT_TRUE(avg.Init());
        ref.Set(&rnd_, &ACMRandom::Rand8);
        pred.Set(&rnd_, &ACMRandom::Rand8);
        vpx_usec_timer timer;
        vpx_usec_timer_start(&timer);
        for (int call = 0; call < calls; ++call) {
          avg_pred_func_(avg.TopLeftPixel(), pred.TopLeftPixel(), width, height,
                         ref.TopLeftPixel(), ref.stride());
        }
        vpx_usec_timer_mark(&timer);
        const int elapsed_time =
            static_cast<int>(vpx_usec_timer_elapsed(&timer));
        printf("Average Test (ref_padding: %d) %dx%d time: %5d us\n",
               ref_padding, width, height, elapsed_time);
      }
    }
  }
 }
 // The C implementation is registered unconditionally.
 INSTANTIATE_TEST_CASE_P(C, AvgPredTest,
                        ::testing::Values(&vpx_comp_avg_pred_c));
 // Each variant below is registered only when its HAVE_* macro is nonzero.
 #if HAVE_SSE2
 INSTANTIATE_TEST_CASE_P(SSE2, AvgPredTest,
                        ::testing::Values(&vpx_comp_avg_pred_sse2));
 #endif  // HAVE_SSE2
 #if HAVE_NEON
 INSTANTIATE_TEST_CASE_P(NEON, AvgPredTest,
                        ::testing::Values(&vpx_comp_avg_pred_neon));
 #endif  // HAVE_NEON
 #if HAVE_VSX
 INSTANTIATE_TEST_CASE_P(VSX, AvgPredTest,
                        ::testing::Values(&vpx_comp_avg_pred_vsx));
 #endif  // HAVE_VSX
 }  // namespace
--- a/test/consistency_test.cc
+++ b/test/consistency_test.cc
@ -127,7 +127,7 @@ class ConsistencyTestBase : public ::testing::Test {
 };
 #if CONFIG_VP9_ENCODER
-typedef ::testing::tuple<int, int> ConsistencyParam;
+typedef std::tr1::tuple<int, int> ConsistencyParam;
 class ConsistencyVP9Test
    : public ConsistencyTestBase,
      public ::testing::WithParamInterface<ConsistencyParam> {
@ -198,14 +198,14 @@ TEST_P(ConsistencyVP9Test, ConsistencyIsZero) {
 }
 #endif  // CONFIG_VP9_ENCODER
-using ::testing::make_tuple;
+using std::tr1::make_tuple;
 //------------------------------------------------------------------------------
 // C functions
 #if CONFIG_VP9_ENCODER
 const ConsistencyParam c_vp9_tests[] = {
-  make_tuple(320, 240), make_tuple(318, 242), make_tuple(318, 238)
+  make_tuple(320, 240), make_tuple(318, 242), make_tuple(318, 238),
 };
 INSTANTIATE_TEST_CASE_P(C, ConsistencyVP9Test,
                        ::testing::ValuesIn(c_vp9_tests));
--- a/test/convolve_test.cc
+++ b/test/convolve_test.cc
@ -25,7 +25,6 @@
 #include "vpx_dsp/vpx_filter.h"
 #include "vpx_mem/vpx_mem.h"
 #include "vpx_ports/mem.h"
 #include "vpx_ports/vpx_timer.h"
 namespace {
@ -33,9 +32,9 @@ static const unsigned int kMaxDimension = 64;
 typedef void (*ConvolveFunc)(const uint8_t *src, ptrdiff_t src_stride,
                             uint8_t *dst, ptrdiff_t dst_stride,
-                             const InterpKernel *filter, int x0_q4,
+                             const int16_t *filter_x, int filter_x_stride,
-                             int x_step_q4, int y0_q4, int y_step_q4, int w,
+                             const int16_t *filter_y, int filter_y_stride,
-                             int h);
+                             int w, int h);
 typedef void (*WrapperFilterBlock2d8Func)(
    const uint8_t *src_ptr, const unsigned int src_stride,
@ -77,7 +76,7 @@ struct ConvolveFunctions {
  int use_highbd_;  // 0 if high bitdepth not used, else the actual bit depth.
 };
-typedef ::testing::tuple<int, int, const ConvolveFunctions *> ConvolveParam;
+typedef std::tr1::tuple<int, int, const ConvolveFunctions *> ConvolveParam;
 #define ALL_SIZES(convolve_fn)                                            \
  make_tuple(4, 4, &convolve_fn), make_tuple(8, 4, &convolve_fn),         \
@ -301,9 +300,9 @@ void wrapper_filter_average_block2d_8_c(
    filter_average_block2d_8_c(src_ptr, src_stride, hfilter, vfilter, dst_ptr,
                               dst_stride, output_width, output_height);
  } else {
-    highbd_filter_average_block2d_8_c(CAST_TO_SHORTPTR(src_ptr), src_stride,
+    highbd_filter_average_block2d_8_c(CONVERT_TO_SHORTPTR(src_ptr), src_stride,
                                      hfilter, vfilter,
-                                      CAST_TO_SHORTPTR(dst_ptr), dst_stride,
+                                      CONVERT_TO_SHORTPTR(dst_ptr), dst_stride,
                                      output_width, output_height, use_highbd);
  }
 #else
@ -324,8 +323,8 @@ void wrapper_filter_block2d_8_c(const uint8_t *src_ptr,
    filter_block2d_8_c(src_ptr, src_stride, hfilter, vfilter, dst_ptr,
                       dst_stride, output_width, output_height);
  } else {
-    highbd_filter_block2d_8_c(CAST_TO_SHORTPTR(src_ptr), src_stride, hfilter,
+    highbd_filter_block2d_8_c(CONVERT_TO_SHORTPTR(src_ptr), src_stride, hfilter,
-                              vfilter, CAST_TO_SHORTPTR(dst_ptr), dst_stride,
+                              vfilter, CONVERT_TO_SHORTPTR(dst_ptr), dst_stride,
                              output_width, output_height, use_highbd);
  }
 #else
@ -450,9 +449,7 @@ class ConvolveTest : public ::testing::TestWithParam<ConvolveParam> {
  void CheckGuardBlocks() {
    for (int i = 0; i < kOutputBufferSize; ++i) {
-      if (IsIndexInBorder(i)) {
+      if (IsIndexInBorder(i)) EXPECT_EQ(255, output_[i]);
        EXPECT_EQ(255, output_[i]);
      }
    }
  }
@ -462,7 +459,7 @@ class ConvolveTest : public ::testing::TestWithParam<ConvolveParam> {
    if (UUT_->use_highbd_ == 0) {
      return input_ + offset;
    } else {
-      return CAST_TO_BYTEPTR(input16_ + offset);
+      return CONVERT_TO_BYTEPTR(input16_) + offset;
    }
 #else
    return input_ + offset;
@ -475,7 +472,7 @@ class ConvolveTest : public ::testing::TestWithParam<ConvolveParam> {
    if (UUT_->use_highbd_ == 0) {
      return output_ + offset;
    } else {
-      return CAST_TO_BYTEPTR(output16_ + offset);
+      return CONVERT_TO_BYTEPTR(output16_) + offset;
    }
 #else
    return output_ + offset;
@ -488,7 +485,7 @@ class ConvolveTest : public ::testing::TestWithParam<ConvolveParam> {
    if (UUT_->use_highbd_ == 0) {
      return output_ref_ + offset;
    } else {
-      return CAST_TO_BYTEPTR(output16_ref_ + offset);
+      return CONVERT_TO_BYTEPTR(output16_ref_) + offset;
    }
 #else
    return output_ref_ + offset;
@ -500,7 +497,7 @@ class ConvolveTest : public ::testing::TestWithParam<ConvolveParam> {
    if (UUT_->use_highbd_ == 0) {
      return list[index];
    } else {
-      return CAST_TO_SHORTPTR(list)[index];
+      return CONVERT_TO_SHORTPTR(list)[index];
    }
 #else
    return list[index];
@ -512,7 +509,7 @@ class ConvolveTest : public ::testing::TestWithParam<ConvolveParam> {
    if (UUT_->use_highbd_ == 0) {
      list[index] = (uint8_t)val;
    } else {
-      CAST_TO_SHORTPTR(list)[index] = val;
+      CONVERT_TO_SHORTPTR(list)[index] = val;
    }
 #else
    list[index] = (uint8_t)val;
@ -542,167 +539,12 @@ uint16_t *ConvolveTest::output16_ref_ = NULL;
 // Runs the fixture's guard-block check on its own, without calling any
 // convolve function first.
 TEST_P(ConvolveTest, GuardBlocks) {
  CheckGuardBlocks();
 }
 // Disabled benchmark: times kIterations calls of UUT_->copy_[0] at the
 // parameterized block size and prints the elapsed microseconds.
 TEST_P(ConvolveTest, DISABLED_Copy_Speed) {
  const uint8_t *const src = input();
  uint8_t *const dst = output();
  const int kIterations = 5000000;
  const int block_w = Width();
  const int block_h = Height();
  vpx_usec_timer stopwatch;
  vpx_usec_timer_start(&stopwatch);
  for (int remaining = kIterations; remaining > 0; --remaining) {
    UUT_->copy_[0](src, kInputStride, dst, kOutputStride, NULL, 0, 0, 0, 0,
                   block_w, block_h);
  }
  vpx_usec_timer_mark(&stopwatch);
  const int elapsed_us = static_cast<int>(vpx_usec_timer_elapsed(&stopwatch));
  // Report 8 when high bitdepth is not in use.
  const int bit_depth = UUT_->use_highbd_ ? UUT_->use_highbd_ : 8;
  printf("convolve_copy_%dx%d_%d: %d us\n", block_w, block_h, bit_depth,
         elapsed_us);
 }
 // Disabled benchmark: times kIterations calls of UUT_->copy_[1] at the
 // parameterized block size and prints the elapsed microseconds.
 TEST_P(ConvolveTest, DISABLED_Avg_Speed) {
  const uint8_t *const src = input();
  uint8_t *const dst = output();
  const int kIterations = 5000000;
  const int block_w = Width();
  const int block_h = Height();
  vpx_usec_timer stopwatch;
  vpx_usec_timer_start(&stopwatch);
  for (int remaining = kIterations; remaining > 0; --remaining) {
    UUT_->copy_[1](src, kInputStride, dst, kOutputStride, NULL, 0, 0, 0, 0,
                   block_w, block_h);
  }
  vpx_usec_timer_mark(&stopwatch);
  const int elapsed_us = static_cast<int>(vpx_usec_timer_elapsed(&stopwatch));
  // Report 8 when high bitdepth is not in use.
  const int bit_depth = UUT_->use_highbd_ ? UUT_->use_highbd_ : 8;
  printf("convolve_avg_%dx%d_%d: %d us\n", block_w, block_h, bit_depth,
         elapsed_us);
 }
 // Disabled benchmark: times kIterations calls of UUT_->shv8_[0] on constant
 // input using the EIGHTTAP kernel table and prints the elapsed microseconds.
 TEST_P(ConvolveTest, DISABLED_Scale_Speed) {
  const uint8_t *const src = input();
  uint8_t *const dst = output();
  const InterpKernel *const kernels = vp9_filter_kernels[EIGHTTAP];
  const int kIterations = 5000000;
  const int block_w = Width();
  const int block_h = Height();
  vpx_usec_timer stopwatch;
  // Fill the input before starting the clock so setup is not timed.
  SetConstantInput(127);
  vpx_usec_timer_start(&stopwatch);
  for (int remaining = kIterations; remaining > 0; --remaining) {
    UUT_->shv8_[0](src, kInputStride, dst, kOutputStride, kernels, 8, 16, 8, 16,
                   block_w, block_h);
  }
  vpx_usec_timer_mark(&stopwatch);
  const int elapsed_us = static_cast<int>(vpx_usec_timer_elapsed(&stopwatch));
  // Report 8 when high bitdepth is not in use.
  const int bit_depth = UUT_->use_highbd_ ? UUT_->use_highbd_ : 8;
  printf("convolve_scale_%dx%d_%d: %d us\n", block_w, block_h, bit_depth,
         elapsed_us);
 }
 // Disabled benchmark: times kIterations calls of UUT_->hv8_[0] on constant
 // input using the EIGHTTAP_SHARP kernel table and prints the elapsed
 // microseconds.
 TEST_P(ConvolveTest, DISABLED_8Tap_Speed) {
  const uint8_t *const src = input();
  uint8_t *const dst = output();
  const InterpKernel *const kernels = vp9_filter_kernels[EIGHTTAP_SHARP];
  const int kIterations = 5000000;
  const int block_w = Width();
  const int block_h = Height();
  vpx_usec_timer stopwatch;
  // Fill the input before starting the clock so setup is not timed.
  SetConstantInput(127);
  vpx_usec_timer_start(&stopwatch);
  for (int remaining = kIterations; remaining > 0; --remaining) {
    UUT_->hv8_[0](src, kInputStride, dst, kOutputStride, kernels, 8, 16, 8, 16,
                  block_w, block_h);
  }
  vpx_usec_timer_mark(&stopwatch);
  const int elapsed_us = static_cast<int>(vpx_usec_timer_elapsed(&stopwatch));
  // Report 8 when high bitdepth is not in use.
  const int bit_depth = UUT_->use_highbd_ ? UUT_->use_highbd_ : 8;
  printf("convolve8_%dx%d_%d: %d us\n", block_w, block_h, bit_depth,
         elapsed_us);
 }
 // Disabled benchmark: times kIterations calls of UUT_->h8_[0] on constant
 // input using the EIGHTTAP_SHARP kernel table and prints the elapsed
 // microseconds.
 TEST_P(ConvolveTest, DISABLED_8Tap_Horiz_Speed) {
  const uint8_t *const src = input();
  uint8_t *const dst = output();
  const InterpKernel *const kernels = vp9_filter_kernels[EIGHTTAP_SHARP];
  const int kIterations = 5000000;
  const int block_w = Width();
  const int block_h = Height();
  vpx_usec_timer stopwatch;
  // Fill the input before starting the clock so setup is not timed.
  SetConstantInput(127);
  vpx_usec_timer_start(&stopwatch);
  for (int remaining = kIterations; remaining > 0; --remaining) {
    UUT_->h8_[0](src, kInputStride, dst, kOutputStride, kernels, 8, 16, 8, 16,
                 block_w, block_h);
  }
  vpx_usec_timer_mark(&stopwatch);
  const int elapsed_us = static_cast<int>(vpx_usec_timer_elapsed(&stopwatch));
  // Report 8 when high bitdepth is not in use.
  const int bit_depth = UUT_->use_highbd_ ? UUT_->use_highbd_ : 8;
  printf("convolve8_horiz_%dx%d_%d: %d us\n", block_w, block_h, bit_depth,
         elapsed_us);
 }
 // Disabled benchmark: times kIterations calls of UUT_->v8_[0] on constant
 // input using the EIGHTTAP_SHARP kernel table and prints the elapsed
 // microseconds.
 TEST_P(ConvolveTest, DISABLED_8Tap_Vert_Speed) {
  const uint8_t *const src = input();
  uint8_t *const dst = output();
  const InterpKernel *const kernels = vp9_filter_kernels[EIGHTTAP_SHARP];
  const int kIterations = 5000000;
  const int block_w = Width();
  const int block_h = Height();
  vpx_usec_timer stopwatch;
  // Fill the input before starting the clock so setup is not timed.
  SetConstantInput(127);
  vpx_usec_timer_start(&stopwatch);
  for (int remaining = kIterations; remaining > 0; --remaining) {
    UUT_->v8_[0](src, kInputStride, dst, kOutputStride, kernels, 8, 16, 8, 16,
                 block_w, block_h);
  }
  vpx_usec_timer_mark(&stopwatch);
  const int elapsed_us = static_cast<int>(vpx_usec_timer_elapsed(&stopwatch));
  // Report 8 when high bitdepth is not in use.
  const int bit_depth = UUT_->use_highbd_ ? UUT_->use_highbd_ : 8;
  printf("convolve8_vert_%dx%d_%d: %d us\n", block_w, block_h, bit_depth,
         elapsed_us);
 }
 // Disabled benchmark: times kIterations calls of UUT_->hv8_[1] on constant
 // input using the EIGHTTAP_SHARP kernel table and prints the elapsed
 // microseconds.
 TEST_P(ConvolveTest, DISABLED_8Tap_Avg_Speed) {
  const uint8_t *const src = input();
  uint8_t *const dst = output();
  const InterpKernel *const kernels = vp9_filter_kernels[EIGHTTAP_SHARP];
  const int kIterations = 5000000;
  const int block_w = Width();
  const int block_h = Height();
  vpx_usec_timer stopwatch;
  // Fill the input before starting the clock so setup is not timed.
  SetConstantInput(127);
  vpx_usec_timer_start(&stopwatch);
  for (int remaining = kIterations; remaining > 0; --remaining) {
    UUT_->hv8_[1](src, kInputStride, dst, kOutputStride, kernels, 8, 16, 8, 16,
                  block_w, block_h);
  }
  vpx_usec_timer_mark(&stopwatch);
  const int elapsed_us = static_cast<int>(vpx_usec_timer_elapsed(&stopwatch));
  // Report 8 when high bitdepth is not in use.
  const int bit_depth = UUT_->use_highbd_ ? UUT_->use_highbd_ : 8;
  printf("convolve8_avg_%dx%d_%d: %d us\n", block_w, block_h, bit_depth,
         elapsed_us);
 }
 TEST_P(ConvolveTest, Copy) {
  uint8_t *const in = input();
  uint8_t *const out = output();
  ASM_REGISTER_STATE_CHECK(UUT_->copy_[0](in, kInputStride, out, kOutputStride,
-                                          NULL, 0, 0, 0, 0, Width(), Height()));
+                                          NULL, 0, NULL, 0, Width(), Height()));
  CheckGuardBlocks();
@ -721,7 +563,7 @@ TEST_P(ConvolveTest, Avg) {
  CopyOutputToRef();
  ASM_REGISTER_STATE_CHECK(UUT_->copy_[1](in, kInputStride, out, kOutputStride,
-                                          NULL, 0, 0, 0, 0, Width(), Height()));
+                                          NULL, 0, NULL, 0, Width(), Height()));
  CheckGuardBlocks();
@ -738,10 +580,12 @@ TEST_P(ConvolveTest, Avg) {
 TEST_P(ConvolveTest, CopyHoriz) {
  uint8_t *const in = input();
  uint8_t *const out = output();
  DECLARE_ALIGNED(256, const int16_t,
                  filter8[8]) = { 0, 0, 0, 128, 0, 0, 0, 0 };
  ASM_REGISTER_STATE_CHECK(UUT_->sh8_[0](in, kInputStride, out, kOutputStride,
-                                         vp9_filter_kernels[0], 0, 16, 0, 16,
+                                         filter8, 16, filter8, 16, Width(),
-                                         Width(), Height()));
+                                         Height()));
  CheckGuardBlocks();
@ -756,10 +600,12 @@ TEST_P(ConvolveTest, CopyHoriz) {
 TEST_P(ConvolveTest, CopyVert) {
  uint8_t *const in = input();
  uint8_t *const out = output();
  DECLARE_ALIGNED(256, const int16_t,
                  filter8[8]) = { 0, 0, 0, 128, 0, 0, 0, 0 };
  ASM_REGISTER_STATE_CHECK(UUT_->sv8_[0](in, kInputStride, out, kOutputStride,
-                                         vp9_filter_kernels[0], 0, 16, 0, 16,
+                                         filter8, 16, filter8, 16, Width(),
-                                         Width(), Height()));
+                                         Height()));
  CheckGuardBlocks();
@ -774,10 +620,12 @@ TEST_P(ConvolveTest, CopyVert) {
 TEST_P(ConvolveTest, Copy2D) {
  uint8_t *const in = input();
  uint8_t *const out = output();
  DECLARE_ALIGNED(256, const int16_t,
                  filter8[8]) = { 0, 0, 0, 128, 0, 0, 0, 0 };
  ASM_REGISTER_STATE_CHECK(UUT_->shv8_[0](in, kInputStride, out, kOutputStride,
-                                          vp9_filter_kernels[0], 0, 16, 0, 16,
+                                          filter8, 16, filter8, 16, Width(),
-                                          Width(), Height()));
+                                          Height()));
  CheckGuardBlocks();
@ -813,6 +661,7 @@ TEST(ConvolveTest, FiltersWontSaturateWhenAddedPairwise) {
  }
 }
 const int16_t kInvalidFilter[8] = { 0 };
 const WrapperFilterBlock2d8Func wrapper_filter_block2d_8[2] = {
  wrapper_filter_block2d_8_c, wrapper_filter_average_block2d_8_c
 };
@ -828,7 +677,7 @@ TEST_P(ConvolveTest, MatchesReferenceSubpixelFilter) {
    if (UUT_->use_highbd_ == 0) {
      ref = ref8;
    } else {
-      ref = CAST_TO_BYTEPTR(ref16);
+      ref = CONVERT_TO_BYTEPTR(ref16);
    }
 #else
    uint8_t ref[kOutputStride * kMaxDimension];
@ -865,21 +714,21 @@ TEST_P(ConvolveTest, MatchesReferenceSubpixelFilter) {
                                      Width(), Height(), UUT_->use_highbd_);
          if (filter_x && filter_y)
-            ASM_REGISTER_STATE_CHECK(
+            ASM_REGISTER_STATE_CHECK(UUT_->hv8_[i](
-                UUT_->hv8_[i](in, kInputStride, out, kOutputStride, filters,
+                in, kInputStride, out, kOutputStride, filters[filter_x], 16,
-                              filter_x, 16, filter_y, 16, Width(), Height()));
+                filters[filter_y], 16, Width(), Height()));
          else if (filter_y)
-            ASM_REGISTER_STATE_CHECK(
+            ASM_REGISTER_STATE_CHECK(UUT_->v8_[i](
-                UUT_->v8_[i](in, kInputStride, out, kOutputStride, filters, 0,
+                in, kInputStride, out, kOutputStride, kInvalidFilter, 16,
-                             16, filter_y, 16, Width(), Height()));
+                filters[filter_y], 16, Width(), Height()));
          else if (filter_x)
-            ASM_REGISTER_STATE_CHECK(
+            ASM_REGISTER_STATE_CHECK(UUT_->h8_[i](
-                UUT_->h8_[i](in, kInputStride, out, kOutputStride, filters,
+                in, kInputStride, out, kOutputStride, filters[filter_x], 16,
-                             filter_x, 16, 0, 16, Width(), Height()));
+                kInvalidFilter, 16, Width(), Height()));
          else
-            ASM_REGISTER_STATE_CHECK(UUT_->copy_[i](in, kInputStride, out,
+            ASM_REGISTER_STATE_CHECK(UUT_->copy_[i](
-                                                    kOutputStride, NULL, 0, 0,
+                in, kInputStride, out, kOutputStride, kInvalidFilter, 0,
-                                                    0, 0, Width(), Height()));
+                kInvalidFilter, 0, Width(), Height()));
          CheckGuardBlocks();
@ -907,7 +756,7 @@ TEST_P(ConvolveTest, FilterExtremes) {
  if (UUT_->use_highbd_ == 0) {
    ref = ref8;
  } else {
-    ref = CAST_TO_BYTEPTR(ref16);
+    ref = CONVERT_TO_BYTEPTR(ref16);
  }
 #else
  uint8_t ref[kOutputStride * kMaxDimension];
@ -963,21 +812,21 @@ TEST_P(ConvolveTest, FilterExtremes) {
                                       filters[filter_y], ref, kOutputStride,
                                       Width(), Height(), UUT_->use_highbd_);
            if (filter_x && filter_y)
-              ASM_REGISTER_STATE_CHECK(
+              ASM_REGISTER_STATE_CHECK(UUT_->hv8_[0](
-                  UUT_->hv8_[0](in, kInputStride, out, kOutputStride, filters,
+                  in, kInputStride, out, kOutputStride, filters[filter_x], 16,
-                                filter_x, 16, filter_y, 16, Width(), Height()));
+                  filters[filter_y], 16, Width(), Height()));
            else if (filter_y)
-              ASM_REGISTER_STATE_CHECK(
+              ASM_REGISTER_STATE_CHECK(UUT_->v8_[0](
-                  UUT_->v8_[0](in, kInputStride, out, kOutputStride, filters, 0,
+                  in, kInputStride, out, kOutputStride, kInvalidFilter, 16,
-                               16, filter_y, 16, Width(), Height()));
+                  filters[filter_y], 16, Width(), Height()));
            else if (filter_x)
-              ASM_REGISTER_STATE_CHECK(
+              ASM_REGISTER_STATE_CHECK(UUT_->h8_[0](
-                  UUT_->h8_[0](in, kInputStride, out, kOutputStride, filters,
+                  in, kInputStride, out, kOutputStride, filters[filter_x], 16,
-                               filter_x, 16, 0, 16, Width(), Height()));
+                  kInvalidFilter, 16, Width(), Height()));
            else
-              ASM_REGISTER_STATE_CHECK(UUT_->copy_[0](in, kInputStride, out,
+              ASM_REGISTER_STATE_CHECK(UUT_->copy_[0](
-                                                      kOutputStride, NULL, 0, 0,
+                  in, kInputStride, out, kOutputStride, kInvalidFilter, 0,
-                                                      0, 0, Width(), Height()));
+                  kInvalidFilter, 0, Width(), Height()));
            for (int y = 0; y < Height(); ++y) {
              for (int x = 0; x < Width(); ++x)
@ -996,63 +845,44 @@ TEST_P(ConvolveTest, FilterExtremes) {
 /* This test exercises that enough rows and columns are filtered with every
   possible initial fractional positions and scaling steps. */
 #if !CONFIG_VP9_HIGHBITDEPTH
 static const ConvolveFunc scaled_2d_c_funcs[2] = { vpx_scaled_2d_c,
                                                   vpx_scaled_avg_2d_c };
 TEST_P(ConvolveTest, CheckScalingFiltering) {
  uint8_t *const in = input();
  uint8_t *const out = output();
-  uint8_t ref[kOutputStride * kMaxDimension];
+  const InterpKernel *const eighttap = vp9_filter_kernels[EIGHTTAP];
-  ::libvpx_test::ACMRandom prng;
+  SetConstantInput(127);
  for (int y = 0; y < Height(); ++y) {
    for (int x = 0; x < Width(); ++x) {
      const uint16_t r = prng.Rand8Extremes();
      assign_val(in, y * kInputStride + x, r);
    }
  }
-  for (int i = 0; i < 2; ++i) {
+  for (int frac = 0; frac < 16; ++frac) {
-    for (INTERP_FILTER filter_type = 0; filter_type < 4; ++filter_type) {
+    for (int step = 1; step <= 32; ++step) {
-      const InterpKernel *const eighttap = vp9_filter_kernels[filter_type];
+      /* Test the horizontal and vertical filters in combination. */
-      for (int frac = 0; frac < 16; ++frac) {
+      ASM_REGISTER_STATE_CHECK(
-        for (int step = 1; step <= 32; ++step) {
+          UUT_->shv8_[0](in, kInputStride, out, kOutputStride, eighttap[frac],
-          /* Test the horizontal and vertical filters in combination. */
+                         step, eighttap[frac], step, Width(), Height()));
          scaled_2d_c_funcs[i](in, kInputStride, ref, kOutputStride, eighttap,
                               frac, step, frac, step, Width(), Height());
          ASM_REGISTER_STATE_CHECK(
              UUT_->shv8_[i](in, kInputStride, out, kOutputStride, eighttap,
                             frac, step, frac, step, Width(), Height()));
-          CheckGuardBlocks();
+      CheckGuardBlocks();
-          for (int y = 0; y < Height(); ++y) {
+      for (int y = 0; y < Height(); ++y) {
-            for (int x = 0; x < Width(); ++x) {
+        for (int x = 0; x < Width(); ++x) {
-              ASSERT_EQ(lookup(ref, y * kOutputStride + x),
+          ASSERT_EQ(lookup(in, y * kInputStride + x),
-                        lookup(out, y * kOutputStride + x))
+                    lookup(out, y * kOutputStride + x))
-                  << "x == " << x << ", y == " << y << ", frac == " << frac
+              << "x == " << x << ", y == " << y << ", frac == " << frac
-                  << ", step == " << step;
+              << ", step == " << step;
            }
          }
        }
      }
    }
  }
 }
 #endif
-using ::testing::make_tuple;
+using std::tr1::make_tuple;
 #if CONFIG_VP9_HIGHBITDEPTH
 #define WRAP(func, bd)                                                       \
  void wrap_##func##_##bd(                                                   \
      const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst,                \
-      ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4,           \
+      ptrdiff_t dst_stride, const int16_t *filter_x, int filter_x_stride,    \
-      int x_step_q4, int y0_q4, int y_step_q4, int w, int h) {               \
+      const int16_t *filter_y, int filter_y_stride, int w, int h) {          \
-    vpx_highbd_##func(reinterpret_cast<const uint16_t *>(src), src_stride,   \
+    vpx_highbd_##func(src, src_stride, dst, dst_stride, filter_x,            \
-                      reinterpret_cast<uint16_t *>(dst), dst_stride, filter, \
+                      filter_x_stride, filter_y, filter_y_stride, w, h, bd); \
                      x0_q4, x_step_q4, y0_q4, y_step_q4, w, h, bd);         \
  }
 #if HAVE_SSE2 && ARCH_X86_64
@ -1082,35 +912,6 @@ WRAP(convolve8_sse2, 12)
 WRAP(convolve8_avg_sse2, 12)
 #endif  // HAVE_SSE2 && ARCH_X86_64
 #if HAVE_AVX2
 WRAP(convolve_copy_avx2, 8)
 WRAP(convolve_avg_avx2, 8)
 WRAP(convolve8_horiz_avx2, 8)
 WRAP(convolve8_avg_horiz_avx2, 8)
 WRAP(convolve8_vert_avx2, 8)
 WRAP(convolve8_avg_vert_avx2, 8)
 WRAP(convolve8_avx2, 8)
 WRAP(convolve8_avg_avx2, 8)
 WRAP(convolve_copy_avx2, 10)
 WRAP(convolve_avg_avx2, 10)
 WRAP(convolve8_avx2, 10)
 WRAP(convolve8_horiz_avx2, 10)
 WRAP(convolve8_vert_avx2, 10)
 WRAP(convolve8_avg_avx2, 10)
 WRAP(convolve8_avg_horiz_avx2, 10)
 WRAP(convolve8_avg_vert_avx2, 10)
 WRAP(convolve_copy_avx2, 12)
 WRAP(convolve_avg_avx2, 12)
 WRAP(convolve8_avx2, 12)
 WRAP(convolve8_horiz_avx2, 12)
 WRAP(convolve8_vert_avx2, 12)
 WRAP(convolve8_avg_avx2, 12)
 WRAP(convolve8_avg_horiz_avx2, 12)
 WRAP(convolve8_avg_vert_avx2, 12)
 #endif  // HAVE_AVX2
 #if HAVE_NEON
 WRAP(convolve_copy_neon, 8)
 WRAP(convolve_avg_neon, 8)
@ -1256,48 +1057,18 @@ INSTANTIATE_TEST_CASE_P(SSSE3, ConvolveTest,
                        ::testing::ValuesIn(kArrayConvolve8_ssse3));
 #endif
-#if HAVE_AVX2
+#if HAVE_AVX2 && HAVE_SSSE3
 #if CONFIG_VP9_HIGHBITDEPTH
 const ConvolveFunctions convolve8_avx2(
    wrap_convolve_copy_avx2_8, wrap_convolve_avg_avx2_8,
    wrap_convolve8_horiz_avx2_8, wrap_convolve8_avg_horiz_avx2_8,
    wrap_convolve8_vert_avx2_8, wrap_convolve8_avg_vert_avx2_8,
    wrap_convolve8_avx2_8, wrap_convolve8_avg_avx2_8, wrap_convolve8_horiz_c_8,
    wrap_convolve8_avg_horiz_c_8, wrap_convolve8_vert_c_8,
    wrap_convolve8_avg_vert_c_8, wrap_convolve8_c_8, wrap_convolve8_avg_c_8, 8);
 const ConvolveFunctions convolve10_avx2(
    wrap_convolve_copy_avx2_10, wrap_convolve_avg_avx2_10,
    wrap_convolve8_horiz_avx2_10, wrap_convolve8_avg_horiz_avx2_10,
    wrap_convolve8_vert_avx2_10, wrap_convolve8_avg_vert_avx2_10,
    wrap_convolve8_avx2_10, wrap_convolve8_avg_avx2_10,
    wrap_convolve8_horiz_c_10, wrap_convolve8_avg_horiz_c_10,
    wrap_convolve8_vert_c_10, wrap_convolve8_avg_vert_c_10, wrap_convolve8_c_10,
    wrap_convolve8_avg_c_10, 10);
 const ConvolveFunctions convolve12_avx2(
    wrap_convolve_copy_avx2_12, wrap_convolve_avg_avx2_12,
    wrap_convolve8_horiz_avx2_12, wrap_convolve8_avg_horiz_avx2_12,
    wrap_convolve8_vert_avx2_12, wrap_convolve8_avg_vert_avx2_12,
    wrap_convolve8_avx2_12, wrap_convolve8_avg_avx2_12,
    wrap_convolve8_horiz_c_12, wrap_convolve8_avg_horiz_c_12,
    wrap_convolve8_vert_c_12, wrap_convolve8_avg_vert_c_12, wrap_convolve8_c_12,
    wrap_convolve8_avg_c_12, 12);
 const ConvolveParam kArrayConvolve8_avx2[] = { ALL_SIZES(convolve8_avx2),
                                               ALL_SIZES(convolve10_avx2),
                                               ALL_SIZES(convolve12_avx2) };
 INSTANTIATE_TEST_CASE_P(AVX2, ConvolveTest,
                        ::testing::ValuesIn(kArrayConvolve8_avx2));
 #else   // !CONFIG_VP9_HIGHBITDEPTH
 const ConvolveFunctions convolve8_avx2(
    vpx_convolve_copy_c, vpx_convolve_avg_c, vpx_convolve8_horiz_avx2,
-    vpx_convolve8_avg_horiz_avx2, vpx_convolve8_vert_avx2,
+    vpx_convolve8_avg_horiz_ssse3, vpx_convolve8_vert_avx2,
-    vpx_convolve8_avg_vert_avx2, vpx_convolve8_avx2, vpx_convolve8_avg_avx2,
+    vpx_convolve8_avg_vert_ssse3, vpx_convolve8_avx2, vpx_convolve8_avg_ssse3,
    vpx_scaled_horiz_c, vpx_scaled_avg_horiz_c, vpx_scaled_vert_c,
    vpx_scaled_avg_vert_c, vpx_scaled_2d_c, vpx_scaled_avg_2d_c, 0);
 const ConvolveParam kArrayConvolve8_avx2[] = { ALL_SIZES(convolve8_avx2) };
 INSTANTIATE_TEST_CASE_P(AVX2, ConvolveTest,
                        ::testing::ValuesIn(kArrayConvolve8_avx2));
-#endif  // CONFIG_VP9_HIGHBITDEPTH
+#endif  // HAVE_AVX2 && HAVE_SSSE3
 #endif  // HAVE_AVX2
 #if HAVE_NEON
 #if CONFIG_VP9_HIGHBITDEPTH
@ -1334,7 +1105,7 @@ const ConvolveFunctions convolve8_neon(
    vpx_convolve8_avg_horiz_neon, vpx_convolve8_vert_neon,
    vpx_convolve8_avg_vert_neon, vpx_convolve8_neon, vpx_convolve8_avg_neon,
    vpx_scaled_horiz_c, vpx_scaled_avg_horiz_c, vpx_scaled_vert_c,
-    vpx_scaled_avg_vert_c, vpx_scaled_2d_neon, vpx_scaled_avg_2d_c, 0);
+    vpx_scaled_avg_vert_c, vpx_scaled_2d_c, vpx_scaled_avg_2d_c, 0);
 const ConvolveParam kArrayConvolve_neon[] = { ALL_SIZES(convolve8_neon) };
 #endif  // CONFIG_VP9_HIGHBITDEPTH
@ -1361,34 +1132,10 @@ const ConvolveFunctions convolve8_msa(
    vpx_convolve8_avg_horiz_msa, vpx_convolve8_vert_msa,
    vpx_convolve8_avg_vert_msa, vpx_convolve8_msa, vpx_convolve8_avg_msa,
    vpx_scaled_horiz_c, vpx_scaled_avg_horiz_c, vpx_scaled_vert_c,
-    vpx_scaled_avg_vert_c, vpx_scaled_2d_msa, vpx_scaled_avg_2d_c, 0);
+    vpx_scaled_avg_vert_c, vpx_scaled_2d_c, vpx_scaled_avg_2d_c, 0);
 const ConvolveParam kArrayConvolve8_msa[] = { ALL_SIZES(convolve8_msa) };
 INSTANTIATE_TEST_CASE_P(MSA, ConvolveTest,
                        ::testing::ValuesIn(kArrayConvolve8_msa));
 #endif  // HAVE_MSA
 #if HAVE_VSX
 const ConvolveFunctions convolve8_vsx(
    vpx_convolve_copy_vsx, vpx_convolve_avg_vsx, vpx_convolve8_horiz_vsx,
    vpx_convolve8_avg_horiz_vsx, vpx_convolve8_vert_vsx,
    vpx_convolve8_avg_vert_vsx, vpx_convolve8_vsx, vpx_convolve8_avg_vsx,
    vpx_scaled_horiz_c, vpx_scaled_avg_horiz_c, vpx_scaled_vert_c,
    vpx_scaled_avg_vert_c, vpx_scaled_2d_c, vpx_scaled_avg_2d_c, 0);
 const ConvolveParam kArrayConvolve_vsx[] = { ALL_SIZES(convolve8_vsx) };
 INSTANTIATE_TEST_CASE_P(VSX, ConvolveTest,
                        ::testing::ValuesIn(kArrayConvolve_vsx));
 #endif  // HAVE_VSX
 #if HAVE_MMI
 const ConvolveFunctions convolve8_mmi(
    vpx_convolve_copy_c, vpx_convolve_avg_c, vpx_convolve8_horiz_mmi,
    vpx_convolve8_avg_horiz_c, vpx_convolve8_vert_mmi,
    vpx_convolve8_avg_vert_mmi, vpx_convolve8_mmi, vpx_convolve8_avg_mmi,
    vpx_scaled_horiz_c, vpx_scaled_avg_horiz_c, vpx_scaled_vert_c,
    vpx_scaled_avg_vert_c, vpx_scaled_2d_c, vpx_scaled_avg_2d_c, 0);
 const ConvolveParam kArrayConvolve_mmi[] = { ALL_SIZES(convolve8_mmi) };
 INSTANTIATE_TEST_CASE_P(MMI, ConvolveTest,
                        ::testing::ValuesIn(kArrayConvolve_mmi));
 #endif  // HAVE_MMI
 }  // namespace
--- a/test/datarate_test.cc
+++ b/test/datarate_test.cc
--- a/test/dct16x16_test.cc
+++ b/test/dct16x16_test.cc
@ -229,10 +229,9 @@ typedef void (*FhtFunc)(const int16_t *in, tran_low_t *out, int stride,
 typedef void (*IhtFunc)(const tran_low_t *in, uint8_t *out, int stride,
                        int tx_type);
-typedef ::testing::tuple<FdctFunc, IdctFunc, int, vpx_bit_depth_t>
+typedef std::tr1::tuple<FdctFunc, IdctFunc, int, vpx_bit_depth_t> Dct16x16Param;
-    Dct16x16Param;
+typedef std::tr1::tuple<FhtFunc, IhtFunc, int, vpx_bit_depth_t> Ht16x16Param;
-typedef ::testing::tuple<FhtFunc, IhtFunc, int, vpx_bit_depth_t> Ht16x16Param;
+typedef std::tr1::tuple<IdctFunc, IdctFunc, int, vpx_bit_depth_t>
 typedef ::testing::tuple<IdctFunc, IdctFunc, int, vpx_bit_depth_t>
    Idct16x16Param;
 void fdct16x16_ref(const int16_t *in, tran_low_t *out, int stride,
@ -256,11 +255,11 @@ void iht16x16_ref(const tran_low_t *in, uint8_t *dest, int stride,
 #if CONFIG_VP9_HIGHBITDEPTH
 void idct16x16_10(const tran_low_t *in, uint8_t *out, int stride) {
-  vpx_highbd_idct16x16_256_add_c(in, CAST_TO_SHORTPTR(out), stride, 10);
+  vpx_highbd_idct16x16_256_add_c(in, out, stride, 10);
 }
 void idct16x16_12(const tran_low_t *in, uint8_t *out, int stride) {
-  vpx_highbd_idct16x16_256_add_c(in, CAST_TO_SHORTPTR(out), stride, 12);
+  vpx_highbd_idct16x16_256_add_c(in, out, stride, 12);
 }
 void idct16x16_10_ref(const tran_low_t *in, uint8_t *out, int stride,
@ -274,36 +273,36 @@ void idct16x16_12_ref(const tran_low_t *in, uint8_t *out, int stride,
 }
 void iht16x16_10(const tran_low_t *in, uint8_t *out, int stride, int tx_type) {
-  vp9_highbd_iht16x16_256_add_c(in, CAST_TO_SHORTPTR(out), stride, tx_type, 10);
+  vp9_highbd_iht16x16_256_add_c(in, out, stride, tx_type, 10);
 }
 void iht16x16_12(const tran_low_t *in, uint8_t *out, int stride, int tx_type) {
-  vp9_highbd_iht16x16_256_add_c(in, CAST_TO_SHORTPTR(out), stride, tx_type, 12);
+  vp9_highbd_iht16x16_256_add_c(in, out, stride, tx_type, 12);
 }
 #if HAVE_SSE2
 void idct16x16_10_add_10_c(const tran_low_t *in, uint8_t *out, int stride) {
-  vpx_highbd_idct16x16_10_add_c(in, CAST_TO_SHORTPTR(out), stride, 10);
+  vpx_highbd_idct16x16_10_add_c(in, out, stride, 10);
 }
 void idct16x16_10_add_12_c(const tran_low_t *in, uint8_t *out, int stride) {
-  vpx_highbd_idct16x16_10_add_c(in, CAST_TO_SHORTPTR(out), stride, 12);
+  vpx_highbd_idct16x16_10_add_c(in, out, stride, 12);
 }
 void idct16x16_256_add_10_sse2(const tran_low_t *in, uint8_t *out, int stride) {
-  vpx_highbd_idct16x16_256_add_sse2(in, CAST_TO_SHORTPTR(out), stride, 10);
+  vpx_highbd_idct16x16_256_add_sse2(in, out, stride, 10);
 }
 void idct16x16_256_add_12_sse2(const tran_low_t *in, uint8_t *out, int stride) {
-  vpx_highbd_idct16x16_256_add_sse2(in, CAST_TO_SHORTPTR(out), stride, 12);
+  vpx_highbd_idct16x16_256_add_sse2(in, out, stride, 12);
 }
 void idct16x16_10_add_10_sse2(const tran_low_t *in, uint8_t *out, int stride) {
-  vpx_highbd_idct16x16_10_add_sse2(in, CAST_TO_SHORTPTR(out), stride, 10);
+  vpx_highbd_idct16x16_10_add_sse2(in, out, stride, 10);
 }
 void idct16x16_10_add_12_sse2(const tran_low_t *in, uint8_t *out, int stride) {
-  vpx_highbd_idct16x16_10_add_sse2(in, CAST_TO_SHORTPTR(out), stride, 12);
+  vpx_highbd_idct16x16_10_add_sse2(in, out, stride, 12);
 }
 #endif  // HAVE_SSE2
 #endif  // CONFIG_VP9_HIGHBITDEPTH
@ -354,7 +353,7 @@ class Trans16x16TestBase {
 #if CONFIG_VP9_HIGHBITDEPTH
      } else {
        ASM_REGISTER_STATE_CHECK(
-            RunInvTxfm(test_temp_block, CAST_TO_BYTEPTR(dst16), pitch_));
+            RunInvTxfm(test_temp_block, CONVERT_TO_BYTEPTR(dst16), pitch_));
 #endif
      }
@ -476,10 +475,10 @@ class Trans16x16TestBase {
        ASM_REGISTER_STATE_CHECK(RunInvTxfm(output_ref_block, dst, pitch_));
 #if CONFIG_VP9_HIGHBITDEPTH
      } else {
-        inv_txfm_ref(output_ref_block, CAST_TO_BYTEPTR(ref16), pitch_,
+        inv_txfm_ref(output_ref_block, CONVERT_TO_BYTEPTR(ref16), pitch_,
                     tx_type_);
        ASM_REGISTER_STATE_CHECK(
-            RunInvTxfm(output_ref_block, CAST_TO_BYTEPTR(dst16), pitch_));
+            RunInvTxfm(output_ref_block, CONVERT_TO_BYTEPTR(dst16), pitch_));
 #endif
      }
      if (bit_depth_ == VPX_BITS_8) {
@ -531,7 +530,8 @@ class Trans16x16TestBase {
        ASM_REGISTER_STATE_CHECK(RunInvTxfm(coeff, dst, 16));
 #if CONFIG_VP9_HIGHBITDEPTH
      } else {
-        ASM_REGISTER_STATE_CHECK(RunInvTxfm(coeff, CAST_TO_BYTEPTR(dst16), 16));
+        ASM_REGISTER_STATE_CHECK(
            RunInvTxfm(coeff, CONVERT_TO_BYTEPTR(dst16), 16));
 #endif  // CONFIG_VP9_HIGHBITDEPTH
      }
@ -543,8 +543,8 @@ class Trans16x16TestBase {
        const uint32_t diff = dst[j] - src[j];
 #endif  // CONFIG_VP9_HIGHBITDEPTH
        const uint32_t error = diff * diff;
-        EXPECT_GE(1u, error)
+        EXPECT_GE(1u, error) << "Error: 16x16 IDCT has error " << error
-            << "Error: 16x16 IDCT has error " << error << " at index " << j;
+                             << " at index " << j;
      }
    }
  }
@ -585,9 +585,9 @@ class Trans16x16TestBase {
        ASM_REGISTER_STATE_CHECK(RunInvTxfm(coeff, dst, pitch_));
      } else {
 #if CONFIG_VP9_HIGHBITDEPTH
-        ref_txfm(coeff, CAST_TO_BYTEPTR(ref16), pitch_);
+        ref_txfm(coeff, CONVERT_TO_BYTEPTR(ref16), pitch_);
        ASM_REGISTER_STATE_CHECK(
-            RunInvTxfm(coeff, CAST_TO_BYTEPTR(dst16), pitch_));
+            RunInvTxfm(coeff, CONVERT_TO_BYTEPTR(dst16), pitch_));
 #endif  // CONFIG_VP9_HIGHBITDEPTH
      }
@ -745,7 +745,67 @@ TEST_P(InvTrans16x16DCT, CompareReference) {
  CompareInvReference(ref_txfm_, thresh_);
 }
-using ::testing::make_tuple;
+class PartialTrans16x16Test : public ::testing::TestWithParam<
                                  std::tr1::tuple<FdctFunc, vpx_bit_depth_t> > {
 public:
  virtual ~PartialTrans16x16Test() {}
  virtual void SetUp() {
    fwd_txfm_ = GET_PARAM(0);
    bit_depth_ = GET_PARAM(1);
  }
  virtual void TearDown() { libvpx_test::ClearSystemState(); }
 protected:
  vpx_bit_depth_t bit_depth_;
  FdctFunc fwd_txfm_;
 };
 TEST_P(PartialTrans16x16Test, Extremes) {
 #if CONFIG_VP9_HIGHBITDEPTH
  const int16_t maxval =
      static_cast<int16_t>(clip_pixel_highbd(1 << 30, bit_depth_));
 #else
  const int16_t maxval = 255;
 #endif
  const int minval = -maxval;
  DECLARE_ALIGNED(16, int16_t, input[kNumCoeffs]);
  DECLARE_ALIGNED(16, tran_low_t, output[kNumCoeffs]);
  for (int i = 0; i < kNumCoeffs; ++i) input[i] = maxval;
  output[0] = 0;
  ASM_REGISTER_STATE_CHECK(fwd_txfm_(input, output, 16));
  EXPECT_EQ((maxval * kNumCoeffs) >> 1, output[0]);
  for (int i = 0; i < kNumCoeffs; ++i) input[i] = minval;
  output[0] = 0;
  ASM_REGISTER_STATE_CHECK(fwd_txfm_(input, output, 16));
  EXPECT_EQ((minval * kNumCoeffs) >> 1, output[0]);
 }
 TEST_P(PartialTrans16x16Test, Random) {
 #if CONFIG_VP9_HIGHBITDEPTH
  const int16_t maxval =
      static_cast<int16_t>(clip_pixel_highbd(1 << 30, bit_depth_));
 #else
  const int16_t maxval = 255;
 #endif
  DECLARE_ALIGNED(16, int16_t, input[kNumCoeffs]);
  DECLARE_ALIGNED(16, tran_low_t, output[kNumCoeffs]);
  ACMRandom rnd(ACMRandom::DeterministicSeed());
  int sum = 0;
  for (int i = 0; i < kNumCoeffs; ++i) {
    const int val = (i & 1) ? -rnd(maxval + 1) : rnd(maxval + 1);
    input[i] = val;
    sum += val;
  }
  output[0] = 0;
  ASM_REGISTER_STATE_CHECK(fwd_txfm_(input, output, 16));
  EXPECT_EQ(sum >> 1, output[0]);
 }
 using std::tr1::make_tuple;
 #if CONFIG_VP9_HIGHBITDEPTH
 INSTANTIATE_TEST_CASE_P(
@ -777,6 +837,11 @@ INSTANTIATE_TEST_CASE_P(
        make_tuple(&vp9_fht16x16_c, &vp9_iht16x16_256_add_c, 1, VPX_BITS_8),
        make_tuple(&vp9_fht16x16_c, &vp9_iht16x16_256_add_c, 2, VPX_BITS_8),
        make_tuple(&vp9_fht16x16_c, &vp9_iht16x16_256_add_c, 3, VPX_BITS_8)));
 INSTANTIATE_TEST_CASE_P(
    C, PartialTrans16x16Test,
    ::testing::Values(make_tuple(&vpx_highbd_fdct16x16_1_c, VPX_BITS_8),
                      make_tuple(&vpx_highbd_fdct16x16_1_c, VPX_BITS_10),
                      make_tuple(&vpx_highbd_fdct16x16_1_c, VPX_BITS_12)));
 #else
 INSTANTIATE_TEST_CASE_P(
    C, Trans16x16HT,
@ -785,14 +850,17 @@ INSTANTIATE_TEST_CASE_P(
        make_tuple(&vp9_fht16x16_c, &vp9_iht16x16_256_add_c, 1, VPX_BITS_8),
        make_tuple(&vp9_fht16x16_c, &vp9_iht16x16_256_add_c, 2, VPX_BITS_8),
        make_tuple(&vp9_fht16x16_c, &vp9_iht16x16_256_add_c, 3, VPX_BITS_8)));
 INSTANTIATE_TEST_CASE_P(C, PartialTrans16x16Test,
                        ::testing::Values(make_tuple(&vpx_fdct16x16_1_c,
                                                     VPX_BITS_8)));
 #endif  // CONFIG_VP9_HIGHBITDEPTH
-#if HAVE_NEON && !CONFIG_EMULATE_HARDWARE
+#if HAVE_NEON && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
 INSTANTIATE_TEST_CASE_P(
    NEON, Trans16x16DCT,
-    ::testing::Values(make_tuple(&vpx_fdct16x16_neon,
+    ::testing::Values(make_tuple(&vpx_fdct16x16_c, &vpx_idct16x16_256_add_neon,
-                                 &vpx_idct16x16_256_add_neon, 0, VPX_BITS_8)));
+                                 0, VPX_BITS_8)));
-#endif  // HAVE_NEON && !CONFIG_EMULATE_HARDWARE
+#endif
 #if HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
 INSTANTIATE_TEST_CASE_P(
@ -809,6 +877,9 @@ INSTANTIATE_TEST_CASE_P(
                                 2, VPX_BITS_8),
                      make_tuple(&vp9_fht16x16_sse2, &vp9_iht16x16_256_add_sse2,
                                 3, VPX_BITS_8)));
 INSTANTIATE_TEST_CASE_P(SSE2, PartialTrans16x16Test,
                        ::testing::Values(make_tuple(&vpx_fdct16x16_1_sse2,
                                                     VPX_BITS_8)));
 #endif  // HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
 #if HAVE_SSE2 && CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
@ -843,6 +914,9 @@ INSTANTIATE_TEST_CASE_P(
                                 &idct16x16_10_add_12_sse2, 3167, VPX_BITS_12),
                      make_tuple(&idct16x16_12, &idct16x16_256_add_12_sse2,
                                 3167, VPX_BITS_12)));
 INSTANTIATE_TEST_CASE_P(SSE2, PartialTrans16x16Test,
                        ::testing::Values(make_tuple(&vpx_fdct16x16_1_sse2,
                                                     VPX_BITS_8)));
 #endif  // HAVE_SSE2 && CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
 #if HAVE_MSA && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
@ -858,12 +932,8 @@ INSTANTIATE_TEST_CASE_P(
        make_tuple(&vp9_fht16x16_msa, &vp9_iht16x16_256_add_msa, 2, VPX_BITS_8),
        make_tuple(&vp9_fht16x16_msa, &vp9_iht16x16_256_add_msa, 3,
                   VPX_BITS_8)));
 INSTANTIATE_TEST_CASE_P(MSA, PartialTrans16x16Test,
                        ::testing::Values(make_tuple(&vpx_fdct16x16_1_msa,
                                                     VPX_BITS_8)));
 #endif  // HAVE_MSA && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
 #if HAVE_VSX && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
 INSTANTIATE_TEST_CASE_P(VSX, Trans16x16DCT,
                        ::testing::Values(make_tuple(&vpx_fdct16x16_c,
                                                     &vpx_idct16x16_256_add_vsx,
                                                     0, VPX_BITS_8)));
 #endif  // HAVE_VSX && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
 }  // namespace
--- a/test/dct32x32_test.cc
+++ b/test/dct32x32_test.cc
@ -66,16 +66,16 @@ void reference_32x32_dct_2d(const int16_t input[kNumCoeffs],
 typedef void (*FwdTxfmFunc)(const int16_t *in, tran_low_t *out, int stride);
 typedef void (*InvTxfmFunc)(const tran_low_t *in, uint8_t *out, int stride);
-typedef ::testing::tuple<FwdTxfmFunc, InvTxfmFunc, int, vpx_bit_depth_t>
+typedef std::tr1::tuple<FwdTxfmFunc, InvTxfmFunc, int, vpx_bit_depth_t>
    Trans32x32Param;
 #if CONFIG_VP9_HIGHBITDEPTH
 void idct32x32_10(const tran_low_t *in, uint8_t *out, int stride) {
-  vpx_highbd_idct32x32_1024_add_c(in, CAST_TO_SHORTPTR(out), stride, 10);
+  vpx_highbd_idct32x32_1024_add_c(in, out, stride, 10);
 }
 void idct32x32_12(const tran_low_t *in, uint8_t *out, int stride) {
-  vpx_highbd_idct32x32_1024_add_c(in, CAST_TO_SHORTPTR(out), stride, 12);
+  vpx_highbd_idct32x32_1024_add_c(in, out, stride, 12);
 }
 #endif  // CONFIG_VP9_HIGHBITDEPTH
@ -137,7 +137,7 @@ TEST_P(Trans32x32Test, AccuracyCheck) {
 #if CONFIG_VP9_HIGHBITDEPTH
    } else {
      ASM_REGISTER_STATE_CHECK(
-          inv_txfm_(test_temp_block, CAST_TO_BYTEPTR(dst16), 32));
+          inv_txfm_(test_temp_block, CONVERT_TO_BYTEPTR(dst16), 32));
 #endif
    }
@ -275,7 +275,7 @@ TEST_P(Trans32x32Test, InverseAccuracy) {
      ASM_REGISTER_STATE_CHECK(inv_txfm_(coeff, dst, 32));
 #if CONFIG_VP9_HIGHBITDEPTH
    } else {
-      ASM_REGISTER_STATE_CHECK(inv_txfm_(coeff, CAST_TO_BYTEPTR(dst16), 32));
+      ASM_REGISTER_STATE_CHECK(inv_txfm_(coeff, CONVERT_TO_BYTEPTR(dst16), 32));
 #endif
    }
    for (int j = 0; j < kNumCoeffs; ++j) {
@ -292,7 +292,68 @@ TEST_P(Trans32x32Test, InverseAccuracy) {
  }
 }
-using ::testing::make_tuple;
+class PartialTrans32x32Test
    : public ::testing::TestWithParam<
          std::tr1::tuple<FwdTxfmFunc, vpx_bit_depth_t> > {
 public:
  virtual ~PartialTrans32x32Test() {}
  virtual void SetUp() {
    fwd_txfm_ = GET_PARAM(0);
    bit_depth_ = GET_PARAM(1);
  }
  virtual void TearDown() { libvpx_test::ClearSystemState(); }
 protected:
  vpx_bit_depth_t bit_depth_;
  FwdTxfmFunc fwd_txfm_;
 };
 TEST_P(PartialTrans32x32Test, Extremes) {
 #if CONFIG_VP9_HIGHBITDEPTH
  const int16_t maxval =
      static_cast<int16_t>(clip_pixel_highbd(1 << 30, bit_depth_));
 #else
  const int16_t maxval = 255;
 #endif
  const int minval = -maxval;
  DECLARE_ALIGNED(16, int16_t, input[kNumCoeffs]);
  DECLARE_ALIGNED(16, tran_low_t, output[kNumCoeffs]);
  for (int i = 0; i < kNumCoeffs; ++i) input[i] = maxval;
  output[0] = 0;
  ASM_REGISTER_STATE_CHECK(fwd_txfm_(input, output, 32));
  EXPECT_EQ((maxval * kNumCoeffs) >> 3, output[0]);
  for (int i = 0; i < kNumCoeffs; ++i) input[i] = minval;
  output[0] = 0;
  ASM_REGISTER_STATE_CHECK(fwd_txfm_(input, output, 32));
  EXPECT_EQ((minval * kNumCoeffs) >> 3, output[0]);
 }
 TEST_P(PartialTrans32x32Test, Random) {
 #if CONFIG_VP9_HIGHBITDEPTH
  const int16_t maxval =
      static_cast<int16_t>(clip_pixel_highbd(1 << 30, bit_depth_));
 #else
  const int16_t maxval = 255;
 #endif
  DECLARE_ALIGNED(16, int16_t, input[kNumCoeffs]);
  DECLARE_ALIGNED(16, tran_low_t, output[kNumCoeffs]);
  ACMRandom rnd(ACMRandom::DeterministicSeed());
  int sum = 0;
  for (int i = 0; i < kNumCoeffs; ++i) {
    const int val = (i & 1) ? -rnd(maxval + 1) : rnd(maxval + 1);
    input[i] = val;
    sum += val;
  }
  output[0] = 0;
  ASM_REGISTER_STATE_CHECK(fwd_txfm_(input, output, 32));
  EXPECT_EQ(sum >> 3, output[0]);
 }
 using std::tr1::make_tuple;
 #if CONFIG_VP9_HIGHBITDEPTH
 INSTANTIATE_TEST_CASE_P(
@ -305,6 +366,11 @@ INSTANTIATE_TEST_CASE_P(
        make_tuple(&vpx_fdct32x32_c, &vpx_idct32x32_1024_add_c, 0, VPX_BITS_8),
        make_tuple(&vpx_fdct32x32_rd_c, &vpx_idct32x32_1024_add_c, 1,
                   VPX_BITS_8)));
 INSTANTIATE_TEST_CASE_P(
    C, PartialTrans32x32Test,
    ::testing::Values(make_tuple(&vpx_highbd_fdct32x32_1_c, VPX_BITS_8),
                      make_tuple(&vpx_highbd_fdct32x32_1_c, VPX_BITS_10),
                      make_tuple(&vpx_highbd_fdct32x32_1_c, VPX_BITS_12)));
 #else
 INSTANTIATE_TEST_CASE_P(
    C, Trans32x32Test,
@ -312,16 +378,19 @@ INSTANTIATE_TEST_CASE_P(
                                 VPX_BITS_8),
                      make_tuple(&vpx_fdct32x32_rd_c, &vpx_idct32x32_1024_add_c,
                                 1, VPX_BITS_8)));
 // C implementation of the 32x32 "_1" forward transform at 8-bit depth only.
 INSTANTIATE_TEST_CASE_P(C, PartialTrans32x32Test,
                        ::testing::Values(make_tuple(&vpx_fdct32x32_1_c,
                                                     VPX_BITS_8)));
 #endif  // CONFIG_VP9_HIGHBITDEPTH
-#if HAVE_NEON && !CONFIG_EMULATE_HARDWARE
+#if HAVE_NEON && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
 INSTANTIATE_TEST_CASE_P(
    NEON, Trans32x32Test,
-    ::testing::Values(make_tuple(&vpx_fdct32x32_neon,
+    ::testing::Values(make_tuple(&vpx_fdct32x32_c, &vpx_idct32x32_1024_add_neon,
-                                 &vpx_idct32x32_1024_add_neon, 0, VPX_BITS_8),
+                                 0, VPX_BITS_8),
-                      make_tuple(&vpx_fdct32x32_rd_neon,
+                      make_tuple(&vpx_fdct32x32_rd_c,
                                 &vpx_idct32x32_1024_add_neon, 1, VPX_BITS_8)));
-#endif  // HAVE_NEON && !CONFIG_EMULATE_HARDWARE
+#endif  // HAVE_NEON && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
 #if HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
 INSTANTIATE_TEST_CASE_P(
@ -330,6 +399,9 @@ INSTANTIATE_TEST_CASE_P(
                                 &vpx_idct32x32_1024_add_sse2, 0, VPX_BITS_8),
                      make_tuple(&vpx_fdct32x32_rd_sse2,
                                 &vpx_idct32x32_1024_add_sse2, 1, VPX_BITS_8)));
 // SSE2 version of the 32x32 "_1" forward transform, 8-bit depth only.
 INSTANTIATE_TEST_CASE_P(SSE2, PartialTrans32x32Test,
                        ::testing::Values(make_tuple(&vpx_fdct32x32_1_sse2,
                                                     VPX_BITS_8)));
 #endif  // HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
 #if HAVE_SSE2 && CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
@ -346,6 +418,9 @@ INSTANTIATE_TEST_CASE_P(
                   VPX_BITS_8),
        make_tuple(&vpx_fdct32x32_rd_sse2, &vpx_idct32x32_1024_add_c, 1,
                   VPX_BITS_8)));
 // The non-highbd SSE2 "_1" transform is still exercised, at 8-bit depth, in
 // this configuration.
 INSTANTIATE_TEST_CASE_P(SSE2, PartialTrans32x32Test,
                        ::testing::Values(make_tuple(&vpx_fdct32x32_1_sse2,
                                                     VPX_BITS_8)));
 #endif  // HAVE_SSE2 && CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
 #if HAVE_AVX2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
@ -364,14 +439,8 @@ INSTANTIATE_TEST_CASE_P(
                                 &vpx_idct32x32_1024_add_msa, 0, VPX_BITS_8),
                      make_tuple(&vpx_fdct32x32_rd_msa,
                                 &vpx_idct32x32_1024_add_msa, 1, VPX_BITS_8)));
 // MSA version of the 32x32 "_1" forward transform, 8-bit depth only.
 INSTANTIATE_TEST_CASE_P(MSA, PartialTrans32x32Test,
                        ::testing::Values(make_tuple(&vpx_fdct32x32_1_msa,
                                                     VPX_BITS_8)));
 #endif  // HAVE_MSA && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
 #if HAVE_VSX && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
 // Only the inverse transform is a VSX routine here; both tuples pair it with
 // a C forward transform (regular and "_rd" variants).
 INSTANTIATE_TEST_CASE_P(
    VSX, Trans32x32Test,
    ::testing::Values(make_tuple(&vpx_fdct32x32_c, &vpx_idct32x32_1024_add_vsx,
                                 0, VPX_BITS_8),
                      make_tuple(&vpx_fdct32x32_rd_c,
                                 &vpx_idct32x32_1024_add_vsx, 1, VPX_BITS_8)));
 #endif  // HAVE_VSX && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
 }  // namespace
--- a/test/dct_partial_test.cc
+++ b/test/dct_partial_test.cc
@ -1,169 +0,0 @@
 /*
 *  Copyright (c) 2017 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <limits>
 #include "third_party/googletest/src/include/gtest/gtest.h"
 #include "./vpx_dsp_rtcd.h"
 #include "test/acm_random.h"
 #include "test/buffer.h"
 #include "test/clear_system_state.h"
 #include "test/register_state_check.h"
 #include "test/util.h"
 #include "vpx/vpx_codec.h"
 #include "vpx/vpx_integer.h"
 #include "vpx_dsp/vpx_dsp_common.h"
 using ::testing::make_tuple;
 using ::testing::tuple;
 using libvpx_test::ACMRandom;
 using libvpx_test::Buffer;
 namespace {
 // Signature of the forward transforms exercised by this file: int16_t input
 // read with the given stride, tran_low_t coefficients written to |out|.
 typedef void (*PartialFdctFunc)(const int16_t *in, tran_low_t *out, int stride);
 // Test parameter: (transform function, block size, bit depth).
 typedef tuple<PartialFdctFunc, int /* size */, vpx_bit_depth_t>
    PartialFdctParam;
 // Reference value for output[0]: the sum of the size x size input block,
 // doubled for size 4, unchanged for size 8, shifted right by 1 for size 16
 // and by 3 for size 32. Any other size is returned unscaled.
 tran_low_t partial_fdct_ref(const Buffer<int16_t> &in, int size) {
  const int16_t *const pixels = in.TopLeftPixel();
  const int stride = in.stride();
  int64_t total = 0;
  for (int row = 0; row < size; ++row) {
    const int16_t *const line = pixels + row * stride;
    for (int col = 0; col < size; ++col) total += line[col];
  }
  if (size == 4) {
    total *= 2;
  } else if (size == 16) {
    total >>= 1;
  } else if (size == 32) {
    total >>= 3;
  }
  // size == 8 needs no scaling.
  return static_cast<tran_low_t>(total);
 }
 // Parameterized fixture that compares output[0] of a forward transform with
 // partial_fdct_ref() over 100 input blocks.
 class PartialFdctTest : public ::testing::TestWithParam<PartialFdctParam> {
 public:
  // Unpacks the (function, size, bit depth) tuple; the initializers are listed
  // in member declaration order.
  PartialFdctTest()
      : fwd_txfm_(GET_PARAM(0)), bit_depth_(GET_PARAM(2)),
        size_(GET_PARAM(1)) {}
  virtual void TearDown() { libvpx_test::ClearSystemState(); }
 protected:
  // Iteration 0 fills the block with the maximum value, iteration 1 with the
  // minimum, and the remaining iterations with random values in between.
  void RunTest() {
    const int16_t maxvalue =
        clip_pixel_highbd(std::numeric_limits<int16_t>::max(), bit_depth_);
    const int16_t minvalue = -maxvalue;
    ACMRandom rnd(ACMRandom::DeterministicSeed());
    Buffer<int16_t> input_block(size_, size_, 8, size_ == 4 ? 0 : 16);
    ASSERT_TRUE(input_block.Init());
    Buffer<tran_low_t> output_block(size_, size_, 0, 16);
    ASSERT_TRUE(output_block.Init());
    for (int iter = 0; iter < 100; ++iter) {
      switch (iter) {
        case 0: input_block.Set(maxvalue); break;
        case 1: input_block.Set(minvalue); break;
        default: input_block.Set(&rnd, minvalue, maxvalue); break;
      }
      ASM_REGISTER_STATE_CHECK(fwd_txfm_(input_block.TopLeftPixel(),
                                         output_block.TopLeftPixel(),
                                         input_block.stride()));
      // Only the first output coefficient is compared with the reference.
      EXPECT_EQ(partial_fdct_ref(input_block, size_),
                output_block.TopLeftPixel()[0]);
    }
  }
  PartialFdctFunc fwd_txfm_;
  vpx_bit_depth_t bit_depth_;
  int size_;
 };
 // Single test body; all of the logic lives in PartialFdctTest::RunTest().
 TEST_P(PartialFdctTest, PartialFdctTest) { RunTest(); }
 // C implementations. In high-bit-depth builds the highbd 32x32, 16x16 and 8x8
 // functions are run at 12 and 10 bits and the regular functions at 8 bits;
 // 4x4 is listed at 8 bits only. Otherwise only the 8-bit functions are run.
 #if CONFIG_VP9_HIGHBITDEPTH
 INSTANTIATE_TEST_CASE_P(
    C, PartialFdctTest,
    ::testing::Values(make_tuple(&vpx_highbd_fdct32x32_1_c, 32, VPX_BITS_12),
                      make_tuple(&vpx_highbd_fdct32x32_1_c, 32, VPX_BITS_10),
                      make_tuple(&vpx_fdct32x32_1_c, 32, VPX_BITS_8),
                      make_tuple(&vpx_highbd_fdct16x16_1_c, 16, VPX_BITS_12),
                      make_tuple(&vpx_highbd_fdct16x16_1_c, 16, VPX_BITS_10),
                      make_tuple(&vpx_fdct16x16_1_c, 16, VPX_BITS_8),
                      make_tuple(&vpx_highbd_fdct8x8_1_c, 8, VPX_BITS_12),
                      make_tuple(&vpx_highbd_fdct8x8_1_c, 8, VPX_BITS_10),
                      make_tuple(&vpx_fdct8x8_1_c, 8, VPX_BITS_8),
                      make_tuple(&vpx_fdct4x4_1_c, 4, VPX_BITS_8)));
 #else
 INSTANTIATE_TEST_CASE_P(
    C, PartialFdctTest,
    ::testing::Values(make_tuple(&vpx_fdct32x32_1_c, 32, VPX_BITS_8),
                      make_tuple(&vpx_fdct16x16_1_c, 16, VPX_BITS_8),
                      make_tuple(&vpx_fdct8x8_1_c, 8, VPX_BITS_8),
                      make_tuple(&vpx_fdct4x4_1_c, 4, VPX_BITS_8)));
 #endif  // CONFIG_VP9_HIGHBITDEPTH
 // SSE2 implementations, all at 8-bit depth. This block is guarded by
 // HAVE_SSE2 only.
 #if HAVE_SSE2
 INSTANTIATE_TEST_CASE_P(
    SSE2, PartialFdctTest,
    ::testing::Values(make_tuple(&vpx_fdct32x32_1_sse2, 32, VPX_BITS_8),
                      make_tuple(&vpx_fdct16x16_1_sse2, 16, VPX_BITS_8),
                      make_tuple(&vpx_fdct8x8_1_sse2, 8, VPX_BITS_8),
                      make_tuple(&vpx_fdct4x4_1_sse2, 4, VPX_BITS_8)));
 #endif  // HAVE_SSE2
 // NEON implementations. In high-bit-depth builds the 8x8 function is also run
 // at 12 and 10 bits; every other size is run at 8 bits only.
 #if HAVE_NEON
 #if CONFIG_VP9_HIGHBITDEPTH
 INSTANTIATE_TEST_CASE_P(
    NEON, PartialFdctTest,
    ::testing::Values(make_tuple(&vpx_fdct32x32_1_neon, 32, VPX_BITS_8),
                      make_tuple(&vpx_fdct16x16_1_neon, 16, VPX_BITS_8),
                      make_tuple(&vpx_fdct8x8_1_neon, 8, VPX_BITS_12),
                      make_tuple(&vpx_fdct8x8_1_neon, 8, VPX_BITS_10),
                      make_tuple(&vpx_fdct8x8_1_neon, 8, VPX_BITS_8),
                      make_tuple(&vpx_fdct4x4_1_neon, 4, VPX_BITS_8)));
 #else
 INSTANTIATE_TEST_CASE_P(
    NEON, PartialFdctTest,
    ::testing::Values(make_tuple(&vpx_fdct32x32_1_neon, 32, VPX_BITS_8),
                      make_tuple(&vpx_fdct16x16_1_neon, 16, VPX_BITS_8),
                      make_tuple(&vpx_fdct8x8_1_neon, 8, VPX_BITS_8),
                      make_tuple(&vpx_fdct4x4_1_neon, 4, VPX_BITS_8)));
 #endif  // CONFIG_VP9_HIGHBITDEPTH
 #endif  // HAVE_NEON
 // MSA implementations. High-bit-depth builds instantiate only the 8x8
 // function; other builds instantiate 32x32, 16x16 and 8x8. All at 8 bits.
 #if HAVE_MSA
 #if CONFIG_VP9_HIGHBITDEPTH
 INSTANTIATE_TEST_CASE_P(MSA, PartialFdctTest,
                        ::testing::Values(make_tuple(&vpx_fdct8x8_1_msa, 8,
                                                     VPX_BITS_8)));
 #else   // !CONFIG_VP9_HIGHBITDEPTH
 INSTANTIATE_TEST_CASE_P(
    MSA, PartialFdctTest,
    ::testing::Values(make_tuple(&vpx_fdct32x32_1_msa, 32, VPX_BITS_8),
                      make_tuple(&vpx_fdct16x16_1_msa, 16, VPX_BITS_8),
                      make_tuple(&vpx_fdct8x8_1_msa, 8, VPX_BITS_8)));
 #endif  // CONFIG_VP9_HIGHBITDEPTH
 #endif  // HAVE_MSA
 }  // namespace
--- a/test/dct_test.cc
+++ b/test/dct_test.cc
@ -1,728 +0,0 @@
 /*
 *  Copyright (c) 2017 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include "third_party/googletest/src/include/gtest/gtest.h"
 #include "./vp9_rtcd.h"
 #include "./vpx_dsp_rtcd.h"
 #include "test/acm_random.h"
 #include "test/buffer.h"
 #include "test/clear_system_state.h"
 #include "test/register_state_check.h"
 #include "test/util.h"
 #include "vp9/common/vp9_entropy.h"
 #include "vpx/vpx_codec.h"
 #include "vpx/vpx_integer.h"
 #include "vpx_ports/mem.h"
 using ::testing::make_tuple;
 using ::testing::tuple;
 using libvpx_test::ACMRandom;
 using libvpx_test::Buffer;
 namespace {
 // Forward / inverse transform without a transform-type argument.
 typedef void (*FdctFunc)(const int16_t *in, tran_low_t *out, int stride);
 typedef void (*IdctFunc)(const tran_low_t *in, uint8_t *out, int stride);
 // Forward transform taking a transform type; this is the common signature
 // stored in FuncInfo::ft_func.
 typedef void (*FhtFunc)(const int16_t *in, tran_low_t *out, int stride,
                        int tx_type);
 // Reference forward transform operating on test Buffers.
 typedef void (*FhtFuncRef)(const Buffer<int16_t> &in, Buffer<tran_low_t> *out,
                           int size, int tx_type);
 // Inverse transform taking a transform type.
 typedef void (*IhtFunc)(const tran_low_t *in, uint8_t *out, int stride,
                        int tx_type);
 // Inverse transform taking a transform type and a bit depth; this is the
 // common signature stored in FuncInfo::it_func.
 typedef void (*IhtWithBdFunc)(const tran_low_t *in, uint8_t *out, int stride,
                              int tx_type, int bd);
 // Adapts an FdctFunc to the FhtFunc signature; tx_type is ignored.
 template <FdctFunc fn>
 void fdct_wrapper(const int16_t *in, tran_low_t *out, int stride, int tx_type) {
  (void)tx_type;
  fn(in, out, stride);
 }
 // Adapts an IdctFunc to the IhtWithBdFunc signature; tx_type and bd are
 // ignored.
 template <IdctFunc fn>
 void idct_wrapper(const tran_low_t *in, uint8_t *out, int stride, int tx_type,
                  int bd) {
  (void)tx_type;
  (void)bd;
  fn(in, out, stride);
 }
 // Adapts an IhtFunc to the IhtWithBdFunc signature; bd is ignored.
 template <IhtFunc fn>
 void iht_wrapper(const tran_low_t *in, uint8_t *out, int stride, int tx_type,
                 int bd) {
  (void)bd;
  fn(in, out, stride, tx_type);
 }
 #if CONFIG_VP9_HIGHBITDEPTH
 // High-bit-depth inverse transforms write uint16_t pixels.
 typedef void (*HighbdIdctFunc)(const tran_low_t *in, uint16_t *out, int stride,
                               int bd);
 typedef void (*HighbdIhtFunc)(const tran_low_t *in, uint16_t *out, int stride,
                              int tx_type, int bd);
 // Adapts a HighbdIdctFunc to the IhtWithBdFunc signature; tx_type is ignored.
 // The uint8_t destination is passed through CAST_TO_SHORTPTR() to obtain the
 // pointer the highbd function expects.
 template <HighbdIdctFunc fn>
 void highbd_idct_wrapper(const tran_low_t *in, uint8_t *out, int stride,
                         int tx_type, int bd) {
  (void)tx_type;
  fn(in, CAST_TO_SHORTPTR(out), stride, bd);
 }
 // Adapts a HighbdIhtFunc to the IhtWithBdFunc signature, forwarding both
 // tx_type and bd.
 template <HighbdIhtFunc fn>
 void highbd_iht_wrapper(const tran_low_t *in, uint8_t *out, int stride,
                        int tx_type, int bd) {
  fn(in, CAST_TO_SHORTPTR(out), stride, tx_type, bd);
 }
 #endif  // CONFIG_VP9_HIGHBITDEPTH
 // One forward/inverse transform pair under test.
 struct FuncInfo {
  FhtFunc ft_func;        // forward transform
  IhtWithBdFunc it_func;  // inverse transform
  int size;               // transform width/height
  int pixel_size;         // bytes per pixel of the inverse's output: 1 or 2
 };
 /* index into the FuncInfo array, FuncInfo array, transform type, bit depth */
 typedef tuple<int, const FuncInfo *, int, vpx_bit_depth_t> DctParam;
 void fdct_ref(const Buffer<int16_t> &in, Buffer<tran_low_t> *out, int size,
              int /*tx_type*/) {
  const int16_t *i = in.TopLeftPixel();
  const int i_stride = in.stride();
  tran_low_t *o = out->TopLeftPixel();
  if (size == 4) {
    vpx_fdct4x4_c(i, o, i_stride);
  } else if (size == 8) {
    vpx_fdct8x8_c(i, o, i_stride);
  } else if (size == 16) {
    vpx_fdct16x16_c(i, o, i_stride);
  } else if (size == 32) {
    vpx_fdct32x32_c(i, o, i_stride);
  }
 }
 void fht_ref(const Buffer<int16_t> &in, Buffer<tran_low_t> *out, int size,
             int tx_type) {
  const int16_t *i = in.TopLeftPixel();
  const int i_stride = in.stride();
  tran_low_t *o = out->TopLeftPixel();
  if (size == 4) {
    vp9_fht4x4_c(i, o, i_stride, tx_type);
  } else if (size == 8) {
    vp9_fht8x8_c(i, o, i_stride, tx_type);
  } else if (size == 16) {
    vp9_fht16x16_c(i, o, i_stride, tx_type);
  }
 }
 void fwht_ref(const Buffer<int16_t> &in, Buffer<tran_low_t> *out, int size,
              int /*tx_type*/) {
  ASSERT_EQ(size, 4);
  vp9_fwht4x4_c(in.TopLeftPixel(), out->TopLeftPixel(), in.stride());
 }
 class TransTestBase : public ::testing::TestWithParam<DctParam> {
 public:
  // Reads the test parameters, picks a random row stride and allocates the
  // source and destination pixel buffers.
  virtual void SetUp() {
    // TearDown() frees both buffers and googletest runs it even when SetUp()
    // stops on a fatal assertion, so neither pointer may be left
    // uninitialized. (vpx_free() is assumed to accept NULL like free() does;
    // TODO confirm.)
    src_ = NULL;
    dst_ = NULL;
    rnd_.Reset(ACMRandom::DeterministicSeed());
    const int idx = GET_PARAM(0);
    const FuncInfo *func_info = &(GET_PARAM(1)[idx]);
    tx_type_ = GET_PARAM(2);
    bit_depth_ = GET_PARAM(3);
    fwd_txfm_ = func_info->ft_func;
    inv_txfm_ = func_info->it_func;
    size_ = func_info->size;
    pixel_size_ = func_info->pixel_size;
    max_pixel_value_ = (1 << bit_depth_) - 1;
    // Randomize stride_ to a value less than or equal to 1024
    stride_ = rnd_(1024) + 1;
    if (stride_ < size_) {
      stride_ = size_;
    }
    // Align stride_ to 16 if it's bigger than 16.
    if (stride_ > 16) {
      stride_ &= ~15;
    }
    // block_size_ counts pixels; the allocations scale it by bytes per pixel.
    block_size_ = size_ * stride_;
    src_ = reinterpret_cast<uint8_t *>(
        vpx_memalign(16, pixel_size_ * block_size_));
    ASSERT_TRUE(src_ != NULL);
    dst_ = reinterpret_cast<uint8_t *>(
        vpx_memalign(16, pixel_size_ * block_size_));
    ASSERT_TRUE(dst_ != NULL);
  }
  // Releases the pixel buffers allocated in SetUp() and resets the pointers.
  virtual void TearDown() {
    vpx_free(src_);
    src_ = NULL;
    vpx_free(dst_);
    dst_ = NULL;
    libvpx_test::ClearSystemState();
  }
  void InitMem() {
    if (pixel_size_ == 1 && bit_depth_ > VPX_BITS_8) return;
    if (pixel_size_ == 1) {
      for (int j = 0; j < block_size_; ++j) {
        src_[j] = rnd_.Rand16() & max_pixel_value_;
      }
      for (int j = 0; j < block_size_; ++j) {
        dst_[j] = rnd_.Rand16() & max_pixel_value_;
      }
    } else {
      ASSERT_EQ(pixel_size_, 2);
      uint16_t *const src = reinterpret_cast<uint16_t *>(src_);
      uint16_t *const dst = reinterpret_cast<uint16_t *>(dst_);
      for (int j = 0; j < block_size_; ++j) {
        src[j] = rnd_.Rand16() & max_pixel_value_;
      }
      for (int j = 0; j < block_size_; ++j) {
        dst[j] = rnd_.Rand16() & max_pixel_value_;
      }
    }
  }
  // Runs the forward transform under test using the input buffer's own stride.
  void RunFwdTxfm(const Buffer<int16_t> &in, Buffer<tran_low_t> *out) {
    fwd_txfm_(in.TopLeftPixel(), out->TopLeftPixel(), in.stride(), tx_type_);
  }
  // Runs the inverse transform under test; |out| is addressed with stride_,
  // the stride of the src_/dst_ buffers.
  void RunInvTxfm(const Buffer<tran_low_t> &in, uint8_t *out) {
    inv_txfm_(in.TopLeftPixel(), out, stride_, tx_type_, bit_depth_);
  }
 protected:
  // Round-trip check: for 10000 random src_/dst_ pairs, transforms the
  // residual (src - dst), applies the inverse transform onto dst_ and measures
  // the squared difference between the result and src_. |limit| bounds both
  // the largest single squared error and the total error per block.
  // Randomness comes from rnd_ via InitMem(); no local generator is needed.
  void RunAccuracyCheck(int limit) {
    if (pixel_size_ == 1 && bit_depth_ > VPX_BITS_8) return;
    Buffer<int16_t> test_input_block =
        Buffer<int16_t>(size_, size_, 8, size_ == 4 ? 0 : 16);
    ASSERT_TRUE(test_input_block.Init());
    Buffer<tran_low_t> test_temp_block =
        Buffer<tran_low_t>(size_, size_, 0, 16);
    ASSERT_TRUE(test_temp_block.Init());
    uint32_t max_error = 0;
    int64_t total_error = 0;
    const int count_test_block = 10000;
    for (int i = 0; i < count_test_block; ++i) {
      InitMem();
      // Build the residual block that is fed to the forward transform.
      for (int h = 0; h < size_; ++h) {
        for (int w = 0; w < size_; ++w) {
          if (pixel_size_ == 1) {
            test_input_block.TopLeftPixel()[h * test_input_block.stride() + w] =
                src_[h * stride_ + w] - dst_[h * stride_ + w];
          } else {
            ASSERT_EQ(pixel_size_, 2);
            const uint16_t *const src = reinterpret_cast<uint16_t *>(src_);
            const uint16_t *const dst = reinterpret_cast<uint16_t *>(dst_);
            test_input_block.TopLeftPixel()[h * test_input_block.stride() + w] =
                src[h * stride_ + w] - dst[h * stride_ + w];
          }
        }
      }
      ASM_REGISTER_STATE_CHECK(RunFwdTxfm(test_input_block, &test_temp_block));
      ASM_REGISTER_STATE_CHECK(RunInvTxfm(test_temp_block, dst_));
      // dst_ now holds the reconstruction; compare it with the source.
      for (int h = 0; h < size_; ++h) {
        for (int w = 0; w < size_; ++w) {
          int diff;
          if (pixel_size_ == 1) {
            diff = dst_[h * stride_ + w] - src_[h * stride_ + w];
          } else {
            ASSERT_EQ(pixel_size_, 2);
            const uint16_t *const src = reinterpret_cast<uint16_t *>(src_);
            const uint16_t *const dst = reinterpret_cast<uint16_t *>(dst_);
            diff = dst[h * stride_ + w] - src[h * stride_ + w];
          }
          const uint32_t error = diff * diff;
          if (max_error < error) max_error = error;
          total_error += error;
        }
      }
    }
    EXPECT_GE(static_cast<uint32_t>(limit), max_error)
        << "Error: " << size_ << "x" << size_
        << " transform/inverse transform has an individual round trip error > "
        << limit;
    EXPECT_GE(count_test_block * limit, total_error)
        << "Error: " << size_ << "x" << size_
        << " transform/inverse transform has average round trip error > "
        << limit << " per block";
  }
  // Compares the forward transform under test with fwd_txfm_ref on 5000 random
  // blocks and stops as soon as the test has recorded a failure, after
  // printing the differences.
  void RunCoeffCheck() {
    if (pixel_size_ == 1 && bit_depth_ > VPX_BITS_8) return;
    const int count_test_block = 5000;
    ACMRandom rnd(ACMRandom::DeterministicSeed());
    Buffer<int16_t> input_block(size_, size_, 8, size_ == 4 ? 0 : 16);
    ASSERT_TRUE(input_block.Init());
    Buffer<tran_low_t> output_ref_block(size_, size_, 0);
    ASSERT_TRUE(output_ref_block.Init());
    Buffer<tran_low_t> output_block(size_, size_, 0, 16);
    ASSERT_TRUE(output_block.Init());
    for (int iter = 0; iter != count_test_block; ++iter) {
      // Random input drawn with bounds -max_pixel_value_ and max_pixel_value_.
      input_block.Set(&rnd, -max_pixel_value_, max_pixel_value_);
      fwd_txfm_ref(input_block, &output_ref_block, size_, tx_type_);
      ASM_REGISTER_STATE_CHECK(RunFwdTxfm(input_block, &output_block));
      // The tested output is checked against the reference output.
      EXPECT_TRUE(output_block.CheckValues(output_ref_block));
      if (!::testing::Test::HasFailure()) continue;
      printf("Size: %d Transform type: %d\n", size_, tx_type_);
      output_block.PrintDifference(output_ref_block);
      return;
    }
  }
  // Drives the forward transform with extreme inputs (all +max, all -max, then
  // random mixes of the two), compares it with fwd_txfm_ref and checks that
  // every coefficient magnitude stays within
  // 4 * DCT_MAX_VALUE << (bit_depth_ - 8).
  void RunMemCheck() {
    if (pixel_size_ == 1 && bit_depth_ > VPX_BITS_8) return;
    const int count_test_block = 5000;
    ACMRandom rnd(ACMRandom::DeterministicSeed());
    Buffer<int16_t> input_extreme_block(size_, size_, 8, size_ == 4 ? 0 : 16);
    ASSERT_TRUE(input_extreme_block.Init());
    Buffer<tran_low_t> output_ref_block(size_, size_, 0);
    ASSERT_TRUE(output_ref_block.Init());
    Buffer<tran_low_t> output_block(size_, size_, 0, 16);
    ASSERT_TRUE(output_block.Init());
    for (int iter = 0; iter < count_test_block; ++iter) {
      if (iter == 0) {
        input_extreme_block.Set(max_pixel_value_);
      } else if (iter == 1) {
        input_extreme_block.Set(-max_pixel_value_);
      } else {
        // One random draw per sample picks its sign.
        for (int row = 0; row < size_; ++row) {
          for (int col = 0; col < size_; ++col) {
            const bool positive = (rnd.Rand8() % 2) != 0;
            input_extreme_block
                .TopLeftPixel()[row * input_extreme_block.stride() + col] =
                positive ? max_pixel_value_ : -max_pixel_value_;
          }
        }
      }
      fwd_txfm_ref(input_extreme_block, &output_ref_block, size_, tx_type_);
      ASM_REGISTER_STATE_CHECK(RunFwdTxfm(input_extreme_block, &output_block));
      // The tested output is checked against the reference output.
      EXPECT_TRUE(output_block.CheckValues(output_ref_block));
      for (int h = 0; h < size_; ++h) {
        for (int w = 0; w < size_; ++w) {
          EXPECT_GE(
              4 * DCT_MAX_VALUE << (bit_depth_ - 8),
              abs(output_block.TopLeftPixel()[h * output_block.stride() + w]))
              << "Error: " << size_ << "x" << size_
              << " transform has coefficient larger than 4*DCT_MAX_VALUE"
              << " at " << w << "," << h;
          if (!::testing::Test::HasFailure()) continue;
          printf("Size: %d Transform type: %d\n", size_, tx_type_);
          output_block.DumpBuffer();
          return;
        }
      }
    }
  }
  void RunInvAccuracyCheck(int limit) {
    if (pixel_size_ == 1 && bit_depth_ > VPX_BITS_8) return;
    ACMRandom rnd(ACMRandom::DeterministicSeed());
    const int count_test_block = 1000;
    Buffer<int16_t> in = Buffer<int16_t>(size_, size_, 4);
    ASSERT_TRUE(in.Init());
    Buffer<tran_low_t> coeff = Buffer<tran_low_t>(size_, size_, 0, 16);
    ASSERT_TRUE(coeff.Init());
    Buffer<uint8_t> dst = Buffer<uint8_t>(size_, size_, 0, 16);
    ASSERT_TRUE(dst.Init());
    Buffer<uint8_t> src = Buffer<uint8_t>(size_, size_, 0);
    ASSERT_TRUE(src.Init());
    Buffer<uint16_t> dst16 = Buffer<uint16_t>(size_, size_, 0, 16);
    ASSERT_TRUE(dst16.Init());
    Buffer<uint16_t> src16 = Buffer<uint16_t>(size_, size_, 0);
    ASSERT_TRUE(src16.Init());
    for (int i = 0; i < count_test_block; ++i) {
      InitMem();
      // Initialize a test block with input range [-max_pixel_value_,
      // max_pixel_value_].
      for (int h = 0; h < size_; ++h) {
        for (int w = 0; w < size_; ++w) {
          if (pixel_size_ == 1) {
            in.TopLeftPixel()[h * in.stride() + w] =
                src_[h * stride_ + w] - dst_[h * stride_ + w];
          } else {
            ASSERT_EQ(pixel_size_, 2);
            const uint16_t *const src = reinterpret_cast<uint16_t *>(src_);
            const uint16_t *const dst = reinterpret_cast<uint16_t *>(dst_);
            in.TopLeftPixel()[h * in.stride() + w] =
                src[h * stride_ + w] - dst[h * stride_ + w];
          }
        }
      }
      fwd_txfm_ref(in, &coeff, size_, tx_type_);
      ASM_REGISTER_STATE_CHECK(RunInvTxfm(coeff, dst_));
      for (int h = 0; h < size_; ++h) {
        for (int w = 0; w < size_; ++w) {
          int diff;
          if (pixel_size_ == 1) {
            diff = dst_[h * stride_ + w] - src_[h * stride_ + w];
          } else {
            ASSERT_EQ(pixel_size_, 2);
            const uint16_t *const src = reinterpret_cast<uint16_t *>(src_);
            const uint16_t *const dst = reinterpret_cast<uint16_t *>(dst_);
            diff = dst[h * stride_ + w] - src[h * stride_ + w];
          }
          const uint32_t error = diff * diff;
          EXPECT_GE(static_cast<uint32_t>(limit), error)
              << "Error: " << size_ << "x" << size_
              << " inverse transform has error " << error << " at " << w << ","
              << h;
          if (::testing::Test::HasFailure()) {
            printf("Size: %d Transform type: %d\n", size_, tx_type_);
            return;
          }
        }
      }
    }
  }
  // Forward transform under test (FuncInfo::ft_func).
  FhtFunc fwd_txfm_;
  // Reference forward transform; assigned by the derived fixtures.
  FhtFuncRef fwd_txfm_ref;
  // Inverse transform under test (FuncInfo::it_func).
  IhtWithBdFunc inv_txfm_;
  // Random source for the stride and for the src_/dst_ pixel data.
  ACMRandom rnd_;
  // Pixel buffers allocated in SetUp() and freed in TearDown(); accessed as
  // uint16_t when pixel_size_ is 2.
  uint8_t *src_;
  uint8_t *dst_;
  vpx_bit_depth_t bit_depth_;
  int tx_type_;
  // (1 << bit_depth_) - 1.
  int max_pixel_value_;
  // Transform width/height.
  int size_;
  // Row stride, in pixels, of src_ and dst_.
  int stride_;
  // Bytes per pixel: 1 or 2.
  int pixel_size_;
  // size_ * stride_, in pixels.
  int block_size_;
 };
 /* -------------------------------------------------------------------------- */
 // DCT fixture: uses fdct_ref as the reference forward transform.
 class TransDCT : public TransTestBase {
 public:
  TransDCT() { fwd_txfm_ref = fdct_ref; }
 };
 // Round-trip accuracy with an error limit of 1, relaxed to 2 (16x16) or 7
 // (32x32) for two-byte pixels at bit depths above 10.
 TEST_P(TransDCT, AccuracyCheck) {
  const bool deep_highbd = bit_depth_ > 10 && pixel_size_ == 2;
  int limit = 1;
  if (deep_highbd && size_ == 16) limit = 2;
  if (deep_highbd && size_ == 32) limit = 7;
  RunAccuracyCheck(limit);
 }
 // Remaining DCT checks; the inverse-only check allows a squared error of 1.
 TEST_P(TransDCT, CoeffCheck) { RunCoeffCheck(); }
 TEST_P(TransDCT, MemCheck) { RunMemCheck(); }
 TEST_P(TransDCT, InvAccuracyCheck) { RunInvAccuracyCheck(1); }
 // C forward/inverse DCT pairs as { forward, inverse, size, pixel_size }.
 // The highbd entries (pixel_size 2) exist only in high-bit-depth builds.
 static const FuncInfo dct_c_func_info[] = {
 #if CONFIG_VP9_HIGHBITDEPTH
  { &fdct_wrapper<vpx_highbd_fdct4x4_c>,
    &highbd_idct_wrapper<vpx_highbd_idct4x4_16_add_c>, 4, 2 },
  { &fdct_wrapper<vpx_highbd_fdct8x8_c>,
    &highbd_idct_wrapper<vpx_highbd_idct8x8_64_add_c>, 8, 2 },
  { &fdct_wrapper<vpx_highbd_fdct16x16_c>,
    &highbd_idct_wrapper<vpx_highbd_idct16x16_256_add_c>, 16, 2 },
  { &fdct_wrapper<vpx_highbd_fdct32x32_c>,
    &highbd_idct_wrapper<vpx_highbd_idct32x32_1024_add_c>, 32, 2 },
 #endif
  { &fdct_wrapper<vpx_fdct4x4_c>, &idct_wrapper<vpx_idct4x4_16_add_c>, 4, 1 },
  { &fdct_wrapper<vpx_fdct8x8_c>, &idct_wrapper<vpx_idct8x8_64_add_c>, 8, 1 },
  { &fdct_wrapper<vpx_fdct16x16_c>, &idct_wrapper<vpx_idct16x16_256_add_c>, 16,
    1 },
  { &fdct_wrapper<vpx_fdct32x32_c>, &idct_wrapper<vpx_idct32x32_1024_add_c>, 32,
    1 }
 };
 // Every table entry is combined with every bit depth; the Run* helpers return
 // early for pixel_size 1 entries at bit depths above 8.
 INSTANTIATE_TEST_CASE_P(
    C, TransDCT,
    ::testing::Combine(
        ::testing::Range(0, static_cast<int>(sizeof(dct_c_func_info) /
                                             sizeof(dct_c_func_info[0]))),
        ::testing::Values(dct_c_func_info), ::testing::Values(0),
        ::testing::Values(VPX_BITS_8, VPX_BITS_10, VPX_BITS_12)));
 #if !CONFIG_EMULATE_HARDWARE
 #if HAVE_SSE2
 // SSE2 forward/inverse DCT pairs as { forward, inverse, size, pixel_size }.
 // The highbd entries (pixel_size 2) exist only in high-bit-depth builds.
 static const FuncInfo dct_sse2_func_info[] = {
 #if CONFIG_VP9_HIGHBITDEPTH
  { &fdct_wrapper<vpx_highbd_fdct4x4_sse2>,
    &highbd_idct_wrapper<vpx_highbd_idct4x4_16_add_sse2>, 4, 2 },
  { &fdct_wrapper<vpx_highbd_fdct8x8_sse2>,
    &highbd_idct_wrapper<vpx_highbd_idct8x8_64_add_sse2>, 8, 2 },
  { &fdct_wrapper<vpx_highbd_fdct16x16_sse2>,
    &highbd_idct_wrapper<vpx_highbd_idct16x16_256_add_sse2>, 16, 2 },
  { &fdct_wrapper<vpx_highbd_fdct32x32_sse2>,
    &highbd_idct_wrapper<vpx_highbd_idct32x32_1024_add_sse2>, 32, 2 },
 #endif
  { &fdct_wrapper<vpx_fdct4x4_sse2>, &idct_wrapper<vpx_idct4x4_16_add_sse2>, 4,
    1 },
  { &fdct_wrapper<vpx_fdct8x8_sse2>, &idct_wrapper<vpx_idct8x8_64_add_sse2>, 8,
    1 },
  { &fdct_wrapper<vpx_fdct16x16_sse2>,
    &idct_wrapper<vpx_idct16x16_256_add_sse2>, 16, 1 },
  { &fdct_wrapper<vpx_fdct32x32_sse2>,
    &idct_wrapper<vpx_idct32x32_1024_add_sse2>, 32, 1 }
 };
 // Every table entry is combined with every bit depth; the Run* helpers return
 // early for pixel_size 1 entries at bit depths above 8.
 INSTANTIATE_TEST_CASE_P(
    SSE2, TransDCT,
    ::testing::Combine(
        ::testing::Range(0, static_cast<int>(sizeof(dct_sse2_func_info) /
                                             sizeof(dct_sse2_func_info[0]))),
        ::testing::Values(dct_sse2_func_info), ::testing::Values(0),
        ::testing::Values(VPX_BITS_8, VPX_BITS_10, VPX_BITS_12)));
 #endif  // HAVE_SSE2
 #if HAVE_SSSE3 && !CONFIG_VP9_HIGHBITDEPTH && ARCH_X86_64
 // vpx_fdct8x8_ssse3 is only available in 64 bit builds.
 // The SSSE3 forward transform is paired with the SSE2 inverse transform.
 static const FuncInfo dct_ssse3_func_info = {
  &fdct_wrapper<vpx_fdct8x8_ssse3>, &idct_wrapper<vpx_idct8x8_64_add_sse2>, 8, 1
 };
 // TODO(johannkoenig): high bit depth fdct8x8.
 // Single entry: index 0 into a one-element "array" (the struct's address).
 INSTANTIATE_TEST_CASE_P(SSSE3, TransDCT,
                        ::testing::Values(make_tuple(0, &dct_ssse3_func_info, 0,
                                                     VPX_BITS_8)));
 #endif  // HAVE_SSSE3 && !CONFIG_VP9_HIGHBITDEPTH && ARCH_X86_64
 #if HAVE_AVX2 && !CONFIG_VP9_HIGHBITDEPTH
 // The AVX2 32x32 forward transform is paired with the SSE2 inverse transform.
 static const FuncInfo dct_avx2_func_info = {
  &fdct_wrapper<vpx_fdct32x32_avx2>, &idct_wrapper<vpx_idct32x32_1024_add_sse2>,
  32, 1
 };
 // TODO(johannkoenig): high bit depth fdct32x32.
 // Single entry: index 0 into a one-element "array" (the struct's address).
 INSTANTIATE_TEST_CASE_P(AVX2, TransDCT,
                        ::testing::Values(make_tuple(0, &dct_avx2_func_info, 0,
                                                     VPX_BITS_8)));
 #endif  // HAVE_AVX2 && !CONFIG_VP9_HIGHBITDEPTH
 #if HAVE_NEON
 // NEON forward/inverse DCT pairs, all with one-byte pixels.
 static const FuncInfo dct_neon_func_info[4] = {
  { &fdct_wrapper<vpx_fdct4x4_neon>, &idct_wrapper<vpx_idct4x4_16_add_neon>, 4,
    1 },
  { &fdct_wrapper<vpx_fdct8x8_neon>, &idct_wrapper<vpx_idct8x8_64_add_neon>, 8,
    1 },
  { &fdct_wrapper<vpx_fdct16x16_neon>,
    &idct_wrapper<vpx_idct16x16_256_add_neon>, 16, 1 },
  { &fdct_wrapper<vpx_fdct32x32_neon>,
    &idct_wrapper<vpx_idct32x32_1024_add_neon>, 32, 1 }
 };
 // Range(0, 4) must stay in step with the array length declared above.
 INSTANTIATE_TEST_CASE_P(
    NEON, TransDCT,
    ::testing::Combine(::testing::Range(0, 4),
                       ::testing::Values(dct_neon_func_info),
                       ::testing::Values(0), ::testing::Values(VPX_BITS_8)));
 #endif  // HAVE_NEON
 #if HAVE_MSA && !CONFIG_VP9_HIGHBITDEPTH
 // MSA forward/inverse DCT pairs, all with one-byte pixels.
 static const FuncInfo dct_msa_func_info[4] = {
  { &fdct_wrapper<vpx_fdct4x4_msa>, &idct_wrapper<vpx_idct4x4_16_add_msa>, 4,
    1 },
  { &fdct_wrapper<vpx_fdct8x8_msa>, &idct_wrapper<vpx_idct8x8_64_add_msa>, 8,
    1 },
  { &fdct_wrapper<vpx_fdct16x16_msa>, &idct_wrapper<vpx_idct16x16_256_add_msa>,
    16, 1 },
  { &fdct_wrapper<vpx_fdct32x32_msa>, &idct_wrapper<vpx_idct32x32_1024_add_msa>,
    32, 1 }
 };
 // Range(0, 4) must stay in step with the array length declared above.
 INSTANTIATE_TEST_CASE_P(MSA, TransDCT,
                        ::testing::Combine(::testing::Range(0, 4),
                                           ::testing::Values(dct_msa_func_info),
                                           ::testing::Values(0),
                                           ::testing::Values(VPX_BITS_8)));
 #endif  // HAVE_MSA && !CONFIG_VP9_HIGHBITDEPTH
 #if HAVE_VSX && !CONFIG_VP9_HIGHBITDEPTH
 // Only the 4x4 inverse transform is a VSX routine; it is paired with the C
 // forward transform.
 static const FuncInfo dct_vsx_func_info = {
  &fdct_wrapper<vpx_fdct4x4_c>, &idct_wrapper<vpx_idct4x4_16_add_vsx>, 4, 1
 };
 INSTANTIATE_TEST_CASE_P(VSX, TransDCT,
                        ::testing::Values(make_tuple(0, &dct_vsx_func_info, 0,
                                                     VPX_BITS_8)));
 #endif  // HAVE_VSX && !CONFIG_VP9_HIGHBITDEPTH
 #endif  // !CONFIG_EMULATE_HARDWARE
 /* -------------------------------------------------------------------------- */
 // Hybrid-transform fixture: uses fht_ref as the reference forward transform.
 class TransHT : public TransTestBase {
 public:
  TransHT() { fwd_txfm_ref = fht_ref; }
 };
 // Round-trip accuracy with an error limit of 1, relaxed to 2 for 16x16
 // two-byte-pixel transforms at bit depths above 10.
 TEST_P(TransHT, AccuracyCheck) {
  int limit = 1;
  if (size_ == 16 && bit_depth_ > 10 && pixel_size_ == 2) limit = 2;
  RunAccuracyCheck(limit);
 }
 // Remaining hybrid-transform checks; the inverse-only check allows a squared
 // error of 1.
 TEST_P(TransHT, CoeffCheck) { RunCoeffCheck(); }
 TEST_P(TransHT, MemCheck) { RunMemCheck(); }
 TEST_P(TransHT, InvAccuracyCheck) { RunInvAccuracyCheck(1); }
 // C forward/inverse hybrid-transform pairs as { forward, inverse, size,
 // pixel_size }. The highbd entries exist only in high-bit-depth builds.
 static const FuncInfo ht_c_func_info[] = {
 #if CONFIG_VP9_HIGHBITDEPTH
  { &vp9_highbd_fht4x4_c, &highbd_iht_wrapper<vp9_highbd_iht4x4_16_add_c>, 4,
    2 },
  { &vp9_highbd_fht8x8_c, &highbd_iht_wrapper<vp9_highbd_iht8x8_64_add_c>, 8,
    2 },
  { &vp9_highbd_fht16x16_c, &highbd_iht_wrapper<vp9_highbd_iht16x16_256_add_c>,
    16, 2 },
 #endif
  { &vp9_fht4x4_c, &iht_wrapper<vp9_iht4x4_16_add_c>, 4, 1 },
  { &vp9_fht8x8_c, &iht_wrapper<vp9_iht8x8_64_add_c>, 8, 1 },
  { &vp9_fht16x16_c, &iht_wrapper<vp9_iht16x16_256_add_c>, 16, 1 }
 };
 // Every entry is run with transform types 0..3 at every bit depth.
 INSTANTIATE_TEST_CASE_P(
    C, TransHT,
    ::testing::Combine(
        ::testing::Range(0, static_cast<int>(sizeof(ht_c_func_info) /
                                             sizeof(ht_c_func_info[0]))),
        ::testing::Values(ht_c_func_info), ::testing::Range(0, 4),
        ::testing::Values(VPX_BITS_8, VPX_BITS_10, VPX_BITS_12)));
 #if !CONFIG_EMULATE_HARDWARE
 #if HAVE_NEON
 // Only the inverse transforms are NEON routines; each is paired with the C
 // forward transform of the same size.
 static const FuncInfo ht_neon_func_info[] = {
 #if CONFIG_VP9_HIGHBITDEPTH
  { &vp9_highbd_fht4x4_c, &highbd_iht_wrapper<vp9_highbd_iht4x4_16_add_neon>, 4,
    2 },
  { &vp9_highbd_fht8x8_c, &highbd_iht_wrapper<vp9_highbd_iht8x8_64_add_neon>, 8,
    2 },
  { &vp9_highbd_fht16x16_c,
    &highbd_iht_wrapper<vp9_highbd_iht16x16_256_add_neon>, 16, 2 },
 #endif
  { &vp9_fht4x4_c, &iht_wrapper<vp9_iht4x4_16_add_neon>, 4, 1 },
  { &vp9_fht8x8_c, &iht_wrapper<vp9_iht8x8_64_add_neon>, 8, 1 },
  { &vp9_fht16x16_c, &iht_wrapper<vp9_iht16x16_256_add_neon>, 16, 1 }
 };
 // Every entry is run with transform types 0..3 at every bit depth.
 INSTANTIATE_TEST_CASE_P(
    NEON, TransHT,
    ::testing::Combine(
        ::testing::Range(0, static_cast<int>(sizeof(ht_neon_func_info) /
                                             sizeof(ht_neon_func_info[0]))),
        ::testing::Values(ht_neon_func_info), ::testing::Range(0, 4),
        ::testing::Values(VPX_BITS_8, VPX_BITS_10, VPX_BITS_12)));
 #endif  // HAVE_NEON
 #if HAVE_SSE2
 // SSE2 forward/inverse hybrid-transform pairs, one-byte pixels only.
 static const FuncInfo ht_sse2_func_info[3] = {
  { &vp9_fht4x4_sse2, &iht_wrapper<vp9_iht4x4_16_add_sse2>, 4, 1 },
  { &vp9_fht8x8_sse2, &iht_wrapper<vp9_iht8x8_64_add_sse2>, 8, 1 },
  { &vp9_fht16x16_sse2, &iht_wrapper<vp9_iht16x16_256_add_sse2>, 16, 1 }
 };
 // Range(0, 3) must stay in step with the array length declared above; the
 // second Range(0, 4) selects transform types 0..3.
 INSTANTIATE_TEST_CASE_P(SSE2, TransHT,
                        ::testing::Combine(::testing::Range(0, 3),
                                           ::testing::Values(ht_sse2_func_info),
                                           ::testing::Range(0, 4),
                                           ::testing::Values(VPX_BITS_8)));
 #endif  // HAVE_SSE2
 #if HAVE_SSE4_1 && CONFIG_VP9_HIGHBITDEPTH
 // Only the highbd inverse transforms are SSE4.1 routines; each is paired with
 // the C highbd forward transform of the same size.
 static const FuncInfo ht_sse4_1_func_info[3] = {
  { &vp9_highbd_fht4x4_c, &highbd_iht_wrapper<vp9_highbd_iht4x4_16_add_sse4_1>,
    4, 2 },
  { &vp9_highbd_fht8x8_c, &highbd_iht_wrapper<vp9_highbd_iht8x8_64_add_sse4_1>,
    8, 2 },
  { &vp9_highbd_fht16x16_c,
    &highbd_iht_wrapper<vp9_highbd_iht16x16_256_add_sse4_1>, 16, 2 }
 };
 // Range(0, 3) must stay in step with the array length declared above; the
 // second Range(0, 4) selects transform types 0..3.
 INSTANTIATE_TEST_CASE_P(
    SSE4_1, TransHT,
    ::testing::Combine(::testing::Range(0, 3),
                       ::testing::Values(ht_sse4_1_func_info),
                       ::testing::Range(0, 4),
                       ::testing::Values(VPX_BITS_8, VPX_BITS_10,
                                         VPX_BITS_12)));
 #endif  // HAVE_SSE4_1 && CONFIG_VP9_HIGHBITDEPTH
 #endif  // !CONFIG_EMULATE_HARDWARE
 /* -------------------------------------------------------------------------- */
 // Walsh-Hadamard fixture: uses fwht_ref as the reference forward transform.
 class TransWHT : public TransTestBase {
 public:
  TransWHT() { fwd_txfm_ref = fwht_ref; }
 };
 // Both accuracy checks use an error limit of 0, i.e. the reconstruction must
 // match the source exactly.
 TEST_P(TransWHT, AccuracyCheck) { RunAccuracyCheck(0); }
 TEST_P(TransWHT, CoeffCheck) { RunCoeffCheck(); }
 TEST_P(TransWHT, MemCheck) { RunMemCheck(); }
 TEST_P(TransWHT, InvAccuracyCheck) { RunInvAccuracyCheck(0); }
 // C forward/inverse 4x4 Walsh-Hadamard pairs; the highbd entry exists only in
 // high-bit-depth builds.
 static const FuncInfo wht_c_func_info[] = {
 #if CONFIG_VP9_HIGHBITDEPTH
  { &fdct_wrapper<vp9_highbd_fwht4x4_c>,
    &highbd_idct_wrapper<vpx_highbd_iwht4x4_16_add_c>, 4, 2 },
 #endif
  { &fdct_wrapper<vp9_fwht4x4_c>, &idct_wrapper<vpx_iwht4x4_16_add_c>, 4, 1 }
 };
 // Every table entry is combined with every bit depth; the Run* helpers return
 // early for pixel_size 1 entries at bit depths above 8.
 INSTANTIATE_TEST_CASE_P(
    C, TransWHT,
    ::testing::Combine(
        ::testing::Range(0, static_cast<int>(sizeof(wht_c_func_info) /
                                             sizeof(wht_c_func_info[0]))),
        ::testing::Values(wht_c_func_info), ::testing::Values(0),
        ::testing::Values(VPX_BITS_8, VPX_BITS_10, VPX_BITS_12)));
 #if HAVE_SSE2 && !CONFIG_EMULATE_HARDWARE
 // SSE2 forward/inverse 4x4 Walsh-Hadamard pair, one-byte pixels.
 static const FuncInfo wht_sse2_func_info = {
  &fdct_wrapper<vp9_fwht4x4_sse2>, &idct_wrapper<vpx_iwht4x4_16_add_sse2>, 4, 1
 };
 // Single entry: index 0 into a one-element "array" (the struct's address).
 INSTANTIATE_TEST_CASE_P(SSE2, TransWHT,
                        ::testing::Values(make_tuple(0, &wht_sse2_func_info, 0,
                                                     VPX_BITS_8)));
 #endif  // HAVE_SSE2 && !CONFIG_EMULATE_HARDWARE
 }  // namespace
--- a/test/decode_api_test.cc
+++ b/test/decode_api_test.cc
@ -172,21 +172,4 @@ TEST(DecodeAPI, Vp9PeekSI) {
 }
 #endif  // CONFIG_VP9_DECODER
 // Checks the VPX_CODEC_CAP_HIGHBITDEPTH capability bit reported by each
 // decoder that is compiled in.
 TEST(DecodeAPI, HighBitDepthCapability) {
 // VP8 should not claim VP9 HBD as a capability.
 #if CONFIG_VP8_DECODER
  const vpx_codec_caps_t vp8_caps = vpx_codec_get_caps(&vpx_codec_vp8_dx_algo);
  EXPECT_EQ(vp8_caps & VPX_CODEC_CAP_HIGHBITDEPTH, 0);
 #endif
 // VP9 must report the bit exactly when the library was built with
 // CONFIG_VP9_HIGHBITDEPTH.
 #if CONFIG_VP9_DECODER
  const vpx_codec_caps_t vp9_caps = vpx_codec_get_caps(&vpx_codec_vp9_dx_algo);
 #if CONFIG_VP9_HIGHBITDEPTH
  EXPECT_EQ(vp9_caps & VPX_CODEC_CAP_HIGHBITDEPTH, VPX_CODEC_CAP_HIGHBITDEPTH);
 #else
  EXPECT_EQ(vp9_caps & VPX_CODEC_CAP_HIGHBITDEPTH, 0);
 #endif
 #endif
 }
 }  // namespace
--- a/test/decode_perf_test.cc
+++ b/test/decode_perf_test.cc
@ -21,7 +21,7 @@
 #include "./ivfenc.h"
 #include "./vpx_version.h"
-using ::testing::make_tuple;
+using std::tr1::make_tuple;
 namespace {
@ -34,7 +34,7 @@ const char kNewEncodeOutputFile[] = "new_encode.ivf";
 /*
 DecodePerfTest takes a tuple of filename + number of threads to decode with
 */
-typedef ::testing::tuple<const char *, unsigned> DecodePerfParam;
+typedef std::tr1::tuple<const char *, unsigned> DecodePerfParam;
 const DecodePerfParam kVP9DecodePerfVectors[] = {
  make_tuple("vp90-2-bbb_426x240_tile_1x1_180kbps.webm", 1),
--- a/test/decode_test_driver.cc
+++ b/test/decode_test_driver.cc
@ -52,15 +52,14 @@ void DecoderTest::HandlePeekResult(Decoder *const decoder,
    /* Vp8's implementation of PeekStream returns an error if the frame you
     * pass it is not a keyframe, so we only expect VPX_CODEC_OK on the first
     * frame, which must be a keyframe. */
-    if (video->frame_number() == 0) {
+    if (video->frame_number() == 0)
-      ASSERT_EQ(VPX_CODEC_OK, res_peek)
+      ASSERT_EQ(VPX_CODEC_OK, res_peek) << "Peek return failed: "
-          << "Peek return failed: " << vpx_codec_err_to_string(res_peek);
+                                        << vpx_codec_err_to_string(res_peek);
    }
  } else {
    /* The Vp9 implementation of PeekStream returns an error only if the
     * data passed to it isn't a valid Vp9 chunk. */
-    ASSERT_EQ(VPX_CODEC_OK, res_peek)
+    ASSERT_EQ(VPX_CODEC_OK, res_peek) << "Peek return failed: "
-        << "Peek return failed: " << vpx_codec_err_to_string(res_peek);
+                                      << vpx_codec_err_to_string(res_peek);
  }
 }
--- a/test/encode_api_test.cc
+++ b/test/encode_api_test.cc
@ -62,134 +62,4 @@ TEST(EncodeAPI, InvalidParams) {
  }
 }
 // Checks the VPX_CODEC_CAP_HIGHBITDEPTH capability bit reported by each
 // encoder that is compiled in.
 TEST(EncodeAPI, HighBitDepthCapability) {
 // VP8 should not claim VP9 HBD as a capability.
 #if CONFIG_VP8_ENCODER
  const vpx_codec_caps_t vp8_caps = vpx_codec_get_caps(&vpx_codec_vp8_cx_algo);
  EXPECT_EQ(vp8_caps & VPX_CODEC_CAP_HIGHBITDEPTH, 0);
 #endif
 // VP9 must report the bit exactly when the library was built with
 // CONFIG_VP9_HIGHBITDEPTH.
 #if CONFIG_VP9_ENCODER
  const vpx_codec_caps_t vp9_caps = vpx_codec_get_caps(&vpx_codec_vp9_cx_algo);
 #if CONFIG_VP9_HIGHBITDEPTH
  EXPECT_EQ(vp9_caps & VPX_CODEC_CAP_HIGHBITDEPTH, VPX_CODEC_CAP_HIGHBITDEPTH);
 #else
  EXPECT_EQ(vp9_caps & VPX_CODEC_CAP_HIGHBITDEPTH, 0);
 #endif
 #endif
 }
 #if CONFIG_VP8_ENCODER
 // Encodes a single zero-filled 711x360 I420 frame with VP8 and expects the
 // encode call to succeed.
 TEST(EncodeAPI, ImageSizeSetting) {
  const int width = 711;
  const int height = 360;
  const int bps = 12;
  vpx_image_t img;
  // Value-initialize the context (same intent as the memset in MultiResEncode)
  // so that the encode/destroy calls below operate on a well-defined, empty
  // context instead of stack garbage if vpx_codec_enc_init() fails early.
  vpx_codec_ctx_t enc = vpx_codec_ctx_t();
  vpx_codec_enc_cfg_t cfg;
  uint8_t *img_buf = reinterpret_cast<uint8_t *>(
      calloc(width * height * bps / 8, sizeof(*img_buf)));
  // Nothing has been acquired yet besides img_buf, so bailing out here cannot
  // leak. Without this check a failed allocation would be handed to
  // vpx_img_wrap() as a NULL data pointer.
  ASSERT_TRUE(img_buf != NULL);
  // The remaining setup steps use EXPECT (not ASSERT) so that the cleanup at
  // the end of the test always runs.
  EXPECT_EQ(VPX_CODEC_OK,
            vpx_codec_enc_config_default(vpx_codec_vp8_cx(), &cfg, 0));
  cfg.g_w = width;
  cfg.g_h = height;
  EXPECT_TRUE(vpx_img_wrap(&img, VPX_IMG_FMT_I420, width, height, 1, img_buf) !=
              NULL);
  EXPECT_EQ(VPX_CODEC_OK, vpx_codec_enc_init(&enc, vpx_codec_vp8_cx(), &cfg, 0));
  EXPECT_EQ(VPX_CODEC_OK, vpx_codec_encode(&enc, &img, 0, 1, 0, 0));
  free(img_buf);
  vpx_codec_destroy(&enc);
 }
 #endif
 // Set up 2 spatial streams with 2 temporal layers per stream, and generate
 // invalid configuration by setting the temporal layer rate allocation
 // (ts_target_bitrate[]) to 0 for both layers. This should fail independent of
 // CONFIG_MULTI_RES_ENCODING.
 TEST(EncodeAPI, MultiResEncode) {
  // Every encoder interface that is compiled in is put through the same check.
  // NOTE(review): if neither encoder is enabled this initializer list is
  // empty -- confirm the file is only built with at least one encoder.
  static const vpx_codec_iface_t *kCodecs[] = {
 #if CONFIG_VP8_ENCODER
    &vpx_codec_vp8_cx_algo,
 #endif
 #if CONFIG_VP9_ENCODER
    &vpx_codec_vp9_cx_algo,
 #endif
  };
  const int width = 1280;
  const int height = 720;
  const int width_down = width / 2;
  const int height_down = height / 2;
  const int target_bitrate = 1000;
  const int framerate = 30;
  for (int c = 0; c < NELEMENTS(kCodecs); ++c) {
    const vpx_codec_iface_t *const iface = kCodecs[c];
    vpx_codec_ctx_t enc[2];
    vpx_codec_enc_cfg_t cfg[2];
    // Passed as the last argument of vpx_codec_enc_init_multi() below.
    vpx_rational_t dsf[2] = { { 2, 1 }, { 2, 1 } };
    // Zero the contexts so the vpx_codec_destroy() calls at the end of the
    // loop body are made on well-defined contexts even though init is
    // expected to fail.
    memset(enc, 0, sizeof(enc));
    for (int i = 0; i < 2; i++) {
      vpx_codec_enc_config_default(iface, &cfg[i], 0);
    }
    /* Highest-resolution encoder settings */
    cfg[0].g_w = width;
    cfg[0].g_h = height;
    cfg[0].rc_dropframe_thresh = 0;
    cfg[0].rc_end_usage = VPX_CBR;
    cfg[0].rc_resize_allowed = 0;
    cfg[0].rc_min_quantizer = 2;
    cfg[0].rc_max_quantizer = 56;
    cfg[0].rc_undershoot_pct = 100;
    cfg[0].rc_overshoot_pct = 15;
    cfg[0].rc_buf_initial_sz = 500;
    cfg[0].rc_buf_optimal_sz = 600;
    cfg[0].rc_buf_sz = 1000;
    cfg[0].g_error_resilient = 1; /* Enable error resilient mode */
    cfg[0].g_lag_in_frames = 0;
    cfg[0].kf_mode = VPX_KF_AUTO;
    cfg[0].kf_min_dist = 3000;
    cfg[0].kf_max_dist = 3000;
    cfg[0].rc_target_bitrate = target_bitrate; /* Set target bitrate */
    cfg[0].g_timebase.num = 1;                 /* Set fps */
    cfg[0].g_timebase.den = framerate;
    // The second stream starts as a copy of the first; only its bitrate and
    // its (halved) dimensions are changed.
    memcpy(&cfg[1], &cfg[0], sizeof(cfg[0]));
    cfg[1].rc_target_bitrate = 500;
    cfg[1].g_w = width_down;
    cfg[1].g_h = height_down;
    // Two temporal layers per stream, alternating layer 0 / layer 1.
    for (int i = 0; i < 2; i++) {
      cfg[i].ts_number_layers = 2;
      cfg[i].ts_periodicity = 2;
      cfg[i].ts_rate_decimator[0] = 2;
      cfg[i].ts_rate_decimator[1] = 1;
      cfg[i].ts_layer_id[0] = 0;
      cfg[i].ts_layer_id[1] = 1;
      // Invalid parameters.
      cfg[i].ts_target_bitrate[0] = 0;
      cfg[i].ts_target_bitrate[1] = 0;
    }
    // VP9 should report incapable, VP8 invalid for all configurations.
    // The codec is identified by comparing the prefix of its interface name.
    const char kVP9Name[] = "WebM Project VP9";
    const bool is_vp9 = strncmp(kVP9Name, vpx_codec_iface_name(iface),
                                sizeof(kVP9Name) - 1) == 0;
    EXPECT_EQ(is_vp9 ? VPX_CODEC_INCAPABLE : VPX_CODEC_INVALID_PARAM,
              vpx_codec_enc_init_multi(&enc[0], iface, &cfg[0], 2, 0, &dsf[0]));
    for (int i = 0; i < 2; i++) {
      vpx_codec_destroy(&enc[i]);
    }
  }
 }
 }  // namespace
--- a/test/encode_test_driver.cc
+++ b/test/encode_test_driver.cc
@ -201,8 +201,6 @@ void EncoderTest::RunLoop(VideoSource *video) {
      PreEncodeFrameHook(video, encoder.get());
      encoder->EncodeFrame(video, frame_flags_);
      PostEncodeFrameHook();
      CxDataIterator iter = encoder->GetCxData();
      bool has_cxdata = false;
@ -228,8 +226,6 @@ void EncoderTest::RunLoop(VideoSource *video) {
          case VPX_CODEC_PSNR_PKT: PSNRPktHook(pkt); break;
          case VPX_CODEC_STATS_PKT: StatsPktHook(pkt); break;
          default: break;
        }
      }
--- a/test/encode_test_driver.h
+++ b/test/encode_test_driver.h
@ -128,31 +128,17 @@ class Encoder {
    ASSERT_EQ(VPX_CODEC_OK, res) << EncoderError();
  }
  // Issues control |ctrl_id| with a vpx_svc_ref_frame_config argument and
  // asserts that the codec returns VPX_CODEC_OK; on failure the assertion
  // message carries EncoderError().
  void Control(int ctrl_id, struct vpx_svc_ref_frame_config *arg) {
    const vpx_codec_err_t res = vpx_codec_control_(&encoder_, ctrl_id, arg);
    ASSERT_EQ(VPX_CODEC_OK, res) << EncoderError();
  }
  // Issues control |ctrl_id| with a vpx_svc_parameters argument and asserts
  // that the codec returns VPX_CODEC_OK; on failure the assertion message
  // carries EncoderError().
  void Control(int ctrl_id, struct vpx_svc_parameters *arg) {
    const vpx_codec_err_t res = vpx_codec_control_(&encoder_, ctrl_id, arg);
    ASSERT_EQ(VPX_CODEC_OK, res) << EncoderError();
  }
  // Issues control |ctrl_id| with a vpx_svc_frame_drop argument and asserts
  // that the codec returns VPX_CODEC_OK; on failure the assertion message
  // carries EncoderError().
  void Control(int ctrl_id, struct vpx_svc_frame_drop *arg) {
    const vpx_codec_err_t res = vpx_codec_control_(&encoder_, ctrl_id, arg);
    ASSERT_EQ(VPX_CODEC_OK, res) << EncoderError();
  }
 #if CONFIG_VP8_ENCODER || CONFIG_VP9_ENCODER
  // Issues control |ctrl_id| with a vpx_active_map_t argument and asserts that
  // the codec returns VPX_CODEC_OK; on failure the assertion message carries
  // EncoderError().
  void Control(int ctrl_id, vpx_active_map_t *arg) {
    const vpx_codec_err_t res = vpx_codec_control_(&encoder_, ctrl_id, arg);
    ASSERT_EQ(VPX_CODEC_OK, res) << EncoderError();
  }
  // Issues control |ctrl_id| with a vpx_roi_map_t argument and asserts that
  // the codec returns VPX_CODEC_OK; on failure the assertion message carries
  // EncoderError().
  void Control(int ctrl_id, vpx_roi_map_t *arg) {
    const vpx_codec_err_t res = vpx_codec_control_(&encoder_, ctrl_id, arg);
    ASSERT_EQ(VPX_CODEC_OK, res) << EncoderError();
  }
 #endif
  void Config(const vpx_codec_enc_cfg_t *cfg) {
    const vpx_codec_err_t res = vpx_codec_enc_config_set(&encoder_, cfg);
    ASSERT_EQ(VPX_CODEC_OK, res) << EncoderError();
@ -226,17 +212,12 @@ class EncoderTest {
  virtual void PreEncodeFrameHook(VideoSource * /*video*/,
                                  Encoder * /*encoder*/) {}
  // Hook to be called right after a frame has been submitted to the encoder.
  // The default implementation does nothing.
  virtual void PostEncodeFrameHook() {}
  // Hook to be called on every compressed data packet.
  virtual void FramePktHook(const vpx_codec_cx_pkt_t * /*pkt*/) {}
  // Hook to be called on every PSNR packet.
  virtual void PSNRPktHook(const vpx_codec_cx_pkt_t * /*pkt*/) {}
  // Hook to be called on every first pass stats packet.
  virtual void StatsPktHook(const vpx_codec_cx_pkt_t * /*pkt*/) {}
  // Hook to determine whether the encode loop should continue.
  virtual bool Continue() const {
    return !(::testing::Test::HasFatalFailure() || abort_);
--- a/test/examples.sh
+++ b/test/examples.sh
@ -15,7 +15,7 @@
 example_tests=$(ls $(dirname $0)/*.sh)
 # List of script names to exclude.
-exclude_list="examples stress tools_common"
+exclude_list="examples tools_common"
 # Filter out the scripts in $exclude_list.
 for word in ${exclude_list}; do
--- a/test/external_frame_buffer_test.cc
+++ b/test/external_frame_buffer_test.cc
@ -34,8 +34,7 @@ struct ExternalFrameBuffer {
 // Class to manipulate a list of external frame buffers.
 class ExternalFrameBufferList {
 public:
-  ExternalFrameBufferList()
+  ExternalFrameBufferList() : num_buffers_(0), ext_fb_list_(NULL) {}
      : num_buffers_(0), num_used_buffers_(0), ext_fb_list_(NULL) {}
  virtual ~ExternalFrameBufferList() {
    for (int i = 0; i < num_buffers_; ++i) {
@ -72,8 +71,6 @@ class ExternalFrameBufferList {
    }
    SetFrameBuffer(idx, fb);
    num_used_buffers_++;
    return 0;
  }
@ -109,7 +106,6 @@ class ExternalFrameBufferList {
    }
    EXPECT_EQ(1, ext_fb->in_use);
    ext_fb->in_use = 0;
    num_used_buffers_--;
    return 0;
  }
@ -125,8 +121,6 @@ class ExternalFrameBufferList {
    }
  }
  // Returns the running count of frame buffers handed out and not yet
  // released.
  int num_used_buffers() const { return num_used_buffers_; }
 private:
  // Returns the index of the first free frame buffer. Returns |num_buffers_|
  // if there are no free frame buffers.
@ -151,7 +145,6 @@ class ExternalFrameBufferList {
  }
  int num_buffers_;
  int num_used_buffers_;
  ExternalFrameBuffer *ext_fb_list_;
 };
@ -227,8 +220,8 @@ class ExternalFrameBufferMD5Test
  void OpenMD5File(const std::string &md5_file_name_) {
    md5_file_ = libvpx_test::OpenTestDataFile(md5_file_name_);
-    ASSERT_TRUE(md5_file_ != NULL)
+    ASSERT_TRUE(md5_file_ != NULL) << "Md5 file open failed. Filename: "
-        << "Md5 file open failed. Filename: " << md5_file_name_;
+                                   << md5_file_name_;
  }
  virtual void DecompressedFrameHook(const vpx_image_t &img,
@ -280,7 +273,6 @@ class ExternalFrameBufferMD5Test
 #if CONFIG_WEBM_IO
 const char kVP9TestFile[] = "vp90-2-02-size-lf-1920x1080.webm";
 const char kVP9NonRefTestFile[] = "vp90-2-22-svc_1280x720_1.webm";
 // Class for testing passing in external frame buffers to libvpx.
 class ExternalFrameBufferTest : public ::testing::Test {
@ -300,9 +292,7 @@ class ExternalFrameBufferTest : public ::testing::Test {
  // Destroys the decoder and the video source. Both pointers are reset to
  // NULL afterwards, so calling TearDown() a second time is harmless.
  virtual void TearDown() {
    delete decoder_;
    decoder_ = NULL;
    delete video_;
    video_ = NULL;
  }
  // Passes the external frame buffer information to libvpx.
@ -335,7 +325,7 @@ class ExternalFrameBufferTest : public ::testing::Test {
    return VPX_CODEC_OK;
  }
- protected:
+ private:
  void CheckDecodedFrames() {
    libvpx_test::DxDataIterator dec_iter = decoder_->GetDxData();
    const vpx_image_t *img = NULL;
@ -351,25 +341,6 @@ class ExternalFrameBufferTest : public ::testing::Test {
  int num_buffers_;
  ExternalFrameBufferList fb_list_;
 };
 // Variant of ExternalFrameBufferTest that decodes kVP9NonRefTestFile and can
 // verify that no external frame buffer is still in use after teardown.
 class ExternalFrameBufferNonRefTest : public ExternalFrameBufferTest {
 protected:
  // Opens the test clip and creates a VP9 decoder with a value-initialized
  // configuration.
  virtual void SetUp() {
    video_ = new libvpx_test::WebMVideoSource(kVP9NonRefTestFile);
    ASSERT_TRUE(video_ != NULL);
    video_->Init();
    video_->Begin();
    vpx_codec_dec_cfg_t cfg = vpx_codec_dec_cfg_t();
    decoder_ = new libvpx_test::VP9Decoder(cfg, 0);
    ASSERT_TRUE(decoder_ != NULL);
  }
  // Tears the decoder down explicitly, then requires that the buffer list
  // reports zero buffers in use.
  virtual void CheckFrameBufferRelease() {
    TearDown();
    ASSERT_EQ(0, fb_list_.num_used_buffers());
  }
 };
 #endif  // CONFIG_WEBM_IO
 // This test runs through the set of test vectors, and decodes them.
@ -448,8 +419,6 @@ TEST_F(ExternalFrameBufferTest, NotEnoughBuffers) {
            SetFrameBufferFunctions(num_buffers, get_vp9_frame_buffer,
                                    release_vp9_frame_buffer));
  ASSERT_EQ(VPX_CODEC_OK, DecodeOneFrame());
  // Only run this on long clips. Decoding a very short clip will return
  // VPX_CODEC_OK even with only 2 buffers.
  ASSERT_EQ(VPX_CODEC_MEM_ERROR, DecodeRemainingFrames());
 }
@ -498,15 +467,6 @@ TEST_F(ExternalFrameBufferTest, SetAfterDecode) {
            SetFrameBufferFunctions(num_buffers, get_vp9_frame_buffer,
                                    release_vp9_frame_buffer));
 }
 // Decodes the whole clip with the external frame-buffer callbacks installed,
 // then checks that every buffer has been released.
 TEST_F(ExternalFrameBufferNonRefTest, ReleaseNonRefFrameBuffer) {
  const int num_buffers = VP9_MAXIMUM_REF_BUFFERS + VPX_MAXIMUM_WORK_BUFFERS;
  ASSERT_EQ(VPX_CODEC_OK,
            SetFrameBufferFunctions(num_buffers, get_vp9_frame_buffer,
                                    release_vp9_frame_buffer));
  ASSERT_EQ(VPX_CODEC_OK, DecodeRemainingFrames());
  CheckFrameBufferRelease();
 }
 #endif  // CONFIG_WEBM_IO
 VP9_INSTANTIATE_TEST_CASE(
--- a/test/fdct4x4_test.cc
+++ b/test/fdct4x4_test.cc
@ -0,0 +1,512 @@
 /*
 *  Copyright (c) 2012 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include "third_party/googletest/src/include/gtest/gtest.h"
 #include "./vp9_rtcd.h"
 #include "./vpx_dsp_rtcd.h"
 #include "test/acm_random.h"
 #include "test/clear_system_state.h"
 #include "test/register_state_check.h"
 #include "test/util.h"
 #include "vp9/common/vp9_entropy.h"
 #include "vpx/vpx_codec.h"
 #include "vpx/vpx_integer.h"
 #include "vpx_ports/mem.h"
 using libvpx_test::ACMRandom;
 namespace {
 // Number of coefficients in one 4x4 block.
 const int kNumCoeffs = 16;
 // Forward / inverse transform signatures without a transform type.
 typedef void (*FdctFunc)(const int16_t *in, tran_low_t *out, int stride);
 typedef void (*IdctFunc)(const tran_low_t *in, uint8_t *out, int stride);
 // Forward / inverse transform signatures that also take a tx_type.
 typedef void (*FhtFunc)(const int16_t *in, tran_low_t *out, int stride,
                        int tx_type);
 typedef void (*IhtFunc)(const tran_low_t *in, uint8_t *out, int stride,
                        int tx_type);
 // Test parameters: (forward function, inverse function, tx_type, bit depth).
 typedef std::tr1::tuple<FdctFunc, IdctFunc, int, vpx_bit_depth_t> Dct4x4Param;
 typedef std::tr1::tuple<FhtFunc, IhtFunc, int, vpx_bit_depth_t> Ht4x4Param;
 // Reference forward DCT with the FhtFunc signature; forwards to
 // vpx_fdct4x4_c and ignores the transform type.
 void fdct4x4_ref(const int16_t *in, tran_low_t *out, int stride,
                 int /*tx_type*/) {
  vpx_fdct4x4_c(in, out, stride);
 }
 // Reference forward hybrid transform; forwards to vp9_fht4x4_c with the
 // given tx_type.
 void fht4x4_ref(const int16_t *in, tran_low_t *out, int stride, int tx_type) {
  vp9_fht4x4_c(in, out, stride, tx_type);
 }
 // Reference forward WHT with the FhtFunc signature; forwards to
 // vp9_fwht4x4_c and ignores the transform type.
 void fwht4x4_ref(const int16_t *in, tran_low_t *out, int stride,
                 int /*tx_type*/) {
  vp9_fwht4x4_c(in, out, stride);
 }
 // Adapters for the high-bitdepth inverse transforms. Each one fixes the
 // trailing bit-depth argument (10 or 12) so that the function matches the
 // IdctFunc / IhtFunc signatures used by the test parameters.
 #if CONFIG_VP9_HIGHBITDEPTH
 // Inverse DCT, C implementation.
 void idct4x4_10(const tran_low_t *in, uint8_t *out, int stride) {
  vpx_highbd_idct4x4_16_add_c(in, out, stride, 10);
 }
 void idct4x4_12(const tran_low_t *in, uint8_t *out, int stride) {
  vpx_highbd_idct4x4_16_add_c(in, out, stride, 12);
 }
 // Inverse hybrid transform, C implementation.
 void iht4x4_10(const tran_low_t *in, uint8_t *out, int stride, int tx_type) {
  vp9_highbd_iht4x4_16_add_c(in, out, stride, tx_type, 10);
 }
 void iht4x4_12(const tran_low_t *in, uint8_t *out, int stride, int tx_type) {
  vp9_highbd_iht4x4_16_add_c(in, out, stride, tx_type, 12);
 }
 // Inverse WHT, C implementation.
 void iwht4x4_10(const tran_low_t *in, uint8_t *out, int stride) {
  vpx_highbd_iwht4x4_16_add_c(in, out, stride, 10);
 }
 void iwht4x4_12(const tran_low_t *in, uint8_t *out, int stride) {
  vpx_highbd_iwht4x4_16_add_c(in, out, stride, 12);
 }
 #if HAVE_SSE2
 // Inverse DCT, SSE2 implementation.
 void idct4x4_10_sse2(const tran_low_t *in, uint8_t *out, int stride) {
  vpx_highbd_idct4x4_16_add_sse2(in, out, stride, 10);
 }
 void idct4x4_12_sse2(const tran_low_t *in, uint8_t *out, int stride) {
  vpx_highbd_idct4x4_16_add_sse2(in, out, stride, 12);
 }
 #endif  // HAVE_SSE2
 #endif  // CONFIG_VP9_HIGHBITDEPTH
 // Shared implementation of the 4x4 transform checks. Derived fixtures supply
 // the transforms under test through RunFwdTxfm / RunInvTxfm and fill in the
 // protected configuration members (pitch_, tx_type_, fwd_txfm_ref,
 // bit_depth_, mask_) before any check runs.
 class Trans4x4TestBase {
 public:
  virtual ~Trans4x4TestBase() {}
 protected:
  // Forward transform under test.
  virtual void RunFwdTxfm(const int16_t *in, tran_low_t *out, int stride) = 0;
  // Inverse transform under test.
  virtual void RunInvTxfm(const tran_low_t *out, uint8_t *dst, int stride) = 0;
  // Round-trip check: runs the forward then the inverse transform under test
  // on random residuals (src - dst) and measures the squared difference
  // between the resulting dst and src. |limit| bounds both the largest single
  // squared error and the total error divided by the number of blocks.
  void RunAccuracyCheck(int limit) {
    ACMRandom rnd(ACMRandom::DeterministicSeed());
    uint32_t max_error = 0;
    int64_t total_error = 0;
    const int count_test_block = 10000;
    for (int i = 0; i < count_test_block; ++i) {
      DECLARE_ALIGNED(16, int16_t, test_input_block[kNumCoeffs]);
      DECLARE_ALIGNED(16, tran_low_t, test_temp_block[kNumCoeffs]);
      DECLARE_ALIGNED(16, uint8_t, dst[kNumCoeffs]);
      DECLARE_ALIGNED(16, uint8_t, src[kNumCoeffs]);
 #if CONFIG_VP9_HIGHBITDEPTH
      DECLARE_ALIGNED(16, uint16_t, dst16[kNumCoeffs]);
      DECLARE_ALIGNED(16, uint16_t, src16[kNumCoeffs]);
 #endif
      // Initialize a test block with residuals in [-255, 255] for 8-bit
      // input, or [-mask_, mask_] for the high-bitdepth path.
      for (int j = 0; j < kNumCoeffs; ++j) {
        if (bit_depth_ == VPX_BITS_8) {
          src[j] = rnd.Rand8();
          dst[j] = rnd.Rand8();
          test_input_block[j] = src[j] - dst[j];
 #if CONFIG_VP9_HIGHBITDEPTH
        } else {
          src16[j] = rnd.Rand16() & mask_;
          dst16[j] = rnd.Rand16() & mask_;
          test_input_block[j] = src16[j] - dst16[j];
 #endif
        }
      }
      ASM_REGISTER_STATE_CHECK(
          RunFwdTxfm(test_input_block, test_temp_block, pitch_));
      if (bit_depth_ == VPX_BITS_8) {
        ASM_REGISTER_STATE_CHECK(RunInvTxfm(test_temp_block, dst, pitch_));
 #if CONFIG_VP9_HIGHBITDEPTH
      } else {
        ASM_REGISTER_STATE_CHECK(
            RunInvTxfm(test_temp_block, CONVERT_TO_BYTEPTR(dst16), pitch_));
 #endif
      }
      // A perfect round trip leaves dst equal to src.
      for (int j = 0; j < kNumCoeffs; ++j) {
 #if CONFIG_VP9_HIGHBITDEPTH
        const int diff =
            bit_depth_ == VPX_BITS_8 ? dst[j] - src[j] : dst16[j] - src16[j];
 #else
        // Without high-bitdepth support only 8-bit parameters are valid.
        ASSERT_EQ(VPX_BITS_8, bit_depth_);
        const int diff = dst[j] - src[j];
 #endif
        const uint32_t error = diff * diff;
        if (max_error < error) max_error = error;
        total_error += error;
      }
    }
    EXPECT_GE(static_cast<uint32_t>(limit), max_error)
        << "Error: 4x4 FHT/IHT has an individual round trip error > " << limit;
    EXPECT_GE(count_test_block * limit, total_error)
        << "Error: 4x4 FHT/IHT has average round trip error > " << limit
        << " per block";
  }
  // Compares the forward transform under test against fwd_txfm_ref on random
  // input; every coefficient must match exactly.
  void RunCoeffCheck() {
    ACMRandom rnd(ACMRandom::DeterministicSeed());
    const int count_test_block = 5000;
    DECLARE_ALIGNED(16, int16_t, input_block[kNumCoeffs]);
    DECLARE_ALIGNED(16, tran_low_t, output_ref_block[kNumCoeffs]);
    DECLARE_ALIGNED(16, tran_low_t, output_block[kNumCoeffs]);
    for (int i = 0; i < count_test_block; ++i) {
      // Initialize a test block with input range [-mask_, mask_].
      for (int j = 0; j < kNumCoeffs; ++j) {
        input_block[j] = (rnd.Rand16() & mask_) - (rnd.Rand16() & mask_);
      }
      fwd_txfm_ref(input_block, output_ref_block, pitch_, tx_type_);
      ASM_REGISTER_STATE_CHECK(RunFwdTxfm(input_block, output_block, pitch_));
      // The output must be bit-exact with the reference.
      for (int j = 0; j < kNumCoeffs; ++j)
        EXPECT_EQ(output_block[j], output_ref_block[j]);
    }
  }
  // Feeds extreme inputs (every sample is +mask_ or -mask_) to the forward
  // transform under test, requires an exact match with fwd_txfm_ref and
  // bounds the magnitude of every output coefficient.
  void RunMemCheck() {
    ACMRandom rnd(ACMRandom::DeterministicSeed());
    const int count_test_block = 5000;
    DECLARE_ALIGNED(16, int16_t, input_extreme_block[kNumCoeffs]);
    DECLARE_ALIGNED(16, tran_low_t, output_ref_block[kNumCoeffs]);
    DECLARE_ALIGNED(16, tran_low_t, output_block[kNumCoeffs]);
    for (int i = 0; i < count_test_block; ++i) {
      // Pick +mask_ or -mask_ at random for every sample.
      for (int j = 0; j < kNumCoeffs; ++j) {
        input_extreme_block[j] = rnd.Rand8() % 2 ? mask_ : -mask_;
      }
      // The first two iterations use the all-maximum and all-minimum blocks.
      if (i == 0) {
        for (int j = 0; j < kNumCoeffs; ++j) input_extreme_block[j] = mask_;
      } else if (i == 1) {
        for (int j = 0; j < kNumCoeffs; ++j) input_extreme_block[j] = -mask_;
      }
      fwd_txfm_ref(input_extreme_block, output_ref_block, pitch_, tx_type_);
      ASM_REGISTER_STATE_CHECK(
          RunFwdTxfm(input_extreme_block, output_block, pitch_));
      // Outputs must match the reference and stay within
      // 4 * DCT_MAX_VALUE scaled up by the extra bits above 8-bit depth.
      for (int j = 0; j < kNumCoeffs; ++j) {
        EXPECT_EQ(output_block[j], output_ref_block[j]);
        EXPECT_GE(4 * DCT_MAX_VALUE << (bit_depth_ - 8), abs(output_block[j]))
            << "Error: 4x4 FDCT has coefficient larger than 4*DCT_MAX_VALUE";
      }
    }
  }
  // Inverse-only check: coefficients come from the reference forward
  // transform, the inverse transform under test reconstructs into dst, and
  // each squared difference between dst and src must not exceed |limit|.
  void RunInvAccuracyCheck(int limit) {
    ACMRandom rnd(ACMRandom::DeterministicSeed());
    const int count_test_block = 1000;
    DECLARE_ALIGNED(16, int16_t, in[kNumCoeffs]);
    DECLARE_ALIGNED(16, tran_low_t, coeff[kNumCoeffs]);
    DECLARE_ALIGNED(16, uint8_t, dst[kNumCoeffs]);
    DECLARE_ALIGNED(16, uint8_t, src[kNumCoeffs]);
 #if CONFIG_VP9_HIGHBITDEPTH
    DECLARE_ALIGNED(16, uint16_t, dst16[kNumCoeffs]);
    DECLARE_ALIGNED(16, uint16_t, src16[kNumCoeffs]);
 #endif
    for (int i = 0; i < count_test_block; ++i) {
      // Initialize a test block with input range [-mask_, mask_].
      for (int j = 0; j < kNumCoeffs; ++j) {
        if (bit_depth_ == VPX_BITS_8) {
          src[j] = rnd.Rand8();
          dst[j] = rnd.Rand8();
          in[j] = src[j] - dst[j];
 #if CONFIG_VP9_HIGHBITDEPTH
        } else {
          src16[j] = rnd.Rand16() & mask_;
          dst16[j] = rnd.Rand16() & mask_;
          in[j] = src16[j] - dst16[j];
 #endif
        }
      }
      fwd_txfm_ref(in, coeff, pitch_, tx_type_);
      if (bit_depth_ == VPX_BITS_8) {
        ASM_REGISTER_STATE_CHECK(RunInvTxfm(coeff, dst, pitch_));
 #if CONFIG_VP9_HIGHBITDEPTH
      } else {
        ASM_REGISTER_STATE_CHECK(
            RunInvTxfm(coeff, CONVERT_TO_BYTEPTR(dst16), pitch_));
 #endif
      }
      for (int j = 0; j < kNumCoeffs; ++j) {
 #if CONFIG_VP9_HIGHBITDEPTH
        const int diff =
            bit_depth_ == VPX_BITS_8 ? dst[j] - src[j] : dst16[j] - src16[j];
 #else
        const int diff = dst[j] - src[j];
 #endif
        const uint32_t error = diff * diff;
        EXPECT_GE(static_cast<uint32_t>(limit), error)
            << "Error: 4x4 IDCT has error " << error << " at index " << j;
      }
    }
  }
  // Stride passed to every transform call.
  int pitch_;
  // Transform type forwarded to fwd_txfm_ref (and to the hybrid transforms).
  int tx_type_;
  // Reference forward transform used by the coefficient/memory/inverse checks.
  FhtFunc fwd_txfm_ref;
  // Bit depth of the samples being tested.
  vpx_bit_depth_t bit_depth_;
  // Largest sample value for bit_depth_; derived fixtures set it to
  // (1 << bit_depth_) - 1.
  int mask_;
 };
 // Fixture for the 4x4 DCT. The (forward, inverse) pair under test comes from
 // the test parameters; vpx_fdct4x4_c (via fdct4x4_ref) is the reference.
 class Trans4x4DCT : public Trans4x4TestBase,
                    public ::testing::TestWithParam<Dct4x4Param> {
 public:
  virtual ~Trans4x4DCT() {}
  virtual void SetUp() {
    // Configuration that is the same for every 4x4 DCT instance.
    pitch_ = 4;
    fwd_txfm_ref = fdct4x4_ref;
    // Parameter tuple layout: (forward, inverse, tx_type, bit depth).
    fwd_txfm_ = GET_PARAM(0);
    inv_txfm_ = GET_PARAM(1);
    tx_type_ = GET_PARAM(2);
    const vpx_bit_depth_t depth = GET_PARAM(3);
    bit_depth_ = depth;
    // Largest sample value representable at this bit depth.
    mask_ = (1 << depth) - 1;
  }
  virtual void TearDown() {
    libvpx_test::ClearSystemState();
  }
 protected:
  // Calls the forward transform under test.
  void RunFwdTxfm(const int16_t *in, tran_low_t *out, int stride) {
    (*fwd_txfm_)(in, out, stride);
  }
  // Calls the inverse transform under test.
  void RunInvTxfm(const tran_low_t *out, uint8_t *dst, int stride) {
    (*inv_txfm_)(out, dst, stride);
  }
  FdctFunc fwd_txfm_;
  IdctFunc inv_txfm_;
 };
 // Run the shared checks for every Trans4x4DCT instantiation. Both accuracy
 // checks are invoked with a limit of 1.
 TEST_P(Trans4x4DCT, AccuracyCheck) { RunAccuracyCheck(1); }
 TEST_P(Trans4x4DCT, CoeffCheck) { RunCoeffCheck(); }
 TEST_P(Trans4x4DCT, MemCheck) { RunMemCheck(); }
 TEST_P(Trans4x4DCT, InvAccuracyCheck) { RunInvAccuracyCheck(1); }
 // Fixture for the 4x4 hybrid transform. Unlike the DCT/WHT fixtures, the
 // functions under test also receive tx_type_; vp9_fht4x4_c (via fht4x4_ref)
 // is the reference.
 class Trans4x4HT : public Trans4x4TestBase,
                   public ::testing::TestWithParam<Ht4x4Param> {
 public:
  virtual ~Trans4x4HT() {}
  virtual void SetUp() {
    // Configuration that is the same for every 4x4 HT instance.
    pitch_ = 4;
    fwd_txfm_ref = fht4x4_ref;
    // Parameter tuple layout: (forward, inverse, tx_type, bit depth).
    fwd_txfm_ = GET_PARAM(0);
    inv_txfm_ = GET_PARAM(1);
    tx_type_ = GET_PARAM(2);
    const vpx_bit_depth_t depth = GET_PARAM(3);
    bit_depth_ = depth;
    // Largest sample value representable at this bit depth.
    mask_ = (1 << depth) - 1;
  }
  virtual void TearDown() {
    libvpx_test::ClearSystemState();
  }
 protected:
  // Calls the forward transform under test with the configured tx_type_.
  void RunFwdTxfm(const int16_t *in, tran_low_t *out, int stride) {
    (*fwd_txfm_)(in, out, stride, tx_type_);
  }
  // Calls the inverse transform under test with the configured tx_type_.
  void RunInvTxfm(const tran_low_t *out, uint8_t *dst, int stride) {
    (*inv_txfm_)(out, dst, stride, tx_type_);
  }
  FhtFunc fwd_txfm_;
  IhtFunc inv_txfm_;
 };
 // Run the shared checks for every Trans4x4HT instantiation. Both accuracy
 // checks are invoked with a limit of 1.
 TEST_P(Trans4x4HT, AccuracyCheck) { RunAccuracyCheck(1); }
 TEST_P(Trans4x4HT, CoeffCheck) { RunCoeffCheck(); }
 TEST_P(Trans4x4HT, MemCheck) { RunMemCheck(); }
 TEST_P(Trans4x4HT, InvAccuracyCheck) { RunInvAccuracyCheck(1); }
 // Fixture for the 4x4 WHT. It shares the Dct4x4Param tuple type with the DCT
 // fixture; vp9_fwht4x4_c (via fwht4x4_ref) is the reference.
 class Trans4x4WHT : public Trans4x4TestBase,
                    public ::testing::TestWithParam<Dct4x4Param> {
 public:
  virtual ~Trans4x4WHT() {}
  virtual void SetUp() {
    // Configuration that is the same for every 4x4 WHT instance.
    pitch_ = 4;
    fwd_txfm_ref = fwht4x4_ref;
    // Parameter tuple layout: (forward, inverse, tx_type, bit depth).
    fwd_txfm_ = GET_PARAM(0);
    inv_txfm_ = GET_PARAM(1);
    tx_type_ = GET_PARAM(2);
    const vpx_bit_depth_t depth = GET_PARAM(3);
    bit_depth_ = depth;
    // Largest sample value representable at this bit depth.
    mask_ = (1 << depth) - 1;
  }
  virtual void TearDown() {
    libvpx_test::ClearSystemState();
  }
 protected:
  // Calls the forward transform under test.
  void RunFwdTxfm(const int16_t *in, tran_low_t *out, int stride) {
    (*fwd_txfm_)(in, out, stride);
  }
  // Calls the inverse transform under test.
  void RunInvTxfm(const tran_low_t *out, uint8_t *dst, int stride) {
    (*inv_txfm_)(out, dst, stride);
  }
  FdctFunc fwd_txfm_;
  IdctFunc inv_txfm_;
 };
 // Run the shared checks for every Trans4x4WHT instantiation. Both accuracy
 // checks are invoked with a limit of 0.
 TEST_P(Trans4x4WHT, AccuracyCheck) { RunAccuracyCheck(0); }
 TEST_P(Trans4x4WHT, CoeffCheck) { RunCoeffCheck(); }
 TEST_P(Trans4x4WHT, MemCheck) { RunMemCheck(); }
 TEST_P(Trans4x4WHT, InvAccuracyCheck) { RunInvAccuracyCheck(0); }
 using std::tr1::make_tuple;
 // C DCT instantiations. High-bitdepth builds test the 10- and 12-bit paths
 // (via the idct4x4_10 / idct4x4_12 adapters) in addition to the 8-bit path;
 // other builds test 8-bit only.
 #if CONFIG_VP9_HIGHBITDEPTH
 INSTANTIATE_TEST_CASE_P(
    C, Trans4x4DCT,
    ::testing::Values(
        make_tuple(&vpx_highbd_fdct4x4_c, &idct4x4_10, 0, VPX_BITS_10),
        make_tuple(&vpx_highbd_fdct4x4_c, &idct4x4_12, 0, VPX_BITS_12),
        make_tuple(&vpx_fdct4x4_c, &vpx_idct4x4_16_add_c, 0, VPX_BITS_8)));
 #else
 INSTANTIATE_TEST_CASE_P(C, Trans4x4DCT,
                        ::testing::Values(make_tuple(&vpx_fdct4x4_c,
                                                     &vpx_idct4x4_16_add_c, 0,
                                                     VPX_BITS_8)));
 #endif  // CONFIG_VP9_HIGHBITDEPTH
 // C hybrid-transform instantiations: one entry per tx_type (0..3) for each
 // bit depth that the build supports.
 #if CONFIG_VP9_HIGHBITDEPTH
 INSTANTIATE_TEST_CASE_P(
    C, Trans4x4HT,
    ::testing::Values(
        make_tuple(&vp9_highbd_fht4x4_c, &iht4x4_10, 0, VPX_BITS_10),
        make_tuple(&vp9_highbd_fht4x4_c, &iht4x4_10, 1, VPX_BITS_10),
        make_tuple(&vp9_highbd_fht4x4_c, &iht4x4_10, 2, VPX_BITS_10),
        make_tuple(&vp9_highbd_fht4x4_c, &iht4x4_10, 3, VPX_BITS_10),
        make_tuple(&vp9_highbd_fht4x4_c, &iht4x4_12, 0, VPX_BITS_12),
        make_tuple(&vp9_highbd_fht4x4_c, &iht4x4_12, 1, VPX_BITS_12),
        make_tuple(&vp9_highbd_fht4x4_c, &iht4x4_12, 2, VPX_BITS_12),
        make_tuple(&vp9_highbd_fht4x4_c, &iht4x4_12, 3, VPX_BITS_12),
        make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_c, 0, VPX_BITS_8),
        make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_c, 1, VPX_BITS_8),
        make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_c, 2, VPX_BITS_8),
        make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_c, 3, VPX_BITS_8)));
 #else
 INSTANTIATE_TEST_CASE_P(
    C, Trans4x4HT,
    ::testing::Values(
        make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_c, 0, VPX_BITS_8),
        make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_c, 1, VPX_BITS_8),
        make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_c, 2, VPX_BITS_8),
        make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_c, 3, VPX_BITS_8)));
 #endif  // CONFIG_VP9_HIGHBITDEPTH
 // C WHT instantiations. High-bitdepth builds add the 10- and 12-bit paths
 // (via the iwht4x4_10 / iwht4x4_12 adapters) to the 8-bit path.
 #if CONFIG_VP9_HIGHBITDEPTH
 INSTANTIATE_TEST_CASE_P(
    C, Trans4x4WHT,
    ::testing::Values(
        make_tuple(&vp9_highbd_fwht4x4_c, &iwht4x4_10, 0, VPX_BITS_10),
        make_tuple(&vp9_highbd_fwht4x4_c, &iwht4x4_12, 0, VPX_BITS_12),
        make_tuple(&vp9_fwht4x4_c, &vpx_iwht4x4_16_add_c, 0, VPX_BITS_8)));
 #else
 INSTANTIATE_TEST_CASE_P(C, Trans4x4WHT,
                        ::testing::Values(make_tuple(&vp9_fwht4x4_c,
                                                     &vpx_iwht4x4_16_add_c, 0,
                                                     VPX_BITS_8)));
 #endif  // CONFIG_VP9_HIGHBITDEPTH
 // NEON DCT instantiation: only the inverse transform is the NEON version; the
 // forward transform is the C implementation.
 #if HAVE_NEON_ASM && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
 INSTANTIATE_TEST_CASE_P(NEON, Trans4x4DCT,
                        ::testing::Values(make_tuple(&vpx_fdct4x4_c,
                                                     &vpx_idct4x4_16_add_neon,
                                                     0, VPX_BITS_8)));
 #endif  // HAVE_NEON_ASM && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
 // NEON hybrid-transform instantiations: C forward transform paired with the
 // NEON inverse transform, one entry per tx_type (0..3).
 #if HAVE_NEON && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
 INSTANTIATE_TEST_CASE_P(
    NEON, Trans4x4HT,
    ::testing::Values(
        make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_neon, 0, VPX_BITS_8),
        make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_neon, 1, VPX_BITS_8),
        make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_neon, 2, VPX_BITS_8),
        make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_neon, 3, VPX_BITS_8)));
 #endif  // HAVE_NEON && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
 // SSE2 WHT instantiations: each SSE2 function is paired with the C
 // implementation of the opposite direction.
 #if HAVE_SSE2 && !CONFIG_EMULATE_HARDWARE
 INSTANTIATE_TEST_CASE_P(
    SSE2, Trans4x4WHT,
    ::testing::Values(
        make_tuple(&vp9_fwht4x4_sse2, &vpx_iwht4x4_16_add_c, 0, VPX_BITS_8),
        make_tuple(&vp9_fwht4x4_c, &vpx_iwht4x4_16_add_sse2, 0, VPX_BITS_8)));
 #endif  // HAVE_SSE2 && !CONFIG_EMULATE_HARDWARE
 // SSE2 instantiations for builds without high-bitdepth support: both the
 // forward and the inverse transforms are the SSE2 versions.
 #if HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
 INSTANTIATE_TEST_CASE_P(SSE2, Trans4x4DCT,
                        ::testing::Values(make_tuple(&vpx_fdct4x4_sse2,
                                                     &vpx_idct4x4_16_add_sse2,
                                                     0, VPX_BITS_8)));
 INSTANTIATE_TEST_CASE_P(
    SSE2, Trans4x4HT,
    ::testing::Values(
        make_tuple(&vp9_fht4x4_sse2, &vp9_iht4x4_16_add_sse2, 0, VPX_BITS_8),
        make_tuple(&vp9_fht4x4_sse2, &vp9_iht4x4_16_add_sse2, 1, VPX_BITS_8),
        make_tuple(&vp9_fht4x4_sse2, &vp9_iht4x4_16_add_sse2, 2, VPX_BITS_8),
        make_tuple(&vp9_fht4x4_sse2, &vp9_iht4x4_16_add_sse2, 3, VPX_BITS_8)));
 #endif  // HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
 // SSE2 instantiations for high-bitdepth builds. The 10/12-bit DCT entries
 // pair both the C and the SSE2 forward transform with the SSE2 inverse
 // adapters; the 8-bit DCT and HT entries pair the SSE2 forward transform with
 // the C inverse transform.
 #if HAVE_SSE2 && CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
 INSTANTIATE_TEST_CASE_P(
    SSE2, Trans4x4DCT,
    ::testing::Values(
        make_tuple(&vpx_highbd_fdct4x4_c, &idct4x4_10_sse2, 0, VPX_BITS_10),
        make_tuple(&vpx_highbd_fdct4x4_sse2, &idct4x4_10_sse2, 0, VPX_BITS_10),
        make_tuple(&vpx_highbd_fdct4x4_c, &idct4x4_12_sse2, 0, VPX_BITS_12),
        make_tuple(&vpx_highbd_fdct4x4_sse2, &idct4x4_12_sse2, 0, VPX_BITS_12),
        make_tuple(&vpx_fdct4x4_sse2, &vpx_idct4x4_16_add_c, 0, VPX_BITS_8)));
 INSTANTIATE_TEST_CASE_P(
    SSE2, Trans4x4HT,
    ::testing::Values(
        make_tuple(&vp9_fht4x4_sse2, &vp9_iht4x4_16_add_c, 0, VPX_BITS_8),
        make_tuple(&vp9_fht4x4_sse2, &vp9_iht4x4_16_add_c, 1, VPX_BITS_8),
        make_tuple(&vp9_fht4x4_sse2, &vp9_iht4x4_16_add_c, 2, VPX_BITS_8),
        make_tuple(&vp9_fht4x4_sse2, &vp9_iht4x4_16_add_c, 3, VPX_BITS_8)));
 #endif  // HAVE_SSE2 && CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
 // MSA instantiations (8-bit only): both the forward and the inverse
 // transforms are the MSA versions; the HT entries cover tx_type 0..3.
 #if HAVE_MSA && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
 INSTANTIATE_TEST_CASE_P(MSA, Trans4x4DCT,
                        ::testing::Values(make_tuple(&vpx_fdct4x4_msa,
                                                     &vpx_idct4x4_16_add_msa, 0,
                                                     VPX_BITS_8)));
 INSTANTIATE_TEST_CASE_P(
    MSA, Trans4x4HT,
    ::testing::Values(
        make_tuple(&vp9_fht4x4_msa, &vp9_iht4x4_16_add_msa, 0, VPX_BITS_8),
        make_tuple(&vp9_fht4x4_msa, &vp9_iht4x4_16_add_msa, 1, VPX_BITS_8),
        make_tuple(&vp9_fht4x4_msa, &vp9_iht4x4_16_add_msa, 2, VPX_BITS_8),
        make_tuple(&vp9_fht4x4_msa, &vp9_iht4x4_16_add_msa, 3, VPX_BITS_8)));
 #endif  // HAVE_MSA && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
 }  // namespace
--- a/test/fdct8x8_test.cc
+++ b/test/fdct8x8_test.cc
@ -43,9 +43,9 @@ typedef void (*FhtFunc)(const int16_t *in, tran_low_t *out, int stride,
 typedef void (*IhtFunc)(const tran_low_t *in, uint8_t *out, int stride,
                        int tx_type);
-typedef ::testing::tuple<FdctFunc, IdctFunc, int, vpx_bit_depth_t> Dct8x8Param;
+typedef std::tr1::tuple<FdctFunc, IdctFunc, int, vpx_bit_depth_t> Dct8x8Param;
-typedef ::testing::tuple<FhtFunc, IhtFunc, int, vpx_bit_depth_t> Ht8x8Param;
+typedef std::tr1::tuple<FhtFunc, IhtFunc, int, vpx_bit_depth_t> Ht8x8Param;
-typedef ::testing::tuple<IdctFunc, IdctFunc, int, vpx_bit_depth_t> Idct8x8Param;
+typedef std::tr1::tuple<IdctFunc, IdctFunc, int, vpx_bit_depth_t> Idct8x8Param;
 void reference_8x8_dct_1d(const double in[8], double out[8]) {
  const double kInvSqrt2 = 0.707106781186547524400844362104;
@ -88,45 +88,45 @@ void fht8x8_ref(const int16_t *in, tran_low_t *out, int stride, int tx_type) {
 #if CONFIG_VP9_HIGHBITDEPTH
 void idct8x8_10(const tran_low_t *in, uint8_t *out, int stride) {
-  vpx_highbd_idct8x8_64_add_c(in, CAST_TO_SHORTPTR(out), stride, 10);
+  vpx_highbd_idct8x8_64_add_c(in, out, stride, 10);
 }
 void idct8x8_12(const tran_low_t *in, uint8_t *out, int stride) {
-  vpx_highbd_idct8x8_64_add_c(in, CAST_TO_SHORTPTR(out), stride, 12);
+  vpx_highbd_idct8x8_64_add_c(in, out, stride, 12);
 }
 void iht8x8_10(const tran_low_t *in, uint8_t *out, int stride, int tx_type) {
-  vp9_highbd_iht8x8_64_add_c(in, CAST_TO_SHORTPTR(out), stride, tx_type, 10);
+  vp9_highbd_iht8x8_64_add_c(in, out, stride, tx_type, 10);
 }
 void iht8x8_12(const tran_low_t *in, uint8_t *out, int stride, int tx_type) {
-  vp9_highbd_iht8x8_64_add_c(in, CAST_TO_SHORTPTR(out), stride, tx_type, 12);
+  vp9_highbd_iht8x8_64_add_c(in, out, stride, tx_type, 12);
 }
 #if HAVE_SSE2
 void idct8x8_12_add_10_c(const tran_low_t *in, uint8_t *out, int stride) {
-  vpx_highbd_idct8x8_12_add_c(in, CAST_TO_SHORTPTR(out), stride, 10);
+  vpx_highbd_idct8x8_12_add_c(in, out, stride, 10);
 }
 void idct8x8_12_add_12_c(const tran_low_t *in, uint8_t *out, int stride) {
-  vpx_highbd_idct8x8_12_add_c(in, CAST_TO_SHORTPTR(out), stride, 12);
+  vpx_highbd_idct8x8_12_add_c(in, out, stride, 12);
 }
 void idct8x8_12_add_10_sse2(const tran_low_t *in, uint8_t *out, int stride) {
-  vpx_highbd_idct8x8_12_add_sse2(in, CAST_TO_SHORTPTR(out), stride, 10);
+  vpx_highbd_idct8x8_12_add_sse2(in, out, stride, 10);
 }
 void idct8x8_12_add_12_sse2(const tran_low_t *in, uint8_t *out, int stride) {
-  vpx_highbd_idct8x8_12_add_sse2(in, CAST_TO_SHORTPTR(out), stride, 12);
+  vpx_highbd_idct8x8_12_add_sse2(in, out, stride, 12);
 }
 void idct8x8_64_add_10_sse2(const tran_low_t *in, uint8_t *out, int stride) {
-  vpx_highbd_idct8x8_64_add_sse2(in, CAST_TO_SHORTPTR(out), stride, 10);
+  vpx_highbd_idct8x8_64_add_sse2(in, out, stride, 10);
 }
 void idct8x8_64_add_12_sse2(const tran_low_t *in, uint8_t *out, int stride) {
-  vpx_highbd_idct8x8_64_add_sse2(in, CAST_TO_SHORTPTR(out), stride, 12);
+  vpx_highbd_idct8x8_64_add_sse2(in, out, stride, 12);
 }
 #endif  // HAVE_SSE2
 #endif  // CONFIG_VP9_HIGHBITDEPTH
@ -257,7 +257,7 @@ class FwdTrans8x8TestBase {
 #if CONFIG_VP9_HIGHBITDEPTH
      } else {
        ASM_REGISTER_STATE_CHECK(
-            RunInvTxfm(test_temp_block, CAST_TO_BYTEPTR(dst16), pitch_));
+            RunInvTxfm(test_temp_block, CONVERT_TO_BYTEPTR(dst16), pitch_));
 #endif
      }
@ -340,7 +340,7 @@ class FwdTrans8x8TestBase {
 #if CONFIG_VP9_HIGHBITDEPTH
      } else {
        ASM_REGISTER_STATE_CHECK(
-            RunInvTxfm(test_temp_block, CAST_TO_BYTEPTR(dst16), pitch_));
+            RunInvTxfm(test_temp_block, CONVERT_TO_BYTEPTR(dst16), pitch_));
 #endif
      }
@ -413,7 +413,7 @@ class FwdTrans8x8TestBase {
 #if CONFIG_VP9_HIGHBITDEPTH
      } else {
        ASM_REGISTER_STATE_CHECK(
-            RunInvTxfm(coeff, CAST_TO_BYTEPTR(dst16), pitch_));
+            RunInvTxfm(coeff, CONVERT_TO_BYTEPTR(dst16), pitch_));
 #endif
      }
@ -497,9 +497,9 @@ class FwdTrans8x8TestBase {
        ASM_REGISTER_STATE_CHECK(RunInvTxfm(coeff, dst, pitch_));
 #if CONFIG_VP9_HIGHBITDEPTH
      } else {
-        ref_txfm(coeff, CAST_TO_BYTEPTR(ref16), pitch_);
+        ref_txfm(coeff, CONVERT_TO_BYTEPTR(ref16), pitch_);
        ASM_REGISTER_STATE_CHECK(
-            RunInvTxfm(coeff, CAST_TO_BYTEPTR(dst16), pitch_));
+            RunInvTxfm(coeff, CONVERT_TO_BYTEPTR(dst16), pitch_));
 #endif
      }
@ -511,8 +511,8 @@ class FwdTrans8x8TestBase {
        const int diff = dst[j] - ref[j];
 #endif
        const uint32_t error = diff * diff;
-        EXPECT_EQ(0u, error)
+        EXPECT_EQ(0u, error) << "Error: 8x8 IDCT has error " << error
-            << "Error: 8x8 IDCT has error " << error << " at index " << j;
+                             << " at index " << j;
      }
    }
  }
@ -628,7 +628,7 @@ TEST_P(InvTrans8x8DCT, CompareReference) {
  CompareInvReference(ref_txfm_, thresh_);
 }
-using ::testing::make_tuple;
+using std::tr1::make_tuple;
 #if CONFIG_VP9_HIGHBITDEPTH
 INSTANTIATE_TEST_CASE_P(
@ -670,13 +670,14 @@ INSTANTIATE_TEST_CASE_P(
        make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_c, 3, VPX_BITS_8)));
 #endif  // CONFIG_VP9_HIGHBITDEPTH
-#if HAVE_NEON && !CONFIG_EMULATE_HARDWARE
+#if HAVE_NEON_ASM && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
 INSTANTIATE_TEST_CASE_P(NEON, FwdTrans8x8DCT,
                        ::testing::Values(make_tuple(&vpx_fdct8x8_neon,
                                                     &vpx_idct8x8_64_add_neon,
                                                     0, VPX_BITS_8)));
 #endif  // HAVE_NEON_ASM && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
-#if !CONFIG_VP9_HIGHBITDEPTH
+#if HAVE_NEON && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
 INSTANTIATE_TEST_CASE_P(
    NEON, FwdTrans8x8HT,
    ::testing::Values(
@ -684,8 +685,7 @@ INSTANTIATE_TEST_CASE_P(
        make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_neon, 1, VPX_BITS_8),
        make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_neon, 2, VPX_BITS_8),
        make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_neon, 3, VPX_BITS_8)));
-#endif  // !CONFIG_VP9_HIGHBITDEPTH
+#endif  // HAVE_NEON && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
 #endif  // HAVE_NEON && !CONFIG_EMULATE_HARDWARE
 #if HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
 INSTANTIATE_TEST_CASE_P(SSE2, FwdTrans8x8DCT,
@ -740,7 +740,7 @@ INSTANTIATE_TEST_CASE_P(
    !CONFIG_EMULATE_HARDWARE
 INSTANTIATE_TEST_CASE_P(SSSE3, FwdTrans8x8DCT,
                        ::testing::Values(make_tuple(&vpx_fdct8x8_ssse3,
-                                                     &vpx_idct8x8_64_add_sse2,
+                                                     &vpx_idct8x8_64_add_ssse3,
                                                     0, VPX_BITS_8)));
 #endif
@ -757,11 +757,4 @@ INSTANTIATE_TEST_CASE_P(
        make_tuple(&vp9_fht8x8_msa, &vp9_iht8x8_64_add_msa, 2, VPX_BITS_8),
        make_tuple(&vp9_fht8x8_msa, &vp9_iht8x8_64_add_msa, 3, VPX_BITS_8)));
 #endif  // HAVE_MSA && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
 #if HAVE_VSX && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
 INSTANTIATE_TEST_CASE_P(VSX, FwdTrans8x8DCT,
                        ::testing::Values(make_tuple(&vpx_fdct8x8_c,
                                                     &vpx_idct8x8_64_add_vsx, 0,
                                                     VPX_BITS_8)));
 #endif  // HAVE_VSX && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
 }  // namespace
--- a/test/hadamard_test.cc
+++ b/test/hadamard_test.cc
@ -13,7 +13,6 @@
 #include "third_party/googletest/src/include/gtest/gtest.h"
 #include "./vpx_dsp_rtcd.h"
 #include "vpx_ports/vpx_timer.h"
 #include "test/acm_random.h"
 #include "test/register_state_check.h"
@ -22,8 +21,7 @@ namespace {
 using ::libvpx_test::ACMRandom;
-typedef void (*HadamardFunc)(const int16_t *a, ptrdiff_t a_stride,
+typedef void (*HadamardFunc)(const int16_t *a, int a_stride, int16_t *b);
                             tran_low_t *b);
 void hadamard_loop(const int16_t *a, int a_stride, int16_t *out) {
  int16_t b[8];
@ -48,16 +46,18 @@ void hadamard_loop(const int16_t *a, int a_stride, int16_t *out) {
  out[5] = c[3] - c[7];
 }
-void reference_hadamard8x8(const int16_t *a, int a_stride, tran_low_t *b) {
+void reference_hadamard8x8(const int16_t *a, int a_stride, int16_t *b) {
  int16_t buf[64];
-  int16_t buf2[64];
+  for (int i = 0; i < 8; ++i) {
-  for (int i = 0; i < 8; ++i) hadamard_loop(a + i, a_stride, buf + i * 8);
+    hadamard_loop(a + i, a_stride, buf + i * 8);
-  for (int i = 0; i < 8; ++i) hadamard_loop(buf + i, 8, buf2 + i * 8);
+  }
-  for (int i = 0; i < 64; ++i) b[i] = (tran_low_t)buf2[i];
+  for (int i = 0; i < 8; ++i) {
    hadamard_loop(buf + i, 8, b + i * 8);
  }
 }
-void reference_hadamard16x16(const int16_t *a, int a_stride, tran_low_t *b) {
+void reference_hadamard16x16(const int16_t *a, int a_stride, int16_t *b) {
  /* The source is a 16x16 block. The destination is rearranged to 8x32.
   * Input is 9 bit. */
  reference_hadamard8x8(a + 0 + 0 * a_stride, a_stride, b + 0);
@ -68,16 +68,16 @@ void reference_hadamard16x16(const int16_t *a, int a_stride, tran_low_t *b) {
  /* Overlay the 8x8 blocks and combine. */
  for (int i = 0; i < 64; ++i) {
    /* 8x8 steps the range up to 15 bits. */
-    const tran_low_t a0 = b[0];
+    const int16_t a0 = b[0];
-    const tran_low_t a1 = b[64];
+    const int16_t a1 = b[64];
-    const tran_low_t a2 = b[128];
+    const int16_t a2 = b[128];
-    const tran_low_t a3 = b[192];
+    const int16_t a3 = b[192];
    /* Prevent the result from escaping int16_t. */
-    const tran_low_t b0 = (a0 + a1) >> 1;
+    const int16_t b0 = (a0 + a1) >> 1;
-    const tran_low_t b1 = (a0 - a1) >> 1;
+    const int16_t b1 = (a0 - a1) >> 1;
-    const tran_low_t b2 = (a2 + a3) >> 1;
+    const int16_t b2 = (a2 + a3) >> 1;
-    const tran_low_t b3 = (a2 - a3) >> 1;
+    const int16_t b3 = (a2 - a3) >> 1;
    /* Store a 16 bit value. */
    b[0] = b0 + b2;
@ -101,35 +101,12 @@ class HadamardTestBase : public ::testing::TestWithParam<HadamardFunc> {
  ACMRandom rnd_;
 };
 void HadamardSpeedTest(const char *name, HadamardFunc const func,
                       const int16_t *input, int stride, tran_low_t *output,
                       int times) {
  int i;
  vpx_usec_timer timer;
  vpx_usec_timer_start(&timer);
  for (i = 0; i < times; ++i) {
    func(input, stride, output);
  }
  vpx_usec_timer_mark(&timer);
  const int elapsed_time = static_cast<int>(vpx_usec_timer_elapsed(&timer));
  printf("%s[%12d runs]: %d us\n", name, times, elapsed_time);
 }
 class Hadamard8x8Test : public HadamardTestBase {};
 void HadamardSpeedTest8x8(HadamardFunc const func, int times) {
  DECLARE_ALIGNED(16, int16_t, input[64]);
  DECLARE_ALIGNED(16, tran_low_t, output[64]);
  memset(input, 1, sizeof(input));
  HadamardSpeedTest("Hadamard8x8", func, input, 8, output, times);
 }
 TEST_P(Hadamard8x8Test, CompareReferenceRandom) {
  DECLARE_ALIGNED(16, int16_t, a[64]);
-  DECLARE_ALIGNED(16, tran_low_t, b[64]);
+  DECLARE_ALIGNED(16, int16_t, b[64]);
-  tran_low_t b_ref[64];
+  int16_t b_ref[64];
  for (int i = 0; i < 64; ++i) {
    a[i] = rnd_.Rand9Signed();
  }
@ -147,8 +124,8 @@ TEST_P(Hadamard8x8Test, CompareReferenceRandom) {
 TEST_P(Hadamard8x8Test, VaryStride) {
  DECLARE_ALIGNED(16, int16_t, a[64 * 8]);
-  DECLARE_ALIGNED(16, tran_low_t, b[64]);
+  DECLARE_ALIGNED(16, int16_t, b[64]);
-  tran_low_t b_ref[64];
+  int16_t b_ref[64];
  for (int i = 0; i < 64 * 8; ++i) {
    a[i] = rnd_.Rand9Signed();
  }
@ -167,12 +144,6 @@ TEST_P(Hadamard8x8Test, VaryStride) {
  }
 }
 TEST_P(Hadamard8x8Test, DISABLED_Speed) {
  HadamardSpeedTest8x8(h_func_, 10);
  HadamardSpeedTest8x8(h_func_, 10000);
  HadamardSpeedTest8x8(h_func_, 10000000);
 }
 INSTANTIATE_TEST_CASE_P(C, Hadamard8x8Test,
                        ::testing::Values(&vpx_hadamard_8x8_c));
@ -191,33 +162,12 @@ INSTANTIATE_TEST_CASE_P(NEON, Hadamard8x8Test,
                        ::testing::Values(&vpx_hadamard_8x8_neon));
 #endif  // HAVE_NEON
 // TODO(jingning): Remove highbitdepth flag when the SIMD functions are
 // in place and turn on the unit test.
 #if !CONFIG_VP9_HIGHBITDEPTH
 #if HAVE_MSA
 INSTANTIATE_TEST_CASE_P(MSA, Hadamard8x8Test,
                        ::testing::Values(&vpx_hadamard_8x8_msa));
 #endif  // HAVE_MSA
 #endif  // !CONFIG_VP9_HIGHBITDEPTH
 #if HAVE_VSX
 INSTANTIATE_TEST_CASE_P(VSX, Hadamard8x8Test,
                        ::testing::Values(&vpx_hadamard_8x8_vsx));
 #endif  // HAVE_VSX
 class Hadamard16x16Test : public HadamardTestBase {};
 void HadamardSpeedTest16x16(HadamardFunc const func, int times) {
  DECLARE_ALIGNED(16, int16_t, input[256]);
  DECLARE_ALIGNED(16, tran_low_t, output[256]);
  memset(input, 1, sizeof(input));
  HadamardSpeedTest("Hadamard16x16", func, input, 16, output, times);
 }
 TEST_P(Hadamard16x16Test, CompareReferenceRandom) {
  DECLARE_ALIGNED(16, int16_t, a[16 * 16]);
-  DECLARE_ALIGNED(16, tran_low_t, b[16 * 16]);
+  DECLARE_ALIGNED(16, int16_t, b[16 * 16]);
-  tran_low_t b_ref[16 * 16];
+  int16_t b_ref[16 * 16];
  for (int i = 0; i < 16 * 16; ++i) {
    a[i] = rnd_.Rand9Signed();
  }
@ -235,8 +185,8 @@ TEST_P(Hadamard16x16Test, CompareReferenceRandom) {
 TEST_P(Hadamard16x16Test, VaryStride) {
  DECLARE_ALIGNED(16, int16_t, a[16 * 16 * 8]);
-  DECLARE_ALIGNED(16, tran_low_t, b[16 * 16]);
+  DECLARE_ALIGNED(16, int16_t, b[16 * 16]);
-  tran_low_t b_ref[16 * 16];
+  int16_t b_ref[16 * 16];
  for (int i = 0; i < 16 * 16 * 8; ++i) {
    a[i] = rnd_.Rand9Signed();
  }
@ -255,12 +205,6 @@ TEST_P(Hadamard16x16Test, VaryStride) {
  }
 }
 TEST_P(Hadamard16x16Test, DISABLED_Speed) {
  HadamardSpeedTest16x16(h_func_, 10);
  HadamardSpeedTest16x16(h_func_, 10000);
  HadamardSpeedTest16x16(h_func_, 10000000);
 }
 INSTANTIATE_TEST_CASE_P(C, Hadamard16x16Test,
                        ::testing::Values(&vpx_hadamard_16x16_c));
@ -269,25 +213,8 @@ INSTANTIATE_TEST_CASE_P(SSE2, Hadamard16x16Test,
                        ::testing::Values(&vpx_hadamard_16x16_sse2));
 #endif  // HAVE_SSE2
 #if HAVE_AVX2
 INSTANTIATE_TEST_CASE_P(AVX2, Hadamard16x16Test,
                        ::testing::Values(&vpx_hadamard_16x16_avx2));
 #endif  // HAVE_AVX2
 #if HAVE_VSX
 INSTANTIATE_TEST_CASE_P(VSX, Hadamard16x16Test,
                        ::testing::Values(&vpx_hadamard_16x16_vsx));
 #endif  // HAVE_VSX
 #if HAVE_NEON
 INSTANTIATE_TEST_CASE_P(NEON, Hadamard16x16Test,
                        ::testing::Values(&vpx_hadamard_16x16_neon));
 #endif  // HAVE_NEON
 #if !CONFIG_VP9_HIGHBITDEPTH
 #if HAVE_MSA
 INSTANTIATE_TEST_CASE_P(MSA, Hadamard16x16Test,
                        ::testing::Values(&vpx_hadamard_16x16_msa));
 #endif  // HAVE_MSA
 #endif  // !CONFIG_VP9_HIGHBITDEPTH
 }  // namespace
--- a/test/idct_test.cc
+++ b/test/idct_test.cc
@ -13,7 +13,6 @@
 #include "third_party/googletest/src/include/gtest/gtest.h"
 #include "test/buffer.h"
 #include "test/clear_system_state.h"
 #include "test/register_state_check.h"
 #include "vpx/vpx_integer.h"
@ -22,156 +21,110 @@ typedef void (*IdctFunc)(int16_t *input, unsigned char *pred_ptr,
                         int pred_stride, unsigned char *dst_ptr,
                         int dst_stride);
 namespace {
 using libvpx_test::Buffer;
 class IDCTTest : public ::testing::TestWithParam<IdctFunc> {
 protected:
  virtual void SetUp() {
    int i;
    UUT = GetParam();
-
+    memset(input, 0, sizeof(input));
-    input = new Buffer<int16_t>(4, 4, 0);
+    /* Set up guard blocks */
-    ASSERT_TRUE(input != NULL);
+    for (i = 0; i < 256; i++) output[i] = ((i & 0xF) < 4 && (i < 64)) ? 0 : -1;
    ASSERT_TRUE(input->Init());
    predict = new Buffer<uint8_t>(4, 4, 3);
    ASSERT_TRUE(predict != NULL);
    ASSERT_TRUE(predict->Init());
    output = new Buffer<uint8_t>(4, 4, 3);
    ASSERT_TRUE(output != NULL);
    ASSERT_TRUE(output->Init());
  }
-  virtual void TearDown() {
+  virtual void TearDown() { libvpx_test::ClearSystemState(); }
    delete input;
    delete predict;
    delete output;
    libvpx_test::ClearSystemState();
  }
  IdctFunc UUT;
-  Buffer<int16_t> *input;
+  int16_t input[16];
-  Buffer<uint8_t> *predict;
+  unsigned char output[256];
-  Buffer<uint8_t> *output;
+  unsigned char predict[256];
 };
 TEST_P(IDCTTest, TestGuardBlocks) {
  int i;
  for (i = 0; i < 256; i++) {
    if ((i & 0xF) < 4 && i < 64)
      EXPECT_EQ(0, output[i]) << i;
    else
      EXPECT_EQ(255, output[i]);
  }
 }
 TEST_P(IDCTTest, TestAllZeros) {
-  // When the input is '0' the output will be '0'.
+  int i;
  input->Set(0);
  predict->Set(0);
  output->Set(0);
-  ASM_REGISTER_STATE_CHECK(UUT(input->TopLeftPixel(), predict->TopLeftPixel(),
+  ASM_REGISTER_STATE_CHECK(UUT(input, output, 16, output, 16));
                               predict->stride(), output->TopLeftPixel(),
                               output->stride()));
-  ASSERT_TRUE(input->CheckValues(0));
+  for (i = 0; i < 256; i++) {
-  ASSERT_TRUE(input->CheckPadding());
+    if ((i & 0xF) < 4 && i < 64)
-  ASSERT_TRUE(output->CheckValues(0));
+      EXPECT_EQ(0, output[i]) << "i==" << i;
-  ASSERT_TRUE(output->CheckPadding());
+    else
      EXPECT_EQ(255, output[i]) << "i==" << i;
  }
 }
 TEST_P(IDCTTest, TestAllOnes) {
-  input->Set(0);
+  int i;
  // When the first element is '4' it will fill the output buffer with '1'.
  input->TopLeftPixel()[0] = 4;
  predict->Set(0);
  output->Set(0);
-  ASM_REGISTER_STATE_CHECK(UUT(input->TopLeftPixel(), predict->TopLeftPixel(),
+  input[0] = 4;
-                               predict->stride(), output->TopLeftPixel(),
+  ASM_REGISTER_STATE_CHECK(UUT(input, output, 16, output, 16));
                               output->stride()));
-  ASSERT_TRUE(output->CheckValues(1));
+  for (i = 0; i < 256; i++) {
-  ASSERT_TRUE(output->CheckPadding());
+    if ((i & 0xF) < 4 && i < 64)
      EXPECT_EQ(1, output[i]) << "i==" << i;
    else
      EXPECT_EQ(255, output[i]) << "i==" << i;
  }
 }
 TEST_P(IDCTTest, TestAddOne) {
-  // Set the transform output to '1' and make sure it gets added to the
+  int i;
  // prediction buffer.
  input->Set(0);
  input->TopLeftPixel()[0] = 4;
  output->Set(0);
-  uint8_t *pred = predict->TopLeftPixel();
+  for (i = 0; i < 256; i++) predict[i] = i;
-  for (int y = 0; y < 4; ++y) {
+  input[0] = 4;
-    for (int x = 0; x < 4; ++x) {
+  ASM_REGISTER_STATE_CHECK(UUT(input, predict, 16, output, 16));
-      pred[y * predict->stride() + x] = y * 4 + x;
+
-    }
+  for (i = 0; i < 256; i++) {
    if ((i & 0xF) < 4 && i < 64)
      EXPECT_EQ(i + 1, output[i]) << "i==" << i;
    else
      EXPECT_EQ(255, output[i]) << "i==" << i;
  }
  ASM_REGISTER_STATE_CHECK(UUT(input->TopLeftPixel(), predict->TopLeftPixel(),
                               predict->stride(), output->TopLeftPixel(),
                               output->stride()));
  uint8_t const *out = output->TopLeftPixel();
  for (int y = 0; y < 4; ++y) {
    for (int x = 0; x < 4; ++x) {
      EXPECT_EQ(1 + y * 4 + x, out[y * output->stride() + x]);
    }
  }
  if (HasFailure()) {
    output->DumpBuffer();
  }
  ASSERT_TRUE(output->CheckPadding());
 }
 TEST_P(IDCTTest, TestWithData) {
-  // Test a single known input.
+  int i;
  predict->Set(0);
-  int16_t *in = input->TopLeftPixel();
+  for (i = 0; i < 16; i++) input[i] = i;
-  for (int y = 0; y < 4; ++y) {
+
-    for (int x = 0; x < 4; ++x) {
+  ASM_REGISTER_STATE_CHECK(UUT(input, output, 16, output, 16));
-      in[y * input->stride() + x] = y * 4 + x;
+
-    }
+  for (i = 0; i < 256; i++) {
    if ((i & 0xF) > 3 || i > 63)
      EXPECT_EQ(255, output[i]) << "i==" << i;
    else if (i == 0)
      EXPECT_EQ(11, output[i]) << "i==" << i;
    else if (i == 34)
      EXPECT_EQ(1, output[i]) << "i==" << i;
    else if (i == 2 || i == 17 || i == 32)
      EXPECT_EQ(3, output[i]) << "i==" << i;
    else
      EXPECT_EQ(0, output[i]) << "i==" << i;
  }
  ASM_REGISTER_STATE_CHECK(UUT(input->TopLeftPixel(), predict->TopLeftPixel(),
                               predict->stride(), output->TopLeftPixel(),
                               output->stride()));
  uint8_t *out = output->TopLeftPixel();
  for (int y = 0; y < 4; ++y) {
    for (int x = 0; x < 4; ++x) {
      switch (y * 4 + x) {
        case 0: EXPECT_EQ(11, out[y * output->stride() + x]); break;
        case 2:
        case 5:
        case 8: EXPECT_EQ(3, out[y * output->stride() + x]); break;
        case 10: EXPECT_EQ(1, out[y * output->stride() + x]); break;
        default: EXPECT_EQ(0, out[y * output->stride() + x]);
      }
    }
  }
  if (HasFailure()) {
    output->DumpBuffer();
  }
  ASSERT_TRUE(output->CheckPadding());
 }
 INSTANTIATE_TEST_CASE_P(C, IDCTTest, ::testing::Values(vp8_short_idct4x4llm_c));
 #if HAVE_NEON
 INSTANTIATE_TEST_CASE_P(NEON, IDCTTest,
                        ::testing::Values(vp8_short_idct4x4llm_neon));
-#endif  // HAVE_NEON
+#endif
 #if HAVE_MMX
 INSTANTIATE_TEST_CASE_P(MMX, IDCTTest,
                        ::testing::Values(vp8_short_idct4x4llm_mmx));
-#endif  // HAVE_MMX
+#endif
 #if HAVE_MSA
 INSTANTIATE_TEST_CASE_P(MSA, IDCTTest,
                        ::testing::Values(vp8_short_idct4x4llm_msa));
-#endif  // HAVE_MSA
+#endif
-
+}
 #if HAVE_MMI
 INSTANTIATE_TEST_CASE_P(MMI, IDCTTest,
                        ::testing::Values(vp8_short_idct4x4llm_mmi));
 #endif  // HAVE_MMI
 }  // namespace
--- a/test/invalid_file_test.cc
+++ b/test/invalid_file_test.cc
@ -45,8 +45,8 @@ class InvalidFileTest : public ::libvpx_test::DecoderTest,
  void OpenResFile(const std::string &res_file_name_) {
    res_file_ = libvpx_test::OpenTestDataFile(res_file_name_);
-    ASSERT_TRUE(res_file_ != NULL)
+    ASSERT_TRUE(res_file_ != NULL) << "Result file open failed. Filename: "
-        << "Result file open failed. Filename: " << res_file_name_;
+                                   << res_file_name_;
  }
  virtual bool HandleDecodeResult(
@ -120,24 +120,11 @@ class InvalidFileTest : public ::libvpx_test::DecoderTest,
 TEST_P(InvalidFileTest, ReturnCode) { RunTest(); }
 #if CONFIG_VP8_DECODER
 const DecodeParam kVP8InvalidFileTests[] = {
  { 1, "invalid-bug-1443.ivf" },
  { 1, "invalid-token-partition.ivf" },
 };
 VP8_INSTANTIATE_TEST_CASE(InvalidFileTest,
                          ::testing::ValuesIn(kVP8InvalidFileTests));
 #endif  // CONFIG_VP8_DECODER
 #if CONFIG_VP9_DECODER
 const DecodeParam kVP9InvalidFileTests[] = {
  { 1, "invalid-vp90-02-v2.webm" },
 #if CONFIG_VP9_HIGHBITDEPTH
  { 1, "invalid-vp90-2-00-quantizer-00.webm.ivf.s5861_r01-05_b6-.v2.ivf" },
  { 1,
    "invalid-vp90-2-21-resize_inter_320x180_5_3-4.webm.ivf.s45551_r01-05_b6-."
    "ivf" },
 #endif
  { 1, "invalid-vp90-03-v3.webm" },
  { 1, "invalid-vp90-2-00-quantizer-11.webm.ivf.s52984_r01-05_b6-.ivf" },
@ -177,12 +164,12 @@ class InvalidFileInvalidPeekTest : public InvalidFileTest {
 TEST_P(InvalidFileInvalidPeekTest, ReturnCode) { RunTest(); }
 #if CONFIG_VP8_DECODER
-const DecodeParam kVP8InvalidPeekTests[] = {
+const DecodeParam kVP8InvalidFileTests[] = {
  { 1, "invalid-vp80-00-comprehensive-018.ivf.2kf_0x6.ivf" },
 };
 VP8_INSTANTIATE_TEST_CASE(InvalidFileInvalidPeekTest,
-                          ::testing::ValuesIn(kVP8InvalidPeekTests));
+                          ::testing::ValuesIn(kVP8InvalidFileTests));
 #endif  // CONFIG_VP8_DECODER
 #if CONFIG_VP9_DECODER
--- a/test/ivf_video_source.h
+++ b/test/ivf_video_source.h
@ -47,8 +47,8 @@ class IVFVideoSource : public CompressedVideoSource {
  virtual void Begin() {
    input_file_ = OpenTestDataFile(file_name_);
-    ASSERT_TRUE(input_file_ != NULL)
+    ASSERT_TRUE(input_file_ != NULL) << "Input file open failed. Filename: "
-        << "Input file open failed. Filename: " << file_name_;
+                                     << file_name_;
    // Read file header
    uint8_t file_hdr[kIvfFileHdrSize];
--- a/test/keyframe_test.cc
+++ b/test/keyframe_test.cc
@ -68,9 +68,7 @@ TEST_P(KeyframeTest, TestRandomVideoSource) {
  // In realtime mode - auto placed keyframes are exceedingly rare,  don't
  // bother with this check   if(GetParam() > 0)
-  if (GET_PARAM(1) > 0) {
+  if (GET_PARAM(1) > 0) EXPECT_GT(kf_count_, 1);
    EXPECT_GT(kf_count_, 1);
  }
 }
 TEST_P(KeyframeTest, TestDisableKeyframes) {
@ -130,16 +128,15 @@ TEST_P(KeyframeTest, TestAutoKeyframe) {
  // In realtime mode - auto placed keyframes are exceedingly rare,  don't
  // bother with this check
-  if (GET_PARAM(1) > 0) {
+  if (GET_PARAM(1) > 0)
    EXPECT_EQ(2u, kf_pts_list_.size()) << " Not the right number of keyframes ";
  }
  // Verify that keyframes match the file keyframes in the file.
  for (std::vector<vpx_codec_pts_t>::const_iterator iter = kf_pts_list_.begin();
       iter != kf_pts_list_.end(); ++iter) {
    if (deadline_ == VPX_DL_REALTIME && *iter > 0)
-      EXPECT_EQ(0, (*iter - 1) % 30)
+      EXPECT_EQ(0, (*iter - 1) % 30) << "Unexpected keyframe at frame "
-          << "Unexpected keyframe at frame " << *iter;
+                                     << *iter;
    else
      EXPECT_EQ(0, *iter % 30) << "Unexpected keyframe at frame " << *iter;
  }
--- a/test/level_test.cc
+++ b/test/level_test.cc
@ -66,36 +66,6 @@ class LevelTest
  int level_;
 };
 TEST_P(LevelTest, TestTargetLevel11Large) {
  ASSERT_NE(encoding_mode_, ::libvpx_test::kRealTime);
  ::libvpx_test::I420VideoSource video("hantro_odd.yuv", 208, 144, 30, 1, 0,
                                       60);
  target_level_ = 11;
  cfg_.rc_target_bitrate = 150;
  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
  ASSERT_GE(target_level_, level_);
 }
 TEST_P(LevelTest, TestTargetLevel20Large) {
  ASSERT_NE(encoding_mode_, ::libvpx_test::kRealTime);
  ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
                                       30, 1, 0, 60);
  target_level_ = 20;
  cfg_.rc_target_bitrate = 1200;
  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
  ASSERT_GE(target_level_, level_);
 }
 TEST_P(LevelTest, TestTargetLevel31Large) {
  ASSERT_NE(encoding_mode_, ::libvpx_test::kRealTime);
  ::libvpx_test::I420VideoSource video("niklas_1280_720_30.y4m", 1280, 720, 30,
                                       1, 0, 60);
  target_level_ = 31;
  cfg_.rc_target_bitrate = 8000;
  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
  ASSERT_GE(target_level_, level_);
 }
 // Test for keeping level stats only
 TEST_P(LevelTest, TestTargetLevel0) {
  ::libvpx_test::I420VideoSource video("hantro_odd.yuv", 208, 144, 30, 1, 0,
@ -103,11 +73,11 @@ TEST_P(LevelTest, TestTargetLevel0) {
  target_level_ = 0;
  min_gf_internal_ = 4;
  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
-  ASSERT_GE(11, level_);
+  ASSERT_EQ(11, level_);
  cfg_.rc_target_bitrate = 1600;
  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
-  ASSERT_GE(20, level_);
+  ASSERT_EQ(20, level_);
 }
 // Test for level control being turned off
@ -124,13 +94,12 @@ TEST_P(LevelTest, TestTargetLevelApi) {
  vpx_codec_ctx_t enc;
  vpx_codec_enc_cfg_t cfg;
  EXPECT_EQ(VPX_CODEC_OK, vpx_codec_enc_config_default(codec, &cfg, 0));
  cfg.rc_target_bitrate = 100;
  EXPECT_EQ(VPX_CODEC_OK, vpx_codec_enc_init(&enc, codec, &cfg, 0));
  for (int level = 0; level <= 256; ++level) {
    if (level == 10 || level == 11 || level == 20 || level == 21 ||
        level == 30 || level == 31 || level == 40 || level == 41 ||
        level == 50 || level == 51 || level == 52 || level == 60 ||
-        level == 61 || level == 62 || level == 0 || level == 1 || level == 255)
+        level == 61 || level == 62 || level == 0 || level == 255)
      EXPECT_EQ(VPX_CODEC_OK,
                vpx_codec_control(&enc, VP9E_SET_TARGET_LEVEL, level));
    else
--- a/test/lpf_test.cc
+++ b/test/lpf_test.cc
@ -56,8 +56,8 @@ typedef void (*dual_loop_op_t)(Pixel *s, int p, const uint8_t *blimit0,
                               const uint8_t *thresh1);
 #endif  // CONFIG_VP9_HIGHBITDEPTH
-typedef ::testing::tuple<loop_op_t, loop_op_t, int> loop8_param_t;
+typedef std::tr1::tuple<loop_op_t, loop_op_t, int> loop8_param_t;
-typedef ::testing::tuple<dual_loop_op_t, dual_loop_op_t, int> dualloop8_param_t;
+typedef std::tr1::tuple<dual_loop_op_t, dual_loop_op_t, int> dualloop8_param_t;
 void InitInput(Pixel *s, Pixel *ref_s, ACMRandom *rnd, const uint8_t limit,
               const int mask, const int32_t p, const int i) {
@ -114,18 +114,6 @@ void InitInput(Pixel *s, Pixel *ref_s, ACMRandom *rnd, const uint8_t limit,
  }
 }
 uint8_t GetOuterThresh(ACMRandom *rnd) {
  return static_cast<uint8_t>(rnd->RandRange(3 * MAX_LOOP_FILTER + 5));
 }
 uint8_t GetInnerThresh(ACMRandom *rnd) {
  return static_cast<uint8_t>(rnd->RandRange(MAX_LOOP_FILTER + 1));
 }
 uint8_t GetHevThresh(ACMRandom *rnd) {
  return static_cast<uint8_t>(rnd->RandRange(MAX_LOOP_FILTER + 1) >> 4);
 }
 class Loop8Test6Param : public ::testing::TestWithParam<loop8_param_t> {
 public:
  virtual ~Loop8Test6Param() {}
@ -174,15 +162,15 @@ TEST_P(Loop8Test6Param, OperationCheck) {
  int first_failure = -1;
  for (int i = 0; i < count_test_block; ++i) {
    int err_count = 0;
-    uint8_t tmp = GetOuterThresh(&rnd);
+    uint8_t tmp = static_cast<uint8_t>(rnd(3 * MAX_LOOP_FILTER + 4));
    DECLARE_ALIGNED(16, const uint8_t,
                    blimit[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
                                    tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };
-    tmp = GetInnerThresh(&rnd);
+    tmp = static_cast<uint8_t>(rnd(MAX_LOOP_FILTER));
    DECLARE_ALIGNED(16, const uint8_t,
                    limit[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
                                   tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };
-    tmp = GetHevThresh(&rnd);
+    tmp = rnd.Rand8();
    DECLARE_ALIGNED(16, const uint8_t,
                    thresh[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
                                    tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };
@ -233,15 +221,15 @@ TEST_P(Loop8Test6Param, ValueCheck) {
  for (int i = 0; i < count_test_block; ++i) {
    int err_count = 0;
-    uint8_t tmp = GetOuterThresh(&rnd);
+    uint8_t tmp = static_cast<uint8_t>(rnd(3 * MAX_LOOP_FILTER + 4));
    DECLARE_ALIGNED(16, const uint8_t,
                    blimit[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
                                    tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };
-    tmp = GetInnerThresh(&rnd);
+    tmp = static_cast<uint8_t>(rnd(MAX_LOOP_FILTER));
    DECLARE_ALIGNED(16, const uint8_t,
                    limit[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
                                   tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };
-    tmp = GetHevThresh(&rnd);
+    tmp = rnd.Rand8();
    DECLARE_ALIGNED(16, const uint8_t,
                    thresh[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
                                    tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };
@ -283,27 +271,27 @@ TEST_P(Loop8Test9Param, OperationCheck) {
  int first_failure = -1;
  for (int i = 0; i < count_test_block; ++i) {
    int err_count = 0;
-    uint8_t tmp = GetOuterThresh(&rnd);
+    uint8_t tmp = static_cast<uint8_t>(rnd(3 * MAX_LOOP_FILTER + 4));
    DECLARE_ALIGNED(16, const uint8_t,
                    blimit0[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
                                     tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };
-    tmp = GetInnerThresh(&rnd);
+    tmp = static_cast<uint8_t>(rnd(MAX_LOOP_FILTER));
    DECLARE_ALIGNED(16, const uint8_t,
                    limit0[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
                                    tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };
-    tmp = GetHevThresh(&rnd);
+    tmp = rnd.Rand8();
    DECLARE_ALIGNED(16, const uint8_t,
                    thresh0[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
                                     tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };
-    tmp = GetOuterThresh(&rnd);
+    tmp = static_cast<uint8_t>(rnd(3 * MAX_LOOP_FILTER + 4));
    DECLARE_ALIGNED(16, const uint8_t,
                    blimit1[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
                                     tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };
-    tmp = GetInnerThresh(&rnd);
+    tmp = static_cast<uint8_t>(rnd(MAX_LOOP_FILTER));
    DECLARE_ALIGNED(16, const uint8_t,
                    limit1[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
                                    tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };
-    tmp = GetHevThresh(&rnd);
+    tmp = rnd.Rand8();
    DECLARE_ALIGNED(16, const uint8_t,
                    thresh1[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
                                     tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };
@ -346,27 +334,27 @@ TEST_P(Loop8Test9Param, ValueCheck) {
  int first_failure = -1;
  for (int i = 0; i < count_test_block; ++i) {
    int err_count = 0;
-    uint8_t tmp = GetOuterThresh(&rnd);
+    uint8_t tmp = static_cast<uint8_t>(rnd(3 * MAX_LOOP_FILTER + 4));
    DECLARE_ALIGNED(16, const uint8_t,
                    blimit0[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
                                     tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };
-    tmp = GetInnerThresh(&rnd);
+    tmp = static_cast<uint8_t>(rnd(MAX_LOOP_FILTER));
    DECLARE_ALIGNED(16, const uint8_t,
                    limit0[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
                                    tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };
-    tmp = GetHevThresh(&rnd);
+    tmp = rnd.Rand8();
    DECLARE_ALIGNED(16, const uint8_t,
                    thresh0[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
                                     tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };
-    tmp = GetOuterThresh(&rnd);
+    tmp = static_cast<uint8_t>(rnd(3 * MAX_LOOP_FILTER + 4));
    DECLARE_ALIGNED(16, const uint8_t,
                    blimit1[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
                                     tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };
-    tmp = GetInnerThresh(&rnd);
+    tmp = static_cast<uint8_t>(rnd(MAX_LOOP_FILTER));
    DECLARE_ALIGNED(16, const uint8_t,
                    limit1[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
                                    tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };
-    tmp = GetHevThresh(&rnd);
+    tmp = rnd.Rand8();
    DECLARE_ALIGNED(16, const uint8_t,
                    thresh1[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
                                     tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };
@ -402,7 +390,7 @@ TEST_P(Loop8Test9Param, ValueCheck) {
      << "First failed at test case " << first_failure;
 }
-using ::testing::make_tuple;
+using std::tr1::make_tuple;
 #if HAVE_SSE2
 #if CONFIG_VP9_HIGHBITDEPTH
--- a/test/minmax_test.cc
+++ b/test/minmax_test.cc
@ -107,10 +107,10 @@ TEST_P(MinMaxTest, CompareReferenceAndVaryStride) {
      int min_ref, max_ref, min, max;
      reference_minmax(a, a_stride, b, b_stride, &min_ref, &max_ref);
      ASM_REGISTER_STATE_CHECK(mm_func_(a, a_stride, b, b_stride, &min, &max));
-      EXPECT_EQ(max_ref, max)
+      EXPECT_EQ(max_ref, max) << "when a_stride = " << a_stride
-          << "when a_stride = " << a_stride << " and b_stride = " << b_stride;
+                              << " and b_stride = " << b_stride;
-      EXPECT_EQ(min_ref, min)
+      EXPECT_EQ(min_ref, min) << "when a_stride = " << a_stride
-          << "when a_stride = " << a_stride << " and b_stride = " << b_stride;
+                              << " and b_stride = " << b_stride;
    }
  }
 }
@ -127,9 +127,4 @@ INSTANTIATE_TEST_CASE_P(NEON, MinMaxTest,
                        ::testing::Values(&vpx_minmax_8x8_neon));
 #endif
 #if HAVE_MSA
 INSTANTIATE_TEST_CASE_P(MSA, MinMaxTest,
                        ::testing::Values(&vpx_minmax_8x8_msa));
 #endif
 }  // namespace
--- a/test/partial_idct_test.cc
+++ b/test/partial_idct_test.cc
--- a/test/pp_filter_test.cc
+++ b/test/pp_filter_test.cc
@ -7,40 +7,21 @@
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
 #include <limits.h>
 #include "./vpx_config.h"
 #include "./vpx_dsp_rtcd.h"
 #include "test/acm_random.h"
 #include "test/buffer.h"
 #include "test/clear_system_state.h"
 #include "test/register_state_check.h"
 #include "third_party/googletest/src/include/gtest/gtest.h"
 #include "vpx/vpx_integer.h"
 #include "vpx_mem/vpx_mem.h"
 using libvpx_test::ACMRandom;
 using libvpx_test::Buffer;
 typedef void (*VpxPostProcDownAndAcrossMbRowFunc)(
    unsigned char *src_ptr, unsigned char *dst_ptr, int src_pixels_per_line,
    int dst_pixels_per_line, int cols, unsigned char *flimit, int size);
 typedef void (*VpxMbPostProcAcrossIpFunc)(unsigned char *src, int pitch,
                                          int rows, int cols, int flimit);
 typedef void (*VpxMbPostProcDownFunc)(unsigned char *dst, int pitch, int rows,
                                      int cols, int flimit);
 namespace {
 // Compute the filter level used in post proc from the loop filter strength
 int q2mbl(int x) {
  if (x < 20) x = 20;
  x = 50 + (x - 50) * 10 / 8;
  return x * x / 3;
 }
 class VpxPostProcDownAndAcrossMbRowTest
    : public ::testing::TestWithParam<VpxPostProcDownAndAcrossMbRowFunc> {
 public:
@ -56,16 +37,25 @@ TEST_P(VpxPostProcDownAndAcrossMbRowTest, CheckFilterOutput) {
  const int block_height = 16;
  // 5-tap filter needs 2 padding rows above and below the block in the input.
-  Buffer<uint8_t> src_image = Buffer<uint8_t>(block_width, block_height, 2);
+  const int input_width = block_width;
-  ASSERT_TRUE(src_image.Init());
+  const int input_height = block_height + 4;
  const int input_stride = input_width;
  const int input_size = input_width * input_height;
  // Filter extends output block by 8 samples at left and right edges.
-  // Though the left padding is only 8 bytes, the assembly code tries to
+  const int output_width = block_width + 16;
-  // read 16 bytes before the pointer.
+  const int output_height = block_height;
-  Buffer<uint8_t> dst_image =
+  const int output_stride = output_width;
-      Buffer<uint8_t>(block_width, block_height, 8, 16, 8, 8);
+  const int output_size = output_width * output_height;
  ASSERT_TRUE(dst_image.Init());
  uint8_t *const src_image =
      reinterpret_cast<uint8_t *>(vpx_calloc(input_size, 1));
  uint8_t *const dst_image =
      reinterpret_cast<uint8_t *>(vpx_calloc(output_size, 1));
  // Pointers to top-left pixel of block in the input and output images.
  uint8_t *const src_image_ptr = src_image + (input_stride << 1);
  uint8_t *const dst_image_ptr = dst_image + 8;
  uint8_t *const flimits =
      reinterpret_cast<uint8_t *>(vpx_memalign(16, block_width));
  (void)memset(flimits, 255, block_width);
@ -73,412 +63,53 @@ TEST_P(VpxPostProcDownAndAcrossMbRowTest, CheckFilterOutput) {
  // Initialize pixels in the input:
  //   block pixels to value 1,
  //   border pixels to value 10.
-  src_image.SetPadding(10);
+  (void)memset(src_image, 10, input_size);
-  src_image.Set(1);
+  uint8_t *pixel_ptr = src_image_ptr;
  for (int i = 0; i < block_height; ++i) {
    for (int j = 0; j < block_width; ++j) {
      pixel_ptr[j] = 1;
    }
    pixel_ptr += input_stride;
  }
  // Initialize pixels in the output to 99.
-  dst_image.Set(99);
+  (void)memset(dst_image, 99, output_size);
-  ASM_REGISTER_STATE_CHECK(GetParam()(
+  ASM_REGISTER_STATE_CHECK(GetParam()(src_image_ptr, dst_image_ptr,
-      src_image.TopLeftPixel(), dst_image.TopLeftPixel(), src_image.stride(),
+                                      input_stride, output_stride, block_width,
-      dst_image.stride(), block_width, flimits, 16));
+                                      flimits, 16));
  static const uint8_t kExpectedOutput[block_height] = {
    4, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 4
  };
-  uint8_t *pixel_ptr = dst_image.TopLeftPixel();
+  pixel_ptr = dst_image_ptr;
  for (int i = 0; i < block_height; ++i) {
    for (int j = 0; j < block_width; ++j) {
-      ASSERT_EQ(kExpectedOutput[i], pixel_ptr[j])
+      ASSERT_EQ(kExpectedOutput[i], pixel_ptr[j]);
          << "at (" << i << ", " << j << ")";
    }
-    pixel_ptr += dst_image.stride();
+    pixel_ptr += output_stride;
  }
  vpx_free(src_image);
  vpx_free(dst_image);
  vpx_free(flimits);
 };
 TEST_P(VpxPostProcDownAndAcrossMbRowTest, CheckCvsAssembly) {
  // Size of the underlying data block that will be filtered.
  // Y blocks are always a multiple of 16 wide and exactly 16 high. U and V
  // blocks are always a multiple of 8 wide and exactly 8 high.
  const int block_width = 136;
  const int block_height = 16;
  // 5-tap filter needs 2 padding rows above and below the block in the input.
  // SSE2 reads in blocks of 16. Pad an extra 8 in case the width is not %16.
  Buffer<uint8_t> src_image =
      Buffer<uint8_t>(block_width, block_height, 2, 2, 10, 2);
  ASSERT_TRUE(src_image.Init());
  // Filter extends output block by 8 samples at left and right edges.
  // Though the left padding is only 8 bytes, there is 'above' padding as well
  // so when the assembly code tries to read 16 bytes before the pointer it is
  // not a problem.
  // SSE2 reads in blocks of 16. Pad an extra 8 in case the width is not %16.
  Buffer<uint8_t> dst_image =
      Buffer<uint8_t>(block_width, block_height, 8, 8, 16, 8);
  ASSERT_TRUE(dst_image.Init());
  Buffer<uint8_t> dst_image_ref = Buffer<uint8_t>(block_width, block_height, 8);
  ASSERT_TRUE(dst_image_ref.Init());
  // Filter values are set in blocks of 16 for Y and 8 for U/V. Each macroblock
  // can have a different filter. SSE2 assembly reads flimits in blocks of 16 so
  // it must be padded out.
  const int flimits_width = block_width % 16 ? block_width + 8 : block_width;
  uint8_t *const flimits =
      reinterpret_cast<uint8_t *>(vpx_memalign(16, flimits_width));
  ACMRandom rnd;
  rnd.Reset(ACMRandom::DeterministicSeed());
  // Initialize pixels in the input:
  //   block pixels to random values.
  //   border pixels to value 10.
  src_image.SetPadding(10);
  src_image.Set(&rnd, &ACMRandom::Rand8);
  for (int blocks = 0; blocks < block_width; blocks += 8) {
    (void)memset(flimits, 0, sizeof(*flimits) * flimits_width);
    for (int f = 0; f < 255; f++) {
      (void)memset(flimits + blocks, f, sizeof(*flimits) * 8);
      dst_image.Set(0);
      dst_image_ref.Set(0);
      vpx_post_proc_down_and_across_mb_row_c(
          src_image.TopLeftPixel(), dst_image_ref.TopLeftPixel(),
          src_image.stride(), dst_image_ref.stride(), block_width, flimits,
          block_height);
      ASM_REGISTER_STATE_CHECK(
          GetParam()(src_image.TopLeftPixel(), dst_image.TopLeftPixel(),
                     src_image.stride(), dst_image.stride(), block_width,
                     flimits, block_height));
      ASSERT_TRUE(dst_image.CheckValues(dst_image_ref));
    }
  }
  vpx_free(flimits);
 }
 class VpxMbPostProcAcrossIpTest
    : public ::testing::TestWithParam<VpxMbPostProcAcrossIpFunc> {
 public:
  virtual void TearDown() { libvpx_test::ClearSystemState(); }
 protected:
  void SetCols(unsigned char *s, int rows, int cols, int src_width) {
    for (int r = 0; r < rows; r++) {
      for (int c = 0; c < cols; c++) {
        s[c] = c;
      }
      s += src_width;
    }
  }
  void RunComparison(const unsigned char *expected_output, unsigned char *src_c,
                     int rows, int cols, int src_pitch) {
    for (int r = 0; r < rows; r++) {
      for (int c = 0; c < cols; c++) {
        ASSERT_EQ(expected_output[c], src_c[c])
            << "at (" << r << ", " << c << ")";
      }
      src_c += src_pitch;
    }
  }
  void RunFilterLevel(unsigned char *s, int rows, int cols, int src_width,
                      int filter_level, const unsigned char *expected_output) {
    ASM_REGISTER_STATE_CHECK(
        GetParam()(s, src_width, rows, cols, filter_level));
    RunComparison(expected_output, s, rows, cols, src_width);
  }
 };
 TEST_P(VpxMbPostProcAcrossIpTest, CheckLowFilterOutput) {
  const int rows = 16;
  const int cols = 16;
  Buffer<uint8_t> src = Buffer<uint8_t>(cols, rows, 8, 8, 17, 8);
  ASSERT_TRUE(src.Init());
  src.SetPadding(10);
  SetCols(src.TopLeftPixel(), rows, cols, src.stride());
  Buffer<uint8_t> expected_output = Buffer<uint8_t>(cols, rows, 0);
  ASSERT_TRUE(expected_output.Init());
  SetCols(expected_output.TopLeftPixel(), rows, cols, expected_output.stride());
  RunFilterLevel(src.TopLeftPixel(), rows, cols, src.stride(), q2mbl(0),
                 expected_output.TopLeftPixel());
 }
 TEST_P(VpxMbPostProcAcrossIpTest, CheckMediumFilterOutput) {
  const int rows = 16;
  const int cols = 16;
  Buffer<uint8_t> src = Buffer<uint8_t>(cols, rows, 8, 8, 17, 8);
  ASSERT_TRUE(src.Init());
  src.SetPadding(10);
  SetCols(src.TopLeftPixel(), rows, cols, src.stride());
  static const unsigned char kExpectedOutput[cols] = {
    2, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 13
  };
  RunFilterLevel(src.TopLeftPixel(), rows, cols, src.stride(), q2mbl(70),
                 kExpectedOutput);
 }
 TEST_P(VpxMbPostProcAcrossIpTest, CheckHighFilterOutput) {
  const int rows = 16;
  const int cols = 16;
  Buffer<uint8_t> src = Buffer<uint8_t>(cols, rows, 8, 8, 17, 8);
  ASSERT_TRUE(src.Init());
  src.SetPadding(10);
  SetCols(src.TopLeftPixel(), rows, cols, src.stride());
  static const unsigned char kExpectedOutput[cols] = {
    2, 2, 3, 4, 4, 5, 6, 7, 8, 9, 10, 11, 11, 12, 13, 13
  };
  RunFilterLevel(src.TopLeftPixel(), rows, cols, src.stride(), INT_MAX,
                 kExpectedOutput);
  SetCols(src.TopLeftPixel(), rows, cols, src.stride());
  RunFilterLevel(src.TopLeftPixel(), rows, cols, src.stride(), q2mbl(100),
                 kExpectedOutput);
 }
 TEST_P(VpxMbPostProcAcrossIpTest, CheckCvsAssembly) {
  const int rows = 16;
  const int cols = 16;
  Buffer<uint8_t> c_mem = Buffer<uint8_t>(cols, rows, 8, 8, 17, 8);
  ASSERT_TRUE(c_mem.Init());
  Buffer<uint8_t> asm_mem = Buffer<uint8_t>(cols, rows, 8, 8, 17, 8);
  ASSERT_TRUE(asm_mem.Init());
  // When level >= 100, the filter behaves the same as the level = INT_MAX
  // When level < 20, it behaves the same as the level = 0
  for (int level = 0; level < 100; level++) {
    c_mem.SetPadding(10);
    asm_mem.SetPadding(10);
    SetCols(c_mem.TopLeftPixel(), rows, cols, c_mem.stride());
    SetCols(asm_mem.TopLeftPixel(), rows, cols, asm_mem.stride());
    vpx_mbpost_proc_across_ip_c(c_mem.TopLeftPixel(), c_mem.stride(), rows,
                                cols, q2mbl(level));
    ASM_REGISTER_STATE_CHECK(GetParam()(
        asm_mem.TopLeftPixel(), asm_mem.stride(), rows, cols, q2mbl(level)));
    ASSERT_TRUE(asm_mem.CheckValues(c_mem));
  }
 }
 class VpxMbPostProcDownTest
    : public ::testing::TestWithParam<VpxMbPostProcDownFunc> {
 public:
  virtual void TearDown() { libvpx_test::ClearSystemState(); }
 protected:
  void SetRows(unsigned char *src_c, int rows, int cols, int src_width) {
    for (int r = 0; r < rows; r++) {
      memset(src_c, r, cols);
      src_c += src_width;
    }
  }
  void RunComparison(const unsigned char *expected_output, unsigned char *src_c,
                     int rows, int cols, int src_pitch) {
    for (int r = 0; r < rows; r++) {
      for (int c = 0; c < cols; c++) {
        ASSERT_EQ(expected_output[r * rows + c], src_c[c])
            << "at (" << r << ", " << c << ")";
      }
      src_c += src_pitch;
    }
  }
  void RunFilterLevel(unsigned char *s, int rows, int cols, int src_width,
                      int filter_level, const unsigned char *expected_output) {
    ASM_REGISTER_STATE_CHECK(
        GetParam()(s, src_width, rows, cols, filter_level));
    RunComparison(expected_output, s, rows, cols, src_width);
  }
 };
 TEST_P(VpxMbPostProcDownTest, CheckHighFilterOutput) {
  const int rows = 16;
  const int cols = 16;
  Buffer<uint8_t> src_c = Buffer<uint8_t>(cols, rows, 8, 8, 8, 17);
  ASSERT_TRUE(src_c.Init());
  src_c.SetPadding(10);
  SetRows(src_c.TopLeftPixel(), rows, cols, src_c.stride());
  static const unsigned char kExpectedOutput[rows * cols] = {
    2,  2,  1,  1,  2,  2,  2,  2,  2,  2,  1,  1,  2,  2,  2,  2,  2,  2,  2,
    2,  3,  2,  2,  2,  2,  2,  2,  2,  3,  2,  2,  2,  3,  3,  3,  3,  3,  3,
    3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  4,  4,  3,  4,  4,  3,  3,  3,
    4,  4,  3,  4,  4,  3,  3,  4,  5,  4,  4,  4,  4,  4,  4,  4,  5,  4,  4,
    4,  4,  4,  4,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,
    5,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  7,  7,
    7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  8,  8,  8,  8,  8,
    8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  9,  8,  9,  9,  8,  8,  8,  9,
    9,  8,  9,  9,  8,  8,  8,  9,  9,  10, 10, 9,  9,  9,  10, 10, 9,  10, 10,
    9,  9,  9,  10, 10, 10, 11, 10, 10, 10, 11, 10, 11, 10, 11, 10, 10, 10, 11,
    10, 11, 11, 11, 11, 11, 11, 11, 12, 11, 11, 11, 11, 11, 11, 11, 12, 11, 12,
    12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 13, 12,
    13, 12, 13, 12, 12, 12, 13, 12, 13, 12, 13, 12, 13, 13, 13, 14, 13, 13, 13,
    13, 13, 13, 13, 14, 13, 13, 13, 13
  };
  RunFilterLevel(src_c.TopLeftPixel(), rows, cols, src_c.stride(), INT_MAX,
                 kExpectedOutput);
  src_c.SetPadding(10);
  SetRows(src_c.TopLeftPixel(), rows, cols, src_c.stride());
  RunFilterLevel(src_c.TopLeftPixel(), rows, cols, src_c.stride(), q2mbl(100),
                 kExpectedOutput);
 }
 TEST_P(VpxMbPostProcDownTest, CheckMediumFilterOutput) {
  const int rows = 16;
  const int cols = 16;
  Buffer<uint8_t> src_c = Buffer<uint8_t>(cols, rows, 8, 8, 8, 17);
  ASSERT_TRUE(src_c.Init());
  src_c.SetPadding(10);
  SetRows(src_c.TopLeftPixel(), rows, cols, src_c.stride());
  static const unsigned char kExpectedOutput[rows * cols] = {
    2,  2,  1,  1,  2,  2,  2,  2,  2,  2,  1,  1,  2,  2,  2,  2,  2,  2,  2,
    2,  3,  2,  2,  2,  2,  2,  2,  2,  3,  2,  2,  2,  2,  2,  2,  2,  2,  2,
    2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  3,  3,  3,  3,  3,  3,  3,  3,  3,
    3,  3,  3,  3,  3,  3,  3,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
    4,  4,  4,  4,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,
    5,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  7,  7,
    7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  8,  8,  8,  8,  8,
    8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  9,  9,  9,  9,  9,  9,  9,  9,
    9,  9,  9,  9,  9,  9,  9,  9,  10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
    10, 10, 10, 10, 10, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
    11, 11, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 13,
    13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 12, 12, 13, 12,
    13, 12, 13, 12, 12, 12, 13, 12, 13, 12, 13, 12, 13, 13, 13, 14, 13, 13, 13,
    13, 13, 13, 13, 14, 13, 13, 13, 13
  };
  RunFilterLevel(src_c.TopLeftPixel(), rows, cols, src_c.stride(), q2mbl(70),
                 kExpectedOutput);
 }
 TEST_P(VpxMbPostProcDownTest, CheckLowFilterOutput) {
  const int rows = 16;
  const int cols = 16;
  Buffer<uint8_t> src_c = Buffer<uint8_t>(cols, rows, 8, 8, 8, 17);
  ASSERT_TRUE(src_c.Init());
  src_c.SetPadding(10);
  SetRows(src_c.TopLeftPixel(), rows, cols, src_c.stride());
  unsigned char *expected_output = new unsigned char[rows * cols];
  ASSERT_TRUE(expected_output != NULL);
  SetRows(expected_output, rows, cols, cols);
  RunFilterLevel(src_c.TopLeftPixel(), rows, cols, src_c.stride(), q2mbl(0),
                 expected_output);
  delete[] expected_output;
 }
 TEST_P(VpxMbPostProcDownTest, CheckCvsAssembly) {
  const int rows = 16;
  const int cols = 16;
  ACMRandom rnd;
  rnd.Reset(ACMRandom::DeterministicSeed());
  Buffer<uint8_t> src_c = Buffer<uint8_t>(cols, rows, 8, 8, 8, 17);
  ASSERT_TRUE(src_c.Init());
  Buffer<uint8_t> src_asm = Buffer<uint8_t>(cols, rows, 8, 8, 8, 17);
  ASSERT_TRUE(src_asm.Init());
  for (int level = 0; level < 100; level++) {
    src_c.SetPadding(10);
    src_asm.SetPadding(10);
    src_c.Set(&rnd, &ACMRandom::Rand8);
    src_asm.CopyFrom(src_c);
    vpx_mbpost_proc_down_c(src_c.TopLeftPixel(), src_c.stride(), rows, cols,
                           q2mbl(level));
    ASM_REGISTER_STATE_CHECK(GetParam()(
        src_asm.TopLeftPixel(), src_asm.stride(), rows, cols, q2mbl(level)));
    ASSERT_TRUE(src_asm.CheckValues(src_c));
    src_c.SetPadding(10);
    src_asm.SetPadding(10);
    src_c.Set(&rnd, &ACMRandom::Rand8Extremes);
    src_asm.CopyFrom(src_c);
    vpx_mbpost_proc_down_c(src_c.TopLeftPixel(), src_c.stride(), rows, cols,
                           q2mbl(level));
    ASM_REGISTER_STATE_CHECK(GetParam()(
        src_asm.TopLeftPixel(), src_asm.stride(), rows, cols, q2mbl(level)));
    ASSERT_TRUE(src_asm.CheckValues(src_c));
  }
 }
 INSTANTIATE_TEST_CASE_P(
    C, VpxPostProcDownAndAcrossMbRowTest,
    ::testing::Values(vpx_post_proc_down_and_across_mb_row_c));
 INSTANTIATE_TEST_CASE_P(C, VpxMbPostProcAcrossIpTest,
                        ::testing::Values(vpx_mbpost_proc_across_ip_c));
 INSTANTIATE_TEST_CASE_P(C, VpxMbPostProcDownTest,
                        ::testing::Values(vpx_mbpost_proc_down_c));
 #if HAVE_SSE2
 INSTANTIATE_TEST_CASE_P(
    SSE2, VpxPostProcDownAndAcrossMbRowTest,
    ::testing::Values(vpx_post_proc_down_and_across_mb_row_sse2));
-
+#endif
 INSTANTIATE_TEST_CASE_P(SSE2, VpxMbPostProcAcrossIpTest,
                        ::testing::Values(vpx_mbpost_proc_across_ip_sse2));
 INSTANTIATE_TEST_CASE_P(SSE2, VpxMbPostProcDownTest,
                        ::testing::Values(vpx_mbpost_proc_down_sse2));
 #endif  // HAVE_SSE2
 #if HAVE_NEON
 INSTANTIATE_TEST_CASE_P(
    NEON, VpxPostProcDownAndAcrossMbRowTest,
    ::testing::Values(vpx_post_proc_down_and_across_mb_row_neon));
 INSTANTIATE_TEST_CASE_P(NEON, VpxMbPostProcAcrossIpTest,
                        ::testing::Values(vpx_mbpost_proc_across_ip_neon));
 INSTANTIATE_TEST_CASE_P(NEON, VpxMbPostProcDownTest,
                        ::testing::Values(vpx_mbpost_proc_down_neon));
 #endif  // HAVE_NEON
 #if HAVE_MSA
 INSTANTIATE_TEST_CASE_P(
    MSA, VpxPostProcDownAndAcrossMbRowTest,
    ::testing::Values(vpx_post_proc_down_and_across_mb_row_msa));
-
+#endif
 INSTANTIATE_TEST_CASE_P(MSA, VpxMbPostProcAcrossIpTest,
                        ::testing::Values(vpx_mbpost_proc_across_ip_msa));
 INSTANTIATE_TEST_CASE_P(MSA, VpxMbPostProcDownTest,
                        ::testing::Values(vpx_mbpost_proc_down_msa));
 #endif  // HAVE_MSA
 }  // namespace
--- a/test/predict_test.cc
+++ b/test/predict_test.cc
@ -24,14 +24,14 @@
 namespace {
 using ::testing::make_tuple;
 using libvpx_test::ACMRandom;
 using std::tr1::make_tuple;
 typedef void (*PredictFunc)(uint8_t *src_ptr, int src_pixels_per_line,
                            int xoffset, int yoffset, uint8_t *dst_ptr,
                            int dst_pitch);
-typedef ::testing::tuple<int, int, PredictFunc> PredictParam;
+typedef std::tr1::tuple<int, int, PredictFunc> PredictParam;
 class PredictTestBase : public ::testing::TestWithParam<PredictParam> {
 public:
@ -324,15 +324,6 @@ INSTANTIATE_TEST_CASE_P(
                      make_tuple(4, 4, &vp8_sixtap_predict4x4_msa)));
 #endif
 #if HAVE_MMI
 INSTANTIATE_TEST_CASE_P(
    MMI, SixtapPredictTest,
    ::testing::Values(make_tuple(16, 16, &vp8_sixtap_predict16x16_mmi),
                      make_tuple(8, 8, &vp8_sixtap_predict8x8_mmi),
                      make_tuple(8, 4, &vp8_sixtap_predict8x4_mmi),
                      make_tuple(4, 4, &vp8_sixtap_predict4x4_mmi)));
 #endif
 class BilinearPredictTest : public PredictTestBase {};
 TEST_P(BilinearPredictTest, TestWithRandomData) {
--- a/test/quantize_test.cc
+++ b/test/quantize_test.cc
@ -33,10 +33,10 @@ const int kNumBlockEntries = 16;
 typedef void (*VP8Quantize)(BLOCK *b, BLOCKD *d);
-typedef ::testing::tuple<VP8Quantize, VP8Quantize> VP8QuantizeParam;
+typedef std::tr1::tuple<VP8Quantize, VP8Quantize> VP8QuantizeParam;
 using ::testing::make_tuple;
 using libvpx_test::ACMRandom;
 using std::tr1::make_tuple;
 // Create and populate a VP8_COMP instance which has a complete set of
 // quantization inputs as well as a second MACROBLOCKD for output.
@ -200,12 +200,4 @@ INSTANTIATE_TEST_CASE_P(
        make_tuple(&vp8_fast_quantize_b_msa, &vp8_fast_quantize_b_c),
        make_tuple(&vp8_regular_quantize_b_msa, &vp8_regular_quantize_b_c)));
 #endif  // HAVE_MSA
 #if HAVE_MMI
 INSTANTIATE_TEST_CASE_P(
    MMI, QuantizeTest,
    ::testing::Values(
        make_tuple(&vp8_fast_quantize_b_mmi, &vp8_fast_quantize_b_c),
        make_tuple(&vp8_regular_quantize_b_mmi, &vp8_regular_quantize_b_c)));
 #endif  // HAVE_MMI
 }  // namespace
--- a/test/register_state_check.h
+++ b/test/register_state_check.h
@ -28,13 +28,11 @@
 //   See platform implementations of RegisterStateCheckXXX for details.
 //
-#if defined(_WIN64) && ARCH_X86_64
+#if defined(_WIN64)
 #undef NOMINMAX
 #define NOMINMAX
 #ifndef WIN32_LEAN_AND_MEAN
 #define WIN32_LEAN_AND_MEAN
 #endif
 #include <windows.h>
 #include <winnt.h>
@ -113,8 +111,8 @@ class RegisterStateCheck {
    int64_t post_store[8];
    vpx_push_neon(post_store);
    for (int i = 0; i < 8; ++i) {
-      EXPECT_EQ(pre_store_[i], post_store[i])
+      EXPECT_EQ(pre_store_[i], post_store[i]) << "d" << i + 8
-          << "d" << i + 8 << " has been modified";
+                                              << " has been modified";
    }
  }
@ -138,7 +136,7 @@ class RegisterStateCheck {};
 }  // namespace libvpx_test
-#endif  // _WIN64 && ARCH_X86_64
+#endif  // _WIN64
 #if ARCH_X86 || ARCH_X86_64
 #if defined(__GNUC__)
--- a/test/resize_test.cc
+++ b/test/resize_test.cc
@ -277,29 +277,12 @@ class ResizeTest
    SetMode(GET_PARAM(1));
  }
  virtual void FramePktHook(const vpx_codec_cx_pkt_t *pkt) {
    ASSERT_NE(static_cast<int>(pkt->data.frame.width[0]), 0);
    ASSERT_NE(static_cast<int>(pkt->data.frame.height[0]), 0);
    encode_frame_width_.push_back(pkt->data.frame.width[0]);
    encode_frame_height_.push_back(pkt->data.frame.height[0]);
  }
  unsigned int GetFrameWidth(size_t idx) const {
    return encode_frame_width_[idx];
  }
  unsigned int GetFrameHeight(size_t idx) const {
    return encode_frame_height_[idx];
  }
  virtual void DecompressedFrameHook(const vpx_image_t &img,
                                     vpx_codec_pts_t pts) {
    frame_info_list_.push_back(FrameInfo(pts, img.d_w, img.d_h));
  }
  std::vector<FrameInfo> frame_info_list_;
  std::vector<unsigned int> encode_frame_width_;
  std::vector<unsigned int> encode_frame_height_;
 };
 TEST_P(ResizeTest, TestExternalResizeWorks) {
@ -313,15 +296,12 @@ TEST_P(ResizeTest, TestExternalResizeWorks) {
    const unsigned int frame = static_cast<unsigned>(info->pts);
    unsigned int expected_w;
    unsigned int expected_h;
    const size_t idx = info - frame_info_list_.begin();
    ASSERT_EQ(info->w, GetFrameWidth(idx));
    ASSERT_EQ(info->h, GetFrameHeight(idx));
    ScaleForFrameNumber(frame, kInitialWidth, kInitialHeight, &expected_w,
                        &expected_h, 0);
-    EXPECT_EQ(expected_w, info->w)
+    EXPECT_EQ(expected_w, info->w) << "Frame " << frame
-        << "Frame " << frame << " had unexpected width";
+                                   << " had unexpected width";
-    EXPECT_EQ(expected_h, info->h)
+    EXPECT_EQ(expected_h, info->h) << "Frame " << frame
-        << "Frame " << frame << " had unexpected height";
+                                   << " had unexpected height";
  }
 }
@ -484,23 +464,8 @@ class ResizeRealtimeTest
    ++mismatch_nframes_;
  }
  virtual void FramePktHook(const vpx_codec_cx_pkt_t *pkt) {
    ASSERT_NE(static_cast<int>(pkt->data.frame.width[0]), 0);
    ASSERT_NE(static_cast<int>(pkt->data.frame.height[0]), 0);
    encode_frame_width_.push_back(pkt->data.frame.width[0]);
    encode_frame_height_.push_back(pkt->data.frame.height[0]);
  }
  unsigned int GetMismatchFrames() { return mismatch_nframes_; }
  unsigned int GetFrameWidth(size_t idx) const {
    return encode_frame_width_[idx];
  }
  unsigned int GetFrameHeight(size_t idx) const {
    return encode_frame_height_[idx];
  }
  void DefaultConfig() {
    cfg_.rc_buf_initial_sz = 500;
    cfg_.rc_buf_optimal_sz = 600;
@ -528,8 +493,6 @@ class ResizeRealtimeTest
  bool change_bitrate_;
  double mismatch_psnr_;
  int mismatch_nframes_;
  std::vector<unsigned int> encode_frame_width_;
  std::vector<unsigned int> encode_frame_height_;
 };
 TEST_P(ResizeRealtimeTest, TestExternalResizeWorks) {
@ -550,10 +513,10 @@ TEST_P(ResizeRealtimeTest, TestExternalResizeWorks) {
    unsigned int expected_h;
    ScaleForFrameNumber(frame, kInitialWidth, kInitialHeight, &expected_w,
                        &expected_h, 1);
-    EXPECT_EQ(expected_w, info->w)
+    EXPECT_EQ(expected_w, info->w) << "Frame " << frame
-        << "Frame " << frame << " had unexpected width";
+                                   << " had unexpected width";
-    EXPECT_EQ(expected_h, info->h)
+    EXPECT_EQ(expected_h, info->h) << "Frame " << frame
-        << "Frame " << frame << " had unexpected height";
+                                   << " had unexpected height";
    EXPECT_EQ(static_cast<unsigned int>(0), GetMismatchFrames());
  }
 }
@ -619,9 +582,6 @@ TEST_P(ResizeRealtimeTest, TestInternalResizeDownUpChangeBitRate) {
  int resize_count = 0;
  for (std::vector<FrameInfo>::const_iterator info = frame_info_list_.begin();
       info != frame_info_list_.end(); ++info) {
    const size_t idx = info - frame_info_list_.begin();
    ASSERT_EQ(info->w, GetFrameWidth(idx));
    ASSERT_EQ(info->h, GetFrameHeight(idx));
    if (info->w != last_w || info->h != last_h) {
      resize_count++;
      if (resize_count == 1) {
--- a/test/sad_test.cc
+++ b/test/sad_test.cc
@ -23,7 +23,6 @@
 #include "vpx/vpx_codec.h"
 #include "vpx_mem/vpx_mem.h"
 #include "vpx_ports/mem.h"
 #include "vpx_ports/vpx_timer.h"
 template <typename Function>
 struct TestParams {
@ -85,7 +84,7 @@ class SADTestBase : public ::testing::TestWithParam<ParamType> {
 #endif  // CONFIG_VP9_HIGHBITDEPTH
    }
    mask_ = (1 << bit_depth_) - 1;
-    source_stride_ = (params_.width + 63) & ~63;
+    source_stride_ = (params_.width + 31) & ~31;
    reference_stride_ = params_.width * 2;
    rnd_.Reset(ACMRandom::DeterministicSeed());
  }
@ -109,7 +108,7 @@ class SADTestBase : public ::testing::TestWithParam<ParamType> {
 protected:
  // Handle blocks up to 4 blocks 64x64 with stride up to 128
-  static const int kDataAlignment = 32;
+  static const int kDataAlignment = 16;
  static const int kDataBlockSize = 64 * 128;
  static const int kDataBufferSize = 4 * kDataBlockSize;
@ -464,38 +463,6 @@ TEST_P(SADx4Test, SrcAlignedByWidth) {
  source_data_ = tmp_source_data;
 }
 TEST_P(SADx4Test, DISABLED_Speed) {
  int tmp_stride = reference_stride_;
  reference_stride_ -= 1;
  FillRandom(source_data_, source_stride_);
  FillRandom(GetReference(0), reference_stride_);
  FillRandom(GetReference(1), reference_stride_);
  FillRandom(GetReference(2), reference_stride_);
  FillRandom(GetReference(3), reference_stride_);
  const int kCountSpeedTestBlock = 500000000 / (params_.width * params_.height);
  uint32_t reference_sad[4], exp_sad[4];
  vpx_usec_timer timer;
  memset(reference_sad, 0, sizeof(reference_sad));
  SADs(exp_sad);
  vpx_usec_timer_start(&timer);
  for (int i = 0; i < kCountSpeedTestBlock; ++i) {
    for (int block = 0; block < 4; ++block) {
      reference_sad[block] = ReferenceSAD(block);
    }
  }
  vpx_usec_timer_mark(&timer);
  for (int block = 0; block < 4; ++block) {
    EXPECT_EQ(reference_sad[block], exp_sad[block]) << "block " << block;
  }
  const int elapsed_time =
      static_cast<int>(vpx_usec_timer_elapsed(&timer) / 1000);
  printf("sad%dx%dx4 (%2dbit) time: %5d ms\n", params_.width, params_.height,
         bit_depth_, elapsed_time);
  reference_stride_ = tmp_stride;
 }
 //------------------------------------------------------------------------------
 // C functions
 const SadMxNParam c_tests[] = {
@ -677,50 +644,19 @@ INSTANTIATE_TEST_CASE_P(C, SADx4Test, ::testing::ValuesIn(x4d_c_tests));
 #if HAVE_NEON
 // (width, height, function) entries instantiated for SADTest under the NEON
 // prefix.
 const SadMxNParam neon_tests[] = {
  SadMxNParam(64, 64, &vpx_sad64x64_neon),
  SadMxNParam(64, 32, &vpx_sad64x32_neon),
  SadMxNParam(32, 32, &vpx_sad32x32_neon),
  SadMxNParam(16, 32, &vpx_sad16x32_neon),
  SadMxNParam(16, 16, &vpx_sad16x16_neon),
  SadMxNParam(16, 8, &vpx_sad16x8_neon),
  SadMxNParam(8, 16, &vpx_sad8x16_neon),
  SadMxNParam(8, 8, &vpx_sad8x8_neon),
  SadMxNParam(8, 4, &vpx_sad8x4_neon),
  SadMxNParam(4, 8, &vpx_sad4x8_neon),
  SadMxNParam(4, 4, &vpx_sad4x4_neon),
 };
 INSTANTIATE_TEST_CASE_P(NEON, SADTest, ::testing::ValuesIn(neon_tests));
 // Entries for the *_avg_neon functions, instantiated for SADavgTest.
 const SadMxNAvgParam avg_neon_tests[] = {
  SadMxNAvgParam(64, 64, &vpx_sad64x64_avg_neon),
  SadMxNAvgParam(64, 32, &vpx_sad64x32_avg_neon),
  SadMxNAvgParam(32, 64, &vpx_sad32x64_avg_neon),
  SadMxNAvgParam(32, 32, &vpx_sad32x32_avg_neon),
  SadMxNAvgParam(32, 16, &vpx_sad32x16_avg_neon),
  SadMxNAvgParam(16, 32, &vpx_sad16x32_avg_neon),
  SadMxNAvgParam(16, 16, &vpx_sad16x16_avg_neon),
  SadMxNAvgParam(16, 8, &vpx_sad16x8_avg_neon),
  SadMxNAvgParam(8, 16, &vpx_sad8x16_avg_neon),
  SadMxNAvgParam(8, 8, &vpx_sad8x8_avg_neon),
  SadMxNAvgParam(8, 4, &vpx_sad8x4_avg_neon),
  SadMxNAvgParam(4, 8, &vpx_sad4x8_avg_neon),
  SadMxNAvgParam(4, 4, &vpx_sad4x4_avg_neon),
 };
 INSTANTIATE_TEST_CASE_P(NEON, SADavgTest, ::testing::ValuesIn(avg_neon_tests));
 // Entries for the *x4d_neon functions, instantiated for SADx4Test.
 const SadMxNx4Param x4d_neon_tests[] = {
  SadMxNx4Param(64, 64, &vpx_sad64x64x4d_neon),
  SadMxNx4Param(64, 32, &vpx_sad64x32x4d_neon),
  SadMxNx4Param(32, 64, &vpx_sad32x64x4d_neon),
  SadMxNx4Param(32, 32, &vpx_sad32x32x4d_neon),
  SadMxNx4Param(32, 16, &vpx_sad32x16x4d_neon),
  SadMxNx4Param(16, 32, &vpx_sad16x32x4d_neon),
  SadMxNx4Param(16, 16, &vpx_sad16x16x4d_neon),
  SadMxNx4Param(16, 8, &vpx_sad16x8x4d_neon),
  SadMxNx4Param(8, 16, &vpx_sad8x16x4d_neon),
  SadMxNx4Param(8, 8, &vpx_sad8x8x4d_neon),
  SadMxNx4Param(8, 4, &vpx_sad8x4x4d_neon),
  SadMxNx4Param(4, 8, &vpx_sad4x8x4d_neon),
  SadMxNx4Param(4, 4, &vpx_sad4x4x4d_neon),
 };
 INSTANTIATE_TEST_CASE_P(NEON, SADx4Test, ::testing::ValuesIn(x4d_neon_tests));
 #endif  // HAVE_NEON
@ -929,14 +865,6 @@ const SadMxNx4Param x4d_avx2_tests[] = {
 INSTANTIATE_TEST_CASE_P(AVX2, SADx4Test, ::testing::ValuesIn(x4d_avx2_tests));
 #endif  // HAVE_AVX2
 #if HAVE_AVX512
 // Only the 64x64 x4d function is listed for AVX512 here.
 const SadMxNx4Param x4d_avx512_tests[] = {
  SadMxNx4Param(64, 64, &vpx_sad64x64x4d_avx512),
 };
 INSTANTIATE_TEST_CASE_P(AVX512, SADx4Test,
                         ::testing::ValuesIn(x4d_avx512_tests));
 #endif  // HAVE_AVX512
 //------------------------------------------------------------------------------
 // MIPS functions
 #if HAVE_MSA
@ -992,98 +920,4 @@ const SadMxNx4Param x4d_msa_tests[] = {
 INSTANTIATE_TEST_CASE_P(MSA, SADx4Test, ::testing::ValuesIn(x4d_msa_tests));
 #endif  // HAVE_MSA
 //------------------------------------------------------------------------------
 // VSX functions
 #if HAVE_VSX
 // (width, height, function) entries instantiated for SADTest under the VSX
 // prefix. The smallest block listed in these tables is 16x8.
 const SadMxNParam vsx_tests[] = {
  SadMxNParam(64, 64, &vpx_sad64x64_vsx),
  SadMxNParam(64, 32, &vpx_sad64x32_vsx),
  SadMxNParam(32, 64, &vpx_sad32x64_vsx),
  SadMxNParam(32, 32, &vpx_sad32x32_vsx),
  SadMxNParam(32, 16, &vpx_sad32x16_vsx),
  SadMxNParam(16, 32, &vpx_sad16x32_vsx),
  SadMxNParam(16, 16, &vpx_sad16x16_vsx),
  SadMxNParam(16, 8, &vpx_sad16x8_vsx),
 };
 INSTANTIATE_TEST_CASE_P(VSX, SADTest, ::testing::ValuesIn(vsx_tests));
 // Entries for the *_avg_vsx functions, instantiated for SADavgTest.
 const SadMxNAvgParam avg_vsx_tests[] = {
  SadMxNAvgParam(64, 64, &vpx_sad64x64_avg_vsx),
  SadMxNAvgParam(64, 32, &vpx_sad64x32_avg_vsx),
  SadMxNAvgParam(32, 64, &vpx_sad32x64_avg_vsx),
  SadMxNAvgParam(32, 32, &vpx_sad32x32_avg_vsx),
  SadMxNAvgParam(32, 16, &vpx_sad32x16_avg_vsx),
  SadMxNAvgParam(16, 32, &vpx_sad16x32_avg_vsx),
  SadMxNAvgParam(16, 16, &vpx_sad16x16_avg_vsx),
  SadMxNAvgParam(16, 8, &vpx_sad16x8_avg_vsx),
 };
 INSTANTIATE_TEST_CASE_P(VSX, SADavgTest, ::testing::ValuesIn(avg_vsx_tests));
 // Entries for the *x4d_vsx functions, instantiated for SADx4Test.
 const SadMxNx4Param x4d_vsx_tests[] = {
  SadMxNx4Param(64, 64, &vpx_sad64x64x4d_vsx),
  SadMxNx4Param(64, 32, &vpx_sad64x32x4d_vsx),
  SadMxNx4Param(32, 64, &vpx_sad32x64x4d_vsx),
  SadMxNx4Param(32, 32, &vpx_sad32x32x4d_vsx),
  SadMxNx4Param(32, 16, &vpx_sad32x16x4d_vsx),
  SadMxNx4Param(16, 32, &vpx_sad16x32x4d_vsx),
  SadMxNx4Param(16, 16, &vpx_sad16x16x4d_vsx),
  SadMxNx4Param(16, 8, &vpx_sad16x8x4d_vsx),
 };
 INSTANTIATE_TEST_CASE_P(VSX, SADx4Test, ::testing::ValuesIn(x4d_vsx_tests));
 #endif  // HAVE_VSX
 //------------------------------------------------------------------------------
 // Loongson functions
 #if HAVE_MMI
 // (width, height, function) entries instantiated for SADTest under the MMI
 // prefix.
 const SadMxNParam mmi_tests[] = {
  SadMxNParam(64, 64, &vpx_sad64x64_mmi),
  SadMxNParam(64, 32, &vpx_sad64x32_mmi),
  SadMxNParam(32, 64, &vpx_sad32x64_mmi),
  SadMxNParam(32, 32, &vpx_sad32x32_mmi),
  SadMxNParam(32, 16, &vpx_sad32x16_mmi),
  SadMxNParam(16, 32, &vpx_sad16x32_mmi),
  SadMxNParam(16, 16, &vpx_sad16x16_mmi),
  SadMxNParam(16, 8, &vpx_sad16x8_mmi),
  SadMxNParam(8, 16, &vpx_sad8x16_mmi),
  SadMxNParam(8, 8, &vpx_sad8x8_mmi),
  SadMxNParam(8, 4, &vpx_sad8x4_mmi),
  SadMxNParam(4, 8, &vpx_sad4x8_mmi),
  SadMxNParam(4, 4, &vpx_sad4x4_mmi),
 };
 INSTANTIATE_TEST_CASE_P(MMI, SADTest, ::testing::ValuesIn(mmi_tests));
 // Entries for the *_avg_mmi functions, instantiated for SADavgTest.
 const SadMxNAvgParam avg_mmi_tests[] = {
  SadMxNAvgParam(64, 64, &vpx_sad64x64_avg_mmi),
  SadMxNAvgParam(64, 32, &vpx_sad64x32_avg_mmi),
  SadMxNAvgParam(32, 64, &vpx_sad32x64_avg_mmi),
  SadMxNAvgParam(32, 32, &vpx_sad32x32_avg_mmi),
  SadMxNAvgParam(32, 16, &vpx_sad32x16_avg_mmi),
  SadMxNAvgParam(16, 32, &vpx_sad16x32_avg_mmi),
  SadMxNAvgParam(16, 16, &vpx_sad16x16_avg_mmi),
  SadMxNAvgParam(16, 8, &vpx_sad16x8_avg_mmi),
  SadMxNAvgParam(8, 16, &vpx_sad8x16_avg_mmi),
  SadMxNAvgParam(8, 8, &vpx_sad8x8_avg_mmi),
  SadMxNAvgParam(8, 4, &vpx_sad8x4_avg_mmi),
  SadMxNAvgParam(4, 8, &vpx_sad4x8_avg_mmi),
  SadMxNAvgParam(4, 4, &vpx_sad4x4_avg_mmi),
 };
 INSTANTIATE_TEST_CASE_P(MMI, SADavgTest, ::testing::ValuesIn(avg_mmi_tests));
 // Entries for the *x4d_mmi functions, instantiated for SADx4Test.
 const SadMxNx4Param x4d_mmi_tests[] = {
  SadMxNx4Param(64, 64, &vpx_sad64x64x4d_mmi),
  SadMxNx4Param(64, 32, &vpx_sad64x32x4d_mmi),
  SadMxNx4Param(32, 64, &vpx_sad32x64x4d_mmi),
  SadMxNx4Param(32, 32, &vpx_sad32x32x4d_mmi),
  SadMxNx4Param(32, 16, &vpx_sad32x16x4d_mmi),
  SadMxNx4Param(16, 32, &vpx_sad16x32x4d_mmi),
  SadMxNx4Param(16, 16, &vpx_sad16x16x4d_mmi),
  SadMxNx4Param(16, 8, &vpx_sad16x8x4d_mmi),
  SadMxNx4Param(8, 16, &vpx_sad8x16x4d_mmi),
  SadMxNx4Param(8, 8, &vpx_sad8x8x4d_mmi),
  SadMxNx4Param(8, 4, &vpx_sad8x4x4d_mmi),
  SadMxNx4Param(4, 8, &vpx_sad4x8x4d_mmi),
  SadMxNx4Param(4, 4, &vpx_sad4x4x4d_mmi),
 };
 INSTANTIATE_TEST_CASE_P(MMI, SADx4Test, ::testing::ValuesIn(x4d_mmi_tests));
 #endif  // HAVE_MMI
 }  // namespace
--- a/test/set_roi.cc
+++ b/test/set_roi.cc
@ -146,6 +146,14 @@ TEST(VP8RoiMapTest, ParameterCheck) {
      if (deltas_valid != roi_retval) break;
    }
    // Test that we report an error if cyclic refresh is enabled.
    cpi.cyclic_refresh_mode_enabled = 1;
    roi_retval =
        vp8_set_roimap(&cpi, roi_map, cpi.common.mb_rows, cpi.common.mb_cols,
                       delta_q, delta_lf, threshold);
    EXPECT_EQ(-1, roi_retval) << "cyclic refresh check error";
    cpi.cyclic_refresh_mode_enabled = 0;
    // Test invalid number of rows or columns.
    roi_retval =
        vp8_set_roimap(&cpi, roi_map, cpi.common.mb_rows + 1,
--- a/test/stress.sh
+++ b/test/stress.sh
@ -1,169 +0,0 @@
 #!/bin/sh
 ##
 ##  Copyright (c) 2016 The WebM project authors. All Rights Reserved.
 ##
 ##  Use of this source code is governed by a BSD-style license
 ##  that can be found in the LICENSE file in the root of the source
 ##  tree. An additional intellectual property rights grant can be found
 ##  in the file PATENTS.  All contributing project authors may
 ##  be found in the AUTHORS file in the root of the source tree.
 ##
 ##  This file performs a stress test. It runs (STRESS_ONEPASS_MAX_JOBS,
 ##  default=5) one pass, (STRESS_TWOPASS_MAX_JOBS, default=5) two pass &
 ##  (STRESS_RT_MAX_JOBS, default=5) real-time encodes and
 ##  (STRESS_<codec>_DECODE_MAX_JOBS, default=40 for VP8, 25 for VP9) decodes in
 ##  parallel.
 # Source the shared test helpers that live next to this script. Helper names
 # used below (elog, vpx_tool_path, run_tests, ...) are not defined in this
 # file - presumably they come from tools_common.sh.
 . $(dirname $0)/tools_common.sh
 # Input clips: one raw YUV source for the encodes and one pre-encoded stream
 # per codec for the decodes.
 YUV="${LIBVPX_TEST_DATA_PATH}/niklas_1280_720_30.yuv"
 VP8="${LIBVPX_TEST_DATA_PATH}/tos_vp8.webm"
 VP9="${LIBVPX_TEST_DATA_PATH}/vp90-2-sintel_1920x818_tile_1x4_fpm_2279kbps.webm"
 # Base URL missing clips are fetched from, and the checksum list used to
 # verify them.
 DATA_URL="http://downloads.webmproject.org/test_data/libvpx/"
 SHA1_FILE="$(dirname $0)/test-data.sha1"
 # Set sha1sum to proper sha program (sha1sum, shasum, sha1). This code is
 # cribbed from libs.mk.
 # When more than one program is present the last match wins; sha1sum stays
 # unset when none is found.
 [ -x "$(which sha1sum)" ] && sha1sum=sha1sum
 [ -x "$(which shasum)" ] && sha1sum=shasum
 [ -x "$(which sha1)" ] && sha1sum=sha1
 # Download a file from the url and check its sha1sum.
 #   $1: destination path of the file; the part after
 #       ${LIBVPX_TEST_DATA_PATH}/ is appended to ${DATA_URL} to form the URL.
 download_and_check_file() {
  # Get the file from the file path.
  local readonly root="${1#${LIBVPX_TEST_DATA_PATH}/}"
  # Download the file using curl. Trap to ensure no partial file is left
  # behind when interrupted. The subshell keeps the trap from leaking into the
  # caller. ${devnull} is not set in this file - presumably an output
  # redirection supplied by tools_common.sh.
  (trap "rm -f $1" INT TERM \
    && eval "curl --retry 1 -L -o $1 ${DATA_URL}${root} ${devnull}")
  # Check the sha1 sum of the file.
  # The check is skipped when no sha1 program was detected.
  if [ -n "${sha1sum}" ]; then
    # NOTE(review): set -e is not scoped to this function; once executed it
    # stays enabled for the rest of the script.
    set -e
    grep ${root} ${SHA1_FILE} \
      | (cd ${LIBVPX_TEST_DATA_PATH}; ${sha1sum} -c);
  fi
 }
 # Environment check: Make sure input is available.
 # Returns 1 when the checksum list, any input clip, or either of the vpxenc /
 # vpxdec tools is missing. Clips that are absent are downloaded first.
 stress_verify_environment() {
  # The checksum list is required to verify downloads.
  if [ ! -e "${SHA1_FILE}" ] ; then
    echo "Missing ${SHA1_FILE}"
    return 1
  fi
  # Fetch whichever input clips are not already on disk.
  for file in "${YUV}" "${VP8}" "${VP9}"; do
    if [ ! -e "${file}" ] ; then
      download_and_check_file "${file}"
    fi
  done
  # Re-check after the download attempt: every clip must exist now.
  if [ ! -e "${YUV}" ] || [ ! -e "${VP8}" ] || [ ! -e "${VP9}" ] ; then
    elog "Libvpx test data must exist in LIBVPX_TEST_DATA_PATH."
    return 1
  fi
  if [ -z "$(vpx_tool_path vpxenc)" ]; then
    elog "vpxenc not found. It must exist in LIBVPX_BIN_PATH or its parent."
    return 1
  fi
  if [ -z "$(vpx_tool_path vpxdec)" ]; then
    elog "vpxdec not found. It must exist in LIBVPX_BIN_PATH or its parent."
    return 1
  fi
 }
 # This function runs tests on libvpx that run multiple encodes and decodes
 # in parallel in hopes of catching synchronization and/or threading issues.
 #   $1: codec name passed to the encoder via --codec.
 #   $2: encoded file handed to every decode job.
 #   $3: number of parallel decode jobs.
 #   $4: thread count passed with -t to the encoder and the decoder.
 #   $5: extra encoder arguments for the one pass and two pass encodes
 #       (optional).
 # Returns the number of background jobs that exited with a non-zero status.
 stress() {
  local readonly decoder="$(vpx_tool_path vpxdec)"
  local readonly encoder="$(vpx_tool_path vpxenc)"
  local readonly codec="$1"
  local readonly webm="$2"
  local readonly decode_count="$3"
  local readonly threads="$4"
  local readonly enc_args="$5"
  local pids=""
  local rt_max_jobs=${STRESS_RT_MAX_JOBS:-5}
  local onepass_max_jobs=${STRESS_ONEPASS_MAX_JOBS:-5}
  local twopass_max_jobs=${STRESS_TWOPASS_MAX_JOBS:-5}
  # Enable job control, so we can run multiple processes.
  set -m
  # Start $onepass_max_jobs encode jobs in parallel.
  # Each job gets a slightly different target bitrate.
  for i in $(seq ${onepass_max_jobs}); do
    bitrate=$(($i * 20 + 300))
    eval "${VPX_TEST_PREFIX}" "${encoder}" "--codec=${codec} -w 1280 -h 720" \
      "${YUV}" "-t ${threads} --limit=150 --test-decode=fatal --passes=1" \
      "--target-bitrate=${bitrate} -o ${VPX_TEST_OUTPUT_DIR}/${i}.1pass.webm" \
      "${enc_args}" ${devnull} &
    pids="${pids} $!"
  done
  # Start $twopass_max_jobs encode jobs in parallel.
  for i in $(seq ${twopass_max_jobs}); do
    bitrate=$(($i * 20 + 300))
    eval "${VPX_TEST_PREFIX}" "${encoder}" "--codec=${codec} -w 1280 -h 720" \
      "${YUV}" "-t ${threads} --limit=150 --test-decode=fatal --passes=2" \
      "--target-bitrate=${bitrate} -o ${VPX_TEST_OUTPUT_DIR}/${i}.2pass.webm" \
      "${enc_args}" ${devnull} &
    pids="${pids} $!"
  done
  # Start $rt_max_jobs rt encode jobs in parallel.
  # Note that ${enc_args} is not passed to the rt encodes.
  for i in $(seq ${rt_max_jobs}); do
    bitrate=$(($i * 20 + 300))
    eval "${VPX_TEST_PREFIX}" "${encoder}" "--codec=${codec} -w 1280 -h 720" \
      "${YUV}" "-t ${threads} --limit=150 --test-decode=fatal " \
      "--target-bitrate=${bitrate} --lag-in-frames=0 --error-resilient=1" \
      "--kf-min-dist=3000 --kf-max-dist=3000 --cpu-used=-6 --static-thresh=1" \
      "--end-usage=cbr --min-q=2 --max-q=56 --undershoot-pct=100" \
      "--overshoot-pct=15 --buf-sz=1000 --buf-initial-sz=500" \
      "--buf-optimal-sz=600 --max-intra-rate=900 --resize-allowed=0" \
      "--drop-frame=0 --passes=1 --rt --noise-sensitivity=4" \
      "-o ${VPX_TEST_OUTPUT_DIR}/${i}.rt.webm" ${devnull} &
    pids="${pids} $!"
  done
  # Start $decode_count decode jobs in parallel.
  for i in $(seq "${decode_count}"); do
    eval "${decoder}" "-t ${threads}" "${webm}" "--noblit" ${devnull} &
    pids="${pids} $!"
  done
  # Wait for all parallel jobs to finish.
  # ${pids} is deliberately left unquoted so the loop visits one pid at a
  # time. Waiting on the whole list in a single `wait` call only reports the
  # exit status of the last pid and hides failures of every other job.
  fail=0
  for job in ${pids}; do
    wait "${job}" || fail=$(($fail + 1))
  done
  return $fail
 }
 # Runs the VP8 stress scenario with 4 threads when the build supports both
 # VP8 decoding and encoding; otherwise does nothing.
 vp8_stress_test() {
  local vp8_max_jobs=${STRESS_VP8_DECODE_MAX_JOBS:-40}
  local vp8_can_decode="$(vp8_decode_available)"
  local vp8_can_encode="$(vp8_encode_available)"
  if [ "${vp8_can_decode}" = "yes" ] && [ "${vp8_can_encode}" = "yes" ]; then
    stress vp8 "${VP8}" "${vp8_max_jobs}" 4
  fi
 }
 # Runs the VP9 stress scenario when the build supports both VP9 decoding and
 # encoding. All arguments are forwarded to stress() after the codec, input
 # file and decode job count.
 vp9_stress() {
  local vp9_max_jobs=${STRESS_VP9_DECODE_MAX_JOBS:-25}
  local vp9_can_decode="$(vp9_decode_available)"
  local vp9_can_encode="$(vp9_encode_available)"
  if [ "${vp9_can_decode}" = "yes" ] && [ "${vp9_can_encode}" = "yes" ]; then
    stress vp9 "${VP9}" "${vp9_max_jobs}" "$@"
  fi
 }
 # Runs the VP9 stress scenario with 4, 8 and 100 threads, passing --row-mt=0
 # as the extra encoder argument.
 vp9_stress_test() {
  for threads in 4 8 100; do
    vp9_stress "$threads" "--row-mt=0"
  done
 }
 # Same as vp9_stress_test, but passes --row-mt=1 as the extra encoder
 # argument.
 vp9_stress_test_row_mt() {
  for threads in 4 8 100; do
    vp9_stress "$threads" "--row-mt=1"
  done
 }
 # Hand the environment check and the list of test functions to run_tests
 # (not defined in this file - presumably provided by tools_common.sh).
 run_tests stress_verify_environment \
  "vp8_stress_test vp9_stress_test vp9_stress_test_row_mt"
--- a/test/sum_squares_test.cc
+++ b/test/sum_squares_test.cc
@ -28,7 +28,7 @@ namespace {
 const int kNumIterations = 10000;
 typedef uint64_t (*SSI16Func)(const int16_t *src, int stride, int size);
-typedef ::testing::tuple<SSI16Func, SSI16Func> SumSquaresParam;
+typedef std::tr1::tuple<SSI16Func, SSI16Func> SumSquaresParam;
 class SumSquaresTest : public ::testing::TestWithParam<SumSquaresParam> {
 public:
@ -102,7 +102,7 @@ TEST_P(SumSquaresTest, ExtremeValues) {
  }
 }
-using ::testing::make_tuple;
+using std::tr1::make_tuple;
 #if HAVE_SSE2
 INSTANTIATE_TEST_CASE_P(
@ -110,11 +110,4 @@ INSTANTIATE_TEST_CASE_P(
    ::testing::Values(make_tuple(&vpx_sum_squares_2d_i16_c,
                                 &vpx_sum_squares_2d_i16_sse2)));
 #endif  // HAVE_SSE2
 #if HAVE_MSA
 INSTANTIATE_TEST_CASE_P(
    MSA, SumSquaresTest,
    ::testing::Values(make_tuple(&vpx_sum_squares_2d_i16_c,
                                 &vpx_sum_squares_2d_i16_msa)));
 #endif  // HAVE_MSA
 }  // namespace
--- a/test/superframe_test.cc
+++ b/test/superframe_test.cc
@ -18,7 +18,7 @@ namespace {
 const int kTestMode = 0;
-typedef ::testing::tuple<libvpx_test::TestMode, int> SuperframeTestParam;
+typedef std::tr1::tuple<libvpx_test::TestMode, int> SuperframeTestParam;
 class SuperframeTest
    : public ::libvpx_test::EncoderTest,
@ -31,7 +31,7 @@ class SuperframeTest
  virtual void SetUp() {
    InitializeConfig();
    const SuperframeTestParam input = GET_PARAM(1);
-    const libvpx_test::TestMode mode = ::testing::get<kTestMode>(input);
+    const libvpx_test::TestMode mode = std::tr1::get<kTestMode>(input);
    SetMode(mode);
    sf_count_ = 0;
    sf_count_max_ = INT_MAX;
--- a/test/svc_datarate_test.cc
+++ b/test/svc_datarate_test.cc
--- a/test/svc_test.cc
+++ b/test/svc_test.cc
@ -0,0 +1,789 @@
 /*
 *  Copyright (c) 2013 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
 #include <string>
 #include "third_party/googletest/src/include/gtest/gtest.h"
 #include "test/codec_factory.h"
 #include "test/decode_test_driver.h"
 #include "test/i420_video_source.h"
 #include "vp9/decoder/vp9_decoder.h"
 #include "vpx/svc_context.h"
 #include "vpx/vp8cx.h"
 #include "vpx/vpx_encoder.h"
 namespace {
 using libvpx_test::CodecFactory;
 using libvpx_test::Decoder;
 using libvpx_test::DxDataIterator;
 using libvpx_test::VP9CodecFactory;
 // Fixture for the SVC encoder tests. It owns an SvcContext, an encoder
 // context with its configuration and a VP9 decoder, and provides helpers to
 // run first/second pass encodes, decode the stored output and strip
 // enhancement layers from the stored superframes.
 class SvcTest : public ::testing::Test {
 protected:
  // Frame dimensions assigned to the encoder configuration in SetUp().
  static const uint32_t kWidth = 352;
  static const uint32_t kHeight = 288;
  // Zero-fills the SVC context, the codec context and the encoder config.
  SvcTest()
      : codec_iface_(0), test_file_name_("hantro_collage_w352h288.yuv"),
        codec_initialized_(false), decoder_(0) {
    memset(&svc_, 0, sizeof(svc_));
    memset(&codec_, 0, sizeof(codec_));
    memset(&codec_enc_, 0, sizeof(codec_enc_));
  }
  virtual ~SvcTest() {}
  // Loads the default VP9 encoder configuration, overrides size, timebase and
  // keyframe distance, creates the decoder and resets the tile settings.
  virtual void SetUp() {
    svc_.log_level = SVC_LOG_DEBUG;
    svc_.log_print = 0;
    codec_iface_ = vpx_codec_vp9_cx();
    const vpx_codec_err_t res =
        vpx_codec_enc_config_default(codec_iface_, &codec_enc_, 0);
    EXPECT_EQ(VPX_CODEC_OK, res);
    codec_enc_.g_w = kWidth;
    codec_enc_.g_h = kHeight;
    codec_enc_.g_timebase.num = 1;
    codec_enc_.g_timebase.den = 60;
    codec_enc_.kf_min_dist = 100;
    codec_enc_.kf_max_dist = 100;
    vpx_codec_dec_cfg_t dec_cfg = vpx_codec_dec_cfg_t();
    VP9CodecFactory codec_factory;
    decoder_ = codec_factory.CreateDecoder(dec_cfg, 0);
    tile_columns_ = 0;
    tile_rows_ = 0;
  }
  // Releases the encoder (if any) and destroys the decoder made in SetUp().
  virtual void TearDown() {
    ReleaseEncoder();
    delete (decoder_);
  }
  // Initializes the SVC encoder from the current svc_ / codec_enc_ settings
  // and applies the speed and tile controls. Marks the codec as initialized so
  // that ReleaseEncoder() destroys it.
  void InitializeEncoder() {
    const vpx_codec_err_t res =
        vpx_svc_init(&svc_, &codec_, vpx_codec_vp9_cx(), &codec_enc_);
    EXPECT_EQ(VPX_CODEC_OK, res);
    vpx_codec_control(&codec_, VP8E_SET_CPUUSED, 4);  // Make the test faster
    vpx_codec_control(&codec_, VP9E_SET_TILE_COLUMNS, tile_columns_);
    vpx_codec_control(&codec_, VP9E_SET_TILE_ROWS, tile_rows_);
    codec_initialized_ = true;
  }
  // Releases the SVC context and, if InitializeEncoder() ran, destroys the
  // codec context. Safe to call repeatedly.
  void ReleaseEncoder() {
    vpx_svc_release(&svc_);
    if (codec_initialized_) vpx_codec_destroy(&codec_);
    codec_initialized_ = false;
  }
  // Drains the encoder's pending packets and appends the payload of every
  // stats packet to |stats_buf|.
  void GetStatsData(std::string *const stats_buf) {
    vpx_codec_iter_t iter = NULL;
    const vpx_codec_cx_pkt_t *cx_pkt;
    while ((cx_pkt = vpx_codec_get_cx_data(&codec_, &iter)) != NULL) {
      if (cx_pkt->kind == VPX_CODEC_STATS_PKT) {
        EXPECT_GT(cx_pkt->data.twopass_stats.sz, 0U);
        ASSERT_TRUE(cx_pkt->data.twopass_stats.buf != NULL);
        stats_buf->append(static_cast<char *>(cx_pkt->data.twopass_stats.buf),
                          cx_pkt->data.twopass_stats.sz);
      }
    }
  }
  // Runs a first pass encode of |n| frames with |layers| spatial layers and
  // collects the stats (including those emitted by the final flush) in
  // |stats_buf|. The encoder is created and released inside this call.
  void Pass1EncodeNFrames(const int n, const int layers,
                          std::string *const stats_buf) {
    vpx_codec_err_t res;
    ASSERT_GT(n, 0);
    ASSERT_GT(layers, 0);
    svc_.spatial_layers = layers;
    codec_enc_.g_pass = VPX_RC_FIRST_PASS;
    InitializeEncoder();
    libvpx_test::I420VideoSource video(
        test_file_name_, codec_enc_.g_w, codec_enc_.g_h,
        codec_enc_.g_timebase.den, codec_enc_.g_timebase.num, 0, 30);
    video.Begin();
    for (int i = 0; i < n; ++i) {
      res = vpx_svc_encode(&svc_, &codec_, video.img(), video.pts(),
                           video.duration(), VPX_DL_GOOD_QUALITY);
      ASSERT_EQ(VPX_CODEC_OK, res);
      GetStatsData(stats_buf);
      video.Next();
    }
    // Flush encoder and test EOS packet.
    res = vpx_svc_encode(&svc_, &codec_, NULL, video.pts(), video.duration(),
                         VPX_DL_GOOD_QUALITY);
    ASSERT_EQ(VPX_CODEC_OK, res);
    GetStatsData(stats_buf);
    ReleaseEncoder();
  }
  // Drains the encoder's pending packets and copies every compressed frame
  // into a freshly malloc'ed buffer in |outputs|, advancing |*frame_received|.
  // At most |max_frame_received| frames are accepted, and the first frame is
  // expected to be a keyframe. Buffers are freed by FreeBitstreamBuffers().
  void StoreFrames(const size_t max_frame_received,
                   struct vpx_fixed_buf *const outputs,
                   size_t *const frame_received) {
    vpx_codec_iter_t iter = NULL;
    const vpx_codec_cx_pkt_t *cx_pkt;
    while ((cx_pkt = vpx_codec_get_cx_data(&codec_, &iter)) != NULL) {
      if (cx_pkt->kind == VPX_CODEC_CX_FRAME_PKT) {
        const size_t frame_size = cx_pkt->data.frame.sz;
        EXPECT_GT(frame_size, 0U);
        ASSERT_TRUE(cx_pkt->data.frame.buf != NULL);
        ASSERT_LT(*frame_received, max_frame_received);
        if (*frame_received == 0)
          EXPECT_EQ(1, !!(cx_pkt->data.frame.flags & VPX_FRAME_IS_KEY));
        // The allocation is 16 bytes larger than the frame; only frame_size
        // bytes are copied and recorded.
        outputs[*frame_received].buf = malloc(frame_size + 16);
        ASSERT_TRUE(outputs[*frame_received].buf != NULL);
        memcpy(outputs[*frame_received].buf, cx_pkt->data.frame.buf,
               frame_size);
        outputs[*frame_received].sz = frame_size;
        ++(*frame_received);
      }
    }
  }
  // Encodes |n| frames with |layers| spatial layers using the pass already
  // selected in codec_enc_.g_pass and stores the output in |outputs|, which
  // must have room for |n| entries. |stats_buf| is only read when the pass is
  // VPX_RC_LAST_PASS and may be NULL otherwise. Expects exactly |n| frames.
  void Pass2EncodeNFrames(std::string *const stats_buf, const int n,
                          const int layers,
                          struct vpx_fixed_buf *const outputs) {
    vpx_codec_err_t res;
    size_t frame_received = 0;
    ASSERT_TRUE(outputs != NULL);
    ASSERT_GT(n, 0);
    ASSERT_GT(layers, 0);
    svc_.spatial_layers = layers;
    codec_enc_.rc_target_bitrate = 500;
    if (codec_enc_.g_pass == VPX_RC_LAST_PASS) {
      ASSERT_TRUE(stats_buf != NULL);
      ASSERT_GT(stats_buf->size(), 0U);
      codec_enc_.rc_twopass_stats_in.buf = &(*stats_buf)[0];
      codec_enc_.rc_twopass_stats_in.sz = stats_buf->size();
    }
    InitializeEncoder();
    libvpx_test::I420VideoSource video(
        test_file_name_, codec_enc_.g_w, codec_enc_.g_h,
        codec_enc_.g_timebase.den, codec_enc_.g_timebase.num, 0, 30);
    video.Begin();
    for (int i = 0; i < n; ++i) {
      res = vpx_svc_encode(&svc_, &codec_, video.img(), video.pts(),
                           video.duration(), VPX_DL_GOOD_QUALITY);
      ASSERT_EQ(VPX_CODEC_OK, res);
      StoreFrames(n, outputs, &frame_received);
      video.Next();
    }
    // Flush encoder.
    res = vpx_svc_encode(&svc_, &codec_, NULL, 0, video.duration(),
                         VPX_DL_GOOD_QUALITY);
    EXPECT_EQ(VPX_CODEC_OK, res);
    StoreFrames(n, outputs, &frame_received);
    EXPECT_EQ(frame_received, static_cast<size_t>(n));
    ReleaseEncoder();
  }
  // Feeds the |n| buffers in |inputs| to the decoder and expects each one to
  // decode successfully, with |n| output frames received in total.
  void DecodeNFrames(const struct vpx_fixed_buf *const inputs, const int n) {
    int decoded_frames = 0;
    int received_frames = 0;
    ASSERT_TRUE(inputs != NULL);
    ASSERT_GT(n, 0);
    for (int i = 0; i < n; ++i) {
      ASSERT_TRUE(inputs[i].buf != NULL);
      ASSERT_GT(inputs[i].sz, 0U);
      const vpx_codec_err_t res_dec = decoder_->DecodeFrame(
          static_cast<const uint8_t *>(inputs[i].buf), inputs[i].sz);
      ASSERT_EQ(VPX_CODEC_OK, res_dec) << decoder_->DecodeError();
      ++decoded_frames;
      DxDataIterator dec_iter = decoder_->GetDxData();
      while (dec_iter.Next() != NULL) {
        ++received_frames;
      }
    }
    EXPECT_EQ(decoded_frames, n);
    EXPECT_EQ(received_frames, n);
  }
  // Truncates each of the |num_super_frames| buffers in |inputs| in place so
  // that it ends after the |remained_spatial_layers|-th visible frame, then
  // rewrites the superframe index behind the kept frames. Buffers that hold a
  // single frame (no index) are only accepted when one layer should remain.
  void DropEnhancementLayers(struct vpx_fixed_buf *const inputs,
                             const int num_super_frames,
                             const int remained_spatial_layers) {
    ASSERT_TRUE(inputs != NULL);
    ASSERT_GT(num_super_frames, 0);
    ASSERT_GT(remained_spatial_layers, 0);
    for (int i = 0; i < num_super_frames; ++i) {
      uint32_t frame_sizes[8] = { 0 };
      int frame_count = 0;
      int frames_found = 0;
      int frame;
      ASSERT_TRUE(inputs[i].buf != NULL);
      ASSERT_GT(inputs[i].sz, 0U);
      vpx_codec_err_t res = vp9_parse_superframe_index(
          static_cast<const uint8_t *>(inputs[i].buf), inputs[i].sz,
          frame_sizes, &frame_count, NULL, NULL);
      ASSERT_EQ(VPX_CODEC_OK, res);
      if (frame_count == 0) {
        // There's no super frame but only a single frame.
        ASSERT_EQ(1, remained_spatial_layers);
      } else {
        // Found a super frame.
        uint8_t *frame_data = static_cast<uint8_t *>(inputs[i].buf);
        uint8_t *frame_start = frame_data;
        // Walk the frames; afterwards |frame| is the index of the last frame
        // to keep and |frame_data| points at its first byte.
        for (frame = 0; frame < frame_count; ++frame) {
          // Looking for a visible frame.
          if (frame_data[0] & 0x02) {
            ++frames_found;
            if (frames_found == remained_spatial_layers) break;
          }
          frame_data += frame_sizes[frame];
        }
        ASSERT_LT(frame, frame_count)
            << "Couldn't find a visible frame. "
            << "remained_spatial_layers: " << remained_spatial_layers
            << "    super_frame: " << i;
        // The frame to keep is already the last one: nothing to drop.
        if (frame == frame_count - 1) continue;
        // Skip past the last kept frame; the new index is written here.
        frame_data += frame_sizes[frame];
        // We need to add one more frame for multiple frame contexts.
        // The last byte of the buffer is read as the index marker; bits 3-4
        // plus one give |mag|, the number of bytes used per frame size entry.
        uint8_t marker =
            static_cast<const uint8_t *>(inputs[i].buf)[inputs[i].sz - 1];
        const uint32_t mag = ((marker >> 3) & 0x3) + 1;
        // Index size = leading marker + one size entry per frame + trailing
        // marker.
        const size_t index_sz = 2 + mag * frame_count;
        const size_t new_index_sz = 2 + mag * (frame + 1);
        // Replace the low three bits of the marker with the new last-frame
        // index.
        marker &= 0x0f8;
        marker |= frame;
        // Copy existing frame sizes.
        memmove(frame_data + 1, frame_start + inputs[i].sz - index_sz + 1,
                new_index_sz - 2);
        // New marker.
        frame_data[0] = marker;
        frame_data += (mag * (frame + 1) + 1);
        *frame_data++ = marker;
        inputs[i].sz = frame_data - frame_start;
      }
    }
  }
  // Frees the |n| buffers allocated by StoreFrames() and clears the entries.
  void FreeBitstreamBuffers(struct vpx_fixed_buf *const inputs, const int n) {
    ASSERT_TRUE(inputs != NULL);
    ASSERT_GT(n, 0);
    for (int i = 0; i < n; ++i) {
      free(inputs[i].buf);
      inputs[i].buf = NULL;
      inputs[i].sz = 0;
    }
  }
  SvcContext svc_;                      // SVC wrapper state.
  vpx_codec_ctx_t codec_;               // Encoder context.
  struct vpx_codec_enc_cfg codec_enc_;  // Encoder configuration.
  vpx_codec_iface_t *codec_iface_;      // Set to the VP9 encoder in SetUp().
  std::string test_file_name_;          // Input clip name.
  bool codec_initialized_;  // True between Initialize/ReleaseEncoder().
  Decoder *decoder_;        // Created in SetUp(), deleted in TearDown().
  int tile_columns_;        // Passed to VP9E_SET_TILE_COLUMNS.
  int tile_rows_;           // Passed to VP9E_SET_TILE_ROWS.
 };
 // Checks vpx_svc_init() argument validation and the default layer count.
 TEST_F(SvcTest, SvcInit) {
  // A NULL in any of the four pointer arguments must be rejected.
  EXPECT_EQ(VPX_CODEC_INVALID_PARAM,
            vpx_svc_init(NULL, &codec_, codec_iface_, &codec_enc_));
  EXPECT_EQ(VPX_CODEC_INVALID_PARAM,
            vpx_svc_init(&svc_, NULL, codec_iface_, &codec_enc_));
  EXPECT_EQ(VPX_CODEC_INVALID_PARAM,
            vpx_svc_init(&svc_, &codec_, NULL, &codec_enc_));
  EXPECT_EQ(VPX_CODEC_INVALID_PARAM,
            vpx_svc_init(&svc_, &codec_, codec_iface_, NULL));
  // Six spatial layers are too many and must be rejected as well.
  svc_.spatial_layers = 6;
  EXPECT_EQ(VPX_CODEC_INVALID_PARAM,
            vpx_svc_init(&svc_, &codec_, codec_iface_, &codec_enc_));
  // Zero layers selects the default layer count.
  svc_.spatial_layers = 0;
  InitializeEncoder();
  EXPECT_EQ(VPX_SS_DEFAULT_LAYERS, svc_.spatial_layers);
 }
 // The encoder must initialize with two spatial layers.
 TEST_F(SvcTest, InitTwoLayers) {
  const int kSpatialLayers = 2;
  svc_.spatial_layers = kSpatialLayers;
  InitializeEncoder();
 }
 // A NULL option string is rejected immediately. An unknown option is
 // accepted by vpx_svc_set_options() and only rejected by vpx_svc_init().
 TEST_F(SvcTest, InvalidOptions) {
  EXPECT_EQ(VPX_CODEC_INVALID_PARAM, vpx_svc_set_options(&svc_, NULL));
  EXPECT_EQ(VPX_CODEC_OK, vpx_svc_set_options(&svc_, "not-an-option=1"));
  EXPECT_EQ(VPX_CODEC_INVALID_PARAM,
            vpx_svc_init(&svc_, &codec_, vpx_codec_vp9_cx(), &codec_enc_));
 }
 // The spatial-layers option must be reflected in the context after init.
 TEST_F(SvcTest, SetLayersOption) {
  EXPECT_EQ(VPX_CODEC_OK, vpx_svc_set_options(&svc_, "spatial-layers=3"));
  InitializeEncoder();
  EXPECT_EQ(3, svc_.spatial_layers);
 }
 // Several space separated options may be given in one string.
 TEST_F(SvcTest, SetMultipleOptions) {
  const char *const kOptions = "spatial-layers=2 scale-factors=1/3,2/3";
  EXPECT_EQ(VPX_CODEC_OK, vpx_svc_set_options(&svc_, kOptions));
  InitializeEncoder();
  EXPECT_EQ(2, svc_.spatial_layers);
 }
 // Malformed or incomplete scale-factors strings are accepted by
 // vpx_svc_set_options() but must make vpx_svc_init() fail. A well formed
 // list for two layers must initialize.
 TEST_F(SvcTest, SetScaleFactorsOption) {
  static const char *const kRejectedAtInit[] = {
    "scale-factors=not-scale-factors",
    "scale-factors=1/3, 3*3",
    "scale-factors=1/3",
  };
  const size_t kNumRejected =
      sizeof(kRejectedAtInit) / sizeof(kRejectedAtInit[0]);
  svc_.spatial_layers = 2;
  for (size_t idx = 0; idx < kNumRejected; ++idx) {
    EXPECT_EQ(VPX_CODEC_OK, vpx_svc_set_options(&svc_, kRejectedAtInit[idx]));
    EXPECT_EQ(VPX_CODEC_INVALID_PARAM,
              vpx_svc_init(&svc_, &codec_, vpx_codec_vp9_cx(), &codec_enc_));
  }
  EXPECT_EQ(VPX_CODEC_OK,
            vpx_svc_set_options(&svc_, "scale-factors=1/3,2/3"));
  InitializeEncoder();
 }
 // Invalid quantizer lists (non-numeric, too few entries, max below min) are
 // accepted by vpx_svc_set_options() but must make vpx_svc_init() fail. A
 // consistent pair of lists for two layers must initialize.
 TEST_F(SvcTest, SetQuantizersOption) {
  svc_.spatial_layers = 2;
  vpx_codec_err_t res = vpx_svc_set_options(&svc_, "max-quantizers=nothing");
  EXPECT_EQ(VPX_CODEC_OK, res);
  res = vpx_svc_init(&svc_, &codec_, vpx_codec_vp9_cx(), &codec_enc_);
  EXPECT_EQ(VPX_CODEC_INVALID_PARAM, res);
  res = vpx_svc_set_options(&svc_, "min-quantizers=nothing");
  EXPECT_EQ(VPX_CODEC_OK, res);
  res = vpx_svc_init(&svc_, &codec_, vpx_codec_vp9_cx(), &codec_enc_);
  EXPECT_EQ(VPX_CODEC_INVALID_PARAM, res);
  // A single value is not enough for two layers.
  res = vpx_svc_set_options(&svc_, "max-quantizers=40");
  EXPECT_EQ(VPX_CODEC_OK, res);
  res = vpx_svc_init(&svc_, &codec_, vpx_codec_vp9_cx(), &codec_enc_);
  EXPECT_EQ(VPX_CODEC_INVALID_PARAM, res);
  res = vpx_svc_set_options(&svc_, "min-quantizers=40");
  EXPECT_EQ(VPX_CODEC_OK, res);
  res = vpx_svc_init(&svc_, &codec_, vpx_codec_vp9_cx(), &codec_enc_);
  EXPECT_EQ(VPX_CODEC_INVALID_PARAM, res);
  // The maximum must not be below the minimum.
  res = vpx_svc_set_options(&svc_, "max-quantizers=30,30 min-quantizers=40,40");
  EXPECT_EQ(VPX_CODEC_OK, res);
  res = vpx_svc_init(&svc_, &codec_, vpx_codec_vp9_cx(), &codec_enc_);
  EXPECT_EQ(VPX_CODEC_INVALID_PARAM, res);
  // Valid configuration. The result of setting it is checked like every
  // other call in this test instead of being silently dropped.
  res = vpx_svc_set_options(&svc_, "max-quantizers=40,40 min-quantizers=30,30");
  EXPECT_EQ(VPX_CODEC_OK, res);
  InitializeEncoder();
 }
 // With five spatial layers, a non-numeric auto-alt-refs list and the list
 // "1,1,1,1,0" must make vpx_svc_init() fail, while "0,1,1,1,0" must
 // initialize.
 TEST_F(SvcTest, SetAutoAltRefOption) {
  svc_.spatial_layers = 5;
  vpx_codec_err_t res = vpx_svc_set_options(&svc_, "auto-alt-refs=none");
  EXPECT_EQ(VPX_CODEC_OK, res);
  res = vpx_svc_init(&svc_, &codec_, vpx_codec_vp9_cx(), &codec_enc_);
  EXPECT_EQ(VPX_CODEC_INVALID_PARAM, res);
  res = vpx_svc_set_options(&svc_, "auto-alt-refs=1,1,1,1,0");
  EXPECT_EQ(VPX_CODEC_OK, res);
  res = vpx_svc_init(&svc_, &codec_, vpx_codec_vp9_cx(), &codec_enc_);
  EXPECT_EQ(VPX_CODEC_INVALID_PARAM, res);
  // Valid configuration. Check the return value rather than discarding it,
  // matching the other calls in this test.
  res = vpx_svc_set_options(&svc_, "auto-alt-refs=0,1,1,1,0");
  EXPECT_EQ(VPX_CODEC_OK, res);
  InitializeEncoder();
 }
 // Test that decoder can handle an SVC frame as the first frame in a sequence.
 // Encodes a single two-layer frame in one pass mode and decodes it.
 TEST_F(SvcTest, OnePassEncodeOneFrame) {
  const int kFrames = 1;
  const int kLayers = 2;
  codec_enc_.g_pass = VPX_RC_ONE_PASS;
  vpx_fixed_buf frame;
  memset(&frame, 0, sizeof(frame));
  Pass2EncodeNFrames(NULL, kFrames, kLayers, &frame);
  DecodeNFrames(&frame, kFrames);
  FreeBitstreamBuffers(&frame, kFrames);
 }
 // One pass encode of three two-layer frames with no lag, then decode.
 TEST_F(SvcTest, OnePassEncodeThreeFrames) {
  const int kFrames = 3;
  const int kLayers = 2;
  codec_enc_.g_pass = VPX_RC_ONE_PASS;
  codec_enc_.g_lag_in_frames = 0;
  vpx_fixed_buf frames[kFrames];
  memset(frames, 0, sizeof(frames));
  Pass2EncodeNFrames(NULL, kFrames, kLayers, frames);
  DecodeNFrames(frames, kFrames);
  FreeBitstreamBuffers(frames, kFrames);
 }
 // Two pass encode of ten two-layer frames, then decode.
 TEST_F(SvcTest, TwoPassEncode10Frames) {
  const int kFrames = 10;
  const int kLayers = 2;
  // First pass: collect the stats.
  std::string first_pass_stats;
  Pass1EncodeNFrames(kFrames, kLayers, &first_pass_stats);
  // Second pass: encode using the collected stats.
  codec_enc_.g_pass = VPX_RC_LAST_PASS;
  vpx_fixed_buf frames[kFrames];
  memset(frames, 0, sizeof(frames));
  Pass2EncodeNFrames(&first_pass_stats, kFrames, kLayers, frames);
  DecodeNFrames(frames, kFrames);
  FreeBitstreamBuffers(frames, kFrames);
 }
 // Two pass encode of twenty two-layer frames with auto-alt-refs enabled on
 // both layers, then decode.
 TEST_F(SvcTest, TwoPassEncode20FramesWithAltRef) {
  const int kFrames = 20;
  const int kLayers = 2;
  // First pass: collect the stats.
  std::string first_pass_stats;
  Pass1EncodeNFrames(kFrames, kLayers, &first_pass_stats);
  // Second pass: encode using the collected stats.
  codec_enc_.g_pass = VPX_RC_LAST_PASS;
  vpx_svc_set_options(&svc_, "auto-alt-refs=1,1");
  vpx_fixed_buf frames[kFrames];
  memset(frames, 0, sizeof(frames));
  Pass2EncodeNFrames(&first_pass_stats, kFrames, kLayers, frames);
  DecodeNFrames(frames, kFrames);
  FreeBitstreamBuffers(frames, kFrames);
 }
 // Two pass encode with two spatial layers; the enhancement layer is stripped
 // from every superframe before decoding, so only one layer is decoded.
 TEST_F(SvcTest, TwoPassEncode2SpatialLayersDecodeBaseLayerOnly) {
  const int kFrames = 10;
  const int kLayers = 2;
  // First pass: collect the stats.
  std::string first_pass_stats;
  Pass1EncodeNFrames(kFrames, kLayers, &first_pass_stats);
  // Second pass: encode using the collected stats.
  codec_enc_.g_pass = VPX_RC_LAST_PASS;
  vpx_svc_set_options(&svc_, "auto-alt-refs=1,1");
  vpx_fixed_buf frames[kFrames];
  memset(frames, 0, sizeof(frames));
  Pass2EncodeNFrames(&first_pass_stats, kFrames, kLayers, frames);
  DropEnhancementLayers(frames, kFrames, 1);
  DecodeNFrames(frames, kFrames);
  FreeBitstreamBuffers(frames, kFrames);
 }
 // Two pass encode with five spatial layers. The output is decoded with all
 // five layers and then again after stripping it down to 4, 3, 2 and 1 layers.
 TEST_F(SvcTest, TwoPassEncode5SpatialLayersDecode54321Layers) {
  const int kFrames = 10;
  const int kLayers = 5;
  // First pass: collect the stats.
  std::string first_pass_stats;
  Pass1EncodeNFrames(kFrames, kLayers, &first_pass_stats);
  // Second pass: encode using the collected stats.
  codec_enc_.g_pass = VPX_RC_LAST_PASS;
  vpx_svc_set_options(&svc_, "auto-alt-refs=0,1,1,1,0");
  vpx_fixed_buf frames[kFrames];
  memset(frames, 0, sizeof(frames));
  Pass2EncodeNFrames(&first_pass_stats, kFrames, kLayers, frames);
  DecodeNFrames(frames, kFrames);
  // Peel off one enhancement layer at a time and decode what is left.
  for (int kept_layers = kLayers - 1; kept_layers >= 1; --kept_layers) {
    DropEnhancementLayers(frames, kFrames, kept_layers);
    DecodeNFrames(frames, kFrames);
  }
  FreeBitstreamBuffers(frames, kFrames);
 }
 // Two-pass encode of 20 frames with "scale-factors=1/1,1/1" in effect for both
 // passes, then a decode of every produced frame.
 TEST_F(SvcTest, TwoPassEncode2SNRLayers) {
  const int kFrameCount = 20;
  // Layer count handed to both encode helpers.
  const int kLayerCount = 2;
  // Pass 1: gather the first-pass statistics.
  std::string stats_buf;
  vpx_svc_set_options(&svc_, "scale-factors=1/1,1/1");
  Pass1EncodeNFrames(kFrameCount, kLayerCount, &stats_buf);
  // Pass 2: encode using those statistics.
  codec_enc_.g_pass = VPX_RC_LAST_PASS;
  vpx_svc_set_options(&svc_, "auto-alt-refs=1,1 scale-factors=1/1,1/1");
  vpx_fixed_buf outputs[kFrameCount];
  memset(&outputs[0], 0, sizeof(outputs));
  Pass2EncodeNFrames(&stats_buf, kFrameCount, kLayerCount, &outputs[0]);
  DecodeNFrames(&outputs[0], kFrameCount);
  FreeBitstreamBuffers(&outputs[0], kFrameCount);
 }
 // Two-pass encode of 20 frames with three 1/1 scale factors. The stream is
 // decoded as produced, then again after DropEnhancementLayers() with
 // arguments 2 and 1.
 TEST_F(SvcTest, TwoPassEncode3SNRLayersDecode321Layers) {
  const int kFrameCount = 20;
  // Layer count handed to both encode helpers.
  const int kLayerCount = 3;
  // Pass 1: gather the first-pass statistics.
  std::string stats_buf;
  vpx_svc_set_options(&svc_, "scale-factors=1/1,1/1,1/1");
  Pass1EncodeNFrames(kFrameCount, kLayerCount, &stats_buf);
  // Pass 2: encode using those statistics.
  codec_enc_.g_pass = VPX_RC_LAST_PASS;
  vpx_svc_set_options(&svc_, "auto-alt-refs=1,1,1 scale-factors=1/1,1/1,1/1");
  vpx_fixed_buf outputs[kFrameCount];
  memset(&outputs[0], 0, sizeof(outputs));
  Pass2EncodeNFrames(&stats_buf, kFrameCount, kLayerCount, &outputs[0]);
  // Decode the untouched stream first.
  DecodeNFrames(&outputs[0], kFrameCount);
  // Then drop and decode repeatedly, in the order 2, 1.
  for (int layers = kLayerCount - 1; layers >= 1; --layers) {
    DropEnhancementLayers(&outputs[0], kFrameCount, layers);
    DecodeNFrames(&outputs[0], kFrameCount);
  }
  FreeBitstreamBuffers(&outputs[0], kFrameCount);
 }
 // Exercises the "multi-frame-contexts=1" option. With svc_.spatial_layers set
 // to 5, vpx_svc_init() is expected to return VPX_CODEC_INVALID_PARAM; with 2
 // layers the option is set again and the encoder is brought up through
 // InitializeEncoder().
 TEST_F(SvcTest, SetMultipleFrameContextsOption) {
  svc_.spatial_layers = 5;
  vpx_codec_err_t res = vpx_svc_set_options(&svc_, "multi-frame-contexts=1");
  EXPECT_EQ(VPX_CODEC_OK, res);
  res = vpx_svc_init(&svc_, &codec_, vpx_codec_vp9_cx(), &codec_enc_);
  EXPECT_EQ(VPX_CODEC_INVALID_PARAM, res);
  svc_.spatial_layers = 2;
  res = vpx_svc_set_options(&svc_, "multi-frame-contexts=1");
  // This status was previously stored and never examined; check it like the
  // identical call above.
  EXPECT_EQ(VPX_CODEC_OK, res);
  InitializeEncoder();
 }
 // Two-pass encode of 10 frames with g_error_resilient cleared and
 // "multi-frame-contexts=1" set for the second pass, then a full decode.
 TEST_F(SvcTest, TwoPassEncode2SpatialLayersWithMultipleFrameContexts) {
  const int kFrameCount = 10;
  // Layer count handed to both encode helpers.
  const int kLayerCount = 2;
  // Pass 1: gather the first-pass statistics.
  std::string stats_buf;
  Pass1EncodeNFrames(kFrameCount, kLayerCount, &stats_buf);
  // Pass 2: encode using those statistics.
  codec_enc_.g_pass = VPX_RC_LAST_PASS;
  codec_enc_.g_error_resilient = 0;
  vpx_svc_set_options(&svc_, "auto-alt-refs=1,1 multi-frame-contexts=1");
  vpx_fixed_buf outputs[kFrameCount];
  memset(&outputs[0], 0, sizeof(outputs));
  Pass2EncodeNFrames(&stats_buf, kFrameCount, kLayerCount, &outputs[0]);
  DecodeNFrames(&outputs[0], kFrameCount);
  FreeBitstreamBuffers(&outputs[0], kFrameCount);
 }
 // Same encode as the multi-frame-contexts test with 2 layers, but
 // DropEnhancementLayers() is applied with an argument of 1 before decoding.
 TEST_F(SvcTest,
       TwoPassEncode2SpatialLayersWithMultipleFrameContextsDecodeBaselayer) {
  const int kFrameCount = 10;
  // Layer count handed to both encode helpers.
  const int kLayerCount = 2;
  // NOTE(review): presumably the number of layers left after dropping --
  // confirm against DropEnhancementLayers().
  const int kRemainingLayers = 1;
  // Pass 1: gather the first-pass statistics.
  std::string stats_buf;
  Pass1EncodeNFrames(kFrameCount, kLayerCount, &stats_buf);
  // Pass 2: encode using those statistics.
  codec_enc_.g_pass = VPX_RC_LAST_PASS;
  codec_enc_.g_error_resilient = 0;
  vpx_svc_set_options(&svc_, "auto-alt-refs=1,1 multi-frame-contexts=1");
  vpx_fixed_buf outputs[kFrameCount];
  memset(&outputs[0], 0, sizeof(outputs));
  Pass2EncodeNFrames(&stats_buf, kFrameCount, kLayerCount, &outputs[0]);
  DropEnhancementLayers(&outputs[0], kFrameCount, kRemainingLayers);
  DecodeNFrames(&outputs[0], kFrameCount);
  FreeBitstreamBuffers(&outputs[0], kFrameCount);
 }
 // Two-pass encode of 10 frames with two 1/1 scale factors, g_error_resilient
 // cleared and "multi-frame-contexts=1" set for the second pass.
 TEST_F(SvcTest, TwoPassEncode2SNRLayersWithMultipleFrameContexts) {
  const int kFrameCount = 10;
  // Layer count handed to both encode helpers.
  const int kLayerCount = 2;
  // Option string applied before the second pass.
  const char kPass2Options[] =
      "auto-alt-refs=1,1 scale-factors=1/1,1/1 "
      "multi-frame-contexts=1";
  // Pass 1: gather the first-pass statistics.
  std::string stats_buf;
  vpx_svc_set_options(&svc_, "scale-factors=1/1,1/1");
  Pass1EncodeNFrames(kFrameCount, kLayerCount, &stats_buf);
  // Pass 2: encode using those statistics.
  codec_enc_.g_pass = VPX_RC_LAST_PASS;
  codec_enc_.g_error_resilient = 0;
  vpx_svc_set_options(&svc_, kPass2Options);
  vpx_fixed_buf outputs[kFrameCount];
  memset(&outputs[0], 0, sizeof(outputs));
  Pass2EncodeNFrames(&stats_buf, kFrameCount, kLayerCount, &outputs[0]);
  DecodeNFrames(&outputs[0], kFrameCount);
  FreeBitstreamBuffers(&outputs[0], kFrameCount);
 }
 // Two-pass encode of 10 frames with three 1/1 scale factors and
 // "multi-frame-contexts=1". The stream is decoded as produced, then again
 // after DropEnhancementLayers() with arguments 2 and 1.
 TEST_F(SvcTest,
       TwoPassEncode3SNRLayersWithMultipleFrameContextsDecode321Layer) {
  const int kFrameCount = 10;
  // Layer count handed to both encode helpers.
  const int kLayerCount = 3;
  // Option string applied before the second pass.
  const char kPass2Options[] =
      "auto-alt-refs=1,1,1 scale-factors=1/1,1/1,1/1 "
      "multi-frame-contexts=1";
  // Pass 1: gather the first-pass statistics.
  std::string stats_buf;
  vpx_svc_set_options(&svc_, "scale-factors=1/1,1/1,1/1");
  Pass1EncodeNFrames(kFrameCount, kLayerCount, &stats_buf);
  // Pass 2: encode using those statistics.
  codec_enc_.g_pass = VPX_RC_LAST_PASS;
  codec_enc_.g_error_resilient = 0;
  vpx_svc_set_options(&svc_, kPass2Options);
  vpx_fixed_buf outputs[kFrameCount];
  memset(&outputs[0], 0, sizeof(outputs));
  Pass2EncodeNFrames(&stats_buf, kFrameCount, kLayerCount, &outputs[0]);
  // Decode the untouched stream first.
  DecodeNFrames(&outputs[0], kFrameCount);
  // Then drop and decode repeatedly, in the order 2, 1.
  for (int layers = kLayerCount - 1; layers >= 1; --layers) {
    DropEnhancementLayers(&outputs[0], kFrameCount, layers);
    DecodeNFrames(&outputs[0], kFrameCount);
  }
  FreeBitstreamBuffers(&outputs[0], kFrameCount);
 }
 // Two-pass encode of 10 frames with svc_.temporal_layers set to 2 before each
 // pass, followed by a decode of every produced frame.
 TEST_F(SvcTest, TwoPassEncode2TemporalLayers) {
  const int kFrameCount = 10;
  // Layer count handed to both encode helpers.
  const int kLayerCount = 1;
  const int kTemporalLayers = 2;
  // Pass 1: gather the first-pass statistics.
  std::string stats_buf;
  vpx_svc_set_options(&svc_, "scale-factors=1/1");
  svc_.temporal_layers = kTemporalLayers;
  Pass1EncodeNFrames(kFrameCount, kLayerCount, &stats_buf);
  // Pass 2: encode using those statistics.
  codec_enc_.g_pass = VPX_RC_LAST_PASS;
  svc_.temporal_layers = kTemporalLayers;
  vpx_svc_set_options(&svc_, "auto-alt-refs=1 scale-factors=1/1");
  vpx_fixed_buf outputs[kFrameCount];
  memset(&outputs[0], 0, sizeof(outputs));
  Pass2EncodeNFrames(&stats_buf, kFrameCount, kLayerCount, &outputs[0]);
  DecodeNFrames(&outputs[0], kFrameCount);
  FreeBitstreamBuffers(&outputs[0], kFrameCount);
 }
 // Two-pass encode of 10 frames with svc_.temporal_layers set to 2,
 // g_error_resilient cleared and "multi-frame-contexts=1" for the second pass.
 TEST_F(SvcTest, TwoPassEncode2TemporalLayersWithMultipleFrameContexts) {
  const int kFrameCount = 10;
  // Layer count handed to both encode helpers.
  const int kLayerCount = 1;
  const int kTemporalLayers = 2;
  // Option string applied before the second pass.
  const char kPass2Options[] =
      "auto-alt-refs=1 scale-factors=1/1 "
      "multi-frame-contexts=1";
  // Pass 1: gather the first-pass statistics.
  std::string stats_buf;
  vpx_svc_set_options(&svc_, "scale-factors=1/1");
  svc_.temporal_layers = kTemporalLayers;
  Pass1EncodeNFrames(kFrameCount, kLayerCount, &stats_buf);
  // Pass 2: encode using those statistics.
  codec_enc_.g_pass = VPX_RC_LAST_PASS;
  svc_.temporal_layers = kTemporalLayers;
  codec_enc_.g_error_resilient = 0;
  vpx_svc_set_options(&svc_, kPass2Options);
  vpx_fixed_buf outputs[kFrameCount];
  memset(&outputs[0], 0, sizeof(outputs));
  Pass2EncodeNFrames(&stats_buf, kFrameCount, kLayerCount, &outputs[0]);
  DecodeNFrames(&outputs[0], kFrameCount);
  FreeBitstreamBuffers(&outputs[0], kFrameCount);
 }
 // Two-pass encode of 10 frames with svc_.temporal_layers set to 2, after which
 // only the even-indexed output buffers are decoded.
 TEST_F(SvcTest, TwoPassEncode2TemporalLayersDecodeBaseLayer) {
  const int kFrameCount = 10;
  // Layer count handed to both encode helpers.
  const int kLayerCount = 1;
  const int kTemporalLayers = 2;
  // Every second output frame is picked for decoding.
  const int kBaseFrameCount = kFrameCount / kTemporalLayers;
  // Pass 1: gather the first-pass statistics.
  std::string stats_buf;
  vpx_svc_set_options(&svc_, "scale-factors=1/1");
  svc_.temporal_layers = kTemporalLayers;
  Pass1EncodeNFrames(kFrameCount, kLayerCount, &stats_buf);
  // Pass 2: encode using those statistics.
  codec_enc_.g_pass = VPX_RC_LAST_PASS;
  svc_.temporal_layers = kTemporalLayers;
  vpx_svc_set_options(&svc_, "auto-alt-refs=1 scale-factors=1/1");
  vpx_fixed_buf outputs[kFrameCount];
  memset(&outputs[0], 0, sizeof(outputs));
  Pass2EncodeNFrames(&stats_buf, kFrameCount, kLayerCount, &outputs[0]);
  // Shallow copies: the buffers are still released through `outputs` below.
  vpx_fixed_buf base_layer[kBaseFrameCount];
  for (int frame = 0; frame < kBaseFrameCount; ++frame) {
    base_layer[frame] = outputs[frame * kTemporalLayers];
  }
  DecodeNFrames(&base_layer[0], kBaseFrameCount);
  FreeBitstreamBuffers(&outputs[0], kFrameCount);
 }
 // Two-pass encode of 10 frames with svc_.temporal_layers set to 2 and
 // "multi-frame-contexts=1", after which only the even-indexed output buffers
 // are decoded.
 TEST_F(SvcTest,
       TwoPassEncode2TemporalLayersWithMultipleFrameContextsDecodeBaseLayer) {
  const int kFrameCount = 10;
  // Layer count handed to both encode helpers.
  const int kLayerCount = 1;
  const int kTemporalLayers = 2;
  // Every second output frame is picked for decoding.
  const int kBaseFrameCount = kFrameCount / kTemporalLayers;
  // Option string applied before the second pass.
  const char kPass2Options[] =
      "auto-alt-refs=1 scale-factors=1/1 "
      "multi-frame-contexts=1";
  // Pass 1: gather the first-pass statistics.
  std::string stats_buf;
  vpx_svc_set_options(&svc_, "scale-factors=1/1");
  svc_.temporal_layers = kTemporalLayers;
  Pass1EncodeNFrames(kFrameCount, kLayerCount, &stats_buf);
  // Pass 2: encode using those statistics.
  codec_enc_.g_pass = VPX_RC_LAST_PASS;
  svc_.temporal_layers = kTemporalLayers;
  codec_enc_.g_error_resilient = 0;
  vpx_svc_set_options(&svc_, kPass2Options);
  vpx_fixed_buf outputs[kFrameCount];
  memset(&outputs[0], 0, sizeof(outputs));
  Pass2EncodeNFrames(&stats_buf, kFrameCount, kLayerCount, &outputs[0]);
  // Shallow copies: the buffers are still released through `outputs` below.
  vpx_fixed_buf base_layer[kBaseFrameCount];
  for (int frame = 0; frame < kBaseFrameCount; ++frame) {
    base_layer[frame] = outputs[frame * kTemporalLayers];
  }
  DecodeNFrames(&base_layer[0], kBaseFrameCount);
  FreeBitstreamBuffers(&outputs[0], kFrameCount);
 }
 // Two-pass encode of 10 frames with svc_.temporal_layers set to 2. Before the
 // second pass the frame size is changed to 704x144 and tile_columns_ /
 // tile_rows_ are both set to 1.
 TEST_F(SvcTest, TwoPassEncode2TemporalLayersWithTiles) {
  const int kFrameCount = 10;
  // Layer count handed to both encode helpers.
  const int kLayerCount = 1;
  const int kTemporalLayers = 2;
  // Pass 1: gather the first-pass statistics.
  std::string stats_buf;
  vpx_svc_set_options(&svc_, "scale-factors=1/1");
  svc_.temporal_layers = kTemporalLayers;
  Pass1EncodeNFrames(kFrameCount, kLayerCount, &stats_buf);
  // Pass 2: encode using those statistics.
  codec_enc_.g_pass = VPX_RC_LAST_PASS;
  svc_.temporal_layers = kTemporalLayers;
  vpx_svc_set_options(&svc_, "auto-alt-refs=1 scale-factors=1/1");
  codec_enc_.g_w = 704;
  codec_enc_.g_h = 144;
  tile_columns_ = 1;
  tile_rows_ = 1;
  vpx_fixed_buf outputs[kFrameCount];
  memset(&outputs[0], 0, sizeof(outputs));
  Pass2EncodeNFrames(&stats_buf, kFrameCount, kLayerCount, &outputs[0]);
  DecodeNFrames(&outputs[0], kFrameCount);
  FreeBitstreamBuffers(&outputs[0], kFrameCount);
 }
 // Two-pass encode of 10 frames with svc_.temporal_layers set to 2 and
 // "multi-frame-contexts=1". Before the second pass the frame size is changed
 // to 704x144 and tile_columns_ / tile_rows_ are both set to 1.
 TEST_F(SvcTest, TwoPassEncode2TemporalLayersWithMultipleFrameContextsAndTiles) {
  const int kFrameCount = 10;
  // Layer count handed to both encode helpers.
  const int kLayerCount = 1;
  const int kTemporalLayers = 2;
  // Option string applied before the second pass.
  const char kPass2Options[] =
      "auto-alt-refs=1 scale-factors=1/1 "
      "multi-frame-contexts=1";
  // Pass 1: gather the first-pass statistics.
  std::string stats_buf;
  vpx_svc_set_options(&svc_, "scale-factors=1/1");
  svc_.temporal_layers = kTemporalLayers;
  Pass1EncodeNFrames(kFrameCount, kLayerCount, &stats_buf);
  // Pass 2: encode using those statistics.
  codec_enc_.g_pass = VPX_RC_LAST_PASS;
  svc_.temporal_layers = kTemporalLayers;
  codec_enc_.g_error_resilient = 0;
  codec_enc_.g_w = 704;
  codec_enc_.g_h = 144;
  tile_columns_ = 1;
  tile_rows_ = 1;
  vpx_svc_set_options(&svc_, kPass2Options);
  vpx_fixed_buf outputs[kFrameCount];
  memset(&outputs[0], 0, sizeof(outputs));
  Pass2EncodeNFrames(&stats_buf, kFrameCount, kLayerCount, &outputs[0]);
  DecodeNFrames(&outputs[0], kFrameCount);
  FreeBitstreamBuffers(&outputs[0], kFrameCount);
 }
 }  // namespace
--- a/test/temporal_filter_test.cc
+++ b/test/temporal_filter_test.cc
@ -1,277 +0,0 @@
 /*
 *  Copyright (c) 2016 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
 #include <limits>
 #include "third_party/googletest/src/include/gtest/gtest.h"
 #include "./vp9_rtcd.h"
 #include "test/acm_random.h"
 #include "test/buffer.h"
 #include "test/register_state_check.h"
 #include "vpx_ports/vpx_timer.h"
 namespace {
 using ::libvpx_test::ACMRandom;
 using ::libvpx_test::Buffer;
 // Signature of the temporal filter implementations exercised by this file
 // (e.g. vp9_temporal_filter_apply_c). The tests call it with the pixels and
 // stride of buffer 'a', the pixels of buffer 'b', the block width and height,
 // the filter strength and weight, and the accumulator/count outputs.
 typedef void (*TemporalFilterFunc)(const uint8_t *a, unsigned int stride,
                                   const uint8_t *b, unsigned int w,
                                   unsigned int h, int filter_strength,
                                   int filter_weight, unsigned int *accumulator,
                                   uint16_t *count);
 // Reference implementation used to validate the filter under test.
 //
 // Calculates the squared difference between 'a' and 'b' for every pixel, sums
 // it over the 3x3 neighborhood that lies inside the w x h block, and turns
 // that sum into a per-pixel modifier based on 'filter_strength' and
 // 'filter_weight'. The modifier is added to 'count', and the modifier
 // multiplied by the 'b' pixel is added to 'accumulator'.
 //
 // The divisor is the number of neighbors actually summed rather than a
 // hand-derived 9/6/4, so blocks with a dimension of 1 are also handled; for
 // every block of at least 2x2 the result is unchanged.
 void reference_filter(const Buffer<uint8_t> &a, const Buffer<uint8_t> &b, int w,
                      int h, int filter_strength, int filter_weight,
                      Buffer<unsigned int> *accumulator,
                      Buffer<uint16_t> *count) {
  Buffer<int> diff_sq = Buffer<int>(w, h, 0);
  ASSERT_TRUE(diff_sq.Init());
  diff_sq.Set(0);
  // Rounding term for the right shift by 'filter_strength' below.
  int rounding = 0;
  if (filter_strength > 0) {
    rounding = 1 << (filter_strength - 1);
  }
  // Calculate all the differences. Avoids re-calculating a bunch of extra
  // values.
  for (int height = 0; height < h; ++height) {
    for (int width = 0; width < w; ++width) {
      int diff = a.TopLeftPixel()[height * a.stride() + width] -
                 b.TopLeftPixel()[height * b.stride() + width];
      diff_sq.TopLeftPixel()[height * diff_sq.stride() + width] = diff * diff;
    }
  }
  // For any given point, sum the neighboring values and calculate the
  // modifier.
  for (int height = 0; height < h; ++height) {
    for (int width = 0; width < w; ++width) {
      // Sum the diff_sq of the surrounding values, counting how many of them
      // fall inside the block. The center pixel is always inside, so the
      // count is never zero.
      int sum = 0;
      int summed_values = 0;
      for (int idy = -1; idy <= 1; ++idy) {
        for (int idx = -1; idx <= 1; ++idx) {
          const int y = height + idy;
          const int x = width + idx;
          // If inside the border.
          if (y >= 0 && y < h && x >= 0 && x < w) {
            sum += diff_sq.TopLeftPixel()[y * diff_sq.stride() + x];
            ++summed_values;
          }
        }
      }
      sum *= 3;
      sum /= summed_values;
      sum += rounding;
      sum >>= filter_strength;
      // Clamp the value and invert it.
      if (sum > 16) sum = 16;
      sum = 16 - sum;
      sum *= filter_weight;
      count->TopLeftPixel()[height * count->stride() + width] += sum;
      accumulator->TopLeftPixel()[height * accumulator->stride() + width] +=
          sum * b.TopLeftPixel()[height * b.stride() + width];
    }
  }
 }
 // Value-parameterized fixture: the parameter is the filter implementation to
 // test.
 class TemporalFilterTest : public ::testing::TestWithParam<TemporalFilterFunc> {
 public:
  // Reseeds the random source deterministically and captures the filter
  // supplied as the test parameter.
  virtual void SetUp() {
    rnd_.Reset(ACMRandom::DeterministicSeed());
    filter_func_ = GetParam();
  }
 protected:
  // Filter implementation under test.
  TemporalFilterFunc filter_func_;
  // Source of the pseudo-random pixel and accumulator data.
  ACMRandom rnd_;
 };
 // Compares the filter under test with reference_filter() for every 8/16
 // width/height pairing at a fixed strength and weight.
 TEST_P(TemporalFilterTest, SizeCombinations) {
  const int kStrength = 6;
  const int kWeight = 2;
  // Depending on subsampling this function may be called with values of 8 or 16
  // for width and height, in any combination.
  Buffer<uint8_t> a(16, 16, 8);
  ASSERT_TRUE(a.Init());
  for (int w = 8; w <= 16; w += 8) {
    for (int h = 8; h <= 16; h += 8) {
      // The second buffer must not have any border.
      Buffer<uint8_t> b(w, h, 0);
      ASSERT_TRUE(b.Init());
      Buffer<unsigned int> accum_ref(w, h, 0);
      ASSERT_TRUE(accum_ref.Init());
      Buffer<unsigned int> accum_chk(w, h, 0);
      ASSERT_TRUE(accum_chk.Init());
      Buffer<uint16_t> count_ref(w, h, 0);
      ASSERT_TRUE(count_ref.Init());
      Buffer<uint16_t> count_chk(w, h, 0);
      ASSERT_TRUE(count_chk.Init());
      // The difference between the buffers must be small to pass the threshold
      // to apply the filter.
      a.Set(&rnd_, 0, 7);
      b.Set(&rnd_, 0, 7);
      // Reference and checked outputs start from the same random fill.
      accum_ref.Set(rnd_.Rand8());
      accum_chk.CopyFrom(accum_ref);
      count_ref.Set(rnd_.Rand8());
      count_chk.CopyFrom(count_ref);
      reference_filter(a, b, w, h, kStrength, kWeight, &accum_ref, &count_ref);
      ASM_REGISTER_STATE_CHECK(filter_func_(
          a.TopLeftPixel(), a.stride(), b.TopLeftPixel(), w, h, kStrength,
          kWeight, accum_chk.TopLeftPixel(), count_chk.TopLeftPixel()));
      EXPECT_TRUE(accum_chk.CheckValues(accum_ref));
      EXPECT_TRUE(count_chk.CheckValues(count_ref));
      // Report the first failing size and stop.
      if (HasFailure()) {
        printf("Width: %d Height: %d\n", w, h);
        count_chk.PrintDifference(count_ref);
        accum_chk.PrintDifference(accum_ref);
        return;
      }
    }
  }
 }
 // Compares the filter under test with reference_filter() on random data for
 // every 8/16 width/height pairing, every filter strength 0..6 and every filter
 // weight 0..2, with 100 repetitions per combination.
 TEST_P(TemporalFilterTest, CompareReferenceRandom) {
  for (int width = 8; width <= 16; width += 8) {
    for (int height = 8; height <= 16; height += 8) {
      Buffer<uint8_t> a = Buffer<uint8_t>(width, height, 8);
      ASSERT_TRUE(a.Init());
      // The second buffer must not have any border.
      Buffer<uint8_t> b = Buffer<uint8_t>(width, height, 0);
      ASSERT_TRUE(b.Init());
      Buffer<unsigned int> accum_ref = Buffer<unsigned int>(width, height, 0);
      ASSERT_TRUE(accum_ref.Init());
      Buffer<unsigned int> accum_chk = Buffer<unsigned int>(width, height, 0);
      ASSERT_TRUE(accum_chk.Init());
      Buffer<uint16_t> count_ref = Buffer<uint16_t>(width, height, 0);
      ASSERT_TRUE(count_ref.Init());
      Buffer<uint16_t> count_chk = Buffer<uint16_t>(width, height, 0);
      ASSERT_TRUE(count_chk.Init());
      for (int filter_strength = 0; filter_strength <= 6; ++filter_strength) {
        for (int filter_weight = 0; filter_weight <= 2; ++filter_weight) {
          for (int repeat = 0; repeat < 100; ++repeat) {
            // First half of the repetitions uses small pixel values, the
            // second half values near the top of the uint8_t range.
            if (repeat < 50) {
              a.Set(&rnd_, 0, 7);
              b.Set(&rnd_, 0, 7);
            } else {
              // Check large (but close) values as well.
              a.Set(&rnd_, std::numeric_limits<uint8_t>::max() - 7,
                    std::numeric_limits<uint8_t>::max());
              b.Set(&rnd_, std::numeric_limits<uint8_t>::max() - 7,
                    std::numeric_limits<uint8_t>::max());
            }
            // Reference and checked outputs start from the same random fill.
            accum_ref.Set(rnd_.Rand8());
            accum_chk.CopyFrom(accum_ref);
            count_ref.Set(rnd_.Rand8());
            count_chk.CopyFrom(count_ref);
            reference_filter(a, b, width, height, filter_strength,
                             filter_weight, &accum_ref, &count_ref);
            ASM_REGISTER_STATE_CHECK(filter_func_(
                a.TopLeftPixel(), a.stride(), b.TopLeftPixel(), width, height,
                filter_strength, filter_weight, accum_chk.TopLeftPixel(),
                count_chk.TopLeftPixel()));
            EXPECT_TRUE(accum_chk.CheckValues(accum_ref));
            EXPECT_TRUE(count_chk.CheckValues(count_ref));
            // Report the first failing parameter combination and stop.
            if (HasFailure()) {
              printf("Weight: %d Strength: %d\n", filter_weight,
                     filter_strength);
              count_chk.PrintDifference(count_ref);
              accum_chk.PrintDifference(accum_ref);
              return;
            }
          }
        }
      }
    }
  }
 }
 // Timing harness (disabled by default): for each 8/16 width/height pairing it
 // runs the filter under test 10000 times and prints the elapsed microseconds.
 // No reference output is computed, so only the buffers passed to the filter
 // are allocated.
 TEST_P(TemporalFilterTest, DISABLED_Speed) {
  Buffer<uint8_t> a = Buffer<uint8_t>(16, 16, 8);
  ASSERT_TRUE(a.Init());
  const int filter_weight = 2;
  const int filter_strength = 6;
  for (int width = 8; width <= 16; width += 8) {
    for (int height = 8; height <= 16; height += 8) {
      // The second buffer must not have any border.
      Buffer<uint8_t> b = Buffer<uint8_t>(width, height, 0);
      ASSERT_TRUE(b.Init());
      Buffer<unsigned int> accum_chk = Buffer<unsigned int>(width, height, 0);
      ASSERT_TRUE(accum_chk.Init());
      Buffer<uint16_t> count_chk = Buffer<uint16_t>(width, height, 0);
      ASSERT_TRUE(count_chk.Init());
      a.Set(&rnd_, 0, 7);
      b.Set(&rnd_, 0, 7);
      accum_chk.Set(0);
      count_chk.Set(0);
      vpx_usec_timer timer;
      vpx_usec_timer_start(&timer);
      for (int i = 0; i < 10000; ++i) {
        filter_func_(a.TopLeftPixel(), a.stride(), b.TopLeftPixel(), width,
                     height, filter_strength, filter_weight,
                     accum_chk.TopLeftPixel(), count_chk.TopLeftPixel());
      }
      vpx_usec_timer_mark(&timer);
      const int elapsed_time = static_cast<int>(vpx_usec_timer_elapsed(&timer));
      printf("Temporal filter %dx%d time: %5d us\n", width, height,
             elapsed_time);
    }
  }
 }
 // Run every TemporalFilterTest against the C implementation.
 INSTANTIATE_TEST_CASE_P(C, TemporalFilterTest,
                        ::testing::Values(&vp9_temporal_filter_apply_c));
 // The SSE4.1 implementation is only instantiated when HAVE_SSE4_1 is set.
 #if HAVE_SSE4_1
 INSTANTIATE_TEST_CASE_P(SSE4_1, TemporalFilterTest,
                        ::testing::Values(&vp9_temporal_filter_apply_sse4_1));
 #endif  // HAVE_SSE4_1
 }  // namespace
--- a/test/test-data.mk
+++ b/test/test-data.mk
@ -20,10 +20,8 @@ LIBVPX_TEST_DATA-$(CONFIG_ENCODERS) += park_joy_90p_8_440.yuv
 LIBVPX_TEST_DATA-$(CONFIG_VP9_ENCODER) += desktop_credits.y4m
 LIBVPX_TEST_DATA-$(CONFIG_VP9_ENCODER) += niklas_1280_720_30.y4m
 LIBVPX_TEST_DATA-$(CONFIG_VP9_ENCODER) += noisy_clip_640_360.y4m
 LIBVPX_TEST_DATA-$(CONFIG_VP9_ENCODER) += rush_hour_444.y4m
 LIBVPX_TEST_DATA-$(CONFIG_VP9_ENCODER) += screendata.y4m
 LIBVPX_TEST_DATA-$(CONFIG_VP9_ENCODER) += niklas_640_480_30.yuv
 # Test vectors
 LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-001.ivf
@ -732,10 +730,6 @@ LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp93-2-20-12bit-yuv444.webm.md5
 endif  # CONFIG_VP9_HIGHBITDEPTH
 # Invalid files for testing libvpx error checking.
 LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += invalid-bug-1443.ivf
 LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += invalid-bug-1443.ivf.res
 LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += invalid-token-partition.ivf
 LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += invalid-token-partition.ivf.res
 LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += invalid-vp80-00-comprehensive-018.ivf.2kf_0x6.ivf
 LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += invalid-vp80-00-comprehensive-018.ivf.2kf_0x6.ivf.res
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-01-v3.webm
@ -776,8 +770,6 @@ LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-2-12-droppable_1.ivf.s367
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-2-12-droppable_1.ivf.s3676_r01-05_b6-.ivf.res
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-2-12-droppable_1.ivf.s73804_r01-05_b6-.ivf
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-2-12-droppable_1.ivf.s73804_r01-05_b6-.ivf.res
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-2-21-resize_inter_320x180_5_3-4.webm.ivf.s45551_r01-05_b6-.ivf
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-2-21-resize_inter_320x180_5_3-4.webm.ivf.s45551_r01-05_b6-.ivf.res
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp91-2-mixedrefcsp-444to420.ivf
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp91-2-mixedrefcsp-444to420.ivf.res
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-2-07-frame_parallel-1.webm
@ -821,6 +813,7 @@ LIBVPX_TEST_DATA-$(CONFIG_VP9_ENCODER) += kirland_640_480_30.yuv
 LIBVPX_TEST_DATA-$(CONFIG_VP9_ENCODER) += macmarcomoving_640_480_30.yuv
 LIBVPX_TEST_DATA-$(CONFIG_VP9_ENCODER) += macmarcostationary_640_480_30.yuv
 LIBVPX_TEST_DATA-$(CONFIG_VP9_ENCODER) += niklas_1280_720_30.yuv
 LIBVPX_TEST_DATA-$(CONFIG_VP9_ENCODER) += niklas_640_480_30.yuv
 LIBVPX_TEST_DATA-$(CONFIG_VP9_ENCODER) += tacomanarrows_640_480_30.yuv
 LIBVPX_TEST_DATA-$(CONFIG_VP9_ENCODER) += tacomasmallcameramovement_640_480_30.yuv
 LIBVPX_TEST_DATA-$(CONFIG_VP9_ENCODER) += thaloundeskmtg_640_480_30.yuv
@ -880,5 +873,3 @@ LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-21-resize_inter_1920x1080_7_3-4
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-21-resize_inter_1920x1080_7_3-4.webm.md5
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-22-svc_1280x720_3.ivf
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-22-svc_1280x720_3.ivf.md5
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-22-svc_1280x720_1.webm
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-22-svc_1280x720_1.webm.md5
--- a/test/test-data.sha1
+++ b/test/test-data.sha1
@ -6,8 +6,6 @@ b87815bf86020c592ccc7a846ba2e28ec8043902 *hantro_odd.yuv
 456d1493e52d32a5c30edf44a27debc1fa6b253a *invalid-vp90-2-00-quantizer-11.webm.ivf.s52984_r01-05_b6-.ivf.res
 c123d1f9f02fb4143abb5e271916e3a3080de8f6 *invalid-vp90-2-00-quantizer-11.webm.ivf.s52984_r01-05_b6-z.ivf
 456d1493e52d32a5c30edf44a27debc1fa6b253a *invalid-vp90-2-00-quantizer-11.webm.ivf.s52984_r01-05_b6-z.ivf.res
 efafb92b7567bc04c3f1432ea6c268c1c31affd5 *invalid-vp90-2-21-resize_inter_320x180_5_3-4.webm.ivf.s45551_r01-05_b6-.ivf
 5d9474c0309b7ca09a182d888f73b37a8fe1362c *invalid-vp90-2-21-resize_inter_320x180_5_3-4.webm.ivf.s45551_r01-05_b6-.ivf.res
 fe346136b9b8c1e6f6084cc106485706915795e4 *invalid-vp90-01-v3.webm
 5d9474c0309b7ca09a182d888f73b37a8fe1362c *invalid-vp90-01-v3.webm.res
 d78e2fceba5ac942246503ec8366f879c4775ca5 *invalid-vp90-02-v2.webm
@ -16,7 +14,6 @@ df1a1453feb3c00d7d89746c7003b4163523bff3 *invalid-vp90-03-v3.webm
 4935c62becc68c13642a03db1e6d3e2331c1c612 *invalid-vp90-03-v3.webm.res
 d637297561dd904eb2c97a9015deeb31c4a1e8d2 *invalid-vp90-2-08-tile_1x4_frame_parallel_all_key.webm
 3a204bdbeaa3c6458b77bcebb8366d107267f55d *invalid-vp90-2-08-tile_1x4_frame_parallel_all_key.webm.res
 9aa21d8b2cb9d39abe8a7bb6032dc66955fb4342 *noisy_clip_640_360.y4m
 a432f96ff0a787268e2f94a8092ab161a18d1b06 *park_joy_90p_10_420.y4m
 0b194cc312c3a2e84d156a221b0a5eb615dfddc5 *park_joy_90p_10_422.y4m
 ff0e0a21dc2adc95b8c1b37902713700655ced17 *park_joy_90p_10_444.y4m
@ -847,12 +844,5 @@ a000d568431d07379dd5a8ec066061c07e560b47 *invalid-vp90-2-00-quantizer-63.ivf.kf_
 5d9474c0309b7ca09a182d888f73b37a8fe1362c *invalid-crbug-629481.webm.res
 7602e00378161ca36ae93cc6ee12dd30b5ba1e1d *vp90-2-22-svc_1280x720_3.ivf
 02e53e3eefbf25ec0929047fe50876acdeb040bd *vp90-2-22-svc_1280x720_3.ivf.md5
 6fa3d3ac306a3d9ce1d610b78441dc00d2c2d4b9 *tos_vp8.webm
 e402cbbf9e550ae017a1e9f1f73931c1d18474e8 *invalid-crbug-667044.webm
 d3964f9dad9f60363c81b688324d95b4ec7c8038 *invalid-crbug-667044.webm.res
 fd9df7f3f6992af1d7a9dde975c9a0d6f28c053d *invalid-bug-1443.ivf
 fd3020fa6e9ca5966206738654c97dec313b0a95 *invalid-bug-1443.ivf.res
 1a0e405606939f2febab1a21b30c37cb8f2c8cb1 *invalid-token-partition.ivf
 90a8a95e7024f015b87f5483a65036609b3d1b74 *invalid-token-partition.ivf.res
 17696cd21e875f1d6e5d418cbf89feab02c8850a *vp90-2-22-svc_1280x720_1.webm
 e2f9e1e47a791b4e939a9bdc50bf7a25b3761f77 *vp90-2-22-svc_1280x720_1.webm.md5
--- a/test/test.mk
+++ b/test/test.mk
@ -1,5 +1,4 @@
 LIBVPX_TEST_SRCS-yes += acm_random.h
 LIBVPX_TEST_SRCS-yes += buffer.h
 LIBVPX_TEST_SRCS-yes += clear_system_state.h
 LIBVPX_TEST_SRCS-yes += codec_factory.h
 LIBVPX_TEST_SRCS-yes += md5_helper.h
@ -22,9 +21,7 @@ LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS)    += ../y4minput.h ../y4minput.c
 LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS)    += altref_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS)    += aq_segment_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS)    += alt_ref_aq_segment_test.cc
-LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS)    += vp8_datarate_test.cc
+LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS)    += datarate_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS)    += vp9_datarate_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS)    += svc_datarate_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS)    += encode_api_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS)    += error_resilience_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS)    += i420_video_source.h
@ -41,6 +38,7 @@ LIBVPX_TEST_SRCS-$(CONFIG_VP9_DECODER) += byte_alignment_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_VP9_DECODER) += decode_svc_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_VP9_DECODER) += external_frame_buffer_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_VP9_DECODER) += user_priv_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_VP9_DECODER) += vp9_frame_parallel_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += active_map_refresh_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += active_map_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += borders_test.cc
@ -49,7 +47,6 @@ LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += frame_size_tests.cc
 LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += vp9_lossless_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += vp9_end_to_end_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += vp9_ethread_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += vp9_motion_vector_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += level_test.cc
 LIBVPX_TEST_SRCS-yes                   += decode_test_driver.cc
@ -125,7 +122,6 @@ LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += vp8_fdct4x4_test.cc
 LIBVPX_TEST_SRCS-yes                   += idct_test.cc
 LIBVPX_TEST_SRCS-yes                   += predict_test.cc
 LIBVPX_TEST_SRCS-yes                   += vpx_scale_test.cc
 LIBVPX_TEST_SRCS-yes                   += vpx_scale_test.h
 ifeq ($(CONFIG_VP8_ENCODER)$(CONFIG_TEMPORAL_DENOISING),yesyes)
 LIBVPX_TEST_SRCS-$(HAVE_SSE2) += vp8_denoiser_sse2_test.cc
@ -153,30 +149,25 @@ LIBVPX_TEST_SRCS-yes                   += vp9_intrapred_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_VP9_DECODER) += vp9_decrypt_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_VP9_DECODER) += vp9_thread_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += avg_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += comp_avg_pred_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += dct16x16_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += dct32x32_test.cc
-LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += dct_partial_test.cc
+LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += fdct4x4_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += dct_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += fdct8x8_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += hadamard_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += minmax_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += vp9_scale_test.cc
 ifneq ($(CONFIG_REALTIME_ONLY),yes)
 LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += temporal_filter_test.cc
 endif
 LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += variance_test.cc
-LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += vp9_block_error_test.cc
+LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += vp9_error_block_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += vp9_quantize_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += vp9_subtract_test.cc
 ifeq ($(CONFIG_VP9_ENCODER),yes)
 LIBVPX_TEST_SRCS-$(CONFIG_SPATIAL_SVC) += svc_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_INTERNAL_STATS) += blockiness_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_INTERNAL_STATS) += consistency_test.cc
 endif
 ifeq ($(CONFIG_VP9_ENCODER)$(CONFIG_VP9_TEMPORAL_DENOISING),yesyes)
-LIBVPX_TEST_SRCS-yes += vp9_denoiser_test.cc
+LIBVPX_TEST_SRCS-$(HAVE_SSE2) += vp9_denoiser_sse2_test.cc
 endif
 LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += vp9_arf_freq_test.cc
--- a/test/test_intra_pred_speed.cc
+++ b/test/test_intra_pred_speed.cc
@ -270,22 +270,19 @@ INTRA_PRED_TEST(NEON, TestIntraPred4, vpx_dc_predictor_4x4_neon,
 INTRA_PRED_TEST(NEON, TestIntraPred8, vpx_dc_predictor_8x8_neon,
                vpx_dc_left_predictor_8x8_neon, vpx_dc_top_predictor_8x8_neon,
                vpx_dc_128_predictor_8x8_neon, vpx_v_predictor_8x8_neon,
-                vpx_h_predictor_8x8_neon, vpx_d45_predictor_8x8_neon,
+                vpx_h_predictor_8x8_neon, vpx_d45_predictor_8x8_neon, NULL,
-                vpx_d135_predictor_8x8_neon, NULL, NULL, NULL, NULL,
+                NULL, NULL, NULL, NULL, vpx_tm_predictor_8x8_neon)
                vpx_tm_predictor_8x8_neon)
 INTRA_PRED_TEST(NEON, TestIntraPred16, vpx_dc_predictor_16x16_neon,
                vpx_dc_left_predictor_16x16_neon,
                vpx_dc_top_predictor_16x16_neon,
                vpx_dc_128_predictor_16x16_neon, vpx_v_predictor_16x16_neon,
-                vpx_h_predictor_16x16_neon, vpx_d45_predictor_16x16_neon,
+                vpx_h_predictor_16x16_neon, vpx_d45_predictor_16x16_neon, NULL,
-                vpx_d135_predictor_16x16_neon, NULL, NULL, NULL, NULL,
+                NULL, NULL, NULL, NULL, vpx_tm_predictor_16x16_neon)
                vpx_tm_predictor_16x16_neon)
 INTRA_PRED_TEST(NEON, TestIntraPred32, vpx_dc_predictor_32x32_neon,
                vpx_dc_left_predictor_32x32_neon,
                vpx_dc_top_predictor_32x32_neon,
                vpx_dc_128_predictor_32x32_neon, vpx_v_predictor_32x32_neon,
-                vpx_h_predictor_32x32_neon, vpx_d45_predictor_32x32_neon,
+                vpx_h_predictor_32x32_neon, NULL, NULL, NULL, NULL, NULL, NULL,
                vpx_d135_predictor_32x32_neon, NULL, NULL, NULL, NULL,
                vpx_tm_predictor_32x32_neon)
 #endif  // HAVE_NEON
@ -312,31 +309,6 @@ INTRA_PRED_TEST(MSA, TestIntraPred32, vpx_dc_predictor_32x32_msa,
                vpx_tm_predictor_32x32_msa)
 #endif  // HAVE_MSA
 #if HAVE_VSX
 INTRA_PRED_TEST(VSX, TestIntraPred4, NULL, NULL, NULL, NULL, NULL,
                vpx_h_predictor_4x4_vsx, NULL, NULL, NULL, NULL, NULL, NULL,
                vpx_tm_predictor_4x4_vsx)
 INTRA_PRED_TEST(VSX, TestIntraPred8, vpx_dc_predictor_8x8_vsx, NULL, NULL, NULL,
                NULL, vpx_h_predictor_8x8_vsx, vpx_d45_predictor_8x8_vsx, NULL,
                NULL, NULL, NULL, vpx_d63_predictor_8x8_vsx,
                vpx_tm_predictor_8x8_vsx)
 INTRA_PRED_TEST(VSX, TestIntraPred16, vpx_dc_predictor_16x16_vsx,
                vpx_dc_left_predictor_16x16_vsx, vpx_dc_top_predictor_16x16_vsx,
                vpx_dc_128_predictor_16x16_vsx, vpx_v_predictor_16x16_vsx,
                vpx_h_predictor_16x16_vsx, vpx_d45_predictor_16x16_vsx, NULL,
                NULL, NULL, NULL, vpx_d63_predictor_16x16_vsx,
                vpx_tm_predictor_16x16_vsx)
 INTRA_PRED_TEST(VSX, TestIntraPred32, vpx_dc_predictor_32x32_vsx,
                vpx_dc_left_predictor_32x32_vsx, vpx_dc_top_predictor_32x32_vsx,
                vpx_dc_128_predictor_32x32_vsx, vpx_v_predictor_32x32_vsx,
                vpx_h_predictor_32x32_vsx, vpx_d45_predictor_32x32_vsx, NULL,
                NULL, NULL, NULL, vpx_d63_predictor_32x32_vsx,
                vpx_tm_predictor_32x32_vsx)
 #endif  // HAVE_VSX
 // -----------------------------------------------------------------------------
 #if CONFIG_VP9_HIGHBITDEPTH
@ -480,107 +452,29 @@ HIGHBD_INTRA_PRED_TEST(
    vpx_highbd_d63_predictor_32x32_c, vpx_highbd_tm_predictor_32x32_c)
 #if HAVE_SSE2
-HIGHBD_INTRA_PRED_TEST(
+HIGHBD_INTRA_PRED_TEST(SSE2, TestHighbdIntraPred4,
-    SSE2, TestHighbdIntraPred4, vpx_highbd_dc_predictor_4x4_sse2,
+                       vpx_highbd_dc_predictor_4x4_sse2, NULL, NULL, NULL,
-    vpx_highbd_dc_left_predictor_4x4_sse2, vpx_highbd_dc_top_predictor_4x4_sse2,
+                       vpx_highbd_v_predictor_4x4_sse2, NULL, NULL, NULL, NULL,
-    vpx_highbd_dc_128_predictor_4x4_sse2, vpx_highbd_v_predictor_4x4_sse2,
+                       NULL, NULL, NULL, vpx_highbd_tm_predictor_4x4_c)
    vpx_highbd_h_predictor_4x4_sse2, NULL, vpx_highbd_d135_predictor_4x4_sse2,
    vpx_highbd_d117_predictor_4x4_sse2, vpx_highbd_d153_predictor_4x4_sse2,
    vpx_highbd_d207_predictor_4x4_sse2, vpx_highbd_d63_predictor_4x4_sse2,
    vpx_highbd_tm_predictor_4x4_c)
 HIGHBD_INTRA_PRED_TEST(SSE2, TestHighbdIntraPred8,
-                       vpx_highbd_dc_predictor_8x8_sse2,
+                       vpx_highbd_dc_predictor_8x8_sse2, NULL, NULL, NULL,
-                       vpx_highbd_dc_left_predictor_8x8_sse2,
+                       vpx_highbd_v_predictor_8x8_sse2, NULL, NULL, NULL, NULL,
-                       vpx_highbd_dc_top_predictor_8x8_sse2,
+                       NULL, NULL, NULL, vpx_highbd_tm_predictor_8x8_sse2)
                       vpx_highbd_dc_128_predictor_8x8_sse2,
                       vpx_highbd_v_predictor_8x8_sse2,
                       vpx_highbd_h_predictor_8x8_sse2, NULL, NULL, NULL, NULL,
                       NULL, NULL, vpx_highbd_tm_predictor_8x8_sse2)
 HIGHBD_INTRA_PRED_TEST(SSE2, TestHighbdIntraPred16,
-                       vpx_highbd_dc_predictor_16x16_sse2,
+                       vpx_highbd_dc_predictor_16x16_sse2, NULL, NULL, NULL,
-                       vpx_highbd_dc_left_predictor_16x16_sse2,
+                       vpx_highbd_v_predictor_16x16_sse2, NULL, NULL, NULL,
-                       vpx_highbd_dc_top_predictor_16x16_sse2,
+                       NULL, NULL, NULL, NULL,
-                       vpx_highbd_dc_128_predictor_16x16_sse2,
+                       vpx_highbd_tm_predictor_16x16_sse2)
                       vpx_highbd_v_predictor_16x16_sse2,
                       vpx_highbd_h_predictor_16x16_sse2, NULL, NULL, NULL,
                       NULL, NULL, NULL, vpx_highbd_tm_predictor_16x16_sse2)
 HIGHBD_INTRA_PRED_TEST(SSE2, TestHighbdIntraPred32,
-                       vpx_highbd_dc_predictor_32x32_sse2,
+                       vpx_highbd_dc_predictor_32x32_sse2, NULL, NULL, NULL,
-                       vpx_highbd_dc_left_predictor_32x32_sse2,
+                       vpx_highbd_v_predictor_32x32_sse2, NULL, NULL, NULL,
-                       vpx_highbd_dc_top_predictor_32x32_sse2,
+                       NULL, NULL, NULL, NULL,
-                       vpx_highbd_dc_128_predictor_32x32_sse2,
+                       vpx_highbd_tm_predictor_32x32_sse2)
                       vpx_highbd_v_predictor_32x32_sse2,
                       vpx_highbd_h_predictor_32x32_sse2, NULL, NULL, NULL,
                       NULL, NULL, NULL, vpx_highbd_tm_predictor_32x32_sse2)
 #endif  // HAVE_SSE2
 #if HAVE_SSSE3
 // SSSE3 high-bitdepth registrations. Only the directional predictors are
 // supplied here: slots 7-12 hold d45, d135, d117, d153, d207 and d63 (the 4x4
 // entry provides d45 only), while slots 1-6 and the final slot are NULL.
 // NOTE(review): NULL presumably means the mode is not exercised for SSSE3;
 // confirm against the HIGHBD_INTRA_PRED_TEST macro definition.
 HIGHBD_INTRA_PRED_TEST(SSSE3, TestHighbdIntraPred4, NULL, NULL, NULL, NULL,
                       NULL, NULL, vpx_highbd_d45_predictor_4x4_ssse3, NULL,
                       NULL, NULL, NULL, NULL, NULL)
 HIGHBD_INTRA_PRED_TEST(SSSE3, TestHighbdIntraPred8, NULL, NULL, NULL, NULL,
                       NULL, NULL, vpx_highbd_d45_predictor_8x8_ssse3,
                       vpx_highbd_d135_predictor_8x8_ssse3,
                       vpx_highbd_d117_predictor_8x8_ssse3,
                       vpx_highbd_d153_predictor_8x8_ssse3,
                       vpx_highbd_d207_predictor_8x8_ssse3,
                       vpx_highbd_d63_predictor_8x8_ssse3, NULL)
 HIGHBD_INTRA_PRED_TEST(SSSE3, TestHighbdIntraPred16, NULL, NULL, NULL, NULL,
                       NULL, NULL, vpx_highbd_d45_predictor_16x16_ssse3,
                       vpx_highbd_d135_predictor_16x16_ssse3,
                       vpx_highbd_d117_predictor_16x16_ssse3,
                       vpx_highbd_d153_predictor_16x16_ssse3,
                       vpx_highbd_d207_predictor_16x16_ssse3,
                       vpx_highbd_d63_predictor_16x16_ssse3, NULL)
 HIGHBD_INTRA_PRED_TEST(SSSE3, TestHighbdIntraPred32, NULL, NULL, NULL, NULL,
                       NULL, NULL, vpx_highbd_d45_predictor_32x32_ssse3,
                       vpx_highbd_d135_predictor_32x32_ssse3,
                       vpx_highbd_d117_predictor_32x32_ssse3,
                       vpx_highbd_d153_predictor_32x32_ssse3,
                       vpx_highbd_d207_predictor_32x32_ssse3,
                       vpx_highbd_d63_predictor_32x32_ssse3, NULL)
 #endif  // HAVE_SSSE3
 #if HAVE_NEON
 // NEON high-bitdepth registrations, one per block size. Each entry supplies,
 // in order, dc, dc_left, dc_top, dc_128, v, h, d45 and d135, then four NULL
 // slots, then tm.
 // NOTE(review): the four NULL slots line up with d117, d153, d207 and d63 in
 // the SSSE3 entries, so they presumably mark modes without a NEON version;
 // confirm against the HIGHBD_INTRA_PRED_TEST macro definition.
 HIGHBD_INTRA_PRED_TEST(
    NEON, TestHighbdIntraPred4, vpx_highbd_dc_predictor_4x4_neon,
    vpx_highbd_dc_left_predictor_4x4_neon, vpx_highbd_dc_top_predictor_4x4_neon,
    vpx_highbd_dc_128_predictor_4x4_neon, vpx_highbd_v_predictor_4x4_neon,
    vpx_highbd_h_predictor_4x4_neon, vpx_highbd_d45_predictor_4x4_neon,
    vpx_highbd_d135_predictor_4x4_neon, NULL, NULL, NULL, NULL,
    vpx_highbd_tm_predictor_4x4_neon)
 HIGHBD_INTRA_PRED_TEST(
    NEON, TestHighbdIntraPred8, vpx_highbd_dc_predictor_8x8_neon,
    vpx_highbd_dc_left_predictor_8x8_neon, vpx_highbd_dc_top_predictor_8x8_neon,
    vpx_highbd_dc_128_predictor_8x8_neon, vpx_highbd_v_predictor_8x8_neon,
    vpx_highbd_h_predictor_8x8_neon, vpx_highbd_d45_predictor_8x8_neon,
    vpx_highbd_d135_predictor_8x8_neon, NULL, NULL, NULL, NULL,
    vpx_highbd_tm_predictor_8x8_neon)
 HIGHBD_INTRA_PRED_TEST(NEON, TestHighbdIntraPred16,
                       vpx_highbd_dc_predictor_16x16_neon,
                       vpx_highbd_dc_left_predictor_16x16_neon,
                       vpx_highbd_dc_top_predictor_16x16_neon,
                       vpx_highbd_dc_128_predictor_16x16_neon,
                       vpx_highbd_v_predictor_16x16_neon,
                       vpx_highbd_h_predictor_16x16_neon,
                       vpx_highbd_d45_predictor_16x16_neon,
                       vpx_highbd_d135_predictor_16x16_neon, NULL, NULL, NULL,
                       NULL, vpx_highbd_tm_predictor_16x16_neon)
 HIGHBD_INTRA_PRED_TEST(NEON, TestHighbdIntraPred32,
                       vpx_highbd_dc_predictor_32x32_neon,
                       vpx_highbd_dc_left_predictor_32x32_neon,
                       vpx_highbd_dc_top_predictor_32x32_neon,
                       vpx_highbd_dc_128_predictor_32x32_neon,
                       vpx_highbd_v_predictor_32x32_neon,
                       vpx_highbd_h_predictor_32x32_neon,
                       vpx_highbd_d45_predictor_32x32_neon,
                       vpx_highbd_d135_predictor_32x32_neon, NULL, NULL, NULL,
                       NULL, vpx_highbd_tm_predictor_32x32_neon)
 #endif  // HAVE_NEON
 #endif  // CONFIG_VP9_HIGHBITDEPTH
 #include "test/test_libvpx.cc"
--- a/test/test_libvpx.cc
+++ b/test/test_libvpx.cc
@ -53,14 +53,12 @@ int main(int argc, char **argv) {
  }
  if (!(simd_caps & HAS_AVX)) append_negative_gtest_filter(":AVX.*:AVX/*");
  if (!(simd_caps & HAS_AVX2)) append_negative_gtest_filter(":AVX2.*:AVX2/*");
  if (!(simd_caps & HAS_AVX512)) {
    append_negative_gtest_filter(":AVX512.*:AVX512/*");
  }
 #endif  // ARCH_X86 || ARCH_X86_64
 #if !CONFIG_SHARED
 // Shared library builds don't support whitebox tests
 // that exercise internal symbols.
 #if CONFIG_VP8
  vp8_rtcd();
 #endif  // CONFIG_VP8
--- a/test/test_vector_test.cc
+++ b/test/test_vector_test.cc
@ -28,10 +28,13 @@
 namespace {
-const int kThreads = 0;
+enum DecodeMode { kSerialMode, kFrameParallelMode };
 const int kFileName = 1;
-typedef ::testing::tuple<int, const char *> DecodeParam;
+const int kDecodeMode = 0;
 const int kThreads = 1;
 const int kFileName = 2;
 typedef std::tr1::tuple<int, int, const char *> DecodeParam;
 class TestVectorTest : public ::libvpx_test::DecoderTest,
                       public ::libvpx_test::CodecTestWithParam<DecodeParam> {
@ -50,8 +53,8 @@ class TestVectorTest : public ::libvpx_test::DecoderTest,
  void OpenMD5File(const std::string &md5_file_name_) {
    md5_file_ = libvpx_test::OpenTestDataFile(md5_file_name_);
-    ASSERT_TRUE(md5_file_ != NULL)
+    ASSERT_TRUE(md5_file_ != NULL) << "Md5 file open failed. Filename: "
-        << "Md5 file open failed. Filename: " << md5_file_name_;
+                                   << md5_file_name_;
  }
  virtual void DecompressedFrameHook(const vpx_image_t &img,
@ -88,15 +91,30 @@ class TestVectorTest : public ::libvpx_test::DecoderTest,
 // the test failed.
 TEST_P(TestVectorTest, MD5Match) {
  const DecodeParam input = GET_PARAM(1);
-  const std::string filename = ::testing::get<kFileName>(input);
+  const std::string filename = std::tr1::get<kFileName>(input);
  const int threads = std::tr1::get<kThreads>(input);
  const int mode = std::tr1::get<kDecodeMode>(input);
  vpx_codec_flags_t flags = 0;
  vpx_codec_dec_cfg_t cfg = vpx_codec_dec_cfg_t();
  char str[256];
-  cfg.threads = ::testing::get<kThreads>(input);
+  if (mode == kFrameParallelMode) {
    flags |= VPX_CODEC_USE_FRAME_THREADING;
 #if CONFIG_VP9_DECODER
    // TODO(hkuang): Fix frame parallel decode bug. See issue 1086.
    if (resize_clips_.find(filename) != resize_clips_.end()) {
      printf("Skipping the test file: %s, due to frame parallel decode bug.\n",
             filename.c_str());
      return;
    }
 #endif
  }
-  snprintf(str, sizeof(str) / sizeof(str[0]) - 1, "file: %s threads: %d",
+  cfg.threads = threads;
-           filename.c_str(), cfg.threads);
+
  snprintf(str, sizeof(str) / sizeof(str[0]) - 1,
           "file: %s  mode: %s threads: %d", filename.c_str(),
           mode == 0 ? "Serial" : "Parallel", threads);
  SCOPED_TRACE(str);
  // Open compressed video file.
@ -127,44 +145,38 @@ TEST_P(TestVectorTest, MD5Match) {
  ASSERT_NO_FATAL_FAILURE(RunLoop(video.get(), cfg));
 }
 // Test VP8 decode in serial mode with single thread.
 // NOTE: VP8 only support serial mode.
 #if CONFIG_VP8_DECODER
 VP8_INSTANTIATE_TEST_CASE(
    TestVectorTest,
    ::testing::Combine(
        ::testing::Values(0),  // Serial Mode.
        ::testing::Values(1),  // Single thread.
        ::testing::ValuesIn(libvpx_test::kVP8TestVectors,
                            libvpx_test::kVP8TestVectors +
                                libvpx_test::kNumVP8TestVectors)));
 // Test VP8 decode in with different numbers of threads.
 INSTANTIATE_TEST_CASE_P(
    VP8MultiThreaded, TestVectorTest,
    ::testing::Combine(
        ::testing::Values(
            static_cast<const libvpx_test::CodecFactory *>(&libvpx_test::kVP8)),
        ::testing::Combine(
            ::testing::Range(2, 9),  // With 2 ~ 8 threads.
            ::testing::ValuesIn(libvpx_test::kVP8TestVectors,
                                libvpx_test::kVP8TestVectors +
                                    libvpx_test::kNumVP8TestVectors))));
 #endif  // CONFIG_VP8_DECODER
 // Test VP9 decode in serial mode with single thread.
 #if CONFIG_VP9_DECODER
 VP9_INSTANTIATE_TEST_CASE(
    TestVectorTest,
    ::testing::Combine(
        ::testing::Values(0),  // Serial Mode.
        ::testing::Values(1),  // Single thread.
        ::testing::ValuesIn(libvpx_test::kVP9TestVectors,
                            libvpx_test::kVP9TestVectors +
                                libvpx_test::kNumVP9TestVectors)));
 // Test VP9 decode in frame parallel mode with different number of threads.
 INSTANTIATE_TEST_CASE_P(
-    VP9MultiThreaded, TestVectorTest,
+    VP9MultiThreadedFrameParallel, TestVectorTest,
    ::testing::Combine(
        ::testing::Values(
            static_cast<const libvpx_test::CodecFactory *>(&libvpx_test::kVP9)),
        ::testing::Combine(
            ::testing::Values(1),    // Frame Parallel mode.
            ::testing::Range(2, 9),  // With 2 ~ 8 threads.
            ::testing::ValuesIn(libvpx_test::kVP9TestVectors,
                                libvpx_test::kVP9TestVectors +
--- a/test/test_vectors.cc
+++ b/test/test_vectors.cc
@ -371,7 +371,6 @@ const char *const kVP9TestVectors[] = {
 #endif  // CONFIG_VP9_HIGHBITDEPTH
  "vp90-2-20-big_superframe-01.webm",
  "vp90-2-20-big_superframe-02.webm",
  "vp90-2-22-svc_1280x720_1.webm",
  RESIZE_TEST_VECTORS
 };
 const char *const kVP9TestVectorsSvc[] = { "vp90-2-22-svc_1280x720_3.ivf" };
--- a/test/twopass_encoder.sh
+++ b/test/twopass_encoder.sh
@ -54,10 +54,7 @@ twopass_encoder_vp9() {
  fi
 }
 twopass_encoder_tests="twopass_encoder_vp8
                       twopass_encoder_vp9"
-if [ "$(vpx_config_option_enabled CONFIG_REALTIME_ONLY)" != "yes" ]; then
+run_tests twopass_encoder_verify_environment "${twopass_encoder_tests}"
  twopass_encoder_tests="twopass_encoder_vp8
                         twopass_encoder_vp9"
  run_tests twopass_encoder_verify_environment "${twopass_encoder_tests}"
 fi
--- a/test/user_priv_test.cc
+++ b/test/user_priv_test.cc
@ -27,8 +27,8 @@
 namespace {
 using libvpx_test::ACMRandom;
 using std::string;
 using libvpx_test::ACMRandom;
 #if CONFIG_WEBM_IO
--- a/test/util.h
+++ b/test/util.h
@ -17,7 +17,7 @@
 #include "vpx/vpx_image.h"
 // Macros
-#define GET_PARAM(k) ::testing::get<k>(GetParam())
+#define GET_PARAM(k) std::tr1::get<k>(GetParam())
 inline double compute_psnr(const vpx_image_t *img1, const vpx_image_t *img2) {
  assert((img1->fmt == img2->fmt) && (img1->d_w == img2->d_w) &&
--- a/test/variance_test.cc
+++ b/test/variance_test.cc
--- a/test/video_source.h
+++ b/test/video_source.h
@ -13,9 +13,7 @@
 #if defined(_WIN32)
 #undef NOMINMAX
 #define NOMINMAX
 #ifndef WIN32_LEAN_AND_MEAN
 #define WIN32_LEAN_AND_MEAN
 #endif
 #include <windows.h>
 #endif
 #include <cstdio>
--- a/test/vp8_datarate_test.cc
+++ b/test/vp8_datarate_test.cc
@ -1,513 +0,0 @@
 /*
 *  Copyright (c) 2012 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
 #include "./vpx_config.h"
 #include "third_party/googletest/src/include/gtest/gtest.h"
 #include "test/codec_factory.h"
 #include "test/encode_test_driver.h"
 #include "test/i420_video_source.h"
 #include "test/util.h"
 #include "test/y4m_video_source.h"
 #include "vpx/vpx_codec.h"
 namespace {
 // Fixture for the VP8 datarate tests. Every encoded packet is fed into a
 // simple buffer model, and at the end of each pass the fixture derives the
 // datarates that the tests compare against cfg_.rc_target_bitrate.
 //
 // Parameters: GET_PARAM(0) is handed to EncoderTest, GET_PARAM(1) is passed to
 // SetMode() and GET_PARAM(2) is the value sent with VP8E_SET_CPUUSED.
 class DatarateTestLarge
    : public ::libvpx_test::EncoderTest,
      public ::libvpx_test::CodecTestWith2Params<libvpx_test::TestMode, int> {
 public:
  // Every member gets a defined value here so that no hook or assertion can
  // read an indeterminate one, even when a run produces no packets at all.
  // The initializers are listed in declaration order.
  DatarateTestLarge()
      : EncoderTest(GET_PARAM(0)), last_pts_(0), bits_in_buffer_model_(0),
        timebase_(0.0), frame_number_(0), first_drop_(0), bits_total_(0),
        duration_(0.0), file_datarate_(0.0), effective_datarate_(0.0),
        bits_in_last_frame_(0), denoiser_on_(0), denoiser_offon_test_(0),
        denoiser_offon_period_(-1), set_cpu_used_(0), gf_boost_(0),
        use_roi_(false), roi_() {}
  virtual ~DatarateTestLarge() {}
 protected:
  virtual void SetUp() {
    InitializeConfig();
    SetMode(GET_PARAM(1));
    set_cpu_used_ = GET_PARAM(2);
    ResetModel();
  }
  // Restores the per-run state. Tests call this after changing cfg_ and before
  // each RunLoop(). denoiser_on_ is deliberately left alone: several tests set
  // it before calling ResetModel().
  virtual void ResetModel() {
    last_pts_ = 0;
    bits_in_buffer_model_ = cfg_.rc_target_bitrate * cfg_.rc_buf_initial_sz;
    frame_number_ = 0;
    first_drop_ = 0;
    bits_total_ = 0;
    duration_ = 0.0;
    // Clear the derived results too, so a run that yields no packets cannot be
    // judged on figures left over from the previous run.
    file_datarate_ = 0.0;
    effective_datarate_ = 0.0;
    bits_in_last_frame_ = 0;
    denoiser_offon_test_ = 0;
    denoiser_offon_period_ = -1;
    gf_boost_ = 0;
    use_roi_ = false;
  }
  // Applies the encoder controls for the upcoming frame and caches the video
  // timebase.
  virtual void PreEncodeFrameHook(::libvpx_test::VideoSource *video,
                                  ::libvpx_test::Encoder *encoder) {
    // These controls are only sent once, on the first frame.
    if (video->frame() == 0) {
      encoder->Control(VP8E_SET_NOISE_SENSITIVITY, denoiser_on_);
      encoder->Control(VP8E_SET_CPUUSED, set_cpu_used_);
      encoder->Control(VP8E_SET_GF_CBR_BOOST_PCT, gf_boost_);
    }
    // The ROI map is re-sent on every frame while use_roi_ is set.
    if (use_roi_) {
      encoder->Control(VP8E_SET_ROI_MAP, &roi_);
    }
    if (denoiser_offon_test_) {
      ASSERT_GT(denoiser_offon_period_, 0)
          << "denoiser_offon_period_ is not positive.";
      if ((video->frame() + 1) % denoiser_offon_period_ == 0) {
        // Flip denoiser_on_ periodically
        denoiser_on_ ^= 1;
      }
      encoder->Control(VP8E_SET_NOISE_SENSITIVITY, denoiser_on_);
    }
    const vpx_rational_t tb = video->timebase();
    timebase_ = static_cast<double>(tb.num) / tb.den;
    duration_ = 0;
  }
  // Updates the buffer model and the running totals with one encoded packet.
  virtual void FramePktHook(const vpx_codec_cx_pkt_t *pkt) {
    // Time since last timestamp = duration.
    vpx_codec_pts_t duration = pkt->data.frame.pts - last_pts_;
    // TODO(jimbankoski): Remove these lines when the issue:
    // http://code.google.com/p/webm/issues/detail?id=496 is fixed.
    // For now the codec assumes buffer starts at starting buffer rate
    // plus one frame's time.
    if (last_pts_ == 0) duration = 1;
    // Add to the buffer the bits we'd expect from a constant bitrate server.
    bits_in_buffer_model_ += static_cast<int64_t>(
        duration * timebase_ * cfg_.rc_target_bitrate * 1000);
    /* Test the buffer model here before subtracting the frame. Do so because
     * the way the leaky bucket model works in libvpx is to allow the buffer to
     * empty - and then stop showing frames until we've got enough bits to
     * show one. This does not currently apply to key frames (issue 495), so
     * key frames are excluded from the check below. */
    const bool key_frame = (pkt->data.frame.flags & VPX_FRAME_IS_KEY) != 0;
    if (!key_frame) {
      ASSERT_GE(bits_in_buffer_model_, 0)
          << "Buffer Underrun at frame " << pkt->data.frame.pts;
    }
    const int64_t frame_size_in_bits = pkt->data.frame.sz * 8;
    // Subtract from the buffer the bits associated with a played back frame.
    bits_in_buffer_model_ -= frame_size_in_bits;
    // Update the running total of bits for end of test datarate checks.
    bits_total_ += frame_size_in_bits;
    // A pts gap larger than one means frames were dropped; record where the
    // first such gap started.
    if (!first_drop_ && duration > 1) first_drop_ = last_pts_ + 1;
    // Update the most recent pts.
    last_pts_ = pkt->data.frame.pts;
    // We update this so that we can calculate the datarate minus the last
    // frame encoded in the file.
    bits_in_last_frame_ = frame_size_in_bits;
    ++frame_number_;
  }
  // Derives file_datarate_ and effective_datarate_ from the totals gathered
  // during the pass. Both stay at their ResetModel() value of 0 when the pass
  // produced no bits.
  virtual void EndPassHook(void) {
    if (bits_total_) {
      const double file_size_in_kb = bits_total_ / 1000.;  // bits per kilobit
      duration_ = (last_pts_ + 1) * timebase_;
      // Effective file datarate includes the time spent prebuffering.
      effective_datarate_ = (bits_total_ - bits_in_last_frame_) / 1000.0 /
                            (cfg_.rc_buf_initial_sz / 1000.0 + duration_);
      file_datarate_ = file_size_in_kb / duration_;
    }
  }
  vpx_codec_pts_t last_pts_;       // pts of the most recent packet.
  int64_t bits_in_buffer_model_;   // Current fill of the modelled buffer.
  double timebase_;                // Video timebase as num / den.
  int frame_number_;               // Packets seen since ResetModel().
  vpx_codec_pts_t first_drop_;     // Start of the first pts gap, 0 if none.
  int64_t bits_total_;             // Bits in all packets since ResetModel().
  double duration_;                // (last_pts_ + 1) * timebase_ at pass end.
  double file_datarate_;           // bits_total_ / 1000 / duration_.
  double effective_datarate_;      // Datarate including the prebuffer time.
  int64_t bits_in_last_frame_;     // Size in bits of the most recent packet.
  int denoiser_on_;                // Sent with VP8E_SET_NOISE_SENSITIVITY.
  int denoiser_offon_test_;        // Non-zero: toggle denoiser_on_ per period.
  int denoiser_offon_period_;      // Toggle period in frames; must be > 0.
  int set_cpu_used_;               // Sent with VP8E_SET_CPUUSED.
  int gf_boost_;                   // Sent with VP8E_SET_GF_CBR_BOOST_PCT.
  bool use_roi_;                   // True: send roi_ before every frame.
  vpx_roi_map_t roi_;              // Map sent with VP8E_SET_ROI_MAP.
 };
 #if CONFIG_TEMPORAL_DENOISING
 // Runs the CBR datarate check at a fixed target bitrate of 300, once for each
 // denoiser setting from 1 to 4.
 TEST_P(DatarateTestLarge, DenoiserLevels) {
  cfg_.rc_end_usage = VPX_CBR;
  cfg_.rc_max_quantizer = 56;
  cfg_.rc_dropframe_thresh = 1;
  cfg_.rc_buf_initial_sz = 500;
  ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
                                       30, 1, 0, 140);
  // With CONFIG_TEMPORAL_DENOISING the four levels select the denoiser modes
  // denoiserYonly, denoiserOnYUV, denoiserOnAggressive and denoiserOnAdaptive.
  for (int level = 1; level <= 4; ++level) {
    cfg_.rc_target_bitrate = 300;
    denoiser_on_ = level;
    ResetModel();
    ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
    ASSERT_GE(cfg_.rc_target_bitrate, effective_datarate_ * 0.95)
        << " The datarate for the file exceeds the target!";
    ASSERT_LE(cfg_.rc_target_bitrate, file_datarate_ * 1.4)
        << " The datarate for the file missed the target!";
  }
 }
 // Runs the CBR datarate check at a target bitrate of 300 while the denoiser,
 // initially off, is toggled every 100 frames.
 TEST_P(DatarateTestLarge, DenoiserOffOn) {
  // The denoiser starts out disabled.
  denoiser_on_ = 0;
  cfg_.rc_end_usage = VPX_CBR;
  cfg_.rc_max_quantizer = 56;
  cfg_.rc_dropframe_thresh = 1;
  cfg_.rc_buf_initial_sz = 500;
  ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
                                       30, 1, 0, 299);
  cfg_.rc_target_bitrate = 300;
  ResetModel();
  // ResetModel() clears the toggle settings, so enable them afterwards.
  denoiser_offon_period_ = 100;
  denoiser_offon_test_ = 1;
  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
  ASSERT_GE(cfg_.rc_target_bitrate, effective_datarate_ * 0.95)
      << " The datarate for the file exceeds the target!";
  ASSERT_LE(cfg_.rc_target_bitrate, file_datarate_ * 1.4)
      << " The datarate for the file missed the target!";
 }
 #endif  // CONFIG_TEMPORAL_DENOISING
 // Runs the CBR datarate check with the denoiser off at target bitrates of
 // 100, 300, 500 and 700.
 TEST_P(DatarateTestLarge, BasicBufferModel) {
  denoiser_on_ = 0;
  cfg_.rc_end_usage = VPX_CBR;
  cfg_.rc_max_quantizer = 56;
  cfg_.rc_dropframe_thresh = 1;
  cfg_.rc_buf_initial_sz = 500;
  // 2 pass cbr datarate control has a bug that the small number of frames
  // encoded here hides: a keyframe is still produced on a cutscene even when
  // the buffer is negative, ignoring the datarate constraints.
  // TODO(jimbankoski): Fix when issue
  // http://code.google.com/p/webm/issues/detail?id=495 is addressed.
  ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
                                       30, 1, 0, 140);
  // In real-time mode the effective_datarate slightly overshoots the target
  // at low bitrates. This is the same issue as noted above (#495).
  // TODO(jimbankoski/marpan): Update test to run for lower bitrates (< 100),
  // when the issue is resolved.
  for (int bitrate = 100; bitrate <= 700; bitrate += 200) {
    cfg_.rc_target_bitrate = bitrate;
    ResetModel();
    ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
    ASSERT_GE(cfg_.rc_target_bitrate, effective_datarate_ * 0.95)
        << " The datarate for the file exceeds the target!";
    ASSERT_LE(cfg_.rc_target_bitrate, file_datarate_ * 1.4)
        << " The datarate for the file missed the target!";
  }
 }
 // Encodes the same 40 frames with drop-frame thresholds of 1, 31 and 61 and
 // checks that the first dropped frame never moves later as the threshold
 // grows.
 TEST_P(DatarateTestLarge, ChangingDropFrameThresh) {
  const int kDropFrameThreshTestStep = 30;
  const int frame_count = 40;
  denoiser_on_ = 0;
  cfg_.kf_mode = VPX_KF_DISABLED;
  cfg_.rc_target_bitrate = 200;
  cfg_.rc_end_usage = VPX_CBR;
  cfg_.rc_max_quantizer = 36;
  cfg_.rc_buf_initial_sz = 500;
  ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
                                       30, 1, 0, frame_count);
  // Start from the frame count so the first threshold always passes.
  vpx_codec_pts_t previous_first_drop = frame_count;
  for (int thresh = 1; thresh <= 90; thresh += kDropFrameThreshTestStep) {
    cfg_.rc_dropframe_thresh = thresh;
    ResetModel();
    ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
    ASSERT_LE(first_drop_, previous_first_drop)
        << " The first dropped frame for drop_thresh " << thresh
        << " > first dropped frame for drop_thresh "
        << thresh - kDropFrameThreshTestStep;
    previous_first_drop = first_drop_;
  }
 }
 // Runs the CBR datarate check at a target bitrate of 200 with two encoder
 // threads and a drop-frame threshold of 30.
 TEST_P(DatarateTestLarge, DropFramesMultiThreads) {
  denoiser_on_ = 0;
  cfg_.g_threads = 2;
  cfg_.rc_end_usage = VPX_CBR;
  cfg_.rc_max_quantizer = 56;
  cfg_.rc_dropframe_thresh = 30;
  cfg_.rc_buf_initial_sz = 500;
  ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
                                       30, 1, 0, 140);
  cfg_.rc_target_bitrate = 200;
  // Rebuild the buffer model now that the target bitrate is known.
  ResetModel();
  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
  ASSERT_GE(cfg_.rc_target_bitrate, effective_datarate_ * 0.95)
      << " The datarate for the file exceeds the target!";
  ASSERT_LE(cfg_.rc_target_bitrate, file_datarate_ * 1.4)
      << " The datarate for the file missed the target!";
 }
 // Separately named copy of the DatarateTestLarge fixture. It adds no state or
 // hooks of its own, which lets its tests be instantiated with a different
 // parameter set from the DatarateTestLarge tests.
 class DatarateTestRealTime : public DatarateTestLarge {
 public:
  virtual ~DatarateTestRealTime() {}
 };
 #if CONFIG_TEMPORAL_DENOISING
 // Runs the CBR datarate check at a fixed target bitrate of 300, once for each
 // denoiser setting from 1 to 4.
 TEST_P(DatarateTestRealTime, DenoiserLevels) {
  const int kFirstDenoiserLevel = 1;
  const int kLastDenoiserLevel = 4;
  cfg_.rc_buf_initial_sz = 500;
  cfg_.rc_end_usage = VPX_CBR;
  cfg_.rc_dropframe_thresh = 1;
  cfg_.rc_max_quantizer = 56;
  ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
                                       30, 1, 0, 140);
  // With CONFIG_TEMPORAL_DENOISING the four levels select the denoiser modes
  // denoiserYonly, denoiserOnYUV, denoiserOnAggressive and denoiserOnAdaptive.
  for (int mode = kFirstDenoiserLevel; mode <= kLastDenoiserLevel; ++mode) {
    denoiser_on_ = mode;
    cfg_.rc_target_bitrate = 300;
    ResetModel();
    ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
    ASSERT_GE(cfg_.rc_target_bitrate, effective_datarate_ * 0.95)
        << " The datarate for the file exceeds the target!";
    ASSERT_LE(cfg_.rc_target_bitrate, file_datarate_ * 1.4)
        << " The datarate for the file missed the target!";
  }
 }
 // Runs the CBR datarate check at a target bitrate of 300 while the denoiser,
 // initially off, is toggled every 100 frames.
 TEST_P(DatarateTestRealTime, DenoiserOffOn) {
  cfg_.rc_buf_initial_sz = 500;
  cfg_.rc_end_usage = VPX_CBR;
  cfg_.rc_dropframe_thresh = 1;
  cfg_.rc_max_quantizer = 56;
  cfg_.rc_target_bitrate = 300;
  ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
                                       30, 1, 0, 299);
  ResetModel();
  // Start with the denoiser off, then turn on periodic toggling. These must
  // follow ResetModel(), which clears the toggle flag and period.
  denoiser_on_ = 0;
  denoiser_offon_test_ = 1;
  denoiser_offon_period_ = 100;
  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
  ASSERT_GE(cfg_.rc_target_bitrate, effective_datarate_ * 0.95)
      << " The datarate for the file exceeds the target!";
  ASSERT_LE(cfg_.rc_target_bitrate, file_datarate_ * 1.4)
      << " The datarate for the file missed the target!";
 }
 #endif  // CONFIG_TEMPORAL_DENOISING
 // Runs the CBR datarate check with the denoiser off at target bitrates of
 // 100, 300, 500 and 700.
 TEST_P(DatarateTestRealTime, BasicBufferModel) {
  // There is an issue for low bitrates in real-time mode, where the
  // effective_datarate slightly overshoots the target bitrate (see #495
  // below), so the list starts at 100.
  // TODO(jimbankoski/marpan): Update test to run for lower bitrates (< 100),
  // when the issue is resolved.
  const int kTargetBitrates[] = { 100, 300, 500, 700 };
  const int kNumBitrates =
      static_cast<int>(sizeof(kTargetBitrates) / sizeof(kTargetBitrates[0]));
  denoiser_on_ = 0;
  cfg_.rc_buf_initial_sz = 500;
  cfg_.rc_end_usage = VPX_CBR;
  cfg_.rc_dropframe_thresh = 1;
  cfg_.rc_max_quantizer = 56;
  // 2 pass cbr datarate control has a bug hidden by the small # of
  // frames selected in this encode. The problem is that even if the buffer is
  // negative we produce a keyframe on a cutscene, ignoring datarate
  // constraints
  // TODO(jimbankoski): Fix when issue
  // http://bugs.chromium.org/p/webm/issues/detail?id=495 is addressed.
  ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
                                       30, 1, 0, 140);
  for (int idx = 0; idx < kNumBitrates; ++idx) {
    cfg_.rc_target_bitrate = kTargetBitrates[idx];
    ResetModel();
    ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
    ASSERT_GE(cfg_.rc_target_bitrate, effective_datarate_ * 0.95)
        << " The datarate for the file exceeds the target!";
    ASSERT_LE(cfg_.rc_target_bitrate, file_datarate_ * 1.4)
        << " The datarate for the file missed the target!";
  }
 }
 // Encodes the same 40 frames with drop-frame thresholds of 1, 31 and 61 and
 // checks that the first dropped frame never moves later as the threshold
 // grows.
 TEST_P(DatarateTestRealTime, ChangingDropFrameThresh) {
  const int frame_count = 40;
  const int kDropFrameThreshTestStep = 30;
  denoiser_on_ = 0;
  cfg_.rc_buf_initial_sz = 500;
  cfg_.rc_end_usage = VPX_CBR;
  cfg_.rc_max_quantizer = 36;
  cfg_.rc_target_bitrate = 200;
  cfg_.kf_mode = VPX_KF_DISABLED;
  ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
                                       30, 1, 0, frame_count);
  // Each run's first drop must not come later than the previous run's; the
  // bound for the very first run is the total frame count.
  vpx_codec_pts_t drop_limit = frame_count;
  int drop_thresh = 1;
  while (drop_thresh < 91) {
    cfg_.rc_dropframe_thresh = drop_thresh;
    ResetModel();
    ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
    ASSERT_LE(first_drop_, drop_limit)
        << " The first dropped frame for drop_thresh " << drop_thresh
        << " > first dropped frame for drop_thresh "
        << drop_thresh - kDropFrameThreshTestStep;
    drop_limit = first_drop_;
    drop_thresh += kDropFrameThreshTestStep;
  }
 }
 // Runs the CBR datarate check at a target bitrate of 200 with two encoder
 // threads and a drop-frame threshold of 30.
 TEST_P(DatarateTestRealTime, DropFramesMultiThreads) {
  denoiser_on_ = 0;
  // Two threads, so the multi-threaded encode path is the one measured.
  cfg_.g_threads = 2;
  cfg_.rc_buf_initial_sz = 500;
  cfg_.rc_end_usage = VPX_CBR;
  cfg_.rc_dropframe_thresh = 30;
  cfg_.rc_max_quantizer = 56;
  cfg_.rc_target_bitrate = 200;
  ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
                                       30, 1, 0, 140);
  ResetModel();
  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
  ASSERT_GE(cfg_.rc_target_bitrate, effective_datarate_ * 0.95)
      << " The datarate for the file exceeds the target!";
  ASSERT_LE(cfg_.rc_target_bitrate, file_datarate_ * 1.4)
      << " The datarate for the file missed the target!";
 }
 // Runs the CBR datarate check at a target bitrate of 450 with a
 // region-of-interest map active. The centre of the frame is tagged as
 // segment 1, which gets its own quantizer delta, loop-filter delta and static
 // threshold; everything else stays in segment 0.
 TEST_P(DatarateTestRealTime, RegionOfInterest) {
  denoiser_on_ = 0;
  cfg_.rc_buf_initial_sz = 500;
  cfg_.rc_dropframe_thresh = 0;
  cfg_.rc_max_quantizer = 56;
  cfg_.rc_end_usage = VPX_CBR;
  // Encode using multiple threads.
  cfg_.g_threads = 2;
  ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
                                       30, 1, 0, 300);
  cfg_.rc_target_bitrate = 450;
  cfg_.g_w = 352;
  cfg_.g_h = 288;
  ResetModel();
  // Set ROI parameters. ResetModel() clears use_roi_, so enable it afterwards.
  use_roi_ = true;
  memset(&roi_, 0, sizeof(roi_));
  // One map entry per 16x16 block, rounding partial blocks up.
  roi_.rows = (cfg_.g_h + 15) / 16;
  roi_.cols = (cfg_.g_w + 15) / 16;
  // The memset above already zeroed the settings of segments 0, 2 and 3; only
  // segment 1 differs.
  roi_.delta_q[1] = -20;
  roi_.delta_lf[1] = -20;
  roi_.static_threshold[1] = 1000;
  // Use 2 states: 1 is center square, 0 is the rest.
  roi_.roi_map = static_cast<uint8_t *>(
      calloc(roi_.rows * roi_.cols, sizeof(*roi_.roi_map)));
  ASSERT_TRUE(roi_.roi_map != NULL) << "Failed to allocate the ROI map.";
  for (unsigned int i = 0; i < roi_.rows; ++i) {
    for (unsigned int j = 0; j < roi_.cols; ++j) {
      if (i > (roi_.rows >> 2) && i < ((roi_.rows * 3) >> 2) &&
          j > (roi_.cols >> 2) && j < ((roi_.cols * 3) >> 2)) {
        roi_.roi_map[i * roi_.cols + j] = 1;
      }
    }
  }
  // Use the non-returning EXPECT_ form so the map is released even when the
  // encode fails. Nothing after RunLoop() reads the map, so it is freed before
  // any assertion that could return early.
  EXPECT_NO_FATAL_FAILURE(RunLoop(&video));
  free(roi_.roi_map);
  roi_.roi_map = NULL;
  // Keep the original behaviour of stopping after a fatal failure in RunLoop.
  if (HasFatalFailure()) return;
  ASSERT_GE(cfg_.rc_target_bitrate, effective_datarate_ * 0.95)
      << " The datarate for the file exceeds the target!";
  ASSERT_LE(cfg_.rc_target_bitrate, file_datarate_ * 1.4)
      << " The datarate for the file missed the target!";
 }
 // Runs the CBR datarate check at a target bitrate of 300 with a value of 50
 // sent through VP8E_SET_GF_CBR_BOOST_PCT.
 TEST_P(DatarateTestRealTime, GFBoost) {
  denoiser_on_ = 0;
  cfg_.g_error_resilient = 0;
  cfg_.rc_end_usage = VPX_CBR;
  cfg_.rc_max_quantizer = 56;
  cfg_.rc_dropframe_thresh = 0;
  cfg_.rc_buf_initial_sz = 500;
  cfg_.rc_target_bitrate = 300;
  ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
                                       30, 1, 0, 300);
  ResetModel();
  // ResetModel() zeroes gf_boost_, so the boost has to be set afterwards.
  gf_boost_ = 50;
  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
  ASSERT_GE(cfg_.rc_target_bitrate, effective_datarate_ * 0.95)
      << " The datarate for the file exceeds the target!";
  ASSERT_LE(cfg_.rc_target_bitrate, file_datarate_ * 1.4)
      << " The datarate for the file missed the target!";
 }
 // DatarateTestLarge: ALL_TEST_MODES supplies the mode that SetUp() reads via
 // GET_PARAM(1); the single value 0 is what it reads via GET_PARAM(2) into
 // set_cpu_used_.
 VP8_INSTANTIATE_TEST_CASE(DatarateTestLarge, ALL_TEST_MODES,
                          ::testing::Values(0));
 // DatarateTestRealTime: kRealTime is the only mode, combined with -6 and -12
 // as the GET_PARAM(2) values (set_cpu_used_).
 VP8_INSTANTIATE_TEST_CASE(DatarateTestRealTime,
                          ::testing::Values(::libvpx_test::kRealTime),
                          ::testing::Values(-6, -12));
 }  // namespace
--- a/test/vp8_fdct4x4_test.cc
+++ b/test/vp8_fdct4x4_test.cc
@ -17,16 +17,12 @@
 #include "third_party/googletest/src/include/gtest/gtest.h"
 #include "./vpx_config.h"
 #include "./vp8_rtcd.h"
 #include "test/acm_random.h"
 #include "vpx/vpx_integer.h"
 #include "vpx_ports/mem.h"
 namespace {
 typedef void (*FdctFunc)(int16_t *a, int16_t *b, int a_stride);
 const int cospi8sqrt2minus1 = 20091;
 const int sinpi8sqrt2 = 35468;
@ -72,21 +68,10 @@ void reference_idct4x4(const int16_t *input, int16_t *output) {
 using libvpx_test::ACMRandom;
-class FdctTest : public ::testing::TestWithParam<FdctFunc> {
+TEST(VP8FdctTest, SignBiasCheck) {
- public:
+  ACMRandom rnd(ACMRandom::DeterministicSeed());
  virtual void SetUp() {
    fdct_func_ = GetParam();
    rnd_.Reset(ACMRandom::DeterministicSeed());
  }
 protected:
  FdctFunc fdct_func_;
  ACMRandom rnd_;
 };
 TEST_P(FdctTest, SignBiasCheck) {
  int16_t test_input_block[16];
-  DECLARE_ALIGNED(16, int16_t, test_output_block[16]);
+  int16_t test_output_block[16];
  const int pitch = 8;
  int count_sign_block[16][2];
  const int count_test_block = 1000000;
@ -96,10 +81,10 @@ TEST_P(FdctTest, SignBiasCheck) {
  for (int i = 0; i < count_test_block; ++i) {
    // Initialize a test block with input range [-255, 255].
    for (int j = 0; j < 16; ++j) {
-      test_input_block[j] = rnd_.Rand8() - rnd_.Rand8();
+      test_input_block[j] = rnd.Rand8() - rnd.Rand8();
    }
-    fdct_func_(test_input_block, test_output_block, pitch);
+    vp8_short_fdct4x4_c(test_input_block, test_output_block, pitch);
    for (int j = 0; j < 16; ++j) {
      if (test_output_block[j] < 0) {
@ -125,10 +110,10 @@ TEST_P(FdctTest, SignBiasCheck) {
  for (int i = 0; i < count_test_block; ++i) {
    // Initialize a test block with input range [-15, 15].
    for (int j = 0; j < 16; ++j) {
-      test_input_block[j] = (rnd_.Rand8() >> 4) - (rnd_.Rand8() >> 4);
+      test_input_block[j] = (rnd.Rand8() >> 4) - (rnd.Rand8() >> 4);
    }
-    fdct_func_(test_input_block, test_output_block, pitch);
+    vp8_short_fdct4x4_c(test_input_block, test_output_block, pitch);
    for (int j = 0; j < 16; ++j) {
      if (test_output_block[j] < 0) {
@ -150,22 +135,23 @@ TEST_P(FdctTest, SignBiasCheck) {
      << "Error: 4x4 FDCT has a sign bias > 10% for input range [-15, 15]";
 };
-TEST_P(FdctTest, RoundTripErrorCheck) {
+TEST(VP8FdctTest, RoundTripErrorCheck) {
  ACMRandom rnd(ACMRandom::DeterministicSeed());
  int max_error = 0;
  double total_error = 0;
  const int count_test_block = 1000000;
  for (int i = 0; i < count_test_block; ++i) {
    int16_t test_input_block[16];
    int16_t test_temp_block[16];
    int16_t test_output_block[16];
    DECLARE_ALIGNED(16, int16_t, test_temp_block[16]);
    // Initialize a test block with input range [-255, 255].
    for (int j = 0; j < 16; ++j) {
-      test_input_block[j] = rnd_.Rand8() - rnd_.Rand8();
+      test_input_block[j] = rnd.Rand8() - rnd.Rand8();
    }
    const int pitch = 8;
-    fdct_func_(test_input_block, test_temp_block, pitch);
+    vp8_short_fdct4x4_c(test_input_block, test_temp_block, pitch);
    reference_idct4x4(test_temp_block, test_output_block);
    for (int j = 0; j < 16; ++j) {
@ -183,24 +169,4 @@ TEST_P(FdctTest, RoundTripErrorCheck) {
      << "Error: FDCT/IDCT has average roundtrip error > 1 per block";
 };
 INSTANTIATE_TEST_CASE_P(C, FdctTest, ::testing::Values(vp8_short_fdct4x4_c));
 #if HAVE_NEON
 INSTANTIATE_TEST_CASE_P(NEON, FdctTest,
                        ::testing::Values(vp8_short_fdct4x4_neon));
 #endif  // HAVE_NEON
 #if HAVE_SSE2
 INSTANTIATE_TEST_CASE_P(SSE2, FdctTest,
                        ::testing::Values(vp8_short_fdct4x4_sse2));
 #endif  // HAVE_SSE2
 #if HAVE_MSA
 INSTANTIATE_TEST_CASE_P(MSA, FdctTest,
                        ::testing::Values(vp8_short_fdct4x4_msa));
 #endif  // HAVE_MSA
 #if HAVE_MMI
 INSTANTIATE_TEST_CASE_P(MMI, FdctTest,
                        ::testing::Values(vp8_short_fdct4x4_mmi));
 #endif  // HAVE_MMI
 }  // namespace
--- a/test/vp9_datarate_test.cc
+++ b/test/vp9_datarate_test.cc
@ -1,839 +0,0 @@
 /*
 *  Copyright (c) 2012 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
 #include "./vpx_config.h"
 #include "third_party/googletest/src/include/gtest/gtest.h"
 #include "test/codec_factory.h"
 #include "test/encode_test_driver.h"
 #include "test/i420_video_source.h"
 #include "test/util.h"
 #include "test/y4m_video_source.h"
 #include "vpx/vpx_codec.h"
 #include "vpx_ports/bitops.h"
 namespace {
 class DatarateTestVP9 : public ::libvpx_test::EncoderTest {
 public:
  explicit DatarateTestVP9(const ::libvpx_test::CodecFactory *codec)
      : EncoderTest(codec) {}
 protected:
  virtual ~DatarateTestVP9() {}
  virtual void ResetModel() {
    last_pts_ = 0;
    bits_in_buffer_model_ = cfg_.rc_target_bitrate * cfg_.rc_buf_initial_sz;
    frame_number_ = 0;
    tot_frame_number_ = 0;
    first_drop_ = 0;
    num_drops_ = 0;
    // Denoiser is off by default.
    denoiser_on_ = 0;
    // For testing up to 3 layers.
    for (int i = 0; i < 3; ++i) {
      bits_total_[i] = 0;
    }
    denoiser_offon_test_ = 0;
    denoiser_offon_period_ = -1;
    frame_parallel_decoding_mode_ = 1;
    use_roi_ = false;
  }
  //
  // Frame flags and layer id for temporal layers.
  //
  // For two layers, test pattern is:
  //   1     3
  // 0    2     .....
  // For three layers, test pattern is:
  //   1      3    5      7
  //      2           6
  // 0          4            ....
  // LAST is always update on base/layer 0, GOLDEN is updated on layer 1.
  // For this 3 layer example, the 2nd enhancement layer (layer 2) updates
  // the altref frame.
  // Returns the encode flags for |frame_num| under a 2- or 3-layer temporal
  // pattern. Any other layer count, or a frame matching no case, yields 0.
  static int GetFrameFlags(int frame_num, int num_temp_layers) {
    if (num_temp_layers == 2) {
      if (frame_num % 2 == 0) {
        // Layer 0: predict from L and ARF, update L.
        return VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_UPD_GF |
               VP8_EFLAG_NO_UPD_ARF;
      }
      // Layer 1: predict from L, G and ARF, and update G.
      return VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_UPD_LAST |
             VP8_EFLAG_NO_UPD_ENTROPY;
    }
    if (num_temp_layers == 3) {
      if (frame_num % 4 == 0) {
        // Layer 0: predict from L and ARF; update L.
        return VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF |
               VP8_EFLAG_NO_REF_GF;
      }
      if ((frame_num - 2) % 4 == 0) {
        // Layer 1: predict from L, G, ARF; update G.
        return VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_UPD_LAST;
      }
      if ((frame_num - 1) % 2 == 0) {
        // Layer 2: predict from L, G, ARF; update ARF.
        return VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_LAST;
      }
    }
    return 0;
  }
  // Maps |frame_num| to its temporal layer index for a 2- or 3-layer pattern.
  // Any other layer count, or a frame matching no case, maps to layer 0.
  static int SetLayerId(int frame_num, int num_temp_layers) {
    if (num_temp_layers == 2) {
      // Even frames are the base layer, odd frames the enhancement layer.
      return (frame_num % 2 == 0) ? 0 : 1;
    }
    if (num_temp_layers == 3) {
      if (frame_num % 4 == 0) return 0;
      if ((frame_num - 2) % 4 == 0) return 1;
      if ((frame_num - 1) % 2 == 0) return 2;
    }
    return 0;
  }
  // Per-frame hook run before each frame is encoded: pushes the current test
  // settings (speed, denoiser, tiles, frame-parallel mode, optional ROI map
  // and temporal-layer id) to |encoder| and records the source timebase.
  virtual void PreEncodeFrameHook(::libvpx_test::VideoSource *video,
                                  ::libvpx_test::Encoder *encoder) {
    // The speed setting is only sent on the first frame.
    if (video->frame() == 0) encoder->Control(VP8E_SET_CPUUSED, set_cpu_used_);
    if (denoiser_offon_test_) {
      ASSERT_GT(denoiser_offon_period_, 0)
          << "denoiser_offon_period_ is not positive.";
      if ((video->frame() + 1) % denoiser_offon_period_ == 0) {
        // Flip denoiser_on_ periodically
        denoiser_on_ ^= 1;
      }
    }
    encoder->Control(VP9E_SET_NOISE_SENSITIVITY, denoiser_on_);
    // Tile-column setting is derived from the configured thread count.
    encoder->Control(VP9E_SET_TILE_COLUMNS, get_msb(cfg_.g_threads));
    encoder->Control(VP9E_SET_FRAME_PARALLEL_DECODING,
                     frame_parallel_decoding_mode_);
    if (use_roi_) {
      encoder->Control(VP9E_SET_ROI_MAP, &roi_);
    }
    // With more than one temporal layer, enable SVC once and then set the
    // per-frame flags and temporal layer id from the frame index.
    if (cfg_.ts_number_layers > 1) {
      if (video->frame() == 0) {
        encoder->Control(VP9E_SET_SVC, 1);
      }
      vpx_svc_layer_id_t layer_id;
      layer_id.spatial_layer_id = 0;
      frame_flags_ = GetFrameFlags(video->frame(), cfg_.ts_number_layers);
      layer_id.temporal_layer_id =
          SetLayerId(video->frame(), cfg_.ts_number_layers);
      encoder->Control(VP9E_SET_SVC_LAYER_ID, &layer_id);
    }
    // Seconds per timestamp tick, used by the datarate bookkeeping.
    const vpx_rational_t tb = video->timebase();
    timebase_ = static_cast<double>(tb.num) / tb.den;
    duration_ = 0;
  }
  // Per-packet hook: tracks dropped frames (gaps in pts), updates the buffer
  // model and accumulates the encoded size into the per-layer bit totals.
  virtual void FramePktHook(const vpx_codec_cx_pkt_t *pkt) {
    // Time since last timestamp = duration.
    vpx_codec_pts_t duration = pkt->data.frame.pts - last_pts_;
    // A gap of more than one tick means (duration - 1) frames were dropped.
    if (duration > 1) {
      // If first drop not set and we have a drop set it to this time.
      if (!first_drop_) first_drop_ = last_pts_ + 1;
      // Update the number of frame drops.
      num_drops_ += static_cast<int>(duration - 1);
      // Update counter for total number of frames (#frames input to encoder).
      // Needed for setting the proper layer_id below.
      tot_frame_number_ += static_cast<int>(duration - 1);
    }
    int layer = SetLayerId(tot_frame_number_, cfg_.ts_number_layers);
    // Add to the buffer the bits we'd expect from a constant bitrate server.
    bits_in_buffer_model_ += static_cast<int64_t>(
        duration * timebase_ * cfg_.rc_target_bitrate * 1000);
    // Buffer should not go negative.
    // NOTE(review): within this hook the model is only ever incremented; the
    // encoded frame size is not subtracted here, so this check looks like it
    // can only trip if |duration| is negative — confirm this is intended.
    ASSERT_GE(bits_in_buffer_model_, 0)
        << "Buffer Underrun at frame " << pkt->data.frame.pts;
    const size_t frame_size_in_bits = pkt->data.frame.sz * 8;
    // Update the total encoded bits. For temporal layers, update the cumulative
    // encoded bits per layer.
    // A frame in layer k counts toward layers k..N-1, making totals cumulative.
    for (int i = layer; i < static_cast<int>(cfg_.ts_number_layers); ++i) {
      bits_total_[i] += frame_size_in_bits;
    }
    // Update the most recent pts.
    last_pts_ = pkt->data.frame.pts;
    ++frame_number_;
    ++tot_frame_number_;
  }
  // End-of-pass hook: converts each layer's accumulated bit total into an
  // effective datarate over the elapsed time. Layers with no bits are left
  // untouched.
  virtual void EndPassHook(void) {
    const int num_layers = static_cast<int>(cfg_.ts_number_layers);
    for (int i = 0; i < num_layers; ++i) {
      duration_ = (last_pts_ + 1) * timebase_;
      if (!bits_total_[i]) continue;
      // Effective file datarate:
      const double kilobits = bits_total_[i] / 1000.0;
      effective_datarate_[i] = kilobits / duration_;
    }
  }
  vpx_codec_pts_t last_pts_;
  double timebase_;
  int frame_number_;      // Counter for number of non-dropped/encoded frames.
  int tot_frame_number_;  // Counter for total number of input frames.
  int64_t bits_total_[3];
  double duration_;
  double effective_datarate_[3];
  int set_cpu_used_;
  int64_t bits_in_buffer_model_;
  vpx_codec_pts_t first_drop_;
  int num_drops_;
  int denoiser_on_;
  int denoiser_offon_test_;
  int denoiser_offon_period_;
  int frame_parallel_decoding_mode_;
  bool use_roi_;
  vpx_roi_map_t roi_;
 };
 // Params: test mode, speed setting and index for bitrate array.
 class DatarateTestVP9Large
    : public DatarateTestVP9,
      public ::libvpx_test::CodecTestWith3Params<libvpx_test::TestMode, int,
                                                 int> {
 public:
  DatarateTestVP9Large() : DatarateTestVP9(GET_PARAM(0)) {}
 protected:
  virtual void SetUp() {
    InitializeConfig();
    SetMode(GET_PARAM(1));
    set_cpu_used_ = GET_PARAM(2);
    ResetModel();
  }
 };
 // Params: test mode, speed setting.
 class DatarateTestVP9LargeOneBR
    : public DatarateTestVP9,
      public ::libvpx_test::CodecTestWith2Params<libvpx_test::TestMode, int> {
 public:
  DatarateTestVP9LargeOneBR() : DatarateTestVP9(GET_PARAM(0)) {}
 protected:
  virtual void SetUp() {
    InitializeConfig();
    SetMode(GET_PARAM(1));
    set_cpu_used_ = GET_PARAM(2);
    ResetModel();
  }
 };
 // Check basic rate targeting for VBR mode with 0 lag.
 TEST_P(DatarateTestVP9Large, BasicRateTargetingVBRLagZero) {
  cfg_.rc_min_quantizer = 0;
  cfg_.rc_max_quantizer = 63;
  cfg_.g_error_resilient = 0;
  cfg_.rc_end_usage = VPX_VBR;
  cfg_.g_lag_in_frames = 0;
  ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
                                       30, 1, 0, 300);
  const int bitrates[2] = { 400, 800 };
  const int bitrate_index = GET_PARAM(3);
  if (bitrate_index > 1) return;
  cfg_.rc_target_bitrate = bitrates[bitrate_index];
  ResetModel();
  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
  ASSERT_GE(effective_datarate_[0], cfg_.rc_target_bitrate * 0.75)
      << " The datarate for the file is lower than target by too much!";
  ASSERT_LE(effective_datarate_[0], cfg_.rc_target_bitrate * 1.35)
      << " The datarate for the file is greater than target by too much!";
 }
 // Check basic rate targeting for VBR mode with non-zero lag.
 TEST_P(DatarateTestVP9Large, BasicRateTargetingVBRLagNonZero) {
  cfg_.rc_min_quantizer = 0;
  cfg_.rc_max_quantizer = 63;
  cfg_.g_error_resilient = 0;
  cfg_.rc_end_usage = VPX_VBR;
  // For non-zero lag, rate control will work (be within bounds) for
  // real-time mode.
  if (deadline_ == VPX_DL_REALTIME) {
    cfg_.g_lag_in_frames = 15;
  } else {
    cfg_.g_lag_in_frames = 0;
  }
  ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
                                       30, 1, 0, 300);
  const int bitrates[2] = { 400, 800 };
  const int bitrate_index = GET_PARAM(3);
  if (bitrate_index > 1) return;
  cfg_.rc_target_bitrate = bitrates[bitrate_index];
  ResetModel();
  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
  ASSERT_GE(effective_datarate_[0], cfg_.rc_target_bitrate * 0.75)
      << " The datarate for the file is lower than target by too much!";
  ASSERT_LE(effective_datarate_[0], cfg_.rc_target_bitrate * 1.30)
      << " The datarate for the file is greater than target by too much!";
 }
 // Check basic rate targeting for VBR mode with non-zero lag, with
 // frame_parallel_decoding_mode off. This enables the adapt_coeff/mode/mv probs
 // since error_resilience is off.
 TEST_P(DatarateTestVP9Large, BasicRateTargetingVBRLagNonZeroFrameParDecOff) {
  cfg_.rc_min_quantizer = 0;
  cfg_.rc_max_quantizer = 63;
  cfg_.g_error_resilient = 0;
  cfg_.rc_end_usage = VPX_VBR;
  // For non-zero lag, rate control will work (be within bounds) for
  // real-time mode.
  if (deadline_ == VPX_DL_REALTIME) {
    cfg_.g_lag_in_frames = 15;
  } else {
    cfg_.g_lag_in_frames = 0;
  }
  ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
                                       30, 1, 0, 300);
  const int bitrates[2] = { 400, 800 };
  const int bitrate_index = GET_PARAM(3);
  if (bitrate_index > 1) return;
  cfg_.rc_target_bitrate = bitrates[bitrate_index];
  ResetModel();
  frame_parallel_decoding_mode_ = 0;
  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
  ASSERT_GE(effective_datarate_[0], cfg_.rc_target_bitrate * 0.75)
      << " The datarate for the file is lower than target by too much!";
  ASSERT_LE(effective_datarate_[0], cfg_.rc_target_bitrate * 1.35)
      << " The datarate for the file is greater than target by too much!";
 }
 // Check basic rate targeting for CBR mode.
 TEST_P(DatarateTestVP9Large, BasicRateTargeting) {
  cfg_.rc_buf_initial_sz = 500;
  cfg_.rc_buf_optimal_sz = 500;
  cfg_.rc_buf_sz = 1000;
  cfg_.rc_dropframe_thresh = 1;
  cfg_.rc_min_quantizer = 0;
  cfg_.rc_max_quantizer = 63;
  cfg_.rc_end_usage = VPX_CBR;
  cfg_.g_lag_in_frames = 0;
  ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
                                       30, 1, 0, 140);
  const int bitrates[4] = { 150, 350, 550, 750 };
  const int bitrate_index = GET_PARAM(3);
  cfg_.rc_target_bitrate = bitrates[bitrate_index];
  ResetModel();
  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
  ASSERT_GE(effective_datarate_[0], cfg_.rc_target_bitrate * 0.85)
      << " The datarate for the file is lower than target by too much!";
  ASSERT_LE(effective_datarate_[0], cfg_.rc_target_bitrate * 1.15)
      << " The datarate for the file is greater than target by too much!";
 }
 // Check basic rate targeting for CBR mode, with frame_parallel_decoding_mode
 // off (and error_resilience off).
 TEST_P(DatarateTestVP9Large, BasicRateTargetingFrameParDecOff) {
  cfg_.rc_buf_initial_sz = 500;
  cfg_.rc_buf_optimal_sz = 500;
  cfg_.rc_buf_sz = 1000;
  cfg_.rc_dropframe_thresh = 1;
  cfg_.rc_min_quantizer = 0;
  cfg_.rc_max_quantizer = 63;
  cfg_.rc_end_usage = VPX_CBR;
  cfg_.g_lag_in_frames = 0;
  cfg_.g_error_resilient = 0;
  ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
                                       30, 1, 0, 140);
  const int bitrates[4] = { 150, 350, 550, 750 };
  const int bitrate_index = GET_PARAM(3);
  cfg_.rc_target_bitrate = bitrates[bitrate_index];
  ResetModel();
  frame_parallel_decoding_mode_ = 0;
  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
  ASSERT_GE(effective_datarate_[0], cfg_.rc_target_bitrate * 0.85)
      << " The datarate for the file is lower than target by too much!";
  ASSERT_LE(effective_datarate_[0], cfg_.rc_target_bitrate * 1.15)
      << " The datarate for the file is greater than target by too much!";
 }
 // Check basic rate targeting for CBR mode, with 2 threads and dropped frames.
 TEST_P(DatarateTestVP9LargeOneBR, BasicRateTargetingDropFramesMultiThreads) {
  cfg_.rc_buf_initial_sz = 500;
  cfg_.rc_buf_optimal_sz = 500;
  cfg_.rc_buf_sz = 1000;
  cfg_.rc_dropframe_thresh = 30;
  cfg_.rc_min_quantizer = 0;
  cfg_.rc_max_quantizer = 63;
  cfg_.rc_end_usage = VPX_CBR;
  cfg_.g_lag_in_frames = 0;
  // Encode using multiple threads.
  cfg_.g_threads = 2;
  ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
                                       30, 1, 0, 140);
  cfg_.rc_target_bitrate = 200;
  ResetModel();
  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
  ASSERT_GE(effective_datarate_[0], cfg_.rc_target_bitrate * 0.85)
      << " The datarate for the file is lower than target by too much!";
  ASSERT_LE(effective_datarate_[0], cfg_.rc_target_bitrate * 1.15)
      << " The datarate for the file is greater than target by too much!";
 }
 // Check basic rate targeting for CBR mode with the 444 clip (profile 1).
 // Encodes the rush_hour_444 clip with profile 1 in CBR mode at one of four
 // target bitrates (selected by the bitrate-index parameter) and checks that
 // the layer-0 effective datarate stays within the accepted band.
 TEST_P(DatarateTestVP9Large, BasicRateTargeting444) {
  ::libvpx_test::Y4mVideoSource video("rush_hour_444.y4m", 0, 140);
  cfg_.g_profile = 1;
  cfg_.g_timebase = video.timebase();
  cfg_.rc_buf_initial_sz = 500;
  cfg_.rc_buf_optimal_sz = 500;
  cfg_.rc_buf_sz = 1000;
  cfg_.rc_dropframe_thresh = 1;
  cfg_.rc_min_quantizer = 0;
  cfg_.rc_max_quantizer = 63;
  cfg_.rc_end_usage = VPX_CBR;
  const int bitrates[4] = { 250, 450, 650, 850 };
  const int bitrate_index = GET_PARAM(3);
  cfg_.rc_target_bitrate = bitrates[bitrate_index];
  ResetModel();
  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
  ASSERT_GE(static_cast<double>(cfg_.rc_target_bitrate),
            effective_datarate_[0] * 0.80)
      << " The datarate for the file exceeds the target by too much!";
  // Report the layer-0 value itself; streaming the array name would print its
  // address rather than the measured datarate.
  ASSERT_LE(static_cast<double>(cfg_.rc_target_bitrate),
            effective_datarate_[0] * 1.15)
      << " The datarate for the file missed the target!"
      << " " << cfg_.rc_target_bitrate << " " << effective_datarate_[0];
 }
 // Check that (1) the first dropped frame gets earlier and earlier
 // as the drop frame threshold is increased, and (2) that the total number of
 // frame drops does not decrease as we increase frame drop threshold.
 // Use a lower qp-max to force some frame drops.
 // Runs the same CBR encode with drop-frame thresholds 10, 40 and 70 and
 // checks, for each step, the datarate band, that the first drop does not
 // occur later than in the previous step, and that the number of drops does
 // not fall below 85% of the previous step's count.
 TEST_P(DatarateTestVP9Large, ChangingDropFrameThresh) {
  cfg_.rc_buf_initial_sz = 500;
  cfg_.rc_buf_optimal_sz = 500;
  cfg_.rc_buf_sz = 1000;
  // NOTE(review): this assignment used to appear twice back to back; the
  // redundant copy was removed. It may have been meant to set
  // rc_overshoot_pct — confirm before adding that, since it changes the
  // encoder configuration under test.
  cfg_.rc_undershoot_pct = 20;
  cfg_.rc_dropframe_thresh = 10;
  cfg_.rc_min_quantizer = 0;
  cfg_.rc_max_quantizer = 50;
  cfg_.rc_end_usage = VPX_CBR;
  cfg_.rc_target_bitrate = 200;
  cfg_.g_lag_in_frames = 0;
  // TODO(marpan): Investigate datarate target failures with a smaller keyframe
  // interval (128).
  cfg_.kf_max_dist = 9999;
  ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
                                       30, 1, 0, 140);
  const int kDropFrameThreshTestStep = 30;
  const int bitrates[2] = { 50, 150 };
  const int bitrate_index = GET_PARAM(3);
  // Only two bitrates are defined for this test; skip the other indices.
  if (bitrate_index > 1) return;
  cfg_.rc_target_bitrate = bitrates[bitrate_index];
  // Seed with the clip length so the first iteration always passes the
  // "first drop not later than before" check.
  vpx_codec_pts_t last_drop = 140;
  int last_num_drops = 0;
  for (int i = 10; i < 100; i += kDropFrameThreshTestStep) {
    cfg_.rc_dropframe_thresh = i;
    ResetModel();
    ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
    ASSERT_GE(effective_datarate_[0], cfg_.rc_target_bitrate * 0.85)
        << " The datarate for the file is lower than target by too much!";
    ASSERT_LE(effective_datarate_[0], cfg_.rc_target_bitrate * 1.25)
        << " The datarate for the file is greater than target by too much!";
    ASSERT_LE(first_drop_, last_drop)
        << " The first dropped frame for drop_thresh " << i
        << " > first dropped frame for drop_thresh "
        << i - kDropFrameThreshTestStep;
    ASSERT_GE(num_drops_, last_num_drops * 0.85)
        << " The number of dropped frames for drop_thresh " << i
        << " < number of dropped frames for drop_thresh "
        << i - kDropFrameThreshTestStep;
    last_drop = first_drop_;
    last_num_drops = num_drops_;
  }
 }
 // Check basic rate targeting for 2 temporal layers.
 TEST_P(DatarateTestVP9Large, BasicRateTargeting2TemporalLayers) {
  cfg_.rc_buf_initial_sz = 500;
  cfg_.rc_buf_optimal_sz = 500;
  cfg_.rc_buf_sz = 1000;
  cfg_.rc_dropframe_thresh = 1;
  cfg_.rc_min_quantizer = 0;
  cfg_.rc_max_quantizer = 63;
  cfg_.rc_end_usage = VPX_CBR;
  cfg_.g_lag_in_frames = 0;
  // 2 Temporal layers, no spatial layers: Framerate decimation (2, 1).
  cfg_.ss_number_layers = 1;
  cfg_.ts_number_layers = 2;
  cfg_.ts_rate_decimator[0] = 2;
  cfg_.ts_rate_decimator[1] = 1;
  cfg_.temporal_layering_mode = VP9E_TEMPORAL_LAYERING_MODE_BYPASS;
  if (deadline_ == VPX_DL_REALTIME) cfg_.g_error_resilient = 1;
  ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
                                       30, 1, 0, 200);
  const int bitrates[4] = { 200, 400, 600, 800 };
  const int bitrate_index = GET_PARAM(3);
  cfg_.rc_target_bitrate = bitrates[bitrate_index];
  ResetModel();
  // 60-40 bitrate allocation for 2 temporal layers.
  cfg_.layer_target_bitrate[0] = 60 * cfg_.rc_target_bitrate / 100;
  cfg_.layer_target_bitrate[1] = cfg_.rc_target_bitrate;
  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
  for (int j = 0; j < static_cast<int>(cfg_.ts_number_layers); ++j) {
    ASSERT_GE(effective_datarate_[j], cfg_.layer_target_bitrate[j] * 0.85)
        << " The datarate for the file is lower than target by too much, "
           "for layer: "
        << j;
    ASSERT_LE(effective_datarate_[j], cfg_.layer_target_bitrate[j] * 1.15)
        << " The datarate for the file is greater than target by too much, "
           "for layer: "
        << j;
  }
 }
 // Check basic rate targeting for 3 temporal layers.
 TEST_P(DatarateTestVP9Large, BasicRateTargeting3TemporalLayers) {
  cfg_.rc_buf_initial_sz = 500;
  cfg_.rc_buf_optimal_sz = 500;
  cfg_.rc_buf_sz = 1000;
  cfg_.rc_dropframe_thresh = 1;
  cfg_.rc_min_quantizer = 0;
  cfg_.rc_max_quantizer = 63;
  cfg_.rc_end_usage = VPX_CBR;
  cfg_.g_lag_in_frames = 0;
  // 3 Temporal layers, no spatial layers: Framerate decimation (4, 2, 1).
  cfg_.ss_number_layers = 1;
  cfg_.ts_number_layers = 3;
  cfg_.ts_rate_decimator[0] = 4;
  cfg_.ts_rate_decimator[1] = 2;
  cfg_.ts_rate_decimator[2] = 1;
  cfg_.temporal_layering_mode = VP9E_TEMPORAL_LAYERING_MODE_BYPASS;
  ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
                                       30, 1, 0, 200);
  const int bitrates[4] = { 200, 400, 600, 800 };
  const int bitrate_index = GET_PARAM(3);
  cfg_.rc_target_bitrate = bitrates[bitrate_index];
  ResetModel();
  // 40-20-40 bitrate allocation for 3 temporal layers.
  cfg_.layer_target_bitrate[0] = 40 * cfg_.rc_target_bitrate / 100;
  cfg_.layer_target_bitrate[1] = 60 * cfg_.rc_target_bitrate / 100;
  cfg_.layer_target_bitrate[2] = cfg_.rc_target_bitrate;
  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
  for (int j = 0; j < static_cast<int>(cfg_.ts_number_layers); ++j) {
    // TODO(yaowu): Work out more stable rc control strategy and
    //              Adjust the thresholds to be tighter than .75.
    ASSERT_GE(effective_datarate_[j], cfg_.layer_target_bitrate[j] * 0.75)
        << " The datarate for the file is lower than target by too much, "
           "for layer: "
        << j;
    // TODO(yaowu): Work out more stable rc control strategy and
    //              Adjust the thresholds to be tighter than 1.25.
    ASSERT_LE(effective_datarate_[j], cfg_.layer_target_bitrate[j] * 1.25)
        << " The datarate for the file is greater than target by too much, "
           "for layer: "
        << j;
  }
 }
 // Check basic rate targeting for 3 temporal layers, with frame dropping.
 // Only for one (low) bitrate with lower max_quantizer, and somewhat higher
 // frame drop threshold, to force frame dropping.
 // Encodes 200 frames with 3 temporal layers at a single low bitrate, using a
 // reduced max quantizer and a raised drop threshold to force frame drops,
 // then checks the per-layer datarates and the total number of drops.
 TEST_P(DatarateTestVP9LargeOneBR,
       BasicRateTargeting3TemporalLayersFrameDropping) {
  cfg_.rc_buf_initial_sz = 500;
  cfg_.rc_buf_optimal_sz = 500;
  cfg_.rc_buf_sz = 1000;
  // Set frame drop threshold and rc_max_quantizer to force some frame drops.
  cfg_.rc_dropframe_thresh = 20;
  cfg_.rc_max_quantizer = 45;
  cfg_.rc_min_quantizer = 0;
  cfg_.rc_end_usage = VPX_CBR;
  cfg_.g_lag_in_frames = 0;
  // 3 Temporal layers, no spatial layers: Framerate decimation (4, 2, 1).
  cfg_.ss_number_layers = 1;
  cfg_.ts_number_layers = 3;
  cfg_.ts_rate_decimator[0] = 4;
  cfg_.ts_rate_decimator[1] = 2;
  cfg_.ts_rate_decimator[2] = 1;
  cfg_.temporal_layering_mode = VP9E_TEMPORAL_LAYERING_MODE_BYPASS;
  ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
                                       30, 1, 0, 200);
  cfg_.rc_target_bitrate = 200;
  ResetModel();
  // 40-20-40 bitrate allocation for 3 temporal layers.
  cfg_.layer_target_bitrate[0] = 40 * cfg_.rc_target_bitrate / 100;
  cfg_.layer_target_bitrate[1] = 60 * cfg_.rc_target_bitrate / 100;
  cfg_.layer_target_bitrate[2] = cfg_.rc_target_bitrate;
  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
  for (int j = 0; j < static_cast<int>(cfg_.ts_number_layers); ++j) {
    ASSERT_GE(effective_datarate_[j], cfg_.layer_target_bitrate[j] * 0.85)
        << " The datarate for the file is lower than target by too much, "
           "for layer: "
        << j;
    ASSERT_LE(effective_datarate_[j], cfg_.layer_target_bitrate[j] * 1.15)
        << " The datarate for the file is greater than target by too much, "
           "for layer: "
        << j;
  }
  // Expect some frame drops in this test: for this 200 frames test,
  // expect at least 20 (10%) and not more than 130 (65%) drops. The drop
  // count is a whole-run total, so it is checked once rather than per layer.
  ASSERT_GE(num_drops_, 20);
  ASSERT_LE(num_drops_, 130);
 }
 // Params: speed setting.
 class DatarateTestVP9RealTime : public DatarateTestVP9,
                                public ::libvpx_test::CodecTestWithParam<int> {
 public:
  DatarateTestVP9RealTime() : DatarateTestVP9(GET_PARAM(0)) {}
  virtual ~DatarateTestVP9RealTime() {}
 protected:
  virtual void SetUp() {
    InitializeConfig();
    SetMode(::libvpx_test::kRealTime);
    set_cpu_used_ = GET_PARAM(1);
    ResetModel();
  }
 };
 // Check VP9 region of interest feature.
 // Encodes the collage clip in real-time CBR mode with a two-segment ROI map
 // (segment 1 = centre region, segment 0 = everything else) and checks that
 // the layer-0 datarate stays within the accepted band around the target.
 // Skipped unless the deadline is real-time and the speed setting is >= 5.
 TEST_P(DatarateTestVP9RealTime, RegionOfInterest) {
  if (deadline_ != VPX_DL_REALTIME || set_cpu_used_ < 5) return;
  cfg_.rc_buf_initial_sz = 500;
  cfg_.rc_buf_optimal_sz = 500;
  cfg_.rc_buf_sz = 1000;
  cfg_.rc_dropframe_thresh = 0;
  cfg_.rc_min_quantizer = 0;
  cfg_.rc_max_quantizer = 63;
  cfg_.rc_end_usage = VPX_CBR;
  cfg_.g_lag_in_frames = 0;
  ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
                                       30, 1, 0, 300);
  cfg_.rc_target_bitrate = 450;
  cfg_.g_w = 352;
  cfg_.g_h = 288;
  ResetModel();
  // Set ROI parameters
  use_roi_ = true;
  memset(&roi_, 0, sizeof(roi_));
  // One map entry per 8x8 block, rounding partial blocks up.
  roi_.rows = (cfg_.g_h + 7) / 8;
  roi_.cols = (cfg_.g_w + 7) / 8;
  roi_.delta_q[1] = -20;
  roi_.delta_lf[1] = -20;
  memset(roi_.ref_frame, -1, sizeof(roi_.ref_frame));
  roi_.ref_frame[1] = 1;
  // Use 2 states: 1 is center square, 0 is the rest.
  roi_.roi_map = reinterpret_cast<uint8_t *>(
      calloc(roi_.rows * roi_.cols, sizeof(*roi_.roi_map)));
  ASSERT_TRUE(roi_.roi_map != NULL);
  for (unsigned int i = 0; i < roi_.rows; ++i) {
    for (unsigned int j = 0; j < roi_.cols; ++j) {
      if (i > (roi_.rows >> 2) && i < ((roi_.rows * 3) >> 2) &&
          j > (roi_.cols >> 2) && j < ((roi_.cols * 3) >> 2)) {
        roi_.roi_map[i * roi_.cols + j] = 1;
      }
    }
  }
  // Run the encode, then release the map before any assertion that can
  // return early; a fatal ASSERT_* would otherwise skip the free() and leak.
  RunLoop(&video);
  free(roi_.roi_map);
  roi_.roi_map = NULL;
  ASSERT_FALSE(HasFatalFailure()) << " RunLoop reported a fatal failure.";
  ASSERT_GE(cfg_.rc_target_bitrate, effective_datarate_[0] * 0.90)
      << " The datarate for the file exceeds the target!";
  ASSERT_LE(cfg_.rc_target_bitrate, effective_datarate_[0] * 1.4)
      << " The datarate for the file missed the target!";
 }
 #if CONFIG_VP9_TEMPORAL_DENOISING
 // Params: speed setting.
 class DatarateTestVP9LargeDenoiser : public DatarateTestVP9RealTime {
 public:
  virtual ~DatarateTestVP9LargeDenoiser() {}
 };
 // Check basic datarate targeting, for a single bitrate, when denoiser is on.
 TEST_P(DatarateTestVP9LargeDenoiser, LowNoise) {
  cfg_.rc_buf_initial_sz = 500;
  cfg_.rc_buf_optimal_sz = 500;
  cfg_.rc_buf_sz = 1000;
  cfg_.rc_dropframe_thresh = 1;
  cfg_.rc_min_quantizer = 2;
  cfg_.rc_max_quantizer = 56;
  cfg_.rc_end_usage = VPX_CBR;
  cfg_.g_lag_in_frames = 0;
  ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
                                       30, 1, 0, 140);
  // For the temporal denoiser (#if CONFIG_VP9_TEMPORAL_DENOISING),
  // there is only one denoiser mode: denoiserYonly(which is 1),
  // but may add more modes in the future.
  cfg_.rc_target_bitrate = 300;
  ResetModel();
  // Turn on the denoiser.
  denoiser_on_ = 1;
  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
  ASSERT_GE(effective_datarate_[0], cfg_.rc_target_bitrate * 0.85)
      << " The datarate for the file is lower than target by too much!";
  ASSERT_LE(effective_datarate_[0], cfg_.rc_target_bitrate * 1.15)
      << " The datarate for the file is greater than target by too much!";
 }
 // Check basic datarate targeting, for a single bitrate, when denoiser is on,
 // for clip with high noise level. Use 2 threads.
 TEST_P(DatarateTestVP9LargeDenoiser, HighNoise) {
  cfg_.rc_buf_initial_sz = 500;
  cfg_.rc_buf_optimal_sz = 500;
  cfg_.rc_buf_sz = 1000;
  cfg_.rc_dropframe_thresh = 1;
  cfg_.rc_min_quantizer = 2;
  cfg_.rc_max_quantizer = 56;
  cfg_.rc_end_usage = VPX_CBR;
  cfg_.g_lag_in_frames = 0;
  cfg_.g_threads = 2;
  ::libvpx_test::Y4mVideoSource video("noisy_clip_640_360.y4m", 0, 200);
  // For the temporal denoiser (#if CONFIG_VP9_TEMPORAL_DENOISING),
  // there is only one denoiser mode: kDenoiserOnYOnly(which is 1),
  // but may add more modes in the future.
  cfg_.rc_target_bitrate = 1000;
  ResetModel();
  // Turn on the denoiser.
  denoiser_on_ = 1;
  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
  ASSERT_GE(effective_datarate_[0], cfg_.rc_target_bitrate * 0.85)
      << " The datarate for the file is lower than target by too much!";
  ASSERT_LE(effective_datarate_[0], cfg_.rc_target_bitrate * 1.15)
      << " The datarate for the file is greater than target by too much!";
 }
 // Check basic datarate targeting, for a single bitrate, when denoiser is on,
 // for 1280x720 clip with 4 threads.
 TEST_P(DatarateTestVP9LargeDenoiser, 4threads) {
  cfg_.rc_buf_initial_sz = 500;
  cfg_.rc_buf_optimal_sz = 500;
  cfg_.rc_buf_sz = 1000;
  cfg_.rc_dropframe_thresh = 1;
  cfg_.rc_min_quantizer = 2;
  cfg_.rc_max_quantizer = 56;
  cfg_.rc_end_usage = VPX_CBR;
  cfg_.g_lag_in_frames = 0;
  cfg_.g_threads = 4;
  ::libvpx_test::Y4mVideoSource video("niklas_1280_720_30.y4m", 0, 300);
  // For the temporal denoiser (#if CONFIG_VP9_TEMPORAL_DENOISING),
  // there is only one denoiser mode: denoiserYonly(which is 1),
  // but may add more modes in the future.
  cfg_.rc_target_bitrate = 1000;
  ResetModel();
  // Turn on the denoiser.
  denoiser_on_ = 1;
  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
  ASSERT_GE(effective_datarate_[0], cfg_.rc_target_bitrate * 0.85)
      << " The datarate for the file is lower than target by too much!";
  ASSERT_LE(effective_datarate_[0], cfg_.rc_target_bitrate * 1.29)
      << " The datarate for the file is greater than target by too much!";
 }
 // Check basic datarate targeting, for a single bitrate, when denoiser is off
 // and on.
 TEST_P(DatarateTestVP9LargeDenoiser, DenoiserOffOn) {
  cfg_.rc_buf_initial_sz = 500;
  cfg_.rc_buf_optimal_sz = 500;
  cfg_.rc_buf_sz = 1000;
  cfg_.rc_dropframe_thresh = 1;
  cfg_.rc_min_quantizer = 2;
  cfg_.rc_max_quantizer = 56;
  cfg_.rc_end_usage = VPX_CBR;
  cfg_.g_lag_in_frames = 0;
  ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
                                       30, 1, 0, 299);
  // For the temporal denoiser (#if CONFIG_VP9_TEMPORAL_DENOISING),
  // there is only one denoiser mode: denoiserYonly(which is 1),
  // but may add more modes in the future.
  cfg_.rc_target_bitrate = 300;
  ResetModel();
  // The denoiser is off by default.
  denoiser_on_ = 0;
  // Set the offon test flag.
  denoiser_offon_test_ = 1;
  denoiser_offon_period_ = 100;
  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
  ASSERT_GE(effective_datarate_[0], cfg_.rc_target_bitrate * 0.85)
      << " The datarate for the file is lower than target by too much!";
  ASSERT_LE(effective_datarate_[0], cfg_.rc_target_bitrate * 1.15)
      << " The datarate for the file is greater than target by too much!";
 }
 #endif  // CONFIG_VP9_TEMPORAL_DENOISING
 // Parameter sets for the fixtures above. ::testing::Range(a, b) generates
 // a, a+1, ..., b-1 (the end value is excluded).
 //
 // DatarateTestVP9Large: test mode x speed 2..8 x bitrate index 0..3. Tests
 // that define only two bitrates return early for indices above 1.
 VP9_INSTANTIATE_TEST_CASE(DatarateTestVP9Large,
                          ::testing::Values(::libvpx_test::kOnePassGood,
                                            ::libvpx_test::kRealTime),
                          ::testing::Range(2, 9), ::testing::Range(0, 4));
 // DatarateTestVP9LargeOneBR: test mode x speed 2..8, single bitrate per test.
 VP9_INSTANTIATE_TEST_CASE(DatarateTestVP9LargeOneBR,
                          ::testing::Values(::libvpx_test::kOnePassGood,
                                            ::libvpx_test::kRealTime),
                          ::testing::Range(2, 9));
 // DatarateTestVP9RealTime: speed 5..8 (the fixture forces real-time mode).
 VP9_INSTANTIATE_TEST_CASE(DatarateTestVP9RealTime, ::testing::Range(5, 9));
 #if CONFIG_VP9_TEMPORAL_DENOISING
 // Denoiser tests: speed 5..8, built only when the temporal denoiser is on.
 VP9_INSTANTIATE_TEST_CASE(DatarateTestVP9LargeDenoiser, ::testing::Range(5, 9));
 #endif
 }  // namespace
--- a/test/vp9_denoiser_sse2_test.cc
+++ b/test/vp9_denoiser_sse2_test.cc
@ -29,22 +29,11 @@ using libvpx_test::ACMRandom;
 namespace {
 const int kNumPixels = 64 * 64;
-
+class VP9DenoiserTest : public ::testing::TestWithParam<BLOCK_SIZE> {
 typedef int (*Vp9DenoiserFilterFunc)(const uint8_t *sig, int sig_stride,
                                     const uint8_t *mc_avg, int mc_avg_stride,
                                     uint8_t *avg, int avg_stride,
                                     int increase_denoising, BLOCK_SIZE bs,
                                     int motion_magnitude);
 typedef ::testing::tuple<Vp9DenoiserFilterFunc, BLOCK_SIZE>
    VP9DenoiserTestParam;
 class VP9DenoiserTest
    : public ::testing::Test,
      public ::testing::WithParamInterface<VP9DenoiserTestParam> {
 public:
  virtual ~VP9DenoiserTest() {}
-  virtual void SetUp() { bs_ = GET_PARAM(1); }
+  virtual void SetUp() { bs_ = GetParam(); }
  virtual void TearDown() { libvpx_test::ClearSystemState(); }
@ -87,9 +76,9 @@ TEST_P(VP9DenoiserTest, BitexactCheck) {
                                                   64, avg_block_c, 64, 0, bs_,
                                                   motion_magnitude_random));
-    ASM_REGISTER_STATE_CHECK(GET_PARAM(0)(sig_block, 64, mc_avg_block, 64,
+    ASM_REGISTER_STATE_CHECK(vp9_denoiser_filter_sse2(
-                                          avg_block_sse2, 64, 0, bs_,
+        sig_block, 64, mc_avg_block, 64, avg_block_sse2, 64, 0, bs_,
-                                          motion_magnitude_random));
+        motion_magnitude_random));
    // Test bitexactness.
    for (int h = 0; h < (4 << b_height_log2_lookup[bs_]); ++h) {
@ -100,36 +89,10 @@ TEST_P(VP9DenoiserTest, BitexactCheck) {
  }
 }
 using ::testing::make_tuple;
 // Test for all block sizes.
-#if HAVE_SSE2
+INSTANTIATE_TEST_CASE_P(SSE2, VP9DenoiserTest,
-INSTANTIATE_TEST_CASE_P(
+                        ::testing::Values(BLOCK_8X8, BLOCK_8X16, BLOCK_16X8,
-    SSE2, VP9DenoiserTest,
+                                          BLOCK_16X16, BLOCK_16X32, BLOCK_32X16,
-    ::testing::Values(make_tuple(&vp9_denoiser_filter_sse2, BLOCK_8X8),
+                                          BLOCK_32X32, BLOCK_32X64, BLOCK_64X32,
-                      make_tuple(&vp9_denoiser_filter_sse2, BLOCK_8X16),
+                                          BLOCK_64X64));
                      make_tuple(&vp9_denoiser_filter_sse2, BLOCK_16X8),
                      make_tuple(&vp9_denoiser_filter_sse2, BLOCK_16X16),
                      make_tuple(&vp9_denoiser_filter_sse2, BLOCK_16X32),
                      make_tuple(&vp9_denoiser_filter_sse2, BLOCK_32X16),
                      make_tuple(&vp9_denoiser_filter_sse2, BLOCK_32X32),
                      make_tuple(&vp9_denoiser_filter_sse2, BLOCK_32X64),
                      make_tuple(&vp9_denoiser_filter_sse2, BLOCK_64X32),
                      make_tuple(&vp9_denoiser_filter_sse2, BLOCK_64X64)));
 #endif  // HAVE_SSE2
 #if HAVE_NEON
 INSTANTIATE_TEST_CASE_P(
    NEON, VP9DenoiserTest,
    ::testing::Values(make_tuple(&vp9_denoiser_filter_neon, BLOCK_8X8),
                      make_tuple(&vp9_denoiser_filter_neon, BLOCK_8X16),
                      make_tuple(&vp9_denoiser_filter_neon, BLOCK_16X8),
                      make_tuple(&vp9_denoiser_filter_neon, BLOCK_16X16),
                      make_tuple(&vp9_denoiser_filter_neon, BLOCK_16X32),
                      make_tuple(&vp9_denoiser_filter_neon, BLOCK_32X16),
                      make_tuple(&vp9_denoiser_filter_neon, BLOCK_32X32),
                      make_tuple(&vp9_denoiser_filter_neon, BLOCK_32X64),
                      make_tuple(&vp9_denoiser_filter_neon, BLOCK_64X32),
                      make_tuple(&vp9_denoiser_filter_neon, BLOCK_64X64)));
 #endif
 }  // namespace
--- a/test/vp9_encoder_parms_get_to_decoder.cc
+++ b/test/vp9_encoder_parms_get_to_decoder.cc
@ -99,7 +99,9 @@ class VpxEncoderParmsGetToDecoder
    vpx_codec_ctx_t *const vp9_decoder = decoder->GetDecoder();
    vpx_codec_alg_priv_t *const priv =
        reinterpret_cast<vpx_codec_alg_priv_t *>(vp9_decoder->priv);
-    VP9_COMMON *const common = &priv->pbi->common;
+    FrameWorkerData *const worker_data =
        reinterpret_cast<FrameWorkerData *>(priv->frame_workers[0].data1);
    VP9_COMMON *const common = &worker_data->pbi->common;
    if (encode_parms.lossless) {
      EXPECT_EQ(0, common->base_qindex);
--- a/test/vp9_end_to_end_test.cc
+++ b/test/vp9_end_to_end_test.cc
@ -59,7 +59,7 @@ const TestVideoParam kTestVectors[] = {
 // Encoding modes tested
 const libvpx_test::TestMode kEncodingModeVectors[] = {
  ::libvpx_test::kTwoPassGood, ::libvpx_test::kOnePassGood,
-  ::libvpx_test::kRealTime
+  ::libvpx_test::kRealTime,
 };
 // Speed settings tested
--- a/test/vp9_error_block_test.cc
+++ b/test/vp9_error_block_test.cc
@ -23,36 +23,36 @@
 #include "vp9/common/vp9_entropy.h"
 #include "vpx/vpx_codec.h"
 #include "vpx/vpx_integer.h"
 #include "vpx_dsp/vpx_dsp_common.h"
 using libvpx_test::ACMRandom;
 namespace {
 #if CONFIG_VP9_HIGHBITDEPTH
 const int kNumIterations = 1000;
-typedef int64_t (*HBDBlockErrorFunc)(const tran_low_t *coeff,
+typedef int64_t (*ErrorBlockFunc)(const tran_low_t *coeff,
                                     const tran_low_t *dqcoeff,
                                     intptr_t block_size, int64_t *ssz,
                                     int bps);
 typedef ::testing::tuple<HBDBlockErrorFunc, HBDBlockErrorFunc, vpx_bit_depth_t>
    BlockErrorParam;
 typedef int64_t (*BlockErrorFunc)(const tran_low_t *coeff,
                                  const tran_low_t *dqcoeff,
-                                  intptr_t block_size, int64_t *ssz);
+                                  intptr_t block_size, int64_t *ssz, int bps);
-template <BlockErrorFunc fn>
+typedef std::tr1::tuple<ErrorBlockFunc, ErrorBlockFunc, vpx_bit_depth_t>
-int64_t BlockError8BitWrapper(const tran_low_t *coeff,
+    ErrorBlockParam;
-                              const tran_low_t *dqcoeff, intptr_t block_size,
+
-                              int64_t *ssz, int bps) {
+// wrapper for 8-bit block error functions without a 'bps' param.
-  EXPECT_EQ(bps, 8);
+typedef int64_t (*HighBdBlockError8bit)(const tran_low_t *coeff,
                                        const tran_low_t *dqcoeff,
                                        intptr_t block_size, int64_t *ssz);
 template <HighBdBlockError8bit fn>
 int64_t HighBdBlockError8bitWrapper(const tran_low_t *coeff,
                                    const tran_low_t *dqcoeff,
                                    intptr_t block_size, int64_t *ssz,
                                    int bps) {
  EXPECT_EQ(8, bps);
  return fn(coeff, dqcoeff, block_size, ssz);
 }
-class BlockErrorTest : public ::testing::TestWithParam<BlockErrorParam> {
+class ErrorBlockTest : public ::testing::TestWithParam<ErrorBlockParam> {
 public:
-  virtual ~BlockErrorTest() {}
+  virtual ~ErrorBlockTest() {}
  virtual void SetUp() {
    error_block_op_ = GET_PARAM(0);
    ref_error_block_op_ = GET_PARAM(1);
@ -63,11 +63,11 @@ class BlockErrorTest : public ::testing::TestWithParam<BlockErrorParam> {
 protected:
  vpx_bit_depth_t bit_depth_;
-  HBDBlockErrorFunc error_block_op_;
+  ErrorBlockFunc error_block_op_;
-  HBDBlockErrorFunc ref_error_block_op_;
+  ErrorBlockFunc ref_error_block_op_;
 };
-TEST_P(BlockErrorTest, OperationCheck) {
+TEST_P(ErrorBlockTest, OperationCheck) {
  ACMRandom rnd(ACMRandom::DeterministicSeed());
  DECLARE_ALIGNED(16, tran_low_t, coeff[4096]);
  DECLARE_ALIGNED(16, tran_low_t, dqcoeff[4096]);
@ -110,7 +110,7 @@ TEST_P(BlockErrorTest, OperationCheck) {
      << "First failed at test case " << first_failure;
 }
-TEST_P(BlockErrorTest, ExtremeValues) {
+TEST_P(ErrorBlockTest, ExtremeValues) {
  ACMRandom rnd(ACMRandom::DeterministicSeed());
  DECLARE_ALIGNED(16, tran_low_t, coeff[4096]);
  DECLARE_ALIGNED(16, tran_low_t, dqcoeff[4096]);
@ -168,31 +168,32 @@ TEST_P(BlockErrorTest, ExtremeValues) {
      << "First failed at test case " << first_failure;
 }
-using ::testing::make_tuple;
+using std::tr1::make_tuple;
 #if HAVE_SSE2
-const BlockErrorParam sse2_block_error_tests[] = {
+INSTANTIATE_TEST_CASE_P(
-#if CONFIG_VP9_HIGHBITDEPTH
+    SSE2, ErrorBlockTest,
-  make_tuple(&vp9_highbd_block_error_sse2, &vp9_highbd_block_error_c,
+    ::testing::Values(
-             VPX_BITS_10),
+        make_tuple(&vp9_highbd_block_error_sse2, &vp9_highbd_block_error_c,
-  make_tuple(&vp9_highbd_block_error_sse2, &vp9_highbd_block_error_c,
+                   VPX_BITS_10),
-             VPX_BITS_12),
+        make_tuple(&vp9_highbd_block_error_sse2, &vp9_highbd_block_error_c,
-  make_tuple(&vp9_highbd_block_error_sse2, &vp9_highbd_block_error_c,
+                   VPX_BITS_12),
-             VPX_BITS_8),
+        make_tuple(&vp9_highbd_block_error_sse2, &vp9_highbd_block_error_c,
-#endif  // CONFIG_VP9_HIGHBITDEPTH
+                   VPX_BITS_8),
-  make_tuple(&BlockError8BitWrapper<vp9_block_error_sse2>,
+        make_tuple(
-             &BlockError8BitWrapper<vp9_block_error_c>, VPX_BITS_8)
+            &HighBdBlockError8bitWrapper<vp9_highbd_block_error_8bit_sse2>,
-};
+            &HighBdBlockError8bitWrapper<vp9_highbd_block_error_8bit_c>,
-
+            VPX_BITS_8)));
 INSTANTIATE_TEST_CASE_P(SSE2, BlockErrorTest,
                        ::testing::ValuesIn(sse2_block_error_tests));
 #endif  // HAVE_SSE2
-#if HAVE_AVX2
+#if HAVE_AVX
 INSTANTIATE_TEST_CASE_P(
-    AVX2, BlockErrorTest,
+    AVX, ErrorBlockTest,
-    ::testing::Values(make_tuple(&BlockError8BitWrapper<vp9_block_error_avx2>,
+    ::testing::Values(make_tuple(
-                                 &BlockError8BitWrapper<vp9_block_error_c>,
+        &HighBdBlockError8bitWrapper<vp9_highbd_block_error_8bit_avx>,
-                                 VPX_BITS_8)));
+        &HighBdBlockError8bitWrapper<vp9_highbd_block_error_8bit_c>,
-#endif  // HAVE_AVX2
+        VPX_BITS_8)));
 #endif  // HAVE_AVX
 #endif  // CONFIG_VP9_HIGHBITDEPTH
 }  // namespace
--- a/test/vp9_ethread_test.cc
+++ b/test/vp9_ethread_test.cc
@ -16,207 +16,8 @@
 #include "test/md5_helper.h"
 #include "test/util.h"
 #include "test/y4m_video_source.h"
 #include "vp9/encoder/vp9_firstpass.h"
 namespace {
 // Expected composition of the FIRSTPASS_STATS struct:
 //   25 double members and 1 int64_t member.
 // These constants mirror that composition by hand, so they must be revisited
 // whenever FIRSTPASS_STATS is modified.
 const int kDbl = 25;  // Number of double members.
 const int kInt = 1;   // Number of int64_t members.
 // Expected size in bytes of one first pass stats packet.
 const size_t kFirstPassStatsSz =
    sizeof(double) * kDbl + sizeof(int64_t) * kInt;
 // Encoder test fixture that appends every first pass stats packet emitted
 // during RunLoop() to |firstpass_stats_|, so that the stats produced with
 // different row-mt / thread settings can be compared afterwards.
 // Test parameters: GET_PARAM(1) is the encoding mode and GET_PARAM(2) is the
 // value passed to VP8E_SET_CPUUSED.
 class VPxFirstPassEncoderThreadTest
    : public ::libvpx_test::EncoderTest,
      public ::libvpx_test::CodecTestWith2Params<libvpx_test::TestMode, int> {
 protected:
  VPxFirstPassEncoderThreadTest()
      : EncoderTest(GET_PARAM(0)), encoder_initialized_(false), tiles_(0),
        encoding_mode_(GET_PARAM(1)), set_cpu_used_(GET_PARAM(2)),
        row_mt_mode_(1), first_pass_only_(true) {
    init_flags_ = VPX_CODEC_USE_PSNR;
    firstpass_stats_.buf = NULL;
    firstpass_stats_.sz = 0;
  }
  // The accumulated stats buffer is owned by the fixture.
  virtual ~VPxFirstPassEncoderThreadTest() { free(firstpass_stats_.buf); }
  virtual void SetUp() {
    InitializeConfig();
    SetMode(encoding_mode_);
    cfg_.rc_end_usage = VPX_VBR;
    cfg_.rc_2pass_vbr_minsection_pct = 5;
    cfg_.rc_2pass_vbr_maxsection_pct = 2000;
    cfg_.rc_max_quantizer = 56;
    cfg_.rc_min_quantizer = 0;
  }
  // Forces the encoder controls to be re-applied at the start of each pass.
  virtual void BeginPassHook(unsigned int /*pass*/) {
    encoder_initialized_ = false;
    abort_ = false;
  }
  virtual void EndPassHook() {
    // For first pass stats test, only run first pass encoder: request an abort
    // once the first pass has finished.
    if (first_pass_only_ && cfg_.g_pass == VPX_RC_FIRST_PASS) abort_ = true;
  }
  // Applies the encoder controls once per pass, before the first frame.
  virtual void PreEncodeFrameHook(::libvpx_test::VideoSource * /*video*/,
                                  ::libvpx_test::Encoder *encoder) {
    if (!encoder_initialized_) {
      // Encode in 2-pass mode.
      encoder->Control(VP9E_SET_TILE_COLUMNS, tiles_);
      encoder->Control(VP8E_SET_CPUUSED, set_cpu_used_);
      encoder->Control(VP8E_SET_ENABLEAUTOALTREF, 1);
      encoder->Control(VP8E_SET_ARNR_MAXFRAMES, 7);
      encoder->Control(VP8E_SET_ARNR_STRENGTH, 5);
      encoder->Control(VP8E_SET_ARNR_TYPE, 3);
      encoder->Control(VP9E_SET_FRAME_PARALLEL_DECODING, 0);
      // Row-mt is only configured for two-pass good quality encodes.
      if (encoding_mode_ == ::libvpx_test::kTwoPassGood)
        encoder->Control(VP9E_SET_ROW_MT, row_mt_mode_);
      encoder_initialized_ = true;
    }
  }
  // Appends the stats packet |pkt| to |firstpass_stats_|.
  virtual void StatsPktHook(const vpx_codec_cx_pkt_t *pkt) {
    const uint8_t *const pkt_buf =
        reinterpret_cast<uint8_t *>(pkt->data.twopass_stats.buf);
    const size_t pkt_size = pkt->data.twopass_stats.sz;
    // First pass stats size equals sizeof(FIRSTPASS_STATS)
    EXPECT_EQ(pkt_size, kFirstPassStatsSz)
        << "Error: First pass stats size doesn't equal kFirstPassStatsSz";
    // Grow through a temporary: if realloc() fails the previous buffer is
    // still owned by |firstpass_stats_| (and freed by the destructor) instead
    // of being leaked, and nothing is copied through a NULL pointer.
    void *const grown_buf =
        realloc(firstpass_stats_.buf, firstpass_stats_.sz + pkt_size);
    ASSERT_TRUE(grown_buf != NULL)
        << "Error: Failed to grow the first pass stats buffer";
    firstpass_stats_.buf = grown_buf;
    memcpy(static_cast<uint8_t *>(firstpass_stats_.buf) + firstpass_stats_.sz,
           pkt_buf, pkt_size);
    firstpass_stats_.sz += pkt_size;
  }
  bool encoder_initialized_;  // Whether the controls were applied this pass.
  int tiles_;                 // Value passed to VP9E_SET_TILE_COLUMNS.
  ::libvpx_test::TestMode encoding_mode_;
  int set_cpu_used_;     // Value passed to VP8E_SET_CPUUSED.
  int row_mt_mode_;      // Value passed to VP9E_SET_ROW_MT.
  bool first_pass_only_;  // Abort after the first pass when true.
  vpx_fixed_buf_t firstpass_stats_;  // Concatenated first pass stats packets.
 };
 // Compares the two sets of first pass stats stored back to back in
 // |fp_stats|. Each double stat of the first set must differ from its
 // counterpart in the second set by at most |stat| / |factor|. On success the
 // buffer is zeroed and its size reset to 0 so it can be reused.
 static void compare_fp_stats(vpx_fixed_buf_t *fp_stats, double factor) {
  FIRSTPASS_STATS *const first_set =
      reinterpret_cast<FIRSTPASS_STATS *>(fp_stats->buf);
  const int total_entries =
      static_cast<int>(fp_stats->sz / sizeof(FIRSTPASS_STATS));
  const int entries_per_set = total_entries / 2;
  FIRSTPASS_STATS *const second_set = first_set + entries_per_set;
  // The last entry of each set is the total stats entry; it is excluded from
  // the comparison.
  for (int i = 0; i < entries_per_set - 1; ++i) {
    const double *frame_stats1 = reinterpret_cast<double *>(first_set + i);
    const double *frame_stats2 = reinterpret_cast<double *>(second_set + i);
    // Walk the kDbl double members of both entries in lock step.
    for (int j = 0; j < kDbl; ++j, ++frame_stats1, ++frame_stats2) {
      ASSERT_LE(fabs(*frame_stats1 - *frame_stats2),
                fabs(*frame_stats1) / factor)
          << "First failure @ frame #" << i << " stat #" << j << " ("
          << *frame_stats1 << " vs. " << *frame_stats2 << ")";
    }
  }
  // Clear the accumulated stats for the next comparison.
  memset(fp_stats->buf, 0, fp_stats->sz);
  fp_stats->sz = 0;
 }
 // Checks that the two sets of first pass stats stored back to back in
 // |fp_stats| are identical by comparing the md5 of each half of the buffer.
 // On success the buffer is zeroed and its size reset to 0.
 static void compare_fp_stats_md5(vpx_fixed_buf_t *fp_stats) {
  const size_t half_size = fp_stats->sz / 2;
  uint8_t *const first_half = reinterpret_cast<uint8_t *>(fp_stats->buf);
  uint8_t *const second_half = first_half + half_size;
  ::libvpx_test::MD5 md5_row_mt_0, md5_row_mt_1;
  md5_row_mt_0.Add(first_half, half_size);
  const char *md5_row_mt_0_str = md5_row_mt_0.Get();
  md5_row_mt_1.Add(second_half, half_size);
  const char *md5_row_mt_1_str = md5_row_mt_1.Get();
  // Both halves must hash to the same value.
  ASSERT_STREQ(md5_row_mt_0_str, md5_row_mt_1_str)
      << "MD5 checksums don't match";
  // Clear the accumulated stats for the next comparison.
  memset(fp_stats->buf, 0, fp_stats->sz);
  fp_stats->sz = 0;
 }
 // Runs the first pass encoder in pairs of configurations and compares the
 // first pass stats of each pair. Every RunLoop() appends its stats to
 // firstpass_stats_, and each compare_* helper resets that buffer, so the
 // statement order below is significant.
 TEST_P(VPxFirstPassEncoderThreadTest, FirstPassStatsTest) {
  ::libvpx_test::Y4mVideoSource video("niklas_1280_720_30.y4m", 0, 60);
  first_pass_only_ = true;
  cfg_.rc_target_bitrate = 1000;
  // Test row_mt_mode: 0 vs 1 at single thread case(threads = 1, tiles_ = 0)
  tiles_ = 0;
  cfg_.g_threads = 1;
  row_mt_mode_ = 0;
  init_flags_ = VPX_CODEC_USE_PSNR;
  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
  row_mt_mode_ = 1;
  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
  // Compare to check if using or not using row-mt generates close stats
  // (each stat within 1/1000 of the first run's value).
  ASSERT_NO_FATAL_FAILURE(compare_fp_stats(&firstpass_stats_, 1000.0));
  // Test single thread vs multiple threads
  row_mt_mode_ = 1;
  tiles_ = 0;
  cfg_.g_threads = 1;
  init_flags_ = VPX_CODEC_USE_PSNR;
  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
  cfg_.g_threads = 4;
  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
  // Compare to check if single-thread and multi-thread stats are close enough.
  ASSERT_NO_FATAL_FAILURE(compare_fp_stats(&firstpass_stats_, 1000.0));
  // Bit exact test in row_mt mode.
  // When row_mt_mode_=1 and using >1 threads, the encoder generates bit exact
  // result.
  row_mt_mode_ = 1;
  tiles_ = 2;
  cfg_.g_threads = 2;
  init_flags_ = VPX_CODEC_USE_PSNR;
  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
  cfg_.g_threads = 8;
  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
  // Compare the md5 of the 2-thread and 8-thread stats (both with row-mt on);
  // they must match exactly.
  compare_fp_stats_md5(&firstpass_stats_);
 }
 class VPxEncoderThreadTest
    : public ::libvpx_test::EncoderTest,
      public ::libvpx_test::CodecTestWith4Params<libvpx_test::TestMode, int,
@ -228,9 +29,6 @@ class VPxEncoderThreadTest
        encoding_mode_(GET_PARAM(1)), set_cpu_used_(GET_PARAM(2)) {
    init_flags_ = VPX_CODEC_USE_PSNR;
    md5_.clear();
    row_mt_mode_ = 1;
    psnr_ = 0.0;
    nframes_ = 0;
  }
  virtual ~VPxEncoderThreadTest() {}
@ -239,6 +37,7 @@ class VPxEncoderThreadTest
    SetMode(encoding_mode_);
    if (encoding_mode_ != ::libvpx_test::kRealTime) {
      cfg_.g_lag_in_frames = 3;
      cfg_.rc_end_usage = VPX_VBR;
      cfg_.rc_2pass_vbr_minsection_pct = 5;
      cfg_.rc_2pass_vbr_maxsection_pct = 2000;
@ -253,8 +52,6 @@ class VPxEncoderThreadTest
  virtual void BeginPassHook(unsigned int /*pass*/) {
    encoder_initialized_ = false;
    psnr_ = 0.0;
    nframes_ = 0;
  }
  virtual void PreEncodeFrameHook(::libvpx_test::VideoSource * /*video*/,
@ -273,17 +70,10 @@ class VPxEncoderThreadTest
        encoder->Control(VP8E_SET_ENABLEAUTOALTREF, 0);
        encoder->Control(VP9E_SET_AQ_MODE, 3);
      }
      encoder->Control(VP9E_SET_ROW_MT, row_mt_mode_);
      encoder_initialized_ = true;
    }
  }
  virtual void PSNRPktHook(const vpx_codec_cx_pkt_t *pkt) {
    psnr_ += pkt->data.psnr.psnr[0];
    nframes_++;
  }
  virtual void DecompressedFrameHook(const vpx_image_t &img,
                                     vpx_codec_pts_t /*pts*/) {
    ::libvpx_test::MD5 md5_res;
@ -302,127 +92,43 @@ class VPxEncoderThreadTest
    return true;
  }
  double GetAveragePsnr() const { return nframes_ ? (psnr_ / nframes_) : 0.0; }
  bool encoder_initialized_;
  int tiles_;
  int threads_;
  ::libvpx_test::TestMode encoding_mode_;
  int set_cpu_used_;
  int row_mt_mode_;
  double psnr_;
  unsigned int nframes_;
  std::vector<std::string> md5_;
 };
 TEST_P(VPxEncoderThreadTest, EncoderResultTest) {
-  ::libvpx_test::Y4mVideoSource video("niklas_1280_720_30.y4m", 15, 20);
+  std::vector<std::string> single_thr_md5, multi_thr_md5;
  cfg_.rc_target_bitrate = 1000;
-  // Part 1: Bit exact test for row_mt_mode_ = 0.
+  ::libvpx_test::Y4mVideoSource video("niklas_1280_720_30.y4m", 15, 20);
-  // This part keeps original unit tests done before row-mt code is checked in.
+
-  row_mt_mode_ = 0;
+  cfg_.rc_target_bitrate = 1000;
  // Encode using single thread.
  cfg_.g_threads = 1;
  init_flags_ = VPX_CODEC_USE_PSNR;
  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
-  const std::vector<std::string> single_thr_md5 = md5_;
+  single_thr_md5 = md5_;
  md5_.clear();
  // Encode using multiple threads.
  cfg_.g_threads = threads_;
  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
-  const std::vector<std::string> multi_thr_md5 = md5_;
+  multi_thr_md5 = md5_;
  md5_.clear();
  // Compare to check if two vectors are equal.
  ASSERT_EQ(single_thr_md5, multi_thr_md5);
  // Part 2: row_mt_mode_ = 0 vs row_mt_mode_ = 1 single thread bit exact test.
  row_mt_mode_ = 1;
  // Encode using single thread
  cfg_.g_threads = 1;
  init_flags_ = VPX_CODEC_USE_PSNR;
  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
  std::vector<std::string> row_mt_single_thr_md5 = md5_;
  md5_.clear();
  ASSERT_EQ(single_thr_md5, row_mt_single_thr_md5);
  // Part 3: Bit exact test with row-mt on
  // When row_mt_mode_=1 and using >1 threads, the encoder generates bit exact
  // result.
  row_mt_mode_ = 1;
  row_mt_single_thr_md5.clear();
  // Encode using 2 threads.
  cfg_.g_threads = 2;
  init_flags_ = VPX_CODEC_USE_PSNR;
  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
  row_mt_single_thr_md5 = md5_;
  md5_.clear();
  // Encode using multiple threads.
  cfg_.g_threads = threads_;
  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
  const std::vector<std::string> row_mt_multi_thr_md5 = md5_;
  md5_.clear();
  // Compare to check if two vectors are equal.
  ASSERT_EQ(row_mt_single_thr_md5, row_mt_multi_thr_md5);
  // Part 4: PSNR test with bit_match_mode_ = 0
  row_mt_mode_ = 1;
  // Encode using single thread.
  cfg_.g_threads = 1;
  init_flags_ = VPX_CODEC_USE_PSNR;
  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
  const double single_thr_psnr = GetAveragePsnr();
  // Encode using multiple threads.
  cfg_.g_threads = threads_;
  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
  const double multi_thr_psnr = GetAveragePsnr();
  EXPECT_NEAR(single_thr_psnr, multi_thr_psnr, 0.1);
 }
-INSTANTIATE_TEST_CASE_P(
+VP9_INSTANTIATE_TEST_CASE(VPxEncoderThreadTest,
-    VP9, VPxFirstPassEncoderThreadTest,
+                          ::testing::Values(::libvpx_test::kTwoPassGood,
-    ::testing::Combine(
+                                            ::libvpx_test::kOnePassGood,
-        ::testing::Values(
+                                            ::libvpx_test::kRealTime),
-            static_cast<const libvpx_test::CodecFactory *>(&libvpx_test::kVP9)),
+                          ::testing::Range(0, 9),   // cpu_used
-        ::testing::Values(::libvpx_test::kTwoPassGood),
+                          ::testing::Range(0, 3),   // tile_columns
-        ::testing::Range(0, 4)));  // cpu_used
+                          ::testing::Range(2, 5));  // threads
 // Split this into two instantiations so that we can distinguish
 // between very slow runs ( ie cpu_speed 0 ) vs ones that can be
 // run nightly by adding Large to the title.
 INSTANTIATE_TEST_CASE_P(
    VP9, VPxEncoderThreadTest,
    ::testing::Combine(
        ::testing::Values(
            static_cast<const libvpx_test::CodecFactory *>(&libvpx_test::kVP9)),
        ::testing::Values(::libvpx_test::kTwoPassGood,
                          ::libvpx_test::kOnePassGood,
                          ::libvpx_test::kRealTime),
        ::testing::Range(3, 9),    // cpu_used
        ::testing::Range(0, 3),    // tile_columns
        ::testing::Range(2, 5)));  // threads
 INSTANTIATE_TEST_CASE_P(
    VP9Large, VPxEncoderThreadTest,
    ::testing::Combine(
        ::testing::Values(
            static_cast<const libvpx_test::CodecFactory *>(&libvpx_test::kVP9)),
        ::testing::Values(::libvpx_test::kTwoPassGood,
                          ::libvpx_test::kOnePassGood,
                          ::libvpx_test::kRealTime),
        ::testing::Range(0, 3),    // cpu_used
        ::testing::Range(0, 3),    // tile_columns
        ::testing::Range(2, 5)));  // threads
 }  // namespace
--- a/test/vp9_frame_parallel_test.cc
+++ b/test/vp9_frame_parallel_test.cc
@ -0,0 +1,217 @@
 /*
 *  Copyright (c) 2014 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
 #include <cstdio>
 #include <cstdlib>
 #include <string>
 #include "third_party/googletest/src/include/gtest/gtest.h"
 #include "./vpx_config.h"
 #include "test/codec_factory.h"
 #include "test/decode_test_driver.h"
 #include "test/ivf_video_source.h"
 #include "test/md5_helper.h"
 #include "test/util.h"
 #if CONFIG_WEBM_IO
 #include "test/webm_video_source.h"
 #endif
 #include "vpx_mem/vpx_mem.h"
 namespace {
 using std::string;
 #if CONFIG_WEBM_IO
 // One entry of a pause/seek/resume test table. Tables are terminated by an
 // entry whose |name| is NULL.
 struct PauseFileList {
  // Name of the input file to decode.
  const char *name;
  // md5 sum for decoded frames which does not include skipped frames.
  const char *expected_md5;
  // Number of input frames to decode before flushing the decoder and seeking
  // to the next key frame.
  const int pause_frame_num;
 };
 // Decodes |filename| with |num_threads| and frame threading enabled. Once
 // |pause_num| input frames have been decoded, the decoder is flushed and the
 // source seeks to the next key frame; decoding then continues to the end.
 // Returns the md5 of the decoded frames, which does not include skipped
 // frames.
 string DecodeFileWithPause(const string &filename, int num_threads,
                           int pause_num) {
  libvpx_test::WebMVideoSource video(filename);
  video.Init();
  vpx_codec_dec_cfg_t cfg = vpx_codec_dec_cfg_t();
  cfg.threads = num_threads;
  const vpx_codec_flags_t flags = VPX_CODEC_USE_FRAME_THREADING;
  libvpx_test::VP9Decoder decoder(cfg, flags);
  libvpx_test::MD5 md5;
  int in_frames = 0;   // Frames handed to the decoder.
  int out_frames = 0;  // Frames returned by the decoder.
  video.Begin();
  do {
    ++in_frames;
    const vpx_codec_err_t res =
        decoder.DecodeFrame(video.cxdata(), video.frame_size());
    if (res != VPX_CODEC_OK) {
      EXPECT_EQ(VPX_CODEC_OK, res) << decoder.DecodeError();
      break;
    }
    if (in_frames != pause_num) {
      video.Next();
    } else {
      // Pause point reached: flush the decoder, then skip ahead to the next
      // key frame.
      decoder.DecodeFrame(NULL, 0);
      video.SeekToNextKeyFrame();
    }
    // Flush the decoder at the end of the video.
    if (video.cxdata() == NULL) decoder.DecodeFrame(NULL, 0);
    // Drain every frame that is ready and fold it into the md5.
    libvpx_test::DxDataIterator dec_iter = decoder.GetDxData();
    for (const vpx_image_t *img = dec_iter.Next(); img != NULL;
         img = dec_iter.Next()) {
      ++out_frames;
      md5.Add(img);
    }
  } while (video.cxdata() != NULL);
  EXPECT_EQ(in_frames, out_frames)
      << "Input frame count does not match output frame count";
  return string(md5.Get());
 }
 // Runs DecodeFileWithPause() on every entry of the NULL-name-terminated table
 // |files| with 2 through 8 threads and checks each md5 against the entry's
 // expected value.
 void DecodeFilesWithPause(const PauseFileList files[]) {
  const PauseFileList *iter = files;
  while (iter->name != NULL) {
    {
      // Tag any failure below with the file being decoded.
      SCOPED_TRACE(iter->name);
      int t = 2;
      while (t <= 8) {
        EXPECT_EQ(iter->expected_md5,
                  DecodeFileWithPause(iter->name, t, iter->pause_frame_num))
            << "threads = " << t;
        ++t;
      }
    }
    ++iter;
  }
 }
 // Decodes the same clip three times, pausing at a different frame number each
 // time, and checks the md5 of the frames that were decoded.
 TEST(VP9MultiThreadedFrameParallel, PauseSeekResume) {
  // vp90-2-07-frame_parallel-1.webm is a 40 frame video file with
  // one key frame for every ten frames.
  static const char kClip[] = "vp90-2-07-frame_parallel-1.webm";
  // Entries are { file, expected md5, pause frame number }; the all-NULL entry
  // terminates the table.
  static const PauseFileList files[] = {
    { kClip, "6ea7c3875d67252e7caf2bc6e75b36b1", 6 },
    { kClip, "4bb634160c7356a8d7d4299b6dc83a45", 12 },
    { kClip, "89772591e6ef461f9fa754f916c78ed8", 26 },
    { NULL, NULL, 0 },
  };
  DecodeFilesWithPause(files);
 }
 // One entry of a decode test table. Tables are terminated by an entry whose
 // |name| is NULL.
 struct FileList {
  // Name of the input file to decode.
  const char *name;
  // md5 sum for decoded frames which does not include corrupted frames.
  const char *expected_md5;
  // Expected number of decoded frames which does not include corrupted frames.
  const int expected_frame_count;
 };
 // Decodes |filename| with |num_threads| and frame threading enabled, and
 // expects exactly |expected_frame_count| frames to be output. Returns the md5
 // of the decoded frames, which does not include corrupted frames.
 string DecodeFile(const string &filename, int num_threads,
                  int expected_frame_count) {
  libvpx_test::WebMVideoSource video(filename);
  video.Init();
  vpx_codec_dec_cfg_t cfg = vpx_codec_dec_cfg_t();
  cfg.threads = num_threads;
  const vpx_codec_flags_t flags = VPX_CODEC_USE_FRAME_THREADING;
  libvpx_test::VP9Decoder decoder(cfg, flags);
  libvpx_test::MD5 md5;
  int out_frames = 0;  // Frames returned by the decoder.
  video.Begin();
  do {
    const vpx_codec_err_t res =
        decoder.DecodeFrame(video.cxdata(), video.frame_size());
    // TODO(hkuang): frame parallel mode should return an error on corruption.
    if (res != VPX_CODEC_OK) {
      EXPECT_EQ(VPX_CODEC_OK, res) << decoder.DecodeError();
      break;
    }
    video.Next();
    // Flush the decoder at the end of the video.
    if (video.cxdata() == NULL) decoder.DecodeFrame(NULL, 0);
    // Drain every frame that is ready and fold it into the md5.
    libvpx_test::DxDataIterator dec_iter = decoder.GetDxData();
    for (const vpx_image_t *img = dec_iter.Next(); img != NULL;
         img = dec_iter.Next()) {
      ++out_frames;
      md5.Add(img);
    }
  } while (video.cxdata() != NULL);
  EXPECT_EQ(expected_frame_count, out_frames)
      << "Input frame count does not match expected output frame count";
  return string(md5.Get());
 }
 // Runs DecodeFile() on every entry of the NULL-name-terminated table |files|
 // with 2 through 8 threads and checks each md5 against the entry's expected
 // value.
 void DecodeFiles(const FileList files[]) {
  const FileList *iter = files;
  while (iter->name != NULL) {
    {
      // Tag any failure below with the file being decoded.
      SCOPED_TRACE(iter->name);
      int t = 2;
      while (t <= 8) {
        EXPECT_EQ(iter->expected_md5,
                  DecodeFile(iter->name, t, iter->expected_frame_count))
            << "threads = " << t;
        ++t;
      }
    }
    ++iter;
  }
 }
 // Decodes files that contain corrupted frames and checks both the md5 and the
 // number of frames output. Entries are { file, expected md5, expected frame
 // count }; the all-NULL entry terminates the table.
 TEST(VP9MultiThreadedFrameParallel, InvalidFileTest) {
  static const FileList files[] = {
    // invalid-vp90-2-07-frame_parallel-1.webm is a 40 frame video file with
    // one key frame for every ten frames. The 11th frame has corrupted data.
    { "invalid-vp90-2-07-frame_parallel-1.webm",
      "0549d0f45f60deaef8eb708e6c0eb6cb", 30 },
    // invalid-vp90-2-07-frame_parallel-2.webm is a 40 frame video file with
    // one key frame for every ten frames. The 1st and 31st frames have
    // corrupted data.
    { "invalid-vp90-2-07-frame_parallel-2.webm",
      "6a1f3cf6f9e7a364212fadb9580d525e", 20 },
    // invalid-vp90-2-07-frame_parallel-3.webm is a 40 frame video file with
    // one key frame for every ten frames. The 5th and 13th frames have
    // corrupted data.
    { "invalid-vp90-2-07-frame_parallel-3.webm",
      "8256544308de926b0681e04685b98677", 27 },
    { NULL, NULL, 0 },
  };
  DecodeFiles(files);
 }
 // Decodes valid files and checks both the md5 and the number of frames
 // output. Entries are { file, expected md5, expected frame count }.
 TEST(VP9MultiThreadedFrameParallel, ValidFileTest) {
  static const FileList files[] = {
 // The only entry is a 10-bit stream, so it is compiled in only for high
 // bitdepth builds. Otherwise the table holds just the terminator and
 // DecodeFiles() has nothing to decode.
 #if CONFIG_VP9_HIGHBITDEPTH
    { "vp92-2-20-10bit-yuv420.webm", "a16b99df180c584e8db2ffeda987d293", 10 },
 #endif
    { NULL, NULL, 0 },
  };
  DecodeFiles(files);
 }
 #endif  // CONFIG_WEBM_IO
 }  // namespace
--- a/test/vp9_intrapred_test.cc
+++ b/test/vp9_intrapred_test.cc
@ -235,16 +235,8 @@ INSTANTIATE_TEST_CASE_P(
                       8),
        IntraPredParam(&vpx_d45_predictor_16x16_neon,
                       &vpx_d45_predictor_16x16_c, 16, 8),
        IntraPredParam(&vpx_d45_predictor_32x32_neon,
                       &vpx_d45_predictor_32x32_c, 32, 8),
        IntraPredParam(&vpx_d135_predictor_4x4_neon, &vpx_d135_predictor_4x4_c,
                       4, 8),
        IntraPredParam(&vpx_d135_predictor_8x8_neon, &vpx_d135_predictor_8x8_c,
                       8, 8),
        IntraPredParam(&vpx_d135_predictor_16x16_neon,
                       &vpx_d135_predictor_16x16_c, 16, 8),
        IntraPredParam(&vpx_d135_predictor_32x32_neon,
                       &vpx_d135_predictor_32x32_c, 32, 8),
        IntraPredParam(&vpx_dc_128_predictor_4x4_neon,
                       &vpx_dc_128_predictor_4x4_c, 4, 8),
        IntraPredParam(&vpx_dc_128_predictor_8x8_neon,
@ -299,139 +291,6 @@ INSTANTIATE_TEST_CASE_P(
                       32, 8)));
 #endif  // HAVE_NEON
 #if HAVE_DSPR2
 // DSPR2 instantiation of VP9IntraPredTest. Every IntraPredParam pairs a
 // *_dspr2 predictor with the *_c function of the same name and block size,
 // followed by the block dimension and a trailing 8 (presumably the bit
 // depth -- confirm against IntraPredParam's definition).
 // Covers dc and h at 4x4/8x8/16x16 and tm at 4x4/8x8 only.
 INSTANTIATE_TEST_CASE_P(
    DSPR2, VP9IntraPredTest,
    ::testing::Values(IntraPredParam(&vpx_dc_predictor_4x4_dspr2,
                                     &vpx_dc_predictor_4x4_c, 4, 8),
                      IntraPredParam(&vpx_dc_predictor_8x8_dspr2,
                                     &vpx_dc_predictor_8x8_c, 8, 8),
                      IntraPredParam(&vpx_dc_predictor_16x16_dspr2,
                                     &vpx_dc_predictor_16x16_c, 16, 8),
                      IntraPredParam(&vpx_h_predictor_4x4_dspr2,
                                     &vpx_h_predictor_4x4_c, 4, 8),
                      IntraPredParam(&vpx_h_predictor_8x8_dspr2,
                                     &vpx_h_predictor_8x8_c, 8, 8),
                      IntraPredParam(&vpx_h_predictor_16x16_dspr2,
                                     &vpx_h_predictor_16x16_c, 16, 8),
                      IntraPredParam(&vpx_tm_predictor_4x4_dspr2,
                                     &vpx_tm_predictor_4x4_c, 4, 8),
                      IntraPredParam(&vpx_tm_predictor_8x8_dspr2,
                                     &vpx_tm_predictor_8x8_c, 8, 8)));
 #endif  // HAVE_DSPR2
 #if HAVE_MSA
 // MSA instantiation of VP9IntraPredTest. Every IntraPredParam pairs a *_msa
 // predictor with the *_c function of the same name and block size, followed
 // by the block dimension and a trailing 8 (presumably the bit depth --
 // confirm against IntraPredParam's definition).
 // Covers dc_128, dc_left, dc, dc_top, h, tm and v, each at 4x4 through 32x32.
 INSTANTIATE_TEST_CASE_P(
    MSA, VP9IntraPredTest,
    ::testing::Values(
        IntraPredParam(&vpx_dc_128_predictor_4x4_msa,
                       &vpx_dc_128_predictor_4x4_c, 4, 8),
        IntraPredParam(&vpx_dc_128_predictor_8x8_msa,
                       &vpx_dc_128_predictor_8x8_c, 8, 8),
        IntraPredParam(&vpx_dc_128_predictor_16x16_msa,
                       &vpx_dc_128_predictor_16x16_c, 16, 8),
        IntraPredParam(&vpx_dc_128_predictor_32x32_msa,
                       &vpx_dc_128_predictor_32x32_c, 32, 8),
        IntraPredParam(&vpx_dc_left_predictor_4x4_msa,
                       &vpx_dc_left_predictor_4x4_c, 4, 8),
        IntraPredParam(&vpx_dc_left_predictor_8x8_msa,
                       &vpx_dc_left_predictor_8x8_c, 8, 8),
        IntraPredParam(&vpx_dc_left_predictor_16x16_msa,
                       &vpx_dc_left_predictor_16x16_c, 16, 8),
        IntraPredParam(&vpx_dc_left_predictor_32x32_msa,
                       &vpx_dc_left_predictor_32x32_c, 32, 8),
        IntraPredParam(&vpx_dc_predictor_4x4_msa, &vpx_dc_predictor_4x4_c, 4,
                       8),
        IntraPredParam(&vpx_dc_predictor_8x8_msa, &vpx_dc_predictor_8x8_c, 8,
                       8),
        IntraPredParam(&vpx_dc_predictor_16x16_msa, &vpx_dc_predictor_16x16_c,
                       16, 8),
        IntraPredParam(&vpx_dc_predictor_32x32_msa, &vpx_dc_predictor_32x32_c,
                       32, 8),
        IntraPredParam(&vpx_dc_top_predictor_4x4_msa,
                       &vpx_dc_top_predictor_4x4_c, 4, 8),
        IntraPredParam(&vpx_dc_top_predictor_8x8_msa,
                       &vpx_dc_top_predictor_8x8_c, 8, 8),
        IntraPredParam(&vpx_dc_top_predictor_16x16_msa,
                       &vpx_dc_top_predictor_16x16_c, 16, 8),
        IntraPredParam(&vpx_dc_top_predictor_32x32_msa,
                       &vpx_dc_top_predictor_32x32_c, 32, 8),
        IntraPredParam(&vpx_h_predictor_4x4_msa, &vpx_h_predictor_4x4_c, 4, 8),
        IntraPredParam(&vpx_h_predictor_8x8_msa, &vpx_h_predictor_8x8_c, 8, 8),
        IntraPredParam(&vpx_h_predictor_16x16_msa, &vpx_h_predictor_16x16_c, 16,
                       8),
        IntraPredParam(&vpx_h_predictor_32x32_msa, &vpx_h_predictor_32x32_c, 32,
                       8),
        IntraPredParam(&vpx_tm_predictor_4x4_msa, &vpx_tm_predictor_4x4_c, 4,
                       8),
        IntraPredParam(&vpx_tm_predictor_8x8_msa, &vpx_tm_predictor_8x8_c, 8,
                       8),
        IntraPredParam(&vpx_tm_predictor_16x16_msa, &vpx_tm_predictor_16x16_c,
                       16, 8),
        IntraPredParam(&vpx_tm_predictor_32x32_msa, &vpx_tm_predictor_32x32_c,
                       32, 8),
        IntraPredParam(&vpx_v_predictor_4x4_msa, &vpx_v_predictor_4x4_c, 4, 8),
        IntraPredParam(&vpx_v_predictor_8x8_msa, &vpx_v_predictor_8x8_c, 8, 8),
        IntraPredParam(&vpx_v_predictor_16x16_msa, &vpx_v_predictor_16x16_c, 16,
                       8),
        IntraPredParam(&vpx_v_predictor_32x32_msa, &vpx_v_predictor_32x32_c, 32,
                       8)));
 #endif  // HAVE_MSA
 #if HAVE_VSX
 // VSX instantiation of VP9IntraPredTest. Every IntraPredParam pairs a *_vsx
 // predictor with the *_c function of the same name and block size, followed
 // by the block dimension and a trailing 8 (presumably the bit depth --
 // confirm against IntraPredParam's definition).
 // Not every predictor/size combination is listed: only h and tm appear at
 // 4x4, and dc_128, dc_left, dc_top and v appear at 16x16 and 32x32 only.
 INSTANTIATE_TEST_CASE_P(
    VSX, VP9IntraPredTest,
    ::testing::Values(
        IntraPredParam(&vpx_d45_predictor_8x8_vsx, &vpx_d45_predictor_8x8_c, 8,
                       8),
        IntraPredParam(&vpx_d45_predictor_16x16_vsx, &vpx_d45_predictor_16x16_c,
                       16, 8),
        IntraPredParam(&vpx_d45_predictor_32x32_vsx, &vpx_d45_predictor_32x32_c,
                       32, 8),
        IntraPredParam(&vpx_d63_predictor_8x8_vsx, &vpx_d63_predictor_8x8_c, 8,
                       8),
        IntraPredParam(&vpx_d63_predictor_16x16_vsx, &vpx_d63_predictor_16x16_c,
                       16, 8),
        IntraPredParam(&vpx_d63_predictor_32x32_vsx, &vpx_d63_predictor_32x32_c,
                       32, 8),
        IntraPredParam(&vpx_dc_128_predictor_16x16_vsx,
                       &vpx_dc_128_predictor_16x16_c, 16, 8),
        IntraPredParam(&vpx_dc_128_predictor_32x32_vsx,
                       &vpx_dc_128_predictor_32x32_c, 32, 8),
        IntraPredParam(&vpx_dc_left_predictor_16x16_vsx,
                       &vpx_dc_left_predictor_16x16_c, 16, 8),
        IntraPredParam(&vpx_dc_left_predictor_32x32_vsx,
                       &vpx_dc_left_predictor_32x32_c, 32, 8),
        IntraPredParam(&vpx_dc_predictor_8x8_vsx, &vpx_dc_predictor_8x8_c, 8,
                       8),
        IntraPredParam(&vpx_dc_predictor_16x16_vsx, &vpx_dc_predictor_16x16_c,
                       16, 8),
        IntraPredParam(&vpx_dc_predictor_32x32_vsx, &vpx_dc_predictor_32x32_c,
                       32, 8),
        IntraPredParam(&vpx_dc_top_predictor_16x16_vsx,
                       &vpx_dc_top_predictor_16x16_c, 16, 8),
        IntraPredParam(&vpx_dc_top_predictor_32x32_vsx,
                       &vpx_dc_top_predictor_32x32_c, 32, 8),
        IntraPredParam(&vpx_h_predictor_4x4_vsx, &vpx_h_predictor_4x4_c, 4, 8),
        IntraPredParam(&vpx_h_predictor_8x8_vsx, &vpx_h_predictor_8x8_c, 8, 8),
        IntraPredParam(&vpx_h_predictor_16x16_vsx, &vpx_h_predictor_16x16_c, 16,
                       8),
        IntraPredParam(&vpx_h_predictor_32x32_vsx, &vpx_h_predictor_32x32_c, 32,
                       8),
        IntraPredParam(&vpx_tm_predictor_4x4_vsx, &vpx_tm_predictor_4x4_c, 4,
                       8),
        IntraPredParam(&vpx_tm_predictor_8x8_vsx, &vpx_tm_predictor_8x8_c, 8,
                       8),
        IntraPredParam(&vpx_tm_predictor_16x16_vsx, &vpx_tm_predictor_16x16_c,
                       16, 8),
        IntraPredParam(&vpx_tm_predictor_32x32_vsx, &vpx_tm_predictor_32x32_c,
                       32, 8),
        IntraPredParam(&vpx_v_predictor_16x16_vsx, &vpx_v_predictor_16x16_c, 16,
                       8),
        IntraPredParam(&vpx_v_predictor_32x32_vsx, &vpx_v_predictor_32x32_c, 32,
                       8)));
 #endif  // HAVE_VSX
 #if CONFIG_VP9_HIGHBITDEPTH
 typedef void (*HighbdIntraPred)(uint16_t *dst, ptrdiff_t stride,
                                const uint16_t *above, const uint16_t *left,
@ -467,164 +326,10 @@ TEST_P(VP9HighbdIntraPredTest, HighbdIntraPredTests) {
  RunTest(left_col, above_data, dst, ref_dst);
 }
 #if HAVE_SSSE3
 // SSSE3 instantiation of VP9HighbdIntraPredTest with a trailing argument of 8
 // (presumably the bit depth, matching the _TO_C_8 suffix -- confirm against
 // HighbdIntraPredParam's definition).
 // Every entry lists the *_ssse3 predictor first and the matching *_c
 // function second, followed by the block dimension. Keep that order: the
 // d63 and d117 32x32 entries previously had the two functions swapped.
 INSTANTIATE_TEST_CASE_P(
    SSSE3_TO_C_8, VP9HighbdIntraPredTest,
    ::testing::Values(
        HighbdIntraPredParam(&vpx_highbd_d45_predictor_4x4_ssse3,
                             &vpx_highbd_d45_predictor_4x4_c, 4, 8),
        HighbdIntraPredParam(&vpx_highbd_d45_predictor_8x8_ssse3,
                             &vpx_highbd_d45_predictor_8x8_c, 8, 8),
        HighbdIntraPredParam(&vpx_highbd_d45_predictor_16x16_ssse3,
                             &vpx_highbd_d45_predictor_16x16_c, 16, 8),
        HighbdIntraPredParam(&vpx_highbd_d45_predictor_32x32_ssse3,
                             &vpx_highbd_d45_predictor_32x32_c, 32, 8),
        HighbdIntraPredParam(&vpx_highbd_d63_predictor_8x8_ssse3,
                             &vpx_highbd_d63_predictor_8x8_c, 8, 8),
        HighbdIntraPredParam(&vpx_highbd_d63_predictor_16x16_ssse3,
                             &vpx_highbd_d63_predictor_16x16_c, 16, 8),
        HighbdIntraPredParam(&vpx_highbd_d63_predictor_32x32_ssse3,
                             &vpx_highbd_d63_predictor_32x32_c, 32, 8),
        HighbdIntraPredParam(&vpx_highbd_d117_predictor_8x8_ssse3,
                             &vpx_highbd_d117_predictor_8x8_c, 8, 8),
        HighbdIntraPredParam(&vpx_highbd_d117_predictor_16x16_ssse3,
                             &vpx_highbd_d117_predictor_16x16_c, 16, 8),
        HighbdIntraPredParam(&vpx_highbd_d117_predictor_32x32_ssse3,
                             &vpx_highbd_d117_predictor_32x32_c, 32, 8),
        HighbdIntraPredParam(&vpx_highbd_d135_predictor_8x8_ssse3,
                             &vpx_highbd_d135_predictor_8x8_c, 8, 8),
        HighbdIntraPredParam(&vpx_highbd_d135_predictor_16x16_ssse3,
                             &vpx_highbd_d135_predictor_16x16_c, 16, 8),
        HighbdIntraPredParam(&vpx_highbd_d135_predictor_32x32_ssse3,
                             &vpx_highbd_d135_predictor_32x32_c, 32, 8),
        HighbdIntraPredParam(&vpx_highbd_d153_predictor_8x8_ssse3,
                             &vpx_highbd_d153_predictor_8x8_c, 8, 8),
        HighbdIntraPredParam(&vpx_highbd_d153_predictor_16x16_ssse3,
                             &vpx_highbd_d153_predictor_16x16_c, 16, 8),
        HighbdIntraPredParam(&vpx_highbd_d153_predictor_32x32_ssse3,
                             &vpx_highbd_d153_predictor_32x32_c, 32, 8),
        HighbdIntraPredParam(&vpx_highbd_d207_predictor_8x8_ssse3,
                             &vpx_highbd_d207_predictor_8x8_c, 8, 8),
        HighbdIntraPredParam(&vpx_highbd_d207_predictor_16x16_ssse3,
                             &vpx_highbd_d207_predictor_16x16_c, 16, 8),
        HighbdIntraPredParam(&vpx_highbd_d207_predictor_32x32_ssse3,
                             &vpx_highbd_d207_predictor_32x32_c, 32, 8)));
 // SSSE3 instantiation of VP9HighbdIntraPredTest with a trailing argument of
 // 10 (presumably the bit depth, matching the _TO_C_10 suffix -- confirm
 // against HighbdIntraPredParam's definition).
 // Every entry lists the *_ssse3 predictor first and the matching *_c
 // function second, followed by the block dimension. Keep that order: the
 // d63 and d117 32x32 entries previously had the two functions swapped.
 INSTANTIATE_TEST_CASE_P(
    SSSE3_TO_C_10, VP9HighbdIntraPredTest,
    ::testing::Values(
        HighbdIntraPredParam(&vpx_highbd_d45_predictor_4x4_ssse3,
                             &vpx_highbd_d45_predictor_4x4_c, 4, 10),
        HighbdIntraPredParam(&vpx_highbd_d45_predictor_8x8_ssse3,
                             &vpx_highbd_d45_predictor_8x8_c, 8, 10),
        HighbdIntraPredParam(&vpx_highbd_d45_predictor_16x16_ssse3,
                             &vpx_highbd_d45_predictor_16x16_c, 16, 10),
        HighbdIntraPredParam(&vpx_highbd_d45_predictor_32x32_ssse3,
                             &vpx_highbd_d45_predictor_32x32_c, 32, 10),
        HighbdIntraPredParam(&vpx_highbd_d63_predictor_8x8_ssse3,
                             &vpx_highbd_d63_predictor_8x8_c, 8, 10),
        HighbdIntraPredParam(&vpx_highbd_d63_predictor_16x16_ssse3,
                             &vpx_highbd_d63_predictor_16x16_c, 16, 10),
        HighbdIntraPredParam(&vpx_highbd_d63_predictor_32x32_ssse3,
                             &vpx_highbd_d63_predictor_32x32_c, 32, 10),
        HighbdIntraPredParam(&vpx_highbd_d117_predictor_8x8_ssse3,
                             &vpx_highbd_d117_predictor_8x8_c, 8, 10),
        HighbdIntraPredParam(&vpx_highbd_d117_predictor_16x16_ssse3,
                             &vpx_highbd_d117_predictor_16x16_c, 16, 10),
        HighbdIntraPredParam(&vpx_highbd_d117_predictor_32x32_ssse3,
                             &vpx_highbd_d117_predictor_32x32_c, 32, 10),
        HighbdIntraPredParam(&vpx_highbd_d135_predictor_8x8_ssse3,
                             &vpx_highbd_d135_predictor_8x8_c, 8, 10),
        HighbdIntraPredParam(&vpx_highbd_d135_predictor_16x16_ssse3,
                             &vpx_highbd_d135_predictor_16x16_c, 16, 10),
        HighbdIntraPredParam(&vpx_highbd_d135_predictor_32x32_ssse3,
                             &vpx_highbd_d135_predictor_32x32_c, 32, 10),
        HighbdIntraPredParam(&vpx_highbd_d153_predictor_8x8_ssse3,
                             &vpx_highbd_d153_predictor_8x8_c, 8, 10),
        HighbdIntraPredParam(&vpx_highbd_d153_predictor_16x16_ssse3,
                             &vpx_highbd_d153_predictor_16x16_c, 16, 10),
        HighbdIntraPredParam(&vpx_highbd_d153_predictor_32x32_ssse3,
                             &vpx_highbd_d153_predictor_32x32_c, 32, 10),
        HighbdIntraPredParam(&vpx_highbd_d207_predictor_8x8_ssse3,
                             &vpx_highbd_d207_predictor_8x8_c, 8, 10),
        HighbdIntraPredParam(&vpx_highbd_d207_predictor_16x16_ssse3,
                             &vpx_highbd_d207_predictor_16x16_c, 16, 10),
        HighbdIntraPredParam(&vpx_highbd_d207_predictor_32x32_ssse3,
                             &vpx_highbd_d207_predictor_32x32_c, 32, 10)));
 // SSSE3 instantiation of VP9HighbdIntraPredTest with a trailing argument of
 // 12 (presumably the bit depth, matching the _TO_C_12 suffix -- confirm
 // against HighbdIntraPredParam's definition).
 // Every entry lists the *_ssse3 predictor first and the matching *_c
 // function second, followed by the block dimension. Keep that order: the
 // d63 and d117 32x32 entries previously had the two functions swapped.
 INSTANTIATE_TEST_CASE_P(
    SSSE3_TO_C_12, VP9HighbdIntraPredTest,
    ::testing::Values(
        HighbdIntraPredParam(&vpx_highbd_d45_predictor_4x4_ssse3,
                             &vpx_highbd_d45_predictor_4x4_c, 4, 12),
        HighbdIntraPredParam(&vpx_highbd_d45_predictor_8x8_ssse3,
                             &vpx_highbd_d45_predictor_8x8_c, 8, 12),
        HighbdIntraPredParam(&vpx_highbd_d45_predictor_16x16_ssse3,
                             &vpx_highbd_d45_predictor_16x16_c, 16, 12),
        HighbdIntraPredParam(&vpx_highbd_d45_predictor_32x32_ssse3,
                             &vpx_highbd_d45_predictor_32x32_c, 32, 12),
        HighbdIntraPredParam(&vpx_highbd_d63_predictor_8x8_ssse3,
                             &vpx_highbd_d63_predictor_8x8_c, 8, 12),
        HighbdIntraPredParam(&vpx_highbd_d63_predictor_16x16_ssse3,
                             &vpx_highbd_d63_predictor_16x16_c, 16, 12),
        HighbdIntraPredParam(&vpx_highbd_d63_predictor_32x32_ssse3,
                             &vpx_highbd_d63_predictor_32x32_c, 32, 12),
        HighbdIntraPredParam(&vpx_highbd_d117_predictor_8x8_ssse3,
                             &vpx_highbd_d117_predictor_8x8_c, 8, 12),
        HighbdIntraPredParam(&vpx_highbd_d117_predictor_16x16_ssse3,
                             &vpx_highbd_d117_predictor_16x16_c, 16, 12),
        HighbdIntraPredParam(&vpx_highbd_d117_predictor_32x32_ssse3,
                             &vpx_highbd_d117_predictor_32x32_c, 32, 12),
        HighbdIntraPredParam(&vpx_highbd_d135_predictor_8x8_ssse3,
                             &vpx_highbd_d135_predictor_8x8_c, 8, 12),
        HighbdIntraPredParam(&vpx_highbd_d135_predictor_16x16_ssse3,
                             &vpx_highbd_d135_predictor_16x16_c, 16, 12),
        HighbdIntraPredParam(&vpx_highbd_d135_predictor_32x32_ssse3,
                             &vpx_highbd_d135_predictor_32x32_c, 32, 12),
        HighbdIntraPredParam(&vpx_highbd_d153_predictor_8x8_ssse3,
                             &vpx_highbd_d153_predictor_8x8_c, 8, 12),
        HighbdIntraPredParam(&vpx_highbd_d153_predictor_16x16_ssse3,
                             &vpx_highbd_d153_predictor_16x16_c, 16, 12),
        HighbdIntraPredParam(&vpx_highbd_d153_predictor_32x32_ssse3,
                             &vpx_highbd_d153_predictor_32x32_c, 32, 12),
        HighbdIntraPredParam(&vpx_highbd_d207_predictor_8x8_ssse3,
                             &vpx_highbd_d207_predictor_8x8_c, 8, 12),
        HighbdIntraPredParam(&vpx_highbd_d207_predictor_16x16_ssse3,
                             &vpx_highbd_d207_predictor_16x16_c, 16, 12),
        HighbdIntraPredParam(&vpx_highbd_d207_predictor_32x32_ssse3,
                             &vpx_highbd_d207_predictor_32x32_c, 32, 12)));
 #endif  // HAVE_SSSE3
 #if HAVE_SSE2
 INSTANTIATE_TEST_CASE_P(
    SSE2_TO_C_8, VP9HighbdIntraPredTest,
    ::testing::Values(
        HighbdIntraPredParam(&vpx_highbd_dc_128_predictor_4x4_sse2,
                             &vpx_highbd_dc_128_predictor_4x4_c, 4, 8),
        HighbdIntraPredParam(&vpx_highbd_dc_128_predictor_8x8_sse2,
                             &vpx_highbd_dc_128_predictor_8x8_c, 8, 8),
        HighbdIntraPredParam(&vpx_highbd_dc_128_predictor_16x16_sse2,
                             &vpx_highbd_dc_128_predictor_16x16_c, 16, 8),
        HighbdIntraPredParam(&vpx_highbd_dc_128_predictor_32x32_sse2,
                             &vpx_highbd_dc_128_predictor_32x32_c, 32, 8),
        HighbdIntraPredParam(&vpx_highbd_d63_predictor_4x4_sse2,
                             &vpx_highbd_d63_predictor_4x4_c, 4, 8),
        HighbdIntraPredParam(&vpx_highbd_d117_predictor_4x4_sse2,
                             &vpx_highbd_d117_predictor_4x4_c, 4, 8),
        HighbdIntraPredParam(&vpx_highbd_d135_predictor_4x4_sse2,
                             &vpx_highbd_d135_predictor_4x4_c, 4, 8),
        HighbdIntraPredParam(&vpx_highbd_d153_predictor_4x4_sse2,
                             &vpx_highbd_d153_predictor_4x4_c, 4, 8),
        HighbdIntraPredParam(&vpx_highbd_d207_predictor_4x4_sse2,
                             &vpx_highbd_d207_predictor_4x4_c, 4, 8),
        HighbdIntraPredParam(&vpx_highbd_dc_left_predictor_4x4_sse2,
                             &vpx_highbd_dc_left_predictor_4x4_c, 4, 8),
        HighbdIntraPredParam(&vpx_highbd_dc_left_predictor_8x8_sse2,
                             &vpx_highbd_dc_left_predictor_8x8_c, 8, 8),
        HighbdIntraPredParam(&vpx_highbd_dc_left_predictor_16x16_sse2,
                             &vpx_highbd_dc_left_predictor_16x16_c, 16, 8),
        HighbdIntraPredParam(&vpx_highbd_dc_left_predictor_32x32_sse2,
                             &vpx_highbd_dc_left_predictor_32x32_c, 32, 8),
        HighbdIntraPredParam(&vpx_highbd_dc_predictor_4x4_sse2,
                             &vpx_highbd_dc_predictor_4x4_c, 4, 8),
        HighbdIntraPredParam(&vpx_highbd_dc_predictor_8x8_sse2,
@ -633,14 +338,6 @@ INSTANTIATE_TEST_CASE_P(
                             &vpx_highbd_dc_predictor_16x16_c, 16, 8),
        HighbdIntraPredParam(&vpx_highbd_dc_predictor_32x32_sse2,
                             &vpx_highbd_dc_predictor_32x32_c, 32, 8),
        HighbdIntraPredParam(&vpx_highbd_dc_top_predictor_4x4_sse2,
                             &vpx_highbd_dc_top_predictor_4x4_c, 4, 8),
        HighbdIntraPredParam(&vpx_highbd_dc_top_predictor_8x8_sse2,
                             &vpx_highbd_dc_top_predictor_8x8_c, 8, 8),
        HighbdIntraPredParam(&vpx_highbd_dc_top_predictor_16x16_sse2,
                             &vpx_highbd_dc_top_predictor_16x16_c, 16, 8),
        HighbdIntraPredParam(&vpx_highbd_dc_top_predictor_32x32_sse2,
                             &vpx_highbd_dc_top_predictor_32x32_c, 32, 8),
        HighbdIntraPredParam(&vpx_highbd_tm_predictor_4x4_sse2,
                             &vpx_highbd_tm_predictor_4x4_c, 4, 8),
        HighbdIntraPredParam(&vpx_highbd_tm_predictor_8x8_sse2,
@ -649,14 +346,6 @@ INSTANTIATE_TEST_CASE_P(
                             &vpx_highbd_tm_predictor_16x16_c, 16, 8),
        HighbdIntraPredParam(&vpx_highbd_tm_predictor_32x32_sse2,
                             &vpx_highbd_tm_predictor_32x32_c, 32, 8),
        HighbdIntraPredParam(&vpx_highbd_h_predictor_4x4_sse2,
                             &vpx_highbd_h_predictor_4x4_c, 4, 8),
        HighbdIntraPredParam(&vpx_highbd_h_predictor_8x8_sse2,
                             &vpx_highbd_h_predictor_8x8_c, 8, 8),
        HighbdIntraPredParam(&vpx_highbd_h_predictor_16x16_sse2,
                             &vpx_highbd_h_predictor_16x16_c, 16, 8),
        HighbdIntraPredParam(&vpx_highbd_h_predictor_32x32_sse2,
                             &vpx_highbd_h_predictor_32x32_c, 32, 8),
        HighbdIntraPredParam(&vpx_highbd_v_predictor_4x4_sse2,
                             &vpx_highbd_v_predictor_4x4_c, 4, 8),
        HighbdIntraPredParam(&vpx_highbd_v_predictor_8x8_sse2,
@ -669,32 +358,6 @@ INSTANTIATE_TEST_CASE_P(
 INSTANTIATE_TEST_CASE_P(
    SSE2_TO_C_10, VP9HighbdIntraPredTest,
    ::testing::Values(
        HighbdIntraPredParam(&vpx_highbd_dc_128_predictor_4x4_sse2,
                             &vpx_highbd_dc_128_predictor_4x4_c, 4, 10),
        HighbdIntraPredParam(&vpx_highbd_dc_128_predictor_8x8_sse2,
                             &vpx_highbd_dc_128_predictor_8x8_c, 8, 10),
        HighbdIntraPredParam(&vpx_highbd_dc_128_predictor_16x16_sse2,
                             &vpx_highbd_dc_128_predictor_16x16_c, 16, 10),
        HighbdIntraPredParam(&vpx_highbd_dc_128_predictor_32x32_sse2,
                             &vpx_highbd_dc_128_predictor_32x32_c, 32, 10),
        HighbdIntraPredParam(&vpx_highbd_d63_predictor_4x4_sse2,
                             &vpx_highbd_d63_predictor_4x4_c, 4, 10),
        HighbdIntraPredParam(&vpx_highbd_d117_predictor_4x4_sse2,
                             &vpx_highbd_d117_predictor_4x4_c, 4, 10),
        HighbdIntraPredParam(&vpx_highbd_d135_predictor_4x4_sse2,
                             &vpx_highbd_d135_predictor_4x4_c, 4, 10),
        HighbdIntraPredParam(&vpx_highbd_d153_predictor_4x4_sse2,
                             &vpx_highbd_d153_predictor_4x4_c, 4, 10),
        HighbdIntraPredParam(&vpx_highbd_d207_predictor_4x4_sse2,
                             &vpx_highbd_d207_predictor_4x4_c, 4, 10),
        HighbdIntraPredParam(&vpx_highbd_dc_left_predictor_4x4_sse2,
                             &vpx_highbd_dc_left_predictor_4x4_c, 4, 10),
        HighbdIntraPredParam(&vpx_highbd_dc_left_predictor_8x8_sse2,
                             &vpx_highbd_dc_left_predictor_8x8_c, 8, 10),
        HighbdIntraPredParam(&vpx_highbd_dc_left_predictor_16x16_sse2,
                             &vpx_highbd_dc_left_predictor_16x16_c, 16, 10),
        HighbdIntraPredParam(&vpx_highbd_dc_left_predictor_32x32_sse2,
                             &vpx_highbd_dc_left_predictor_32x32_c, 32, 10),
        HighbdIntraPredParam(&vpx_highbd_dc_predictor_4x4_sse2,
                             &vpx_highbd_dc_predictor_4x4_c, 4, 10),
        HighbdIntraPredParam(&vpx_highbd_dc_predictor_8x8_sse2,
@ -703,14 +366,6 @@ INSTANTIATE_TEST_CASE_P(
                             &vpx_highbd_dc_predictor_16x16_c, 16, 10),
        HighbdIntraPredParam(&vpx_highbd_dc_predictor_32x32_sse2,
                             &vpx_highbd_dc_predictor_32x32_c, 32, 10),
        HighbdIntraPredParam(&vpx_highbd_dc_top_predictor_4x4_sse2,
                             &vpx_highbd_dc_top_predictor_4x4_c, 4, 10),
        HighbdIntraPredParam(&vpx_highbd_dc_top_predictor_8x8_sse2,
                             &vpx_highbd_dc_top_predictor_8x8_c, 8, 10),
        HighbdIntraPredParam(&vpx_highbd_dc_top_predictor_16x16_sse2,
                             &vpx_highbd_dc_top_predictor_16x16_c, 16, 10),
        HighbdIntraPredParam(&vpx_highbd_dc_top_predictor_32x32_sse2,
                             &vpx_highbd_dc_top_predictor_32x32_c, 32, 10),
        HighbdIntraPredParam(&vpx_highbd_tm_predictor_4x4_sse2,
                             &vpx_highbd_tm_predictor_4x4_c, 4, 10),
        HighbdIntraPredParam(&vpx_highbd_tm_predictor_8x8_sse2,
@ -719,14 +374,6 @@ INSTANTIATE_TEST_CASE_P(
                             &vpx_highbd_tm_predictor_16x16_c, 16, 10),
        HighbdIntraPredParam(&vpx_highbd_tm_predictor_32x32_sse2,
                             &vpx_highbd_tm_predictor_32x32_c, 32, 10),
        HighbdIntraPredParam(&vpx_highbd_h_predictor_4x4_sse2,
                             &vpx_highbd_h_predictor_4x4_c, 4, 10),
        HighbdIntraPredParam(&vpx_highbd_h_predictor_8x8_sse2,
                             &vpx_highbd_h_predictor_8x8_c, 8, 10),
        HighbdIntraPredParam(&vpx_highbd_h_predictor_16x16_sse2,
                             &vpx_highbd_h_predictor_16x16_c, 16, 10),
        HighbdIntraPredParam(&vpx_highbd_h_predictor_32x32_sse2,
                             &vpx_highbd_h_predictor_32x32_c, 32, 10),
        HighbdIntraPredParam(&vpx_highbd_v_predictor_4x4_sse2,
                             &vpx_highbd_v_predictor_4x4_c, 4, 10),
        HighbdIntraPredParam(&vpx_highbd_v_predictor_8x8_sse2,
@ -739,32 +386,6 @@ INSTANTIATE_TEST_CASE_P(
 INSTANTIATE_TEST_CASE_P(
    SSE2_TO_C_12, VP9HighbdIntraPredTest,
    ::testing::Values(
        HighbdIntraPredParam(&vpx_highbd_dc_128_predictor_4x4_sse2,
                             &vpx_highbd_dc_128_predictor_4x4_c, 4, 12),
        HighbdIntraPredParam(&vpx_highbd_dc_128_predictor_8x8_sse2,
                             &vpx_highbd_dc_128_predictor_8x8_c, 8, 12),
        HighbdIntraPredParam(&vpx_highbd_dc_128_predictor_16x16_sse2,
                             &vpx_highbd_dc_128_predictor_16x16_c, 16, 12),
        HighbdIntraPredParam(&vpx_highbd_dc_128_predictor_32x32_sse2,
                             &vpx_highbd_dc_128_predictor_32x32_c, 32, 12),
        HighbdIntraPredParam(&vpx_highbd_d63_predictor_4x4_sse2,
                             &vpx_highbd_d63_predictor_4x4_c, 4, 12),
        HighbdIntraPredParam(&vpx_highbd_d117_predictor_4x4_sse2,
                             &vpx_highbd_d117_predictor_4x4_c, 4, 12),
        HighbdIntraPredParam(&vpx_highbd_d135_predictor_4x4_sse2,
                             &vpx_highbd_d135_predictor_4x4_c, 4, 12),
        HighbdIntraPredParam(&vpx_highbd_d153_predictor_4x4_sse2,
                             &vpx_highbd_d153_predictor_4x4_c, 4, 12),
        HighbdIntraPredParam(&vpx_highbd_d207_predictor_4x4_sse2,
                             &vpx_highbd_d207_predictor_4x4_c, 4, 12),
        HighbdIntraPredParam(&vpx_highbd_dc_left_predictor_4x4_sse2,
                             &vpx_highbd_dc_left_predictor_4x4_c, 4, 12),
        HighbdIntraPredParam(&vpx_highbd_dc_left_predictor_8x8_sse2,
                             &vpx_highbd_dc_left_predictor_8x8_c, 8, 12),
        HighbdIntraPredParam(&vpx_highbd_dc_left_predictor_16x16_sse2,
                             &vpx_highbd_dc_left_predictor_16x16_c, 16, 12),
        HighbdIntraPredParam(&vpx_highbd_dc_left_predictor_32x32_sse2,
                             &vpx_highbd_dc_left_predictor_32x32_c, 32, 12),
        HighbdIntraPredParam(&vpx_highbd_dc_predictor_4x4_sse2,
                             &vpx_highbd_dc_predictor_4x4_c, 4, 12),
        HighbdIntraPredParam(&vpx_highbd_dc_predictor_8x8_sse2,
@ -773,14 +394,6 @@ INSTANTIATE_TEST_CASE_P(
                             &vpx_highbd_dc_predictor_16x16_c, 16, 12),
        HighbdIntraPredParam(&vpx_highbd_dc_predictor_32x32_sse2,
                             &vpx_highbd_dc_predictor_32x32_c, 32, 12),
        HighbdIntraPredParam(&vpx_highbd_dc_top_predictor_4x4_sse2,
                             &vpx_highbd_dc_top_predictor_4x4_c, 4, 12),
        HighbdIntraPredParam(&vpx_highbd_dc_top_predictor_8x8_sse2,
                             &vpx_highbd_dc_top_predictor_8x8_c, 8, 12),
        HighbdIntraPredParam(&vpx_highbd_dc_top_predictor_16x16_sse2,
                             &vpx_highbd_dc_top_predictor_16x16_c, 16, 12),
        HighbdIntraPredParam(&vpx_highbd_dc_top_predictor_32x32_sse2,
                             &vpx_highbd_dc_top_predictor_32x32_c, 32, 12),
        HighbdIntraPredParam(&vpx_highbd_tm_predictor_4x4_sse2,
                             &vpx_highbd_tm_predictor_4x4_c, 4, 12),
        HighbdIntraPredParam(&vpx_highbd_tm_predictor_8x8_sse2,
@ -789,14 +402,6 @@ INSTANTIATE_TEST_CASE_P(
                             &vpx_highbd_tm_predictor_16x16_c, 16, 12),
        HighbdIntraPredParam(&vpx_highbd_tm_predictor_32x32_sse2,
                             &vpx_highbd_tm_predictor_32x32_c, 32, 12),
        HighbdIntraPredParam(&vpx_highbd_h_predictor_4x4_sse2,
                             &vpx_highbd_h_predictor_4x4_c, 4, 12),
        HighbdIntraPredParam(&vpx_highbd_h_predictor_8x8_sse2,
                             &vpx_highbd_h_predictor_8x8_c, 8, 12),
        HighbdIntraPredParam(&vpx_highbd_h_predictor_16x16_sse2,
                             &vpx_highbd_h_predictor_16x16_c, 16, 12),
        HighbdIntraPredParam(&vpx_highbd_h_predictor_32x32_sse2,
                             &vpx_highbd_h_predictor_32x32_c, 32, 12),
        HighbdIntraPredParam(&vpx_highbd_v_predictor_4x4_sse2,
                             &vpx_highbd_v_predictor_4x4_c, 4, 12),
        HighbdIntraPredParam(&vpx_highbd_v_predictor_8x8_sse2,
@ -807,235 +412,5 @@ INSTANTIATE_TEST_CASE_P(
                             &vpx_highbd_v_predictor_32x32_c, 32, 12)));
 #endif  // HAVE_SSE2
 #if HAVE_NEON
 // Instantiates VP9HighbdIntraPredTest with every NEON high-bitdepth intra
 // predictor paired with the C function of the same name. The last two
 // HighbdIntraPredParam arguments are the NxN size from the function name and
 // the constant 8 (presumably block size and bit depth -- confirm against
 // HighbdIntraPredParam).
 INSTANTIATE_TEST_CASE_P(
    NEON_TO_C_8, VP9HighbdIntraPredTest,
    ::testing::Values(
        HighbdIntraPredParam(&vpx_highbd_d45_predictor_4x4_neon,
                             &vpx_highbd_d45_predictor_4x4_c, 4, 8),
        HighbdIntraPredParam(&vpx_highbd_d45_predictor_8x8_neon,
                             &vpx_highbd_d45_predictor_8x8_c, 8, 8),
        HighbdIntraPredParam(&vpx_highbd_d45_predictor_16x16_neon,
                             &vpx_highbd_d45_predictor_16x16_c, 16, 8),
        HighbdIntraPredParam(&vpx_highbd_d45_predictor_32x32_neon,
                             &vpx_highbd_d45_predictor_32x32_c, 32, 8),
        HighbdIntraPredParam(&vpx_highbd_d135_predictor_4x4_neon,
                             &vpx_highbd_d135_predictor_4x4_c, 4, 8),
        HighbdIntraPredParam(&vpx_highbd_d135_predictor_8x8_neon,
                             &vpx_highbd_d135_predictor_8x8_c, 8, 8),
        HighbdIntraPredParam(&vpx_highbd_d135_predictor_16x16_neon,
                             &vpx_highbd_d135_predictor_16x16_c, 16, 8),
        HighbdIntraPredParam(&vpx_highbd_d135_predictor_32x32_neon,
                             &vpx_highbd_d135_predictor_32x32_c, 32, 8),
        HighbdIntraPredParam(&vpx_highbd_dc_128_predictor_4x4_neon,
                             &vpx_highbd_dc_128_predictor_4x4_c, 4, 8),
        HighbdIntraPredParam(&vpx_highbd_dc_128_predictor_8x8_neon,
                             &vpx_highbd_dc_128_predictor_8x8_c, 8, 8),
        HighbdIntraPredParam(&vpx_highbd_dc_128_predictor_16x16_neon,
                             &vpx_highbd_dc_128_predictor_16x16_c, 16, 8),
        HighbdIntraPredParam(&vpx_highbd_dc_128_predictor_32x32_neon,
                             &vpx_highbd_dc_128_predictor_32x32_c, 32, 8),
        HighbdIntraPredParam(&vpx_highbd_dc_left_predictor_4x4_neon,
                             &vpx_highbd_dc_left_predictor_4x4_c, 4, 8),
        HighbdIntraPredParam(&vpx_highbd_dc_left_predictor_8x8_neon,
                             &vpx_highbd_dc_left_predictor_8x8_c, 8, 8),
        HighbdIntraPredParam(&vpx_highbd_dc_left_predictor_16x16_neon,
                             &vpx_highbd_dc_left_predictor_16x16_c, 16, 8),
        HighbdIntraPredParam(&vpx_highbd_dc_left_predictor_32x32_neon,
                             &vpx_highbd_dc_left_predictor_32x32_c, 32, 8),
        HighbdIntraPredParam(&vpx_highbd_dc_predictor_4x4_neon,
                             &vpx_highbd_dc_predictor_4x4_c, 4, 8),
        HighbdIntraPredParam(&vpx_highbd_dc_predictor_8x8_neon,
                             &vpx_highbd_dc_predictor_8x8_c, 8, 8),
        HighbdIntraPredParam(&vpx_highbd_dc_predictor_16x16_neon,
                             &vpx_highbd_dc_predictor_16x16_c, 16, 8),
        HighbdIntraPredParam(&vpx_highbd_dc_predictor_32x32_neon,
                             &vpx_highbd_dc_predictor_32x32_c, 32, 8),
        HighbdIntraPredParam(&vpx_highbd_dc_top_predictor_4x4_neon,
                             &vpx_highbd_dc_top_predictor_4x4_c, 4, 8),
        HighbdIntraPredParam(&vpx_highbd_dc_top_predictor_8x8_neon,
                             &vpx_highbd_dc_top_predictor_8x8_c, 8, 8),
        HighbdIntraPredParam(&vpx_highbd_dc_top_predictor_16x16_neon,
                             &vpx_highbd_dc_top_predictor_16x16_c, 16, 8),
        HighbdIntraPredParam(&vpx_highbd_dc_top_predictor_32x32_neon,
                             &vpx_highbd_dc_top_predictor_32x32_c, 32, 8),
        HighbdIntraPredParam(&vpx_highbd_h_predictor_4x4_neon,
                             &vpx_highbd_h_predictor_4x4_c, 4, 8),
        HighbdIntraPredParam(&vpx_highbd_h_predictor_8x8_neon,
                             &vpx_highbd_h_predictor_8x8_c, 8, 8),
        HighbdIntraPredParam(&vpx_highbd_h_predictor_16x16_neon,
                             &vpx_highbd_h_predictor_16x16_c, 16, 8),
        HighbdIntraPredParam(&vpx_highbd_h_predictor_32x32_neon,
                             &vpx_highbd_h_predictor_32x32_c, 32, 8),
        HighbdIntraPredParam(&vpx_highbd_tm_predictor_4x4_neon,
                             &vpx_highbd_tm_predictor_4x4_c, 4, 8),
        HighbdIntraPredParam(&vpx_highbd_tm_predictor_8x8_neon,
                             &vpx_highbd_tm_predictor_8x8_c, 8, 8),
        HighbdIntraPredParam(&vpx_highbd_tm_predictor_16x16_neon,
                             &vpx_highbd_tm_predictor_16x16_c, 16, 8),
        HighbdIntraPredParam(&vpx_highbd_tm_predictor_32x32_neon,
                             &vpx_highbd_tm_predictor_32x32_c, 32, 8),
        HighbdIntraPredParam(&vpx_highbd_v_predictor_4x4_neon,
                             &vpx_highbd_v_predictor_4x4_c, 4, 8),
        HighbdIntraPredParam(&vpx_highbd_v_predictor_8x8_neon,
                             &vpx_highbd_v_predictor_8x8_c, 8, 8),
        HighbdIntraPredParam(&vpx_highbd_v_predictor_16x16_neon,
                             &vpx_highbd_v_predictor_16x16_c, 16, 8),
        HighbdIntraPredParam(&vpx_highbd_v_predictor_32x32_neon,
                             &vpx_highbd_v_predictor_32x32_c, 32, 8)));
 // Instantiates VP9HighbdIntraPredTest with every NEON high-bitdepth intra
 // predictor paired with the C function of the same name. The last two
 // HighbdIntraPredParam arguments are the NxN size from the function name and
 // the constant 10 (presumably block size and bit depth -- confirm against
 // HighbdIntraPredParam).
 INSTANTIATE_TEST_CASE_P(
    NEON_TO_C_10, VP9HighbdIntraPredTest,
    ::testing::Values(
        HighbdIntraPredParam(&vpx_highbd_d45_predictor_4x4_neon,
                             &vpx_highbd_d45_predictor_4x4_c, 4, 10),
        HighbdIntraPredParam(&vpx_highbd_d45_predictor_8x8_neon,
                             &vpx_highbd_d45_predictor_8x8_c, 8, 10),
        HighbdIntraPredParam(&vpx_highbd_d45_predictor_16x16_neon,
                             &vpx_highbd_d45_predictor_16x16_c, 16, 10),
        HighbdIntraPredParam(&vpx_highbd_d45_predictor_32x32_neon,
                             &vpx_highbd_d45_predictor_32x32_c, 32, 10),
        HighbdIntraPredParam(&vpx_highbd_d135_predictor_4x4_neon,
                             &vpx_highbd_d135_predictor_4x4_c, 4, 10),
        HighbdIntraPredParam(&vpx_highbd_d135_predictor_8x8_neon,
                             &vpx_highbd_d135_predictor_8x8_c, 8, 10),
        HighbdIntraPredParam(&vpx_highbd_d135_predictor_16x16_neon,
                             &vpx_highbd_d135_predictor_16x16_c, 16, 10),
        HighbdIntraPredParam(&vpx_highbd_d135_predictor_32x32_neon,
                             &vpx_highbd_d135_predictor_32x32_c, 32, 10),
        HighbdIntraPredParam(&vpx_highbd_dc_128_predictor_4x4_neon,
                             &vpx_highbd_dc_128_predictor_4x4_c, 4, 10),
        HighbdIntraPredParam(&vpx_highbd_dc_128_predictor_8x8_neon,
                             &vpx_highbd_dc_128_predictor_8x8_c, 8, 10),
        HighbdIntraPredParam(&vpx_highbd_dc_128_predictor_16x16_neon,
                             &vpx_highbd_dc_128_predictor_16x16_c, 16, 10),
        HighbdIntraPredParam(&vpx_highbd_dc_128_predictor_32x32_neon,
                             &vpx_highbd_dc_128_predictor_32x32_c, 32, 10),
        HighbdIntraPredParam(&vpx_highbd_dc_left_predictor_4x4_neon,
                             &vpx_highbd_dc_left_predictor_4x4_c, 4, 10),
        HighbdIntraPredParam(&vpx_highbd_dc_left_predictor_8x8_neon,
                             &vpx_highbd_dc_left_predictor_8x8_c, 8, 10),
        HighbdIntraPredParam(&vpx_highbd_dc_left_predictor_16x16_neon,
                             &vpx_highbd_dc_left_predictor_16x16_c, 16, 10),
        HighbdIntraPredParam(&vpx_highbd_dc_left_predictor_32x32_neon,
                             &vpx_highbd_dc_left_predictor_32x32_c, 32, 10),
        HighbdIntraPredParam(&vpx_highbd_dc_predictor_4x4_neon,
                             &vpx_highbd_dc_predictor_4x4_c, 4, 10),
        HighbdIntraPredParam(&vpx_highbd_dc_predictor_8x8_neon,
                             &vpx_highbd_dc_predictor_8x8_c, 8, 10),
        HighbdIntraPredParam(&vpx_highbd_dc_predictor_16x16_neon,
                             &vpx_highbd_dc_predictor_16x16_c, 16, 10),
        HighbdIntraPredParam(&vpx_highbd_dc_predictor_32x32_neon,
                             &vpx_highbd_dc_predictor_32x32_c, 32, 10),
        HighbdIntraPredParam(&vpx_highbd_dc_top_predictor_4x4_neon,
                             &vpx_highbd_dc_top_predictor_4x4_c, 4, 10),
        HighbdIntraPredParam(&vpx_highbd_dc_top_predictor_8x8_neon,
                             &vpx_highbd_dc_top_predictor_8x8_c, 8, 10),
        HighbdIntraPredParam(&vpx_highbd_dc_top_predictor_16x16_neon,
                             &vpx_highbd_dc_top_predictor_16x16_c, 16, 10),
        HighbdIntraPredParam(&vpx_highbd_dc_top_predictor_32x32_neon,
                             &vpx_highbd_dc_top_predictor_32x32_c, 32, 10),
        HighbdIntraPredParam(&vpx_highbd_h_predictor_4x4_neon,
                             &vpx_highbd_h_predictor_4x4_c, 4, 10),
        HighbdIntraPredParam(&vpx_highbd_h_predictor_8x8_neon,
                             &vpx_highbd_h_predictor_8x8_c, 8, 10),
        HighbdIntraPredParam(&vpx_highbd_h_predictor_16x16_neon,
                             &vpx_highbd_h_predictor_16x16_c, 16, 10),
        HighbdIntraPredParam(&vpx_highbd_h_predictor_32x32_neon,
                             &vpx_highbd_h_predictor_32x32_c, 32, 10),
        HighbdIntraPredParam(&vpx_highbd_tm_predictor_4x4_neon,
                             &vpx_highbd_tm_predictor_4x4_c, 4, 10),
        HighbdIntraPredParam(&vpx_highbd_tm_predictor_8x8_neon,
                             &vpx_highbd_tm_predictor_8x8_c, 8, 10),
        HighbdIntraPredParam(&vpx_highbd_tm_predictor_16x16_neon,
                             &vpx_highbd_tm_predictor_16x16_c, 16, 10),
        HighbdIntraPredParam(&vpx_highbd_tm_predictor_32x32_neon,
                             &vpx_highbd_tm_predictor_32x32_c, 32, 10),
        HighbdIntraPredParam(&vpx_highbd_v_predictor_4x4_neon,
                             &vpx_highbd_v_predictor_4x4_c, 4, 10),
        HighbdIntraPredParam(&vpx_highbd_v_predictor_8x8_neon,
                             &vpx_highbd_v_predictor_8x8_c, 8, 10),
        HighbdIntraPredParam(&vpx_highbd_v_predictor_16x16_neon,
                             &vpx_highbd_v_predictor_16x16_c, 16, 10),
        HighbdIntraPredParam(&vpx_highbd_v_predictor_32x32_neon,
                             &vpx_highbd_v_predictor_32x32_c, 32, 10)));
 // Instantiates VP9HighbdIntraPredTest with every NEON high-bitdepth intra
 // predictor paired with the C function of the same name. The last two
 // HighbdIntraPredParam arguments are the NxN size from the function name and
 // the constant 12 (presumably block size and bit depth -- confirm against
 // HighbdIntraPredParam).
 INSTANTIATE_TEST_CASE_P(
    NEON_TO_C_12, VP9HighbdIntraPredTest,
    ::testing::Values(
        HighbdIntraPredParam(&vpx_highbd_d45_predictor_4x4_neon,
                             &vpx_highbd_d45_predictor_4x4_c, 4, 12),
        HighbdIntraPredParam(&vpx_highbd_d45_predictor_8x8_neon,
                             &vpx_highbd_d45_predictor_8x8_c, 8, 12),
        HighbdIntraPredParam(&vpx_highbd_d45_predictor_16x16_neon,
                             &vpx_highbd_d45_predictor_16x16_c, 16, 12),
        HighbdIntraPredParam(&vpx_highbd_d45_predictor_32x32_neon,
                             &vpx_highbd_d45_predictor_32x32_c, 32, 12),
        HighbdIntraPredParam(&vpx_highbd_d135_predictor_4x4_neon,
                             &vpx_highbd_d135_predictor_4x4_c, 4, 12),
        HighbdIntraPredParam(&vpx_highbd_d135_predictor_8x8_neon,
                             &vpx_highbd_d135_predictor_8x8_c, 8, 12),
        HighbdIntraPredParam(&vpx_highbd_d135_predictor_16x16_neon,
                             &vpx_highbd_d135_predictor_16x16_c, 16, 12),
        HighbdIntraPredParam(&vpx_highbd_d135_predictor_32x32_neon,
                             &vpx_highbd_d135_predictor_32x32_c, 32, 12),
        HighbdIntraPredParam(&vpx_highbd_dc_128_predictor_4x4_neon,
                             &vpx_highbd_dc_128_predictor_4x4_c, 4, 12),
        HighbdIntraPredParam(&vpx_highbd_dc_128_predictor_8x8_neon,
                             &vpx_highbd_dc_128_predictor_8x8_c, 8, 12),
        HighbdIntraPredParam(&vpx_highbd_dc_128_predictor_16x16_neon,
                             &vpx_highbd_dc_128_predictor_16x16_c, 16, 12),
        HighbdIntraPredParam(&vpx_highbd_dc_128_predictor_32x32_neon,
                             &vpx_highbd_dc_128_predictor_32x32_c, 32, 12),
        HighbdIntraPredParam(&vpx_highbd_dc_left_predictor_4x4_neon,
                             &vpx_highbd_dc_left_predictor_4x4_c, 4, 12),
        HighbdIntraPredParam(&vpx_highbd_dc_left_predictor_8x8_neon,
                             &vpx_highbd_dc_left_predictor_8x8_c, 8, 12),
        HighbdIntraPredParam(&vpx_highbd_dc_left_predictor_16x16_neon,
                             &vpx_highbd_dc_left_predictor_16x16_c, 16, 12),
        HighbdIntraPredParam(&vpx_highbd_dc_left_predictor_32x32_neon,
                             &vpx_highbd_dc_left_predictor_32x32_c, 32, 12),
        HighbdIntraPredParam(&vpx_highbd_dc_predictor_4x4_neon,
                             &vpx_highbd_dc_predictor_4x4_c, 4, 12),
        HighbdIntraPredParam(&vpx_highbd_dc_predictor_8x8_neon,
                             &vpx_highbd_dc_predictor_8x8_c, 8, 12),
        HighbdIntraPredParam(&vpx_highbd_dc_predictor_16x16_neon,
                             &vpx_highbd_dc_predictor_16x16_c, 16, 12),
        HighbdIntraPredParam(&vpx_highbd_dc_predictor_32x32_neon,
                             &vpx_highbd_dc_predictor_32x32_c, 32, 12),
        HighbdIntraPredParam(&vpx_highbd_dc_top_predictor_4x4_neon,
                             &vpx_highbd_dc_top_predictor_4x4_c, 4, 12),
        HighbdIntraPredParam(&vpx_highbd_dc_top_predictor_8x8_neon,
                             &vpx_highbd_dc_top_predictor_8x8_c, 8, 12),
        HighbdIntraPredParam(&vpx_highbd_dc_top_predictor_16x16_neon,
                             &vpx_highbd_dc_top_predictor_16x16_c, 16, 12),
        HighbdIntraPredParam(&vpx_highbd_dc_top_predictor_32x32_neon,
                             &vpx_highbd_dc_top_predictor_32x32_c, 32, 12),
        HighbdIntraPredParam(&vpx_highbd_h_predictor_4x4_neon,
                             &vpx_highbd_h_predictor_4x4_c, 4, 12),
        HighbdIntraPredParam(&vpx_highbd_h_predictor_8x8_neon,
                             &vpx_highbd_h_predictor_8x8_c, 8, 12),
        HighbdIntraPredParam(&vpx_highbd_h_predictor_16x16_neon,
                             &vpx_highbd_h_predictor_16x16_c, 16, 12),
        HighbdIntraPredParam(&vpx_highbd_h_predictor_32x32_neon,
                             &vpx_highbd_h_predictor_32x32_c, 32, 12),
        HighbdIntraPredParam(&vpx_highbd_tm_predictor_4x4_neon,
                             &vpx_highbd_tm_predictor_4x4_c, 4, 12),
        HighbdIntraPredParam(&vpx_highbd_tm_predictor_8x8_neon,
                             &vpx_highbd_tm_predictor_8x8_c, 8, 12),
        HighbdIntraPredParam(&vpx_highbd_tm_predictor_16x16_neon,
                             &vpx_highbd_tm_predictor_16x16_c, 16, 12),
        HighbdIntraPredParam(&vpx_highbd_tm_predictor_32x32_neon,
                             &vpx_highbd_tm_predictor_32x32_c, 32, 12),
        HighbdIntraPredParam(&vpx_highbd_v_predictor_4x4_neon,
                             &vpx_highbd_v_predictor_4x4_c, 4, 12),
        HighbdIntraPredParam(&vpx_highbd_v_predictor_8x8_neon,
                             &vpx_highbd_v_predictor_8x8_c, 8, 12),
        HighbdIntraPredParam(&vpx_highbd_v_predictor_16x16_neon,
                             &vpx_highbd_v_predictor_16x16_c, 16, 12),
        HighbdIntraPredParam(&vpx_highbd_v_predictor_32x32_neon,
                             &vpx_highbd_v_predictor_32x32_c, 32, 12)));
 #endif  // HAVE_NEON
 #endif  // CONFIG_VP9_HIGHBITDEPTH
 }  // namespace
--- a/test/vp9_motion_vector_test.cc
+++ b/test/vp9_motion_vector_test.cc
@ -1,97 +0,0 @@
 /*
 *  Copyright (c) 2017 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
 #include "third_party/googletest/src/include/gtest/gtest.h"
 #include "test/codec_factory.h"
 #include "test/encode_test_driver.h"
 #include "test/util.h"
 #include "test/yuv_video_source.h"
 namespace {
 // The two values listed in kMVTestModes below; each one reaches the encoder
 // through the VP9E_ENABLE_MOTION_VECTOR_UNIT_TEST control.
 #define MAX_EXTREME_MV 1
 #define MIN_EXTREME_MV 2
 // Encoding modes
 const libvpx_test::TestMode kEncodingModeVectors[] = {
  ::libvpx_test::kTwoPassGood, ::libvpx_test::kOnePassGood,
  ::libvpx_test::kRealTime
 };
 // Encoding speeds (each value is passed to the VP8E_SET_CPUUSED control).
 const int kCpuUsedVectors[] = { 0, 1, 2, 3, 4, 5, 6 };
 // MV test modes: 1 - always use maximum MV; 2 - always use minimum MV.
 const int kMVTestModes[] = { MAX_EXTREME_MV, MIN_EXTREME_MV };
 // Encoder test fixture parameterized on (encoding mode, cpu-used value, MV
 // test mode), taken from GET_PARAM(1), GET_PARAM(2) and GET_PARAM(3).
 class MotionVectorTestLarge
    : public ::libvpx_test::EncoderTest,
      public ::libvpx_test::CodecTestWith3Params<libvpx_test::TestMode, int,
                                                 int> {
 protected:
  MotionVectorTestLarge()
      : EncoderTest(GET_PARAM(0)), encoding_mode_(GET_PARAM(1)),
        cpu_used_(GET_PARAM(2)), mv_test_mode_(GET_PARAM(3)) {}
  virtual ~MotionVectorTestLarge() {}
  // Initializes the config for the selected mode and picks the rate-control
  // settings: CBR with explicit buffer sizes and no lag for real-time, VBR
  // with a 3-frame lag for every other mode.
  virtual void SetUp() {
    InitializeConfig();
    SetMode(encoding_mode_);
    if (encoding_mode_ == ::libvpx_test::kRealTime) {
      cfg_.g_lag_in_frames = 0;
      cfg_.rc_end_usage = VPX_CBR;
      cfg_.rc_buf_sz = 1000;
      cfg_.rc_buf_initial_sz = 500;
      cfg_.rc_buf_optimal_sz = 600;
      return;
    }
    cfg_.g_lag_in_frames = 3;
    cfg_.rc_end_usage = VPX_VBR;
  }
  // Applies the encoder controls only on the call where video->frame() is 1.
  virtual void PreEncodeFrameHook(::libvpx_test::VideoSource *video,
                                  ::libvpx_test::Encoder *encoder) {
    if (video->frame() != 1) return;
    encoder->Control(VP8E_SET_CPUUSED, cpu_used_);
    encoder->Control(VP9E_ENABLE_MOTION_VECTOR_UNIT_TEST, mv_test_mode_);
    // The alt-ref / ARNR controls are not set in real-time mode.
    if (encoding_mode_ == ::libvpx_test::kRealTime) return;
    encoder->Control(VP8E_SET_ENABLEAUTOALTREF, 1);
    encoder->Control(VP8E_SET_ARNR_MAXFRAMES, 7);
    encoder->Control(VP8E_SET_ARNR_STRENGTH, 5);
    encoder->Control(VP8E_SET_ARNR_TYPE, 3);
  }
  libvpx_test::TestMode encoding_mode_;  // GET_PARAM(1)
  int cpu_used_;                         // GET_PARAM(2)
  int mv_test_mode_;                     // GET_PARAM(3)
 };
 // Encodes one YUV source through RunLoop() with PSNR reporting enabled and
 // requires that no fatal failure is raised along the way.
 TEST_P(MotionVectorTestLarge, OverallTest) {
  cfg_.rc_target_bitrate = 24000;
  cfg_.g_profile = 0;
  init_flags_ = VPX_CODEC_USE_PSNR;
  testing::internal::scoped_ptr<libvpx_test::VideoSource> video;
  // NOTE(review): the file name suggests 640x480 content while 3840x2160 is
  // passed as the frame size -- confirm this mismatch is intentional.
  video.reset(new libvpx_test::YUVVideoSource(
      "niklas_640_480_30.yuv", VPX_IMG_FMT_I420, 3840, 2160,  // 2048, 1080,
      30, 1, 0, 5));
  ASSERT_TRUE(video.get() != NULL);
  ASSERT_NO_FATAL_FAILURE(RunLoop(video.get()));
 }
 // Parameter sources for MotionVectorTestLarge, in the order the fixture reads
 // them: encoding mode, cpu-used value, MV test mode.
 VP9_INSTANTIATE_TEST_CASE(MotionVectorTestLarge,
                          ::testing::ValuesIn(kEncodingModeVectors),
                          ::testing::ValuesIn(kCpuUsedVectors),
                          ::testing::ValuesIn(kMVTestModes));
 }  // namespace
--- a/test/vp9_quantize_test.cc
+++ b/test/vp9_quantize_test.cc
@ -14,11 +14,9 @@
 #include "third_party/googletest/src/include/gtest/gtest.h"
 #include "./vp9_rtcd.h"
 #include "./vpx_config.h"
 #include "./vpx_dsp_rtcd.h"
 #include "test/acm_random.h"
 #include "test/buffer.h"
 #include "test/clear_system_state.h"
 #include "test/register_state_check.h"
 #include "test/util.h"
@ -26,12 +24,11 @@
 #include "vp9/common/vp9_scan.h"
 #include "vpx/vpx_codec.h"
 #include "vpx/vpx_integer.h"
 #include "vpx_ports/vpx_timer.h"
 using libvpx_test::ACMRandom;
 using libvpx_test::Buffer;
 namespace {
 #if CONFIG_VP9_HIGHBITDEPTH
 const int number_of_iterations = 100;
 typedef void (*QuantizeFunc)(const tran_low_t *coeff, intptr_t count,
@ -41,537 +38,307 @@ typedef void (*QuantizeFunc)(const tran_low_t *coeff, intptr_t count,
                             tran_low_t *dqcoeff, const int16_t *dequant,
                             uint16_t *eob, const int16_t *scan,
                             const int16_t *iscan);
-typedef ::testing::tuple<QuantizeFunc, QuantizeFunc, vpx_bit_depth_t,
+typedef std::tr1::tuple<QuantizeFunc, QuantizeFunc, vpx_bit_depth_t>
                         int /*max_size*/, bool /*is_fp*/>
    QuantizeParam;
-// Wrapper for FP version which does not use zbin or quant_shift.
+class VP9QuantizeTest : public ::testing::TestWithParam<QuantizeParam> {
 // Pointer to an "fp" quantizer: takes round and quant tables but no zbin or
 // quant_shift argument.
 typedef void (*QuantizeFPFunc)(const tran_low_t *coeff, intptr_t count,
                               int skip_block, const int16_t *round,
                               const int16_t *quant, tran_low_t *qcoeff,
                               tran_low_t *dqcoeff, const int16_t *dequant,
                               uint16_t *eob, const int16_t *scan,
                               const int16_t *iscan);
 // Adapter that exposes the QuantizeFPFunc `fn` through a wider signature that
 // also carries zbin and quant_shift. Those two arguments are ignored; all
 // others are forwarded to `fn` unchanged.
 template <QuantizeFPFunc fn>
 void QuantFPWrapper(const tran_low_t *coeff, intptr_t count, int skip_block,
                    const int16_t *zbin, const int16_t *round,
                    const int16_t *quant, const int16_t *quant_shift,
                    tran_low_t *qcoeff, tran_low_t *dqcoeff,
                    const int16_t *dequant, uint16_t *eob, const int16_t *scan,
                    const int16_t *iscan) {
  fn(coeff, count, skip_block, round, quant, qcoeff, dqcoeff, dequant, eob,
     scan, iscan);
  // Present only so the signature matches; silence unused-parameter warnings.
  (void)quant_shift;
  (void)zbin;
 }
 class VP9QuantizeBase {
 public:
-  VP9QuantizeBase(vpx_bit_depth_t bit_depth, int max_size, bool is_fp)
+  virtual ~VP9QuantizeTest() {}
-      : bit_depth_(bit_depth), max_size_(max_size), is_fp_(is_fp) {
+  virtual void SetUp() {
-    max_value_ = (1 << bit_depth_) - 1;
+    quantize_op_ = GET_PARAM(0);
-    zbin_ptr_ =
+    ref_quantize_op_ = GET_PARAM(1);
-        reinterpret_cast<int16_t *>(vpx_memalign(16, 8 * sizeof(*zbin_ptr_)));
+    bit_depth_ = GET_PARAM(2);
-    round_fp_ptr_ = reinterpret_cast<int16_t *>(
+    mask_ = (1 << bit_depth_) - 1;
        vpx_memalign(16, 8 * sizeof(*round_fp_ptr_)));
    quant_fp_ptr_ = reinterpret_cast<int16_t *>(
        vpx_memalign(16, 8 * sizeof(*quant_fp_ptr_)));
    round_ptr_ =
        reinterpret_cast<int16_t *>(vpx_memalign(16, 8 * sizeof(*round_ptr_)));
    quant_ptr_ =
        reinterpret_cast<int16_t *>(vpx_memalign(16, 8 * sizeof(*quant_ptr_)));
    quant_shift_ptr_ = reinterpret_cast<int16_t *>(
        vpx_memalign(16, 8 * sizeof(*quant_shift_ptr_)));
    dequant_ptr_ = reinterpret_cast<int16_t *>(
        vpx_memalign(16, 8 * sizeof(*dequant_ptr_)));
  }
-  ~VP9QuantizeBase() {
+  virtual void TearDown() { libvpx_test::ClearSystemState(); }
    vpx_free(zbin_ptr_);
    vpx_free(round_fp_ptr_);
    vpx_free(quant_fp_ptr_);
    vpx_free(round_ptr_);
    vpx_free(quant_ptr_);
    vpx_free(quant_shift_ptr_);
    vpx_free(dequant_ptr_);
    zbin_ptr_ = NULL;
    round_fp_ptr_ = NULL;
    quant_fp_ptr_ = NULL;
    round_ptr_ = NULL;
    quant_ptr_ = NULL;
    quant_shift_ptr_ = NULL;
    dequant_ptr_ = NULL;
    libvpx_test::ClearSystemState();
  }
 protected:
-  int16_t *zbin_ptr_;
+  vpx_bit_depth_t bit_depth_;
-  int16_t *round_fp_ptr_;
+  int mask_;
-  int16_t *quant_fp_ptr_;
+  QuantizeFunc quantize_op_;
-  int16_t *round_ptr_;
+  QuantizeFunc ref_quantize_op_;
  int16_t *quant_ptr_;
  int16_t *quant_shift_ptr_;
  int16_t *dequant_ptr_;
  const vpx_bit_depth_t bit_depth_;
  int max_value_;
  const int max_size_;
  const bool is_fp_;
 };
-class VP9QuantizeTest : public VP9QuantizeBase,
+class VP9Quantize32Test : public ::testing::TestWithParam<QuantizeParam> {
                        public ::testing::TestWithParam<QuantizeParam> {
 public:
-  VP9QuantizeTest()
+  virtual ~VP9Quantize32Test() {}
-      : VP9QuantizeBase(GET_PARAM(2), GET_PARAM(3), GET_PARAM(4)),
+  virtual void SetUp() {
-        quantize_op_(GET_PARAM(0)), ref_quantize_op_(GET_PARAM(1)) {}
+    quantize_op_ = GET_PARAM(0);
    ref_quantize_op_ = GET_PARAM(1);
    bit_depth_ = GET_PARAM(2);
    mask_ = (1 << bit_depth_) - 1;
  }
  virtual void TearDown() { libvpx_test::ClearSystemState(); }
 protected:
-  const QuantizeFunc quantize_op_;
+  vpx_bit_depth_t bit_depth_;
-  const QuantizeFunc ref_quantize_op_;
+  int mask_;
  QuantizeFunc quantize_op_;
  QuantizeFunc ref_quantize_op_;
 };
 // This quantizer compares the AC coefficients to the quantization step size to
 // determine if further multiplication operations are needed.
 // Based on vp9_quantize_fp_sse2().
 //
 // Coefficients are handled in groups of 16. For every group after the first,
 // if all 16 magnitudes are <= dequant_ptr[1] >> (1 + is_32x32) the group is
 // written out as zeros; otherwise every coefficient in the group is rounded,
 // scaled by the quantizer and dequantized. The first group is always
 // quantized.
 //
 // Index 0 of round_ptr / quant_ptr / dequant_ptr is used for position 0 and
 // index 1 for every other position. n_coeffs is consumed in steps of 16, so
 // it is expected to be a multiple of 16. skip_block must be 0 (asserted) and
 // iscan is unused. *eob_ptr receives one plus the last index i for which
 // qcoeff_ptr[scan[i]] is nonzero, or 0 if there is none.
 inline void quant_fp_nz(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
                        int skip_block, const int16_t *round_ptr,
                        const int16_t *quant_ptr, tran_low_t *qcoeff_ptr,
                        tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr,
                        uint16_t *eob_ptr, const int16_t *scan,
                        const int16_t *iscan, int is_32x32) {
  int i, eob = -1;
  const int thr = dequant_ptr[1] >> (1 + is_32x32);
  (void)iscan;
  // Keeps skip_block referenced when assert() is compiled out.
  (void)skip_block;
  assert(!skip_block);
  // Quantization pass: one group of 16 coefficients per iteration.
  for (i = 0; i < n_coeffs; i += 16) {
    int y;
    int nzflag_cnt = 0;
    int abs_coeff[16];
    int coeff_sign[16];
    // Count the coefficients of this group whose magnitude is <= thr.
    for (y = 0; y < 16; ++y) {
      const int rc = i + y;
      const int coeff = coeff_ptr[rc];
      coeff_sign[y] = (coeff >> 31);
      abs_coeff[y] = (coeff ^ coeff_sign[y]) - coeff_sign[y];
      // The first 16 are skipped in the sse2 code.  Do the same here to match.
      if (i >= 16 && (abs_coeff[y] <= thr)) {
        nzflag_cnt++;
      }
    }
    for (y = 0; y < 16; ++y) {
      const int rc = i + y;
      // If all of the AC coeffs in a row has magnitude less than the
      // quantization step_size/2, quantize to zero.
      if (nzflag_cnt < 16) {
        int tmp;
        int _round;
        if (is_32x32) {
          _round = ROUND_POWER_OF_TWO(round_ptr[rc != 0], 1);
        } else {
          _round = round_ptr[rc != 0];
        }
        tmp = clamp(abs_coeff[y] + _round, INT16_MIN, INT16_MAX);
        tmp = (tmp * quant_ptr[rc != 0]) >> (16 - is_32x32);
        // Re-apply the sign that was stripped when taking the magnitude.
        qcoeff_ptr[rc] = (tmp ^ coeff_sign[y]) - coeff_sign[y];
        // The 32x32 variant halves the dequantized value.
        if (is_32x32) {
          dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr[rc != 0] / 2;
        } else {
          dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr[rc != 0];
        }
      } else {
        qcoeff_ptr[rc] = 0;
        dqcoeff_ptr[rc] = 0;
      }
    }
  }
  // Scan for eob.
  for (i = 0; i < n_coeffs; i++) {
    // Use the scan order to find the correct eob.
    const int rc = scan[i];
    if (qcoeff_ptr[rc]) {
      eob = i;
    }
  }
  *eob_ptr = eob + 1;
 }
 // Forwards every argument to quant_fp_nz() with is_32x32 set to 0.
 void quantize_fp_nz_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
                      int skip_block, const int16_t *round_ptr,
                      const int16_t *quant_ptr, tran_low_t *qcoeff_ptr,
                      tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr,
                      uint16_t *eob_ptr, const int16_t *scan,
                      const int16_t *iscan) {
  const int is_32x32 = 0;
  quant_fp_nz(coeff_ptr, n_coeffs, skip_block, round_ptr, quant_ptr,
              qcoeff_ptr, dqcoeff_ptr, dequant_ptr, eob_ptr, scan, iscan,
              is_32x32);
 }
 // Forwards every argument to quant_fp_nz() with is_32x32 set to 1.
 void quantize_fp_32x32_nz_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
                            int skip_block, const int16_t *round_ptr,
                            const int16_t *quant_ptr, tran_low_t *qcoeff_ptr,
                            tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr,
                            uint16_t *eob_ptr, const int16_t *scan,
                            const int16_t *iscan) {
  const int is_32x32 = 1;
  quant_fp_nz(coeff_ptr, n_coeffs, skip_block, round_ptr, quant_ptr,
              qcoeff_ptr, dqcoeff_ptr, dequant_ptr, eob_ptr, scan, iscan,
              is_32x32);
 }
 // Fills the quantizer parameter tables with random values drawn from rnd.
 // Entries 0 and 1 of each table are generated independently; entries 2..7
 // are copies of entry 1. Every output pointer must therefore reference at
 // least eight int16_t elements. The order of the rnd calls determines the
 // generated sequence, so statements must not be reordered.
 void GenerateHelperArrays(ACMRandom *rnd, int16_t *zbin, int16_t *round,
                          int16_t *quant, int16_t *quant_shift,
                          int16_t *dequant, int16_t *round_fp,
                          int16_t *quant_fp) {
  // Max when q == 0.  Otherwise, it is 48 for Y and 42 for U/V.
  const int max_qrounding_factor_fp = 64;
  for (int j = 0; j < 2; j++) {
    // The range is 4 to 1828 in the VP9 tables.
    const int qlookup = rnd->RandRange(1825) + 4;
    // Both fp values are derived from qlookup and consume no further
    // random numbers.
    round_fp[j] = (max_qrounding_factor_fp * qlookup) >> 7;
    quant_fp[j] = (1 << 16) / qlookup;
    // Values determined by deconstructing vp9_init_quantizer().
    // zbin may be up to 1143 for 8 and 10 bit Y values, or 1200 for 12 bit Y
    // values or U/V values of any bit depth. This is because y_delta is not
    // factored into the vp9_ac_quant() call.
    zbin[j] = rnd->RandRange(1200);
    // round may be up to 685 for Y values or 914 for U/V.
    round[j] = rnd->RandRange(914);
    // quant ranges from 1 to -32703
    // NOTE(review): if RandRange(n) yields values in [0, n), the expression
    // below produces [-32703, 0] and never 1 -- confirm the intended range.
    quant[j] = static_cast<int>(rnd->RandRange(32704)) - 32703;
    // quant_shift goes up to 1 << 16.
    // NOTE(review): the draw below is bounded by 16384 (1 << 14), not
    // 1 << 16 -- confirm which bound is intended.
    quant_shift[j] = rnd->RandRange(16384);
    // dequant maxes out at 1828 for all cases.
    dequant[j] = rnd->RandRange(1828);
  }
  // Replicate entry 1 into entries 2..7 of every table.
  for (int j = 2; j < 8; j++) {
    zbin[j] = zbin[1];
    round_fp[j] = round_fp[1];
    quant_fp[j] = quant_fp[1];
    round[j] = round[1];
    quant[j] = quant[1];
    quant_shift[j] = quant_shift[1];
    dequant[j] = dequant[1];
  }
 }
 TEST_P(VP9QuantizeTest, OperationCheck) {
  ACMRandom rnd(ACMRandom::DeterministicSeed());
-  Buffer<tran_low_t> coeff = Buffer<tran_low_t>(max_size_, max_size_, 0, 16);
+  DECLARE_ALIGNED(16, tran_low_t, coeff_ptr[256]);
-  ASSERT_TRUE(coeff.Init());
+  DECLARE_ALIGNED(16, int16_t, zbin_ptr[2]);
-  Buffer<tran_low_t> qcoeff = Buffer<tran_low_t>(max_size_, max_size_, 0, 32);
+  DECLARE_ALIGNED(16, int16_t, round_ptr[2]);
-  ASSERT_TRUE(qcoeff.Init());
+  DECLARE_ALIGNED(16, int16_t, quant_ptr[2]);
-  Buffer<tran_low_t> dqcoeff = Buffer<tran_low_t>(max_size_, max_size_, 0, 32);
+  DECLARE_ALIGNED(16, int16_t, quant_shift_ptr[2]);
-  ASSERT_TRUE(dqcoeff.Init());
+  DECLARE_ALIGNED(16, tran_low_t, qcoeff_ptr[256]);
-  Buffer<tran_low_t> ref_qcoeff =
+  DECLARE_ALIGNED(16, tran_low_t, dqcoeff_ptr[256]);
-      Buffer<tran_low_t>(max_size_, max_size_, 0, 32);
+  DECLARE_ALIGNED(16, tran_low_t, ref_qcoeff_ptr[256]);
-  ASSERT_TRUE(ref_qcoeff.Init());
+  DECLARE_ALIGNED(16, tran_low_t, ref_dqcoeff_ptr[256]);
-  Buffer<tran_low_t> ref_dqcoeff =
+  DECLARE_ALIGNED(16, int16_t, dequant_ptr[2]);
-      Buffer<tran_low_t>(max_size_, max_size_, 0, 32);
+  DECLARE_ALIGNED(16, uint16_t, eob_ptr[1]);
-  ASSERT_TRUE(ref_dqcoeff.Init());
+  DECLARE_ALIGNED(16, uint16_t, ref_eob_ptr[1]);
-  uint16_t eob, ref_eob;
+  int err_count_total = 0;
-
+  int first_failure = -1;
  for (int i = 0; i < number_of_iterations; ++i) {
-    // Test skip block for the first three iterations to catch all the different
+    const int skip_block = i == 0;
-    // sizes.
+    const TX_SIZE sz = (TX_SIZE)(i % 3);  // TX_4X4, TX_8X8 TX_16X16
-    const int skip_block = 0;
+    const TX_TYPE tx_type = (TX_TYPE)((i >> 2) % 3);
    TX_SIZE sz;
    if (max_size_ == 16) {
      sz = static_cast<TX_SIZE>(i % 3);  // TX_4X4, TX_8X8 TX_16X16
    } else {
      sz = TX_32X32;
    }
    const TX_TYPE tx_type = static_cast<TX_TYPE>((i >> 2) % 3);
    const scan_order *scan_order = &vp9_scan_orders[sz][tx_type];
-    const int count = (4 << sz) * (4 << sz);
+    const int count = (4 << sz) * (4 << sz);  // 16, 64, 256
-    coeff.Set(&rnd, -max_value_, max_value_);
+    int err_count = 0;
-    GenerateHelperArrays(&rnd, zbin_ptr_, round_ptr_, quant_ptr_,
+    *eob_ptr = rnd.Rand16();
-                         quant_shift_ptr_, dequant_ptr_, round_fp_ptr_,
+    *ref_eob_ptr = *eob_ptr;
-                         quant_fp_ptr_);
+    for (int j = 0; j < count; j++) {
-    int16_t *r_ptr = (is_fp_) ? round_fp_ptr_ : round_ptr_;
+      coeff_ptr[j] = rnd.Rand16() & mask_;
    int16_t *q_ptr = (is_fp_) ? quant_fp_ptr_ : quant_ptr_;
    ref_quantize_op_(coeff.TopLeftPixel(), count, skip_block, zbin_ptr_, r_ptr,
                     q_ptr, quant_shift_ptr_, ref_qcoeff.TopLeftPixel(),
                     ref_dqcoeff.TopLeftPixel(), dequant_ptr_, &ref_eob,
                     scan_order->scan, scan_order->iscan);
    ASM_REGISTER_STATE_CHECK(quantize_op_(
        coeff.TopLeftPixel(), count, skip_block, zbin_ptr_, r_ptr, q_ptr,
        quant_shift_ptr_, qcoeff.TopLeftPixel(), dqcoeff.TopLeftPixel(),
        dequant_ptr_, &eob, scan_order->scan, scan_order->iscan));
    EXPECT_TRUE(qcoeff.CheckValues(ref_qcoeff));
    EXPECT_TRUE(dqcoeff.CheckValues(ref_dqcoeff));
    EXPECT_EQ(eob, ref_eob);
    if (HasFailure()) {
      printf("Failure on iteration %d.\n", i);
      qcoeff.PrintDifference(ref_qcoeff);
      dqcoeff.PrintDifference(ref_dqcoeff);
      return;
    }
    for (int j = 0; j < 2; j++) {
      zbin_ptr[j] = rnd.Rand16() & mask_;
      round_ptr[j] = rnd.Rand16();
      quant_ptr[j] = rnd.Rand16();
      quant_shift_ptr[j] = rnd.Rand16();
      dequant_ptr[j] = rnd.Rand16();
    }
    ref_quantize_op_(coeff_ptr, count, skip_block, zbin_ptr, round_ptr,
                     quant_ptr, quant_shift_ptr, ref_qcoeff_ptr,
                     ref_dqcoeff_ptr, dequant_ptr, ref_eob_ptr,
                     scan_order->scan, scan_order->iscan);
    ASM_REGISTER_STATE_CHECK(quantize_op_(
        coeff_ptr, count, skip_block, zbin_ptr, round_ptr, quant_ptr,
        quant_shift_ptr, qcoeff_ptr, dqcoeff_ptr, dequant_ptr, eob_ptr,
        scan_order->scan, scan_order->iscan));
    for (int j = 0; j < sz; ++j) {
      err_count += (ref_qcoeff_ptr[j] != qcoeff_ptr[j]) |
                   (ref_dqcoeff_ptr[j] != dqcoeff_ptr[j]);
    }
    err_count += (*ref_eob_ptr != *eob_ptr);
    if (err_count && !err_count_total) {
      first_failure = i;
    }
    err_count_total += err_count;
  }
  EXPECT_EQ(0, err_count_total)
      << "Error: Quantization Test, C output doesn't match SSE2 output. "
      << "First failed at test case " << first_failure;
 }
 // Checks that quantize_op_ and ref_quantize_op_ agree on 32x32 blocks of
 // random coefficients.
 //
 // Every iteration randomizes the coefficients and the two-entry quantizer
 // tables, runs both implementations on the same input, and counts each
 // position where qcoeff or dqcoeff differ, plus one more if the end-of-block
 // values differ. Only the very first iteration runs with skip_block set. The
 // index of the first failing iteration is reported in the failure message.
 TEST_P(VP9Quantize32Test, OperationCheck) {
  ACMRandom rnd(ACMRandom::DeterministicSeed());
  DECLARE_ALIGNED(16, tran_low_t, coeff_ptr[1024]);
  DECLARE_ALIGNED(16, int16_t, zbin_ptr[2]);
  DECLARE_ALIGNED(16, int16_t, round_ptr[2]);
  DECLARE_ALIGNED(16, int16_t, quant_ptr[2]);
  DECLARE_ALIGNED(16, int16_t, quant_shift_ptr[2]);
  DECLARE_ALIGNED(16, tran_low_t, qcoeff_ptr[1024]);
  DECLARE_ALIGNED(16, tran_low_t, dqcoeff_ptr[1024]);
  DECLARE_ALIGNED(16, tran_low_t, ref_qcoeff_ptr[1024]);
  DECLARE_ALIGNED(16, tran_low_t, ref_dqcoeff_ptr[1024]);
  DECLARE_ALIGNED(16, int16_t, dequant_ptr[2]);
  DECLARE_ALIGNED(16, uint16_t, eob_ptr[1]);
  DECLARE_ALIGNED(16, uint16_t, ref_eob_ptr[1]);
  int err_count_total = 0;
  int first_failure = -1;
  for (int i = 0; i < number_of_iterations; ++i) {
    const int skip_block = i == 0;
    const TX_SIZE sz = TX_32X32;
    // Cycle through the scan orders selected by the transform type.
    const TX_TYPE tx_type = static_cast<TX_TYPE>(i % 4);
    const scan_order *scan_order = &vp9_scan_orders[sz][tx_type];
    const int count = (4 << sz) * (4 << sz);  // 1024
    int err_count = 0;
    // Seed both eob outputs with the same random value so that a function
    // which fails to write eob is not reported as a mismatch by accident.
    *eob_ptr = rnd.Rand16();
    *ref_eob_ptr = *eob_ptr;
    for (int j = 0; j < count; j++) {
      coeff_ptr[j] = rnd.Rand16() & mask_;
    }
    for (int j = 0; j < 2; j++) {
      zbin_ptr[j] = rnd.Rand16() & mask_;
      round_ptr[j] = rnd.Rand16();
      quant_ptr[j] = rnd.Rand16();
      quant_shift_ptr[j] = rnd.Rand16();
      dequant_ptr[j] = rnd.Rand16();
    }
    ref_quantize_op_(coeff_ptr, count, skip_block, zbin_ptr, round_ptr,
                     quant_ptr, quant_shift_ptr, ref_qcoeff_ptr,
                     ref_dqcoeff_ptr, dequant_ptr, ref_eob_ptr,
                     scan_order->scan, scan_order->iscan);
    ASM_REGISTER_STATE_CHECK(quantize_op_(
        coeff_ptr, count, skip_block, zbin_ptr, round_ptr, quant_ptr,
        quant_shift_ptr, qcoeff_ptr, dqcoeff_ptr, dequant_ptr, eob_ptr,
        scan_order->scan, scan_order->iscan));
    // Compare every coefficient handed to the quantizers. The bound has to be
    // |count|: |sz| is the TX_SIZE enum value, not a coefficient count, so
    // looping to |sz| would inspect only the first few entries of the block.
    for (int j = 0; j < count; ++j) {
      err_count += (ref_qcoeff_ptr[j] != qcoeff_ptr[j]) |
                   (ref_dqcoeff_ptr[j] != dqcoeff_ptr[j]);
    }
    err_count += (*ref_eob_ptr != *eob_ptr);
    if (err_count && !err_count_total) {
      first_failure = i;
    }
    err_count_total += err_count;
  }
  EXPECT_EQ(0, err_count_total)
      << "Error: Quantization Test, C output doesn't match SSE2 output. "
      << "First failed at test case " << first_failure;
 }
 TEST_P(VP9QuantizeTest, EOBCheck) {
  ACMRandom rnd(ACMRandom::DeterministicSeed());
-  Buffer<tran_low_t> coeff = Buffer<tran_low_t>(max_size_, max_size_, 0, 16);
+  DECLARE_ALIGNED(16, tran_low_t, coeff_ptr[256]);
-  ASSERT_TRUE(coeff.Init());
+  DECLARE_ALIGNED(16, int16_t, zbin_ptr[2]);
-  Buffer<tran_low_t> qcoeff = Buffer<tran_low_t>(max_size_, max_size_, 0, 32);
+  DECLARE_ALIGNED(16, int16_t, round_ptr[2]);
-  ASSERT_TRUE(qcoeff.Init());
+  DECLARE_ALIGNED(16, int16_t, quant_ptr[2]);
-  Buffer<tran_low_t> dqcoeff = Buffer<tran_low_t>(max_size_, max_size_, 0, 32);
+  DECLARE_ALIGNED(16, int16_t, quant_shift_ptr[2]);
-  ASSERT_TRUE(dqcoeff.Init());
+  DECLARE_ALIGNED(16, tran_low_t, qcoeff_ptr[256]);
-  Buffer<tran_low_t> ref_qcoeff =
+  DECLARE_ALIGNED(16, tran_low_t, dqcoeff_ptr[256]);
-      Buffer<tran_low_t>(max_size_, max_size_, 0, 32);
+  DECLARE_ALIGNED(16, tran_low_t, ref_qcoeff_ptr[256]);
-  ASSERT_TRUE(ref_qcoeff.Init());
+  DECLARE_ALIGNED(16, tran_low_t, ref_dqcoeff_ptr[256]);
-  Buffer<tran_low_t> ref_dqcoeff =
+  DECLARE_ALIGNED(16, int16_t, dequant_ptr[2]);
-      Buffer<tran_low_t>(max_size_, max_size_, 0, 32);
+  DECLARE_ALIGNED(16, uint16_t, eob_ptr[1]);
-  ASSERT_TRUE(ref_dqcoeff.Init());
+  DECLARE_ALIGNED(16, uint16_t, ref_eob_ptr[1]);
-  uint16_t eob, ref_eob;
+  int err_count_total = 0;
-
+  int first_failure = -1;
  for (int i = 0; i < number_of_iterations; ++i) {
-    const int skip_block = 0;
+    int skip_block = i == 0;
-    TX_SIZE sz;
+    TX_SIZE sz = (TX_SIZE)(i % 3);  // TX_4X4, TX_8X8 TX_16X16
-    if (max_size_ == 16) {
+    TX_TYPE tx_type = (TX_TYPE)((i >> 2) % 3);
      sz = static_cast<TX_SIZE>(i % 3);  // TX_4X4, TX_8X8 TX_16X16
    } else {
      sz = TX_32X32;
    }
    const TX_TYPE tx_type = static_cast<TX_TYPE>((i >> 2) % 3);
    const scan_order *scan_order = &vp9_scan_orders[sz][tx_type];
-    int count = (4 << sz) * (4 << sz);
+    int count = (4 << sz) * (4 << sz);  // 16, 64, 256
    int err_count = 0;
    *eob_ptr = rnd.Rand16();
    *ref_eob_ptr = *eob_ptr;
    // Two random entries
-    coeff.Set(0);
+    for (int j = 0; j < count; j++) {
-    coeff.TopLeftPixel()[rnd(count)] =
+      coeff_ptr[j] = 0;
-        static_cast<int>(rnd.RandRange(max_value_ * 2)) - max_value_;
+    }
-    coeff.TopLeftPixel()[rnd(count)] =
+    coeff_ptr[rnd(count)] = rnd.Rand16() & mask_;
-        static_cast<int>(rnd.RandRange(max_value_ * 2)) - max_value_;
+    coeff_ptr[rnd(count)] = rnd.Rand16() & mask_;
-    GenerateHelperArrays(&rnd, zbin_ptr_, round_ptr_, quant_ptr_,
+    for (int j = 0; j < 2; j++) {
-                         quant_shift_ptr_, dequant_ptr_, round_fp_ptr_,
+      zbin_ptr[j] = rnd.Rand16() & mask_;
-                         quant_fp_ptr_);
+      round_ptr[j] = rnd.Rand16();
-    int16_t *r_ptr = (is_fp_) ? round_fp_ptr_ : round_ptr_;
+      quant_ptr[j] = rnd.Rand16();
-    int16_t *q_ptr = (is_fp_) ? quant_fp_ptr_ : quant_ptr_;
+      quant_shift_ptr[j] = rnd.Rand16();
-    ref_quantize_op_(coeff.TopLeftPixel(), count, skip_block, zbin_ptr_, r_ptr,
+      dequant_ptr[j] = rnd.Rand16();
-                     q_ptr, quant_shift_ptr_, ref_qcoeff.TopLeftPixel(),
+    }
                     ref_dqcoeff.TopLeftPixel(), dequant_ptr_, &ref_eob,
                     scan_order->scan, scan_order->iscan);
    ref_quantize_op_(coeff_ptr, count, skip_block, zbin_ptr, round_ptr,
                     quant_ptr, quant_shift_ptr, ref_qcoeff_ptr,
                     ref_dqcoeff_ptr, dequant_ptr, ref_eob_ptr,
                     scan_order->scan, scan_order->iscan);
    ASM_REGISTER_STATE_CHECK(quantize_op_(
-        coeff.TopLeftPixel(), count, skip_block, zbin_ptr_, r_ptr, q_ptr,
+        coeff_ptr, count, skip_block, zbin_ptr, round_ptr, quant_ptr,
-        quant_shift_ptr_, qcoeff.TopLeftPixel(), dqcoeff.TopLeftPixel(),
+        quant_shift_ptr, qcoeff_ptr, dqcoeff_ptr, dequant_ptr, eob_ptr,
-        dequant_ptr_, &eob, scan_order->scan, scan_order->iscan));
+        scan_order->scan, scan_order->iscan));
-    EXPECT_TRUE(qcoeff.CheckValues(ref_qcoeff));
+    for (int j = 0; j < sz; ++j) {
-    EXPECT_TRUE(dqcoeff.CheckValues(ref_dqcoeff));
+      err_count += (ref_qcoeff_ptr[j] != qcoeff_ptr[j]) |
-
+                   (ref_dqcoeff_ptr[j] != dqcoeff_ptr[j]);
    EXPECT_EQ(eob, ref_eob);
    if (HasFailure()) {
      printf("Failure on iteration %d.\n", i);
      qcoeff.PrintDifference(ref_qcoeff);
      dqcoeff.PrintDifference(ref_dqcoeff);
      return;
    }
    err_count += (*ref_eob_ptr != *eob_ptr);
    if (err_count && !err_count_total) {
      first_failure = i;
    }
    err_count_total += err_count;
  }
  EXPECT_EQ(0, err_count_total)
      << "Error: Quantization Test, C output doesn't match SSE2 output. "
      << "First failed at test case " << first_failure;
 }
-TEST_P(VP9QuantizeTest, DISABLED_Speed) {
+TEST_P(VP9Quantize32Test, EOBCheck) {
  ACMRandom rnd(ACMRandom::DeterministicSeed());
-  Buffer<tran_low_t> coeff = Buffer<tran_low_t>(max_size_, max_size_, 0, 16);
+  DECLARE_ALIGNED(16, tran_low_t, coeff_ptr[1024]);
-  ASSERT_TRUE(coeff.Init());
+  DECLARE_ALIGNED(16, int16_t, zbin_ptr[2]);
-  Buffer<tran_low_t> qcoeff = Buffer<tran_low_t>(max_size_, max_size_, 0, 32);
+  DECLARE_ALIGNED(16, int16_t, round_ptr[2]);
-  ASSERT_TRUE(qcoeff.Init());
+  DECLARE_ALIGNED(16, int16_t, quant_ptr[2]);
-  Buffer<tran_low_t> dqcoeff = Buffer<tran_low_t>(max_size_, max_size_, 0, 32);
+  DECLARE_ALIGNED(16, int16_t, quant_shift_ptr[2]);
-  ASSERT_TRUE(dqcoeff.Init());
+  DECLARE_ALIGNED(16, tran_low_t, qcoeff_ptr[1024]);
-  uint16_t eob;
+  DECLARE_ALIGNED(16, tran_low_t, dqcoeff_ptr[1024]);
-  TX_SIZE starting_sz, ending_sz;
+  DECLARE_ALIGNED(16, tran_low_t, ref_qcoeff_ptr[1024]);
-
+  DECLARE_ALIGNED(16, tran_low_t, ref_dqcoeff_ptr[1024]);
-  if (max_size_ == 16) {
+  DECLARE_ALIGNED(16, int16_t, dequant_ptr[2]);
-    starting_sz = TX_4X4;
+  DECLARE_ALIGNED(16, uint16_t, eob_ptr[1]);
-    ending_sz = TX_16X16;
+  DECLARE_ALIGNED(16, uint16_t, ref_eob_ptr[1]);
-  } else {
+  int err_count_total = 0;
-    starting_sz = TX_32X32;
+  int first_failure = -1;
-    ending_sz = TX_32X32;
+  for (int i = 0; i < number_of_iterations; ++i) {
-  }
+    int skip_block = i == 0;
-
+    TX_SIZE sz = TX_32X32;
-  for (TX_SIZE sz = starting_sz; sz <= ending_sz; ++sz) {
+    TX_TYPE tx_type = (TX_TYPE)(i % 4);
-    // zbin > coeff, zbin < coeff.
+    const scan_order *scan_order = &vp9_scan_orders[sz][tx_type];
-    for (int i = 0; i < 2; ++i) {
+    int count = (4 << sz) * (4 << sz);  // 1024
-      const int skip_block = 0;
+    int err_count = 0;
-      // TX_TYPE defines the scan order. That is not relevant to the speed test.
+    *eob_ptr = rnd.Rand16();
-      // Pick the first one.
+    *ref_eob_ptr = *eob_ptr;
-      const TX_TYPE tx_type = DCT_DCT;
+    for (int j = 0; j < count; j++) {
-      const scan_order *scan_order = &vp9_scan_orders[sz][tx_type];
+      coeff_ptr[j] = 0;
-      const int count = (4 << sz) * (4 << sz);
+    }
-
+    // Two random entries
-      GenerateHelperArrays(&rnd, zbin_ptr_, round_ptr_, quant_ptr_,
+    coeff_ptr[rnd(count)] = rnd.Rand16() & mask_;
-                           quant_shift_ptr_, dequant_ptr_, round_fp_ptr_,
+    coeff_ptr[rnd(count)] = rnd.Rand16() & mask_;
-                           quant_fp_ptr_);
+    for (int j = 0; j < 2; j++) {
-      int16_t *r_ptr = (is_fp_) ? round_fp_ptr_ : round_ptr_;
+      zbin_ptr[j] = rnd.Rand16() & mask_;
-      int16_t *q_ptr = (is_fp_) ? quant_fp_ptr_ : quant_ptr_;
+      round_ptr[j] = rnd.Rand16();
-
+      quant_ptr[j] = rnd.Rand16();
-      if (i == 0) {
+      quant_shift_ptr[j] = rnd.Rand16();
-        // When |coeff values| are less than zbin the results are 0.
+      dequant_ptr[j] = rnd.Rand16();
        int threshold = 100;
        if (max_size_ == 32) {
          // For 32x32, the threshold is halved. Double it to keep the values
          // from clearing it.
          threshold = 200;
        }
        for (int j = 0; j < 8; ++j) zbin_ptr_[j] = threshold;
        coeff.Set(&rnd, -99, 99);
      } else if (i == 1) {
        for (int j = 0; j < 8; ++j) zbin_ptr_[j] = 50;
        coeff.Set(&rnd, -500, 500);
      }
      vpx_usec_timer timer;
      vpx_usec_timer_start(&timer);
      for (int j = 0; j < 100000000 / count; ++j) {
        quantize_op_(coeff.TopLeftPixel(), count, skip_block, zbin_ptr_, r_ptr,
                     q_ptr, quant_shift_ptr_, qcoeff.TopLeftPixel(),
                     dqcoeff.TopLeftPixel(), dequant_ptr_, &eob,
                     scan_order->scan, scan_order->iscan);
      }
      vpx_usec_timer_mark(&timer);
      const int elapsed_time = static_cast<int>(vpx_usec_timer_elapsed(&timer));
      if (i == 0) printf("Bypass calculations.\n");
      if (i == 1) printf("Full calculations.\n");
      printf("Quantize %dx%d time: %5d ms\n", 4 << sz, 4 << sz,
             elapsed_time / 1000);
    }
    printf("\n");
  }
 }
-using ::testing::make_tuple;
+    ref_quantize_op_(coeff_ptr, count, skip_block, zbin_ptr, round_ptr,
                     quant_ptr, quant_shift_ptr, ref_qcoeff_ptr,
                     ref_dqcoeff_ptr, dequant_ptr, ref_eob_ptr,
                     scan_order->scan, scan_order->iscan);
    ASM_REGISTER_STATE_CHECK(quantize_op_(
        coeff_ptr, count, skip_block, zbin_ptr, round_ptr, quant_ptr,
        quant_shift_ptr, qcoeff_ptr, dqcoeff_ptr, dequant_ptr, eob_ptr,
        scan_order->scan, scan_order->iscan));
    for (int j = 0; j < sz; ++j) {
      err_count += (ref_qcoeff_ptr[j] != qcoeff_ptr[j]) |
                   (ref_dqcoeff_ptr[j] != dqcoeff_ptr[j]);
    }
    err_count += (*ref_eob_ptr != *eob_ptr);
    if (err_count && !err_count_total) {
      first_failure = i;
    }
    err_count_total += err_count;
  }
  EXPECT_EQ(0, err_count_total)
      << "Error: Quantization Test, C output doesn't match SSE2 output. "
      << "First failed at test case " << first_failure;
 }
 using std::tr1::make_tuple;
 #if HAVE_SSE2
 #if CONFIG_VP9_HIGHBITDEPTH
 // TODO(johannkoenig): Fix vpx_quantize_b_sse2 in highbitdepth builds.
 // make_tuple(&vpx_quantize_b_sse2, &vpx_highbd_quantize_b_c, VPX_BITS_8),
 INSTANTIATE_TEST_CASE_P(
    SSE2, VP9QuantizeTest,
-    ::testing::Values(
+    ::testing::Values(make_tuple(&vpx_highbd_quantize_b_sse2,
-        make_tuple(&vpx_highbd_quantize_b_sse2, &vpx_highbd_quantize_b_c,
+                                 &vpx_highbd_quantize_b_c, VPX_BITS_8),
-                   VPX_BITS_8, 16, false),
+                      make_tuple(&vpx_highbd_quantize_b_sse2,
-        make_tuple(&vpx_highbd_quantize_b_sse2, &vpx_highbd_quantize_b_c,
+                                 &vpx_highbd_quantize_b_c, VPX_BITS_10),
-                   VPX_BITS_10, 16, false),
+                      make_tuple(&vpx_highbd_quantize_b_sse2,
-        make_tuple(&vpx_highbd_quantize_b_sse2, &vpx_highbd_quantize_b_c,
+                                 &vpx_highbd_quantize_b_c, VPX_BITS_12)));
                   VPX_BITS_12, 16, false),
        make_tuple(&vpx_highbd_quantize_b_32x32_sse2,
                   &vpx_highbd_quantize_b_32x32_c, VPX_BITS_8, 32, false),
        make_tuple(&vpx_highbd_quantize_b_32x32_sse2,
                   &vpx_highbd_quantize_b_32x32_c, VPX_BITS_10, 32, false),
        make_tuple(&vpx_highbd_quantize_b_32x32_sse2,
                   &vpx_highbd_quantize_b_32x32_c, VPX_BITS_12, 32, false)));
 #else
 INSTANTIATE_TEST_CASE_P(
-    SSE2, VP9QuantizeTest,
+    SSE2, VP9Quantize32Test,
-    ::testing::Values(make_tuple(&vpx_quantize_b_sse2, &vpx_quantize_b_c,
+    ::testing::Values(make_tuple(&vpx_highbd_quantize_b_32x32_sse2,
-                                 VPX_BITS_8, 16, false),
+                                 &vpx_highbd_quantize_b_32x32_c, VPX_BITS_8),
-                      make_tuple(&QuantFPWrapper<vp9_quantize_fp_sse2>,
+                      make_tuple(&vpx_highbd_quantize_b_32x32_sse2,
-                                 &QuantFPWrapper<quantize_fp_nz_c>, VPX_BITS_8,
+                                 &vpx_highbd_quantize_b_32x32_c, VPX_BITS_10),
-                                 16, true)));
+                      make_tuple(&vpx_highbd_quantize_b_32x32_sse2,
-#endif  // CONFIG_VP9_HIGHBITDEPTH
+                                 &vpx_highbd_quantize_b_32x32_c, VPX_BITS_12)));
 #endif  // HAVE_SSE2
-
+#endif  // CONFIG_VP9_HIGHBITDEPTH
 #if HAVE_SSSE3 && !CONFIG_VP9_HIGHBITDEPTH
 #if ARCH_X86_64
 INSTANTIATE_TEST_CASE_P(
    SSSE3, VP9QuantizeTest,
    ::testing::Values(make_tuple(&vpx_quantize_b_ssse3, &vpx_quantize_b_c,
                                 VPX_BITS_8, 16, false),
                      make_tuple(&QuantFPWrapper<vp9_quantize_fp_ssse3>,
                                 &QuantFPWrapper<quantize_fp_nz_c>, VPX_BITS_8,
                                 16, true),
                      make_tuple(&QuantFPWrapper<vp9_quantize_fp_32x32_ssse3>,
                                 &QuantFPWrapper<quantize_fp_32x32_nz_c>,
                                 VPX_BITS_8, 32, true)));
 #else
 INSTANTIATE_TEST_CASE_P(SSSE3, VP9QuantizeTest,
                        ::testing::Values(make_tuple(&vpx_quantize_b_ssse3,
                                                     &vpx_quantize_b_c,
                                                     VPX_BITS_8, 16, false)));
 #endif
 #if ARCH_X86_64
 // TODO(johannkoenig): SSSE3 optimizations do not yet pass this test.
 INSTANTIATE_TEST_CASE_P(DISABLED_SSSE3, VP9QuantizeTest,
                        ::testing::Values(make_tuple(
                            &vpx_quantize_b_32x32_ssse3,
                            &vpx_quantize_b_32x32_c, VPX_BITS_8, 32, false)));
 #endif  // ARCH_X86_64
 #endif  // HAVE_SSSE3 && !CONFIG_VP9_HIGHBITDEPTH
 // TODO(johannkoenig): AVX optimizations do not yet pass the 32x32 test or
 // highbitdepth configurations.
 #if HAVE_AVX && !CONFIG_VP9_HIGHBITDEPTH
 INSTANTIATE_TEST_CASE_P(
    AVX, VP9QuantizeTest,
    ::testing::Values(make_tuple(&vpx_quantize_b_avx, &vpx_quantize_b_c,
                                 VPX_BITS_8, 16, false),
                      // Even though SSSE3 and AVX do not match the reference
                      // code, we can keep them in sync with each other.
                      make_tuple(&vpx_quantize_b_32x32_avx,
                                 &vpx_quantize_b_32x32_ssse3, VPX_BITS_8, 32,
                                 false)));
 #endif  // HAVE_AVX && !CONFIG_VP9_HIGHBITDEPTH
 #if ARCH_X86_64 && HAVE_AVX2
 INSTANTIATE_TEST_CASE_P(
    AVX2, VP9QuantizeTest,
    ::testing::Values(make_tuple(&QuantFPWrapper<vp9_quantize_fp_avx2>,
                                 &QuantFPWrapper<quantize_fp_nz_c>, VPX_BITS_8,
                                 16, true)));
 #endif  // ARCH_X86_64 && HAVE_AVX2
 // TODO(webm:1448): dqcoeff is not handled correctly in HBD builds.
 #if HAVE_NEON && !CONFIG_VP9_HIGHBITDEPTH
 INSTANTIATE_TEST_CASE_P(
    NEON, VP9QuantizeTest,
    ::testing::Values(make_tuple(&vpx_quantize_b_neon, &vpx_quantize_b_c,
                                 VPX_BITS_8, 16, false),
                      make_tuple(&vpx_quantize_b_32x32_neon,
                                 &vpx_quantize_b_32x32_c, VPX_BITS_8, 32,
                                 false),
                      make_tuple(&QuantFPWrapper<vp9_quantize_fp_neon>,
                                 &QuantFPWrapper<vp9_quantize_fp_c>, VPX_BITS_8,
                                 16, true),
                      make_tuple(&QuantFPWrapper<vp9_quantize_fp_32x32_neon>,
                                 &QuantFPWrapper<vp9_quantize_fp_32x32_c>,
                                 VPX_BITS_8, 32, true)));
 #endif  // HAVE_NEON && !CONFIG_VP9_HIGHBITDEPTH
 // Only useful to compare "Speed" test results.
 INSTANTIATE_TEST_CASE_P(
    DISABLED_C, VP9QuantizeTest,
    ::testing::Values(
        make_tuple(&vpx_quantize_b_c, &vpx_quantize_b_c, VPX_BITS_8, 16, false),
        make_tuple(&vpx_quantize_b_32x32_c, &vpx_quantize_b_32x32_c, VPX_BITS_8,
                   32, false),
        make_tuple(&QuantFPWrapper<vp9_quantize_fp_c>,
                   &QuantFPWrapper<vp9_quantize_fp_c>, VPX_BITS_8, 16, true),
        make_tuple(&QuantFPWrapper<quantize_fp_nz_c>,
                   &QuantFPWrapper<quantize_fp_nz_c>, VPX_BITS_8, 16, true),
        make_tuple(&QuantFPWrapper<quantize_fp_32x32_nz_c>,
                   &QuantFPWrapper<quantize_fp_32x32_nz_c>, VPX_BITS_8, 32,
                   true),
        make_tuple(&QuantFPWrapper<vp9_quantize_fp_32x32_c>,
                   &QuantFPWrapper<vp9_quantize_fp_32x32_c>, VPX_BITS_8, 32,
                   true)));
 }  // namespace
--- a/test/vp9_scale_test.cc
+++ b/test/vp9_scale_test.cc
@ -1,215 +0,0 @@
 /*
 *  Copyright (c) 2017 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
 #include <assert.h>
 #include <stdio.h>
 #include <string.h>
 #include "third_party/googletest/src/include/gtest/gtest.h"
 #include "./vp9_rtcd.h"
 #include "./vpx_config.h"
 #include "./vpx_scale_rtcd.h"
 #include "test/clear_system_state.h"
 #include "test/register_state_check.h"
 #include "test/vpx_scale_test.h"
 #include "vpx_mem/vpx_mem.h"
 #include "vpx_ports/vpx_timer.h"
 #include "vpx_scale/yv12config.h"
 namespace libvpx_test {
 typedef void (*ScaleFrameFunc)(const YV12_BUFFER_CONFIG *src,
                               YV12_BUFFER_CONFIG *dst,
                               INTERP_FILTER filter_type, int phase_scaler);
 // Value-parameterized fixture for frame scaling. The test parameter is the
 // scaling function under test; its output (dst_img_) is checked against the
 // output of vp9_scale_and_extend_frame_c (ref_img_) for the same source.
 class ScaleTest : public VpxScaleBase,
                  public ::testing::TestWithParam<ScaleFrameFunc> {
 public:
  virtual ~ScaleTest() {}

 protected:
  virtual void SetUp() { scale_fn_ = GetParam(); }

  // Produces the expected result in ref_img_ using the C implementation.
  void ReferenceScaleFrame(INTERP_FILTER filter_type, int phase_scaler) {
    vp9_scale_and_extend_frame_c(&img_, &ref_img_, filter_type, phase_scaler);
  }

  // Produces the result under test in dst_img_ using scale_fn_.
  void ScaleFrame(INTERP_FILTER filter_type, int phase_scaler) {
    ASM_REGISTER_STATE_CHECK(
        scale_fn_(&img_, &dst_img_, filter_type, phase_scaler));
  }

  // Sweeps phase (0..15), source height, source width, and up/down scale
  // factors, comparing scale_fn_ with the reference for each usable
  // combination. A mismatch prints the failing configuration and a per-plane
  // diff before the images are compared.
  void RunTest(INTERP_FILTER filter_type) {
    static const int kSizesToTest[] = {
      2,  4,  6,  8,  10, 12, 14, 16, 18,  20,
      22, 24, 26, 28, 30, 32, 34, 68, 128, 134
    };
    static const int kScaleFactors[] = { 1, 2, 3, 4 };
    static const int kNumSizesToTest = 20;
    static const int kNumScaleFactorsToTest = 4;

    for (int phase_scaler = 0; phase_scaler < 16; ++phase_scaler) {
      for (int row = 0; row < kNumSizesToTest; ++row) {
        for (int col = 0; col < kNumSizesToTest; ++col) {
          for (int up = 0; up < kNumScaleFactorsToTest; ++up) {
            for (int down = 0; down < kNumScaleFactorsToTest; ++down) {
              const int src_height = kSizesToTest[row];
              const int src_width = kSizesToTest[col];
              const int sf_up = kScaleFactors[up];
              const int sf_down = kScaleFactors[down];
              const int dst_width = src_width * sf_up / sf_down;
              const int dst_height = src_height * sf_up / sf_down;

              // Equal factors other than 1:1 repeat the 1:1 case.
              const bool redundant_ratio = (sf_up == sf_down) && (sf_up != 1);
              // I420 frame width and height must be even (and non-zero).
              const bool unusable_dst = dst_width == 0 || dst_height == 0 ||
                                        (dst_width & 1) != 0 ||
                                        (dst_height & 1) != 0;
              // vpx_convolve8_c() has restriction on the step which cannot
              // exceed 64 (ratio 1 to 4).
              const bool step_too_large =
                  src_width > 4 * dst_width || src_height > 4 * dst_height;
              if (redundant_ratio || unusable_dst || step_too_large) {
                continue;
              }

              ASSERT_NO_FATAL_FAILURE(ResetScaleImages(src_width, src_height,
                                                       dst_width, dst_height));
              ReferenceScaleFrame(filter_type, phase_scaler);
              ScaleFrame(filter_type, phase_scaler);

              const bool buffers_differ =
                  memcmp(dst_img_.buffer_alloc, ref_img_.buffer_alloc,
                         ref_img_.frame_size) != 0;
              if (buffers_differ) {
                printf(
                    "filter_type = %d, phase_scaler = %d, src_width = %4d, "
                    "src_height = %4d, dst_width = %4d, dst_height = %4d, "
                    "scale factor = %d:%d\n",
                    filter_type, phase_scaler, src_width, src_height, dst_width,
                    dst_height, sf_down, sf_up);
                PrintDiff();
              }

              CompareImages(dst_img_);
              DeallocScaleImages();
            }
          }
        }
      }
    }
  }

  // Prints, for each row of one plane, the first pixel at which |ref| and
  // |opt| disagree. Rows without a difference print nothing.
  void PrintDiffComponent(const uint8_t *const ref, const uint8_t *const opt,
                          const int stride, const int width, const int height,
                          const int plane_idx) const {
    for (int row = 0; row < height; ++row) {
      const uint8_t *const ref_row = ref + row * stride;
      const uint8_t *const opt_row = opt + row * stride;
      for (int col = 0; col < width; ++col) {
        if (ref_row[col] == opt_row[col]) continue;
        printf("Plane %d pixel[%d][%d] diff:%6d (ref),%6d (opt)\n", plane_idx,
               row, col, ref_row[col], opt_row[col]);
        break;  // Only the first mismatch in a row is reported.
      }
    }
  }

  // Prints per-plane differences between ref_img_ and dst_img_ when their
  // backing buffers are not byte-identical. Both images must share geometry.
  void PrintDiff() const {
    assert(ref_img_.y_stride == dst_img_.y_stride);
    assert(ref_img_.y_width == dst_img_.y_width);
    assert(ref_img_.y_height == dst_img_.y_height);
    assert(ref_img_.uv_stride == dst_img_.uv_stride);
    assert(ref_img_.uv_width == dst_img_.uv_width);
    assert(ref_img_.uv_height == dst_img_.uv_height);

    if (memcmp(dst_img_.buffer_alloc, ref_img_.buffer_alloc,
               ref_img_.frame_size) == 0) {
      return;
    }

    PrintDiffComponent(ref_img_.y_buffer, dst_img_.y_buffer, ref_img_.y_stride,
                       ref_img_.y_width, ref_img_.y_height, 0);
    PrintDiffComponent(ref_img_.u_buffer, dst_img_.u_buffer,
                       ref_img_.uv_stride, ref_img_.uv_width,
                       ref_img_.uv_height, 1);
    PrintDiffComponent(ref_img_.v_buffer, dst_img_.v_buffer,
                       ref_img_.uv_stride, ref_img_.uv_width,
                       ref_img_.uv_height, 2);
  }

  // Scaling function under test, taken from the test parameter in SetUp().
  ScaleFrameFunc scale_fn_;
 };
 // Run the full RunTest() sweep once for each interpolation filter.
 TEST_P(ScaleTest, ScaleFrame_EightTap) { RunTest(EIGHTTAP); }
 TEST_P(ScaleTest, ScaleFrame_EightTapSmooth) { RunTest(EIGHTTAP_SMOOTH); }
 TEST_P(ScaleTest, ScaleFrame_EightTapSharp) { RunTest(EIGHTTAP_SHARP); }
 TEST_P(ScaleTest, ScaleFrame_Bilinear) { RunTest(BILINEAR); }
 // Benchmark, disabled by default. For filter types 2 and 3, phases 0 and 1,
 // and each usable up/down factor pair, it scales a 1280x720 source
 // kCountSpeedTestBlock times with the function under test and prints the
 // elapsed time in milliseconds. The reference output is generated once per
 // configuration so the timed result can still be compared afterwards.
 TEST_P(ScaleTest, DISABLED_Speed) {
  static const int kScaleFactors[] = { 1, 2, 3, 4 };
  static const int kNumScaleFactorsToTest = 4;
  static const int kCountSpeedTestBlock = 100;
  const int src_height = 720;
  const int src_width = 1280;

  for (INTERP_FILTER filter_type = 2; filter_type < 4; ++filter_type) {
    for (int phase_scaler = 0; phase_scaler < 2; ++phase_scaler) {
      for (int up = 0; up < kNumScaleFactorsToTest; ++up) {
        for (int down = 0; down < kNumScaleFactorsToTest; ++down) {
          const int sf_up = kScaleFactors[up];
          const int sf_down = kScaleFactors[down];
          const int dst_width = src_width * sf_up / sf_down;
          const int dst_height = src_height * sf_up / sf_down;

          // Equal factors other than 1:1 repeat the 1:1 case.
          const bool redundant_ratio = (sf_up == sf_down) && (sf_up != 1);
          // I420 frame width and height must be even.
          const bool odd_dst = ((dst_width | dst_height) & 1) != 0;
          if (redundant_ratio || odd_dst) {
            continue;
          }

          ASSERT_NO_FATAL_FAILURE(
              ResetScaleImages(src_width, src_height, dst_width, dst_height));
          ASM_REGISTER_STATE_CHECK(
              ReferenceScaleFrame(filter_type, phase_scaler));

          vpx_usec_timer timer;
          vpx_usec_timer_start(&timer);
          for (int rep = 0; rep < kCountSpeedTestBlock; ++rep) {
            ScaleFrame(filter_type, phase_scaler);
          }
          libvpx_test::ClearSystemState();
          vpx_usec_timer_mark(&timer);
          const int elapsed_time =
              static_cast<int>(vpx_usec_timer_elapsed(&timer) / 1000);

          CompareImages(dst_img_);
          DeallocScaleImages();

          printf(
              "filter_type = %d, phase_scaler = %d, src_width = %4d, "
              "src_height = %4d, dst_width = %4d, dst_height = %4d, "
              "scale factor = %d:%d, scale time: %5d ms\n",
              filter_type, phase_scaler, src_width, src_height, dst_width,
              dst_height, sf_down, sf_up, elapsed_time);
        }
      }
    }
  }
 }
 // Instantiate the ScaleTest suite for the C implementation and, when the
 // build enables them, for the SSSE3 and NEON implementations.
 INSTANTIATE_TEST_CASE_P(C, ScaleTest,
                        ::testing::Values(vp9_scale_and_extend_frame_c));
 #if HAVE_SSSE3
 INSTANTIATE_TEST_CASE_P(SSSE3, ScaleTest,
                        ::testing::Values(vp9_scale_and_extend_frame_ssse3));
 #endif  // HAVE_SSSE3
 #if HAVE_NEON
 INSTANTIATE_TEST_CASE_P(NEON, ScaleTest,
                        ::testing::Values(vp9_scale_and_extend_frame_neon));
 #endif  // HAVE_NEON
 }  // namespace libvpx_test
--- a/Show More
+++ b/Show More