Release v1.6.1 Long Tailed Duck

Change-Id: If27447472417c7ed34238295427ddb9da0561725
Merge "Add mips dspr2 partial idct tests"
2017-01-12 12:27:27 -08:00 · 2017-01-09 19:49:02 +00:00 · 2017-01-09 19:47:47 +00:00 · 2017-01-09 19:47:00 +00:00 · 2017-01-09 19:46:18 +00:00 · 2017-01-09 19:45:54 +00:00
986 changed files with 177037 additions and 169146 deletions
--- a/.clang-format
+++ b/.clang-format
@@ -0,0 +1,91 @@
+---
+Language:        Cpp
+# BasedOnStyle:  Google
+# Generated with clang-format 3.8.1
+AccessModifierOffset: -1
+AlignAfterOpenBracket: Align
+AlignConsecutiveAssignments: false
+AlignConsecutiveDeclarations: false
+AlignEscapedNewlinesLeft: true
+AlignOperands:   true
+AlignTrailingComments: true
+AllowAllParametersOfDeclarationOnNextLine: true
+AllowShortBlocksOnASingleLine: false
+AllowShortCaseLabelsOnASingleLine: true
+AllowShortFunctionsOnASingleLine: All
+AllowShortIfStatementsOnASingleLine: true
+AllowShortLoopsOnASingleLine: true
+AlwaysBreakAfterDefinitionReturnType: None
+AlwaysBreakAfterReturnType: None
+AlwaysBreakBeforeMultilineStrings: true
+AlwaysBreakTemplateDeclarations: true
+BinPackArguments: true
+BinPackParameters: true
+BraceWrapping:
+  AfterClass:      false
+  AfterControlStatement: false
+  AfterEnum:       false
+  AfterFunction:   false
+  AfterNamespace:  false
+  AfterObjCDeclaration: false
+  AfterStruct:     false
+  AfterUnion:      false
+  BeforeCatch:     false
+  BeforeElse:      false
+  IndentBraces:    false
+BreakBeforeBinaryOperators: None
+BreakBeforeBraces: Attach
+BreakBeforeTernaryOperators: true
+BreakConstructorInitializersBeforeComma: false
+ColumnLimit:     80
+CommentPragmas:  '^ IWYU pragma:'
+ConstructorInitializerAllOnOneLineOrOnePerLine: false
+ConstructorInitializerIndentWidth: 4
+ContinuationIndentWidth: 4
+Cpp11BracedListStyle: false
+DerivePointerAlignment: false
+DisableFormat:   false
+ExperimentalAutoDetectBinPacking: false
+ForEachMacros:   [ foreach, Q_FOREACH, BOOST_FOREACH ]
+IncludeCategories:
+  - Regex:           '^<.*\.h>'
+    Priority:        1
+  - Regex:           '^<.*'
+    Priority:        2
+  - Regex:           '.*'
+    Priority:        3
+IndentCaseLabels: true
+IndentWidth:     2
+IndentWrappedFunctionNames: false
+KeepEmptyLinesAtTheStartOfBlocks: false
+MacroBlockBegin: ''
+MacroBlockEnd:   ''
+MaxEmptyLinesToKeep: 1
+NamespaceIndentation: None
+ObjCBlockIndentWidth: 2
+ObjCSpaceAfterProperty: false
+ObjCSpaceBeforeProtocolList: false
+PenaltyBreakBeforeFirstCallParameter: 1
+PenaltyBreakComment: 300
+PenaltyBreakFirstLessLess: 120
+PenaltyBreakString: 1000
+PenaltyExcessCharacter: 1000000
+PenaltyReturnTypeOnItsOwnLine: 200
+PointerAlignment: Right
+ReflowComments:  true
+SortIncludes:    false
+SpaceAfterCStyleCast: false
+SpaceBeforeAssignmentOperators: true
+SpaceBeforeParens: ControlStatements
+SpaceInEmptyParentheses: false
+SpacesBeforeTrailingComments: 2
+SpacesInAngles:  false
+SpacesInContainerLiterals: true
+SpacesInCStyleCastParentheses: false
+SpacesInParentheses: false
+SpacesInSquareBrackets: false
+Standard:        Auto
+TabWidth:        8
+UseTab:          Never
+...
+
--- a/.gitignore
+++ b/.gitignore
@@ -30,14 +30,17 @@
 /examples/decode_with_partial_drops
 /examples/example_xma
 /examples/postproc
+/examples/resize_util
 /examples/set_maps
 /examples/simple_decoder
 /examples/simple_encoder
 /examples/twopass_encoder
 /examples/vp8_multi_resolution_encoder
 /examples/vp8cx_set_ref
-/examples/vp9_spatial_scalable_encoder
-/examples/vpx_temporal_scalable_patterns
+/examples/vp9cx_set_ref
+/examples/vp9_lossless_encoder
+/examples/vp9_spatial_svc_encoder
+/examples/vpx_temporal_svc_encoder
 /ivfdec
 /ivfdec.dox
 /ivfenc
@@ -45,12 +48,17 @@
 /libvpx.so*
 /libvpx.ver
 /samples.dox
+/test_intra_pred_speed
 /test_libvpx
+/tools.dox
+/tools/*.dox
+/tools/tiny_ssim
 /vp8_api1_migration.dox
 /vp[89x]_rtcd.h
 /vpx.pc
 /vpx_config.c
 /vpx_config.h
+/vpx_dsp_rtcd.h
 /vpx_scale_rtcd.h
 /vpx_version.h
 /vpxdec
--- a/.mailmap
+++ b/.mailmap
@@ -1,26 +1,37 @@
 Adrian Grange <agrange@google.com>
-Alex Converse <aconverse@google.com> <alex.converse@gmail.com>
+Aℓex Converse <aconverse@google.com>
+Aℓex Converse <aconverse@google.com> <alex.converse@gmail.com>
 Alexis Ballier <aballier@gentoo.org> <alexis.ballier@gmail.com>
 Alpha Lam <hclam@google.com> <hclam@chromium.org>
+Daniele Castagna <dcastagna@chromium.org> <dcastagna@google.com>
 Deb Mukherjee <debargha@google.com>
 Erik Niemeyer <erik.a.niemeyer@intel.com> <erik.a.niemeyer@gmail.com>
 Guillaume Martres <gmartres@google.com> <smarter3@gmail.com>
 Hangyu Kuang <hkuang@google.com>
+Hui Su <huisu@google.com>
+Jacky Chen <jackychen@google.com>
 Jim Bankoski <jimbankoski@google.com>
 Johann Koenig <johannkoenig@google.com>
 Johann Koenig <johannkoenig@google.com> <johann.koenig@duck.com>
+Johann Koenig <johannkoenig@google.com> <johann.koenig@gmail.com>
+Johann Koenig <johannkoenig@google.com> <johannkoenig@chromium.org>
 John Koleszar <jkoleszar@google.com>
 Joshua Litt <joshualitt@google.com> <joshualitt@chromium.org>
 Marco Paniconi <marpan@google.com>
 Marco Paniconi <marpan@google.com> <marpan@chromium.org>
 Pascal Massimino <pascal.massimino@gmail.com>
 Paul Wilkins <paulwilkins@google.com>
+Peter de Rivaz <peter.derivaz@gmail.com>
+Peter de Rivaz <peter.derivaz@gmail.com> <peter.derivaz@argondesign.com>
 Ralph Giles <giles@xiph.org> <giles@entropywave.com>
 Ralph Giles <giles@xiph.org> <giles@mozilla.com>
+Ronald S. Bultje <rsbultje@gmail.com> <rbultje@google.com>
 Sami Pietilä <samipietila@google.com>
 Tamar Levy <tamar.levy@intel.com>
 Tamar Levy <tamar.levy@intel.com> <levytamar82@gmail.com>
 Tero Rintaluoma <teror@google.com> <tero.rintaluoma@on2.com>
-Timothy B. Terriberry <tterribe@xiph.org> Tim Terriberry <tterriberry@mozilla.com>
+Timothy B. Terriberry <tterribe@xiph.org> <tterriberry@mozilla.com>
 Tom Finegan <tomfinegan@google.com>
+Tom Finegan <tomfinegan@google.com> <tomfinegan@chromium.org>
 Yaowu Xu <yaowu@google.com> <yaowu@xuyaowu.com>
+Yaowu Xu <yaowu@google.com> <Yaowu Xu>
--- a/AUTHORS
+++ b/AUTHORS
@@ -5,9 +5,11 @@ Aaron Watry <awatry@gmail.com>
 Abo Talib Mahfoodh <ab.mahfoodh@gmail.com>
 Adam Xu <adam@xuyaowu.com>
 Adrian Grange <agrange@google.com>
+Aℓex Converse <aconverse@google.com>
 Ahmad Sharif <asharif@google.com>
+Aleksey Vasenev <margtu-fivt@ya.ru>
+Alexander Potapenko <glider@google.com>
 Alexander Voronov <avoronov@graphics.cs.msu.ru>
-Alex Converse <aconverse@google.com>
 Alexis Ballier <aballier@gentoo.org>
 Alok Ahuja <waveletcoeff@gmail.com>
 Alpha Lam <hclam@google.com>
@@ -16,24 +18,32 @@ Ami Fischman <fischman@chromium.org>
 Andoni Morales Alastruey <ylatuya@gmail.com>
 Andres Mejia <mcitadel@gmail.com>
 Andrew Russell <anrussell@google.com>
+Angie Chiang <angiebird@google.com>
 Aron Rosenberg <arosenberg@logitech.com>
 Attila Nagy <attilanagy@google.com>
+Brion Vibber <bvibber@wikimedia.org>
 changjun.yang <changjun.yang@intel.com>
 Charles 'Buck' Krasic <ckrasic@google.com>
 chm <chm@rock-chips.com>
 Christian Duvivier <cduvivier@google.com>
+Daniele Castagna <dcastagna@chromium.org>
 Daniel Kang <ddkang@google.com>
 Deb Mukherjee <debargha@google.com>
+Deepa K G <deepa.kg@ittiam.com>
 Dim Temp <dimtemp0@gmail.com>
 Dmitry Kovalev <dkovalev@google.com>
 Dragan Mrdjan <dmrdjan@mips.com>
+Ed Baker <edward.baker@intel.com>
 Ehsan Akhgari <ehsan.akhgari@gmail.com>
 Erik Niemeyer <erik.a.niemeyer@intel.com>
 Fabio Pedretti <fabio.ped@libero.it>
 Frank Galligan <fgalligan@google.com>
 Fredrik Söderquist <fs@opera.com>
 Fritz Koenig <frkoenig@google.com>
+Gabriel Marin <gmx@chromium.org>
 Gaute Strokkenes <gaute.strokkenes@broadcom.com>
+Geza Lore <gezalore@gmail.com>
+Ghislain MARY <ghislainmary2@gmail.com>
 Giuseppe Scrivano <gscrivano@gnu.org>
 Gordana Cmiljanovic <gordana.cmiljanovic@imgtec.com>
 Guillaume Martres <gmartres@google.com>
@@ -42,31 +52,39 @@ Hangyu Kuang <hkuang@google.com>
 Hanno Böck <hanno@hboeck.de>
 Henrik Lundin <hlundin@google.com>
 Hui Su <huisu@google.com>
+Ivan Krasin <krasin@chromium.org>
 Ivan Maltz <ivanmaltz@google.com>
 Jacek Caban <cjacek@gmail.com>
-JackyChen <jackychen@google.com>
+Jacky Chen <jackychen@google.com>
 James Berry <jamesberry@google.com>
 James Yu <james.yu@linaro.org>
 James Zern <jzern@google.com>
 Jan Gerber <j@mailb.org>
 Jan Kratochvil <jan.kratochvil@redhat.com>
 Janne Salonen <jsalonen@google.com>
+Jean-Yves Avenard <jyavenard@mozilla.com>
 Jeff Faust <jfaust@google.com>
 Jeff Muizelaar <jmuizelaar@mozilla.com>
 Jeff Petkau <jpet@chromium.org>
+Jerome Jiang <jianj@google.com>
 Jia Jia <jia.jia@linaro.org>
+Jian Zhou <zhoujian@google.com>
 Jim Bankoski <jimbankoski@google.com>
 Jingning Han <jingning@google.com>
 Joey Parrish <joeyparrish@google.com>
 Johann Koenig <johannkoenig@google.com>
 John Koleszar <jkoleszar@google.com>
+Johnny Klonaris <google@jawknee.com>
 John Stark <jhnstrk@gmail.com>
 Joshua Bleecher Snyder <josh@treelinelabs.com>
 Joshua Litt <joshualitt@google.com>
+Julia Robson <juliamrobson@gmail.com>
 Justin Clift <justin@salasaga.org>
 Justin Lebar <justin.lebar@gmail.com>
+Kaustubh Raste <kaustubh.raste@imgtec.com>
 KO Myung-Hun <komh@chollian.net>
 Lawrence Velázquez <larryv@macports.org>
+Linfeng Zhang <linfengz@google.com>
 Lou Quillio <louquillio@google.com>
 Luca Barbato <lu_zero@gentoo.org>
 Makoto Kato <makoto.kt@gmail.com>
@@ -80,8 +98,12 @@ Michael Kohler <michaelkohler@live.com>
 Mike Frysinger <vapier@chromium.org>
 Mike Hommey <mhommey@mozilla.com>
 Mikhal Shemer <mikhal@google.com>
+Min Chen <chenm003@gmail.com>
 Minghai Shang <minghai@google.com>
+Min Ye <yeemmi@google.com>
 Morton Jonuschat <yabawock@gmail.com>
+Nathan E. Egge <negge@mozilla.com>
+Nico Weber <thakis@chromium.org>
 Parag Salasakar <img.mips1@gmail.com>
 Pascal Massimino <pascal.massimino@gmail.com>
 Patrik Westin <patrik.westin@gmail.com>
@@ -89,21 +111,27 @@ Paul Wilkins <paulwilkins@google.com>
 Pavol Rusnak <stick@gk2.sk>
 Paweł Hajdan <phajdan@google.com>
 Pengchong Jin <pengchong@google.com>
+Peter Boström <pbos@google.com>
 Peter de Rivaz <peter.derivaz@gmail.com>
 Philip Jägenstedt <philipj@opera.com>
 Priit Laes <plaes@plaes.org>
 Rafael Ávila de Espíndola <rafael.espindola@gmail.com>
 Rafaël Carré <funman@videolan.org>
 Ralph Giles <giles@xiph.org>
+Ranjit Kumar Tulabandu <ranjit.tulabandu@ittiam.com>
 Rob Bradford <rob@linux.intel.com>
-Ronald S. Bultje <rbultje@google.com>
+Ronald S. Bultje <rsbultje@gmail.com>
 Rui Ueyama <ruiu@google.com>
 Sami Pietilä <samipietila@google.com>
+Sarah Parker <sarahparker@google.com>
+Sasi Inguva <isasi@google.com>
 Scott Graham <scottmg@chromium.org>
 Scott LaVarnway <slavarnway@google.com>
 Sean McGovern <gseanmcg@gmail.com>
+Sergey Kolomenkin <kolomenkin@gmail.com>
 Sergey Ulanov <sergeyu@chromium.org>
 Shimon Doodkin <helpmepro1@gmail.com>
+Shunyao Li <shunyaoli@google.com>
 Stefan Holmer <holmer@google.com>
 Suman Sunkara <sunkaras@google.com>
 Taekhyun Kim <takim@nvidia.com>
@@ -115,10 +143,14 @@ Thijs Vermeir <thijsvermeir@gmail.com>
 Tim Kopp <tkopp@google.com>
 Timothy B. Terriberry <tterribe@xiph.org>
 Tom Finegan <tomfinegan@google.com>
+Tristan Matthews <le.businessman@gmail.com>
+Urvang Joshi <urvang@google.com>
 Vignesh Venkatasubramanian <vigneshv@google.com>
 Yaowu Xu <yaowu@google.com>
+Yi Luo <luoyi@google.com>
 Yongzhe Wang <yongzhe@google.com>
 Yunqing Wang <yunqingwang@google.com>
+Yury Gitman <yuryg@google.com>
 Zoe Liu <zoeliu@google.com>
 Google Inc.
 The Mozilla Foundation
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -1,7 +1,65 @@
-xxxx-yy-zz v1.4.0 "Changes for next release"
-  vpxenc is changed to use VP9 by default.
-  Encoder controls added for 1 pass SVC.
-  Decoder control to toggle on/off loopfilter.
+2017-01-09 v1.6.1 "Long Tailed Duck"
+  This release improves upon the VP9 encoder and speeds up the encoding and
+  decoding processes.
+
+  - Upgrading:
+    This release is ABI compatible with 1.6.0.
+
+  - Enhancements:
+    Faster VP9 encoding and decoding.
+    High bit depth builds now provide similar speed for 8 bit encode and decode
+    for x86 targets. Other platforms and higher bit depth improvements are in
+    progress.
+
+  - Bug Fixes:
+    A variety of fuzzing issues.
+
+2016-07-20 v1.6.0 "Khaki Campbell Duck"
+  This release improves upon the VP9 encoder and speeds up the encoding and
+  decoding processes.
+
+  - Upgrading:
+    This release is ABI incompatible with 1.5.0 due to a new 'color_range' enum
+    in vpx_image and some minor changes to the VP8_COMP structure.
+
+    The default key frame interval for VP9 has changed from 128 to 9999.
+
+  - Enhancements:
+    A core focus has been performance for low end Intel processors. SSSE3
+    instructions such as 'pshufb' have been avoided and instructions have been
+    reordered to better accommodate the more constrained pipelines.
+
+    As a result, devices based on Celeron processors have seen substantial
+    decoding improvements. From Indian Runner Duck to Javan Whistling Duck,
+    decoding speed improved between 10 and 30%. Between Javan Whistling Duck
+    and Khaki Campbell Duck, it improved another 10 to 15%.
+
+    While Celeron benefited most, Core-i5 also improved 5% and 10% between the
+    respective releases.
+
+    Realtime performance for WebRTC for both speed and quality has received a
+    lot of attention.
+
+  - Bug Fixes:
+    A number of fuzzing issues, found variously by Mozilla, Chromium and others,
+    have been fixed and we strongly recommend updating.
+
+2015-11-09 v1.5.0 "Javan Whistling Duck"
+  This release improves upon the VP9 encoder and speeds up the encoding and
+  decoding processes.
+
+  - Upgrading:
+    This release is ABI incompatible with 1.4.0. It drops deprecated VP8
+    controls and adds a variety of VP9 controls for testing.
+
+    The vpxenc utility now prefers VP9 by default.
+
+  - Enhancements:
+    Faster VP9 encoding and decoding
+    Smaller library size by combining functions used by VP8 and VP9
+
+  - Bug Fixes:
+    A variety of fuzzing issues

 2015-04-03 v1.4.0 "Indian Runner Duck"
  This release includes significant improvements to the VP9 codec.
--- a/README
+++ b/README
@@ -1,4 +1,4 @@
-README - 23 March 2015
+README - 9 January 2017

 Welcome to the WebM VP8/VP9 Codec SDK!

@@ -47,11 +47,9 @@ COMPILING THE APPLICATIONS/LIBRARIES:
  --help output of the configure script. As of this writing, the list of
  available targets is:

-    armv6-darwin-gcc
-    armv6-linux-rvct
-    armv6-linux-gcc
-    armv6-none-rvct
+    arm64-android-gcc
    arm64-darwin-gcc
+    arm64-linux-gcc
    armv7-android-gcc
    armv7-darwin-gcc
    armv7-linux-rvct
@@ -59,7 +57,9 @@ COMPILING THE APPLICATIONS/LIBRARIES:
    armv7-none-rvct
    armv7-win32-vs11
    armv7-win32-vs12
+    armv7-win32-vs14
    armv7s-darwin-gcc
+    armv8-linux-gcc
    mips32-linux-gcc
    mips64-linux-gcc
    sparc-solaris-gcc
@@ -73,34 +73,34 @@ COMPILING THE APPLICATIONS/LIBRARIES:
    x86-darwin12-gcc
    x86-darwin13-gcc
    x86-darwin14-gcc
+    x86-darwin15-gcc
    x86-iphonesimulator-gcc
    x86-linux-gcc
    x86-linux-icc
    x86-os2-gcc
    x86-solaris-gcc
    x86-win32-gcc
-    x86-win32-vs7
-    x86-win32-vs8
-    x86-win32-vs9
    x86-win32-vs10
    x86-win32-vs11
    x86-win32-vs12
+    x86-win32-vs14
+    x86_64-android-gcc
    x86_64-darwin9-gcc
    x86_64-darwin10-gcc
    x86_64-darwin11-gcc
    x86_64-darwin12-gcc
    x86_64-darwin13-gcc
    x86_64-darwin14-gcc
+    x86_64-darwin15-gcc
    x86_64-iphonesimulator-gcc
    x86_64-linux-gcc
    x86_64-linux-icc
    x86_64-solaris-gcc
    x86_64-win64-gcc
-    x86_64-win64-vs8
-    x86_64-win64-vs9
    x86_64-win64-vs10
    x86_64-win64-vs11
    x86_64-win64-vs12
+    x86_64-win64-vs14
    generic-gnu

  The generic-gnu target, in conjunction with the CROSS environment variable,
@@ -129,7 +129,22 @@ VP8/VP9 TEST VECTORS:
  $ ./configure --enable-unit-tests
  $ LIBVPX_TEST_DATA_PATH=../libvpx-test-data make testdata

+CODE STYLE:
+  The coding style used by this project is enforced with clang-format using the
+  configuration contained in the .clang-format file in the root of the
+  repository.
+
+  Before pushing changes for review you can format your code with:
+  # Apply clang-format to modified .c, .h and .cc files
+  $ clang-format -i --style=file \
+    $(git diff --name-only --diff-filter=ACMR '*.[hc]' '*.cc')
+
+  Check the .clang-format file for the version used to generate it if there is
+  any difference between your local formatting and the review system.
+
+  See also: http://clang.llvm.org/docs/ClangFormat.html
+
 SUPPORT
  This library is an open source project supported by its community. Please
-  please email webm-discuss@webmproject.org for help.
+  email webm-discuss@webmproject.org for help.

--- a/args.c
+++ b/args.c
@@ -8,12 +8,12 @@
 *  be found in the AUTHORS file in the root of the source tree.
 */

-
 #include <stdlib.h>
 #include <string.h>
 #include <limits.h>
 #include "args.h"

+#include "vpx/vpx_integer.h"
 #include "vpx_ports/msvc.h"

 #if defined(__GNUC__) && __GNUC__
@@ -22,42 +22,36 @@ extern void die(const char *fmt, ...) __attribute__((noreturn));
 extern void die(const char *fmt, ...);
 #endif

-
 struct arg arg_init(char **argv) {
  struct arg a;

-  a.argv      = argv;
+  a.argv = argv;
  a.argv_step = 1;
-  a.name      = NULL;
-  a.val       = NULL;
-  a.def       = NULL;
+  a.name = NULL;
+  a.val = NULL;
+  a.def = NULL;
  return a;
 }

 int arg_match(struct arg *arg_, const struct arg_def *def, char **argv) {
  struct arg arg;

-  if (!argv[0] || argv[0][0] != '-')
-    return 0;
+  if (!argv[0] || argv[0][0] != '-') return 0;

  arg = arg_init(argv);

-  if (def->short_name
-      && strlen(arg.argv[0]) == strlen(def->short_name) + 1
-      && !strcmp(arg.argv[0] + 1, def->short_name)) {
-
+  if (def->short_name && strlen(arg.argv[0]) == strlen(def->short_name) + 1 &&
+      !strcmp(arg.argv[0] + 1, def->short_name)) {
    arg.name = arg.argv[0] + 1;
    arg.val = def->has_val ? arg.argv[1] : NULL;
    arg.argv_step = def->has_val ? 2 : 1;
  } else if (def->long_name) {
    const size_t name_len = strlen(def->long_name);

-    if (strlen(arg.argv[0]) >= name_len + 2
-        && arg.argv[0][1] == '-'
-        && !strncmp(arg.argv[0] + 2, def->long_name, name_len)
-        && (arg.argv[0][name_len + 2] == '='
-            || arg.argv[0][name_len + 2] == '\0')) {
-
+    if (strlen(arg.argv[0]) >= name_len + 2 && arg.argv[0][1] == '-' &&
+        !strncmp(arg.argv[0] + 2, def->long_name, name_len) &&
+        (arg.argv[0][name_len + 2] == '=' ||
+         arg.argv[0][name_len + 2] == '\0')) {
      arg.name = arg.argv[0] + 2;
      arg.val = arg.name[name_len] == '=' ? arg.name + name_len + 1 : NULL;
      arg.argv_step = 1;
@@ -70,8 +64,7 @@ int arg_match(struct arg *arg_, const struct arg_def *def, char **argv) {
  if (arg.name && arg.val && !def->has_val)
    die("Error: option %s requires no argument.\n", arg.name);

-  if (arg.name
-      && (arg.val || !def->has_val)) {
+  if (arg.name && (arg.val || !def->has_val)) {
    arg.def = def;
    *arg_ = arg;
    return 1;
@@ -80,15 +73,12 @@ int arg_match(struct arg *arg_, const struct arg_def *def, char **argv) {
  return 0;
 }

-
 const char *arg_next(struct arg *arg) {
-  if (arg->argv[0])
-    arg->argv += arg->argv_step;
+  if (arg->argv[0]) arg->argv += arg->argv_step;

  return *arg->argv;
 }

-
 char **argv_dup(int argc, const char **argv) {
  char **new_argv = malloc((argc + 1) * sizeof(*argv));

@@ -97,9 +87,8 @@ char **argv_dup(int argc, const char **argv) {
  return new_argv;
 }

-
 void arg_show_usage(FILE *fp, const struct arg_def *const *defs) {
-  char option_text[40] = {0};
+  char option_text[40] = { 0 };

  for (; *defs; defs++) {
    const struct arg_def *def = *defs;
@@ -109,15 +98,12 @@ void arg_show_usage(FILE *fp, const struct arg_def *const *defs) {
    if (def->short_name && def->long_name) {
      char *comma = def->has_val ? "," : ",      ";

-      snprintf(option_text, 37, "-%s%s%s --%s%6s",
-               def->short_name, short_val, comma,
-               def->long_name, long_val);
+      snprintf(option_text, 37, "-%s%s%s --%s%6s", def->short_name, short_val,
+               comma, def->long_name, long_val);
    } else if (def->short_name)
-      snprintf(option_text, 37, "-%s%s",
-               def->short_name, short_val);
+      snprintf(option_text, 37, "-%s%s", def->short_name, short_val);
    else if (def->long_name)
-      snprintf(option_text, 37, "          --%s%s",
-               def->long_name, long_val);
+      snprintf(option_text, 37, "          --%s%s", def->long_name, long_val);

    fprintf(fp, "  %-37s\t%s\n", option_text, def->desc);

@@ -127,110 +113,103 @@ void arg_show_usage(FILE *fp, const struct arg_def *const *defs) {
      fprintf(fp, "  %-37s\t  ", "");

      for (listptr = def->enums; listptr->name; listptr++)
-        fprintf(fp, "%s%s", listptr->name,
-                listptr[1].name ? ", " : "\n");
+        fprintf(fp, "%s%s", listptr->name, listptr[1].name ? ", " : "\n");
    }
  }
 }

-
 unsigned int arg_parse_uint(const struct arg *arg) {
-  long int   rawval;
-  char      *endptr;
+  unsigned long rawval;  /* full strtoul() width; narrowed only after check */
+  char *endptr;

-  rawval = strtol(arg->val, &endptr, 10);
+  rawval = strtoul(arg->val, &endptr, 10);

  if (arg->val[0] != '\0' && endptr[0] == '\0') {
-    if (rawval >= 0 && rawval <= UINT_MAX)
-      return rawval;
+    if (rawval <= UINT_MAX) return (unsigned int)rawval;

-    die("Option %s: Value %ld out of range for unsigned int\n",
-        arg->name, rawval);
+    die("Option %s: Value %lu out of range for unsigned int\n", arg->name,
+        rawval);
  }

  die("Option %s: Invalid character '%c'\n", arg->name, *endptr);
  return 0;
 }

-
 int arg_parse_int(const struct arg *arg) {
-  long int   rawval;
-  char      *endptr;
+  long int rawval;  /* full strtol() width; narrowed only after range check */
+  char *endptr;

-  rawval = strtol(arg->val, &endptr, 10);
+  rawval = strtol(arg->val, &endptr, 10);

  if (arg->val[0] != '\0' && endptr[0] == '\0') {
-    if (rawval >= INT_MIN && rawval <= INT_MAX)
-      return rawval;
+    if (rawval >= INT_MIN && rawval <= INT_MAX) return (int)rawval;

-    die("Option %s: Value %ld out of range for signed int\n",
-        arg->name, rawval);
+    die("Option %s: Value %ld out of range for signed int\n", arg->name,
+        rawval);
  }

  die("Option %s: Invalid character '%c'\n", arg->name, *endptr);
  return 0;
 }

-
 struct vpx_rational {
  int num; /**< fraction numerator */
  int den; /**< fraction denominator */
 };
 struct vpx_rational arg_parse_rational(const struct arg *arg) {
-  long int             rawval;
-  char                *endptr;
-  struct vpx_rational  rat;
+  long int rawval;
+  char *endptr;
+  struct vpx_rational rat;

  /* parse numerator */
  rawval = strtol(arg->val, &endptr, 10);

  if (arg->val[0] != '\0' && endptr[0] == '/') {
    if (rawval >= INT_MIN && rawval <= INT_MAX)
-      rat.num = rawval;
-    else die("Option %s: Value %ld out of range for signed int\n",
-               arg->name, rawval);
-  } else die("Option %s: Expected / at '%c'\n", arg->name, *endptr);
+      rat.num = (int)rawval;
+    else
+      die("Option %s: Value %ld out of range for signed int\n", arg->name,
+          rawval);
+  } else
+    die("Option %s: Expected / at '%c'\n", arg->name, *endptr);

  /* parse denominator */
  rawval = strtol(endptr + 1, &endptr, 10);

  if (arg->val[0] != '\0' && endptr[0] == '\0') {
    if (rawval >= INT_MIN && rawval <= INT_MAX)
-      rat.den = rawval;
-    else die("Option %s: Value %ld out of range for signed int\n",
-               arg->name, rawval);
-  } else die("Option %s: Invalid character '%c'\n", arg->name, *endptr);
+      rat.den = (int)rawval;
+    else
+      die("Option %s: Value %ld out of range for signed int\n", arg->name,
+          rawval);
+  } else
+    die("Option %s: Invalid character '%c'\n", arg->name, *endptr);

  return rat;
 }

-
 int arg_parse_enum(const struct arg *arg) {
  const struct arg_enum_list *listptr;
-  long int                    rawval;
-  char                       *endptr;
+  long int rawval;
+  char *endptr;

  /* First see if the value can be parsed as a raw value */
  rawval = strtol(arg->val, &endptr, 10);
  if (arg->val[0] != '\0' && endptr[0] == '\0') {
    /* Got a raw value, make sure it's valid */
    for (listptr = arg->def->enums; listptr->name; listptr++)
-      if (listptr->val == rawval)
-        return rawval;
+      if (listptr->val == rawval) return (int)rawval;
  }

  /* Next see if it can be parsed as a string */
  for (listptr = arg->def->enums; listptr->name; listptr++)
-    if (!strcmp(arg->val, listptr->name))
-      return listptr->val;
+    if (!strcmp(arg->val, listptr->name)) return listptr->val;

  die("Option %s: Invalid value '%s'\n", arg->name, arg->val);
  return 0;
 }

-
 int arg_parse_enum_or_int(const struct arg *arg) {
-  if (arg->def->enums)
-    return arg_parse_enum(arg);
+  if (arg->def->enums) return arg_parse_enum(arg);
  return arg_parse_int(arg);
 }
--- a/args.h
+++ b/args.h
@@ -8,7 +8,6 @@
 *  be found in the AUTHORS file in the root of the source tree.
 */

-
 #ifndef ARGS_H_
 #define ARGS_H_
 #include <stdio.h>
@@ -18,29 +17,33 @@ extern "C" {
 #endif

 struct arg {
-  char                 **argv;
-  const char            *name;
-  const char            *val;
-  unsigned int           argv_step;
-  const struct arg_def  *def;
+  char **argv;
+  const char *name;
+  const char *val;
+  unsigned int argv_step;
+  const struct arg_def *def;
 };

 struct arg_enum_list {
  const char *name;
-  int         val;
+  int val;
 };
-#define ARG_ENUM_LIST_END {0}
+#define ARG_ENUM_LIST_END \
+  { 0 }

 typedef struct arg_def {
  const char *short_name;
  const char *long_name;
-  int         has_val;
+  int has_val;
  const char *desc;
  const struct arg_enum_list *enums;
 } arg_def_t;
-#define ARG_DEF(s,l,v,d) {s,l,v,d, NULL}
-#define ARG_DEF_ENUM(s,l,v,d,e) {s,l,v,d,e}
-#define ARG_DEF_LIST_END {0}
+#define ARG_DEF(s, l, v, d) \
+  { s, l, v, d, NULL }
+#define ARG_DEF_ENUM(s, l, v, d, e) \
+  { s, l, v, d, e }
+#define ARG_DEF_LIST_END \
+  { 0 }

 struct arg arg_init(char **argv);
 int arg_match(struct arg *arg_, const struct arg_def *def, char **argv);
--- a/build/make/Android.mk
+++ b/build/make/Android.mk
@@ -29,11 +29,6 @@
 # include $(CLEAR_VARS)
 # include jni/libvpx/build/make/Android.mk
 #
-# There are currently two TARGET_ARCH_ABI targets for ARM.
-# armeabi and armeabi-v7a.  armeabi-v7a is selected by creating an
-# Application.mk in the jni directory that contains:
-# APP_ABI := armeabi-v7a
-#
 # By default libvpx will detect at runtime the existance of NEON extension.
 # For this we import the 'cpufeatures' module from the NDK sources.
 # libvpx can also be configured without this runtime detection method.
@@ -42,31 +37,49 @@
 #     --disable-neon-asm
 # will remove any NEON dependency.

-# To change to building armeabi, run ./libvpx/configure again, but with
-# --target=armv6-android-gcc and modify the Application.mk file to
-# set APP_ABI := armeabi
 #
 # Running ndk-build will build libvpx and include it in your project.
 #

+# Alternatively, building the examples and unit tests can be accomplished in the
+# following way:
+#
+# Create a standalone toolchain from the NDK:
+# https://developer.android.com/ndk/guides/standalone_toolchain.html
+#
+# For example - to test on arm64 devices with clang:
+# $NDK/build/tools/make_standalone_toolchain.py \
+#   --arch arm64 --install-dir=/tmp/my-android-toolchain
+# export PATH=/tmp/my-android-toolchain/bin:$PATH
+# CROSS=aarch64-linux-android- CC=clang CXX=clang++ /path/to/libvpx/configure \
+#   --target=arm64-android-gcc
+#
+# Push the resulting binaries to a device and run them:
+# adb push test_libvpx /data/tmp/test_libvpx
+# adb shell /data/tmp/test_libvpx --gtest_filter=\*Sixtap\*
+#
+# Make sure to push the test data as well and set LIBVPX_TEST_DATA_PATH
+
 CONFIG_DIR := $(LOCAL_PATH)/
 LIBVPX_PATH := $(LOCAL_PATH)/libvpx
 ASM_CNV_PATH_LOCAL := $(TARGET_ARCH_ABI)/ads2gas
 ASM_CNV_PATH := $(LOCAL_PATH)/$(ASM_CNV_PATH_LOCAL)
+ifneq ($(V),1)
+  qexec := @
+endif

 # Use the makefiles generated by upstream configure to determine which files to
 # build. Also set any architecture-specific flags.
 ifeq ($(TARGET_ARCH_ABI),armeabi-v7a)
  include $(CONFIG_DIR)libs-armv7-android-gcc.mk
  LOCAL_ARM_MODE := arm
-else ifeq  ($(TARGET_ARCH_ABI),armeabi)
-  include $(CONFIG_DIR)libs-armv6-android-gcc.mk
-  LOCAL_ARM_MODE := arm
 else ifeq  ($(TARGET_ARCH_ABI),arm64-v8a)
-  include $(CONFIG_DIR)libs-armv8-android-gcc.mk
+  include $(CONFIG_DIR)libs-arm64-android-gcc.mk
  LOCAL_ARM_MODE := arm
 else ifeq ($(TARGET_ARCH_ABI),x86)
  include $(CONFIG_DIR)libs-x86-android-gcc.mk
+else ifeq ($(TARGET_ARCH_ABI),x86_64)
+  include $(CONFIG_DIR)libs-x86_64-android-gcc.mk
 else ifeq ($(TARGET_ARCH_ABI),mips)
  include $(CONFIG_DIR)libs-mips-android-gcc.mk
 else
@@ -91,10 +104,10 @@ LOCAL_CFLAGS := -O3
 # like x86inc.asm and x86_abi_support.asm
 LOCAL_ASMFLAGS := -I$(LIBVPX_PATH)

-.PRECIOUS: %.asm.s
-$(ASM_CNV_PATH)/libvpx/%.asm.s: $(LIBVPX_PATH)/%.asm
-	@mkdir -p $(dir $@)
-	@$(CONFIG_DIR)$(ASM_CONVERSION) <$< > $@
+.PRECIOUS: %.asm.S
+$(ASM_CNV_PATH)/libvpx/%.asm.S: $(LIBVPX_PATH)/%.asm
+	$(qexec)mkdir -p $(dir $@)
+	$(qexec)$(CONFIG_DIR)$(ASM_CONVERSION) <$< > $@

 # For building *_rtcd.h, which have rules in libs.mk
 TGT_ISA:=$(word 1, $(subst -, ,$(TOOLCHAIN)))
@@ -122,7 +135,7 @@ endif

 # Pull out assembly files, splitting NEON from the rest.  This is
 # done to specify that the NEON assembly files use NEON assembler flags.
-# x86 assembly matches %.asm, arm matches %.asm.s
+# x86 assembly matches %.asm, arm matches %.asm.S

 # x86:

@@ -130,31 +143,44 @@ CODEC_SRCS_ASM_X86 = $(filter %.asm, $(CODEC_SRCS_UNIQUE))
 LOCAL_SRC_FILES += $(foreach file, $(CODEC_SRCS_ASM_X86), libvpx/$(file))

 # arm:
-CODEC_SRCS_ASM_ARM_ALL = $(filter %.asm.s, $(CODEC_SRCS_UNIQUE))
+CODEC_SRCS_ASM_ARM_ALL = $(filter %.asm.S, $(CODEC_SRCS_UNIQUE))
 CODEC_SRCS_ASM_ARM = $(foreach v, \
                     $(CODEC_SRCS_ASM_ARM_ALL), \
                     $(if $(findstring neon,$(v)),,$(v)))
-CODEC_SRCS_ASM_ADS2GAS = $(patsubst %.s, \
-                         $(ASM_CNV_PATH_LOCAL)/libvpx/%.s, \
+CODEC_SRCS_ASM_ADS2GAS = $(patsubst %.S, \
+                         $(ASM_CNV_PATH_LOCAL)/libvpx/%.S, \
                         $(CODEC_SRCS_ASM_ARM))
 LOCAL_SRC_FILES += $(CODEC_SRCS_ASM_ADS2GAS)

 ifeq ($(TARGET_ARCH_ABI),armeabi-v7a)
+  ASM_INCLUDES := vpx_dsp/arm/idct_neon.asm.S
  CODEC_SRCS_ASM_NEON = $(foreach v, \
                        $(CODEC_SRCS_ASM_ARM_ALL),\
                        $(if $(findstring neon,$(v)),$(v),))
-  CODEC_SRCS_ASM_NEON_ADS2GAS = $(patsubst %.s, \
-                                $(ASM_CNV_PATH_LOCAL)/libvpx/%.s, \
+  CODEC_SRCS_ASM_NEON := $(filter-out $(addprefix %, $(ASM_INCLUDES)), \
+                         $(CODEC_SRCS_ASM_NEON))
+  CODEC_SRCS_ASM_NEON_ADS2GAS = $(patsubst %.S, \
+                                $(ASM_CNV_PATH_LOCAL)/libvpx/%.S, \
                                $(CODEC_SRCS_ASM_NEON))
-  LOCAL_SRC_FILES += $(patsubst %.s, \
-                     %.s.neon, \
+  LOCAL_SRC_FILES += $(patsubst %.S, \
+                     %.S.neon, \
                     $(CODEC_SRCS_ASM_NEON_ADS2GAS))
+
+  NEON_ASM_TARGETS = $(patsubst %.S, \
+                     $(ASM_CNV_PATH)/libvpx/%.S, \
+                     $(CODEC_SRCS_ASM_NEON))
+# add a dependency to the full path to the ads2gas output to ensure the
+# includes are converted first.
+ifneq ($(strip $(NEON_ASM_TARGETS)),)
+$(NEON_ASM_TARGETS): $(addprefix $(ASM_CNV_PATH)/libvpx/, $(ASM_INCLUDES))
+endif
 endif

 LOCAL_CFLAGS += \
    -DHAVE_CONFIG_H=vpx_config.h \
    -I$(LIBVPX_PATH) \
-    -I$(ASM_CNV_PATH)
+    -I$(ASM_CNV_PATH) \
+    -I$(ASM_CNV_PATH)/libvpx

 LOCAL_MODULE := libvpx

@@ -164,17 +190,20 @@ endif

 # Add a dependency to force generation of the RTCD files.
 define rtcd_dep_template
+rtcd_dep_template_SRCS := $(addprefix $(LOCAL_PATH)/, $(LOCAL_SRC_FILES))
+rtcd_dep_template_SRCS := $$(rtcd_dep_template_SRCS:.neon=)
 ifeq ($(CONFIG_VP8), yes)
-$(foreach file, $(LOCAL_SRC_FILES), $(LOCAL_PATH)/$(file)): vp8_rtcd.h
+$$(rtcd_dep_template_SRCS): vp8_rtcd.h
 endif
 ifeq ($(CONFIG_VP9), yes)
-$(foreach file, $(LOCAL_SRC_FILES), $(LOCAL_PATH)/$(file)): vp9_rtcd.h
+$$(rtcd_dep_template_SRCS): vp9_rtcd.h
 endif
-$(foreach file, $(LOCAL_SRC_FILES), $(LOCAL_PATH)/$(file)): vpx_scale_rtcd.h
-$(foreach file, $(LOCAL_SRC_FILES), $(LOCAL_PATH)/$(file)): vpx_dsp_rtcd.h
+$$(rtcd_dep_template_SRCS): vpx_scale_rtcd.h
+$$(rtcd_dep_template_SRCS): vpx_dsp_rtcd.h

-ifeq ($(TARGET_ARCH_ABI),x86)
-$(foreach file, $(LOCAL_SRC_FILES), $(LOCAL_PATH)/$(file)): vpx_config.asm
+rtcd_dep_template_CONFIG_ASM_ABIS := x86 x86_64 armeabi-v7a
+ifneq ($$(filter $(TARGET_ARCH_ABI),$$(rtcd_dep_template_CONFIG_ASM_ABIS)),)
+$$(rtcd_dep_template_SRCS): vpx_config.asm
 endif
 endef

@@ -183,16 +212,17 @@ $(eval $(call rtcd_dep_template))
 .PHONY: clean
 clean:
 	@echo "Clean: ads2gas files [$(TARGET_ARCH_ABI)]"
-	@$(RM) $(CODEC_SRCS_ASM_ADS2GAS) $(CODEC_SRCS_ASM_NEON_ADS2GAS)
-	@$(RM) -r $(ASM_CNV_PATH)
-	@$(RM) $(CLEAN-OBJS)
+	$(qexec)$(RM) $(CODEC_SRCS_ASM_ADS2GAS) $(CODEC_SRCS_ASM_NEON_ADS2GAS)
+	$(qexec)$(RM) -r $(ASM_CNV_PATH)
+	$(qexec)$(RM) $(CLEAN-OBJS)

 ifeq ($(ENABLE_SHARED),1)
+  LOCAL_CFLAGS += -fPIC
  include $(BUILD_SHARED_LIBRARY)
 else
  include $(BUILD_STATIC_LIBRARY)
 endif

 ifeq ($(CONFIG_RUNTIME_CPU_DETECT),yes)
-$(call import-module,cpufeatures)
+$(call import-module,android/cpufeatures)
 endif
--- a/build/make/Makefile
+++ b/build/make/Makefile
@@ -26,7 +26,7 @@ test-no-data-check:: .DEFAULT
 testdata:: .DEFAULT
 utiltest: .DEFAULT
 exampletest-no-data-check utiltest-no-data-check: .DEFAULT
-
+test_%: .DEFAULT ;

 # Note: md5sum is not installed on OS X, but openssl is. Openssl may not be
 # installed on cygwin, so we need to autodetect here.
@@ -90,7 +90,7 @@ all:

 .PHONY: clean
 clean::
-	rm -f $(OBJS-yes) $(OBJS-yes:.o=.d) $(OBJS-yes:.asm.s.o=.asm.s)
+	rm -f $(OBJS-yes) $(OBJS-yes:.o=.d) $(OBJS-yes:.asm.S.o=.asm.S)
 	rm -f $(CLEAN-OBJS)

 .PHONY: clean
@@ -119,27 +119,25 @@ utiltest:
 test-no-data-check::
 exampletest-no-data-check utiltest-no-data-check:

-# Add compiler flags for intrinsic files
+# Always force stack realignment on OS/2
 ifeq ($(TOOLCHAIN), x86-os2-gcc)
-STACKREALIGN=-mstackrealign
-else
-STACKREALIGN=
+CFLAGS += -mstackrealign
 endif

 $(BUILD_PFX)%_mmx.c.d: CFLAGS += -mmmx
 $(BUILD_PFX)%_mmx.c.o: CFLAGS += -mmmx
-$(BUILD_PFX)%_sse2.c.d: CFLAGS += -msse2 $(STACKREALIGN)
-$(BUILD_PFX)%_sse2.c.o: CFLAGS += -msse2 $(STACKREALIGN)
-$(BUILD_PFX)%_sse3.c.d: CFLAGS += -msse3 $(STACKREALIGN)
-$(BUILD_PFX)%_sse3.c.o: CFLAGS += -msse3 $(STACKREALIGN)
-$(BUILD_PFX)%_ssse3.c.d: CFLAGS += -mssse3 $(STACKREALIGN)
-$(BUILD_PFX)%_ssse3.c.o: CFLAGS += -mssse3 $(STACKREALIGN)
-$(BUILD_PFX)%_sse4.c.d: CFLAGS += -msse4.1 $(STACKREALIGN)
-$(BUILD_PFX)%_sse4.c.o: CFLAGS += -msse4.1 $(STACKREALIGN)
-$(BUILD_PFX)%_avx.c.d: CFLAGS += -mavx $(STACKREALIGN)
-$(BUILD_PFX)%_avx.c.o: CFLAGS += -mavx $(STACKREALIGN)
-$(BUILD_PFX)%_avx2.c.d: CFLAGS += -mavx2 $(STACKREALIGN)
-$(BUILD_PFX)%_avx2.c.o: CFLAGS += -mavx2 $(STACKREALIGN)
+$(BUILD_PFX)%_sse2.c.d: CFLAGS += -msse2
+$(BUILD_PFX)%_sse2.c.o: CFLAGS += -msse2
+$(BUILD_PFX)%_sse3.c.d: CFLAGS += -msse3
+$(BUILD_PFX)%_sse3.c.o: CFLAGS += -msse3
+$(BUILD_PFX)%_ssse3.c.d: CFLAGS += -mssse3
+$(BUILD_PFX)%_ssse3.c.o: CFLAGS += -mssse3
+$(BUILD_PFX)%_sse4.c.d: CFLAGS += -msse4.1
+$(BUILD_PFX)%_sse4.c.o: CFLAGS += -msse4.1
+$(BUILD_PFX)%_avx.c.d: CFLAGS += -mavx
+$(BUILD_PFX)%_avx.c.o: CFLAGS += -mavx
+$(BUILD_PFX)%_avx2.c.d: CFLAGS += -mavx2
+$(BUILD_PFX)%_avx2.c.o: CFLAGS += -mavx2

 $(BUILD_PFX)%.c.d: %.c
 	$(if $(quiet),@echo "    [DEP] $@")
@@ -182,13 +180,13 @@ $(BUILD_PFX)%.asm.o: %.asm
 	$(qexec)$(if $(CONFIG_DEPENDENCY_TRACKING),,mkdir -p $(dir $@))
 	$(qexec)$(AS) $(ASFLAGS) -o $@ $<

-$(BUILD_PFX)%.s.d: %.s
+$(BUILD_PFX)%.S.d: %.S
 	$(if $(quiet),@echo "    [DEP] $@")
 	$(qexec)mkdir -p $(dir $@)
 	$(qexec)$(SRC_PATH_BARE)/build/make/gen_asm_deps.sh \
            --build-pfx=$(BUILD_PFX) --depfile=$@ $(ASFLAGS) $< > $@

-$(BUILD_PFX)%.s.o: %.s
+$(BUILD_PFX)%.S.o: %.S
 	$(if $(quiet),@echo "    [AS] $@")
 	$(qexec)$(if $(CONFIG_DEPENDENCY_TRACKING),,mkdir -p $(dir $@))
 	$(qexec)$(AS) $(ASFLAGS) -o $@ $<
@@ -200,8 +198,8 @@ $(BUILD_PFX)%.c.S: %.c
 	$(qexec)$(if $(CONFIG_DEPENDENCY_TRACKING),,mkdir -p $(dir $@))
 	$(qexec)$(CC) -S $(CFLAGS) -o $@ $<

-.PRECIOUS: %.asm.s
-$(BUILD_PFX)%.asm.s: %.asm
+.PRECIOUS: %.asm.S
+$(BUILD_PFX)%.asm.S: %.asm
 	$(if $(quiet),@echo "    [ASM CONVERSION] $@")
 	$(qexec)mkdir -p $(dir $@)
 	$(qexec)$(ASM_CONVERSION) <$< >$@
@@ -285,7 +283,7 @@ define archive_template
 # for creating them.
 $(1):
 	$(if $(quiet),@echo "    [AR] $$@")
-	$(qexec)$$(AR) $$(ARFLAGS) $$@ $$?
+	$(qexec)$$(AR) $$(ARFLAGS) $$@ $$^
 endef

 define so_template
@@ -420,7 +418,6 @@ ifneq ($(call enabled,DIST-SRCS),)
    DIST-SRCS-yes            += build/make/gen_asm_deps.sh
    DIST-SRCS-yes            += build/make/Makefile
    DIST-SRCS-$(CONFIG_MSVS)  += build/make/gen_msvs_def.sh
-    DIST-SRCS-$(CONFIG_MSVS)  += build/make/gen_msvs_proj.sh
    DIST-SRCS-$(CONFIG_MSVS)  += build/make/gen_msvs_sln.sh
    DIST-SRCS-$(CONFIG_MSVS)  += build/make/gen_msvs_vcxproj.sh
    DIST-SRCS-$(CONFIG_MSVS)  += build/make/msvs_common.sh
@@ -451,3 +448,5 @@ all: $(BUILD_TARGETS)
 install:: $(INSTALL_TARGETS)
 dist: $(INSTALL_TARGETS)
 test::
+
+.SUFFIXES:  # Delete default suffix rules
--- a/build/make/ads2gas.pl
+++ b/build/make/ads2gas.pl
@@ -138,14 +138,6 @@ while (<STDIN>)
    s/DCD(.*)/.long $1/;
    s/DCB(.*)/.byte $1/;

-    # RN to .req
-    if (s/RN\s+([Rr]\d+|lr)/.req $1/)
-    {
-        print;
-        print "$comment_sub$comment\n" if defined $comment;
-        next;
-    }
-
    # Make function visible to linker, and make additional symbol with
    # prepended underscore
    s/EXPORT\s+\|([\$\w]*)\|/.global $1 \n\t.type $1, function/;
--- a/build/make/ads2gas_apple.pl
+++ b/build/make/ads2gas_apple.pl
@@ -18,12 +18,6 @@
 # Usage: cat inputfile | perl ads2gas_apple.pl > outputfile
 #

-my $chromium = 0;
-
-foreach my $arg (@ARGV) {
-    $chromium = 1 if ($arg eq "-chromium");
-}
-
 print "@ This file was created from a .asm file\n";
 print "@  using the ads2gas_apple.pl script.\n\n";
 print "\t.set WIDE_REFERENCE, 0\n";
@@ -126,18 +120,6 @@ while (<STDIN>)
    s/DCD(.*)/.long $1/;
    s/DCB(.*)/.byte $1/;

-    # Build a hash of all the register - alias pairs.
-    if (s/(.*)RN(.*)/$1 .req $2/g)
-    {
-        $register_aliases{trim($1)} = trim($2);
-        next;
-    }
-
-    while (($key, $value) = each(%register_aliases))
-    {
-        s/\b$key\b/$value/g;
-    }
-
    # Make function visible to linker, and make additional symbol with
    # prepended underscore
    s/EXPORT\s+\|([\$\w]*)\|/.globl _$1\n\t.globl $1/;
@@ -218,18 +200,5 @@ while (<STDIN>)
    s/\bMEND\b/.endm/;              # No need to tell it where to stop assembling
    next if /^\s*END\s*$/;

-    # Clang used by Chromium differs slightly from clang in XCode in what it
-    # will accept in the assembly.
-    if ($chromium) {
-        s/qsubaddx/qsax/i;
-        s/qaddsubx/qasx/i;
-        s/ldrneb/ldrbne/i;
-        s/ldrneh/ldrhne/i;
-        s/(vqshrun\.s16 .*, \#)0$/${1}8/i;
-
-        # http://llvm.org/bugs/show_bug.cgi?id=16022
-        s/\.include/#include/;
-    }
-
    print;
 }
--- a/build/make/configure.sh
+++ b/build/make/configure.sh
@@ -73,6 +73,7 @@ Build options:
  --target=TARGET             target platform tuple [generic-gnu]
  --cpu=CPU                   optimize for a specific cpu rather than a family
  --extra-cflags=ECFLAGS      add ECFLAGS to CFLAGS [$CFLAGS]
+  --extra-cxxflags=ECXXFLAGS  add ECXXFLAGS to CXXFLAGS [$CXXFLAGS]
  ${toggle_extra_warnings}    emit harmless warnings (always non-fatal)
  ${toggle_werror}            treat warnings as errors, if possible
                              (not available with all compilers)
@@ -184,6 +185,7 @@ add_extralibs() {
 #
 # Boolean Manipulation Functions
 #
+
 enable_feature(){
  set_all yes $*
 }
@@ -200,6 +202,24 @@ disabled(){
  eval test "x\$$1" = "xno"
 }

+enable_codec(){
+  enabled "${1}" || echo "  enabling ${1}"
+  enable_feature "${1}"
+
+  is_in "${1}" vp8 vp9 && enable_feature "${1}_encoder" "${1}_decoder"
+}
+
+disable_codec(){
+  disabled "${1}" || echo "  disabling ${1}"
+  disable_feature "${1}"
+
+  is_in "${1}" vp8 vp9 && disable_feature "${1}_encoder" "${1}_decoder"
+}
+
+# Iterates through positional parameters, checks to confirm the parameter has
+# not been explicitly (force) disabled, and enables the setting controlled by
+# the parameter when the setting is not disabled.
+# Note: Does NOT alter RTCD generation options ($RTCD_OPTIONS).
 soft_enable() {
  for var in $*; do
    if ! disabled $var; then
@@ -209,6 +229,10 @@ soft_enable() {
  done
 }

+# Iterates through positional parameters, checks to confirm the parameter has
+# not been explicitly (force) enabled, and disables the setting controlled by
+# the parameter when the setting is not enabled.
+# Note: Does NOT alter RTCD generation options ($RTCD_OPTIONS).
 soft_disable() {
  for var in $*; do
    if ! enabled $var; then
@@ -337,6 +361,10 @@ check_add_cflags() {
  check_cflags "$@" && add_cflags_only "$@"
 }

+check_add_cxxflags() {
+  check_cxxflags "$@" && add_cxxflags_only "$@"
+}
+
 check_add_asflags() {
  log add_asflags "$@"
  add_asflags "$@"
@@ -428,7 +456,7 @@ NM=${NM}

 CFLAGS  = ${CFLAGS}
 CXXFLAGS  = ${CXXFLAGS}
-ARFLAGS = -rus\$(if \$(quiet),c,v)
+ARFLAGS = -crs\$(if \$(quiet),,v)
 LDFLAGS = ${LDFLAGS}
 ASFLAGS = ${ASFLAGS}
 extralibs = ${extralibs}
@@ -503,24 +531,25 @@ process_common_cmdline() {
      --extra-cflags=*)
        extra_cflags="${optval}"
        ;;
+      --extra-cxxflags=*)
+        extra_cxxflags="${optval}"
+        ;;
      --enable-?*|--disable-?*)
        eval `echo "$opt" | sed 's/--/action=/;s/-/ option=/;s/-/_/g'`
-        if echo "${ARCH_EXT_LIST}" | grep "^ *$option\$" >/dev/null; then
+        if is_in ${option} ${ARCH_EXT_LIST}; then
          [ $action = "disable" ] && RTCD_OPTIONS="${RTCD_OPTIONS}--disable-${option} "
        elif [ $action = "disable" ] && ! disabled $option ; then
-          echo "${CMDLINE_SELECT}" | grep "^ *$option\$" >/dev/null ||
-            die_unknown $opt
+          is_in ${option} ${CMDLINE_SELECT} || die_unknown $opt
          log_echo "  disabling $option"
        elif [ $action = "enable" ] && ! enabled $option ; then
-          echo "${CMDLINE_SELECT}" | grep "^ *$option\$" >/dev/null ||
-            die_unknown $opt
+          is_in ${option} ${CMDLINE_SELECT} || die_unknown $opt
          log_echo "  enabling $option"
        fi
        ${action}_feature $option
        ;;
      --require-?*)
        eval `echo "$opt" | sed 's/--/action=/;s/-/ option=/;s/-/_/g'`
-        if echo "${ARCH_EXT_LIST}" none | grep "^ *$option\$" >/dev/null; then
+        if is_in ${option} ${ARCH_EXT_LIST}; then
            RTCD_OPTIONS="${RTCD_OPTIONS}${opt} "
        else
            die_unknown $opt
@@ -606,7 +635,7 @@ setup_gnu_toolchain() {
  AS=${AS:-${CROSS}as}
  STRIP=${STRIP:-${CROSS}strip}
  NM=${NM:-${CROSS}nm}
-  AS_SFX=.s
+  AS_SFX=.S
  EXE_SFX=
 }

@@ -617,16 +646,41 @@ show_darwin_sdk_path() {
    xcodebuild -sdk $1 -version Path 2>/dev/null
 }

+# Print the major version number of the Darwin SDK specified by $1.
+show_darwin_sdk_major_version() {
+  xcrun --sdk $1 --show-sdk-version 2>/dev/null | cut -d. -f1
+}
+
+# Print the Xcode version.
+show_xcode_version() {
+  xcodebuild -version | head -n1 | cut -d' ' -f2
+}
+
+# Fails when Xcode version is less than 6.3. Runs xcodebuild only once.
+check_xcode_minimum_version() {
+  xcode_version=$(show_xcode_version)
+  xcode_major=${xcode_version%%.*}
+  xcode_minor=${xcode_version#*.}
+  xcode_minor=${xcode_minor%%.*}
+  xcode_min_major=6
+  xcode_min_minor=3
+  [ ${xcode_major} -lt ${xcode_min_major} ] && return 1
+  if [ ${xcode_major} -eq ${xcode_min_major} ] \
+    && [ ${xcode_minor} -lt ${xcode_min_minor} ]; then
+    return 1
+  fi
+}
+
 process_common_toolchain() {
  if [ -z "$toolchain" ]; then
    gcctarget="${CHOST:-$(gcc -dumpmachine 2> /dev/null)}"

    # detect tgt_isa
    case "$gcctarget" in
-      armv6*)
-        tgt_isa=armv6
+      aarch64*)
+        tgt_isa=arm64
        ;;
-      armv7*-hardfloat*)
+      armv7*-hardfloat* | armv7*-gnueabihf | arm-*-gnueabihf)
        tgt_isa=armv7
        float_abi=hard
        ;;
@@ -667,6 +721,10 @@ process_common_toolchain() {
        tgt_isa=x86_64
        tgt_os=darwin14
        ;;
+      *darwin15*)
+        tgt_isa=x86_64
+        tgt_os=darwin15
+        ;;
      x86_64*mingw32*)
        tgt_os=win64
        ;;
@@ -723,19 +781,27 @@ process_common_toolchain() {
  enabled shared && soft_enable pic

  # Minimum iOS version for all target platforms (darwin and iphonesimulator).
-  IOS_VERSION_MIN="6.0"
+  # Shared library framework builds are only possible on iOS 8 and later.
+  if enabled shared; then
+    IOS_VERSION_OPTIONS="--enable-shared"
+    IOS_VERSION_MIN="8.0"
+  else
+    IOS_VERSION_OPTIONS=""
+    IOS_VERSION_MIN="6.0"
+  fi

  # Handle darwin variants. Newer SDKs allow targeting older
  # platforms, so use the newest one available.
  case ${toolchain} in
    arm*-darwin*)
-      ios_sdk_dir="$(show_darwin_sdk_path iphoneos)"
-      if [ -d "${ios_sdk_dir}" ]; then
-        add_cflags  "-isysroot ${ios_sdk_dir}"
-        add_ldflags "-isysroot ${ios_sdk_dir}"
+      add_cflags "-miphoneos-version-min=${IOS_VERSION_MIN}"
+      iphoneos_sdk_dir="$(show_darwin_sdk_path iphoneos)"
+      if [ -d "${iphoneos_sdk_dir}" ]; then
+        add_cflags  "-isysroot ${iphoneos_sdk_dir}"
+        add_ldflags "-isysroot ${iphoneos_sdk_dir}"
      fi
      ;;
-    *-darwin*)
+    x86*-darwin*)
      osx_sdk_dir="$(show_darwin_sdk_path macosx)"
      if [ -d "${osx_sdk_dir}" ]; then
        add_cflags  "-isysroot ${osx_sdk_dir}"
@@ -773,6 +839,10 @@ process_common_toolchain() {
      add_cflags  "-mmacosx-version-min=10.10"
      add_ldflags "-mmacosx-version-min=10.10"
      ;;
+    *-darwin15-*)
+      add_cflags  "-mmacosx-version-min=10.11"
+      add_ldflags "-mmacosx-version-min=10.11"
+      ;;
    *-iphonesimulator-*)
      add_cflags  "-miphoneos-version-min=${IOS_VERSION_MIN}"
      add_ldflags "-miphoneos-version-min=${IOS_VERSION_MIN}"
@@ -810,17 +880,6 @@ process_common_toolchain() {
          if disabled neon && enabled neon_asm; then
            die "Disabling neon while keeping neon-asm is not supported"
          fi
-          case ${toolchain} in
-            *-darwin*)
-              # Neon is guaranteed on iOS 6+ devices, while old media extensions
-              # no longer assemble with iOS 9 SDK
-              ;;
-            *)
-              soft_enable media
-          esac
-          ;;
-        armv6)
-          soft_enable media
          ;;
      esac

@@ -828,7 +887,6 @@ process_common_toolchain() {

      case ${tgt_cc} in
        gcc)
-          CROSS=${CROSS:-arm-none-linux-gnueabi-}
          link_with_cc=gcc
          setup_gnu_toolchain
          arch_int=${tgt_isa##armv}
@@ -850,6 +908,9 @@ EOF
              check_add_cflags -mfpu=neon #-ftree-vectorize
              check_add_asflags -mfpu=neon
            fi
+          elif [ ${tgt_isa} = "arm64" ] || [ ${tgt_isa} = "armv8" ]; then
+            check_add_cflags -march=armv8-a
+            check_add_asflags -march=armv8-a
          else
            check_add_cflags -march=${tgt_isa}
            check_add_asflags -march=${tgt_isa}
@@ -865,7 +926,7 @@ EOF
          ;;
        vs*)
          asm_conversion_cmd="${source_path}/build/make/ads2armasm_ms.pl"
-          AS_SFX=.s
+          AS_SFX=.S
          msvs_arch_dir=arm-msvs
          disable_feature multithread
          disable_feature unit_tests
@@ -875,6 +936,7 @@ EOF
            # only "AppContainerApplication" which requires an AppxManifest.
            # Therefore disable the examples, just build the library.
            disable_feature examples
+            disable_feature tools
          fi
          ;;
        rvct)
@@ -917,41 +979,50 @@ EOF
          ;;

        android*)
-          SDK_PATH=${sdk_path}
-          COMPILER_LOCATION=`find "${SDK_PATH}" \
-                             -name "arm-linux-androideabi-gcc*" -print -quit`
-          TOOLCHAIN_PATH=${COMPILER_LOCATION%/*}/arm-linux-androideabi-
-          CC=${TOOLCHAIN_PATH}gcc
-          CXX=${TOOLCHAIN_PATH}g++
-          AR=${TOOLCHAIN_PATH}ar
-          LD=${TOOLCHAIN_PATH}gcc
-          AS=${TOOLCHAIN_PATH}as
-          STRIP=${TOOLCHAIN_PATH}strip
-          NM=${TOOLCHAIN_PATH}nm
+          if [ -n "${sdk_path}" ]; then
+            SDK_PATH=${sdk_path}
+            COMPILER_LOCATION=`find "${SDK_PATH}" \
+              -name "arm-linux-androideabi-gcc*" -print -quit`
+            TOOLCHAIN_PATH=${COMPILER_LOCATION%/*}/arm-linux-androideabi-
+            CC=${TOOLCHAIN_PATH}gcc
+            CXX=${TOOLCHAIN_PATH}g++
+            AR=${TOOLCHAIN_PATH}ar
+            LD=${TOOLCHAIN_PATH}gcc
+            AS=${TOOLCHAIN_PATH}as
+            STRIP=${TOOLCHAIN_PATH}strip
+            NM=${TOOLCHAIN_PATH}nm

-          if [ -z "${alt_libc}" ]; then
-            alt_libc=`find "${SDK_PATH}" -name arch-arm -print | \
-              awk '{n = split($0,a,"/"); \
+            if [ -z "${alt_libc}" ]; then
+              alt_libc=`find "${SDK_PATH}" -name arch-arm -print | \
+                awk '{n = split($0,a,"/"); \
                split(a[n-1],b,"-"); \
                print $0 " " b[2]}' | \
                sort -g -k 2 | \
                awk '{ print $1 }' | tail -1`
-          fi
+            fi

-          add_cflags "--sysroot=${alt_libc}"
-          add_ldflags "--sysroot=${alt_libc}"
+            if [ -d "${alt_libc}" ]; then
+              add_cflags "--sysroot=${alt_libc}"
+              add_ldflags "--sysroot=${alt_libc}"
+            fi

-          # linker flag that routes around a CPU bug in some
-          # Cortex-A8 implementations (NDK Dev Guide)
-          add_ldflags "-Wl,--fix-cortex-a8"
+            # linker flag that routes around a CPU bug in some
+            # Cortex-A8 implementations (NDK Dev Guide)
+            add_ldflags "-Wl,--fix-cortex-a8"

-          enable_feature pic
-          soft_enable realtime_only
-          if [ ${tgt_isa} = "armv7" ]; then
-            soft_enable runtime_cpu_detect
-          fi
-          if enabled runtime_cpu_detect; then
-            add_cflags "-I${SDK_PATH}/sources/android/cpufeatures"
+            enable_feature pic
+            soft_enable realtime_only
+            if [ ${tgt_isa} = "armv7" ]; then
+              soft_enable runtime_cpu_detect
+            fi
+            if enabled runtime_cpu_detect; then
+              add_cflags "-I${SDK_PATH}/sources/android/cpufeatures"
+            fi
+          else
+            echo "Assuming standalone build with NDK toolchain."
+            echo "See build/make/Android.mk for details."
+            check_add_ldflags -static
+            soft_enable unit_tests
          fi
          ;;

@@ -964,19 +1035,8 @@ EOF
          STRIP="$(${XCRUN_FIND} strip)"
          NM="$(${XCRUN_FIND} nm)"
          RANLIB="$(${XCRUN_FIND} ranlib)"
-          AS_SFX=.s
-
-          # Special handling of ld for armv6 because libclang_rt.ios.a does
-          # not contain armv6 support in Apple's clang package:
-          #   Apple LLVM version 5.1 (clang-503.0.40) (based on LLVM 3.4svn).
-          # TODO(tomfinegan): Remove this. Our minimum iOS version (6.0)
-          # renders support for armv6 unnecessary because the 3GS and up
-          # support neon.
-          if [ "${tgt_isa}" = "armv6" ]; then
-            LD="$(${XCRUN_FIND} ld)"
-          else
-            LD="${CXX:-$(${XCRUN_FIND} ld)}"
-          fi
+          AS_SFX=.S
+          LD="${CXX:-$(${XCRUN_FIND} ld)}"

          # ASFLAGS is written here instead of using check_add_asflags
          # because we need to overwrite all of ASFLAGS and purge the
@@ -1002,7 +1062,26 @@ EOF
            [ -d "${try_dir}" ] && add_ldflags -L"${try_dir}"
          done

+          case ${tgt_isa} in
+            armv7|armv7s|armv8|arm64)
+              if enabled neon && ! check_xcode_minimum_version; then
+                soft_disable neon
+                log_echo "  neon disabled: upgrade Xcode (need v6.3+)."
+                if enabled neon_asm; then
+                  soft_disable neon_asm
+                  log_echo "  neon_asm disabled: upgrade Xcode (need v6.3+)."
+                fi
+              fi
+              ;;
+          esac
+
          asm_conversion_cmd="${source_path}/build/make/ads2gas_apple.pl"
+
+          if [ "$(show_darwin_sdk_major_version iphoneos)" -gt 8 ]; then
+            check_add_cflags -fembed-bitcode
+            check_add_asflags -fembed-bitcode
+            check_add_ldflags -fembed-bitcode
+          fi
          ;;

        linux*)
@@ -1010,7 +1089,7 @@ EOF
          if enabled rvct; then
            # Check if we have CodeSourcery GCC in PATH. Needed for
            # libraries
-            hash arm-none-linux-gnueabi-gcc 2>&- || \
+            which arm-none-linux-gnueabi-gcc 2>&- || \
              die "Couldn't find CodeSourcery GCC from PATH"

            # Use armcc as a linker to enable translation of
@@ -1045,13 +1124,13 @@ EOF
      if [ -n "${tune_cpu}" ]; then
        case ${tune_cpu} in
          p5600)
-            check_add_cflags -mips32r5 -funroll-loops -mload-store-pairs
+            check_add_cflags -mips32r5 -mload-store-pairs
            check_add_cflags -msched-weight -mhard-float -mfp64
            check_add_asflags -mips32r5 -mhard-float -mfp64
            check_add_ldflags -mfp64
            ;;
-          i6400)
-            check_add_cflags -mips64r6 -mabi=64 -funroll-loops -msched-weight 
+          i6400|p6600)
+            check_add_cflags -mips64r6 -mabi=64 -msched-weight
            check_add_cflags  -mload-store-pairs -mhard-float -mfp64
            check_add_asflags -mips64r6 -mabi=64 -mhard-float -mfp64
            check_add_ldflags -mips64r6 -mabi=64 -mfp64
@@ -1078,10 +1157,12 @@ EOF
          CC=${CC:-${CROSS}gcc}
          CXX=${CXX:-${CROSS}g++}
          LD=${LD:-${CROSS}gcc}
-          CROSS=${CROSS:-g}
+          CROSS=${CROSS-g}
          ;;
        os2)
+          disable_feature pic
          AS=${AS:-nasm}
+          add_ldflags -Zhigh-mem
          ;;
      esac

@@ -1129,6 +1210,12 @@ EOF
              soft_disable avx2
              ;;
          esac
+          case $vc_version in
+            7|8|9)
+              echo "${tgt_cc} omits stdint.h, disabling webm-io..."
+              soft_disable webm_io
+              ;;
+          esac
          ;;
      esac

@@ -1149,32 +1236,43 @@ EOF
      soft_enable runtime_cpu_detect
      # We can't use 'check_cflags' until the compiler is configured and CC is
      # populated.
-      check_gcc_machine_option mmx
-      check_gcc_machine_option sse
-      check_gcc_machine_option sse2
-      check_gcc_machine_option sse3
-      check_gcc_machine_option ssse3
-      check_gcc_machine_option sse4 sse4_1
-      check_gcc_machine_option avx
-      check_gcc_machine_option avx2
-
-      case "${AS}" in
-        auto|"")
-          which nasm >/dev/null 2>&1 && AS=nasm
-          which yasm >/dev/null 2>&1 && AS=yasm
-          if [ "${AS}" = nasm ] ; then
-            # Apple ships version 0.98 of nasm through at least Xcode 6. Revisit
-            # this check if they start shipping a compatible version.
-            apple=`nasm -v | grep "Apple"`
-            [ -n "${apple}" ] \
-              && echo "Unsupported version of nasm: ${apple}" \
-              && AS=""
+      for ext in ${ARCH_EXT_LIST_X86}; do
+        # disable higher order extensions to simplify asm dependencies
+        if [ "$disable_exts" = "yes" ]; then
+          if ! disabled $ext; then
+            RTCD_OPTIONS="${RTCD_OPTIONS}--disable-${ext} "
+            disable_feature $ext
          fi
-          [ "${AS}" = auto ] || [ -z "${AS}" ] \
-            && die "Neither yasm nor nasm have been found"
-          ;;
-      esac
-      log_echo "  using $AS"
+        elif disabled $ext; then
+          disable_exts="yes"
+        else
+          # use the shortened version for the flag: sse4_1 -> sse4
+          check_gcc_machine_option ${ext%_*} $ext
+        fi
+      done
+
+      if enabled external_build; then
+        log_echo "  skipping assembler detection"
+      else
+        case "${AS}" in
+          auto|"")
+            which nasm >/dev/null 2>&1 && AS=nasm
+            which yasm >/dev/null 2>&1 && AS=yasm
+            if [ "${AS}" = nasm ] ; then
+              # Apple ships version 0.98 of nasm through at least Xcode 6. Revisit
+              # this check if they start shipping a compatible version.
+              apple=`nasm -v | grep "Apple"`
+              [ -n "${apple}" ] \
+                && echo "Unsupported version of nasm: ${apple}" \
+                && AS=""
+            fi
+            [ "${AS}" = auto ] || [ -z "${AS}" ] \
+              && die "Neither yasm nor nasm have been found." \
+                     "See the prerequisites section in the README for more info."
+            ;;
+        esac
+        log_echo "  using $AS"
+      fi
      [ "${AS##*/}" = nasm ] && add_asflags -Ox
      AS_SFX=.asm
      case  ${tgt_os} in
@@ -1210,6 +1308,13 @@ EOF
          enabled x86 && sim_arch="-arch i386" || sim_arch="-arch x86_64"
          add_cflags  ${sim_arch}
          add_ldflags ${sim_arch}
+
+          if [ "$(show_darwin_sdk_major_version iphonesimulator)" -gt 8 ]; then
+            # yasm v1.3.0 doesn't know what -fembed-bitcode means, so turning it
+            # on is pointless (unless building a C-only lib). Warn the user, but
+            # do nothing here.
+            log "Warning: Bitcode embed disabled for simulator targets."
+          fi
          ;;
        os2)
          add_asflags -f aout
@@ -1262,10 +1367,6 @@ EOF
    fi
  fi

-  if [ "${tgt_isa}" = "x86_64" ] || [ "${tgt_isa}" = "x86" ]; then
-    soft_enable use_x86inc
-  fi
-
  # Position Independent Code (PIC) support, for building relocatable
  # shared objects
  enabled gcc && enabled pic && check_add_cflags -fPIC
@@ -1295,6 +1396,7 @@ EOF
      *-win*-vs*)
        ;;
      *-android-gcc)
+        # bionic includes basic pthread functionality, obviating -lpthread.
        ;;
      *)
        check_header pthread.h && add_extralibs -lpthread
@@ -1323,12 +1425,6 @@ EOF
    add_cflags -D_LARGEFILE_SOURCE
    add_cflags -D_FILE_OFFSET_BITS=64
  fi
-
-  # append any user defined extra cflags
-  if [ -n "${extra_cflags}" ] ; then
-    check_add_cflags ${extra_cflags} || \
-    die "Requested extra CFLAGS '${extra_cflags}' not supported by compiler"
-  fi
 }

 process_toolchain() {
--- a/build/make/gen_msvs_proj.sh
+++ b/build/make/gen_msvs_proj.sh
@@ -1,486 +0,0 @@
-#!/bin/bash
-##
-##  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
-##
-##  Use of this source code is governed by a BSD-style license
-##  that can be found in the LICENSE file in the root of the source
-##  tree. An additional intellectual property rights grant can be found
-##  in the file PATENTS.  All contributing project authors may
-##  be found in the AUTHORS file in the root of the source tree.
-##
-
-self=$0
-self_basename=${self##*/}
-self_dirname=$(dirname "$0")
-
-. "$self_dirname/msvs_common.sh"|| exit 127
-
-show_help() {
-    cat <<EOF
-Usage: ${self_basename} --name=projname [options] file1 [file2 ...]
-
-This script generates a Visual Studio project file from a list of source
-code files.
-
-Options:
-    --help                      Print this message
-    --exe                       Generate a project for building an Application
-    --lib                       Generate a project for creating a static library
-    --dll                       Generate a project for creating a dll
-    --static-crt                Use the static C runtime (/MT)
-    --target=isa-os-cc          Target specifier (required)
-    --out=filename              Write output to a file [stdout]
-    --name=project_name         Name of the project (required)
-    --proj-guid=GUID            GUID to use for the project
-    --module-def=filename       File containing export definitions (for DLLs)
-    --ver=version               Version (7,8,9) of visual studio to generate for
-    --src-path-bare=dir         Path to root of source tree
-    -Ipath/to/include           Additional include directories
-    -DFLAG[=value]              Preprocessor macros to define
-    -Lpath/to/lib               Additional library search paths
-    -llibname                   Library to link against
-EOF
-    exit 1
-}
-
-generate_filter() {
-    local var=$1
-    local name=$2
-    local pats=$3
-    local file_list_sz
-    local i
-    local f
-    local saveIFS="$IFS"
-    local pack
-    echo "generating filter '$name' from ${#file_list[@]} files" >&2
-    IFS=*
-
-    open_tag Filter \
-        Name=$name \
-        Filter=$pats \
-        UniqueIdentifier=`generate_uuid` \
-
-    file_list_sz=${#file_list[@]}
-    for i in ${!file_list[@]}; do
-        f=${file_list[i]}
-        for pat in ${pats//;/$IFS}; do
-            if [ "${f##*.}" == "$pat" ]; then
-                unset file_list[i]
-
-                objf=$(echo ${f%.*}.obj \
-                       | sed -e "s,$src_path_bare,," \
-                             -e 's/^[\./]\+//g' -e 's,[:/ ],_,g')
-                open_tag File RelativePath="$f"
-
-                if [ "$pat" == "asm" ] && $asm_use_custom_step; then
-                    for plat in "${platforms[@]}"; do
-                        for cfg in Debug Release; do
-                            open_tag FileConfiguration \
-                                Name="${cfg}|${plat}" \
-
-                            tag Tool \
-                                Name="VCCustomBuildTool" \
-                                Description="Assembling \$(InputFileName)" \
-                                CommandLine="$(eval echo \$asm_${cfg}_cmdline) -o \$(IntDir)\\$objf" \
-                                Outputs="\$(IntDir)\\$objf" \
-
-                            close_tag FileConfiguration
-                        done
-                    done
-                fi
-                if [ "$pat" == "c" ] || \
-                   [ "$pat" == "cc" ] || [ "$pat" == "cpp" ]; then
-                    for plat in "${platforms[@]}"; do
-                        for cfg in Debug Release; do
-                            open_tag FileConfiguration \
-                                Name="${cfg}|${plat}" \
-
-                            tag Tool \
-                                Name="VCCLCompilerTool" \
-                                ObjectFile="\$(IntDir)\\$objf" \
-
-                            close_tag FileConfiguration
-                        done
-                    done
-                fi
-                close_tag File
-
-                break
-            fi
-        done
-    done
-
-    close_tag Filter
-    IFS="$saveIFS"
-}
-
-# Process command line
-unset target
-for opt in "$@"; do
-    optval="${opt#*=}"
-    case "$opt" in
-        --help|-h) show_help
-        ;;
-        --target=*) target="${optval}"
-        ;;
-        --out=*) outfile="$optval"
-        ;;
-        --name=*) name="${optval}"
-        ;;
-        --proj-guid=*) guid="${optval}"
-        ;;
-        --module-def=*) link_opts="${link_opts} ModuleDefinitionFile=${optval}"
-        ;;
-        --exe) proj_kind="exe"
-        ;;
-        --dll) proj_kind="dll"
-        ;;
-        --lib) proj_kind="lib"
-        ;;
-        --src-path-bare=*)
-            src_path_bare=$(fix_path "$optval")
-            src_path_bare=${src_path_bare%/}
-        ;;
-        --static-crt) use_static_runtime=true
-        ;;
-        --ver=*)
-            vs_ver="$optval"
-            case "$optval" in
-                [789])
-                ;;
-                *) die Unrecognized Visual Studio Version in $opt
-                ;;
-            esac
-        ;;
-        -I*)
-            opt=${opt##-I}
-            opt=$(fix_path "$opt")
-            opt="${opt%/}"
-            incs="${incs}${incs:+;}&quot;${opt}&quot;"
-            yasmincs="${yasmincs} -I&quot;${opt}&quot;"
-        ;;
-        -D*) defines="${defines}${defines:+;}${opt##-D}"
-        ;;
-        -L*) # fudge . to $(OutDir)
-            if [ "${opt##-L}" == "." ]; then
-                libdirs="${libdirs}${libdirs:+;}&quot;\$(OutDir)&quot;"
-            else
-                 # Also try directories for this platform/configuration
-                 opt=${opt##-L}
-                 opt=$(fix_path "$opt")
-                 libdirs="${libdirs}${libdirs:+;}&quot;${opt}&quot;"
-                 libdirs="${libdirs}${libdirs:+;}&quot;${opt}/\$(PlatformName)/\$(ConfigurationName)&quot;"
-                 libdirs="${libdirs}${libdirs:+;}&quot;${opt}/\$(PlatformName)&quot;"
-            fi
-        ;;
-        -l*) libs="${libs}${libs:+ }${opt##-l}.lib"
-        ;;
-        -*) die_unknown $opt
-        ;;
-        *)
-            # The paths in file_list are fixed outside of the loop.
-            file_list[${#file_list[@]}]="$opt"
-            case "$opt" in
-                 *.asm) uses_asm=true
-                 ;;
-            esac
-        ;;
-    esac
-done
-
-# Make one call to fix_path for file_list to improve performance.
-fix_file_list
-
-outfile=${outfile:-/dev/stdout}
-guid=${guid:-`generate_uuid`}
-asm_use_custom_step=false
-uses_asm=${uses_asm:-false}
-case "${vs_ver:-8}" in
-    7) vs_ver_id="7.10"
-       asm_use_custom_step=$uses_asm
-       warn_64bit='Detect64BitPortabilityProblems=true'
-    ;;
-    8) vs_ver_id="8.00"
-       asm_use_custom_step=$uses_asm
-       warn_64bit='Detect64BitPortabilityProblems=true'
-    ;;
-    9) vs_ver_id="9.00"
-       asm_use_custom_step=$uses_asm
-       warn_64bit='Detect64BitPortabilityProblems=false'
-    ;;
-esac
-
-[ -n "$name" ] || die "Project name (--name) must be specified!"
-[ -n "$target" ] || die "Target (--target) must be specified!"
-
-if ${use_static_runtime:-false}; then
-    release_runtime=0
-    debug_runtime=1
-    lib_sfx=mt
-else
-    release_runtime=2
-    debug_runtime=3
-    lib_sfx=md
-fi
-
-# Calculate debug lib names: If a lib ends in ${lib_sfx}.lib, then rename
-# it to ${lib_sfx}d.lib. This precludes linking to release libs from a
-# debug exe, so this may need to be refactored later.
-for lib in ${libs}; do
-    if [ "$lib" != "${lib%${lib_sfx}.lib}" ]; then
-        lib=${lib%.lib}d.lib
-    fi
-    debug_libs="${debug_libs}${debug_libs:+ }${lib}"
-done
-
-
-# List Keyword for this target
-case "$target" in
-    x86*) keyword="ManagedCProj"
-    ;;
-    *) die "Unsupported target $target!"
-esac
-
-# List of all platforms supported for this target
-case "$target" in
-    x86_64*)
-        platforms[0]="x64"
-        asm_Debug_cmdline="yasm -Xvc -g cv8 -f win64 ${yasmincs} &quot;\$(InputPath)&quot;"
-        asm_Release_cmdline="yasm -Xvc -f win64 ${yasmincs} &quot;\$(InputPath)&quot;"
-    ;;
-    x86*)
-        platforms[0]="Win32"
-        asm_Debug_cmdline="yasm -Xvc -g cv8 -f win32 ${yasmincs} &quot;\$(InputPath)&quot;"
-        asm_Release_cmdline="yasm -Xvc -f win32 ${yasmincs} &quot;\$(InputPath)&quot;"
-    ;;
-    *) die "Unsupported target $target!"
-    ;;
-esac
-
-generate_vcproj() {
-    case "$proj_kind" in
-        exe) vs_ConfigurationType=1
-        ;;
-        dll) vs_ConfigurationType=2
-        ;;
-        *)   vs_ConfigurationType=4
-        ;;
-    esac
-
-    echo "<?xml version=\"1.0\" encoding=\"Windows-1252\"?>"
-    open_tag VisualStudioProject \
-        ProjectType="Visual C++" \
-        Version="${vs_ver_id}" \
-        Name="${name}" \
-        ProjectGUID="{${guid}}" \
-        RootNamespace="${name}" \
-        Keyword="${keyword}" \
-
-    open_tag Platforms
-    for plat in "${platforms[@]}"; do
-        tag Platform Name="$plat"
-    done
-    close_tag Platforms
-
-    open_tag Configurations
-    for plat in "${platforms[@]}"; do
-        plat_no_ws=`echo $plat | sed 's/[^A-Za-z0-9_]/_/g'`
-        open_tag Configuration \
-            Name="Debug|$plat" \
-            OutputDirectory="\$(SolutionDir)$plat_no_ws/\$(ConfigurationName)" \
-            IntermediateDirectory="$plat_no_ws/\$(ConfigurationName)/${name}" \
-            ConfigurationType="$vs_ConfigurationType" \
-            CharacterSet="1" \
-
-        case "$target" in
-            x86*)
-                case "$name" in
-                    vpx)
-                        tag Tool \
-                            Name="VCCLCompilerTool" \
-                            Optimization="0" \
-                            AdditionalIncludeDirectories="$incs" \
-                            PreprocessorDefinitions="WIN32;_DEBUG;_CRT_SECURE_NO_WARNINGS;_CRT_SECURE_NO_DEPRECATE;$defines" \
-                            RuntimeLibrary="$debug_runtime" \
-                            UsePrecompiledHeader="0" \
-                            WarningLevel="3" \
-                            DebugInformationFormat="2" \
-                            $warn_64bit \
-
-                        $uses_asm && tag Tool Name="YASM"  IncludePaths="$incs" Debug="true"
-                    ;;
-                    *)
-                        tag Tool \
-                            Name="VCCLCompilerTool" \
-                            Optimization="0" \
-                            AdditionalIncludeDirectories="$incs" \
-                            PreprocessorDefinitions="WIN32;_DEBUG;_CRT_SECURE_NO_WARNINGS;_CRT_SECURE_NO_DEPRECATE;$defines" \
-                            RuntimeLibrary="$debug_runtime" \
-                            UsePrecompiledHeader="0" \
-                            WarningLevel="3" \
-                            DebugInformationFormat="2" \
-                            $warn_64bit \
-
-                        $uses_asm && tag Tool Name="YASM"  IncludePaths="$incs" Debug="true"
-                    ;;
-                esac
-            ;;
-        esac
-
-        case "$proj_kind" in
-            exe)
-                case "$target" in
-                    x86*)
-                        case "$name" in
-                            *)
-                                tag Tool \
-                                    Name="VCLinkerTool" \
-                                    AdditionalDependencies="$debug_libs \$(NoInherit)" \
-                                    AdditionalLibraryDirectories="$libdirs" \
-                                    GenerateDebugInformation="true" \
-                                    ProgramDatabaseFile="\$(OutDir)/${name}.pdb" \
-                            ;;
-                        esac
-                    ;;
-                 esac
-            ;;
-            lib)
-                case "$target" in
-                    x86*)
-                        tag Tool \
-                            Name="VCLibrarianTool" \
-                            OutputFile="\$(OutDir)/${name}${lib_sfx}d.lib" \
-
-                    ;;
-                esac
-            ;;
-            dll)
-                tag Tool \
-                    Name="VCLinkerTool" \
-                    AdditionalDependencies="\$(NoInherit)" \
-                    LinkIncremental="2" \
-                    GenerateDebugInformation="true" \
-                    AssemblyDebug="1" \
-                    TargetMachine="1" \
-                    $link_opts \
-
-            ;;
-        esac
-
-        close_tag Configuration
-
-        open_tag Configuration \
-            Name="Release|$plat" \
-            OutputDirectory="\$(SolutionDir)$plat_no_ws/\$(ConfigurationName)" \
-            IntermediateDirectory="$plat_no_ws/\$(ConfigurationName)/${name}" \
-            ConfigurationType="$vs_ConfigurationType" \
-            CharacterSet="1" \
-            WholeProgramOptimization="0" \
-
-        case "$target" in
-            x86*)
-                case "$name" in
-                    vpx)
-                        tag Tool \
-                            Name="VCCLCompilerTool" \
-                            Optimization="2" \
-                            FavorSizeorSpeed="1" \
-                            AdditionalIncludeDirectories="$incs" \
-                            PreprocessorDefinitions="WIN32;NDEBUG;_CRT_SECURE_NO_WARNINGS;_CRT_SECURE_NO_DEPRECATE;$defines" \
-                            RuntimeLibrary="$release_runtime" \
-                            UsePrecompiledHeader="0" \
-                            WarningLevel="3" \
-                            DebugInformationFormat="0" \
-                            $warn_64bit \
-
-                        $uses_asm && tag Tool Name="YASM"  IncludePaths="$incs"
-                    ;;
-                    *)
-                        tag Tool \
-                            Name="VCCLCompilerTool" \
-                            AdditionalIncludeDirectories="$incs" \
-                            Optimization="2" \
-                            FavorSizeorSpeed="1" \
-                            PreprocessorDefinitions="WIN32;NDEBUG;_CRT_SECURE_NO_WARNINGS;_CRT_SECURE_NO_DEPRECATE;$defines" \
-                            RuntimeLibrary="$release_runtime" \
-                            UsePrecompiledHeader="0" \
-                            WarningLevel="3" \
-                            DebugInformationFormat="0" \
-                            $warn_64bit \
-
-                        $uses_asm && tag Tool Name="YASM"  IncludePaths="$incs"
-                    ;;
-                esac
-            ;;
-        esac
-
-        case "$proj_kind" in
-            exe)
-                case "$target" in
-                    x86*)
-                        case "$name" in
-                            *)
-                                tag Tool \
-                                    Name="VCLinkerTool" \
-                                    AdditionalDependencies="$libs \$(NoInherit)" \
-                                    AdditionalLibraryDirectories="$libdirs" \
-
-                            ;;
-                        esac
-                    ;;
-                 esac
-            ;;
-            lib)
-                case "$target" in
-                    x86*)
-                        tag Tool \
-                            Name="VCLibrarianTool" \
-                            OutputFile="\$(OutDir)/${name}${lib_sfx}.lib" \
-
-                    ;;
-                esac
-            ;;
-            dll) # note differences to debug version: LinkIncremental, AssemblyDebug
-                tag Tool \
-                    Name="VCLinkerTool" \
-                    AdditionalDependencies="\$(NoInherit)" \
-                    LinkIncremental="1" \
-                    GenerateDebugInformation="true" \
-                    TargetMachine="1" \
-                    $link_opts \
-
-            ;;
-        esac
-
-        close_tag Configuration
-    done
-    close_tag Configurations
-
-    open_tag Files
-    generate_filter srcs   "Source Files"   "c;cc;cpp;def;odl;idl;hpj;bat;asm;asmx"
-    generate_filter hdrs   "Header Files"   "h;hm;inl;inc;xsd"
-    generate_filter resrcs "Resource Files" "rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav"
-    generate_filter resrcs "Build Files"    "mk"
-    close_tag Files
-
-    tag       Globals
-    close_tag VisualStudioProject
-
-    # This must be done from within the {} subshell
-    echo "Ignored files list (${#file_list[@]} items) is:" >&2
-    for f in "${file_list[@]}"; do
-        echo "    $f" >&2
-    done
-}
-
-generate_vcproj |
-    sed  -e '/"/s;\([^ "]\)/;\1\\;g' > ${outfile}
-
-exit
-<!--
-TODO: Add any files not captured by filters.
-                <File
-                        RelativePath=".\ReadMe.txt"
-                        >
-                </File>
-->
--- a/build/make/gen_msvs_sln.sh
+++ b/build/make/gen_msvs_sln.sh
@@ -19,13 +19,13 @@ show_help() {
    cat <<EOF
 Usage: ${self_basename} [options] file1 [file2 ...]

-This script generates a Visual Studio 2005 solution file from a list of project
+This script generates a Visual Studio solution file from a list of project
 files.

 Options:
    --help                      Print this message
    --out=outfile               Redirect output to a file
-    --ver=version               Version (7,8,9,10,11) of visual studio to generate for
+    --ver=version               Version (10,11,12,14) of visual studio to generate for
    --target=isa-os-cc          Target specifier
 EOF
    exit 1
@@ -55,16 +55,11 @@ indent_pop() {

 parse_project() {
    local file=$1
-    if [ "$sfx" = "vcproj" ]; then
-        local name=`grep Name "$file" | awk 'BEGIN {FS="\""}{if (NR==1) print $2}'`
-        local guid=`grep ProjectGUID "$file" | awk 'BEGIN {FS="\""}{if (NR==1) print $2}'`
-    else
-        local name=`grep RootNamespace "$file" | sed 's,.*<.*>\(.*\)</.*>.*,\1,'`
-        local guid=`grep ProjectGuid "$file" | sed 's,.*<.*>\(.*\)</.*>.*,\1,'`
-    fi
+    local name=`grep RootNamespace "$file" | sed 's,.*<.*>\(.*\)</.*>.*,\1,'`
+    local guid=`grep ProjectGuid "$file" | sed 's,.*<.*>\(.*\)</.*>.*,\1,'`

    # save the project GUID to a varaible, normalizing to the basename of the
-    # vcproj file without the extension
+    # vcxproj file without the extension
    local var
    var=${file##*/}
    var=${var%%.${sfx}}
@@ -72,13 +67,8 @@ parse_project() {
    eval "${var}_name=$name"
    eval "${var}_guid=$guid"

-    if [ "$sfx" = "vcproj" ]; then
-        cur_config_list=`grep -A1 '<Configuration' $file |
-            grep Name | cut -d\" -f2`
-    else
-        cur_config_list=`grep -B1 'Label="Configuration"' $file |
-            grep Condition | cut -d\' -f4`
-    fi
+    cur_config_list=`grep -B1 'Label="Configuration"' $file |
+        grep Condition | cut -d\' -f4`
    new_config_list=$(for i in $config_list $cur_config_list; do
        echo $i
    done | sort | uniq)
@@ -103,25 +93,6 @@ process_project() {
    eval "${var}_guid=$guid"

    echo "Project(\"{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}\") = \"$name\", \"$file\", \"$guid\""
-    indent_push
-
-    eval "local deps=\"\${${var}_deps}\""
-    if [ -n "$deps" ] && [ "$sfx" = "vcproj" ]; then
-        echo "${indent}ProjectSection(ProjectDependencies) = postProject"
-        indent_push
-
-        for dep in $deps; do
-            eval "local dep_guid=\${${dep}_guid}"
-            [ -z "${dep_guid}" ] && die "Unknown GUID for $dep (dependency of $var)"
-            echo "${indent}$dep_guid = $dep_guid"
-        done
-
-        indent_pop
-        echo "${indent}EndProjectSection"
-
-    fi
-
-    indent_pop
    echo "EndProject"
 }

@@ -191,11 +162,7 @@ process_makefile() {
    IFS=$'\r'$'\n'
    local TAB=$'\t'
    cat <<EOF
-ifeq (\$(CONFIG_VS_VERSION),7)
-MSBUILD_TOOL := devenv.com
-else
 MSBUILD_TOOL := msbuild.exe
-endif
 found_devenv := \$(shell which \$(MSBUILD_TOOL) >/dev/null 2>&1 && echo yes)
 .nodevenv.once:
 ${TAB}@echo "  * \$(MSBUILD_TOOL) not found in path."
@@ -204,7 +171,7 @@ ${TAB}@echo "  * You will have to build all configurations manually using the"
 ${TAB}@echo "  * Visual Studio IDE. To allow make to build them automatically,"
 ${TAB}@echo "  * add the Common7/IDE directory of your Visual Studio"
 ${TAB}@echo "  * installation to your path, eg:"
-${TAB}@echo "  *   C:\Program Files\Microsoft Visual Studio 8\Common7\IDE"
+${TAB}@echo "  *   C:\Program Files\Microsoft Visual Studio 10.0\Common7\IDE"
 ${TAB}@echo "  * "
 ${TAB}@touch \$@
 CLEAN-OBJS += \$(if \$(found_devenv),,.nodevenv.once)
@@ -221,16 +188,9 @@ clean::
 ${TAB}rm -rf "$platform"/"$config"
 .PHONY: $nows_sln_config
 ifneq (\$(found_devenv),)
-  ifeq (\$(CONFIG_VS_VERSION),7)
-$nows_sln_config: $outfile
-${TAB}\$(MSBUILD_TOOL) $outfile -build "$config"
-
-  else
 $nows_sln_config: $outfile
 ${TAB}\$(MSBUILD_TOOL) $outfile -m -t:Build \\
 ${TAB}${TAB}-p:Configuration="$config" -p:Platform="$platform"
-
-  endif
 else
 $nows_sln_config: $outfile .nodevenv.once
 ${TAB}@echo "  * Skipping build of $sln_config (\$(MSBUILD_TOOL) not in path)."
@@ -255,23 +215,12 @@ for opt in "$@"; do
    ;;
    --ver=*) vs_ver="$optval"
             case $optval in
-             [789]|10|11|12)
+             10|11|12|14)
             ;;
             *) die Unrecognized Visual Studio Version in $opt
             ;;
             esac
    ;;
-    --ver=*) vs_ver="$optval"
-             case $optval in
-             7) sln_vers="8.00"
-                sln_vers_str="Visual Studio .NET 2003"
-             ;;
-             [89])
-             ;;
-             *) die "Unrecognized Visual Studio Version '$optval' in $opt"
-             ;;
-             esac
-    ;;
    --target=*) target="${optval}"
    ;;
    -*) die_unknown $opt
@@ -281,16 +230,7 @@ for opt in "$@"; do
 done
 outfile=${outfile:-/dev/stdout}
 mkoutfile=${mkoutfile:-/dev/stdout}
-case "${vs_ver:-8}" in
-    7) sln_vers="8.00"
-       sln_vers_str="Visual Studio .NET 2003"
-    ;;
-    8) sln_vers="9.00"
-       sln_vers_str="Visual Studio 2005"
-    ;;
-    9) sln_vers="10.00"
-       sln_vers_str="Visual Studio 2008"
-    ;;
+case "${vs_ver:-10}" in
    10) sln_vers="11.00"
       sln_vers_str="Visual Studio 2010"
    ;;
@@ -300,15 +240,11 @@ case "${vs_ver:-8}" in
    12) sln_vers="12.00"
       sln_vers_str="Visual Studio 2013"
    ;;
-esac
-case "${vs_ver:-8}" in
-    [789])
-    sfx=vcproj
-    ;;
-    10|11|12)
-    sfx=vcxproj
+    14) sln_vers="14.00"
+       sln_vers_str="Visual Studio 2015"
    ;;
 esac
+sfx=vcxproj

 for f in "${file_list[@]}"; do
    parse_project $f
--- a/build/make/gen_msvs_vcxproj.sh
+++ b/build/make/gen_msvs_vcxproj.sh
@@ -34,7 +34,7 @@ Options:
    --name=project_name         Name of the project (required)
    --proj-guid=GUID            GUID to use for the project
    --module-def=filename       File containing export definitions (for DLLs)
-    --ver=version               Version (10,11,12) of visual studio to generate for
+    --ver=version               Version (10,11,12,14) of visual studio to generate for
    --src-path-bare=dir         Path to root of source tree
    -Ipath/to/include           Additional include directories
    -DFLAG[=value]              Preprocessor macros to define
@@ -82,7 +82,7 @@ generate_filter() {
                       | sed -e "s,$src_path_bare,," \
                             -e 's/^[\./]\+//g' -e 's,[:/ ],_,g')

-                if ([ "$pat" == "asm" ] || [ "$pat" == "s" ]) && $asm_use_custom_step; then
+                if ([ "$pat" == "asm" ] || [ "$pat" == "s" ] || [ "$pat" == "S" ]) && $asm_use_custom_step; then
                    # Avoid object file name collisions, i.e. vpx_config.c and
                    # vpx_config.asm produce the same object file without
                    # this additional suffix.
@@ -168,7 +168,7 @@ for opt in "$@"; do
        --ver=*)
            vs_ver="$optval"
            case "$optval" in
-                10|11|12)
+                10|11|12|14)
                ;;
                *) die Unrecognized Visual Studio Version in $opt
                ;;
@@ -203,7 +203,7 @@ for opt in "$@"; do
            # The paths in file_list are fixed outside of the loop.
            file_list[${#file_list[@]}]="$opt"
            case "$opt" in
-                 *.asm|*.s) uses_asm=true
+                 *.asm|*.[Ss]) uses_asm=true
                 ;;
            esac
        ;;
@@ -211,14 +211,14 @@ for opt in "$@"; do
 done

 # Make one call to fix_path for file_list to improve performance.
-fix_file_list
+fix_file_list file_list

 outfile=${outfile:-/dev/stdout}
 guid=${guid:-`generate_uuid`}
 asm_use_custom_step=false
 uses_asm=${uses_asm:-false}
 case "${vs_ver:-11}" in
-    10|11|12)
+    10|11|12|14)
       asm_use_custom_step=$uses_asm
    ;;
 esac
@@ -344,6 +344,9 @@ generate_vcxproj() {
                # has to enable AppContainerApplication as well.
                tag_content PlatformToolset v120
            fi
+            if [ "$vs_ver" = "14" ]; then
+                tag_content PlatformToolset v140
+            fi
            tag_content CharacterSet Unicode
            if [ "$config" = "Release" ]; then
                tag_content WholeProgramOptimization true
@@ -449,7 +452,7 @@ generate_vcxproj() {
    done

    open_tag ItemGroup
-    generate_filter "Source Files"   "c;cc;cpp;def;odl;idl;hpj;bat;asm;asmx;s"
+    generate_filter "Source Files"   "c;cc;cpp;def;odl;idl;hpj;bat;asm;asmx;s;S"
    close_tag ItemGroup
    open_tag ItemGroup
    generate_filter "Header Files"   "h;hm;inl;inc;xsd"
--- a/build/make/ios-Info.plist
+++ b/build/make/ios-Info.plist
@@ -0,0 +1,37 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
+<plist version="1.0">
+<dict>
+	<key>CFBundleDevelopmentRegion</key>
+	<string>en</string>
+	<key>CFBundleExecutable</key>
+	<string>VPX</string>
+	<key>CFBundleIdentifier</key>
+	<string>org.webmproject.VPX</string>
+	<key>CFBundleInfoDictionaryVersion</key>
+	<string>6.0</string>
+	<key>CFBundleName</key>
+	<string>VPX</string>
+	<key>CFBundlePackageType</key>
+	<string>FMWK</string>
+	<key>CFBundleShortVersionString</key>
+	<string>${VERSION}</string>
+	<key>CFBundleSignature</key>
+	<string>????</string>
+	<key>CFBundleSupportedPlatforms</key>
+	<array>
+		<string>iPhoneOS</string>
+	</array>
+	<key>CFBundleVersion</key>
+	<string>${VERSION}</string>
+	<key>MinimumOSVersion</key>
+	<string>${IOS_VERSION_MIN}</string>
+	<key>UIDeviceFamily</key>
+	<array>
+		<integer>1</integer>
+		<integer>2</integer>
+	</array>
+	<key>VPXFullVersion</key>
+	<string>${FULLVERSION}</string>
+</dict>
+</plist>
--- a/build/make/iosbuild.sh
+++ b/build/make/iosbuild.sh
@@ -24,32 +24,44 @@ CONFIGURE_ARGS="--disable-docs
                --disable-unit-tests"
 DIST_DIR="_dist"
 FRAMEWORK_DIR="VPX.framework"
+FRAMEWORK_LIB="VPX.framework/VPX"
 HEADER_DIR="${FRAMEWORK_DIR}/Headers/vpx"
-MAKE_JOBS=1
 SCRIPT_DIR=$(dirname "$0")
 LIBVPX_SOURCE_DIR=$(cd ${SCRIPT_DIR}/../..; pwd)
 LIPO=$(xcrun -sdk iphoneos${SDK} -find lipo)
 ORIG_PWD="$(pwd)"
-TARGETS="arm64-darwin-gcc
-         armv7-darwin-gcc
-         armv7s-darwin-gcc
-         x86-iphonesimulator-gcc
-         x86_64-iphonesimulator-gcc"
+ARM_TARGETS="arm64-darwin-gcc
+             armv7-darwin-gcc
+             armv7s-darwin-gcc"
+SIM_TARGETS="x86-iphonesimulator-gcc
+             x86_64-iphonesimulator-gcc"
+OSX_TARGETS="x86-darwin15-gcc
+             x86_64-darwin15-gcc"
+TARGETS="${ARM_TARGETS} ${SIM_TARGETS}"

 # Configures for the target specified by $1, and invokes make with the dist
 # target using $DIST_DIR as the distribution output directory.
 build_target() {
  local target="$1"
  local old_pwd="$(pwd)"
+  local target_specific_flags=""

  vlog "***Building target: ${target}***"

+  case "${target}" in
+    x86-*)
+      target_specific_flags="--enable-pic"
+      vlog "Enabled PIC for ${target}"
+      ;;
+  esac
+
  mkdir "${target}"
  cd "${target}"
  eval "${LIBVPX_SOURCE_DIR}/configure" --target="${target}" \
-    ${CONFIGURE_ARGS} ${EXTRA_CONFIGURE_ARGS} ${devnull}
+    ${CONFIGURE_ARGS} ${EXTRA_CONFIGURE_ARGS} ${target_specific_flags} \
+    ${devnull}
  export DIST_DIR
-  eval make -j ${MAKE_JOBS} dist ${devnull}
+  eval make dist ${devnull}
  cd "${old_pwd}"

  vlog "***Done building target: ${target}***"
@@ -126,6 +138,44 @@ create_vpx_framework_config_shim() {
  printf "#endif  // ${include_guard}" >> "${config_file}"
 }

+# Verifies that the $FRAMEWORK_LIB fat library contains exactly the CPUs of the
+# targets passed as arguments. Returns 1 when lipo fails or the sets differ.
+verify_framework_targets() {
+  local requested_cpus="" cpu="" target=""
+  local targets_built="" actual="" requested=""
+
+  # Extract CPU from full target name.
+  for target; do
+    cpu="${target%%-*}"
+    if [ "${cpu}" = "x86" ]; then
+      # lipo -info outputs i386 for libvpx x86 targets.
+      cpu="i386"
+    fi
+    requested_cpus="${requested_cpus}${cpu} "
+  done
+
+  # Get built CPUs; assign apart from `local` so lipo failures are not masked.
+  targets_built=$(${LIPO} -info "${FRAMEWORK_LIB}") || return 1
+
+  # $LIPO -info ends its output with ": " followed by the architecture list;
+  # strip everything up to and including the last ": ".
+  targets_built=${targets_built##*: }
+
+  # Sort CPU strings to make the next step a simple string compare.
+  actual=$(echo ${targets_built} | tr " " "\n" | sort | tr "\n" " ")
+  requested=$(echo ${requested_cpus} | tr " " "\n" | sort | tr "\n" " ")
+
+  vlog "Requested ${FRAMEWORK_LIB} CPUs: ${requested}"
+  vlog "Actual ${FRAMEWORK_LIB} CPUs: ${actual}"
+
+  if [ "${requested}" != "${actual}" ]; then
+    elog "Actual ${FRAMEWORK_LIB} targets do not match requested target list."
+    elog "  Requested target CPUs: ${requested}"
+    elog "  Actual target CPUs: ${actual}"
+    return 1
+  fi
+}
+
 # Configures and builds each target specified by $1, and then builds
 # VPX.framework.
 build_framework() {
@@ -146,7 +196,12 @@ build_framework() {
  for target in ${targets}; do
    build_target "${target}"
    target_dist_dir="${BUILD_ROOT}/${target}/${DIST_DIR}"
-    lib_list="${lib_list} ${target_dist_dir}/lib/libvpx.a"
+    if [ "${ENABLE_SHARED}" = "yes" ]; then
+      local suffix="dylib"
+    else
+      local suffix="a"
+    fi
+    lib_list="${lib_list} ${target_dist_dir}/lib/libvpx.${suffix}"
  done

  cd "${ORIG_PWD}"
@@ -165,13 +220,25 @@ build_framework() {
  # Copy in vpx_version.h.
  cp -p "${BUILD_ROOT}/${target}/vpx_version.h" "${HEADER_DIR}"

-  vlog "Created fat library ${FRAMEWORK_DIR}/VPX containing:"
+  if [ "${ENABLE_SHARED}" = "yes" ]; then
+    # Adjust the dylib's name so dynamic linking in apps works as expected.
+    install_name_tool -id '@rpath/VPX.framework/VPX' ${FRAMEWORK_DIR}/VPX
+
+    # Copy in Info.plist.
+    cat "${SCRIPT_DIR}/ios-Info.plist" \
+      | sed "s/\${FULLVERSION}/${FULLVERSION}/g" \
+      | sed "s/\${VERSION}/${VERSION}/g" \
+      | sed "s/\${IOS_VERSION_MIN}/${IOS_VERSION_MIN}/g" \
+      > "${FRAMEWORK_DIR}/Info.plist"
+  fi
+
+  # Confirm VPX.framework/VPX contains the targets requested.
+  verify_framework_targets ${targets}
+
+  vlog "Created fat library ${FRAMEWORK_LIB} containing:"
  for lib in ${lib_list}; do
    vlog "  $(echo ${lib} | awk -F / '{print $2, $NF}')"
  done
-
-  # TODO(tomfinegan): Verify that expected targets are included within
-  # VPX.framework/VPX via lipo -info.
 }

 # Trap function. Cleans up the subtree used to build all targets contained in
@@ -189,16 +256,30 @@ cleanup() {
  fi
 }

+# Prints every argument after the first on its own line, prefixed by $1.
+print_list() {
+  local indent="$1" entry=""
+  shift
+  for entry; do
+    echo "${indent}${entry}"
+  done
+}
+
 iosbuild_usage() {
 cat << EOF
  Usage: ${0##*/} [arguments]
    --help: Display this message and exit.
+    --enable-shared: Build a dynamic framework for use on iOS 8 or later.
    --extra-configure-args <args>: Extra args to pass when configuring libvpx.
-    --jobs: Number of make jobs.
+    --macosx: Uses darwin15 targets instead of iphonesimulator targets for x86
+              and x86_64. Allows linking to framework when builds target MacOSX
+              instead of iOS.
    --preserve-build-output: Do not delete the build directory.
    --show-build-output: Show output from each library build.
    --targets <targets>: Override default target list. Defaults:
-         ${TARGETS}
+$(print_list "        " ${TARGETS})
+    --test-link: Confirms all targets can be linked. Functionally identical to
+                 passing --enable-examples via --extra-configure-args.
    --verbose: Output information about the environment and each stage of the
               build.
 EOF
@@ -227,9 +308,8 @@ while [ -n "$1" ]; do
      iosbuild_usage
      exit
      ;;
-    --jobs)
-      MAKE_JOBS="$2"
-      shift
+    --enable-shared)
+      ENABLE_SHARED=yes
      ;;
    --preserve-build-output)
      PRESERVE_BUILD_OUTPUT=yes
@@ -237,10 +317,16 @@ while [ -n "$1" ]; do
    --show-build-output)
      devnull=
      ;;
+    --test-link)
+      EXTRA_CONFIGURE_ARGS="${EXTRA_CONFIGURE_ARGS} --enable-examples"
+      ;;
    --targets)
      TARGETS="$2"
      shift
      ;;
+    --macosx)
+      TARGETS="${ARM_TARGETS} ${OSX_TARGETS}"
+      ;;
    --verbose)
      VERBOSE=yes
      ;;
@@ -252,6 +338,21 @@ while [ -n "$1" ]; do
  shift
 done

+if [ "${ENABLE_SHARED}" = "yes" ]; then
+  CONFIGURE_ARGS="--enable-shared ${CONFIGURE_ARGS}"
+fi
+
+FULLVERSION=$("${SCRIPT_DIR}"/version.sh --bare "${LIBVPX_SOURCE_DIR}")
+VERSION=$(echo "${FULLVERSION}" | sed -E 's/^v([0-9]+\.[0-9]+\.[0-9]+).*$/\1/')
+
+if [ "$ENABLE_SHARED" = "yes" ]; then
+  IOS_VERSION_OPTIONS="--enable-shared"
+  IOS_VERSION_MIN="8.0"
+else
+  IOS_VERSION_OPTIONS=""
+  IOS_VERSION_MIN="6.0"
+fi
+
 if [ "${VERBOSE}" = "yes" ]; then
 cat << EOF
  BUILD_ROOT=${BUILD_ROOT}
@@ -259,16 +360,24 @@ cat << EOF
  CONFIGURE_ARGS=${CONFIGURE_ARGS}
  EXTRA_CONFIGURE_ARGS=${EXTRA_CONFIGURE_ARGS}
  FRAMEWORK_DIR=${FRAMEWORK_DIR}
+  FRAMEWORK_LIB=${FRAMEWORK_LIB}
  HEADER_DIR=${HEADER_DIR}
-  MAKE_JOBS=${MAKE_JOBS}
-  PRESERVE_BUILD_OUTPUT=${PRESERVE_BUILD_OUTPUT}
  LIBVPX_SOURCE_DIR=${LIBVPX_SOURCE_DIR}
  LIPO=${LIPO}
+  MAKEFLAGS=${MAKEFLAGS}
  ORIG_PWD=${ORIG_PWD}
-  TARGETS="${TARGETS}"
+  PRESERVE_BUILD_OUTPUT=${PRESERVE_BUILD_OUTPUT}
+  TARGETS="$(print_list "" ${TARGETS})"
+  ENABLE_SHARED=${ENABLE_SHARED}
+  OSX_TARGETS="${OSX_TARGETS}"
+  SIM_TARGETS="${SIM_TARGETS}"
+  SCRIPT_DIR="${SCRIPT_DIR}"
+  FULLVERSION="${FULLVERSION}"
+  VERSION="${VERSION}"
+  IOS_VERSION_MIN="${IOS_VERSION_MIN}"
 EOF
 fi

 build_framework "${TARGETS}"
 echo "Successfully built '${FRAMEWORK_DIR}' for:"
-echo "         ${TARGETS}"
+print_list "" ${TARGETS}
--- a/build/make/msvs_common.sh
+++ b/build/make/msvs_common.sh
@@ -39,11 +39,12 @@ fix_path() {
 }

 # Corrects the paths in file_list in one pass for efficiency.
+# $1 is the name of the array to be modified.
 fix_file_list() {
-    # TODO(jzern): this could be more generic and take the array as a param.
-    files=$(fix_path "${file_list[@]}")
+    declare -n array_ref=$1
+    files=$(fix_path "${array_ref[@]}")
    local IFS=$'\n'
-    file_list=($files)
+    array_ref=($files)
 }

 generate_uuid() {
--- a/build/make/rtcd.pl
+++ b/build/make/rtcd.pl
@@ -319,13 +319,14 @@ EOF

  print <<EOF;
 #if HAVE_DSPR2
+void vpx_dsputil_static_init();
 #if CONFIG_VP8
 void dsputil_static_init();
-dsputil_static_init();
 #endif
-#if CONFIG_VP9
-void vp9_dsputil_static_init();
-vp9_dsputil_static_init();
+
+vpx_dsputil_static_init();
+#if CONFIG_VP8
+dsputil_static_init();
 #endif
 #endif
 }
@@ -383,13 +384,8 @@ if ($opts{arch} eq 'x86') {
  }
  close CONFIG_FILE;
  mips;
-} elsif ($opts{arch} eq 'armv6') {
-  @ALL_ARCHS = filter(qw/media/);
-  arm;
 } elsif ($opts{arch} =~ /armv7\w?/) {
-  @ALL_ARCHS = filter(qw/media neon_asm neon/);
-  @REQUIRES = filter(keys %required ? keys %required : qw/media/);
-  &require(@REQUIRES);
+  @ALL_ARCHS = filter(qw/neon_asm neon/);
  arm;
 } elsif ($opts{arch} eq 'armv8' || $opts{arch} eq 'arm64' ) {
  @ALL_ARCHS = filter(qw/neon/);
--- a/build/make/version.sh
+++ b/build/make/version.sh
@@ -24,8 +24,9 @@ out_file=${2}
 id=${3:-VERSION_STRING}

 git_version_id=""
-if [ -d "${source_path}/.git" ]; then
+if [ -e "${source_path}/.git" ]; then
    # Source Path is a git working copy. Check for local modifications.
+    # Note that git submodules may have a file as .git, not a directory.
    export GIT_DIR="${source_path}/.git"
    git_version_id=`git describe --match=v[0-9]* 2>/dev/null`
 fi
--- a/codereview.settings
+++ b/codereview.settings
@@ -0,0 +1,5 @@
+# This file is used by gcl to get repository specific information.
+GERRIT_HOST: chromium-review.googlesource.com
+GERRIT_PORT: 29418
+CODE_REVIEW_SERVER: chromium-review.googlesource.com
+GERRIT_SQUASH_UPLOADS: False
--- a/configure
+++ b/configure
@@ -22,6 +22,7 @@ show_help(){
 Advanced options:
  ${toggle_libs}                  libraries
  ${toggle_examples}              examples
+  ${toggle_tools}                 tools
  ${toggle_docs}                  documentation
  ${toggle_unit_tests}            unit tests
  ${toggle_decode_perf_tests}     build decoder perf tests with unit tests
@@ -35,6 +36,9 @@ Advanced options:
  ${toggle_debug_libs}            in/exclude debug version of libraries
  ${toggle_static_msvcrt}         use static MSVCRT (VS builds only)
  ${toggle_vp9_highbitdepth}      use VP9 high bit depth (10/12) profiles
+  ${toggle_better_hw_compatibility}
+                                  enable encoder to produce streams with better
+                                  hardware decoder compatibility
  ${toggle_vp8}                   VP8 codec support
  ${toggle_vp9}                   VP9 codec support
  ${toggle_internal_stats}        output of encoder internal stats for debug, if supported (encoders)
@@ -94,11 +98,9 @@ EOF

 # all_platforms is a list of all supported target platforms. Maintain
 # alphabetically by architecture, generic-gnu last.
-all_platforms="${all_platforms} armv6-darwin-gcc"
-all_platforms="${all_platforms} armv6-linux-rvct"
-all_platforms="${all_platforms} armv6-linux-gcc"
-all_platforms="${all_platforms} armv6-none-rvct"
+all_platforms="${all_platforms} arm64-android-gcc"
 all_platforms="${all_platforms} arm64-darwin-gcc"
+all_platforms="${all_platforms} arm64-linux-gcc"
 all_platforms="${all_platforms} armv7-android-gcc"   #neon Cortex-A8
 all_platforms="${all_platforms} armv7-darwin-gcc"    #neon Cortex-A8
 all_platforms="${all_platforms} armv7-linux-rvct"    #neon Cortex-A8
@@ -106,7 +108,9 @@ all_platforms="${all_platforms} armv7-linux-gcc"     #neon Cortex-A8
 all_platforms="${all_platforms} armv7-none-rvct"     #neon Cortex-A8
 all_platforms="${all_platforms} armv7-win32-vs11"
 all_platforms="${all_platforms} armv7-win32-vs12"
+all_platforms="${all_platforms} armv7-win32-vs14"
 all_platforms="${all_platforms} armv7s-darwin-gcc"
+all_platforms="${all_platforms} armv8-linux-gcc"
 all_platforms="${all_platforms} mips32-linux-gcc"
 all_platforms="${all_platforms} mips64-linux-gcc"
 all_platforms="${all_platforms} sparc-solaris-gcc"
@@ -120,39 +124,39 @@ all_platforms="${all_platforms} x86-darwin11-gcc"
 all_platforms="${all_platforms} x86-darwin12-gcc"
 all_platforms="${all_platforms} x86-darwin13-gcc"
 all_platforms="${all_platforms} x86-darwin14-gcc"
+all_platforms="${all_platforms} x86-darwin15-gcc"
 all_platforms="${all_platforms} x86-iphonesimulator-gcc"
 all_platforms="${all_platforms} x86-linux-gcc"
 all_platforms="${all_platforms} x86-linux-icc"
 all_platforms="${all_platforms} x86-os2-gcc"
 all_platforms="${all_platforms} x86-solaris-gcc"
 all_platforms="${all_platforms} x86-win32-gcc"
-all_platforms="${all_platforms} x86-win32-vs7"
-all_platforms="${all_platforms} x86-win32-vs8"
-all_platforms="${all_platforms} x86-win32-vs9"
 all_platforms="${all_platforms} x86-win32-vs10"
 all_platforms="${all_platforms} x86-win32-vs11"
 all_platforms="${all_platforms} x86-win32-vs12"
+all_platforms="${all_platforms} x86-win32-vs14"
+all_platforms="${all_platforms} x86_64-android-gcc"
 all_platforms="${all_platforms} x86_64-darwin9-gcc"
 all_platforms="${all_platforms} x86_64-darwin10-gcc"
 all_platforms="${all_platforms} x86_64-darwin11-gcc"
 all_platforms="${all_platforms} x86_64-darwin12-gcc"
 all_platforms="${all_platforms} x86_64-darwin13-gcc"
 all_platforms="${all_platforms} x86_64-darwin14-gcc"
+all_platforms="${all_platforms} x86_64-darwin15-gcc"
 all_platforms="${all_platforms} x86_64-iphonesimulator-gcc"
 all_platforms="${all_platforms} x86_64-linux-gcc"
 all_platforms="${all_platforms} x86_64-linux-icc"
 all_platforms="${all_platforms} x86_64-solaris-gcc"
 all_platforms="${all_platforms} x86_64-win64-gcc"
-all_platforms="${all_platforms} x86_64-win64-vs8"
-all_platforms="${all_platforms} x86_64-win64-vs9"
 all_platforms="${all_platforms} x86_64-win64-vs10"
 all_platforms="${all_platforms} x86_64-win64-vs11"
 all_platforms="${all_platforms} x86_64-win64-vs12"
+all_platforms="${all_platforms} x86_64-win64-vs14"
 all_platforms="${all_platforms} generic-gnu"

 # all_targets is a list of all targets that can be configured
 # note that these should be in dependency order for now.
-all_targets="libs examples docs"
+all_targets="libs examples tools docs"

 # all targets available are enabled, by default.
 for t in ${all_targets}; do
@@ -185,8 +189,8 @@ if [ ${doxy_major:-0} -ge 1 ]; then
 fi

 # disable codecs when their source directory does not exist
-[ -d "${source_path}/vp8" ] || disable_feature vp8
-[ -d "${source_path}/vp9" ] || disable_feature vp9
+[ -d "${source_path}/vp8" ] || disable_codec vp8
+[ -d "${source_path}/vp9" ] || disable_codec vp9

 # install everything except the sources, by default. sources will have
 # to be enabled when doing dist builds, since that's no longer a common
@@ -220,17 +224,7 @@ ARCH_LIST="
    x86
    x86_64
 "
-ARCH_EXT_LIST="
-    edsp
-    media
-    neon
-    neon_asm
-
-    mips32
-    dspr2
-    msa
-    mips64
-
+ARCH_EXT_LIST_X86="
    mmx
    sse
    sse2
@@ -240,18 +234,28 @@ ARCH_EXT_LIST="
    avx
    avx2
 "
+ARCH_EXT_LIST="
+    neon
+    neon_asm
+
+    mips32
+    dspr2
+    msa
+    mips64
+
+    ${ARCH_EXT_LIST_X86}
+"
 HAVE_LIST="
    ${ARCH_EXT_LIST}
    vpx_ports
-    stdint_h
    pthread_h
-    sys_mman_h
    unistd_h
 "
 EXPERIMENT_LIST="
    spatial_svc
    fp_mb_stats
    emulate_hardware
+    misc_fixes
 "
 CONFIG_LIST="
    dependency_tracking
@@ -260,7 +264,6 @@ CONFIG_LIST="
    install_bins
    install_libs
    install_srcs
-    use_x86inc
    debug
    gprof
    gcov
@@ -304,6 +307,7 @@ CONFIG_LIST="
    vp9_temporal_denoising
    coefficient_range_checking
    vp9_highbitdepth
+    better_hw_compatibility
    experimental
    size_limit
    ${EXPERIMENT_LIST}
@@ -321,7 +325,6 @@ CMDLINE_SELECT="
    gprof
    gcov
    pic
-    use_x86inc
    optimizations
    ccache
    runtime_cpu_detect
@@ -329,6 +332,7 @@ CMDLINE_SELECT="

    libs
    examples
+    tools
    docs
    libc
    as
@@ -362,6 +366,7 @@ CMDLINE_SELECT="
    temporal_denoising
    vp9_temporal_denoising
    coefficient_range_checking
+    better_hw_compatibility
    vp9_highbitdepth
    experimental
 "
@@ -370,15 +375,19 @@ process_cmdline() {
    for opt do
        optval="${opt#*=}"
        case "$opt" in
-        --disable-codecs) for c in ${CODECS}; do disable_feature $c; done ;;
+        --disable-codecs)
+          for c in ${CODEC_FAMILIES}; do disable_codec $c; done
+          ;;
        --enable-?*|--disable-?*)
        eval `echo "$opt" | sed 's/--/action=/;s/-/ option=/;s/-/_/g'`
-        if echo "${EXPERIMENT_LIST}" | grep "^ *$option\$" >/dev/null; then
+        if is_in ${option} ${EXPERIMENT_LIST}; then
            if enabled experimental; then
                ${action}_feature $option
            else
                log_echo "Ignoring $opt -- not in experimental mode."
            fi
+        elif is_in ${option} "${CODECS} ${CODEC_FAMILIES}"; then
+            ${action}_codec ${option}
        else
            process_common_cmdline $opt
        fi
@@ -392,14 +401,6 @@ process_cmdline() {
 post_process_cmdline() {
    c=""

-    # If the codec family is disabled, disable all components of that family.
-    # If the codec family is enabled, enable all components of that family.
-    log_echo "Configuring selected codecs"
-    for c in ${CODECS}; do
-        disabled ${c%%_*} && disable_feature ${c}
-        enabled ${c%%_*} && enable_feature ${c}
-    done
-
    # Enable all detected codecs, if they haven't been disabled
    for c in ${CODECS}; do soft_enable $c; done

@@ -477,7 +478,7 @@ EOF
    #
    # Write makefiles for all enabled targets
    #
-    for tgt in libs examples docs solution; do
+    for tgt in libs examples tools docs solution; do
        tgt_fn="$tgt-$toolchain.mk"

        if enabled $tgt; then
@@ -494,13 +495,18 @@ process_detect() {
        # Can only build shared libs on a subset of platforms. Doing this check
        # here rather than at option parse time because the target auto-detect
        # magic happens after the command line has been parsed.
-        if ! enabled linux && ! enabled os2; then
+        case "${tgt_os}" in
+        linux|os2|darwin*|iphonesimulator*)
+            # Supported platforms
+            ;;
+        *)
            if enabled gnu; then
                echo "--enable-shared is only supported on ELF; assuming this is OK"
            else
-                die "--enable-shared only supported on ELF and OS/2 for now"
+                die "--enable-shared only supported on ELF, OS/2, and Darwin for now"
            fi
-        fi
+            ;;
+        esac
    fi
    if [ -z "$CC" ] || enabled external_build; then
        echo "Bypassing toolchain for environment detection."
@@ -527,16 +533,12 @@ process_detect() {
            # Specialize windows and POSIX environments.
            case $toolchain in
                *-win*-*)
-                    case $header-$toolchain in
-                        stdint*-gcc) true;;
-                        *) false;;
-                    esac && enable_feature $var
-                    ;;
+                    # Don't check for any headers in Windows builds.
+                    false
+                ;;
                *)
                    case $header in
-                        stdint.h) true;;
                        pthread.h) true;;
-                        sys/mman.h) true;;
                        unistd.h) true;;
                        *) false;;
                    esac && enable_feature $var
@@ -552,9 +554,7 @@ process_detect() {
 int main(void) {return 0;}
 EOF
    # check system headers
-    check_header stdint.h
    check_header pthread.h
-    check_header sys/mman.h
    check_header unistd.h # for sysconf(3) and friends.

    check_header vpx/vpx_integer.h -I${source_path} && enable_feature vpx_ports
@@ -569,23 +569,30 @@ process_toolchain() {
        check_add_cflags -Wall
        check_add_cflags -Wdeclaration-after-statement
        check_add_cflags -Wdisabled-optimization
+        check_add_cflags -Wfloat-conversion
        check_add_cflags -Wpointer-arith
        check_add_cflags -Wtype-limits
        check_add_cflags -Wcast-qual
        check_add_cflags -Wvla
        check_add_cflags -Wimplicit-function-declaration
        check_add_cflags -Wuninitialized
-        check_add_cflags -Wunused-variable
-        case ${CC} in
-          *clang*)
-              # libvpx and/or clang have issues with aliasing:
-              # https://code.google.com/p/webm/issues/detail?id=603
-              # work around them until they are fixed
-              check_add_cflags -fno-strict-aliasing
-          ;;
-          *) check_add_cflags -Wunused-but-set-variable ;;
-        esac
-        enabled extra_warnings || check_add_cflags -Wno-unused-function
+        check_add_cflags -Wunused
+        # -Wextra has some tricky cases. Rather than fix them all now, get the
+        # flag for as many files as possible and fix the remaining issues
+        # piecemeal.
+        # https://bugs.chromium.org/p/webm/issues/detail?id=1069
+        check_add_cflags -Wextra
+        # check_add_cflags also adds to cxxflags. gtest does not do well with
+        # -Wundef so add it explicitly to CFLAGS only.
+        check_cflags -Wundef && add_cflags_only -Wundef
+        if enabled mips || [ -z "${INLINE}" ]; then
+          enabled extra_warnings || check_add_cflags -Wno-unused-function
+        fi
+        if ! enabled vp9_highbitdepth; then
+          # Avoid this warning for third_party C++ sources. Some reorganization
+          # would be needed to apply this only to test/*.cc.
+          check_cflags -Wshorten-64-to-32 && add_cflags_only -Wshorten-64-to-32
+        fi
    fi

    if enabled icc; then
@@ -633,17 +640,9 @@ process_toolchain() {
        vs*) enable_feature msvs
             enable_feature solution
             vs_version=${tgt_cc##vs}
-             case $vs_version in
-             [789])
-                 VCPROJ_SFX=vcproj
-                 gen_vcproj_cmd=${source_path}/build/make/gen_msvs_proj.sh
-                 ;;
-             10|11|12)
-                 VCPROJ_SFX=vcxproj
-                 gen_vcproj_cmd=${source_path}/build/make/gen_msvs_vcxproj.sh
-                 enabled werror && gen_vcproj_cmd="${gen_vcproj_cmd} --enable-werror"
-                 ;;
-             esac
+             VCPROJ_SFX=vcxproj
+             gen_vcproj_cmd=${source_path}/build/make/gen_msvs_vcxproj.sh
+             enabled werror && gen_vcproj_cmd="${gen_vcproj_cmd} --enable-werror"
             all_targets="${all_targets} solution"
             INLINE="__forceinline"
        ;;
@@ -704,6 +703,16 @@ EOF
    esac
    # libwebm needs to be linked with C++ standard library
    enabled webm_io && LD=${CXX}
+
+    # append any user defined extra cflags
+    if [ -n "${extra_cflags}" ] ; then
+        check_add_cflags ${extra_cflags} || \
+        die "Requested extra CFLAGS '${extra_cflags}' not supported by compiler"
+    fi
+    if [ -n "${extra_cxxflags}" ]; then
+        check_add_cxxflags ${extra_cxxflags} || \
+        die "Requested extra CXXFLAGS '${extra_cxxflags}' not supported by compiler"
+    fi
 }


--- a/examples.mk
+++ b/examples.mk
@@ -22,33 +22,44 @@ LIBYUV_SRCS +=  third_party/libyuv/include/libyuv/basic_types.h  \
                third_party/libyuv/source/planar_functions.cc \
                third_party/libyuv/source/row_any.cc \
                third_party/libyuv/source/row_common.cc \
+                third_party/libyuv/source/row_gcc.cc \
                third_party/libyuv/source/row_mips.cc \
                third_party/libyuv/source/row_neon.cc \
                third_party/libyuv/source/row_neon64.cc \
-                third_party/libyuv/source/row_posix.cc \
                third_party/libyuv/source/row_win.cc \
                third_party/libyuv/source/scale.cc \
+                third_party/libyuv/source/scale_any.cc \
                third_party/libyuv/source/scale_common.cc \
+                third_party/libyuv/source/scale_gcc.cc \
                third_party/libyuv/source/scale_mips.cc \
                third_party/libyuv/source/scale_neon.cc \
                third_party/libyuv/source/scale_neon64.cc \
-                third_party/libyuv/source/scale_posix.cc \
                third_party/libyuv/source/scale_win.cc \

-LIBWEBM_MUXER_SRCS += third_party/libwebm/mkvmuxer.cpp \
-                      third_party/libwebm/mkvmuxerutil.cpp \
-                      third_party/libwebm/mkvwriter.cpp \
-                      third_party/libwebm/mkvmuxer.hpp \
-                      third_party/libwebm/mkvmuxertypes.hpp \
-                      third_party/libwebm/mkvmuxerutil.hpp \
-                      third_party/libwebm/mkvparser.hpp \
-                      third_party/libwebm/mkvwriter.hpp \
-                      third_party/libwebm/webmids.hpp
+LIBWEBM_COMMON_SRCS += third_party/libwebm/common/hdr_util.cc \
+                       third_party/libwebm/common/hdr_util.h \
+                       third_party/libwebm/common/webmids.h
+
+LIBWEBM_MUXER_SRCS += third_party/libwebm/mkvmuxer/mkvmuxer.cc \
+                      third_party/libwebm/mkvmuxer/mkvmuxerutil.cc \
+                      third_party/libwebm/mkvmuxer/mkvwriter.cc \
+                      third_party/libwebm/mkvmuxer/mkvmuxer.h \
+                      third_party/libwebm/mkvmuxer/mkvmuxertypes.h \
+                      third_party/libwebm/mkvmuxer/mkvmuxerutil.h \
+                      third_party/libwebm/mkvparser/mkvparser.h \
+                      third_party/libwebm/mkvmuxer/mkvwriter.h
+
+LIBWEBM_PARSER_SRCS = third_party/libwebm/mkvparser/mkvparser.cc \
+                      third_party/libwebm/mkvparser/mkvreader.cc \
+                      third_party/libwebm/mkvparser/mkvparser.h \
+                      third_party/libwebm/mkvparser/mkvreader.h
+
+# Add compile flags and include path for libwebm sources.
+ifeq ($(CONFIG_WEBM_IO),yes)
+  CXXFLAGS     += -D__STDC_CONSTANT_MACROS -D__STDC_LIMIT_MACROS
+  INC_PATH-yes += $(SRC_PATH_BARE)/third_party/libwebm
+endif

-LIBWEBM_PARSER_SRCS = third_party/libwebm/mkvparser.cpp \
-                      third_party/libwebm/mkvreader.cpp \
-                      third_party/libwebm/mkvparser.hpp \
-                      third_party/libwebm/mkvreader.hpp

 # List of examples to build. UTILS are tools meant for distribution
 # while EXAMPLES demonstrate specific portions of the API.
@@ -65,8 +76,11 @@ vpxdec.SRCS                 += tools_common.c tools_common.h
 vpxdec.SRCS                 += y4menc.c y4menc.h
 ifeq ($(CONFIG_LIBYUV),yes)
  vpxdec.SRCS                 += $(LIBYUV_SRCS)
+  $(BUILD_PFX)third_party/libyuv/%.cc.o: CXXFLAGS += -Wno-unused-parameter
 endif
 ifeq ($(CONFIG_WEBM_IO),yes)
+  vpxdec.SRCS                 += $(LIBWEBM_COMMON_SRCS)
+  vpxdec.SRCS                 += $(LIBWEBM_MUXER_SRCS)
  vpxdec.SRCS                 += $(LIBWEBM_PARSER_SRCS)
  vpxdec.SRCS                 += webmdec.cc webmdec.h
 endif
@@ -88,7 +102,9 @@ ifeq ($(CONFIG_LIBYUV),yes)
  vpxenc.SRCS                 += $(LIBYUV_SRCS)
 endif
 ifeq ($(CONFIG_WEBM_IO),yes)
+  vpxenc.SRCS                 += $(LIBWEBM_COMMON_SRCS)
  vpxenc.SRCS                 += $(LIBWEBM_MUXER_SRCS)
+  vpxenc.SRCS                 += $(LIBWEBM_PARSER_SRCS)
  vpxenc.SRCS                 += webmenc.cc webmenc.h
 endif
 vpxenc.GUID                  = 548DEC74-7A15-4B2B-AFC3-AA102E7C25C1
@@ -200,6 +216,17 @@ vp8cx_set_ref.SRCS                 += vpx_ports/msvc.h
 vp8cx_set_ref.GUID                  = C5E31F7F-96F6-48BD-BD3E-10EBF6E8057A
 vp8cx_set_ref.DESCRIPTION           = VP8 set encoder reference frame

+ifeq ($(CONFIG_VP9_ENCODER),yes)
+ifeq ($(CONFIG_DECODERS),yes)
+EXAMPLES-yes                       += vp9cx_set_ref.c
+vp9cx_set_ref.SRCS                 += ivfenc.h ivfenc.c
+vp9cx_set_ref.SRCS                 += tools_common.h tools_common.c
+vp9cx_set_ref.SRCS                 += video_common.h
+vp9cx_set_ref.SRCS                 += video_writer.h video_writer.c
+vp9cx_set_ref.GUID                  = 65D7F14A-2EE6-4293-B958-AB5107A03B55
+vp9cx_set_ref.DESCRIPTION           = VP9 set encoder reference frame
+endif
+endif

 ifeq ($(CONFIG_MULTI_RES_ENCODING),yes)
 ifeq ($(CONFIG_LIBYUV),yes)
@@ -324,8 +351,8 @@ endif
 # the makefiles). We may want to revisit this.
 define vcproj_template
 $(1): $($(1:.$(VCPROJ_SFX)=).SRCS) vpx.$(VCPROJ_SFX)
-	@echo "    [vcproj] $$@"
-	$$(GEN_VCPROJ)\
+	$(if $(quiet),@echo "    [vcproj] $$@")
+	$(qexec)$$(GEN_VCPROJ)\
            --exe\
            --target=$$(TOOLCHAIN)\
            --name=$$(@:.$(VCPROJ_SFX)=)\
--- a/examples/decode_to_md5.c
+++ b/examples/decode_to_md5.c
@@ -65,8 +65,7 @@ static void get_image_md5(const vpx_image_t *img, unsigned char digest[16]) {
 static void print_md5(FILE *stream, unsigned char digest[16]) {
  int i;

-  for (i = 0; i < 16; ++i)
-    fprintf(stream, "%02x", digest[i]);
+  for (i = 0; i < 16; ++i) fprintf(stream, "%02x", digest[i]);
 }

 static const char *exec_name;
@@ -86,12 +85,10 @@ int main(int argc, char **argv) {

  exec_name = argv[0];

-  if (argc != 3)
-    die("Invalid number of arguments.");
+  if (argc != 3) die("Invalid number of arguments.");

  reader = vpx_video_reader_open(argv[1]);
-  if (!reader)
-    die("Failed to open %s for reading.", argv[1]);
+  if (!reader) die("Failed to open %s for reading.", argv[1]);

  if (!(outfile = fopen(argv[2], "wb")))
    die("Failed to open %s for writing.", argv[2]);
@@ -99,8 +96,7 @@ int main(int argc, char **argv) {
  info = vpx_video_reader_get_info(reader);

  decoder = get_vpx_decoder_by_fourcc(info->codec_fourcc);
-  if (!decoder)
-    die("Unknown input codec.");
+  if (!decoder) die("Unknown input codec.");

  printf("Using %s\n", vpx_codec_iface_name(decoder->codec_interface()));

@@ -111,8 +107,8 @@ int main(int argc, char **argv) {
    vpx_codec_iter_t iter = NULL;
    vpx_image_t *img = NULL;
    size_t frame_size = 0;
-    const unsigned char *frame = vpx_video_reader_get_frame(reader,
-                                                            &frame_size);
+    const unsigned char *frame =
+        vpx_video_reader_get_frame(reader, &frame_size);
    if (vpx_codec_decode(&codec, frame, (unsigned int)frame_size, NULL, 0))
      die_codec(&codec, "Failed to decode frame");

@@ -121,14 +117,13 @@ int main(int argc, char **argv) {

      get_image_md5(img, digest);
      print_md5(outfile, digest);
-      fprintf(outfile, "  img-%dx%d-%04d.i420\n",
-              img->d_w, img->d_h, ++frame_cnt);
+      fprintf(outfile, "  img-%dx%d-%04d.i420\n", img->d_w, img->d_h,
+              ++frame_cnt);
    }
  }

  printf("Processed %d frames.\n", frame_cnt);
-  if (vpx_codec_destroy(&codec))
-    die_codec(&codec, "Failed to destroy codec.");
+  if (vpx_codec_destroy(&codec)) die_codec(&codec, "Failed to destroy codec.");

  vpx_video_reader_close(reader);

--- a/examples/decode_with_drops.c
+++ b/examples/decode_with_drops.c
@@ -84,18 +84,16 @@ int main(int argc, char **argv) {

  exec_name = argv[0];

-  if (argc != 4)
-    die("Invalid number of arguments.");
+  if (argc != 4) die("Invalid number of arguments.");

  reader = vpx_video_reader_open(argv[1]);
-  if (!reader)
-    die("Failed to open %s for reading.", argv[1]);
+  if (!reader) die("Failed to open %s for reading.", argv[1]);

  if (!(outfile = fopen(argv[2], "wb")))
    die("Failed to open %s for writing.", argv[2]);

-  n = strtol(argv[3], &nptr, 0);
-  m = strtol(nptr + 1, NULL, 0);
+  n = (int)strtol(argv[3], &nptr, 0);
+  m = (int)strtol(nptr + 1, NULL, 0);
  is_range = (*nptr == '-');
  if (!n || !m || (*nptr != '-' && *nptr != '/'))
    die("Couldn't parse pattern %s.\n", argv[3]);
@@ -103,8 +101,7 @@ int main(int argc, char **argv) {
  info = vpx_video_reader_get_info(reader);

  decoder = get_vpx_decoder_by_fourcc(info->codec_fourcc);
-  if (!decoder)
-    die("Unknown input codec.");
+  if (!decoder) die("Unknown input codec.");

  printf("Using %s\n", vpx_codec_iface_name(decoder->codec_interface()));

@@ -116,8 +113,8 @@ int main(int argc, char **argv) {
    vpx_image_t *img = NULL;
    size_t frame_size = 0;
    int skip;
-    const unsigned char *frame = vpx_video_reader_get_frame(reader,
-                                                            &frame_size);
+    const unsigned char *frame =
+        vpx_video_reader_get_frame(reader, &frame_size);
    if (vpx_codec_decode(&codec, frame, (unsigned int)frame_size, NULL, 0))
      die_codec(&codec, "Failed to decode frame.");

@@ -139,8 +136,7 @@ int main(int argc, char **argv) {
  }

  printf("Processed %d frames.\n", frame_cnt);
-  if (vpx_codec_destroy(&codec))
-    die_codec(&codec, "Failed to destroy codec.");
+  if (vpx_codec_destroy(&codec)) die_codec(&codec, "Failed to destroy codec.");

  printf("Play: ffplay -f rawvideo -pix_fmt yuv420p -s %dx%d %s\n",
         info->frame_width, info->frame_height, argv[2]);
--- a/examples/postproc.c
+++ b/examples/postproc.c
@@ -68,12 +68,10 @@ int main(int argc, char **argv) {

  exec_name = argv[0];

-  if (argc != 3)
-    die("Invalid number of arguments.");
+  if (argc != 3) die("Invalid number of arguments.");

  reader = vpx_video_reader_open(argv[1]);
-  if (!reader)
-    die("Failed to open %s for reading.", argv[1]);
+  if (!reader) die("Failed to open %s for reading.", argv[1]);

  if (!(outfile = fopen(argv[2], "wb")))
    die("Failed to open %s for writing", argv[2]);
@@ -81,8 +79,7 @@ int main(int argc, char **argv) {
  info = vpx_video_reader_get_info(reader);

  decoder = get_vpx_decoder_by_fourcc(info->codec_fourcc);
-  if (!decoder)
-    die("Unknown input codec.");
+  if (!decoder) die("Unknown input codec.");

  printf("Using %s\n", vpx_codec_iface_name(decoder->codec_interface()));

@@ -91,26 +88,25 @@ int main(int argc, char **argv) {
  if (res == VPX_CODEC_INCAPABLE)
    die_codec(&codec, "Postproc not supported by this decoder.");

-  if (res)
-    die_codec(&codec, "Failed to initialize decoder.");
+  if (res) die_codec(&codec, "Failed to initialize decoder.");

  while (vpx_video_reader_read_frame(reader)) {
    vpx_codec_iter_t iter = NULL;
    vpx_image_t *img = NULL;
    size_t frame_size = 0;
-    const unsigned char *frame = vpx_video_reader_get_frame(reader,
-                                                            &frame_size);
+    const unsigned char *frame =
+        vpx_video_reader_get_frame(reader, &frame_size);

    ++frame_cnt;

    if (frame_cnt % 30 == 1) {
-      vp8_postproc_cfg_t pp = {0, 0, 0};
+      vp8_postproc_cfg_t pp = { 0, 0, 0 };

-    if (vpx_codec_control(&codec, VP8_SET_POSTPROC, &pp))
-      die_codec(&codec, "Failed to turn off postproc.");
+      if (vpx_codec_control(&codec, VP8_SET_POSTPROC, &pp))
+        die_codec(&codec, "Failed to turn off postproc.");
    } else if (frame_cnt % 30 == 16) {
-      vp8_postproc_cfg_t pp = {VP8_DEBLOCK | VP8_DEMACROBLOCK | VP8_MFQE,
-                               4, 0};
+      vp8_postproc_cfg_t pp = { VP8_DEBLOCK | VP8_DEMACROBLOCK | VP8_MFQE, 4,
+                                0 };
      if (vpx_codec_control(&codec, VP8_SET_POSTPROC, &pp))
        die_codec(&codec, "Failed to turn on postproc.");
    };
@@ -125,8 +121,7 @@ int main(int argc, char **argv) {
  }

  printf("Processed %d frames.\n", frame_cnt);
-  if (vpx_codec_destroy(&codec))
-    die_codec(&codec, "Failed to destroy codec");
+  if (vpx_codec_destroy(&codec)) die_codec(&codec, "Failed to destroy codec");

  printf("Play: ffplay -f rawvideo -pix_fmt yuv420p -s %dx%d %s\n",
         info->frame_width, info->frame_height, argv[2]);
--- a/examples/resize_util.c
+++ b/examples/resize_util.c
@@ -34,10 +34,8 @@ void usage_exit(void) {

 static int parse_dim(char *v, int *width, int *height) {
  char *x = strchr(v, 'x');
-  if (x == NULL)
-    x = strchr(v, 'X');
-  if (x == NULL)
-    return 0;
+  if (x == NULL) x = strchr(v, 'X');
+  if (x == NULL) return 0;
  *width = atoi(v);
  *height = atoi(&x[1]);
  if (*width <= 0 || *height <= 0)
@@ -93,30 +91,25 @@ int main(int argc, char *argv[]) {
  else
    frames = INT_MAX;

-  printf("Input size:  %dx%d\n",
-         width, height);
-  printf("Target size: %dx%d, Frames: ",
-         target_width, target_height);
+  printf("Input size:  %dx%d\n", width, height);
+  printf("Target size: %dx%d, Frames: ", target_width, target_height);
  if (frames == INT_MAX)
    printf("All\n");
  else
    printf("%d\n", frames);

-  inbuf = (uint8_t*)malloc(width * height * 3 / 2);
-  outbuf = (uint8_t*)malloc(target_width * target_height * 3 / 2);
+  inbuf = (uint8_t *)malloc(width * height * 3 / 2);
+  outbuf = (uint8_t *)malloc(target_width * target_height * 3 / 2);
  inbuf_u = inbuf + width * height;
  inbuf_v = inbuf_u + width * height / 4;
  outbuf_u = outbuf + target_width * target_height;
  outbuf_v = outbuf_u + target_width * target_height / 4;
  f = 0;
  while (f < frames) {
-    if (fread(inbuf, width * height * 3 / 2, 1, fpin) != 1)
-      break;
-    vp9_resize_frame420(inbuf, width, inbuf_u, inbuf_v, width / 2,
-                        height, width,
-                        outbuf, target_width, outbuf_u, outbuf_v,
-                        target_width / 2,
-                        target_height, target_width);
+    if (fread(inbuf, width * height * 3 / 2, 1, fpin) != 1) break;
+    vp9_resize_frame420(inbuf, width, inbuf_u, inbuf_v, width / 2, height,
+                        width, outbuf, target_width, outbuf_u, outbuf_v,
+                        target_width / 2, target_height, target_width);
    fwrite(outbuf, target_width * target_height * 3 / 2, 1, fpout);
    f++;
  }
--- a/examples/set_maps.c
+++ b/examples/set_maps.c
@@ -8,7 +8,6 @@
 *  be found in the AUTHORS file in the root of the source tree.
 */

-
 // VP8 Set Active and ROI Maps
 // ===========================
 //
@@ -86,8 +85,7 @@ static void set_roi_map(const vpx_codec_enc_cfg_t *cfg,
  roi.static_threshold[3] = 0;

  roi.roi_map = (uint8_t *)malloc(roi.rows * roi.cols);
-  for (i = 0; i < roi.rows * roi.cols; ++i)
-    roi.roi_map[i] = i % 4;
+  for (i = 0; i < roi.rows * roi.cols; ++i) roi.roi_map[i] = i % 4;

  if (vpx_codec_control(codec, VP8E_SET_ROI_MAP, &roi))
    die_codec(codec, "Failed to set ROI map");
@@ -98,14 +96,13 @@ static void set_roi_map(const vpx_codec_enc_cfg_t *cfg,
 static void set_active_map(const vpx_codec_enc_cfg_t *cfg,
                           vpx_codec_ctx_t *codec) {
  unsigned int i;
-  vpx_active_map_t map = {0, 0, 0};
+  vpx_active_map_t map = { 0, 0, 0 };

  map.rows = (cfg->g_h + 15) / 16;
  map.cols = (cfg->g_w + 15) / 16;

  map.active_map = (uint8_t *)malloc(map.rows * map.cols);
-  for (i = 0; i < map.rows * map.cols; ++i)
-    map.active_map[i] = i % 2;
+  for (i = 0; i < map.rows * map.cols; ++i) map.active_map[i] = i % 2;

  if (vpx_codec_control(codec, VP8E_SET_ACTIVEMAP, &map))
    die_codec(codec, "Failed to set active map");
@@ -115,7 +112,7 @@ static void set_active_map(const vpx_codec_enc_cfg_t *cfg,

 static void unset_active_map(const vpx_codec_enc_cfg_t *cfg,
                             vpx_codec_ctx_t *codec) {
-  vpx_active_map_t map = {0, 0, 0};
+  vpx_active_map_t map = { 0, 0, 0 };

  map.rows = (cfg->g_h + 15) / 16;
  map.cols = (cfg->g_w + 15) / 16;
@@ -125,25 +122,21 @@ static void unset_active_map(const vpx_codec_enc_cfg_t *cfg,
    die_codec(codec, "Failed to set active map");
 }

-static int encode_frame(vpx_codec_ctx_t *codec,
-                        vpx_image_t *img,
-                        int frame_index,
-                        VpxVideoWriter *writer) {
+static int encode_frame(vpx_codec_ctx_t *codec, vpx_image_t *img,
+                        int frame_index, VpxVideoWriter *writer) {
  int got_pkts = 0;
  vpx_codec_iter_t iter = NULL;
  const vpx_codec_cx_pkt_t *pkt = NULL;
-  const vpx_codec_err_t res = vpx_codec_encode(codec, img, frame_index, 1, 0,
-                                               VPX_DL_GOOD_QUALITY);
-  if (res != VPX_CODEC_OK)
-    die_codec(codec, "Failed to encode frame");
+  const vpx_codec_err_t res =
+      vpx_codec_encode(codec, img, frame_index, 1, 0, VPX_DL_GOOD_QUALITY);
+  if (res != VPX_CODEC_OK) die_codec(codec, "Failed to encode frame");

  while ((pkt = vpx_codec_get_cx_data(codec, &iter)) != NULL) {
    got_pkts = 1;

    if (pkt->kind == VPX_CODEC_CX_FRAME_PKT) {
      const int keyframe = (pkt->data.frame.flags & VPX_FRAME_IS_KEY) != 0;
-      if (!vpx_video_writer_write_frame(writer,
-                                        pkt->data.frame.buf,
+      if (!vpx_video_writer_write_frame(writer, pkt->data.frame.buf,
                                        pkt->data.frame.sz,
                                        pkt->data.frame.pts)) {
        die_codec(codec, "Failed to write compressed frame");
@@ -167,12 +160,11 @@ int main(int argc, char **argv) {
  VpxVideoInfo info;
  VpxVideoWriter *writer = NULL;
  const VpxInterface *encoder = NULL;
-  const int fps = 2;        // TODO(dkovalev) add command line argument
+  const int fps = 2;  // TODO(dkovalev) add command line argument
  const double bits_per_pixel_per_frame = 0.067;

  exec_name = argv[0];
-  if (argc != 6)
-    die("Invalid number of arguments");
+  if (argc != 6) die("Invalid number of arguments");

  memset(&info, 0, sizeof(info));

@@ -182,40 +174,36 @@ int main(int argc, char **argv) {
  }
  assert(encoder != NULL);
  info.codec_fourcc = encoder->fourcc;
-  info.frame_width = strtol(argv[2], NULL, 0);
-  info.frame_height = strtol(argv[3], NULL, 0);
+  info.frame_width = (int)strtol(argv[2], NULL, 0);
+  info.frame_height = (int)strtol(argv[3], NULL, 0);
  info.time_base.numerator = 1;
  info.time_base.denominator = fps;

-  if (info.frame_width <= 0 ||
-      info.frame_height <= 0 ||
-      (info.frame_width % 2) != 0 ||
-      (info.frame_height % 2) != 0) {
+  if (info.frame_width <= 0 || info.frame_height <= 0 ||
+      (info.frame_width % 2) != 0 || (info.frame_height % 2) != 0) {
    die("Invalid frame size: %dx%d", info.frame_width, info.frame_height);
  }

  if (!vpx_img_alloc(&raw, VPX_IMG_FMT_I420, info.frame_width,
-                                             info.frame_height, 1)) {
+                     info.frame_height, 1)) {
    die("Failed to allocate image.");
  }

  printf("Using %s\n", vpx_codec_iface_name(encoder->codec_interface()));

  res = vpx_codec_enc_config_default(encoder->codec_interface(), &cfg, 0);
-  if (res)
-    die_codec(&codec, "Failed to get default codec config.");
+  if (res) die_codec(&codec, "Failed to get default codec config.");

  cfg.g_w = info.frame_width;
  cfg.g_h = info.frame_height;
  cfg.g_timebase.num = info.time_base.numerator;
  cfg.g_timebase.den = info.time_base.denominator;
-  cfg.rc_target_bitrate = (unsigned int)(bits_per_pixel_per_frame * cfg.g_w *
-                                         cfg.g_h * fps / 1000);
+  cfg.rc_target_bitrate =
+      (unsigned int)(bits_per_pixel_per_frame * cfg.g_w * cfg.g_h * fps / 1000);
  cfg.g_lag_in_frames = 0;

  writer = vpx_video_writer_open(argv[5], kContainerIVF, &info);
-  if (!writer)
-    die("Failed to open %s for writing.", argv[5]);
+  if (!writer) die("Failed to open %s for writing.", argv[5]);

  if (!(infile = fopen(argv[4], "rb")))
    die("Failed to open %s for reading.", argv[4]);
@@ -239,15 +227,15 @@ int main(int argc, char **argv) {
  }

  // Flush encoder.
-  while (encode_frame(&codec, NULL, -1, writer)) {}
+  while (encode_frame(&codec, NULL, -1, writer)) {
+  }

  printf("\n");
  fclose(infile);
  printf("Processed %d frames.\n", frame_count);

  vpx_img_free(&raw);
-  if (vpx_codec_destroy(&codec))
-    die_codec(&codec, "Failed to destroy codec.");
+  if (vpx_codec_destroy(&codec)) die_codec(&codec, "Failed to destroy codec.");

  vpx_video_writer_close(writer);

--- a/examples/simple_decoder.c
+++ b/examples/simple_decoder.c
@@ -8,7 +8,6 @@
 *  be found in the AUTHORS file in the root of the source tree.
 */

-
 // Simple Decoder
 // ==============
 //
@@ -103,12 +102,10 @@ int main(int argc, char **argv) {

  exec_name = argv[0];

-  if (argc != 3)
-    die("Invalid number of arguments.");
+  if (argc != 3) die("Invalid number of arguments.");

  reader = vpx_video_reader_open(argv[1]);
-  if (!reader)
-    die("Failed to open %s for reading.", argv[1]);
+  if (!reader) die("Failed to open %s for reading.", argv[1]);

  if (!(outfile = fopen(argv[2], "wb")))
    die("Failed to open %s for writing.", argv[2]);
@@ -116,8 +113,7 @@ int main(int argc, char **argv) {
  info = vpx_video_reader_get_info(reader);

  decoder = get_vpx_decoder_by_fourcc(info->codec_fourcc);
-  if (!decoder)
-    die("Unknown input codec.");
+  if (!decoder) die("Unknown input codec.");

  printf("Using %s\n", vpx_codec_iface_name(decoder->codec_interface()));

@@ -128,8 +124,8 @@ int main(int argc, char **argv) {
    vpx_codec_iter_t iter = NULL;
    vpx_image_t *img = NULL;
    size_t frame_size = 0;
-    const unsigned char *frame = vpx_video_reader_get_frame(reader,
-                                                            &frame_size);
+    const unsigned char *frame =
+        vpx_video_reader_get_frame(reader, &frame_size);
    if (vpx_codec_decode(&codec, frame, (unsigned int)frame_size, NULL, 0))
      die_codec(&codec, "Failed to decode frame.");

@@ -140,8 +136,7 @@ int main(int argc, char **argv) {
  }

  printf("Processed %d frames.\n", frame_cnt);
-  if (vpx_codec_destroy(&codec))
-    die_codec(&codec, "Failed to destroy codec");
+  if (vpx_codec_destroy(&codec)) die_codec(&codec, "Failed to destroy codec");

  printf("Play: ffplay -f rawvideo -pix_fmt yuv420p -s %dx%d %s\n",
         info->frame_width, info->frame_height, argv[2]);
--- a/examples/simple_encoder.c
+++ b/examples/simple_encoder.c
@@ -109,32 +109,27 @@ static const char *exec_name;
 void usage_exit(void) {
  fprintf(stderr,
          "Usage: %s <codec> <width> <height> <infile> <outfile> "
-              "<keyframe-interval> [<error-resilient>]\nSee comments in "
-              "simple_encoder.c for more information.\n",
+          "<keyframe-interval> <error-resilient> <frames to encode>\n"
+          "See comments in simple_encoder.c for more information.\n",
          exec_name);
  exit(EXIT_FAILURE);
 }

-static int encode_frame(vpx_codec_ctx_t *codec,
-                        vpx_image_t *img,
-                        int frame_index,
-                        int flags,
-                        VpxVideoWriter *writer) {
+static int encode_frame(vpx_codec_ctx_t *codec, vpx_image_t *img,
+                        int frame_index, int flags, VpxVideoWriter *writer) {
  int got_pkts = 0;
  vpx_codec_iter_t iter = NULL;
  const vpx_codec_cx_pkt_t *pkt = NULL;
-  const vpx_codec_err_t res = vpx_codec_encode(codec, img, frame_index, 1,
-                                               flags, VPX_DL_GOOD_QUALITY);
-  if (res != VPX_CODEC_OK)
-    die_codec(codec, "Failed to encode frame");
+  const vpx_codec_err_t res =
+      vpx_codec_encode(codec, img, frame_index, 1, flags, VPX_DL_GOOD_QUALITY);
+  if (res != VPX_CODEC_OK) die_codec(codec, "Failed to encode frame");

  while ((pkt = vpx_codec_get_cx_data(codec, &iter)) != NULL) {
    got_pkts = 1;

    if (pkt->kind == VPX_CODEC_CX_FRAME_PKT) {
      const int keyframe = (pkt->data.frame.flags & VPX_FRAME_IS_KEY) != 0;
-      if (!vpx_video_writer_write_frame(writer,
-                                        pkt->data.frame.buf,
+      if (!vpx_video_writer_write_frame(writer, pkt->data.frame.buf,
                                        pkt->data.frame.sz,
                                        pkt->data.frame.pts)) {
        die_codec(codec, "Failed to write compressed frame");
@@ -147,6 +142,7 @@ static int encode_frame(vpx_codec_ctx_t *codec,
  return got_pkts;
 }

+// TODO(tomfinegan): Improve command line parsing and add args for bitrate/fps.
 int main(int argc, char **argv) {
  FILE *infile = NULL;
  vpx_codec_ctx_t codec;
@@ -154,15 +150,14 @@ int main(int argc, char **argv) {
  int frame_count = 0;
  vpx_image_t raw;
  vpx_codec_err_t res;
-  VpxVideoInfo info = {0};
+  VpxVideoInfo info = { 0, 0, 0, { 0, 0 } };
  VpxVideoWriter *writer = NULL;
  const VpxInterface *encoder = NULL;
-  const int fps = 30;        // TODO(dkovalev) add command line argument
-  const int bitrate = 200;   // kbit/s TODO(dkovalev) add command line argument
+  const int fps = 30;
+  const int bitrate = 200;
  int keyframe_interval = 0;
-
-  // TODO(dkovalev): Add some simple command line parsing code to make the
-  // command line more flexible.
+  int max_frames = 0;
+  int frames_encoded = 0;
  const char *codec_arg = NULL;
  const char *width_arg = NULL;
  const char *height_arg = NULL;
@@ -172,8 +167,7 @@ int main(int argc, char **argv) {

  exec_name = argv[0];

-  if (argc < 7)
-    die("Invalid number of arguments");
+  if (argc != 9) die("Invalid number of arguments");

  codec_arg = argv[1];
  width_arg = argv[2];
@@ -181,49 +175,44 @@ int main(int argc, char **argv) {
  infile_arg = argv[4];
  outfile_arg = argv[5];
  keyframe_interval_arg = argv[6];
+  max_frames = (int)strtol(argv[8], NULL, 0);

  encoder = get_vpx_encoder_by_name(codec_arg);
-  if (!encoder)
-     die("Unsupported codec.");
+  if (!encoder) die("Unsupported codec.");

  info.codec_fourcc = encoder->fourcc;
-  info.frame_width = strtol(width_arg, NULL, 0);
-  info.frame_height = strtol(height_arg, NULL, 0);
+  info.frame_width = (int)strtol(width_arg, NULL, 0);
+  info.frame_height = (int)strtol(height_arg, NULL, 0);
  info.time_base.numerator = 1;
  info.time_base.denominator = fps;

-  if (info.frame_width <= 0 ||
-      info.frame_height <= 0 ||
-      (info.frame_width % 2) != 0 ||
-      (info.frame_height % 2) != 0) {
+  if (info.frame_width <= 0 || info.frame_height <= 0 ||
+      (info.frame_width % 2) != 0 || (info.frame_height % 2) != 0) {
    die("Invalid frame size: %dx%d", info.frame_width, info.frame_height);
  }

  if (!vpx_img_alloc(&raw, VPX_IMG_FMT_I420, info.frame_width,
-                                             info.frame_height, 1)) {
+                     info.frame_height, 1)) {
    die("Failed to allocate image.");
  }

-  keyframe_interval = strtol(keyframe_interval_arg, NULL, 0);
-  if (keyframe_interval < 0)
-    die("Invalid keyframe interval value.");
+  keyframe_interval = (int)strtol(keyframe_interval_arg, NULL, 0);
+  if (keyframe_interval < 0) die("Invalid keyframe interval value.");

  printf("Using %s\n", vpx_codec_iface_name(encoder->codec_interface()));

  res = vpx_codec_enc_config_default(encoder->codec_interface(), &cfg, 0);
-  if (res)
-    die_codec(&codec, "Failed to get default codec config.");
+  if (res) die_codec(&codec, "Failed to get default codec config.");

  cfg.g_w = info.frame_width;
  cfg.g_h = info.frame_height;
  cfg.g_timebase.num = info.time_base.numerator;
  cfg.g_timebase.den = info.time_base.denominator;
  cfg.rc_target_bitrate = bitrate;
-  cfg.g_error_resilient = argc > 7 ? strtol(argv[7], NULL, 0) : 0;
+  cfg.g_error_resilient = (vpx_codec_er_flags_t)strtoul(argv[7], NULL, 0);

  writer = vpx_video_writer_open(outfile_arg, kContainerIVF, &info);
-  if (!writer)
-    die("Failed to open %s for writing.", outfile_arg);
+  if (!writer) die("Failed to open %s for writing.", outfile_arg);

  if (!(infile = fopen(infile_arg, "rb")))
    die("Failed to open %s for reading.", infile_arg);
@@ -237,18 +226,20 @@ int main(int argc, char **argv) {
    if (keyframe_interval > 0 && frame_count % keyframe_interval == 0)
      flags |= VPX_EFLAG_FORCE_KF;
    encode_frame(&codec, &raw, frame_count++, flags, writer);
+    frames_encoded++;
+    if (max_frames > 0 && frames_encoded >= max_frames) break;
  }

  // Flush encoder.
-  while (encode_frame(&codec, NULL, -1, 0, writer)) {};
+  while (encode_frame(&codec, NULL, -1, 0, writer)) {
+  }

  printf("\n");
  fclose(infile);
  printf("Processed %d frames.\n", frame_count);

  vpx_img_free(&raw);
-  if (vpx_codec_destroy(&codec))
-    die_codec(&codec, "Failed to destroy codec.");
+  if (vpx_codec_destroy(&codec)) die_codec(&codec, "Failed to destroy codec.");

  vpx_video_writer_close(writer);

--- a/examples/twopass_encoder.c
+++ b/examples/twopass_encoder.c
@@ -59,25 +59,23 @@
 static const char *exec_name;

 void usage_exit(void) {
-  fprintf(stderr, "Usage: %s <codec> <width> <height> <infile> <outfile>\n",
+  fprintf(stderr,
+          "Usage: %s <codec> <width> <height> <infile> <outfile> "
+          "<frame limit>\n",
          exec_name);
  exit(EXIT_FAILURE);
 }

-static int get_frame_stats(vpx_codec_ctx_t *ctx,
-                           const vpx_image_t *img,
-                           vpx_codec_pts_t pts,
-                           unsigned int duration,
-                           vpx_enc_frame_flags_t flags,
-                           unsigned int deadline,
+static int get_frame_stats(vpx_codec_ctx_t *ctx, const vpx_image_t *img,
+                           vpx_codec_pts_t pts, unsigned int duration,
+                           vpx_enc_frame_flags_t flags, unsigned int deadline,
                           vpx_fixed_buf_t *stats) {
  int got_pkts = 0;
  vpx_codec_iter_t iter = NULL;
  const vpx_codec_cx_pkt_t *pkt = NULL;
-  const vpx_codec_err_t res = vpx_codec_encode(ctx, img, pts, duration, flags,
-                                               deadline);
-  if (res != VPX_CODEC_OK)
-    die_codec(ctx, "Failed to get frame stats.");
+  const vpx_codec_err_t res =
+      vpx_codec_encode(ctx, img, pts, duration, flags, deadline);
+  if (res != VPX_CODEC_OK) die_codec(ctx, "Failed to get frame stats.");

  while ((pkt = vpx_codec_get_cx_data(ctx, &iter)) != NULL) {
    got_pkts = 1;
@@ -94,20 +92,16 @@ static int get_frame_stats(vpx_codec_ctx_t *ctx,
  return got_pkts;
 }

-static int encode_frame(vpx_codec_ctx_t *ctx,
-                        const vpx_image_t *img,
-                        vpx_codec_pts_t pts,
-                        unsigned int duration,
-                        vpx_enc_frame_flags_t flags,
-                        unsigned int deadline,
+static int encode_frame(vpx_codec_ctx_t *ctx, const vpx_image_t *img,
+                        vpx_codec_pts_t pts, unsigned int duration,
+                        vpx_enc_frame_flags_t flags, unsigned int deadline,
                        VpxVideoWriter *writer) {
  int got_pkts = 0;
  vpx_codec_iter_t iter = NULL;
  const vpx_codec_cx_pkt_t *pkt = NULL;
-  const vpx_codec_err_t res = vpx_codec_encode(ctx, img, pts, duration, flags,
-                                               deadline);
-  if (res != VPX_CODEC_OK)
-    die_codec(ctx, "Failed to encode frame.");
+  const vpx_codec_err_t res =
+      vpx_codec_encode(ctx, img, pts, duration, flags, deadline);
+  if (res != VPX_CODEC_OK) die_codec(ctx, "Failed to encode frame.");

  while ((pkt = vpx_codec_get_cx_data(ctx, &iter)) != NULL) {
    got_pkts = 1;
@@ -115,8 +109,8 @@ static int encode_frame(vpx_codec_ctx_t *ctx,
      const int keyframe = (pkt->data.frame.flags & VPX_FRAME_IS_KEY) != 0;

      if (!vpx_video_writer_write_frame(writer, pkt->data.frame.buf,
-                                                pkt->data.frame.sz,
-                                                pkt->data.frame.pts))
+                                        pkt->data.frame.sz,
+                                        pkt->data.frame.pts))
        die_codec(ctx, "Failed to write compressed frame.");
      printf(keyframe ? "K" : ".");
      fflush(stdout);
@@ -126,13 +120,12 @@ static int encode_frame(vpx_codec_ctx_t *ctx,
  return got_pkts;
 }

-static vpx_fixed_buf_t pass0(vpx_image_t *raw,
-                             FILE *infile,
+static vpx_fixed_buf_t pass0(vpx_image_t *raw, FILE *infile,
                             const VpxInterface *encoder,
-                             const vpx_codec_enc_cfg_t *cfg) {
+                             const vpx_codec_enc_cfg_t *cfg, int max_frames) {
  vpx_codec_ctx_t codec;
  int frame_count = 0;
-  vpx_fixed_buf_t stats = {NULL, 0};
+  vpx_fixed_buf_t stats = { NULL, 0 };

  if (vpx_codec_enc_init(&codec, encoder->codec_interface(), cfg, 0))
    die_codec(&codec, "Failed to initialize encoder");
@@ -142,37 +135,33 @@ static vpx_fixed_buf_t pass0(vpx_image_t *raw,
    ++frame_count;
    get_frame_stats(&codec, raw, frame_count, 1, 0, VPX_DL_GOOD_QUALITY,
                    &stats);
+    if (max_frames > 0 && frame_count >= max_frames) break;
  }

  // Flush encoder.
-  while (get_frame_stats(&codec, NULL, frame_count, 1, 0,
-                         VPX_DL_GOOD_QUALITY, &stats)) {}
+  while (get_frame_stats(&codec, NULL, frame_count, 1, 0, VPX_DL_GOOD_QUALITY,
+                         &stats)) {
+  }

  printf("Pass 0 complete. Processed %d frames.\n", frame_count);
-  if (vpx_codec_destroy(&codec))
-    die_codec(&codec, "Failed to destroy codec.");
+  if (vpx_codec_destroy(&codec)) die_codec(&codec, "Failed to destroy codec.");

  return stats;
 }

-static void pass1(vpx_image_t *raw,
-                  FILE *infile,
-                  const char *outfile_name,
-                  const VpxInterface *encoder,
-                  const vpx_codec_enc_cfg_t *cfg) {
-  VpxVideoInfo info = {
-    encoder->fourcc,
-    cfg->g_w,
-    cfg->g_h,
-    {cfg->g_timebase.num, cfg->g_timebase.den}
-  };
+static void pass1(vpx_image_t *raw, FILE *infile, const char *outfile_name,
+                  const VpxInterface *encoder, const vpx_codec_enc_cfg_t *cfg,
+                  int max_frames) {
+  VpxVideoInfo info = { encoder->fourcc,
+                        cfg->g_w,
+                        cfg->g_h,
+                        { cfg->g_timebase.num, cfg->g_timebase.den } };
  VpxVideoWriter *writer = NULL;
  vpx_codec_ctx_t codec;
  int frame_count = 0;

  writer = vpx_video_writer_open(outfile_name, kContainerIVF, &info);
-  if (!writer)
-    die("Failed to open %s for writing", outfile_name);
+  if (!writer) die("Failed to open %s for writing", outfile_name);

  if (vpx_codec_enc_init(&codec, encoder->codec_interface(), cfg, 0))
    die_codec(&codec, "Failed to initialize encoder");
@@ -181,15 +170,17 @@ static void pass1(vpx_image_t *raw,
  while (vpx_img_read(raw, infile)) {
    ++frame_count;
    encode_frame(&codec, raw, frame_count, 1, 0, VPX_DL_GOOD_QUALITY, writer);
+
+    if (max_frames > 0 && frame_count >= max_frames) break;
  }

  // Flush encoder.
-  while (encode_frame(&codec, NULL, -1, 1, 0, VPX_DL_GOOD_QUALITY, writer)) {}
+  while (encode_frame(&codec, NULL, -1, 1, 0, VPX_DL_GOOD_QUALITY, writer)) {
+  }

  printf("\n");

-  if (vpx_codec_destroy(&codec))
-    die_codec(&codec, "Failed to destroy codec.");
+  if (vpx_codec_destroy(&codec)) die_codec(&codec, "Failed to destroy codec.");

  vpx_video_writer_close(writer);

@@ -206,26 +197,27 @@ int main(int argc, char **argv) {
  vpx_fixed_buf_t stats;

  const VpxInterface *encoder = NULL;
-  const int fps = 30;        // TODO(dkovalev) add command line argument
-  const int bitrate = 200;   // kbit/s TODO(dkovalev) add command line argument
+  const int fps = 30;       // TODO(dkovalev) add command line argument
+  const int bitrate = 200;  // kbit/s TODO(dkovalev) add command line argument
  const char *const codec_arg = argv[1];
  const char *const width_arg = argv[2];
  const char *const height_arg = argv[3];
  const char *const infile_arg = argv[4];
  const char *const outfile_arg = argv[5];
+  int max_frames = 0;
  exec_name = argv[0];

-  if (argc != 6)
-    die("Invalid number of arguments.");
+  if (argc != 7) die("Invalid number of arguments.");
+
+  max_frames = (int)strtol(argv[6], NULL, 0);

  encoder = get_vpx_encoder_by_name(codec_arg);
-  if (!encoder)
-    die("Unsupported codec.");
+  if (!encoder) die("Unsupported codec.");

-  w = strtol(width_arg, NULL, 0);
-  h = strtol(height_arg, NULL, 0);
+  w = (int)strtol(width_arg, NULL, 0);
+  h = (int)strtol(height_arg, NULL, 0);

-  if (w  <= 0 || h <= 0 || (w % 2) != 0 || (h  % 2) != 0)
+  if (w <= 0 || h <= 0 || (w % 2) != 0 || (h % 2) != 0)
    die("Invalid frame size: %dx%d", w, h);

  if (!vpx_img_alloc(&raw, VPX_IMG_FMT_I420, w, h, 1))
@@ -235,8 +227,7 @@ int main(int argc, char **argv) {

  // Configuration
  res = vpx_codec_enc_config_default(encoder->codec_interface(), &cfg, 0);
-  if (res)
-    die_codec(&codec, "Failed to get default codec config.");
+  if (res) die_codec(&codec, "Failed to get default codec config.");

  cfg.g_w = w;
  cfg.g_h = h;
@@ -249,13 +240,13 @@ int main(int argc, char **argv) {

  // Pass 0
  cfg.g_pass = VPX_RC_FIRST_PASS;
-  stats = pass0(&raw, infile, encoder, &cfg);
+  stats = pass0(&raw, infile, encoder, &cfg, max_frames);

  // Pass 1
  rewind(infile);
  cfg.g_pass = VPX_RC_LAST_PASS;
  cfg.rc_twopass_stats_in = stats;
-  pass1(&raw, infile, outfile_arg, encoder, &cfg);
+  pass1(&raw, infile, outfile_arg, encoder, &cfg, max_frames);
  free(stats.buf);

  vpx_img_free(&raw);
--- a/examples/vp8_multi_resolution_encoder.c
+++ b/examples/vp8_multi_resolution_encoder.c
--- a/examples/vp8cx_set_ref.c
+++ b/examples/vp8cx_set_ref.c
@@ -8,7 +8,6 @@
 *  be found in the AUTHORS file in the root of the source tree.
 */

-
 // VP8 Set Reference Frame
 // =======================
 //
@@ -52,6 +51,7 @@

 #include "vpx/vp8cx.h"
 #include "vpx/vpx_encoder.h"
+#include "vp8/common/common.h"

 #include "../tools_common.h"
 #include "../video_writer.h"
@@ -64,25 +64,21 @@ void usage_exit(void) {
  exit(EXIT_FAILURE);
 }

-static int encode_frame(vpx_codec_ctx_t *codec,
-                        vpx_image_t *img,
-                        int frame_index,
-                        VpxVideoWriter *writer) {
+static int encode_frame(vpx_codec_ctx_t *codec, vpx_image_t *img,
+                        int frame_index, VpxVideoWriter *writer) {
  int got_pkts = 0;
  vpx_codec_iter_t iter = NULL;
  const vpx_codec_cx_pkt_t *pkt = NULL;
-  const vpx_codec_err_t res = vpx_codec_encode(codec, img, frame_index, 1, 0,
-                                               VPX_DL_GOOD_QUALITY);
-  if (res != VPX_CODEC_OK)
-    die_codec(codec, "Failed to encode frame");
+  const vpx_codec_err_t res =
+      vpx_codec_encode(codec, img, frame_index, 1, 0, VPX_DL_GOOD_QUALITY);
+  if (res != VPX_CODEC_OK) die_codec(codec, "Failed to encode frame");

  while ((pkt = vpx_codec_get_cx_data(codec, &iter)) != NULL) {
    got_pkts = 1;

    if (pkt->kind == VPX_CODEC_CX_FRAME_PKT) {
      const int keyframe = (pkt->data.frame.flags & VPX_FRAME_IS_KEY) != 0;
-      if (!vpx_video_writer_write_frame(writer,
-                                        pkt->data.frame.buf,
+      if (!vpx_video_writer_write_frame(writer, pkt->data.frame.buf,
                                        pkt->data.frame.sz,
                                        pkt->data.frame.pts)) {
        die_codec(codec, "Failed to write compressed frame");
@@ -98,55 +94,53 @@ static int encode_frame(vpx_codec_ctx_t *codec,

 int main(int argc, char **argv) {
  FILE *infile = NULL;
-  vpx_codec_ctx_t codec = {0};
-  vpx_codec_enc_cfg_t cfg = {0};
+  vpx_codec_ctx_t codec;
+  vpx_codec_enc_cfg_t cfg;
  int frame_count = 0;
  vpx_image_t raw;
  vpx_codec_err_t res;
-  VpxVideoInfo info = {0};
+  VpxVideoInfo info;
  VpxVideoWriter *writer = NULL;
  const VpxInterface *encoder = NULL;
  int update_frame_num = 0;
-  const int fps = 30;        // TODO(dkovalev) add command line argument
-  const int bitrate = 200;   // kbit/s TODO(dkovalev) add command line argument
+  const int fps = 30;       // TODO(dkovalev) add command line argument
+  const int bitrate = 200;  // kbit/s TODO(dkovalev) add command line argument
+
+  vp8_zero(codec);
+  vp8_zero(cfg);
+  vp8_zero(info);

  exec_name = argv[0];

-  if (argc != 6)
-    die("Invalid number of arguments");
+  if (argc != 6) die("Invalid number of arguments");

  // TODO(dkovalev): add vp9 support and rename the file accordingly
  encoder = get_vpx_encoder_by_name("vp8");
-  if (!encoder)
-    die("Unsupported codec.");
+  if (!encoder) die("Unsupported codec.");

  update_frame_num = atoi(argv[5]);
-  if (!update_frame_num)
-    die("Couldn't parse frame number '%s'\n", argv[5]);
+  if (!update_frame_num) die("Couldn't parse frame number '%s'\n", argv[5]);

  info.codec_fourcc = encoder->fourcc;
-  info.frame_width = strtol(argv[1], NULL, 0);
-  info.frame_height = strtol(argv[2], NULL, 0);
+  info.frame_width = (int)strtol(argv[1], NULL, 0);
+  info.frame_height = (int)strtol(argv[2], NULL, 0);
  info.time_base.numerator = 1;
  info.time_base.denominator = fps;

-  if (info.frame_width <= 0 ||
-      info.frame_height <= 0 ||
-      (info.frame_width % 2) != 0 ||
-      (info.frame_height % 2) != 0) {
+  if (info.frame_width <= 0 || info.frame_height <= 0 ||
+      (info.frame_width % 2) != 0 || (info.frame_height % 2) != 0) {
    die("Invalid frame size: %dx%d", info.frame_width, info.frame_height);
  }

  if (!vpx_img_alloc(&raw, VPX_IMG_FMT_I420, info.frame_width,
-                                             info.frame_height, 1)) {
+                     info.frame_height, 1)) {
    die("Failed to allocate image.");
  }

  printf("Using %s\n", vpx_codec_iface_name(encoder->codec_interface()));

  res = vpx_codec_enc_config_default(encoder->codec_interface(), &cfg, 0);
-  if (res)
-    die_codec(&codec, "Failed to get default codec config.");
+  if (res) die_codec(&codec, "Failed to get default codec config.");

  cfg.g_w = info.frame_width;
  cfg.g_h = info.frame_height;
@@ -155,8 +149,7 @@ int main(int argc, char **argv) {
  cfg.rc_target_bitrate = bitrate;

  writer = vpx_video_writer_open(argv[4], kContainerIVF, &info);
-  if (!writer)
-    die("Failed to open %s for writing.", argv[4]);
+  if (!writer) die("Failed to open %s for writing.", argv[4]);

  if (!(infile = fopen(argv[3], "rb")))
    die("Failed to open %s for reading.", argv[3]);
@@ -178,15 +171,15 @@ int main(int argc, char **argv) {
  }

  // Flush encoder.
-  while (encode_frame(&codec, NULL, -1, writer)) {}
+  while (encode_frame(&codec, NULL, -1, writer)) {
+  }

  printf("\n");
  fclose(infile);
  printf("Processed %d frames.\n", frame_count);

  vpx_img_free(&raw);
-  if (vpx_codec_destroy(&codec))
-    die_codec(&codec, "Failed to destroy codec.");
+  if (vpx_codec_destroy(&codec)) die_codec(&codec, "Failed to destroy codec.");

  vpx_video_writer_close(writer);

--- a/examples/vp9_lossless_encoder.c
+++ b/examples/vp9_lossless_encoder.c
@@ -14,6 +14,7 @@

 #include "vpx/vpx_encoder.h"
 #include "vpx/vp8cx.h"
+#include "vp9/common/vp9_common.h"

 #include "../tools_common.h"
 #include "../video_writer.h"
@@ -21,32 +22,28 @@
 static const char *exec_name;

 void usage_exit(void) {
-  fprintf(stderr, "vp9_lossless_encoder: Example demonstrating VP9 lossless "
-                  "encoding feature. Supports raw input only.\n");
+  fprintf(stderr,
+          "vp9_lossless_encoder: Example demonstrating VP9 lossless "
+          "encoding feature. Supports raw input only.\n");
  fprintf(stderr, "Usage: %s <width> <height> <infile> <outfile>\n", exec_name);
  exit(EXIT_FAILURE);
 }

-static int encode_frame(vpx_codec_ctx_t *codec,
-                        vpx_image_t *img,
-                        int frame_index,
-                        int flags,
-                        VpxVideoWriter *writer) {
+static int encode_frame(vpx_codec_ctx_t *codec, vpx_image_t *img,
+                        int frame_index, int flags, VpxVideoWriter *writer) {
  int got_pkts = 0;
  vpx_codec_iter_t iter = NULL;
  const vpx_codec_cx_pkt_t *pkt = NULL;
-  const vpx_codec_err_t res = vpx_codec_encode(codec, img, frame_index, 1,
-                                               flags, VPX_DL_GOOD_QUALITY);
-  if (res != VPX_CODEC_OK)
-    die_codec(codec, "Failed to encode frame");
+  const vpx_codec_err_t res =
+      vpx_codec_encode(codec, img, frame_index, 1, flags, VPX_DL_GOOD_QUALITY);
+  if (res != VPX_CODEC_OK) die_codec(codec, "Failed to encode frame");

  while ((pkt = vpx_codec_get_cx_data(codec, &iter)) != NULL) {
    got_pkts = 1;

    if (pkt->kind == VPX_CODEC_CX_FRAME_PKT) {
      const int keyframe = (pkt->data.frame.flags & VPX_FRAME_IS_KEY) != 0;
-      if (!vpx_video_writer_write_frame(writer,
-                                        pkt->data.frame.buf,
+      if (!vpx_video_writer_write_frame(writer, pkt->data.frame.buf,
                                        pkt->data.frame.sz,
                                        pkt->data.frame.pts)) {
        die_codec(codec, "Failed to write compressed frame");
@@ -66,43 +63,40 @@ int main(int argc, char **argv) {
  int frame_count = 0;
  vpx_image_t raw;
  vpx_codec_err_t res;
-  VpxVideoInfo info = {0};
+  VpxVideoInfo info;
  VpxVideoWriter *writer = NULL;
  const VpxInterface *encoder = NULL;
  const int fps = 30;

+  vp9_zero(info);
+
  exec_name = argv[0];

-  if (argc < 5)
-    die("Invalid number of arguments");
+  if (argc < 5) die("Invalid number of arguments");

  encoder = get_vpx_encoder_by_name("vp9");
-  if (!encoder)
-     die("Unsupported codec.");
+  if (!encoder) die("Unsupported codec.");

  info.codec_fourcc = encoder->fourcc;
-  info.frame_width = strtol(argv[1], NULL, 0);
-  info.frame_height = strtol(argv[2], NULL, 0);
+  info.frame_width = (int)strtol(argv[1], NULL, 0);
+  info.frame_height = (int)strtol(argv[2], NULL, 0);
  info.time_base.numerator = 1;
  info.time_base.denominator = fps;

-  if (info.frame_width <= 0 ||
-      info.frame_height <= 0 ||
-      (info.frame_width % 2) != 0 ||
-      (info.frame_height % 2) != 0) {
+  if (info.frame_width <= 0 || info.frame_height <= 0 ||
+      (info.frame_width % 2) != 0 || (info.frame_height % 2) != 0) {
    die("Invalid frame size: %dx%d", info.frame_width, info.frame_height);
  }

  if (!vpx_img_alloc(&raw, VPX_IMG_FMT_I420, info.frame_width,
-                                             info.frame_height, 1)) {
+                     info.frame_height, 1)) {
    die("Failed to allocate image.");
  }

  printf("Using %s\n", vpx_codec_iface_name(encoder->codec_interface()));

  res = vpx_codec_enc_config_default(encoder->codec_interface(), &cfg, 0);
-  if (res)
-    die_codec(&codec, "Failed to get default codec config.");
+  if (res) die_codec(&codec, "Failed to get default codec config.");

  cfg.g_w = info.frame_width;
  cfg.g_h = info.frame_height;
@@ -110,8 +104,7 @@ int main(int argc, char **argv) {
  cfg.g_timebase.den = info.time_base.denominator;

  writer = vpx_video_writer_open(argv[4], kContainerIVF, &info);
-  if (!writer)
-    die("Failed to open %s for writing.", argv[4]);
+  if (!writer) die("Failed to open %s for writing.", argv[4]);

  if (!(infile = fopen(argv[3], "rb")))
    die("Failed to open %s for reading.", argv[3]);
@@ -128,15 +121,15 @@ int main(int argc, char **argv) {
  }

  // Flush encoder.
-  while (encode_frame(&codec, NULL, -1, 0, writer)) {}
+  while (encode_frame(&codec, NULL, -1, 0, writer)) {
+  }

  printf("\n");
  fclose(infile);
  printf("Processed %d frames.\n", frame_count);

  vpx_img_free(&raw);
-  if (vpx_codec_destroy(&codec))
-    die_codec(&codec, "Failed to destroy codec.");
+  if (vpx_codec_destroy(&codec)) die_codec(&codec, "Failed to destroy codec.");

  vpx_video_writer_close(writer);

--- a/examples/vp9_spatial_svc_encoder.c
+++ b/examples/vp9_spatial_svc_encoder.c
@@ -20,15 +20,16 @@
 #include <string.h>
 #include <time.h>

-
 #include "../args.h"
 #include "../tools_common.h"
 #include "../video_writer.h"

+#include "../vpx_ports/vpx_timer.h"
 #include "vpx/svc_context.h"
 #include "vpx/vp8cx.h"
 #include "vpx/vpx_encoder.h"
 #include "../vpxstats.h"
+#include "vp9/encoder/vp9_encoder.h"
 #define OUTPUT_RC_STATS 1

 static const arg_def_t skip_frames_arg =
@@ -52,8 +53,9 @@ static const arg_def_t spatial_layers_arg =
 static const arg_def_t temporal_layers_arg =
    ARG_DEF("tl", "temporal-layers", 1, "number of temporal SVC layers");
 static const arg_def_t temporal_layering_mode_arg =
-    ARG_DEF("tlm", "temporal-layering-mode", 1, "temporal layering scheme."
-        "VP9E_TEMPORAL_LAYERING_MODE");
+    ARG_DEF("tlm", "temporal-layering-mode", 1,
+            "temporal layering scheme."
+            "VP9E_TEMPORAL_LAYERING_MODE");
 static const arg_def_t kf_dist_arg =
    ARG_DEF("k", "kf-dist", 1, "number of frames between keyframes");
 static const arg_def_t scale_factors_arg =
@@ -73,44 +75,59 @@ static const arg_def_t min_bitrate_arg =
 static const arg_def_t max_bitrate_arg =
    ARG_DEF(NULL, "max-bitrate", 1, "Maximum bitrate");
 static const arg_def_t lag_in_frame_arg =
-    ARG_DEF(NULL, "lag-in-frames", 1, "Number of frame to input before "
-        "generating any outputs");
+    ARG_DEF(NULL, "lag-in-frames", 1,
+            "Number of frame to input before "
+            "generating any outputs");
 static const arg_def_t rc_end_usage_arg =
    ARG_DEF(NULL, "rc-end-usage", 1, "0 - 3: VBR, CBR, CQ, Q");
 static const arg_def_t speed_arg =
    ARG_DEF("sp", "speed", 1, "speed configuration");
+static const arg_def_t aqmode_arg =
+    ARG_DEF("aq", "aqmode", 1, "aq-mode off/on");
+static const arg_def_t bitrates_arg =
+    ARG_DEF("bl", "bitrates", 1, "bitrates[sl * num_tl + tl]");

 #if CONFIG_VP9_HIGHBITDEPTH
 static const struct arg_enum_list bitdepth_enum[] = {
-  {"8",  VPX_BITS_8},
-  {"10", VPX_BITS_10},
-  {"12", VPX_BITS_12},
-  {NULL, 0}
+  { "8", VPX_BITS_8 }, { "10", VPX_BITS_10 }, { "12", VPX_BITS_12 }, { NULL, 0 }
 };

-static const arg_def_t bitdepth_arg =
-    ARG_DEF_ENUM("d", "bit-depth", 1, "Bit depth for codec 8, 10 or 12. ",
-                 bitdepth_enum);
+static const arg_def_t bitdepth_arg = ARG_DEF_ENUM(
+    "d", "bit-depth", 1, "Bit depth for codec 8, 10 or 12. ", bitdepth_enum);
 #endif  // CONFIG_VP9_HIGHBITDEPTH

-
-static const arg_def_t *svc_args[] = {
-  &frames_arg,        &width_arg,         &height_arg,
-  &timebase_arg,      &bitrate_arg,       &skip_frames_arg, &spatial_layers_arg,
-  &kf_dist_arg,       &scale_factors_arg, &passes_arg,      &pass_arg,
-  &fpf_name_arg,      &min_q_arg,         &max_q_arg,       &min_bitrate_arg,
-  &max_bitrate_arg,   &temporal_layers_arg, &temporal_layering_mode_arg,
-  &lag_in_frame_arg,  &threads_arg,
+static const arg_def_t *svc_args[] = { &frames_arg,
+                                       &width_arg,
+                                       &height_arg,
+                                       &timebase_arg,
+                                       &bitrate_arg,
+                                       &skip_frames_arg,
+                                       &spatial_layers_arg,
+                                       &kf_dist_arg,
+                                       &scale_factors_arg,
+                                       &passes_arg,
+                                       &pass_arg,
+                                       &fpf_name_arg,
+                                       &min_q_arg,
+                                       &max_q_arg,
+                                       &min_bitrate_arg,
+                                       &max_bitrate_arg,
+                                       &temporal_layers_arg,
+                                       &temporal_layering_mode_arg,
+                                       &lag_in_frame_arg,
+                                       &threads_arg,
+                                       &aqmode_arg,
 #if OUTPUT_RC_STATS
-  &output_rc_stats_arg,
+                                       &output_rc_stats_arg,
 #endif

 #if CONFIG_VP9_HIGHBITDEPTH
-  &bitdepth_arg,
+                                       &bitdepth_arg,
 #endif
-  &speed_arg,
-  &rc_end_usage_arg,  NULL
-};
+                                       &speed_arg,
+                                       &rc_end_usage_arg,
+                                       &bitrates_arg,
+                                       NULL };

 static const uint32_t default_frames_to_skip = 0;
 static const uint32_t default_frames_to_code = 60 * 60;
@@ -124,7 +141,7 @@ static const uint32_t default_temporal_layers = 1;
 static const uint32_t default_kf_dist = 100;
 static const uint32_t default_temporal_layering_mode = 0;
 static const uint32_t default_output_rc_stats = 0;
-static const int32_t default_speed = -1;  // -1 means use library default.
+static const int32_t default_speed = -1;    // -1 means use library default.
 static const uint32_t default_threads = 0;  // zero means use library default.

 typedef struct {
@@ -151,7 +168,7 @@ void usage_exit(void) {
 static void parse_command_line(int argc, const char **argv_,
                               AppInput *app_input, SvcContext *svc_ctx,
                               vpx_codec_enc_cfg_t *enc_cfg) {
-  struct arg arg = {0};
+  struct arg arg = { 0 };
  char **argv = NULL;
  char **argi = NULL;
  char **argj = NULL;
@@ -161,7 +178,7 @@ static void parse_command_line(int argc, const char **argv_,
  const char *fpf_file_name = NULL;
  unsigned int min_bitrate = 0;
  unsigned int max_bitrate = 0;
-  char string_options[1024] = {0};
+  char string_options[1024] = { 0 };

  // initialize SvcContext with parameters that will be passed to vpx_svc_init
  svc_ctx->log_level = SVC_LOG_DEBUG;
@@ -220,11 +237,13 @@ static void parse_command_line(int argc, const char **argv_,
 #endif
    } else if (arg_match(&arg, &speed_arg, argi)) {
      svc_ctx->speed = arg_parse_uint(&arg);
+    } else if (arg_match(&arg, &aqmode_arg, argi)) {
+      svc_ctx->aqmode = arg_parse_uint(&arg);
    } else if (arg_match(&arg, &threads_arg, argi)) {
      svc_ctx->threads = arg_parse_uint(&arg);
    } else if (arg_match(&arg, &temporal_layering_mode_arg, argi)) {
-      svc_ctx->temporal_layering_mode =
-          enc_cfg->temporal_layering_mode = arg_parse_int(&arg);
+      svc_ctx->temporal_layering_mode = enc_cfg->temporal_layering_mode =
+          arg_parse_int(&arg);
      if (svc_ctx->temporal_layering_mode) {
        enc_cfg->g_error_resilient = 1;
      }
@@ -234,6 +253,9 @@ static void parse_command_line(int argc, const char **argv_,
    } else if (arg_match(&arg, &scale_factors_arg, argi)) {
      snprintf(string_options, sizeof(string_options), "%s scale-factors=%s",
               string_options, arg.val);
+    } else if (arg_match(&arg, &bitrates_arg, argi)) {
+      snprintf(string_options, sizeof(string_options), "%s bitrates=%s",
+               string_options, arg.val);
    } else if (arg_match(&arg, &passes_arg, argi)) {
      passes = arg_parse_uint(&arg);
      if (passes < 1 || passes > 2) {
@@ -272,7 +294,7 @@ static void parse_command_line(int argc, const char **argv_,
          enc_cfg->g_input_bit_depth = 10;
          enc_cfg->g_profile = 2;
          break;
-         case VPX_BITS_12:
+        case VPX_BITS_12:
          enc_cfg->g_input_bit_depth = 12;
          enc_cfg->g_profile = 2;
          break;
@@ -354,9 +376,8 @@ static void parse_command_line(int argc, const char **argv_,
      "num: %d, den: %d, bitrate: %d,\n"
      "gop size: %d\n",
      vpx_codec_iface_name(vpx_codec_vp9_cx()), app_input->frames_to_code,
-      app_input->frames_to_skip,
-      svc_ctx->spatial_layers, enc_cfg->g_w, enc_cfg->g_h,
-      enc_cfg->g_timebase.num, enc_cfg->g_timebase.den,
+      app_input->frames_to_skip, svc_ctx->spatial_layers, enc_cfg->g_w,
+      enc_cfg->g_h, enc_cfg->g_timebase.num, enc_cfg->g_timebase.den,
      enc_cfg->rc_target_bitrate, enc_cfg->kf_max_dist);
 }

@@ -393,7 +414,7 @@ struct RateControlStats {
 // Note: these rate control stats assume only 1 key frame in the
 // sequence (i.e., first frame only).
 static void set_rate_control_stats(struct RateControlStats *rc,
-                                     vpx_codec_enc_cfg_t *cfg) {
+                                   vpx_codec_enc_cfg_t *cfg) {
  unsigned int sl, tl;
  // Set the layer (cumulative) framerate and the target layer (non-cumulative)
  // per-frame-bandwidth, for the rate control encoding stats below.
@@ -402,19 +423,18 @@ static void set_rate_control_stats(struct RateControlStats *rc,
  for (sl = 0; sl < cfg->ss_number_layers; ++sl) {
    for (tl = 0; tl < cfg->ts_number_layers; ++tl) {
      const int layer = sl * cfg->ts_number_layers + tl;
-      const int tlayer0 = sl * cfg->ts_number_layers;
-      rc->layer_framerate[layer] =
-          framerate / cfg->ts_rate_decimator[tl];
+      if (cfg->ts_number_layers == 1)
+        rc->layer_framerate[layer] = framerate;
+      else
+        rc->layer_framerate[layer] = framerate / cfg->ts_rate_decimator[tl];
      if (tl > 0) {
-        rc->layer_pfb[layer] = 1000.0 *
-            (cfg->layer_target_bitrate[layer] -
-                cfg->layer_target_bitrate[layer - 1]) /
-            (rc->layer_framerate[layer] -
-                rc->layer_framerate[layer - 1]);
+        rc->layer_pfb[layer] =
+            1000.0 * (cfg->layer_target_bitrate[layer] -
+                      cfg->layer_target_bitrate[layer - 1]) /
+            (rc->layer_framerate[layer] - rc->layer_framerate[layer - 1]);
      } else {
-        rc->layer_pfb[tlayer0] = 1000.0 *
-            cfg->layer_target_bitrate[tlayer0] /
-            rc->layer_framerate[tlayer0];
+        rc->layer_pfb[layer] = 1000.0 * cfg->layer_target_bitrate[layer] /
+                               rc->layer_framerate[layer];
      }
      rc->layer_input_frames[layer] = 0;
      rc->layer_enc_frames[layer] = 0;
@@ -434,35 +454,38 @@ static void printout_rate_control_summary(struct RateControlStats *rc,
                                          vpx_codec_enc_cfg_t *cfg,
                                          int frame_cnt) {
  unsigned int sl, tl;
-  int tot_num_frames = 0;
  double perc_fluctuation = 0.0;
+  int tot_num_frames = 0;
  printf("Total number of processed frames: %d\n\n", frame_cnt - 1);
  printf("Rate control layer stats for sl%d tl%d layer(s):\n\n",
-      cfg->ss_number_layers, cfg->ts_number_layers);
+         cfg->ss_number_layers, cfg->ts_number_layers);
  for (sl = 0; sl < cfg->ss_number_layers; ++sl) {
+    tot_num_frames = 0;
    for (tl = 0; tl < cfg->ts_number_layers; ++tl) {
      const int layer = sl * cfg->ts_number_layers + tl;
-      const int num_dropped = (tl > 0) ?
-          (rc->layer_input_frames[layer] - rc->layer_enc_frames[layer]) :
-          (rc->layer_input_frames[layer] - rc->layer_enc_frames[layer] - 1);
-      if (!sl)
-        tot_num_frames += rc->layer_input_frames[layer];
+      const int num_dropped =
+          (tl > 0)
+              ? (rc->layer_input_frames[layer] - rc->layer_enc_frames[layer])
+              : (rc->layer_input_frames[layer] - rc->layer_enc_frames[layer] -
+                 1);
+      tot_num_frames += rc->layer_input_frames[layer];
      rc->layer_encoding_bitrate[layer] = 0.001 * rc->layer_framerate[layer] *
-          rc->layer_encoding_bitrate[layer] / tot_num_frames;
-      rc->layer_avg_frame_size[layer] = rc->layer_avg_frame_size[layer] /
-          rc->layer_enc_frames[layer];
-      rc->layer_avg_rate_mismatch[layer] =
-          100.0 * rc->layer_avg_rate_mismatch[layer] /
-          rc->layer_enc_frames[layer];
+                                          rc->layer_encoding_bitrate[layer] /
+                                          tot_num_frames;
+      rc->layer_avg_frame_size[layer] =
+          rc->layer_avg_frame_size[layer] / rc->layer_enc_frames[layer];
+      rc->layer_avg_rate_mismatch[layer] = 100.0 *
+                                           rc->layer_avg_rate_mismatch[layer] /
+                                           rc->layer_enc_frames[layer];
      printf("For layer#: sl%d tl%d \n", sl, tl);
      printf("Bitrate (target vs actual): %d %f.0 kbps\n",
             cfg->layer_target_bitrate[layer],
             rc->layer_encoding_bitrate[layer]);
      printf("Average frame size (target vs actual): %f %f bits\n",
             rc->layer_pfb[layer], rc->layer_avg_frame_size[layer]);
-      printf("Average rate_mismatch: %f\n",
-             rc->layer_avg_rate_mismatch[layer]);
-      printf("Number of input frames, encoded (non-key) frames, "
+      printf("Average rate_mismatch: %f\n", rc->layer_avg_rate_mismatch[layer]);
+      printf(
+          "Number of input frames, encoded (non-key) frames, "
          "and percent dropped frames: %d %d %f.0 \n",
          rc->layer_input_frames[layer], rc->layer_enc_frames[layer],
          100.0 * num_dropped / rc->layer_input_frames[layer]);
@@ -474,19 +497,18 @@ static void printout_rate_control_summary(struct RateControlStats *rc,
      rc->variance_st_encoding_bitrate / rc->window_count -
      (rc->avg_st_encoding_bitrate * rc->avg_st_encoding_bitrate);
  perc_fluctuation = 100.0 * sqrt(rc->variance_st_encoding_bitrate) /
-      rc->avg_st_encoding_bitrate;
+                     rc->avg_st_encoding_bitrate;
  printf("Short-time stats, for window of %d frames: \n", rc->window_size);
  printf("Average, rms-variance, and percent-fluct: %f %f %f \n",
-         rc->avg_st_encoding_bitrate,
-         sqrt(rc->variance_st_encoding_bitrate),
+         rc->avg_st_encoding_bitrate, sqrt(rc->variance_st_encoding_bitrate),
         perc_fluctuation);
  if (frame_cnt != tot_num_frames)
    die("Error: Number of input frames not equal to output encoded frames != "
-        "%d tot_num_frames = %d\n", frame_cnt, tot_num_frames);
+        "%d tot_num_frames = %d\n",
+        frame_cnt, tot_num_frames);
 }

-vpx_codec_err_t parse_superframe_index(const uint8_t *data,
-                                       size_t data_sz,
+vpx_codec_err_t parse_superframe_index(const uint8_t *data, size_t data_sz,
                                       uint32_t sizes[8], int *count) {
  // A chunk ending with a byte matching 0xc0 is an invalid chunk unless
  // it is a super frame index. If the last byte of real video compression
@@ -499,7 +521,6 @@ vpx_codec_err_t parse_superframe_index(const uint8_t *data,
  marker = *(data + data_sz - 1);
  *count = 0;

-
  if ((marker & 0xe0) == 0xc0) {
    const uint32_t frames = (marker & 0x7) + 1;
    const uint32_t mag = ((marker >> 3) & 0x3) + 1;
@@ -507,8 +528,7 @@ vpx_codec_err_t parse_superframe_index(const uint8_t *data,

    // This chunk is marked as having a superframe index but doesn't have
    // enough data for it, thus it's an invalid superframe index.
-    if (data_sz < index_sz)
-      return VPX_CODEC_CORRUPT_FRAME;
+    if (data_sz < index_sz) return VPX_CODEC_CORRUPT_FRAME;

    {
      const uint8_t marker2 = *(data + data_sz - index_sz);
@@ -516,8 +536,7 @@ vpx_codec_err_t parse_superframe_index(const uint8_t *data,
      // This chunk is marked as having a superframe index but doesn't have
      // the matching marker byte at the front of the index therefore it's an
      // invalid chunk.
-      if (marker != marker2)
-        return VPX_CODEC_CORRUPT_FRAME;
+      if (marker != marker2) return VPX_CODEC_CORRUPT_FRAME;
    }

    {
@@ -528,8 +547,7 @@ vpx_codec_err_t parse_superframe_index(const uint8_t *data,
      for (i = 0; i < frames; ++i) {
        uint32_t this_sz = 0;

-        for (j = 0; j < mag; ++j)
-          this_sz |= (*x++) << (j * 8);
+        for (j = 0; j < mag; ++j) this_sz |= (*x++) << (j * 8);
        sizes[i] = this_sz;
      }
      *count = frames;
@@ -539,10 +557,58 @@ vpx_codec_err_t parse_superframe_index(const uint8_t *data,
 }
 #endif

+// Example frame-flag and buffer-index pattern for num_spatial_layers spatial
+// layers and 2 temporal layers in the bypass/flexible mode; it corresponds to
+// VP9E_TEMPORAL_LAYERING_MODE_0101 (temporal_layering_mode == 2) of the
+// non-flexible mode. The incoming value of sl is ignored (the loop resets it).
+void set_frame_flags_bypass_mode(int sl, int tl, int num_spatial_layers,
+                                 int is_key_frame,
+                                 vpx_svc_ref_frame_config_t *ref_frame_config) {
+  for (sl = 0; sl < num_spatial_layers; ++sl) {
+    if (!tl) {
+      if (!sl) {
+        ref_frame_config->frame_flags[sl] =
+            VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF | VP8_EFLAG_NO_UPD_GF |
+            VP8_EFLAG_NO_UPD_ARF;
+      } else {
+        if (is_key_frame) {
+          ref_frame_config->frame_flags[sl] =
+              VP8_EFLAG_NO_REF_LAST | VP8_EFLAG_NO_REF_ARF |
+              VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF;
+        } else {
+          ref_frame_config->frame_flags[sl] =
+              VP8_EFLAG_NO_REF_ARF | VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF;
+        }
+      }
+    } else if (tl == 1) {
+      if (!sl) {
+        ref_frame_config->frame_flags[sl] =
+            VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF | VP8_EFLAG_NO_UPD_LAST |
+            VP8_EFLAG_NO_UPD_GF;
+      } else {
+        ref_frame_config->frame_flags[sl] =
+            VP8_EFLAG_NO_REF_ARF | VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_GF;
+      }
+    }
+    if (tl == 0) {
+      ref_frame_config->lst_fb_idx[sl] = sl;
+      if (sl)
+        ref_frame_config->gld_fb_idx[sl] = sl - 1;
+      else
+        ref_frame_config->gld_fb_idx[sl] = 0;
+      ref_frame_config->alt_fb_idx[sl] = 0;
+    } else if (tl == 1) {
+      ref_frame_config->lst_fb_idx[sl] = sl;
+      ref_frame_config->gld_fb_idx[sl] = num_spatial_layers + sl - 1;
+      ref_frame_config->alt_fb_idx[sl] = num_spatial_layers + sl;
+    }
+  }
+}
+
 int main(int argc, const char **argv) {
-  AppInput app_input = {0};
+  AppInput app_input = { 0 };
  VpxVideoWriter *writer = NULL;
-  VpxVideoInfo info = {0};
+  VpxVideoInfo info = { 0 };
  vpx_codec_ctx_t codec;
  vpx_codec_enc_cfg_t enc_cfg;
  SvcContext svc_ctx;
@@ -556,23 +622,26 @@ int main(int argc, const char **argv) {
  int end_of_stream = 0;
  int frames_received = 0;
 #if OUTPUT_RC_STATS
-  VpxVideoWriter *outfile[VPX_TS_MAX_LAYERS] = {NULL};
+  VpxVideoWriter *outfile[VPX_TS_MAX_LAYERS] = { NULL };
  struct RateControlStats rc;
  vpx_svc_layer_id_t layer_id;
-  int sl, tl;
+  vpx_svc_ref_frame_config_t ref_frame_config;
+  unsigned int sl, tl;
  double sum_bitrate = 0.0;
  double sum_bitrate2 = 0.0;
-  double framerate  = 30.0;
+  double framerate = 30.0;
 #endif
+  struct vpx_usec_timer timer;
+  int64_t cx_time = 0;
  memset(&svc_ctx, 0, sizeof(svc_ctx));
  svc_ctx.log_print = 1;
  exec_name = argv[0];
  parse_command_line(argc, argv, &app_input, &svc_ctx, &enc_cfg);

-  // Allocate image buffer
+// Allocate image buffer
 #if CONFIG_VP9_HIGHBITDEPTH
-  if (!vpx_img_alloc(&raw, enc_cfg.g_input_bit_depth == 8 ?
-                         VPX_IMG_FMT_I420 : VPX_IMG_FMT_I42016,
+  if (!vpx_img_alloc(&raw, enc_cfg.g_input_bit_depth == 8 ? VPX_IMG_FMT_I420
+                                                          : VPX_IMG_FMT_I42016,
                     enc_cfg.g_w, enc_cfg.g_h, 32)) {
    die("Failed to allocate image %dx%d\n", enc_cfg.g_w, enc_cfg.g_h);
  }
@@ -603,14 +672,14 @@ int main(int argc, const char **argv) {

  if (!(app_input.passes == 2 && app_input.pass == 1)) {
    // We don't save the bitstream for the 1st pass on two pass rate control
-    writer = vpx_video_writer_open(app_input.output_filename, kContainerIVF,
-                                   &info);
+    writer =
+        vpx_video_writer_open(app_input.output_filename, kContainerIVF, &info);
    if (!writer)
      die("Failed to open %s for writing\n", app_input.output_filename);
  }
 #if OUTPUT_RC_STATS
  // For now, just write temporal layer streams.
-  // TODO(wonkap): do spatial by re-writing superframe.
+  // TODO(marpan): do spatial by re-writing superframe.
  if (svc_ctx.output_rc_stat) {
    for (tl = 0; tl < enc_cfg.ts_number_layers; ++tl) {
      char file_name[PATH_MAX];
@@ -618,20 +687,22 @@ int main(int argc, const char **argv) {
      snprintf(file_name, sizeof(file_name), "%s_t%d.ivf",
               app_input.output_filename, tl);
      outfile[tl] = vpx_video_writer_open(file_name, kContainerIVF, &info);
-      if (!outfile[tl])
-        die("Failed to open %s for writing", file_name);
+      if (!outfile[tl]) die("Failed to open %s for writing", file_name);
    }
  }
 #endif

  // skip initial frames
-  for (i = 0; i < app_input.frames_to_skip; ++i)
-    vpx_img_read(&raw, infile);
+  for (i = 0; i < app_input.frames_to_skip; ++i) vpx_img_read(&raw, infile);

  if (svc_ctx.speed != -1)
    vpx_codec_control(&codec, VP8E_SET_CPUUSED, svc_ctx.speed);
  if (svc_ctx.threads)
    vpx_codec_control(&codec, VP9E_SET_TILE_COLUMNS, (svc_ctx.threads >> 1));
+  if (svc_ctx.speed >= 5 && svc_ctx.aqmode == 1)
+    vpx_codec_control(&codec, VP9E_SET_AQ_MODE, 3);
+  if (svc_ctx.speed >= 5)
+    vpx_codec_control(&codec, VP8E_SET_STATIC_THRESHOLD, 1);

  // Encode frames
  while (!end_of_stream) {
@@ -643,11 +714,45 @@ int main(int argc, const char **argv) {
      end_of_stream = 1;
    }

-    res = vpx_svc_encode(&svc_ctx, &codec, (end_of_stream ? NULL : &raw),
-                         pts, frame_duration, svc_ctx.speed >= 5 ?
-                         VPX_DL_REALTIME : VPX_DL_GOOD_QUALITY);
+    // For BYPASS/FLEXIBLE mode, set the frame flags (reference and updates)
+    // and the buffer indices for each spatial layer of the current
+    // (super)frame to be encoded. The temporal layer_id for the current frame
+    // also needs to be set.
+    // TODO(marpan): Should rename the "VP9E_TEMPORAL_LAYERING_MODE_BYPASS"
+    // mode to "VP9E_LAYERING_MODE_BYPASS".
+    if (svc_ctx.temporal_layering_mode == VP9E_TEMPORAL_LAYERING_MODE_BYPASS) {
+      layer_id.spatial_layer_id = 0;
+      // Example for 2 temporal layers.
+      if (frame_cnt % 2 == 0)
+        layer_id.temporal_layer_id = 0;
+      else
+        layer_id.temporal_layer_id = 1;
+      // Note that we only set the temporal layer_id, since we are calling
+      // the encode for the whole superframe. The encoder will internally loop
+      // over all the spatial layers for the current superframe.
+      vpx_codec_control(&codec, VP9E_SET_SVC_LAYER_ID, &layer_id);
+      set_frame_flags_bypass_mode(sl, layer_id.temporal_layer_id,
+                                  svc_ctx.spatial_layers, frame_cnt == 0,
+                                  &ref_frame_config);
+      vpx_codec_control(&codec, VP9E_SET_SVC_REF_FRAME_CONFIG,
+                        &ref_frame_config);
+      // Keep track of input frames, to account for frame drops in rate control
+      // stats/metrics.
+      for (sl = 0; sl < (unsigned int)enc_cfg.ss_number_layers; ++sl) {
+        ++rc.layer_input_frames[sl * enc_cfg.ts_number_layers +
+                                layer_id.temporal_layer_id];
+      }
+    }
+
+    vpx_usec_timer_start(&timer);
+    res = vpx_svc_encode(
+        &svc_ctx, &codec, (end_of_stream ? NULL : &raw), pts, frame_duration,
+        svc_ctx.speed >= 5 ? VPX_DL_REALTIME : VPX_DL_GOOD_QUALITY);
+    vpx_usec_timer_mark(&timer);
+    cx_time += vpx_usec_timer_elapsed(&timer);

    printf("%s", vpx_svc_get_message(&svc_ctx));
+    fflush(stdout);
    if (res != VPX_CODEC_OK) {
      die_codec(&codec, "Failed to encode frame");
    }
@@ -655,42 +760,48 @@ int main(int argc, const char **argv) {
    while ((cx_pkt = vpx_codec_get_cx_data(&codec, &iter)) != NULL) {
      switch (cx_pkt->kind) {
        case VPX_CODEC_CX_FRAME_PKT: {
+          SvcInternal_t *const si = (SvcInternal_t *)svc_ctx.internal;
          if (cx_pkt->data.frame.sz > 0) {
 #if OUTPUT_RC_STATS
            uint32_t sizes[8];
            int count = 0;
 #endif
-            vpx_video_writer_write_frame(writer,
-                                         cx_pkt->data.frame.buf,
+            vpx_video_writer_write_frame(writer, cx_pkt->data.frame.buf,
                                         cx_pkt->data.frame.sz,
                                         cx_pkt->data.frame.pts);
 #if OUTPUT_RC_STATS
-            // TODO(marpan/wonkap): Put this (to line728) in separate function.
+            // TODO(marpan): Put this (to line728) in separate function.
            if (svc_ctx.output_rc_stat) {
              vpx_codec_control(&codec, VP9E_GET_SVC_LAYER_ID, &layer_id);
              parse_superframe_index(cx_pkt->data.frame.buf,
                                     cx_pkt->data.frame.sz, sizes, &count);
-              for (sl = 0; sl < enc_cfg.ss_number_layers; ++sl) {
-                ++rc.layer_input_frames[sl * enc_cfg.ts_number_layers +
-                                        layer_id.temporal_layer_id];
+              // Note: counting layer_input_frames here won't account for frame
+              // drops in rate control stats.
+              // TODO(marpan): Fix this for non-bypass mode so we can get stats
+              // for dropped frames.
+              if (svc_ctx.temporal_layering_mode !=
+                  VP9E_TEMPORAL_LAYERING_MODE_BYPASS) {
+                for (sl = 0; sl < enc_cfg.ss_number_layers; ++sl) {
+                  ++rc.layer_input_frames[sl * enc_cfg.ts_number_layers +
+                                          layer_id.temporal_layer_id];
+                }
              }
              for (tl = layer_id.temporal_layer_id;
-                  tl < enc_cfg.ts_number_layers; ++tl) {
-                vpx_video_writer_write_frame(outfile[tl],
-                                             cx_pkt->data.frame.buf,
-                                             cx_pkt->data.frame.sz,
-                                             cx_pkt->data.frame.pts);
+                   tl < enc_cfg.ts_number_layers; ++tl) {
+                vpx_video_writer_write_frame(
+                    outfile[tl], cx_pkt->data.frame.buf, cx_pkt->data.frame.sz,
+                    cx_pkt->data.frame.pts);
              }

              for (sl = 0; sl < enc_cfg.ss_number_layers; ++sl) {
                for (tl = layer_id.temporal_layer_id;
-                    tl < enc_cfg.ts_number_layers; ++tl) {
+                     tl < enc_cfg.ts_number_layers; ++tl) {
                  const int layer = sl * enc_cfg.ts_number_layers + tl;
                  ++rc.layer_tot_enc_frames[layer];
                  rc.layer_encoding_bitrate[layer] += 8.0 * sizes[sl];
                  // Keep count of rate control stats per layer, for non-key
                  // frames.
-                  if (tl == layer_id.temporal_layer_id &&
+                  if (tl == (unsigned int)layer_id.temporal_layer_id &&
                      !(cx_pkt->data.frame.flags & VPX_FRAME_IS_KEY)) {
                    rc.layer_avg_frame_size[layer] += 8.0 * sizes[sl];
                    rc.layer_avg_rate_mismatch[layer] +=
@@ -704,7 +815,7 @@ int main(int argc, const char **argv) {
              // Update for short-time encoding bitrate states, for moving
              // window of size rc->window, shifted by rc->window / 2.
              // Ignore first window segment, due to key frame.
-              if (frame_cnt > rc.window_size) {
+              if (frame_cnt > (unsigned int)rc.window_size) {
                tl = layer_id.temporal_layer_id;
                for (sl = 0; sl < enc_cfg.ss_number_layers; ++sl) {
                  sum_bitrate += 0.001 * 8.0 * sizes[sl] * framerate;
@@ -720,41 +831,42 @@ int main(int argc, const char **argv) {
              }

              // Second shifted window.
-              if (frame_cnt > rc.window_size + rc.window_size / 2) {
-               tl = layer_id.temporal_layer_id;
-               for (sl = 0; sl < enc_cfg.ss_number_layers; ++sl) {
-                 sum_bitrate2 += 0.001 * 8.0 * sizes[sl] * framerate;
-               }
+              if (frame_cnt >
+                  (unsigned int)(rc.window_size + rc.window_size / 2)) {
+                tl = layer_id.temporal_layer_id;
+                for (sl = 0; sl < enc_cfg.ss_number_layers; ++sl) {
+                  sum_bitrate2 += 0.001 * 8.0 * sizes[sl] * framerate;
+                }

-               if (frame_cnt > 2 * rc.window_size &&
-                  frame_cnt % rc.window_size == 0) {
-                 rc.window_count += 1;
-                 rc.avg_st_encoding_bitrate += sum_bitrate2 / rc.window_size;
-                 rc.variance_st_encoding_bitrate +=
-                    (sum_bitrate2 / rc.window_size) *
-                    (sum_bitrate2 / rc.window_size);
-                 sum_bitrate2 = 0.0;
-               }
+                if (frame_cnt > (unsigned int)(2 * rc.window_size) &&
+                    frame_cnt % rc.window_size == 0) {
+                  rc.window_count += 1;
+                  rc.avg_st_encoding_bitrate += sum_bitrate2 / rc.window_size;
+                  rc.variance_st_encoding_bitrate +=
+                      (sum_bitrate2 / rc.window_size) *
+                      (sum_bitrate2 / rc.window_size);
+                  sum_bitrate2 = 0.0;
+                }
              }
            }
 #endif
          }
-
+          /*
          printf("SVC frame: %d, kf: %d, size: %d, pts: %d\n", frames_received,
                 !!(cx_pkt->data.frame.flags & VPX_FRAME_IS_KEY),
                 (int)cx_pkt->data.frame.sz, (int)cx_pkt->data.frame.pts);
+          */
+          if (enc_cfg.ss_number_layers == 1 && enc_cfg.ts_number_layers == 1)
+            si->bytes_sum[0] += (int)cx_pkt->data.frame.sz;
          ++frames_received;
          break;
        }
        case VPX_CODEC_STATS_PKT: {
-          stats_write(&app_input.rc_stats,
-                      cx_pkt->data.twopass_stats.buf,
+          stats_write(&app_input.rc_stats, cx_pkt->data.twopass_stats.buf,
                      cx_pkt->data.twopass_stats.sz);
          break;
        }
-        default: {
-          break;
-        }
+        default: { break; }
      }
    }

@@ -763,6 +875,16 @@ int main(int argc, const char **argv) {
      pts += frame_duration;
    }
  }
+
+  // Compensate for the extra frame count for the bypass mode.
+  if (svc_ctx.temporal_layering_mode == VP9E_TEMPORAL_LAYERING_MODE_BYPASS) {
+    for (sl = 0; sl < enc_cfg.ss_number_layers; ++sl) {
+      const int layer =
+          sl * enc_cfg.ts_number_layers + layer_id.temporal_layer_id;
+      --rc.layer_input_frames[layer];
+    }
+  }
+
  printf("Processed %d frames\n", frame_cnt);
  fclose(infile);
 #if OUTPUT_RC_STATS
@@ -772,8 +894,7 @@ int main(int argc, const char **argv) {
  }
 #endif
  if (vpx_codec_destroy(&codec)) die_codec(&codec, "Failed to destroy codec");
-  if (app_input.passes == 2)
-    stats_close(&app_input.rc_stats, 1);
+  if (app_input.passes == 2) stats_close(&app_input.rc_stats, 1);
  if (writer) {
    vpx_video_writer_close(writer);
  }
@@ -784,6 +905,9 @@ int main(int argc, const char **argv) {
    }
  }
 #endif
+  printf("Frame cnt and encoding time/FPS stats for encoding: %d %f %f \n",
+         frame_cnt, 1000 * (float)cx_time / (double)(frame_cnt * 1000000),
+         1000000 * (double)frame_cnt / (double)cx_time);
  vpx_img_free(&raw);
  // display average size, psnr
  printf("%s", vpx_svc_dump_statistics(&svc_ctx));
--- a/examples/vp9cx_set_ref.c
+++ b/examples/vp9cx_set_ref.c
@@ -0,0 +1,442 @@
+/*
+ *  Copyright (c) 2016 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+// VP9 Set Reference Frame
+// ============================
+//
+// This is an example demonstrating how to overwrite the VP9 encoder's
+// internal reference frame. In the sample we set the last frame to the
+// current frame. This technique could be used to bounce between two cameras.
+//
+// The decoder would also have to set the reference frame to the same value
+// on the same frame, or the video will become corrupt. The 'test_decode'
+// variable is set to 1 in this example that tests if the encoder and decoder
+// results are matching.
+//
+// Usage
+// -----
+// This example encodes a raw video. The <frame> argument is the frame number
+// on which the reference frame is updated; the optional <limit> argument caps
+// the number of input frames encoded. For example, run
+// examples/vp9cx_set_ref 352 288 in.yuv out.ivf 4 30
+// Arguments: <width> <height> <infile> <outfile> <frame> <limit(optional)>.
+//
+// Extra Variables
+// ---------------
+// This example maintains the frame number passed on the command line
+// in the `update_frame_num` variable.
+//
+//
+// Configuration
+// -------------
+//
+// The reference frame is updated on the frame specified on the command
+// line.
+//
+// Observing The Effects
+// ---------------------
+// The encoder and decoder results should be matching when the same reference
+// frame setting operation is done in both encoder and decoder. Otherwise,
+// the encoder/decoder mismatch would be seen.
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "vpx/vp8cx.h"
+#include "vpx/vpx_decoder.h"
+#include "vpx/vpx_encoder.h"
+#include "vp9/common/vp9_common.h"
+
+#include "./tools_common.h"
+#include "./video_writer.h"
+
+static const char *exec_name;
+
+// Prints the command-line synopsis to stderr and exits with EXIT_FAILURE.
+void usage_exit(void) {
+  fprintf(stderr,
+          "Usage: %s <width> <height> <infile> <outfile> "
+          "<frame> <limit(optional)>\n", exec_name);
+  exit(EXIT_FAILURE);
+}
+
+// Returns 1 when img1 and img2 share format and display size and the first
+// d_h rows of d_w samples (chroma: subsampled) of Y, U and V match; else 0.
+static int compare_img(const vpx_image_t *const img1,
+                       const vpx_image_t *const img2) {
+  const int planes[3] = { VPX_PLANE_Y, VPX_PLANE_U, VPX_PLANE_V };
+  const uint32_t c_w =
+      (img1->d_w + img1->x_chroma_shift) >> img1->x_chroma_shift;
+  const uint32_t c_h =
+      (img1->d_h + img1->y_chroma_shift) >> img1->y_chroma_shift;
+  uint32_t i;
+  int p;
+
+  // Reject differing layouts before touching any plane: the row loops below
+  // size their reads from img1 alone and could run past img2's buffers.
+  if (img1->fmt != img2->fmt || img1->d_w != img2->d_w ||
+      img1->d_h != img2->d_h)
+    return 0;
+
+  for (p = 0; p < 3; ++p) {
+    // Luma uses the full display size; both chroma planes the subsampled one.
+    const uint32_t w = (planes[p] == VPX_PLANE_Y) ? img1->d_w : c_w;
+    const uint32_t h = (planes[p] == VPX_PLANE_Y) ? img1->d_h : c_h;
+    for (i = 0; i < h; ++i) {
+      if (memcmp(img1->planes[planes[p]] + i * img1->stride[planes[p]],
+                 img2->planes[planes[p]] + i * img2->stride[planes[p]], w))
+        return 0;
+    }
+  }
+  return 1;
+}
+
+#define mmin(a, b) ((a) < (b) ? (a) : (b))
+
+// Searches one plane of img1/img2 for the first differing sample. The w x h
+// area is visited in bw x bh blocks, blocks in raster order and samples in
+// raster order inside each block. On a hit, loc receives { row, column, img1
+// sample, img2 sample }; when the area is identical, loc is left as all -1.
+static void find_plane_mismatch(const vpx_image_t *const img1,
+                                const vpx_image_t *const img2, int plane,
+                                uint32_t w, uint32_t h, uint32_t bw,
+                                uint32_t bh, int loc[4]) {
+  uint32_t i, j, k, l;
+  loc[0] = loc[1] = loc[2] = loc[3] = -1;
+  for (i = 0; i < h; i += bh) {
+    for (j = 0; j < w; j += bw) {
+      // Clamp the block to the plane edge: min(start + size, limit) - start.
+      // (Subtracting start inside the min() would let blocks overlap.)
+      const uint32_t si = mmin(i + bh, h) - i;
+      const uint32_t sj = mmin(j + bw, w) - j;
+      for (k = 0; k < si; ++k) {
+        for (l = 0; l < sj; ++l) {
+          const int v1 =
+              *(img1->planes[plane] + (i + k) * img1->stride[plane] + j + l);
+          const int v2 =
+              *(img2->planes[plane] + (i + k) * img2->stride[plane] + j + l);
+          if (v1 != v2) {
+            loc[0] = i + k;
+            loc[1] = j + l;
+            loc[2] = v1;
+            loc[3] = v2;
+            return;
+          }
+        }
+      }
+    }
+  }
+}
+
+// Reports where img1 and img2 first differ in each of the Y, U and V planes.
+// Each of yloc/uloc/vloc receives { row, column, img1 sample, img2 sample },
+// or all -1 when that plane matches. Luma is scanned in 64x64 blocks and the
+// chroma planes in blocks scaled down by the chroma shifts; all sizes are
+// taken from img1.
+static void find_mismatch(const vpx_image_t *const img1,
+                          const vpx_image_t *const img2, int yloc[4],
+                          int uloc[4], int vloc[4]) {
+  const uint32_t bsize = 64;
+  const uint32_t bsizey = bsize >> img1->y_chroma_shift;
+  const uint32_t bsizex = bsize >> img1->x_chroma_shift;
+  const uint32_t c_w =
+      (img1->d_w + img1->x_chroma_shift) >> img1->x_chroma_shift;
+  const uint32_t c_h =
+      (img1->d_h + img1->y_chroma_shift) >> img1->y_chroma_shift;
+
+  find_plane_mismatch(img1, img2, VPX_PLANE_Y, img1->d_w, img1->d_h, bsize,
+                      bsize, yloc);
+  find_plane_mismatch(img1, img2, VPX_PLANE_U, c_w, c_h, bsizex, bsizey, uloc);
+  find_plane_mismatch(img1, img2, VPX_PLANE_V, c_w, c_h, bsizex, bsizey, vloc);
+}
+
+// Fetches reference buffer idx 0 from both the encoder and the decoder and
+// compares them. On the first difference it sets *mismatch_seen and prints,
+// for each of Y, U and V, the first differing position and the two sample
+// values. Does nothing once *mismatch_seen is already set.
+static void testing_decode(vpx_codec_ctx_t *encoder, vpx_codec_ctx_t *decoder,
+                           unsigned int frame_out, int *mismatch_seen) {
+  vpx_image_t enc_img, dec_img;
+  struct vp9_ref_frame ref_enc, ref_dec;
+
+  if (*mismatch_seen) return;
+
+  ref_enc.idx = 0;
+  ref_dec.idx = 0;
+  if (vpx_codec_control(encoder, VP9_GET_REFERENCE, &ref_enc))
+    die_codec(encoder, "Failed to get encoder reference frame");
+  enc_img = ref_enc.img;
+  if (vpx_codec_control(decoder, VP9_GET_REFERENCE, &ref_dec))
+    die_codec(decoder, "Failed to get decoder reference frame");
+  dec_img = ref_dec.img;
+
+  if (!compare_img(&enc_img, &dec_img)) {
+    int y[4], u[4], v[4];
+
+    *mismatch_seen = 1;
+
+    find_mismatch(&enc_img, &dec_img, y, u, v);
+    // frame_out is unsigned, hence %u. No trailing newline here: the message
+    // is appended to the caller's progress line.
+    printf(
+        "Encode/decode mismatch on frame %u at"
+        " Y[%d, %d] {%d/%d},"
+        " U[%d, %d] {%d/%d},"
+        " V[%d, %d] {%d/%d}",
+        frame_out, y[0], y[1], y[2], y[3], u[0], u[1], u[2], u[3], v[0], v[1],
+        v[2], v[3]);
+  }
+
+  vpx_img_free(&enc_img);
+  vpx_img_free(&dec_img);
+}
+
+// Feeds one image (or NULL to flush) to the encoder and drains every packet it
+// produces. Frame packets are written to the video writer and echoed as 'K'
+// (key frame) or '.' on stdout; with test_decode set they are also decoded,
+// and the encoder/decoder references are compared once after the drain.
+// *frame_out is incremented for every non-fragment frame packet.
+// Returns 1 if the encoder produced any packet, 0 otherwise.
+static int encode_frame(vpx_codec_ctx_t *ecodec, vpx_image_t *img,
+                        unsigned int frame_in, VpxVideoWriter *writer,
+                        int test_decode, vpx_codec_ctx_t *dcodec,
+                        unsigned int *frame_out, int *mismatch_seen) {
+  vpx_codec_iter_t pkt_iter = NULL;
+  const vpx_codec_cx_pkt_t *cx_pkt;
+  int produced_any = 0;
+  int wrote_frame = 0;
+
+  if (vpx_codec_encode(ecodec, img, frame_in, 1, 0, VPX_DL_GOOD_QUALITY) !=
+      VPX_CODEC_OK)
+    die_codec(ecodec, "Failed to encode frame");
+
+  for (cx_pkt = vpx_codec_get_cx_data(ecodec, &pkt_iter); cx_pkt != NULL;
+       cx_pkt = vpx_codec_get_cx_data(ecodec, &pkt_iter)) {
+    produced_any = 1;
+
+    // Only compressed-frame packets are written and decoded.
+    if (cx_pkt->kind != VPX_CODEC_CX_FRAME_PKT) continue;
+
+    if ((cx_pkt->data.frame.flags & VPX_FRAME_IS_FRAGMENT) == 0) ++*frame_out;
+
+    if (!vpx_video_writer_write_frame(writer, cx_pkt->data.frame.buf,
+                                      cx_pkt->data.frame.sz,
+                                      cx_pkt->data.frame.pts))
+      die_codec(ecodec, "Failed to write compressed frame");
+
+    printf((cx_pkt->data.frame.flags & VPX_FRAME_IS_KEY) ? "K" : ".");
+    fflush(stdout);
+    wrote_frame = 1;
+
+    // Decode 1 frame.
+    if (test_decode &&
+        vpx_codec_decode(dcodec, cx_pkt->data.frame.buf,
+                         (unsigned int)cx_pkt->data.frame.sz, NULL, 0))
+      die_codec(dcodec, "Failed to decode frame.");
+  }
+
+  // Mismatch checking
+  if (wrote_frame && test_decode)
+    testing_decode(ecodec, dcodec, *frame_out, mismatch_seen);
+
+  return produced_any;
+}
+
+// Encodes <infile> to an IVF file with VP9. Just before the frame numbered
+// <frame> is encoded, the LAST reference frame is overwritten with the current
+// input image in both the encoder and a companion decoder; every encoded frame
+// is decoded and the two reference buffers are compared, and the run reports
+// whether encoder and decoder stayed in sync.
+int main(int argc, char **argv) {
+  FILE *infile = NULL;
+  // Encoder
+  vpx_codec_ctx_t ecodec;
+  vpx_codec_enc_cfg_t cfg;
+  unsigned int frame_in = 0;
+  vpx_image_t raw;
+  vpx_codec_err_t res;
+  VpxVideoInfo info;
+  VpxVideoWriter *writer = NULL;
+  const VpxInterface *encoder = NULL;
+
+  // Test encoder/decoder mismatch.
+  int test_decode = 1;
+  // Decoder
+  vpx_codec_ctx_t dcodec;
+  unsigned int frame_out = 0;
+
+  // The frame number to set reference frame on
+  unsigned int update_frame_num = 0;
+  int mismatch_seen = 0;
+
+  const int fps = 30;
+  const int bitrate = 500;
+
+  const char *width_arg = NULL;
+  const char *height_arg = NULL;
+  const char *infile_arg = NULL;
+  const char *outfile_arg = NULL;
+  const char *update_frame_num_arg = NULL;
+  unsigned int limit = 0;
+
+  // Zero both codec contexts (the decoder one too) before any init call.
+  vp9_zero(ecodec);
+  vp9_zero(dcodec);
+  vp9_zero(cfg);
+  vp9_zero(info);
+
+  exec_name = argv[0];
+
+  if (argc < 6) die("Invalid number of arguments");
+
+  width_arg = argv[1];
+  height_arg = argv[2];
+  infile_arg = argv[3];
+  outfile_arg = argv[4];
+  update_frame_num_arg = argv[5];
+
+  encoder = get_vpx_encoder_by_name("vp9");
+  if (!encoder) die("Unsupported codec.");
+
+  update_frame_num = (unsigned int)strtoul(update_frame_num_arg, NULL, 0);
+  // In VP9, the reference buffers (cm->buffer_pool->frame_bufs[i].buf) are
+  // allocated while calling vpx_codec_encode(), thus, setting reference for
+  // 1st frame isn't supported.
+  if (update_frame_num <= 1) {
+    die("Couldn't parse frame number '%s'\n", update_frame_num_arg);
+  }
+
+  // Optional sixth argument: maximum number of input frames to encode.
+  if (argc > 6) {
+    limit = (unsigned int)strtoul(argv[6], NULL, 0);
+    if (update_frame_num > limit)
+      die("Update frame number couldn't larger than limit\n");
+  }
+
+  info.codec_fourcc = encoder->fourcc;
+  info.frame_width = (int)strtol(width_arg, NULL, 0);
+  info.frame_height = (int)strtol(height_arg, NULL, 0);
+  info.time_base.numerator = 1;
+  info.time_base.denominator = fps;
+
+  // Only positive, even dimensions are accepted.
+  if (info.frame_width <= 0 || info.frame_height <= 0 ||
+      (info.frame_width % 2) != 0 || (info.frame_height % 2) != 0) {
+    die("Invalid frame size: %dx%d", info.frame_width, info.frame_height);
+  }
+
+  if (!vpx_img_alloc(&raw, VPX_IMG_FMT_I420, info.frame_width,
+                     info.frame_height, 1)) {
+    die("Failed to allocate image.");
+  }
+
+  printf("Using %s\n", vpx_codec_iface_name(encoder->codec_interface()));
+
+  res = vpx_codec_enc_config_default(encoder->codec_interface(), &cfg, 0);
+  if (res) die_codec(&ecodec, "Failed to get default codec config.");
+
+  cfg.g_w = info.frame_width;
+  cfg.g_h = info.frame_height;
+  cfg.g_timebase.num = info.time_base.numerator;
+  cfg.g_timebase.den = info.time_base.denominator;
+  cfg.rc_target_bitrate = bitrate;
+  cfg.g_lag_in_frames = 3;
+
+  writer = vpx_video_writer_open(outfile_arg, kContainerIVF, &info);
+  if (!writer) die("Failed to open %s for writing.", outfile_arg);
+
+  if (!(infile = fopen(infile_arg, "rb")))
+    die("Failed to open %s for reading.", infile_arg);
+
+  if (vpx_codec_enc_init(&ecodec, encoder->codec_interface(), &cfg, 0))
+    die_codec(&ecodec, "Failed to initialize encoder");
+
+  // Disable alt_ref.
+  if (vpx_codec_control(&ecodec, VP8E_SET_ENABLEAUTOALTREF, 0))
+    die_codec(&ecodec, "Failed to set enable auto alt ref");
+
+  if (test_decode) {
+    const VpxInterface *decoder = get_vpx_decoder_by_name("vp9");
+    // Check the lookup result before dereferencing it, as done for the
+    // encoder above.
+    if (!decoder) die("Unsupported codec.");
+    if (vpx_codec_dec_init(&dcodec, decoder->codec_interface(), NULL, 0))
+      die_codec(&dcodec, "Failed to initialize decoder.");
+  }
+
+  // Encode frames.
+  while (vpx_img_read(&raw, infile)) {
+    if (limit && frame_in >= limit) break;
+    // frame_out counts frames already output, so this fires right before the
+    // frame numbered update_frame_num is produced.
+    if (update_frame_num > 1 && frame_out + 1 == update_frame_num) {
+      vpx_ref_frame_t ref;
+      ref.frame_type = VP8_LAST_FRAME;
+      ref.img = raw;
+      // Set reference frame in encoder.
+      if (vpx_codec_control(&ecodec, VP8_SET_REFERENCE, &ref))
+        die_codec(&ecodec, "Failed to set reference frame");
+      printf(" <SET_REF>");
+
+      // If set_reference in decoder is commented out, the enc/dec mismatch
+      // would be seen.
+      if (test_decode) {
+        if (vpx_codec_control(&dcodec, VP8_SET_REFERENCE, &ref))
+          die_codec(&dcodec, "Failed to set reference frame");
+      }
+    }
+
+    encode_frame(&ecodec, &raw, frame_in, writer, test_decode, &dcodec,
+                 &frame_out, &mismatch_seen);
+    frame_in++;
+    if (mismatch_seen) break;
+  }
+
+  // Flush encoder.
+  if (!mismatch_seen)
+    while (encode_frame(&ecodec, NULL, frame_in, writer, test_decode, &dcodec,
+                        &frame_out, &mismatch_seen)) {
+    }
+
+  printf("\n");
+  fclose(infile);
+  // frame_out is unsigned, hence %u.
+  printf("Processed %u frames.\n", frame_out);
+
+  if (test_decode) {
+    if (!mismatch_seen)
+      printf("Encoder/decoder results are matching.\n");
+    else
+      printf("Encoder/decoder results are NOT matching.\n");
+  }
+
+  if (test_decode)
+    if (vpx_codec_destroy(&dcodec))
+      die_codec(&dcodec, "Failed to destroy decoder");
+
+  vpx_img_free(&raw);
+  if (vpx_codec_destroy(&ecodec))
+    die_codec(&ecodec, "Failed to destroy encoder.");
+
+  vpx_video_writer_close(writer);
+
+  return EXIT_SUCCESS;
+}
--- a/examples/vpx_temporal_svc_encoder.c
+++ b/examples/vpx_temporal_svc_encoder.c
@@ -28,9 +28,7 @@

 static const char *exec_name;

-void usage_exit(void) {
-  exit(EXIT_FAILURE);
-}
+void usage_exit(void) { exit(EXIT_FAILURE); }

 // Denoiser states, for temporal denoising.
 enum denoiserState {
@@ -41,7 +39,7 @@ enum denoiserState {
  kDenoiserOnAdaptive
 };

-static int mode_to_num_layers[12] = {1, 2, 2, 3, 3, 3, 3, 5, 2, 3, 3, 3};
+static int mode_to_num_layers[13] = { 1, 2, 2, 3, 3, 3, 3, 5, 2, 3, 3, 3, 3 };

 // For rate control encoding stats.
 struct RateControlMetrics {
@@ -86,14 +84,14 @@ static void set_rate_control_metrics(struct RateControlMetrics *rc,
  // per-frame-bandwidth, for the rate control encoding stats below.
  const double framerate = cfg->g_timebase.den / cfg->g_timebase.num;
  rc->layer_framerate[0] = framerate / cfg->ts_rate_decimator[0];
-  rc->layer_pfb[0] = 1000.0 * rc->layer_target_bitrate[0] /
-      rc->layer_framerate[0];
+  rc->layer_pfb[0] =
+      1000.0 * rc->layer_target_bitrate[0] / rc->layer_framerate[0];
  for (i = 0; i < cfg->ts_number_layers; ++i) {
    if (i > 0) {
      rc->layer_framerate[i] = framerate / cfg->ts_rate_decimator[i];
-      rc->layer_pfb[i] = 1000.0 *
-          (rc->layer_target_bitrate[i] - rc->layer_target_bitrate[i - 1]) /
-          (rc->layer_framerate[i] - rc->layer_framerate[i - 1]);
+      rc->layer_pfb[i] = 1000.0 * (rc->layer_target_bitrate[i] -
+                                   rc->layer_target_bitrate[i - 1]) /
+                         (rc->layer_framerate[i] - rc->layer_framerate[i - 1]);
    }
    rc->layer_input_frames[i] = 0;
    rc->layer_enc_frames[i] = 0;
@@ -114,29 +112,31 @@ static void printout_rate_control_summary(struct RateControlMetrics *rc,
  unsigned int i = 0;
  int tot_num_frames = 0;
  double perc_fluctuation = 0.0;
-  printf("Total number of processed frames: %d\n\n", frame_cnt -1);
+  printf("Total number of processed frames: %d\n\n", frame_cnt - 1);
  printf("Rate control layer stats for %d layer(s):\n\n",
-      cfg->ts_number_layers);
+         cfg->ts_number_layers);
  for (i = 0; i < cfg->ts_number_layers; ++i) {
-    const int num_dropped = (i > 0) ?
-        (rc->layer_input_frames[i] - rc->layer_enc_frames[i]) :
-        (rc->layer_input_frames[i] - rc->layer_enc_frames[i] - 1);
+    const int num_dropped =
+        (i > 0) ? (rc->layer_input_frames[i] - rc->layer_enc_frames[i])
+                : (rc->layer_input_frames[i] - rc->layer_enc_frames[i] - 1);
    tot_num_frames += rc->layer_input_frames[i];
    rc->layer_encoding_bitrate[i] = 0.001 * rc->layer_framerate[i] *
-        rc->layer_encoding_bitrate[i] / tot_num_frames;
-    rc->layer_avg_frame_size[i] = rc->layer_avg_frame_size[i] /
-        rc->layer_enc_frames[i];
-    rc->layer_avg_rate_mismatch[i] = 100.0 * rc->layer_avg_rate_mismatch[i] /
-        rc->layer_enc_frames[i];
+                                    rc->layer_encoding_bitrate[i] /
+                                    tot_num_frames;
+    rc->layer_avg_frame_size[i] =
+        rc->layer_avg_frame_size[i] / rc->layer_enc_frames[i];
+    rc->layer_avg_rate_mismatch[i] =
+        100.0 * rc->layer_avg_rate_mismatch[i] / rc->layer_enc_frames[i];
    printf("For layer#: %d \n", i);
    printf("Bitrate (target vs actual): %d %f \n", rc->layer_target_bitrate[i],
           rc->layer_encoding_bitrate[i]);
    printf("Average frame size (target vs actual): %f %f \n", rc->layer_pfb[i],
           rc->layer_avg_frame_size[i]);
    printf("Average rate_mismatch: %f \n", rc->layer_avg_rate_mismatch[i]);
-    printf("Number of input frames, encoded (non-key) frames, "
-        "and perc dropped frames: %d %d %f \n", rc->layer_input_frames[i],
-        rc->layer_enc_frames[i],
+    printf(
+        "Number of input frames, encoded (non-key) frames, "
+        "and perc dropped frames: %d %d %f \n",
+        rc->layer_input_frames[i], rc->layer_enc_frames[i],
        100.0 * num_dropped / rc->layer_input_frames[i]);
    printf("\n");
  }
@@ -145,11 +145,10 @@ static void printout_rate_control_summary(struct RateControlMetrics *rc,
      rc->variance_st_encoding_bitrate / rc->window_count -
      (rc->avg_st_encoding_bitrate * rc->avg_st_encoding_bitrate);
  perc_fluctuation = 100.0 * sqrt(rc->variance_st_encoding_bitrate) /
-      rc->avg_st_encoding_bitrate;
-  printf("Short-time stats, for window of %d frames: \n",rc->window_size);
+                     rc->avg_st_encoding_bitrate;
+  printf("Short-time stats, for window of %d frames: \n", rc->window_size);
  printf("Average, rms-variance, and percent-fluct: %f %f %f \n",
-         rc->avg_st_encoding_bitrate,
-         sqrt(rc->variance_st_encoding_bitrate),
+         rc->avg_st_encoding_bitrate, sqrt(rc->variance_st_encoding_bitrate),
         perc_fluctuation);
  if ((frame_cnt - 1) != tot_num_frames)
    die("Error: Number of input frames not equal to output! \n");
@@ -167,20 +166,20 @@ static void set_temporal_layer_pattern(int layering_mode,
  switch (layering_mode) {
    case 0: {
      // 1-layer.
-      int ids[1] = {0};
+      int ids[1] = { 0 };
      cfg->ts_periodicity = 1;
      *flag_periodicity = 1;
      cfg->ts_number_layers = 1;
      cfg->ts_rate_decimator[0] = 1;
      memcpy(cfg->ts_layer_id, ids, sizeof(ids));
      // Update L only.
-      layer_flags[0] = VPX_EFLAG_FORCE_KF  | VP8_EFLAG_NO_UPD_GF |
-          VP8_EFLAG_NO_UPD_ARF;
+      layer_flags[0] =
+          VPX_EFLAG_FORCE_KF | VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF;
      break;
    }
    case 1: {
      // 2-layers, 2-frame period.
-      int ids[2] = {0, 1};
+      int ids[2] = { 0, 1 };
      cfg->ts_periodicity = 2;
      *flag_periodicity = 2;
      cfg->ts_number_layers = 2;
@@ -189,22 +188,24 @@ static void set_temporal_layer_pattern(int layering_mode,
      memcpy(cfg->ts_layer_id, ids, sizeof(ids));
 #if 1
      // 0=L, 1=GF, Intra-layer prediction enabled.
-      layer_flags[0] = VPX_EFLAG_FORCE_KF  | VP8_EFLAG_NO_UPD_GF |
-          VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF;
-      layer_flags[1] = VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_UPD_LAST |
-          VP8_EFLAG_NO_REF_ARF;
+      layer_flags[0] = VPX_EFLAG_FORCE_KF | VP8_EFLAG_NO_UPD_GF |
+                       VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_REF_GF |
+                       VP8_EFLAG_NO_REF_ARF;
+      layer_flags[1] =
+          VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_REF_ARF;
 #else
-       // 0=L, 1=GF, Intra-layer prediction disabled.
-      layer_flags[0] = VPX_EFLAG_FORCE_KF  | VP8_EFLAG_NO_UPD_GF |
-          VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF;
+      // 0=L, 1=GF, Intra-layer prediction disabled.
+      layer_flags[0] = VPX_EFLAG_FORCE_KF | VP8_EFLAG_NO_UPD_GF |
+                       VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_REF_GF |
+                       VP8_EFLAG_NO_REF_ARF;
      layer_flags[1] = VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_UPD_LAST |
-          VP8_EFLAG_NO_REF_ARF | VP8_EFLAG_NO_REF_LAST;
+                       VP8_EFLAG_NO_REF_ARF | VP8_EFLAG_NO_REF_LAST;
 #endif
      break;
    }
    case 2: {
      // 2-layers, 3-frame period.
-      int ids[3] = {0, 1, 1};
+      int ids[3] = { 0, 1, 1 };
      cfg->ts_periodicity = 3;
      *flag_periodicity = 3;
      cfg->ts_number_layers = 2;
@@ -212,16 +213,17 @@ static void set_temporal_layer_pattern(int layering_mode,
      cfg->ts_rate_decimator[1] = 1;
      memcpy(cfg->ts_layer_id, ids, sizeof(ids));
      // 0=L, 1=GF, Intra-layer prediction enabled.
-      layer_flags[0] = VPX_EFLAG_FORCE_KF  | VP8_EFLAG_NO_REF_GF |
-          VP8_EFLAG_NO_REF_ARF | VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF;
-      layer_flags[1] =
-      layer_flags[2] = VP8_EFLAG_NO_REF_GF  | VP8_EFLAG_NO_REF_ARF |
-          VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_UPD_LAST;
+      layer_flags[0] = VPX_EFLAG_FORCE_KF | VP8_EFLAG_NO_REF_GF |
+                       VP8_EFLAG_NO_REF_ARF | VP8_EFLAG_NO_UPD_GF |
+                       VP8_EFLAG_NO_UPD_ARF;
+      layer_flags[1] = layer_flags[2] =
+          VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF | VP8_EFLAG_NO_UPD_ARF |
+          VP8_EFLAG_NO_UPD_LAST;
      break;
    }
    case 3: {
      // 3-layers, 6-frame period.
-      int ids[6] = {0, 2, 2, 1, 2, 2};
+      int ids[6] = { 0, 2, 2, 1, 2, 2 };
      cfg->ts_periodicity = 6;
      *flag_periodicity = 6;
      cfg->ts_number_layers = 3;
@@ -230,19 +232,18 @@ static void set_temporal_layer_pattern(int layering_mode,
      cfg->ts_rate_decimator[2] = 1;
      memcpy(cfg->ts_layer_id, ids, sizeof(ids));
      // 0=L, 1=GF, 2=ARF, Intra-layer prediction enabled.
-      layer_flags[0] = VPX_EFLAG_FORCE_KF  | VP8_EFLAG_NO_REF_GF |
-          VP8_EFLAG_NO_REF_ARF | VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF;
-      layer_flags[3] = VP8_EFLAG_NO_REF_ARF | VP8_EFLAG_NO_UPD_ARF |
-          VP8_EFLAG_NO_UPD_LAST;
-      layer_flags[1] =
-      layer_flags[2] =
-      layer_flags[4] =
-      layer_flags[5] = VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_LAST;
+      layer_flags[0] = VPX_EFLAG_FORCE_KF | VP8_EFLAG_NO_REF_GF |
+                       VP8_EFLAG_NO_REF_ARF | VP8_EFLAG_NO_UPD_GF |
+                       VP8_EFLAG_NO_UPD_ARF;
+      layer_flags[3] =
+          VP8_EFLAG_NO_REF_ARF | VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_UPD_LAST;
+      layer_flags[1] = layer_flags[2] = layer_flags[4] = layer_flags[5] =
+          VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_LAST;
      break;
    }
    case 4: {
      // 3-layers, 4-frame period.
-      int ids[4] = {0, 2, 1, 2};
+      int ids[4] = { 0, 2, 1, 2 };
      cfg->ts_periodicity = 4;
      *flag_periodicity = 4;
      cfg->ts_number_layers = 3;
@@ -251,39 +252,41 @@ static void set_temporal_layer_pattern(int layering_mode,
      cfg->ts_rate_decimator[2] = 1;
      memcpy(cfg->ts_layer_id, ids, sizeof(ids));
      // 0=L, 1=GF, 2=ARF, Intra-layer prediction disabled.
-      layer_flags[0] = VPX_EFLAG_FORCE_KF  | VP8_EFLAG_NO_REF_GF |
-          VP8_EFLAG_NO_REF_ARF | VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF;
+      layer_flags[0] = VPX_EFLAG_FORCE_KF | VP8_EFLAG_NO_REF_GF |
+                       VP8_EFLAG_NO_REF_ARF | VP8_EFLAG_NO_UPD_GF |
+                       VP8_EFLAG_NO_UPD_ARF;
      layer_flags[2] = VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF |
-          VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_UPD_LAST;
-      layer_flags[1] =
-      layer_flags[3] = VP8_EFLAG_NO_REF_ARF | VP8_EFLAG_NO_UPD_LAST |
-          VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF;
+                       VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_UPD_LAST;
+      layer_flags[1] = layer_flags[3] =
+          VP8_EFLAG_NO_REF_ARF | VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_GF |
+          VP8_EFLAG_NO_UPD_ARF;
      break;
    }
    case 5: {
      // 3-layers, 4-frame period.
-      int ids[4] = {0, 2, 1, 2};
+      int ids[4] = { 0, 2, 1, 2 };
      cfg->ts_periodicity = 4;
      *flag_periodicity = 4;
-      cfg->ts_number_layers     = 3;
+      cfg->ts_number_layers = 3;
      cfg->ts_rate_decimator[0] = 4;
      cfg->ts_rate_decimator[1] = 2;
      cfg->ts_rate_decimator[2] = 1;
      memcpy(cfg->ts_layer_id, ids, sizeof(ids));
      // 0=L, 1=GF, 2=ARF, Intra-layer prediction enabled in layer 1, disabled
      // in layer 2.
-      layer_flags[0] = VPX_EFLAG_FORCE_KF  | VP8_EFLAG_NO_REF_GF |
-          VP8_EFLAG_NO_REF_ARF | VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF;
-      layer_flags[2] = VP8_EFLAG_NO_REF_ARF | VP8_EFLAG_NO_UPD_LAST |
+      layer_flags[0] = VPX_EFLAG_FORCE_KF | VP8_EFLAG_NO_REF_GF |
+                       VP8_EFLAG_NO_REF_ARF | VP8_EFLAG_NO_UPD_GF |
+                       VP8_EFLAG_NO_UPD_ARF;
+      layer_flags[2] =
+          VP8_EFLAG_NO_REF_ARF | VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_ARF;
+      layer_flags[1] = layer_flags[3] =
+          VP8_EFLAG_NO_REF_ARF | VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_GF |
          VP8_EFLAG_NO_UPD_ARF;
-      layer_flags[1] =
-      layer_flags[3] = VP8_EFLAG_NO_REF_ARF | VP8_EFLAG_NO_UPD_LAST |
-          VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF;
      break;
    }
    case 6: {
      // 3-layers, 4-frame period.
-      int ids[4] = {0, 2, 1, 2};
+      int ids[4] = { 0, 2, 1, 2 };
      cfg->ts_periodicity = 4;
      *flag_periodicity = 4;
      cfg->ts_number_layers = 3;
@@ -292,18 +295,19 @@ static void set_temporal_layer_pattern(int layering_mode,
      cfg->ts_rate_decimator[2] = 1;
      memcpy(cfg->ts_layer_id, ids, sizeof(ids));
      // 0=L, 1=GF, 2=ARF, Intra-layer prediction enabled.
-      layer_flags[0] = VPX_EFLAG_FORCE_KF  | VP8_EFLAG_NO_REF_GF |
-          VP8_EFLAG_NO_REF_ARF | VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF;
-      layer_flags[2] = VP8_EFLAG_NO_REF_ARF | VP8_EFLAG_NO_UPD_LAST |
-          VP8_EFLAG_NO_UPD_ARF;
-      layer_flags[1] =
-      layer_flags[3] = VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_GF;
+      layer_flags[0] = VPX_EFLAG_FORCE_KF | VP8_EFLAG_NO_REF_GF |
+                       VP8_EFLAG_NO_REF_ARF | VP8_EFLAG_NO_UPD_GF |
+                       VP8_EFLAG_NO_UPD_ARF;
+      layer_flags[2] =
+          VP8_EFLAG_NO_REF_ARF | VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_ARF;
+      layer_flags[1] = layer_flags[3] =
+          VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_GF;
      break;
    }
    case 7: {
      // NOTE: Probably of academic interest only.
      // 5-layers, 16-frame period.
-      int ids[16] = {0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4};
+      int ids[16] = { 0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4 };
      cfg->ts_periodicity = 16;
      *flag_periodicity = 16;
      cfg->ts_number_layers = 5;
@@ -313,28 +317,21 @@ static void set_temporal_layer_pattern(int layering_mode,
      cfg->ts_rate_decimator[3] = 2;
      cfg->ts_rate_decimator[4] = 1;
      memcpy(cfg->ts_layer_id, ids, sizeof(ids));
-      layer_flags[0]  = VPX_EFLAG_FORCE_KF;
-      layer_flags[1]  =
-      layer_flags[3]  =
-      layer_flags[5]  =
-      layer_flags[7]  =
-      layer_flags[9]  =
-      layer_flags[11] =
-      layer_flags[13] =
-      layer_flags[15] = VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_GF |
-          VP8_EFLAG_NO_UPD_ARF;
-      layer_flags[2]  =
-      layer_flags[6]  =
-      layer_flags[10] =
-      layer_flags[14] = VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_UPD_GF;
-      layer_flags[4] =
-      layer_flags[12] = VP8_EFLAG_NO_REF_LAST | VP8_EFLAG_NO_UPD_ARF;
-      layer_flags[8]  = VP8_EFLAG_NO_REF_LAST | VP8_EFLAG_NO_REF_GF;
+      layer_flags[0] = VPX_EFLAG_FORCE_KF;
+      layer_flags[1] = layer_flags[3] = layer_flags[5] = layer_flags[7] =
+          layer_flags[9] = layer_flags[11] = layer_flags[13] = layer_flags[15] =
+              VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_GF |
+              VP8_EFLAG_NO_UPD_ARF;
+      layer_flags[2] = layer_flags[6] = layer_flags[10] = layer_flags[14] =
+          VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_UPD_GF;
+      layer_flags[4] = layer_flags[12] =
+          VP8_EFLAG_NO_REF_LAST | VP8_EFLAG_NO_UPD_ARF;
+      layer_flags[8] = VP8_EFLAG_NO_REF_LAST | VP8_EFLAG_NO_REF_GF;
      break;
    }
    case 8: {
      // 2-layers, with sync point at first frame of layer 1.
-      int ids[2] = {0, 1};
+      int ids[2] = { 0, 1 };
      cfg->ts_periodicity = 2;
      *flag_periodicity = 8;
      cfg->ts_number_layers = 2;
@@ -346,17 +343,17 @@ static void set_temporal_layer_pattern(int layering_mode,
      // key frame. Sync point every 8 frames.

      // Layer 0: predict from L and ARF, update L and G.
-      layer_flags[0] = VPX_EFLAG_FORCE_KF  | VP8_EFLAG_NO_REF_GF |
-          VP8_EFLAG_NO_UPD_ARF;
+      layer_flags[0] =
+          VPX_EFLAG_FORCE_KF | VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_UPD_ARF;
      // Layer 1: sync point: predict from L and ARF, and update G.
-      layer_flags[1] = VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_UPD_LAST |
-          VP8_EFLAG_NO_UPD_ARF;
+      layer_flags[1] =
+          VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_ARF;
      // Layer 0, predict from L and ARF, update L.
-      layer_flags[2] = VP8_EFLAG_NO_REF_GF  | VP8_EFLAG_NO_UPD_GF |
-          VP8_EFLAG_NO_UPD_ARF;
+      layer_flags[2] =
+          VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF;
      // Layer 1: predict from L, G and ARF, and update G.
      layer_flags[3] = VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_UPD_LAST |
-          VP8_EFLAG_NO_UPD_ENTROPY;
+                       VP8_EFLAG_NO_UPD_ENTROPY;
      // Layer 0.
      layer_flags[4] = layer_flags[2];
      // Layer 1.
@@ -365,11 +362,11 @@ static void set_temporal_layer_pattern(int layering_mode,
      layer_flags[6] = layer_flags[4];
      // Layer 1.
      layer_flags[7] = layer_flags[5];
-     break;
+      break;
    }
    case 9: {
      // 3-layers: Sync points for layer 1 and 2 every 8 frames.
-      int ids[4] = {0, 2, 1, 2};
+      int ids[4] = { 0, 2, 1, 2 };
      cfg->ts_periodicity = 4;
      *flag_periodicity = 8;
      cfg->ts_number_layers = 3;
@@ -378,20 +375,21 @@ static void set_temporal_layer_pattern(int layering_mode,
      cfg->ts_rate_decimator[2] = 1;
      memcpy(cfg->ts_layer_id, ids, sizeof(ids));
      // 0=L, 1=GF, 2=ARF.
-      layer_flags[0] = VPX_EFLAG_FORCE_KF  | VP8_EFLAG_NO_REF_GF |
-          VP8_EFLAG_NO_REF_ARF | VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF;
+      layer_flags[0] = VPX_EFLAG_FORCE_KF | VP8_EFLAG_NO_REF_GF |
+                       VP8_EFLAG_NO_REF_ARF | VP8_EFLAG_NO_UPD_GF |
+                       VP8_EFLAG_NO_UPD_ARF;
      layer_flags[1] = VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF |
+                       VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_GF;
+      layer_flags[2] = VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF |
+                       VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_ARF;
+      layer_flags[3] = layer_flags[5] =
          VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_GF;
-      layer_flags[2] = VP8_EFLAG_NO_REF_GF   | VP8_EFLAG_NO_REF_ARF |
-          VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_ARF;
-      layer_flags[3] =
-      layer_flags[5] = VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_GF;
      layer_flags[4] = VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF |
-          VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF;
-      layer_flags[6] = VP8_EFLAG_NO_REF_ARF | VP8_EFLAG_NO_UPD_LAST |
-          VP8_EFLAG_NO_UPD_ARF;
+                       VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF;
+      layer_flags[6] =
+          VP8_EFLAG_NO_REF_ARF | VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_ARF;
      layer_flags[7] = VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_GF |
-          VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_UPD_ENTROPY;
+                       VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_UPD_ENTROPY;
      break;
    }
    case 10: {
@@ -399,7 +397,7 @@ static void set_temporal_layer_pattern(int layering_mode,
      // and is only updated on key frame.
      // Sync points for layer 1 and 2 every 8 frames.

-      int ids[4] = {0, 2, 1, 2};
+      int ids[4] = { 0, 2, 1, 2 };
      cfg->ts_periodicity = 4;
      *flag_periodicity = 8;
      cfg->ts_number_layers = 3;
@@ -409,21 +407,21 @@ static void set_temporal_layer_pattern(int layering_mode,
      memcpy(cfg->ts_layer_id, ids, sizeof(ids));
      // 0=L, 1=GF, 2=ARF.
      // Layer 0: predict from L and ARF; update L and G.
-      layer_flags[0] = VPX_EFLAG_FORCE_KF | VP8_EFLAG_NO_UPD_ARF |
-          VP8_EFLAG_NO_REF_GF;
+      layer_flags[0] =
+          VPX_EFLAG_FORCE_KF | VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_REF_GF;
      // Layer 2: sync point: predict from L and ARF; update none.
      layer_flags[1] = VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_UPD_GF |
-          VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_UPD_LAST |
-          VP8_EFLAG_NO_UPD_ENTROPY;
+                       VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_UPD_LAST |
+                       VP8_EFLAG_NO_UPD_ENTROPY;
      // Layer 1: sync point: predict from L and ARF; update G.
-      layer_flags[2] = VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_UPD_ARF |
-          VP8_EFLAG_NO_UPD_LAST;
+      layer_flags[2] =
+          VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_UPD_LAST;
      // Layer 2: predict from L, G, ARF; update none.
      layer_flags[3] = VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF |
-          VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_ENTROPY;
+                       VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_ENTROPY;
      // Layer 0: predict from L and ARF; update L.
-      layer_flags[4] = VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF |
-          VP8_EFLAG_NO_REF_GF;
+      layer_flags[4] =
+          VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_REF_GF;
      // Layer 2: predict from L, G, ARF; update none.
      layer_flags[5] = layer_flags[3];
      // Layer 1: predict from L, G, ARF; update G.
@@ -432,11 +430,36 @@ static void set_temporal_layer_pattern(int layering_mode,
      layer_flags[7] = layer_flags[3];
      break;
    }
-    case 11:
+    case 11: {
+      // 3-layers structure with one reference frame.
+      // This works the same as temporal_layering_mode 3.
+      // This was added to compare with vp9_spatial_svc_encoder.
+
+      // 3-layers, 4-frame period.
+      int ids[4] = { 0, 2, 1, 2 };
+      cfg->ts_periodicity = 4;
+      *flag_periodicity = 4;
+      cfg->ts_number_layers = 3;
+      cfg->ts_rate_decimator[0] = 4;
+      cfg->ts_rate_decimator[1] = 2;
+      cfg->ts_rate_decimator[2] = 1;
+      memcpy(cfg->ts_layer_id, ids, sizeof(ids));
+      // 0=L, 1=GF, 2=ARF, Intra-layer prediction disabled.
+      layer_flags[0] = VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF |
+                       VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF;
+      layer_flags[2] = VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF |
+                       VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_UPD_LAST;
+      layer_flags[1] = VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF |
+                       VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_GF;
+      layer_flags[3] = VP8_EFLAG_NO_REF_LAST | VP8_EFLAG_NO_REF_ARF |
+                       VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_GF;
+      break;
+    }
+    case 12:
    default: {
      // 3-layers structure as in case 10, but no sync/refresh points for
      // layer 1 and 2.
-      int ids[4] = {0, 2, 1, 2};
+      int ids[4] = { 0, 2, 1, 2 };
      cfg->ts_periodicity = 4;
      *flag_periodicity = 8;
      cfg->ts_number_layers = 3;
@@ -446,15 +469,15 @@ static void set_temporal_layer_pattern(int layering_mode,
      memcpy(cfg->ts_layer_id, ids, sizeof(ids));
      // 0=L, 1=GF, 2=ARF.
      // Layer 0: predict from L and ARF; update L.
-      layer_flags[0] = VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF |
-          VP8_EFLAG_NO_REF_GF;
+      layer_flags[0] =
+          VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_REF_GF;
      layer_flags[4] = layer_flags[0];
      // Layer 1: predict from L, G, ARF; update G.
      layer_flags[2] = VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_UPD_LAST;
      layer_flags[6] = layer_flags[2];
      // Layer 2: predict from L, G, ARF; update none.
      layer_flags[1] = VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF |
-          VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_ENTROPY;
+                       VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_ENTROPY;
      layer_flags[3] = layer_flags[1];
      layer_flags[5] = layer_flags[1];
      layer_flags[7] = layer_flags[1];
@@ -464,7 +487,7 @@ static void set_temporal_layer_pattern(int layering_mode,
 }

 int main(int argc, char **argv) {
-  VpxVideoWriter *outfile[VPX_TS_MAX_LAYERS] = {NULL};
+  VpxVideoWriter *outfile[VPX_TS_MAX_LAYERS] = { NULL };
  vpx_codec_ctx_t codec;
  vpx_codec_enc_cfg_t cfg;
  int frame_cnt = 0;
@@ -477,21 +500,21 @@ int main(int argc, char **argv) {
  int got_data;
  int flags = 0;
  unsigned int i;
-  int pts = 0;  // PTS starts at 0.
+  int pts = 0;             // PTS starts at 0.
  int frame_duration = 1;  // 1 timebase tick per frame.
  int layering_mode = 0;
-  int layer_flags[VPX_TS_MAX_PERIODICITY] = {0};
+  int layer_flags[VPX_TS_MAX_PERIODICITY] = { 0 };
  int flag_periodicity = 1;
 #if VPX_ENCODER_ABI_VERSION > (4 + VPX_CODEC_ABI_VERSION)
-  vpx_svc_layer_id_t layer_id = {0, 0};
+  vpx_svc_layer_id_t layer_id = { 0, 0 };
 #else
-  vpx_svc_layer_id_t layer_id = {0};
+  vpx_svc_layer_id_t layer_id = { 0 };
 #endif
  const VpxInterface *encoder = NULL;
  FILE *infile = NULL;
  struct RateControlMetrics rc;
  int64_t cx_time = 0;
-  const int min_args_base = 11;
+  const int min_args_base = 12;
 #if CONFIG_VP9_HIGHBITDEPTH
  vpx_bit_depth_t bit_depth = VPX_BITS_8;
  int input_bit_depth = 8;
@@ -501,37 +524,38 @@ int main(int argc, char **argv) {
 #endif  // CONFIG_VP9_HIGHBITDEPTH
  double sum_bitrate = 0.0;
  double sum_bitrate2 = 0.0;
-  double framerate  = 30.0;
+  double framerate = 30.0;

  exec_name = argv[0];
  // Check usage and arguments.
  if (argc < min_args) {
 #if CONFIG_VP9_HIGHBITDEPTH
    die("Usage: %s <infile> <outfile> <codec_type(vp8/vp9)> <width> <height> "
-        "<rate_num> <rate_den> <speed> <frame_drop_threshold> <mode> "
-        "<Rate_0> ... <Rate_nlayers-1> <bit-depth> \n", argv[0]);
+        "<rate_num> <rate_den> <speed> <frame_drop_threshold> <threads> <mode> "
+        "<Rate_0> ... <Rate_nlayers-1> <bit-depth> \n",
+        argv[0]);
 #else
    die("Usage: %s <infile> <outfile> <codec_type(vp8/vp9)> <width> <height> "
-        "<rate_num> <rate_den> <speed> <frame_drop_threshold> <mode> "
-        "<Rate_0> ... <Rate_nlayers-1> \n", argv[0]);
+        "<rate_num> <rate_den> <speed> <frame_drop_threshold> <threads> <mode> "
+        "<Rate_0> ... <Rate_nlayers-1> \n",
+        argv[0]);
 #endif  // CONFIG_VP9_HIGHBITDEPTH
  }

  encoder = get_vpx_encoder_by_name(argv[3]);
-  if (!encoder)
-    die("Unsupported codec.");
+  if (!encoder) die("Unsupported codec.");

  printf("Using %s\n", vpx_codec_iface_name(encoder->codec_interface()));

-  width = strtol(argv[4], NULL, 0);
-  height = strtol(argv[5], NULL, 0);
+  width = (unsigned int)strtoul(argv[4], NULL, 0);
+  height = (unsigned int)strtoul(argv[5], NULL, 0);
  if (width < 16 || width % 2 || height < 16 || height % 2) {
    die("Invalid resolution: %d x %d", width, height);
  }

-  layering_mode = strtol(argv[10], NULL, 0);
-  if (layering_mode < 0 || layering_mode > 12) {
-    die("Invalid layering mode (0..12) %s", argv[10]);
+  layering_mode = (int)strtol(argv[11], NULL, 0);
+  if (layering_mode < 0 || layering_mode > 12) {
+    die("Invalid layering mode (0..12) %s", argv[11]);
   }

  if (argc != min_args + mode_to_num_layers[layering_mode]) {
@@ -539,7 +563,7 @@ int main(int argc, char **argv) {
  }

 #if CONFIG_VP9_HIGHBITDEPTH
-  switch (strtol(argv[argc-1], NULL, 0)) {
+  switch (strtol(argv[argc - 1], NULL, 0)) {
    case 8:
      bit_depth = VPX_BITS_8;
      input_bit_depth = 8;
@@ -552,13 +576,11 @@ int main(int argc, char **argv) {
      bit_depth = VPX_BITS_12;
      input_bit_depth = 12;
      break;
-    default:
-      die("Invalid bit depth (8, 10, 12) %s", argv[argc-1]);
+    default: die("Invalid bit depth (8, 10, 12) %s", argv[argc - 1]);
  }
-  if (!vpx_img_alloc(&raw,
-                     bit_depth == VPX_BITS_8 ? VPX_IMG_FMT_I420 :
-                                               VPX_IMG_FMT_I42016,
-                     width, height, 32)) {
+  if (!vpx_img_alloc(
+          &raw, bit_depth == VPX_BITS_8 ? VPX_IMG_FMT_I420 : VPX_IMG_FMT_I42016,
+          width, height, 32)) {
    die("Failed to allocate image", width, height);
  }
 #else
@@ -587,31 +609,29 @@ int main(int argc, char **argv) {
 #endif  // CONFIG_VP9_HIGHBITDEPTH

  // Timebase format e.g. 30fps: numerator=1, demoninator = 30.
-  cfg.g_timebase.num = strtol(argv[6], NULL, 0);
-  cfg.g_timebase.den = strtol(argv[7], NULL, 0);
+  cfg.g_timebase.num = (int)strtol(argv[6], NULL, 0);
+  cfg.g_timebase.den = (int)strtol(argv[7], NULL, 0);

-  speed = strtol(argv[8], NULL, 0);
+  speed = (int)strtol(argv[8], NULL, 0);
  if (speed < 0) {
    die("Invalid speed setting: must be positive");
  }

  for (i = min_args_base;
-       (int)i < min_args_base + mode_to_num_layers[layering_mode];
-       ++i) {
-    rc.layer_target_bitrate[i - 11] = strtol(argv[i], NULL, 0);
+       (int)i < min_args_base + mode_to_num_layers[layering_mode]; ++i) {
+    rc.layer_target_bitrate[i - 12] = (int)strtol(argv[i], NULL, 0);
    if (strncmp(encoder->name, "vp8", 3) == 0)
-      cfg.ts_target_bitrate[i - 11] = rc.layer_target_bitrate[i - 11];
+      cfg.ts_target_bitrate[i - 12] = rc.layer_target_bitrate[i - 12];
    else if (strncmp(encoder->name, "vp9", 3) == 0)
-      cfg.layer_target_bitrate[i - 11] = rc.layer_target_bitrate[i - 11];
+      cfg.layer_target_bitrate[i - 12] = rc.layer_target_bitrate[i - 12];
  }

  // Real time parameters.
-  cfg.rc_dropframe_thresh = strtol(argv[9], NULL, 0);
+  cfg.rc_dropframe_thresh = (unsigned int)strtoul(argv[9], NULL, 0);
  cfg.rc_end_usage = VPX_CBR;
  cfg.rc_min_quantizer = 2;
  cfg.rc_max_quantizer = 56;
-  if (strncmp(encoder->name, "vp9", 3) == 0)
-    cfg.rc_max_quantizer = 52;
+  if (strncmp(encoder->name, "vp9", 3) == 0) cfg.rc_max_quantizer = 52;
  cfg.rc_undershoot_pct = 50;
  cfg.rc_overshoot_pct = 50;
  cfg.rc_buf_initial_sz = 500;
@@ -622,11 +642,11 @@ int main(int argc, char **argv) {
  cfg.rc_resize_allowed = 0;

  // Use 1 thread as default.
-  cfg.g_threads = 1;
+  cfg.g_threads = (unsigned int)strtoul(argv[10], NULL, 0);

  // Enable error resilient mode.
  cfg.g_error_resilient = 1;
-  cfg.g_lag_in_frames   = 0;
+  cfg.g_lag_in_frames = 0;
  cfg.kf_mode = VPX_KF_AUTO;

  // Disable automatic keyframe placement.
@@ -634,9 +654,7 @@ int main(int argc, char **argv) {

  cfg.temporal_layering_mode = VP9E_TEMPORAL_LAYERING_MODE_BYPASS;

-  set_temporal_layer_pattern(layering_mode,
-                             &cfg,
-                             layer_flags,
+  set_temporal_layer_pattern(layering_mode, &cfg, layer_flags,
                             &flag_periodicity);

  set_rate_control_metrics(&rc, &cfg);
@@ -663,15 +681,14 @@ int main(int argc, char **argv) {

    snprintf(file_name, sizeof(file_name), "%s_%d.ivf", argv[2], i);
    outfile[i] = vpx_video_writer_open(file_name, kContainerIVF, &info);
-    if (!outfile[i])
-      die("Failed to open %s for writing", file_name);
+    if (!outfile[i]) die("Failed to open %s for writing", file_name);

    assert(outfile[i] != NULL);
  }
  // No spatial layers in this encoder.
  cfg.ss_number_layers = 1;

-  // Initialize codec.
+// Initialize codec.
 #if CONFIG_VP9_HIGHBITDEPTH
  if (vpx_codec_enc_init(
          &codec, encoder->codec_interface(), &cfg,
@@ -684,16 +701,21 @@ int main(int argc, char **argv) {
  if (strncmp(encoder->name, "vp8", 3) == 0) {
    vpx_codec_control(&codec, VP8E_SET_CPUUSED, -speed);
    vpx_codec_control(&codec, VP8E_SET_NOISE_SENSITIVITY, kDenoiserOff);
-    vpx_codec_control(&codec, VP8E_SET_STATIC_THRESHOLD, 0);
+    vpx_codec_control(&codec, VP8E_SET_STATIC_THRESHOLD, 1);
+    vpx_codec_control(&codec, VP8E_SET_GF_CBR_BOOST_PCT, 0);
  } else if (strncmp(encoder->name, "vp9", 3) == 0) {
    vpx_svc_extra_cfg_t svc_params;
+    memset(&svc_params, 0, sizeof(svc_params));
    vpx_codec_control(&codec, VP8E_SET_CPUUSED, speed);
    vpx_codec_control(&codec, VP9E_SET_AQ_MODE, 3);
+    vpx_codec_control(&codec, VP9E_SET_GF_CBR_BOOST_PCT, 0);
+    vpx_codec_control(&codec, VP9E_SET_FRAME_PARALLEL_DECODING, 0);
    vpx_codec_control(&codec, VP9E_SET_FRAME_PERIODIC_BOOST, 0);
-    vpx_codec_control(&codec, VP9E_SET_NOISE_SENSITIVITY, 0);
-    vpx_codec_control(&codec, VP8E_SET_STATIC_THRESHOLD, 0);
+    vpx_codec_control(&codec, VP9E_SET_NOISE_SENSITIVITY, kDenoiserOff);
+    vpx_codec_control(&codec, VP8E_SET_STATIC_THRESHOLD, 1);
+    vpx_codec_control(&codec, VP9E_SET_TUNE_CONTENT, 0);
    vpx_codec_control(&codec, VP9E_SET_TILE_COLUMNS, (cfg.g_threads >> 1));
-    if (vpx_codec_control(&codec, VP9E_SET_SVC, layering_mode > 0 ? 1: 0))
+    if (vpx_codec_control(&codec, VP9E_SET_SVC, layering_mode > 0 ? 1 : 0))
      die_codec(&codec, "Failed to set SVC");
    for (i = 0; i < cfg.ts_number_layers; ++i) {
      svc_params.max_quantizers[i] = cfg.rc_max_quantizer;
@@ -734,14 +756,12 @@ int main(int argc, char **argv) {
                        layer_id.temporal_layer_id);
    }
    flags = layer_flags[frame_cnt % flag_periodicity];
-    if (layering_mode == 0)
-      flags = 0;
+    if (layering_mode == 0) flags = 0;
    frame_avail = vpx_img_read(&raw, infile);
-    if (frame_avail)
-      ++rc.layer_input_frames[layer_id.temporal_layer_id];
+    if (frame_avail) ++rc.layer_input_frames[layer_id.temporal_layer_id];
    vpx_usec_timer_start(&timer);
-    if (vpx_codec_encode(&codec, frame_avail? &raw : NULL, pts, 1, flags,
-        VPX_DL_REALTIME)) {
+    if (vpx_codec_encode(&codec, frame_avail ? &raw : NULL, pts, 1, flags,
+                         VPX_DL_REALTIME)) {
      die_codec(&codec, "Failed to encode frame");
    }
    vpx_usec_timer_mark(&timer);
@@ -751,12 +771,12 @@ int main(int argc, char **argv) {
      layer_flags[0] &= ~VPX_EFLAG_FORCE_KF;
    }
    got_data = 0;
-    while ( (pkt = vpx_codec_get_cx_data(&codec, &iter)) ) {
+    while ((pkt = vpx_codec_get_cx_data(&codec, &iter))) {
      got_data = 1;
      switch (pkt->kind) {
        case VPX_CODEC_CX_FRAME_PKT:
          for (i = cfg.ts_layer_id[frame_cnt % cfg.ts_periodicity];
-              i < cfg.ts_number_layers; ++i) {
+               i < cfg.ts_number_layers; ++i) {
            vpx_video_writer_write_frame(outfile[i], pkt->data.frame.buf,
                                         pkt->data.frame.sz, pts);
            ++rc.layer_tot_enc_frames[i];
@@ -799,8 +819,7 @@ int main(int argc, char **argv) {
            }
          }
          break;
-          default:
-            break;
+        default: break;
      }
    }
    ++frame_cnt;
@@ -810,16 +829,13 @@ int main(int argc, char **argv) {
  printout_rate_control_summary(&rc, &cfg, frame_cnt);
  printf("\n");
  printf("Frame cnt and encoding time/FPS stats for encoding: %d %f %f \n",
-          frame_cnt,
-          1000 * (float)cx_time / (double)(frame_cnt * 1000000),
-          1000000 * (double)frame_cnt / (double)cx_time);
+         frame_cnt, 1000 * (float)cx_time / (double)(frame_cnt * 1000000),
+         1000000 * (double)frame_cnt / (double)cx_time);

-  if (vpx_codec_destroy(&codec))
-    die_codec(&codec, "Failed to destroy codec");
+  if (vpx_codec_destroy(&codec)) die_codec(&codec, "Failed to destroy codec");

  // Try to rewrite the output file headers with the actual frame count.
-  for (i = 0; i < cfg.ts_number_layers; ++i)
-    vpx_video_writer_close(outfile[i]);
+  for (i = 0; i < cfg.ts_number_layers; ++i) vpx_video_writer_close(outfile[i]);

  vpx_img_free(&raw);
  return EXIT_SUCCESS;
--- a/ivfdec.c
+++ b/ivfdec.c
@@ -23,7 +23,7 @@ static void fix_framerate(int *num, int *den) {
  // we can guess the framerate using only the timebase in this
  // case. Other files would require reading ahead to guess the
  // timebase, like we do for webm.
-  if (*num < 1000) {
+  if (*den > 0 && *den < 1000000000 && *num > 0 && *num < 1000) {
    // Correct for the factor of 2 applied to the timebase in the encoder.
    if (*num & 1)
      *den *= 2;
@@ -46,7 +46,8 @@ int file_is_ivf(struct VpxInputContext *input_ctx) {
      is_ivf = 1;

      if (mem_get_le16(raw_hdr + 4) != 0) {
-        fprintf(stderr, "Error: Unrecognized IVF version! This file may not"
+        fprintf(stderr,
+                "Error: Unrecognized IVF version! This file may not"
                " decode properly.");
      }

@@ -69,14 +70,13 @@ int file_is_ivf(struct VpxInputContext *input_ctx) {
  return is_ivf;
 }

-int ivf_read_frame(FILE *infile, uint8_t **buffer,
-                   size_t *bytes_read, size_t *buffer_size) {
-  char raw_header[IVF_FRAME_HDR_SZ] = {0};
+int ivf_read_frame(FILE *infile, uint8_t **buffer, size_t *bytes_read,
+                   size_t *buffer_size) {
+  char raw_header[IVF_FRAME_HDR_SZ] = { 0 };
  size_t frame_size = 0;

  if (fread(raw_header, IVF_FRAME_HDR_SZ, 1, infile) != 1) {
-    if (!feof(infile))
-      warn("Failed to read frame size\n");
+    if (!feof(infile)) warn("Failed to read frame size\n");
  } else {
    frame_size = mem_get_le32(raw_header);

--- a/ivfdec.h
+++ b/ivfdec.h
@@ -18,11 +18,11 @@ extern "C" {

 int file_is_ivf(struct VpxInputContext *input);

-int ivf_read_frame(FILE *infile, uint8_t **buffer,
-                   size_t *bytes_read, size_t *buffer_size);
+int ivf_read_frame(FILE *infile, uint8_t **buffer, size_t *bytes_read,
+                   size_t *buffer_size);

 #ifdef __cplusplus
-}  /* extern "C" */
+} /* extern "C" */
 #endif

 #endif  // IVFDEC_H_
--- a/ivfenc.c
+++ b/ivfenc.c
@@ -13,10 +13,8 @@
 #include "vpx/vpx_encoder.h"
 #include "vpx_ports/mem_ops.h"

-void ivf_write_file_header(FILE *outfile,
-                           const struct vpx_codec_enc_cfg *cfg,
-                           unsigned int fourcc,
-                           int frame_cnt) {
+void ivf_write_file_header(FILE *outfile, const struct vpx_codec_enc_cfg *cfg,
+                           unsigned int fourcc, int frame_cnt) {
  char header[32];

  header[0] = 'D';
--- a/ivfenc.h
+++ b/ivfenc.h
@@ -19,17 +19,15 @@ struct vpx_codec_cx_pkt;
 extern "C" {
 #endif

-void ivf_write_file_header(FILE *outfile,
-                           const struct vpx_codec_enc_cfg *cfg,
-                           uint32_t fourcc,
-                           int frame_cnt);
+void ivf_write_file_header(FILE *outfile, const struct vpx_codec_enc_cfg *cfg,
+                           uint32_t fourcc, int frame_cnt);

 void ivf_write_frame_header(FILE *outfile, int64_t pts, size_t frame_size);

 void ivf_write_frame_size(FILE *outfile, size_t frame_size);

 #ifdef __cplusplus
-}  /* extern "C" */
+} /* extern "C" */
 #endif

 #endif  // IVFENC_H_
--- a/libs.mk
+++ b/libs.mk
@@ -12,7 +12,7 @@
 # ARM assembly files are written in RVCT-style. We use some make magic to
 # filter those files to allow GCC compilation
 ifeq ($(ARCH_ARM),yes)
-  ASM:=$(if $(filter yes,$(CONFIG_GCC)$(CONFIG_MSVS)),.asm.s,.asm)
+  ASM:=$(if $(filter yes,$(CONFIG_GCC)$(CONFIG_MSVS)),.asm.S,.asm)
 else
  ASM:=.asm
 endif
@@ -50,7 +50,10 @@ CODEC_SRCS-yes += $(addprefix vpx_ports/,$(call enabled,PORTS_SRCS))
 include $(SRC_PATH_BARE)/vpx_dsp/vpx_dsp.mk
 CODEC_SRCS-yes += $(addprefix vpx_dsp/,$(call enabled,DSP_SRCS))

-ifneq ($(CONFIG_VP8_ENCODER)$(CONFIG_VP8_DECODER),)
+include $(SRC_PATH_BARE)/vpx_util/vpx_util.mk
+CODEC_SRCS-yes += $(addprefix vpx_util/,$(call enabled,UTIL_SRCS))
+
+ifeq ($(CONFIG_VP8),yes)
  VP8_PREFIX=vp8/
  include $(SRC_PATH_BARE)/$(VP8_PREFIX)vp8_common.mk
 endif
@@ -73,7 +76,7 @@ ifeq ($(CONFIG_VP8_DECODER),yes)
  CODEC_DOC_SECTIONS += vp8 vp8_decoder
 endif

-ifneq ($(CONFIG_VP9_ENCODER)$(CONFIG_VP9_DECODER),)
+ifeq ($(CONFIG_VP9),yes)
  VP9_PREFIX=vp9/
  include $(SRC_PATH_BARE)/$(VP9_PREFIX)vp9_common.mk
 endif
@@ -103,9 +106,6 @@ ifeq ($(CONFIG_VP9_DECODER),yes)
  CODEC_DOC_SECTIONS += vp9 vp9_decoder
 endif

-VP9_PREFIX=vp9/
-$(BUILD_PFX)$(VP9_PREFIX)%.c.o: CFLAGS += -Wextra
-
 ifeq ($(CONFIG_ENCODERS),yes)
  CODEC_DOC_SECTIONS += encoder
 endif
@@ -113,6 +113,12 @@ ifeq ($(CONFIG_DECODERS),yes)
  CODEC_DOC_SECTIONS += decoder
 endif

+# Suppress -Wextra warnings in third party code.
+$(BUILD_PFX)third_party/googletest/%.cc.o: CXXFLAGS += -Wno-missing-field-initializers
+# Suppress -Wextra warnings in first party code pending investigation.
+# https://bugs.chromium.org/p/webm/issues/detail?id=1069
+$(BUILD_PFX)vp8/encoder/onyx_if.c.o: CFLAGS += -Wno-unknown-warning-option -Wno-clobbered
+$(BUILD_PFX)vp8/decoder/onyxd_if.c.o: CFLAGS += -Wno-unknown-warning-option -Wno-clobbered

 ifeq ($(CONFIG_MSVS),yes)
 CODEC_LIB=$(if $(CONFIG_STATIC_MSVCRT),vpxmt,vpxmd)
@@ -146,6 +152,9 @@ INSTALL-SRCS-$(CONFIG_CODEC_SRCS) += third_party/x86inc/x86inc.asm
 endif
 CODEC_EXPORTS-yes += vpx/exports_com
 CODEC_EXPORTS-$(CONFIG_ENCODERS) += vpx/exports_enc
+ifeq ($(CONFIG_SPATIAL_SVC),yes)
+CODEC_EXPORTS-$(CONFIG_ENCODERS) += vpx/exports_spatial_svc
+endif
 CODEC_EXPORTS-$(CONFIG_DECODERS) += vpx/exports_dec

 INSTALL-LIBS-yes += include/vpx/vpx_codec.h
@@ -223,8 +232,8 @@ OBJS-yes += $(LIBVPX_OBJS)
 LIBS-$(if yes,$(CONFIG_STATIC)) += $(BUILD_PFX)libvpx.a $(BUILD_PFX)libvpx_g.a
 $(BUILD_PFX)libvpx_g.a: $(LIBVPX_OBJS)

-SO_VERSION_MAJOR := 2
-SO_VERSION_MINOR := 0
+SO_VERSION_MAJOR := 4
+SO_VERSION_MINOR := 1
 SO_VERSION_PATCH := 0
 ifeq ($(filter darwin%,$(TGT_OS)),$(TGT_OS))
 LIBVPX_SO               := libvpx.$(SO_VERSION_MAJOR).dylib
@@ -233,6 +242,12 @@ EXPORT_FILE             := libvpx.syms
 LIBVPX_SO_SYMLINKS      := $(addprefix $(LIBSUBDIR)/, \
                             libvpx.dylib  )
 else
+ifeq ($(filter iphonesimulator%,$(TGT_OS)),$(TGT_OS))
+LIBVPX_SO               := libvpx.$(SO_VERSION_MAJOR).dylib
+SHARED_LIB_SUF          := .dylib
+EXPORT_FILE             := libvpx.syms
+LIBVPX_SO_SYMLINKS      := $(addprefix $(LIBSUBDIR)/, libvpx.dylib)
+else
 ifeq ($(filter os2%,$(TGT_OS)),$(TGT_OS))
 LIBVPX_SO               := libvpx$(SO_VERSION_MAJOR).dll
 SHARED_LIB_SUF          := _dll.a
@@ -248,6 +263,7 @@ LIBVPX_SO_SYMLINKS      := $(addprefix $(LIBSUBDIR)/, \
                             libvpx.so.$(SO_VERSION_MAJOR).$(SO_VERSION_MINOR))
 endif
 endif
+endif

 LIBS-$(CONFIG_SHARED) += $(BUILD_PFX)$(LIBVPX_SO)\
                           $(notdir $(LIBVPX_SO_SYMLINKS)) \
@@ -350,13 +366,19 @@ endif
 #
 # Add assembler dependencies for configuration.
 #
-$(filter %.s.o,$(OBJS-yes)):     $(BUILD_PFX)vpx_config.asm
+$(filter %.S.o,$(OBJS-yes)):     $(BUILD_PFX)vpx_config.asm
 $(filter %$(ASM).o,$(OBJS-yes)): $(BUILD_PFX)vpx_config.asm


 $(shell $(SRC_PATH_BARE)/build/make/version.sh "$(SRC_PATH_BARE)" $(BUILD_PFX)vpx_version.h)
 CLEAN-OBJS += $(BUILD_PFX)vpx_version.h

+#
+# Add include path for libwebm sources.
+#
+ifeq ($(CONFIG_WEBM_IO),yes)
+  CXXFLAGS += -I$(SRC_PATH_BARE)/third_party/libwebm
+endif

 ##
 ## libvpx test directives
@@ -369,7 +391,7 @@ LIBVPX_TEST_SRCS=$(addprefix test/,$(call enabled,LIBVPX_TEST_SRCS))
 LIBVPX_TEST_BIN=./test_libvpx$(EXE_SFX)
 LIBVPX_TEST_DATA=$(addprefix $(LIBVPX_TEST_DATA_PATH)/,\
                     $(call enabled,LIBVPX_TEST_DATA))
-libvpx_test_data_url=http://downloads.webmproject.org/test_data/libvpx/$(1)
+libvpx_test_data_url=https://storage.googleapis.com/downloads.webmproject.org/test_data/libvpx/$(1)

 TEST_INTRA_PRED_SPEED_BIN=./test_intra_pred_speed$(EXE_SFX)
 TEST_INTRA_PRED_SPEED_SRCS=$(addprefix test/,$(call enabled,TEST_INTRA_PRED_SPEED_SRCS))
@@ -383,7 +405,7 @@ CLEAN-OBJS += libvpx_test_srcs.txt
 $(LIBVPX_TEST_DATA): $(SRC_PATH_BARE)/test/test-data.sha1
 	@echo "    [DOWNLOAD] $@"
 	$(qexec)trap 'rm -f $@' INT TERM &&\
-            curl -L -o $@ $(call libvpx_test_data_url,$(@F))
+            curl --retry 1 -L -o $@ $(call libvpx_test_data_url,$(@F))

 testdata:: $(LIBVPX_TEST_DATA)
 	$(qexec)[ -x "$$(which sha1sum)" ] && sha1sum=sha1sum;\
@@ -392,12 +414,10 @@ testdata:: $(LIBVPX_TEST_DATA)
          if [ -n "$${sha1sum}" ]; then\
            set -e;\
            echo "Checking test data:";\
-            if [ -n "$(LIBVPX_TEST_DATA)" ]; then\
-                for f in $(call enabled,LIBVPX_TEST_DATA); do\
-                    grep $$f $(SRC_PATH_BARE)/test/test-data.sha1 |\
-                        (cd $(LIBVPX_TEST_DATA_PATH); $${sha1sum} -c);\
-                done; \
-            fi; \
+            for f in $(call enabled,LIBVPX_TEST_DATA); do\
+                grep $$f $(SRC_PATH_BARE)/test/test-data.sha1 |\
+                    (cd $(LIBVPX_TEST_DATA_PATH); $${sha1sum} -c);\
+            done; \
        else\
            echo "Skipping test data integrity check, sha1sum not found.";\
        fi
@@ -434,6 +454,7 @@ test_libvpx.$(VCPROJ_SFX): $(LIBVPX_TEST_SRCS) vpx.$(VCPROJ_SFX) gtest.$(VCPROJ_
            $(if $(CONFIG_STATIC_MSVCRT),--static-crt) \
            --out=$@ $(INTERNAL_CFLAGS) $(CFLAGS) \
            -I. -I"$(SRC_PATH_BARE)/third_party/googletest/src/include" \
+            $(if $(CONFIG_WEBM_IO),-I"$(SRC_PATH_BARE)/third_party/libwebm") \
            -L. -l$(CODEC_LIB) -l$(GTEST_LIB) $^

 PROJECTS-$(CONFIG_MSVS) += test_libvpx.$(VCPROJ_SFX)
--- a/md5_utils.c
+++ b/md5_utils.c
@@ -20,19 +20,17 @@
 * Still in the public domain.
 */

-#include <string.h>   /* for memcpy() */
+#include <string.h> /* for memcpy() */

 #include "md5_utils.h"

-static void
-byteSwap(UWORD32 *buf, unsigned words) {
+static void byteSwap(UWORD32 *buf, unsigned words) {
  md5byte *p;

  /* Only swap bytes for big endian machines */
  int i = 1;

-  if (*(char *)&i == 1)
-    return;
+  if (*(char *)&i == 1) return;

  p = (md5byte *)buf;

@@ -47,8 +45,7 @@ byteSwap(UWORD32 *buf, unsigned words) {
 * Start MD5 accumulation.  Set bit count to 0 and buffer to mysterious
 * initialization constants.
 */
-void
-MD5Init(struct MD5Context *ctx) {
+void MD5Init(struct MD5Context *ctx) {
  ctx->buf[0] = 0x67452301;
  ctx->buf[1] = 0xefcdab89;
  ctx->buf[2] = 0x98badcfe;
@@ -62,8 +59,7 @@ MD5Init(struct MD5Context *ctx) {
 * Update context to reflect the concatenation of another buffer full
 * of bytes.
 */
-void
-MD5Update(struct MD5Context *ctx, md5byte const *buf, unsigned len) {
+void MD5Update(struct MD5Context *ctx, md5byte const *buf, unsigned len) {
  UWORD32 t;

  /* Update byte count */
@@ -71,9 +67,9 @@ MD5Update(struct MD5Context *ctx, md5byte const *buf, unsigned len) {
  t = ctx->bytes[0];

  if ((ctx->bytes[0] = t + len) < t)
-    ctx->bytes[1]++;  /* Carry from low to high */
+    ctx->bytes[1]++; /* Carry from low to high */

-  t = 64 - (t & 0x3f);  /* Space available in ctx->in (at least 1) */
+  t = 64 - (t & 0x3f); /* Space available in ctx->in (at least 1) */

  if (t > len) {
    memcpy((md5byte *)ctx->in + 64 - t, buf, len);
@@ -104,8 +100,7 @@ MD5Update(struct MD5Context *ctx, md5byte const *buf, unsigned len) {
 * Final wrapup - pad to 64-byte boundary with the bit pattern
 * 1 0* (64-bit count of bits processed, MSB-first)
 */
-void
-MD5Final(md5byte digest[16], struct MD5Context *ctx) {
+void MD5Final(md5byte digest[16], struct MD5Context *ctx) {
  int count = ctx->bytes[0] & 0x3f; /* Number of bytes in ctx->in */
  md5byte *p = (md5byte *)ctx->in + count;

@@ -115,7 +110,7 @@ MD5Final(md5byte digest[16], struct MD5Context *ctx) {
  /* Bytes of padding needed to make 56 bytes (-8..55) */
  count = 56 - 1 - count;

-  if (count < 0) {  /* Padding forces an extra block */
+  if (count < 0) { /* Padding forces an extra block */
    memset(p, 0, count + 8);
    byteSwap(ctx->in, 16);
    MD5Transform(ctx->buf, ctx->in);
@@ -147,16 +142,27 @@ MD5Final(md5byte digest[16], struct MD5Context *ctx) {
 #define F4(x, y, z) (y ^ (x | ~z))

 /* This is the central step in the MD5 algorithm. */
-#define MD5STEP(f,w,x,y,z,in,s) \
-  (w += f(x,y,z) + in, w = (w<<s | w>>(32-s)) + x)
+#define MD5STEP(f, w, x, y, z, in, s) \
+  (w += f(x, y, z) + in, w = (w << s | w >> (32 - s)) + x)
+
+#if defined(__clang__) && defined(__has_attribute)
+#if __has_attribute(no_sanitize)
+#define VPX_NO_UNSIGNED_OVERFLOW_CHECK \
+  __attribute__((no_sanitize("unsigned-integer-overflow")))
+#endif
+#endif
+
+#ifndef VPX_NO_UNSIGNED_OVERFLOW_CHECK
+#define VPX_NO_UNSIGNED_OVERFLOW_CHECK
+#endif

 /*
 * The core of the MD5 algorithm, this alters an existing MD5 hash to
 * reflect the addition of 16 longwords of new data.  MD5Update blocks
 * the data and converts bytes into longwords for this routine.
 */
-void
-MD5Transform(UWORD32 buf[4], UWORD32 const in[16]) {
+VPX_NO_UNSIGNED_OVERFLOW_CHECK void MD5Transform(UWORD32 buf[4],
+                                                 UWORD32 const in[16]) {
  register UWORD32 a, b, c, d;

  a = buf[0];
@@ -238,4 +244,6 @@ MD5Transform(UWORD32 buf[4], UWORD32 const in[16]) {
  buf[3] += d;
 }

+#undef VPX_NO_UNSIGNED_OVERFLOW_CHECK
+
 #endif
--- a/rate_hist.c
+++ b/rate_hist.c
@@ -45,8 +45,7 @@ struct rate_hist *init_rate_histogram(const vpx_codec_enc_cfg_t *cfg,
  hist->samples = cfg->rc_buf_sz * 5 / 4 * fps->num / fps->den / 1000;

  // prevent division by zero
-  if (hist->samples == 0)
-    hist->samples = 1;
+  if (hist->samples == 0) hist->samples = 1;

  hist->frames = 0;
  hist->total = 0;
@@ -78,18 +77,16 @@ void update_rate_histogram(struct rate_hist *hist,
  int64_t avg_bitrate = 0;
  int64_t sum_sz = 0;
  const int64_t now = pkt->data.frame.pts * 1000 *
-                          (uint64_t)cfg->g_timebase.num /
-                              (uint64_t)cfg->g_timebase.den;
+                      (uint64_t)cfg->g_timebase.num /
+                      (uint64_t)cfg->g_timebase.den;

  int idx = hist->frames++ % hist->samples;
  hist->pts[idx] = now;
  hist->sz[idx] = (int)pkt->data.frame.sz;

-  if (now < cfg->rc_buf_initial_sz)
-    return;
+  if (now < cfg->rc_buf_initial_sz) return;

-  if (!cfg->rc_target_bitrate)
-    return;
+  if (!cfg->rc_target_bitrate) return;

  then = now;

@@ -98,20 +95,16 @@ void update_rate_histogram(struct rate_hist *hist,
    const int i_idx = (i - 1) % hist->samples;

    then = hist->pts[i_idx];
-    if (now - then > cfg->rc_buf_sz)
-      break;
+    if (now - then > cfg->rc_buf_sz) break;
    sum_sz += hist->sz[i_idx];
  }

-  if (now == then)
-    return;
+  if (now == then) return;

  avg_bitrate = sum_sz * 8 * 1000 / (now - then);
  idx = (int)(avg_bitrate * (RATE_BINS / 2) / (cfg->rc_target_bitrate * 1000));
-  if (idx < 0)
-    idx = 0;
-  if (idx > RATE_BINS - 1)
-    idx = RATE_BINS - 1;
+  if (idx < 0) idx = 0;
+  if (idx > RATE_BINS - 1) idx = RATE_BINS - 1;
  if (hist->bucket[idx].low > avg_bitrate)
    hist->bucket[idx].low = (int)avg_bitrate;
  if (hist->bucket[idx].high < avg_bitrate)
@@ -120,8 +113,8 @@ void update_rate_histogram(struct rate_hist *hist,
  hist->total++;
 }

-static int merge_hist_buckets(struct hist_bucket *bucket,
-                              int max_buckets, int *num_buckets) {
+static int merge_hist_buckets(struct hist_bucket *bucket, int max_buckets,
+                              int *num_buckets) {
  int small_bucket = 0, merge_bucket = INT_MAX, big_bucket = 0;
  int buckets = *num_buckets;
  int i;
@@ -129,10 +122,8 @@ static int merge_hist_buckets(struct hist_bucket *bucket,
  /* Find the extrema for this list of buckets */
  big_bucket = small_bucket = 0;
  for (i = 0; i < buckets; i++) {
-    if (bucket[i].count < bucket[small_bucket].count)
-      small_bucket = i;
-    if (bucket[i].count > bucket[big_bucket].count)
-      big_bucket = i;
+    if (bucket[i].count < bucket[small_bucket].count) small_bucket = i;
+    if (bucket[i].count > bucket[big_bucket].count) big_bucket = i;
  }

  /* If we have too many buckets, merge the smallest with an adjacent
@@ -174,13 +165,10 @@ static int merge_hist_buckets(struct hist_bucket *bucket,
     */
    big_bucket = small_bucket = 0;
    for (i = 0; i < buckets; i++) {
-      if (i > merge_bucket)
-        bucket[i] = bucket[i + 1];
+      if (i > merge_bucket) bucket[i] = bucket[i + 1];

-      if (bucket[i].count < bucket[small_bucket].count)
-        small_bucket = i;
-      if (bucket[i].count > bucket[big_bucket].count)
-        big_bucket = i;
+      if (bucket[i].count < bucket[small_bucket].count) small_bucket = i;
+      if (bucket[i].count > bucket[big_bucket].count) big_bucket = i;
    }
  }

@@ -188,8 +176,8 @@ static int merge_hist_buckets(struct hist_bucket *bucket,
  return bucket[big_bucket].count;
 }

-static void show_histogram(const struct hist_bucket *bucket,
-                           int buckets, int total, int scale) {
+static void show_histogram(const struct hist_bucket *bucket, int buckets,
+                           int total, int scale) {
  const char *pat1, *pat2;
  int i;

@@ -232,8 +220,7 @@ static void show_histogram(const struct hist_bucket *bucket,

    pct = (float)(100.0 * bucket[i].count / total);
    len = HIST_BAR_MAX * bucket[i].count / scale;
-    if (len < 1)
-      len = 1;
+    if (len < 1) len = 1;
    assert(len <= HIST_BAR_MAX);

    if (bucket[i].low == bucket[i].high)
@@ -241,8 +228,7 @@ static void show_histogram(const struct hist_bucket *bucket,
    else
      fprintf(stderr, pat2, bucket[i].low, bucket[i].high);

-    for (j = 0; j < HIST_BAR_MAX; j++)
-      fprintf(stderr, j < len ? "=" : " ");
+    for (j = 0; j < HIST_BAR_MAX; j++) fprintf(stderr, j < len ? "=" : " ");
    fprintf(stderr, "\t%5d (%6.2f%%)\n", bucket[i].count, pct);
  }
 }
@@ -268,14 +254,13 @@ void show_q_histogram(const int counts[64], int max_buckets) {
  show_histogram(bucket, buckets, total, scale);
 }

-void show_rate_histogram(struct rate_hist *hist,
-                         const vpx_codec_enc_cfg_t *cfg, int max_buckets) {
+void show_rate_histogram(struct rate_hist *hist, const vpx_codec_enc_cfg_t *cfg,
+                         int max_buckets) {
  int i, scale;
  int buckets = 0;

  for (i = 0; i < RATE_BINS; i++) {
-    if (hist->bucket[i].low == INT_MAX)
-      continue;
+    if (hist->bucket[i].low == INT_MAX) continue;
    hist->bucket[buckets++] = hist->bucket[i];
  }

--- a/test/acm_random.h
+++ b/test/acm_random.h
@@ -23,15 +23,19 @@ class ACMRandom {

  explicit ACMRandom(int seed) : random_(seed) {}

-  void Reset(int seed) {
-    random_.Reseed(seed);
-  }
+  void Reset(int seed) { random_.Reseed(seed); }
  uint16_t Rand16(void) {
    const uint32_t value =
        random_.Generate(testing::internal::Random::kMaxRange);
    return (value >> 15) & 0xffff;
  }

+  int16_t Rand9Signed(void) {
+    // Use 9 bits: values between 255 (0x0FF) and -256 (0x100).
+    const uint32_t value = random_.Generate(512);
+    return static_cast<int16_t>(value) - 256;
+  }
+
  uint8_t Rand8(void) {
    const uint32_t value =
        random_.Generate(testing::internal::Random::kMaxRange);
@@ -46,17 +50,11 @@ class ACMRandom {
    return r < 128 ? r << 4 : r >> 4;
  }

-  int PseudoUniform(int range) {
-    return random_.Generate(range);
-  }
+  int PseudoUniform(int range) { return random_.Generate(range); }

-  int operator()(int n) {
-    return PseudoUniform(n);
-  }
+  int operator()(int n) { return PseudoUniform(n); }

-  static int DeterministicSeed(void) {
-    return 0xbaba;
-  }
+  static int DeterministicSeed(void) { return 0xbaba; }

 private:
  testing::internal::Random random_;
--- a/test/active_map_refresh_test.cc
+++ b/test/active_map_refresh_test.cc
@@ -0,0 +1,128 @@
+/*
+ *  Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+#include <algorithm>
+#include "third_party/googletest/src/include/gtest/gtest.h"
+#include "test/codec_factory.h"
+#include "test/encode_test_driver.h"
+#include "test/util.h"
+#include "test/y4m_video_source.h"
+
+namespace {
+
+// Check if any pixel in a 16x16 macroblock varies between frames.
+int CheckMb(const vpx_image_t &current, const vpx_image_t &previous, int mb_r,
+            int mb_c) {
+  for (int plane = 0; plane < 3; plane++) {
+    int r = 16 * mb_r;
+    int c0 = 16 * mb_c;
+    int r_top = std::min(r + 16, static_cast<int>(current.d_h));
+    int c_top = std::min(c0 + 16, static_cast<int>(current.d_w));
+    r = std::max(r, 0);
+    c0 = std::max(c0, 0);
+    if (plane > 0 && current.x_chroma_shift) {
+      c_top = (c_top + 1) >> 1;
+      c0 >>= 1;
+    }
+    if (plane > 0 && current.y_chroma_shift) {
+      r_top = (r_top + 1) >> 1;
+      r >>= 1;
+    }
+    for (; r < r_top; ++r) {
+      for (int c = c0; c < c_top; ++c) {
+        if (current.planes[plane][current.stride[plane] * r + c] !=
+            previous.planes[plane][previous.stride[plane] * r + c]) {
+          return 1;
+        }
+      }
+    }
+  }
+  return 0;
+}
+
+void GenerateMap(int mb_rows, int mb_cols, const vpx_image_t &current,
+                 const vpx_image_t &previous, uint8_t *map) {
+  for (int mb_r = 0; mb_r < mb_rows; ++mb_r) {
+    for (int mb_c = 0; mb_c < mb_cols; ++mb_c) {
+      map[mb_r * mb_cols + mb_c] = CheckMb(current, previous, mb_r, mb_c);
+    }
+  }
+}
+
+const int kAqModeCyclicRefresh = 3;
+
+class ActiveMapRefreshTest
+    : public ::libvpx_test::EncoderTest,
+      public ::libvpx_test::CodecTestWith2Params<libvpx_test::TestMode, int> {
+ protected:
+  ActiveMapRefreshTest() : EncoderTest(GET_PARAM(0)) {}
+  virtual ~ActiveMapRefreshTest() {}
+
+  virtual void SetUp() {
+    InitializeConfig();
+    SetMode(GET_PARAM(1));
+    cpu_used_ = GET_PARAM(2);
+  }
+
+  virtual void PreEncodeFrameHook(::libvpx_test::VideoSource *video,
+                                  ::libvpx_test::Encoder *encoder) {
+    ::libvpx_test::Y4mVideoSource *y4m_video =
+        static_cast<libvpx_test::Y4mVideoSource *>(video);
+    if (video->frame() == 1) {
+      encoder->Control(VP8E_SET_CPUUSED, cpu_used_);
+      encoder->Control(VP9E_SET_AQ_MODE, kAqModeCyclicRefresh);
+    } else if (video->frame() >= 2 && video->img()) {
+      vpx_image_t *current = video->img();
+      vpx_image_t *previous = y4m_holder_->img();
+      ASSERT_TRUE(previous != NULL);
+      vpx_active_map_t map = vpx_active_map_t();
+      const int width = static_cast<int>(current->d_w);
+      const int height = static_cast<int>(current->d_h);
+      const int mb_width = (width + 15) / 16;
+      const int mb_height = (height + 15) / 16;
+      uint8_t *active_map = new uint8_t[mb_width * mb_height];
+      GenerateMap(mb_height, mb_width, *current, *previous, active_map);
+      map.cols = mb_width;
+      map.rows = mb_height;
+      map.active_map = active_map;
+      encoder->Control(VP8E_SET_ACTIVEMAP, &map);
+      delete[] active_map;
+    }
+    if (video->img()) {
+      y4m_video->SwapBuffers(y4m_holder_);
+    }
+  }
+
+  int cpu_used_;
+  ::libvpx_test::Y4mVideoSource *y4m_holder_;
+};
+
+TEST_P(ActiveMapRefreshTest, Test) {
+  cfg_.g_lag_in_frames = 0;
+  cfg_.g_profile = 1;
+  cfg_.rc_target_bitrate = 600;
+  cfg_.rc_resize_allowed = 0;
+  cfg_.rc_min_quantizer = 8;
+  cfg_.rc_max_quantizer = 30;
+  cfg_.g_pass = VPX_RC_ONE_PASS;
+  cfg_.rc_end_usage = VPX_CBR;
+  cfg_.kf_max_dist = 90000;
+
+  ::libvpx_test::Y4mVideoSource video("desktop_credits.y4m", 0, 30);
+  ::libvpx_test::Y4mVideoSource video_holder("desktop_credits.y4m", 0, 30);
+  video_holder.Begin();
+  y4m_holder_ = &video_holder;
+
+  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+}
+
+VP9_INSTANTIATE_TEST_CASE(ActiveMapRefreshTest,
+                          ::testing::Values(::libvpx_test::kRealTime),
+                          ::testing::Range(5, 6));
+}  // namespace
--- a/test/active_map_test.cc
+++ b/test/active_map_test.cc
@@ -39,6 +39,7 @@ class ActiveMapTest
      encoder->Control(VP8E_SET_CPUUSED, cpu_used_);
    } else if (video->frame() == 3) {
      vpx_active_map_t map = vpx_active_map_t();
+      /* clang-format off */
      uint8_t active_map[9 * 13] = {
        1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0,
        1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0,
@@ -50,6 +51,7 @@ class ActiveMapTest
        0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 1,
        1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0,
      };
+      /* clang-format on */
      map.cols = (kWidth + 15) / 16;
      map.rows = (kHeight + 15) / 16;
      ASSERT_EQ(map.cols, 13u);
@@ -77,13 +79,13 @@ TEST_P(ActiveMapTest, Test) {
  cfg_.rc_end_usage = VPX_CBR;
  cfg_.kf_max_dist = 90000;

-  ::libvpx_test::I420VideoSource video("hantro_odd.yuv", kWidth, kHeight, 30,
-                                       1, 0, 20);
+  ::libvpx_test::I420VideoSource video("hantro_odd.yuv", kWidth, kHeight, 30, 1,
+                                       0, 20);

  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
 }

 VP9_INSTANTIATE_TEST_CASE(ActiveMapTest,
                          ::testing::Values(::libvpx_test::kRealTime),
-                          ::testing::Range(0, 6));
+                          ::testing::Range(0, 9));
 }  // namespace
--- a/test/add_noise_test.cc
+++ b/test/add_noise_test.cc
@@ -0,0 +1,136 @@
+/*
+ *  Copyright (c) 2016 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+#include <math.h>
+#include "test/clear_system_state.h"
+#include "test/register_state_check.h"
+#include "third_party/googletest/src/include/gtest/gtest.h"
+#include "./vpx_dsp_rtcd.h"
+#include "vpx/vpx_integer.h"
+#include "vpx_dsp/postproc.h"
+#include "vpx_mem/vpx_mem.h"
+
+namespace {
+
+static const int kNoiseSize = 3072;
+
+// TODO(jimbankoski): make width and height integers not unsigned.
+typedef void (*AddNoiseFunc)(uint8_t *start, const int8_t *noise,
+                             int blackclamp, int whiteclamp, int width,
+                             int height, int pitch);
+
+class AddNoiseTest : public ::testing::TestWithParam<AddNoiseFunc> {
+ public:
+  virtual void TearDown() { libvpx_test::ClearSystemState(); }
+  virtual ~AddNoiseTest() {}
+};
+
+double stddev6(char a, char b, char c, char d, char e, char f) {
+  const double n = (a + b + c + d + e + f) / 6.0;
+  const double v = ((a - n) * (a - n) + (b - n) * (b - n) + (c - n) * (c - n) +
+                    (d - n) * (d - n) + (e - n) * (e - n) + (f - n) * (f - n)) /
+                   6.0;
+  return sqrt(v);
+}
+
+TEST_P(AddNoiseTest, CheckNoiseAdded) {
+  const int width = 64;
+  const int height = 64;
+  const int image_size = width * height;
+  int8_t noise[kNoiseSize];
+  const int clamp = vpx_setup_noise(4.4, noise, kNoiseSize);
+  uint8_t *const s =
+      reinterpret_cast<uint8_t *>(vpx_calloc(image_size, sizeof(*s)));
+  ASSERT_TRUE(s != NULL);
+  memset(s, 99, image_size * sizeof(*s));
+
+  ASM_REGISTER_STATE_CHECK(
+      GetParam()(s, noise, clamp, clamp, width, height, width));
+
+  // Check to make sure we don't end up having either the same or no added
+  // noise either vertically or horizontally.
+  for (int i = 0; i < image_size - 6 * width - 6; ++i) {
+    const double hd = stddev6(s[i] - 99, s[i + 1] - 99, s[i + 2] - 99,
+                              s[i + 3] - 99, s[i + 4] - 99, s[i + 5] - 99);
+    const double vd = stddev6(s[i] - 99, s[i + width] - 99,
+                              s[i + 2 * width] - 99, s[i + 3 * width] - 99,
+                              s[i + 4 * width] - 99, s[i + 5 * width] - 99);
+
+    EXPECT_NE(hd, 0);
+    EXPECT_NE(vd, 0);
+  }
+
+  // Initialize pixels in the image to 255 and check for roll over.
+  memset(s, 255, image_size);
+
+  ASM_REGISTER_STATE_CHECK(
+      GetParam()(s, noise, clamp, clamp, width, height, width));
+
+  // Check to make sure the pixel values don't roll over.
+  for (int i = 0; i < image_size; ++i) {
+    EXPECT_GT(static_cast<int>(s[i]), clamp) << "i = " << i;
+  }
+
+  // Initialize pixels in the image to 0 and check for roll under.
+  memset(s, 0, image_size);
+
+  ASM_REGISTER_STATE_CHECK(
+      GetParam()(s, noise, clamp, clamp, width, height, width));
+
+  // Check to make sure the pixel values don't roll under.
+  for (int i = 0; i < image_size; ++i) {
+    EXPECT_LT(static_cast<int>(s[i]), 255 - clamp) << "i = " << i;
+  }
+
+  vpx_free(s);
+}
+
+TEST_P(AddNoiseTest, CheckCvsAssembly) {
+  const int width = 64;
+  const int height = 64;
+  const int image_size = width * height;
+  int8_t noise[kNoiseSize];
+  const int clamp = vpx_setup_noise(4.4, noise, kNoiseSize);
+
+  uint8_t *const s = reinterpret_cast<uint8_t *>(vpx_calloc(image_size, 1));
+  uint8_t *const d = reinterpret_cast<uint8_t *>(vpx_calloc(image_size, 1));
+  ASSERT_TRUE(s != NULL);
+  ASSERT_TRUE(d != NULL);
+
+  memset(s, 99, image_size);
+  memset(d, 99, image_size);
+
+  srand(0);
+  ASM_REGISTER_STATE_CHECK(
+      GetParam()(s, noise, clamp, clamp, width, height, width));
+  srand(0);
+  ASM_REGISTER_STATE_CHECK(
+      vpx_plane_add_noise_c(d, noise, clamp, clamp, width, height, width));
+
+  for (int i = 0; i < image_size; ++i) {
+    EXPECT_EQ(static_cast<int>(s[i]), static_cast<int>(d[i])) << "i = " << i;
+  }
+
+  vpx_free(d);
+  vpx_free(s);
+}
+
+INSTANTIATE_TEST_CASE_P(C, AddNoiseTest,
+                        ::testing::Values(vpx_plane_add_noise_c));
+
+#if HAVE_SSE2
+INSTANTIATE_TEST_CASE_P(SSE2, AddNoiseTest,
+                        ::testing::Values(vpx_plane_add_noise_sse2));
+#endif
+
+#if HAVE_MSA
+INSTANTIATE_TEST_CASE_P(MSA, AddNoiseTest,
+                        ::testing::Values(vpx_plane_add_noise_msa));
+#endif
+}  // namespace
--- a/test/alt_ref_aq_segment_test.cc
+++ b/test/alt_ref_aq_segment_test.cc
@@ -0,0 +1,157 @@
+/*
+ *  Copyright (c) 2012 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+#include "third_party/googletest/src/include/gtest/gtest.h"
+#include "test/codec_factory.h"
+#include "test/encode_test_driver.h"
+#include "test/i420_video_source.h"
+#include "test/util.h"
+
+namespace {
+
+class AltRefAqSegmentTest
+    : public ::libvpx_test::EncoderTest,
+      public ::libvpx_test::CodecTestWith2Params<libvpx_test::TestMode, int> {
+ protected:
+  AltRefAqSegmentTest() : EncoderTest(GET_PARAM(0)) {}
+  virtual ~AltRefAqSegmentTest() {}
+
+  virtual void SetUp() {
+    InitializeConfig();
+    SetMode(GET_PARAM(1));
+    set_cpu_used_ = GET_PARAM(2);
+    aq_mode_ = 0;
+    alt_ref_aq_mode_ = 0;
+  }
+
+  virtual void PreEncodeFrameHook(::libvpx_test::VideoSource *video,
+                                  ::libvpx_test::Encoder *encoder) {
+    if (video->frame() == 1) {
+      encoder->Control(VP8E_SET_CPUUSED, set_cpu_used_);
+      encoder->Control(VP9E_SET_ALT_REF_AQ, alt_ref_aq_mode_);
+      encoder->Control(VP9E_SET_AQ_MODE, aq_mode_);
+      encoder->Control(VP8E_SET_MAX_INTRA_BITRATE_PCT, 100);
+    }
+  }
+
+  int set_cpu_used_;
+  int aq_mode_;
+  int alt_ref_aq_mode_;
+};
+
+// Validate that this ALT_REF_AQ/AQ segmentation mode
+// (ALT_REF_AQ=1, AQ=0/no_aq)
+// encodes and decodes without a mismatch.
+TEST_P(AltRefAqSegmentTest, TestNoMisMatchAltRefAQ0) {
+  cfg_.rc_min_quantizer = 8;
+  cfg_.rc_max_quantizer = 56;
+  cfg_.rc_end_usage = VPX_VBR;
+  cfg_.rc_buf_initial_sz = 500;
+  cfg_.rc_buf_optimal_sz = 500;
+  cfg_.rc_buf_sz = 1000;
+  cfg_.rc_target_bitrate = 300;
+
+  aq_mode_ = 0;
+  alt_ref_aq_mode_ = 1;
+
+  ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
+                                       30, 1, 0, 100);
+
+  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+}
+
+// Validate that this ALT_REF_AQ/AQ segmentation mode
+// (ALT_REF_AQ=1, AQ=1/variance_aq)
+// encodes and decodes without a mismatch.
+TEST_P(AltRefAqSegmentTest, TestNoMisMatchAltRefAQ1) {
+  cfg_.rc_min_quantizer = 8;
+  cfg_.rc_max_quantizer = 56;
+  cfg_.rc_end_usage = VPX_VBR;
+  cfg_.rc_buf_initial_sz = 500;
+  cfg_.rc_buf_optimal_sz = 500;
+  cfg_.rc_buf_sz = 1000;
+  cfg_.rc_target_bitrate = 300;
+
+  aq_mode_ = 1;
+  alt_ref_aq_mode_ = 1;
+
+  ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
+                                       30, 1, 0, 100);
+
+  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+}
+
+// Validate that this ALT_REF_AQ/AQ segmentation mode
+// (ALT_REF_AQ=1, AQ=2/complexity_aq)
+// encodes and decodes without a mismatch.
+TEST_P(AltRefAqSegmentTest, TestNoMisMatchAltRefAQ2) {
+  cfg_.rc_min_quantizer = 8;
+  cfg_.rc_max_quantizer = 56;
+  cfg_.rc_end_usage = VPX_VBR;
+  cfg_.rc_buf_initial_sz = 500;
+  cfg_.rc_buf_optimal_sz = 500;
+  cfg_.rc_buf_sz = 1000;
+  cfg_.rc_target_bitrate = 300;
+
+  aq_mode_ = 2;
+  alt_ref_aq_mode_ = 1;
+
+  ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
+                                       30, 1, 0, 100);
+
+  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+}
+
+// Validate that this ALT_REF_AQ/AQ segmentation mode
+// (ALT_REF_AQ=1, AQ=3/cyclicrefresh_aq)
+// encodes and decodes without a mismatch.
+TEST_P(AltRefAqSegmentTest, TestNoMisMatchAltRefAQ3) {
+  cfg_.rc_min_quantizer = 8;
+  cfg_.rc_max_quantizer = 56;
+  cfg_.rc_end_usage = VPX_VBR;
+  cfg_.rc_buf_initial_sz = 500;
+  cfg_.rc_buf_optimal_sz = 500;
+  cfg_.rc_buf_sz = 1000;
+  cfg_.rc_target_bitrate = 300;
+
+  aq_mode_ = 3;
+  alt_ref_aq_mode_ = 1;
+
+  ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
+                                       30, 1, 0, 100);
+
+  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+}
+
+// Validate that this ALT_REF_AQ/AQ segmentation mode
+// (ALT_REF_AQ=1, AQ=4/equator360_aq)
+// encodes and decodes without a mismatch.
+TEST_P(AltRefAqSegmentTest, TestNoMisMatchAltRefAQ4) {
+  cfg_.rc_min_quantizer = 8;
+  cfg_.rc_max_quantizer = 56;
+  cfg_.rc_end_usage = VPX_VBR;
+  cfg_.rc_buf_initial_sz = 500;
+  cfg_.rc_buf_optimal_sz = 500;
+  cfg_.rc_buf_sz = 1000;
+  cfg_.rc_target_bitrate = 300;
+
+  aq_mode_ = 4;
+  alt_ref_aq_mode_ = 1;
+
+  ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
+                                       30, 1, 0, 100);
+
+  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+}
+
+VP9_INSTANTIATE_TEST_CASE(AltRefAqSegmentTest,
+                          ::testing::Values(::libvpx_test::kOnePassGood,
+                                            ::libvpx_test::kTwoPassGood),
+                          ::testing::Range(2, 5));
+}  // namespace
--- a/test/altref_test.cc
+++ b/test/altref_test.cc
@@ -14,12 +14,14 @@
 #include "test/util.h"
 namespace {

+#if CONFIG_VP8_ENCODER
+
 // lookahead range: [kLookAheadMin, kLookAheadMax).
 const int kLookAheadMin = 5;
 const int kLookAheadMax = 26;

 class AltRefTest : public ::libvpx_test::EncoderTest,
-    public ::libvpx_test::CodecTestWithParam<int> {
+                   public ::libvpx_test::CodecTestWithParam<int> {
 protected:
  AltRefTest() : EncoderTest(GET_PARAM(0)), altref_count_(0) {}
  virtual ~AltRefTest() {}
@@ -29,9 +31,7 @@ class AltRefTest : public ::libvpx_test::EncoderTest,
    SetMode(libvpx_test::kTwoPassGood);
  }

-  virtual void BeginPassHook(unsigned int pass) {
-    altref_count_ = 0;
-  }
+  virtual void BeginPassHook(unsigned int /*pass*/) { altref_count_ = 0; }

  virtual void PreEncodeFrameHook(libvpx_test::VideoSource *video,
                                  libvpx_test::Encoder *encoder) {
@@ -63,7 +63,90 @@ TEST_P(AltRefTest, MonotonicTimestamps) {
  EXPECT_GE(altref_count(), 1);
 }

-
 VP8_INSTANTIATE_TEST_CASE(AltRefTest,
                          ::testing::Range(kLookAheadMin, kLookAheadMax));
+
+#endif  // CONFIG_VP8_ENCODER
+
+class AltRefForcedKeyTestLarge
+    : public ::libvpx_test::EncoderTest,
+      public ::libvpx_test::CodecTestWith2Params<libvpx_test::TestMode, int> {
+ protected:
+  AltRefForcedKeyTestLarge()
+      : EncoderTest(GET_PARAM(0)), encoding_mode_(GET_PARAM(1)),
+        cpu_used_(GET_PARAM(2)), forced_kf_frame_num_(1), frame_num_(0) {}
+  virtual ~AltRefForcedKeyTestLarge() {}
+
+  virtual void SetUp() {
+    InitializeConfig();
+    SetMode(encoding_mode_);
+    cfg_.rc_end_usage = VPX_VBR;
+    cfg_.g_threads = 0;
+  }
+
+  virtual void PreEncodeFrameHook(::libvpx_test::VideoSource *video,
+                                  ::libvpx_test::Encoder *encoder) {
+    if (video->frame() == 0) {
+      encoder->Control(VP8E_SET_CPUUSED, cpu_used_);
+      encoder->Control(VP8E_SET_ENABLEAUTOALTREF, 1);
+#if CONFIG_VP9_ENCODER
+      // override test default for tile columns if necessary.
+      if (GET_PARAM(0) == &libvpx_test::kVP9) {
+        encoder->Control(VP9E_SET_TILE_COLUMNS, 6);
+      }
+#endif
+    }
+    frame_flags_ =
+        (video->frame() == forced_kf_frame_num_) ? VPX_EFLAG_FORCE_KF : 0;
+  }
+
+  virtual void FramePktHook(const vpx_codec_cx_pkt_t *pkt) {
+    if (frame_num_ == forced_kf_frame_num_) {
+      ASSERT_TRUE(!!(pkt->data.frame.flags & VPX_FRAME_IS_KEY))
+          << "Frame #" << frame_num_ << " isn't a keyframe!";
+    }
+    ++frame_num_;
+  }
+
+  ::libvpx_test::TestMode encoding_mode_;
+  int cpu_used_;
+  unsigned int forced_kf_frame_num_;
+  unsigned int frame_num_;
+};
+
+TEST_P(AltRefForcedKeyTestLarge, Frame1IsKey) {
+  const vpx_rational timebase = { 1, 30 };
+  const int lag_values[] = { 3, 15, 25, -1 };
+
+  forced_kf_frame_num_ = 1;
+  for (int i = 0; lag_values[i] != -1; ++i) {
+    frame_num_ = 0;
+    cfg_.g_lag_in_frames = lag_values[i];
+    libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
+                                       timebase.den, timebase.num, 0, 30);
+    ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+  }
+}
+
+TEST_P(AltRefForcedKeyTestLarge, ForcedFrameIsKey) {
+  const vpx_rational timebase = { 1, 30 };
+  const int lag_values[] = { 3, 15, 25, -1 };
+
+  for (int i = 0; lag_values[i] != -1; ++i) {
+    frame_num_ = 0;
+    forced_kf_frame_num_ = lag_values[i] - 1;
+    cfg_.g_lag_in_frames = lag_values[i];
+    libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
+                                       timebase.den, timebase.num, 0, 30);
+    ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+  }
+}
+
+VP8_INSTANTIATE_TEST_CASE(AltRefForcedKeyTestLarge,
+                          ::testing::Values(::libvpx_test::kOnePassGood),
+                          ::testing::Range(0, 9));
+
+VP9_INSTANTIATE_TEST_CASE(AltRefForcedKeyTestLarge,
+                          ::testing::Values(::libvpx_test::kOnePassGood),
+                          ::testing::Range(0, 9));
 }  // namespace
--- a/test/aq_segment_test.cc
+++ b/test/aq_segment_test.cc
@@ -57,7 +57,7 @@ TEST_P(AqSegmentTest, TestNoMisMatchAQ1) {
  aq_mode_ = 1;

  ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
-                                        30, 1, 0, 100);
+                                       30, 1, 0, 100);

  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
 }
@@ -77,7 +77,7 @@ TEST_P(AqSegmentTest, TestNoMisMatchAQ2) {
  aq_mode_ = 2;

  ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
-                                        30, 1, 0, 100);
+                                       30, 1, 0, 100);

  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
 }
@@ -97,7 +97,7 @@ TEST_P(AqSegmentTest, TestNoMisMatchAQ3) {
  aq_mode_ = 3;

  ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
-                                        30, 1, 0, 100);
+                                       30, 1, 0, 100);

  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
 }
--- a/test/avg_test.cc
+++ b/test/avg_test.cc
@@ -0,0 +1,397 @@
+/*
+ *  Copyright (c) 2012 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <limits.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "third_party/googletest/src/include/gtest/gtest.h"
+
+#include "./vpx_config.h"
+#include "./vpx_dsp_rtcd.h"
+
+#include "test/acm_random.h"
+#include "test/clear_system_state.h"
+#include "test/register_state_check.h"
+#include "test/util.h"
+#include "vpx_mem/vpx_mem.h"
+
+using libvpx_test::ACMRandom;
+
+namespace {
+class AverageTestBase : public ::testing::Test {
+ public:
+  AverageTestBase(int width, int height) : width_(width), height_(height) {}
+
+  static void SetUpTestCase() {  // allocates the buffer shared by all tests
+    source_data_ = reinterpret_cast<uint8_t *>(
+        vpx_memalign(kDataAlignment, kDataBlockSize));
+  }  // NOTE(review): the allocation result is not checked for NULL
+
+  static void TearDownTestCase() {
+    vpx_free(source_data_);
+    source_data_ = NULL;
+  }
+
+  virtual void TearDown() { libvpx_test::ClearSystemState(); }
+
+ protected:
+  // Buffer of 64 * 128 bytes: room for 64 rows at a stride of up to 128.
+  static const int kDataAlignment = 16;
+  static const int kDataBlockSize = 64 * 128;
+
+  virtual void SetUp() {
+    source_stride_ = (width_ + 31) & ~31;  // round up to a multiple of 32
+    rnd_.Reset(ACMRandom::DeterministicSeed());
+  }
+
+  // Reference: rounded mean of the 8x8 block at |source|, (sum + 32) >> 6.
+  static unsigned int ReferenceAverage8x8(const uint8_t *source, int pitch) {
+    unsigned int average = 0;
+    for (int h = 0; h < 8; ++h) {
+      for (int w = 0; w < 8; ++w) average += source[h * pitch + w];
+    }
+    return ((average + 32) >> 6);
+  }
+
+  static unsigned int ReferenceAverage4x4(const uint8_t *source, int pitch) {
+    unsigned int average = 0;
+    for (int h = 0; h < 4; ++h) {
+      for (int w = 0; w < 4; ++w) average += source[h * pitch + w];
+    }
+    return ((average + 8) >> 4);  // rounded mean of the 16 pixels
+  }
+
+  void FillConstant(uint8_t fill_constant) {
+    for (int i = 0; i < width_ * height_; ++i) {  // contiguous; ignores stride
+      source_data_[i] = fill_constant;
+    }
+  }
+
+  void FillRandom() {
+    for (int i = 0; i < width_ * height_; ++i) {  // contiguous; ignores stride
+      source_data_[i] = rnd_.Rand8();
+    }
+  }
+
+  int width_, height_;
+  static uint8_t *source_data_;  // shared across tests; see SetUpTestCase()
+  int source_stride_;
+
+  ACMRandom rnd_;
+};
+typedef unsigned int (*AverageFunction)(const uint8_t *s, int pitch);
+// Test parameters: width, height, source offset, block size, function.
+typedef std::tr1::tuple<int, int, int, int, AverageFunction> AvgFunc;
+
+class AverageTest : public AverageTestBase,
+                    public ::testing::WithParamInterface<AvgFunc> {
+ public:
+  AverageTest() : AverageTestBase(GET_PARAM(0), GET_PARAM(1)) {}
+
+ protected:
+  void CheckAverages() {
+    const int block_size = GET_PARAM(3);
+    unsigned int expected = 0;  // stays 0 unless block_size is 8 or 4
+    if (block_size == 8) {
+      expected =
+          ReferenceAverage8x8(source_data_ + GET_PARAM(2), source_stride_);
+    } else if (block_size == 4) {
+      expected =
+          ReferenceAverage4x4(source_data_ + GET_PARAM(2), source_stride_);
+    }
+    // Called twice: under the register-state check, then to get the result.
+    ASM_REGISTER_STATE_CHECK(
+        GET_PARAM(4)(source_data_ + GET_PARAM(2), source_stride_));
+    unsigned int actual =
+        GET_PARAM(4)(source_data_ + GET_PARAM(2), source_stride_);
+
+    EXPECT_EQ(expected, actual);
+  }
+};
+
+typedef void (*IntProRowFunc)(int16_t hbuf[16], uint8_t const *ref,
+                              const int ref_stride, const int height);
+// Test parameters: height, asm function, C function. Width is fixed at 16.
+typedef std::tr1::tuple<int, IntProRowFunc, IntProRowFunc> IntProRowParam;
+
+class IntProRowTest : public AverageTestBase,
+                      public ::testing::WithParamInterface<IntProRowParam> {
+ public:
+  IntProRowTest()
+      : AverageTestBase(16, GET_PARAM(0)), hbuf_asm_(NULL), hbuf_c_(NULL) {
+    asm_func_ = GET_PARAM(1);
+    c_func_ = GET_PARAM(2);
+  }
+
+ protected:
+  virtual void SetUp() {  // NOTE(review): replaces AverageTestBase::SetUp()
+    hbuf_asm_ = reinterpret_cast<int16_t *>(
+        vpx_memalign(kDataAlignment, sizeof(*hbuf_asm_) * 16));
+    hbuf_c_ = reinterpret_cast<int16_t *>(
+        vpx_memalign(kDataAlignment, sizeof(*hbuf_c_) * 16));
+  }  // NOTE(review): neither allocation is checked for NULL
+
+  virtual void TearDown() {  // NOTE(review): skips ClearSystemState()
+    vpx_free(hbuf_c_);
+    hbuf_c_ = NULL;
+    vpx_free(hbuf_asm_);
+    hbuf_asm_ = NULL;
+  }
+
+  void RunComparison() {  // both calls pass a ref_stride of 0
+    ASM_REGISTER_STATE_CHECK(c_func_(hbuf_c_, source_data_, 0, height_));
+    ASM_REGISTER_STATE_CHECK(asm_func_(hbuf_asm_, source_data_, 0, height_));
+    EXPECT_EQ(0, memcmp(hbuf_c_, hbuf_asm_, sizeof(*hbuf_c_) * 16))
+        << "Output mismatch";
+  }
+
+ private:
+  IntProRowFunc asm_func_;
+  IntProRowFunc c_func_;
+  int16_t *hbuf_asm_;  // 16 outputs written by asm_func_
+  int16_t *hbuf_c_;    // 16 outputs written by c_func_
+};
+
+typedef int16_t (*IntProColFunc)(uint8_t const *ref, const int width);
+// Test parameters: width, asm function, C function. Height is fixed at 1.
+typedef std::tr1::tuple<int, IntProColFunc, IntProColFunc> IntProColParam;
+
+class IntProColTest : public AverageTestBase,
+                      public ::testing::WithParamInterface<IntProColParam> {
+ public:
+  IntProColTest() : AverageTestBase(GET_PARAM(0), 1), sum_asm_(0), sum_c_(0) {
+    asm_func_ = GET_PARAM(1);
+    c_func_ = GET_PARAM(2);
+  }
+
+ protected:
+  void RunComparison() {  // the two return values must be equal
+    ASM_REGISTER_STATE_CHECK(sum_c_ = c_func_(source_data_, width_));
+    ASM_REGISTER_STATE_CHECK(sum_asm_ = asm_func_(source_data_, width_));
+    EXPECT_EQ(sum_c_, sum_asm_) << "Output mismatch";
+  }
+
+ private:
+  IntProColFunc asm_func_;
+  IntProColFunc c_func_;
+  int16_t sum_asm_;  // value returned by asm_func_
+  int16_t sum_c_;    // value returned by c_func_
+};
+
+typedef int (*SatdFunc)(const int16_t *coeffs, int length);
+typedef std::tr1::tuple<int, SatdFunc> SatdTestParam;  // (length, function)
+
+class SatdTest : public ::testing::Test,
+                 public ::testing::WithParamInterface<SatdTestParam> {
+ protected:
+  virtual void SetUp() {  // reads the parameters and allocates src_
+    satd_size_ = GET_PARAM(0);
+    satd_func_ = GET_PARAM(1);
+    rnd_.Reset(ACMRandom::DeterministicSeed());
+    src_ = reinterpret_cast<int16_t *>(
+        vpx_memalign(16, sizeof(*src_) * satd_size_));
+    ASSERT_TRUE(src_ != NULL);
+  }
+
+  virtual void TearDown() {
+    libvpx_test::ClearSystemState();
+    vpx_free(src_);
+  }
+
+  void FillConstant(const int16_t val) {  // sets all satd_size_ entries
+    for (int i = 0; i < satd_size_; ++i) src_[i] = val;
+  }
+
+  void FillRandom() {  // sets all satd_size_ entries from rnd_.Rand16()
+    for (int i = 0; i < satd_size_; ++i) src_[i] = rnd_.Rand16();
+  }
+
+  void Check(const int expected) {  // satd_func_ must return |expected|
+    int total;
+    ASM_REGISTER_STATE_CHECK(total = satd_func_(src_, satd_size_));
+    EXPECT_EQ(expected, total);
+  }
+
+  int satd_size_;  // number of int16_t entries in src_
+
+ private:
+  int16_t *src_;
+  SatdFunc satd_func_;
+  ACMRandom rnd_;
+};
+
+uint8_t *AverageTestBase::source_data_ = NULL;  // set in SetUpTestCase()
+
+TEST_P(AverageTest, MinValue) {
+  FillConstant(0);  // every pixel at the uint8_t minimum
+  CheckAverages();
+}
+
+TEST_P(AverageTest, MaxValue) {
+  FillConstant(255);  // every pixel at the uint8_t maximum
+  CheckAverages();
+}
+
+TEST_P(AverageTest, Random) {
+  // Re-randomize the buffer and re-check 1000 times. The block under test
+  // starts at the instantiation's source offset, which may be unaligned.
+  for (int i = 0; i < 1000; i++) {
+    FillRandom();
+    CheckAverages();
+  }
+}
+
+TEST_P(IntProRowTest, MinValue) {
+  FillConstant(0);  // every pixel at the uint8_t minimum
+  RunComparison();
+}
+
+TEST_P(IntProRowTest, MaxValue) {
+  FillConstant(255);  // every pixel at the uint8_t maximum
+  RunComparison();
+}
+
+TEST_P(IntProRowTest, Random) {
+  FillRandom();  // pixels drawn from rnd_.Rand8()
+  RunComparison();
+}
+
+TEST_P(IntProColTest, MinValue) {
+  FillConstant(0);  // every pixel at the uint8_t minimum
+  RunComparison();
+}
+
+TEST_P(IntProColTest, MaxValue) {
+  FillConstant(255);  // every pixel at the uint8_t maximum
+  RunComparison();
+}
+
+TEST_P(IntProColTest, Random) {
+  FillRandom();  // pixels drawn from rnd_.Rand8()
+  RunComparison();
+}
+
+TEST_P(SatdTest, MinValue) {
+  const int kMin = -32640;
+  const int expected = -kMin * satd_size_;  // |kMin| for every entry
+  FillConstant(kMin);
+  Check(expected);
+}
+
+TEST_P(SatdTest, MaxValue) {
+  const int kMax = 32640;
+  const int expected = kMax * satd_size_;  // kMax for every entry
+  FillConstant(kMax);
+  Check(expected);
+}
+
+TEST_P(SatdTest, Random) {
+  int expected;  // hard-coded total for each supported size
+  switch (satd_size_) {
+    case 16: expected = 205298; break;
+    case 64: expected = 1113950; break;
+    case 256: expected = 4268415; break;
+    case 1024: expected = 16954082; break;
+    default:  // FAIL() returns here, so |expected| is never read unset
+      FAIL() << "Invalid satd size (" << satd_size_
+             << ") valid: 16/64/256/1024";
+  }
+  FillRandom();  // rnd_ was reset with the deterministic seed in SetUp()
+  Check(expected);
+}
+
+using std::tr1::make_tuple;
+
+INSTANTIATE_TEST_CASE_P(
+    C, AverageTest,  // width, height, source offset, block size, function
+    ::testing::Values(make_tuple(16, 16, 1, 8, &vpx_avg_8x8_c),
+                      make_tuple(16, 16, 1, 4, &vpx_avg_4x4_c)));
+
+INSTANTIATE_TEST_CASE_P(C, SatdTest,  // length, function
+                        ::testing::Values(make_tuple(16, &vpx_satd_c),
+                                          make_tuple(64, &vpx_satd_c),
+                                          make_tuple(256, &vpx_satd_c),
+                                          make_tuple(1024, &vpx_satd_c)));
+
+#if HAVE_SSE2
+INSTANTIATE_TEST_CASE_P(
+    SSE2, AverageTest,  // width, height, source offset, block size, function
+    ::testing::Values(make_tuple(16, 16, 0, 8, &vpx_avg_8x8_sse2),
+                      make_tuple(16, 16, 5, 8, &vpx_avg_8x8_sse2),
+                      make_tuple(32, 32, 15, 8, &vpx_avg_8x8_sse2),
+                      make_tuple(16, 16, 0, 4, &vpx_avg_4x4_sse2),
+                      make_tuple(16, 16, 5, 4, &vpx_avg_4x4_sse2),
+                      make_tuple(32, 32, 15, 4, &vpx_avg_4x4_sse2)));
+
+INSTANTIATE_TEST_CASE_P(
+    SSE2, IntProRowTest,  // height, asm function, C function
+    ::testing::Values(make_tuple(16, &vpx_int_pro_row_sse2, &vpx_int_pro_row_c),
+                      make_tuple(32, &vpx_int_pro_row_sse2, &vpx_int_pro_row_c),
+                      make_tuple(64, &vpx_int_pro_row_sse2,
+                                 &vpx_int_pro_row_c)));
+
+INSTANTIATE_TEST_CASE_P(
+    SSE2, IntProColTest,  // width, asm function, C function
+    ::testing::Values(make_tuple(16, &vpx_int_pro_col_sse2, &vpx_int_pro_col_c),
+                      make_tuple(32, &vpx_int_pro_col_sse2, &vpx_int_pro_col_c),
+                      make_tuple(64, &vpx_int_pro_col_sse2,
+                                 &vpx_int_pro_col_c)));
+
+INSTANTIATE_TEST_CASE_P(SSE2, SatdTest,  // length, function
+                        ::testing::Values(make_tuple(16, &vpx_satd_sse2),
+                                          make_tuple(64, &vpx_satd_sse2),
+                                          make_tuple(256, &vpx_satd_sse2),
+                                          make_tuple(1024, &vpx_satd_sse2)));
+#endif  // HAVE_SSE2
+
+#if HAVE_NEON
+INSTANTIATE_TEST_CASE_P(
+    NEON, AverageTest,  // width, height, source offset, block size, function
+    ::testing::Values(make_tuple(16, 16, 0, 8, &vpx_avg_8x8_neon),
+                      make_tuple(16, 16, 5, 8, &vpx_avg_8x8_neon),
+                      make_tuple(32, 32, 15, 8, &vpx_avg_8x8_neon),
+                      make_tuple(16, 16, 0, 4, &vpx_avg_4x4_neon),
+                      make_tuple(16, 16, 5, 4, &vpx_avg_4x4_neon),
+                      make_tuple(32, 32, 15, 4, &vpx_avg_4x4_neon)));
+
+INSTANTIATE_TEST_CASE_P(
+    NEON, IntProRowTest,  // height, asm function, C function
+    ::testing::Values(make_tuple(16, &vpx_int_pro_row_neon, &vpx_int_pro_row_c),
+                      make_tuple(32, &vpx_int_pro_row_neon, &vpx_int_pro_row_c),
+                      make_tuple(64, &vpx_int_pro_row_neon,
+                                 &vpx_int_pro_row_c)));
+
+INSTANTIATE_TEST_CASE_P(
+    NEON, IntProColTest,  // width, asm function, C function
+    ::testing::Values(make_tuple(16, &vpx_int_pro_col_neon, &vpx_int_pro_col_c),
+                      make_tuple(32, &vpx_int_pro_col_neon, &vpx_int_pro_col_c),
+                      make_tuple(64, &vpx_int_pro_col_neon,
+                                 &vpx_int_pro_col_c)));
+
+INSTANTIATE_TEST_CASE_P(NEON, SatdTest,  // length, function
+                        ::testing::Values(make_tuple(16, &vpx_satd_neon),
+                                          make_tuple(64, &vpx_satd_neon),
+                                          make_tuple(256, &vpx_satd_neon),
+                                          make_tuple(1024, &vpx_satd_neon)));
+#endif  // HAVE_NEON
+
+#if HAVE_MSA
+INSTANTIATE_TEST_CASE_P(
+    MSA, AverageTest,  // width, height, source offset, block size, function
+    ::testing::Values(make_tuple(16, 16, 0, 8, &vpx_avg_8x8_msa),
+                      make_tuple(16, 16, 5, 8, &vpx_avg_8x8_msa),
+                      make_tuple(32, 32, 15, 8, &vpx_avg_8x8_msa),
+                      make_tuple(16, 16, 0, 4, &vpx_avg_4x4_msa),
+                      make_tuple(16, 16, 5, 4, &vpx_avg_4x4_msa),
+                      make_tuple(32, 32, 15, 4, &vpx_avg_4x4_msa)));
+#endif  // HAVE_MSA
+
+}  // namespace
--- a/test/blockiness_test.cc
+++ b/test/blockiness_test.cc
@@ -8,10 +8,11 @@
 *  be found in the AUTHORS file in the root of the source tree.
 */

-
-#include <string.h>
 #include <limits.h>
 #include <stdio.h>
+#include <string.h>
+
+#include "third_party/googletest/src/include/gtest/gtest.h"

 #include "./vpx_config.h"
 #if CONFIG_VP9_ENCODER
@@ -22,15 +23,12 @@
 #include "test/clear_system_state.h"
 #include "test/register_state_check.h"
 #include "test/util.h"
-#include "third_party/googletest/src/include/gtest/gtest.h"

 #include "vpx_mem/vpx_mem.h"

-
-extern "C"
-double vp9_get_blockiness(const unsigned char *img1, int img1_pitch,
-                          const unsigned char *img2, int img2_pitch,
-                          int width, int height);
+extern "C" double vp9_get_blockiness(const unsigned char *img1, int img1_pitch,
+                                     const unsigned char *img2, int img2_pitch,
+                                     int width, int height);

 using libvpx_test::ACMRandom;

@@ -40,9 +38,9 @@ class BlockinessTestBase : public ::testing::Test {
  BlockinessTestBase(int width, int height) : width_(width), height_(height) {}

  static void SetUpTestCase() {
-    source_data_ = reinterpret_cast<uint8_t*>(
+    source_data_ = reinterpret_cast<uint8_t *>(
        vpx_memalign(kDataAlignment, kDataBufferSize));
-    reference_data_ = reinterpret_cast<uint8_t*>(
+    reference_data_ = reinterpret_cast<uint8_t *>(
        vpx_memalign(kDataAlignment, kDataBufferSize));
  }

@@ -53,14 +51,12 @@ class BlockinessTestBase : public ::testing::Test {
    reference_data_ = NULL;
  }

-  virtual void TearDown() {
-    libvpx_test::ClearSystemState();
-  }
+  virtual void TearDown() { libvpx_test::ClearSystemState(); }

 protected:
  // Handle frames up to 640x480
  static const int kDataAlignment = 16;
-  static const int kDataBufferSize = 640*480;
+  static const int kDataBufferSize = 640 * 480;

  virtual void SetUp() {
    source_stride_ = (width_ + 31) & ~31;
@@ -68,8 +64,8 @@ class BlockinessTestBase : public ::testing::Test {
    rnd_.Reset(ACMRandom::DeterministicSeed());
  }

-  void FillConstant(uint8_t *data, int stride, uint8_t fill_constant,
-                    int width, int height) {
+  void FillConstant(uint8_t *data, int stride, uint8_t fill_constant, int width,
+                    int height) {
    for (int h = 0; h < height; ++h) {
      for (int w = 0; w < width; ++w) {
        data[h * stride + w] = fill_constant;
@@ -104,10 +100,11 @@ class BlockinessTestBase : public ::testing::Test {
  void FillCheckerboard(uint8_t *data, int stride) {
    for (int h = 0; h < height_; h += 4) {
      for (int w = 0; w < width_; w += 4) {
-        if (((h/4) ^ (w/4)) & 1)
+        if (((h / 4) ^ (w / 4)) & 1) {
          FillConstant(data + h * stride + w, stride, 255, 4, 4);
-        else
+        } else {
          FillConstant(data + h * stride + w, stride, 0, 4, 4);
+        }
      }
    }
  }
@@ -135,9 +132,9 @@ class BlockinessTestBase : public ::testing::Test {
    }
  }
  int width_, height_;
-  static uint8_t* source_data_;
+  static uint8_t *source_data_;
  int source_stride_;
-  static uint8_t* reference_data_;
+  static uint8_t *reference_data_;
  int reference_stride_;

  ACMRandom rnd_;
@@ -152,32 +149,32 @@ class BlockinessVP9Test
  BlockinessVP9Test() : BlockinessTestBase(GET_PARAM(0), GET_PARAM(1)) {}

 protected:
-  int CheckBlockiness() {
-    return vp9_get_blockiness(source_data_, source_stride_,
-                              reference_data_, reference_stride_,
-                              width_, height_);
+  double GetBlockiness() const {
+    return vp9_get_blockiness(source_data_, source_stride_, reference_data_,
+                              reference_stride_, width_, height_);
  }
 };
 #endif  // CONFIG_VP9_ENCODER

-uint8_t* BlockinessTestBase::source_data_ = NULL;
-uint8_t* BlockinessTestBase::reference_data_ = NULL;
+uint8_t *BlockinessTestBase::source_data_ = NULL;
+uint8_t *BlockinessTestBase::reference_data_ = NULL;

 #if CONFIG_VP9_ENCODER
 TEST_P(BlockinessVP9Test, SourceBlockierThanReference) {
  // Source is blockier than reference.
  FillRandomBlocky(source_data_, source_stride_);
  FillConstant(reference_data_, reference_stride_, 128);
-  int super_blocky = CheckBlockiness();
+  const double super_blocky = GetBlockiness();

-  EXPECT_EQ(0, super_blocky) << "Blocky source should produce 0 blockiness.";
+  EXPECT_DOUBLE_EQ(0.0, super_blocky)
+      << "Blocky source should produce 0 blockiness.";
 }

 TEST_P(BlockinessVP9Test, ReferenceBlockierThanSource) {
  // Source is blockier than reference.
  FillConstant(source_data_, source_stride_, 128);
  FillRandomBlocky(reference_data_, reference_stride_);
-  int super_blocky = CheckBlockiness();
+  const double super_blocky = GetBlockiness();

  EXPECT_GT(super_blocky, 0.0)
      << "Blocky reference should score high for blockiness.";
@@ -187,10 +184,10 @@ TEST_P(BlockinessVP9Test, BlurringDecreasesBlockiness) {
  // Source is blockier than reference.
  FillConstant(source_data_, source_stride_, 128);
  FillRandomBlocky(reference_data_, reference_stride_);
-  int super_blocky = CheckBlockiness();
+  const double super_blocky = GetBlockiness();

  Blur(reference_data_, reference_stride_, 4);
-  int less_blocky = CheckBlockiness();
+  const double less_blocky = GetBlockiness();

  EXPECT_GT(super_blocky, less_blocky)
      << "A straight blur should decrease blockiness.";
@@ -201,17 +198,16 @@ TEST_P(BlockinessVP9Test, WorstCaseBlockiness) {
  FillConstant(source_data_, source_stride_, 128);
  FillCheckerboard(reference_data_, reference_stride_);

-  int super_blocky = CheckBlockiness();
+  const double super_blocky = GetBlockiness();

  Blur(reference_data_, reference_stride_, 4);
-  int less_blocky = CheckBlockiness();
+  const double less_blocky = GetBlockiness();

  EXPECT_GT(super_blocky, less_blocky)
      << "A straight blur should decrease blockiness.";
 }
 #endif  // CONFIG_VP9_ENCODER

-
 using std::tr1::make_tuple;

 //------------------------------------------------------------------------------
@@ -219,9 +215,7 @@ using std::tr1::make_tuple;

 #if CONFIG_VP9_ENCODER
 const BlockinessParam c_vp9_tests[] = {
-  make_tuple(320, 240),
-  make_tuple(318, 242),
-  make_tuple(318, 238),
+  make_tuple(320, 240), make_tuple(318, 242), make_tuple(318, 238),
 };
 INSTANTIATE_TEST_CASE_P(C, BlockinessVP9Test, ::testing::ValuesIn(c_vp9_tests));
 #endif
--- a/test/borders_test.cc
+++ b/test/borders_test.cc
@@ -17,8 +17,9 @@

 namespace {

-class BordersTest : public ::libvpx_test::EncoderTest,
-    public ::libvpx_test::CodecTestWithParam<libvpx_test::TestMode> {
+class BordersTest
+    : public ::libvpx_test::EncoderTest,
+      public ::libvpx_test::CodecTestWithParam<libvpx_test::TestMode> {
 protected:
  BordersTest() : EncoderTest(GET_PARAM(0)) {}
  virtual ~BordersTest() {}
@@ -52,7 +53,7 @@ TEST_P(BordersTest, TestEncodeHighBitrate) {
  // extend into the border and test the border condition.
  cfg_.g_lag_in_frames = 25;
  cfg_.rc_2pass_vbr_minsection_pct = 5;
-  cfg_.rc_2pass_vbr_minsection_pct = 2000;
+  cfg_.rc_2pass_vbr_maxsection_pct = 2000;
  cfg_.rc_target_bitrate = 2000;
  cfg_.rc_max_quantizer = 10;

@@ -78,6 +79,6 @@ TEST_P(BordersTest, TestLowBitrate) {
  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
 }

-VP9_INSTANTIATE_TEST_CASE(BordersTest, ::testing::Values(
-    ::libvpx_test::kTwoPassGood));
+VP9_INSTANTIATE_TEST_CASE(BordersTest,
+                          ::testing::Values(::libvpx_test::kTwoPassGood));
 }  // namespace
--- a/test/byte_alignment_test.cc
+++ b/test/byte_alignment_test.cc
@@ -21,14 +21,14 @@

 namespace {

+#if CONFIG_WEBM_IO
+
 const int kLegacyByteAlignment = 0;
 const int kLegacyYPlaneByteAlignment = 32;
 const int kNumPlanesToCheck = 3;
 const char kVP9TestFile[] = "vp90-2-02-size-lf-1920x1080.webm";
 const char kVP9Md5File[] = "vp90-2-02-size-lf-1920x1080.webm.md5";

-#if CONFIG_WEBM_IO
-
 struct ByteAlignmentTestParam {
  int byte_alignment;
  vpx_codec_err_t expected_value;
@@ -36,29 +36,26 @@ struct ByteAlignmentTestParam {
 };

 const ByteAlignmentTestParam kBaTestParams[] = {
-  {kLegacyByteAlignment, VPX_CODEC_OK, true},
-  {32, VPX_CODEC_OK, true},
-  {64, VPX_CODEC_OK, true},
-  {128, VPX_CODEC_OK, true},
-  {256, VPX_CODEC_OK, true},
-  {512, VPX_CODEC_OK, true},
-  {1024, VPX_CODEC_OK, true},
-  {1, VPX_CODEC_INVALID_PARAM, false},
-  {-2, VPX_CODEC_INVALID_PARAM, false},
-  {4, VPX_CODEC_INVALID_PARAM, false},
-  {16, VPX_CODEC_INVALID_PARAM, false},
-  {255, VPX_CODEC_INVALID_PARAM, false},
-  {2048, VPX_CODEC_INVALID_PARAM, false},
+  { kLegacyByteAlignment, VPX_CODEC_OK, true },
+  { 32, VPX_CODEC_OK, true },
+  { 64, VPX_CODEC_OK, true },
+  { 128, VPX_CODEC_OK, true },
+  { 256, VPX_CODEC_OK, true },
+  { 512, VPX_CODEC_OK, true },
+  { 1024, VPX_CODEC_OK, true },
+  { 1, VPX_CODEC_INVALID_PARAM, false },
+  { -2, VPX_CODEC_INVALID_PARAM, false },
+  { 4, VPX_CODEC_INVALID_PARAM, false },
+  { 16, VPX_CODEC_INVALID_PARAM, false },
+  { 255, VPX_CODEC_INVALID_PARAM, false },
+  { 2048, VPX_CODEC_INVALID_PARAM, false },
 };

 // Class for testing byte alignment of reference buffers.
 class ByteAlignmentTest
    : public ::testing::TestWithParam<ByteAlignmentTestParam> {
 protected:
-  ByteAlignmentTest()
-      : video_(NULL),
-        decoder_(NULL),
-        md5_file_(NULL) {}
+  ByteAlignmentTest() : video_(NULL), decoder_(NULL), md5_file_(NULL) {}

  virtual void SetUp() {
    video_ = new libvpx_test::WebMVideoSource(kVP9TestFile);
@@ -74,8 +71,7 @@ class ByteAlignmentTest
  }

  virtual void TearDown() {
-    if (md5_file_ != NULL)
-      fclose(md5_file_);
+    if (md5_file_ != NULL) fclose(md5_file_);

    delete decoder_;
    delete video_;
@@ -89,8 +85,7 @@ class ByteAlignmentTest
    const vpx_codec_err_t res =
        decoder_->DecodeFrame(video_->cxdata(), video_->frame_size());
    CheckDecodedFrames(byte_alignment_to_check);
-    if (res == VPX_CODEC_OK)
-      video_->Next();
+    if (res == VPX_CODEC_OK) video_->Next();
    return res;
  }

@@ -98,8 +93,7 @@ class ByteAlignmentTest
    for (; video_->cxdata() != NULL; video_->Next()) {
      const vpx_codec_err_t res =
          decoder_->DecodeFrame(video_->cxdata(), video_->frame_size());
-      if (res != VPX_CODEC_OK)
-        return res;
+      if (res != VPX_CODEC_OK) return res;
      CheckDecodedFrames(byte_alignment_to_check);
    }
    return VPX_CODEC_OK;
@@ -135,7 +129,7 @@ class ByteAlignmentTest
  void OpenMd5File(const std::string &md5_file_name_) {
    md5_file_ = libvpx_test::OpenTestDataFile(md5_file_name_);
    ASSERT_TRUE(md5_file_ != NULL) << "MD5 file open failed. Filename: "
-        << md5_file_name_;
+                                   << md5_file_name_;
  }

  void CheckMd5(const vpx_image_t &img) {
@@ -163,8 +157,8 @@ class ByteAlignmentTest

 TEST_F(ByteAlignmentTest, SwitchByteAlignment) {
  const int num_elements = 14;
-  const int byte_alignments[] = { 0, 32, 64, 128, 256, 512, 1024,
-                                  0, 1024, 32, 512, 64, 256, 128 };
+  const int byte_alignments[] = { 0, 32,   64, 128, 256, 512, 1024,
+                                  0, 1024, 32, 512, 64,  256, 128 };

  for (int i = 0; i < num_elements; ++i) {
    SetByteAlignment(byte_alignments[i], VPX_CODEC_OK);
--- a/test/clear_system_state.h
+++ b/test/clear_system_state.h
@@ -12,7 +12,7 @@

 #include "./vpx_config.h"
 #if ARCH_X86 || ARCH_X86_64
-# include "vpx_ports/x86.h"
+#include "vpx_ports/x86.h"
 #endif

 namespace libvpx_test {
--- a/test/codec_factory.h
+++ b/test/codec_factory.h
@@ -32,15 +32,12 @@ class CodecFactory {

  virtual ~CodecFactory() {}

-  virtual Decoder* CreateDecoder(vpx_codec_dec_cfg_t cfg,
-                                 unsigned long deadline) const = 0;
+  virtual Decoder *CreateDecoder(vpx_codec_dec_cfg_t cfg) const = 0;

-  virtual Decoder* CreateDecoder(vpx_codec_dec_cfg_t cfg,
-                                 const vpx_codec_flags_t flags,
-                                 unsigned long deadline)  // NOLINT(runtime/int)
-                                 const = 0;
+  virtual Decoder *CreateDecoder(vpx_codec_dec_cfg_t cfg,
+                                 const vpx_codec_flags_t flags) const = 0;

-  virtual Encoder* CreateEncoder(vpx_codec_enc_cfg_t cfg,
+  virtual Encoder *CreateEncoder(vpx_codec_enc_cfg_t cfg,
                                 unsigned long deadline,
                                 const unsigned long init_flags,
                                 TwopassStatsStore *stats) const = 0;
@@ -53,19 +50,25 @@ class CodecFactory {
 * to avoid having to include a pointer to the CodecFactory in every test
 * definition.
 */
-template<class T1>
-class CodecTestWithParam : public ::testing::TestWithParam<
-    std::tr1::tuple< const libvpx_test::CodecFactory*, T1 > > {
-};
+template <class T1>
+class CodecTestWithParam
+    : public ::testing::TestWithParam<
+          std::tr1::tuple<const libvpx_test::CodecFactory *, T1> > {};

-template<class T1, class T2>
-class CodecTestWith2Params : public ::testing::TestWithParam<
-    std::tr1::tuple< const libvpx_test::CodecFactory*, T1, T2 > > {
-};
+template <class T1, class T2>
+class CodecTestWith2Params
+    : public ::testing::TestWithParam<
+          std::tr1::tuple<const libvpx_test::CodecFactory *, T1, T2> > {};

-template<class T1, class T2, class T3>
-class CodecTestWith3Params : public ::testing::TestWithParam<
-    std::tr1::tuple< const libvpx_test::CodecFactory*, T1, T2, T3 > > {
+template <class T1, class T2, class T3>
+class CodecTestWith3Params
+    : public ::testing::TestWithParam<
+          std::tr1::tuple<const libvpx_test::CodecFactory *, T1, T2, T3> > {};
+
+template <class T1, class T2, class T3, class T4>
+class CodecTestWith4Params
+    : public ::testing::TestWithParam<
+          std::tr1::tuple<const libvpx_test::CodecFactory *, T1, T2, T3, T4> > {
 };

 /*
@@ -74,15 +77,13 @@ class CodecTestWith3Params : public ::testing::TestWithParam<
 #if CONFIG_VP8
 class VP8Decoder : public Decoder {
 public:
-  VP8Decoder(vpx_codec_dec_cfg_t cfg, unsigned long deadline)
-      : Decoder(cfg, deadline) {}
+  explicit VP8Decoder(vpx_codec_dec_cfg_t cfg) : Decoder(cfg) {}

-  VP8Decoder(vpx_codec_dec_cfg_t cfg, const vpx_codec_flags_t flag,
-             unsigned long deadline)  // NOLINT
-      : Decoder(cfg, flag, deadline) {}
+  VP8Decoder(vpx_codec_dec_cfg_t cfg, const vpx_codec_flags_t flag)
+      : Decoder(cfg, flag) {}

 protected:
-  virtual vpx_codec_iface_t* CodecInterface() const {
+  virtual vpx_codec_iface_t *CodecInterface() const {
 #if CONFIG_VP8_DECODER
    return &vpx_codec_vp8_dx_algo;
 #else
@@ -98,7 +99,7 @@ class VP8Encoder : public Encoder {
      : Encoder(cfg, deadline, init_flags, stats) {}

 protected:
-  virtual vpx_codec_iface_t* CodecInterface() const {
+  virtual vpx_codec_iface_t *CodecInterface() const {
 #if CONFIG_VP8_ENCODER
    return &vpx_codec_vp8_cx_algo;
 #else
@@ -111,28 +112,32 @@ class VP8CodecFactory : public CodecFactory {
 public:
  VP8CodecFactory() : CodecFactory() {}

-  virtual Decoder* CreateDecoder(vpx_codec_dec_cfg_t cfg,
-                                 unsigned long deadline) const {
-    return CreateDecoder(cfg, 0, deadline);
+  virtual Decoder *CreateDecoder(vpx_codec_dec_cfg_t cfg) const {
+    return CreateDecoder(cfg, 0);
  }

-  virtual Decoder* CreateDecoder(vpx_codec_dec_cfg_t cfg,
-                                 const vpx_codec_flags_t flags,
-                                 unsigned long deadline) const {  // NOLINT
+  virtual Decoder *CreateDecoder(vpx_codec_dec_cfg_t cfg,
+                                 const vpx_codec_flags_t flags) const {
 #if CONFIG_VP8_DECODER
-    return new VP8Decoder(cfg, flags, deadline);
+    return new VP8Decoder(cfg, flags);
 #else
+    (void)cfg;
+    (void)flags;
    return NULL;
 #endif
  }

-  virtual Encoder* CreateEncoder(vpx_codec_enc_cfg_t cfg,
+  virtual Encoder *CreateEncoder(vpx_codec_enc_cfg_t cfg,
                                 unsigned long deadline,
                                 const unsigned long init_flags,
                                 TwopassStatsStore *stats) const {
 #if CONFIG_VP8_ENCODER
    return new VP8Encoder(cfg, deadline, init_flags, stats);
 #else
+    (void)cfg;
+    (void)deadline;
+    (void)init_flags;
+    (void)stats;
    return NULL;
 #endif
  }
@@ -142,6 +147,8 @@ class VP8CodecFactory : public CodecFactory {
 #if CONFIG_VP8_ENCODER
    return vpx_codec_enc_config_default(&vpx_codec_vp8_cx_algo, cfg, usage);
 #else
+    (void)cfg;
+    (void)usage;
    return VPX_CODEC_INCAPABLE;
 #endif
  }
@@ -149,32 +156,30 @@ class VP8CodecFactory : public CodecFactory {

 const libvpx_test::VP8CodecFactory kVP8;

-#define VP8_INSTANTIATE_TEST_CASE(test, ...)\
-  INSTANTIATE_TEST_CASE_P(VP8, test, \
-      ::testing::Combine( \
-          ::testing::Values(static_cast<const libvpx_test::CodecFactory*>( \
-              &libvpx_test::kVP8)), \
+#define VP8_INSTANTIATE_TEST_CASE(test, ...)                                \
+  INSTANTIATE_TEST_CASE_P(                                                  \
+      VP8, test,                                                            \
+      ::testing::Combine(                                                   \
+          ::testing::Values(static_cast<const libvpx_test::CodecFactory *>( \
+              &libvpx_test::kVP8)),                                         \
          __VA_ARGS__))
 #else
 #define VP8_INSTANTIATE_TEST_CASE(test, ...)
 #endif  // CONFIG_VP8

-
 /*
 * VP9 Codec Definitions
 */
 #if CONFIG_VP9
 class VP9Decoder : public Decoder {
 public:
-  VP9Decoder(vpx_codec_dec_cfg_t cfg, unsigned long deadline)
-      : Decoder(cfg, deadline) {}
+  explicit VP9Decoder(vpx_codec_dec_cfg_t cfg) : Decoder(cfg) {}

-  VP9Decoder(vpx_codec_dec_cfg_t cfg, const vpx_codec_flags_t flag,
-             unsigned long deadline)  // NOLINT
-      : Decoder(cfg, flag, deadline) {}
+  VP9Decoder(vpx_codec_dec_cfg_t cfg, const vpx_codec_flags_t flag)
+      : Decoder(cfg, flag) {}

 protected:
-  virtual vpx_codec_iface_t* CodecInterface() const {
+  virtual vpx_codec_iface_t *CodecInterface() const {
 #if CONFIG_VP9_DECODER
    return &vpx_codec_vp9_dx_algo;
 #else
@@ -190,7 +195,7 @@ class VP9Encoder : public Encoder {
      : Encoder(cfg, deadline, init_flags, stats) {}

 protected:
-  virtual vpx_codec_iface_t* CodecInterface() const {
+  virtual vpx_codec_iface_t *CodecInterface() const {
 #if CONFIG_VP9_ENCODER
    return &vpx_codec_vp9_cx_algo;
 #else
@@ -203,28 +208,32 @@ class VP9CodecFactory : public CodecFactory {
 public:
  VP9CodecFactory() : CodecFactory() {}

-  virtual Decoder* CreateDecoder(vpx_codec_dec_cfg_t cfg,
-                                 unsigned long deadline) const {
-    return CreateDecoder(cfg, 0, deadline);
+  virtual Decoder *CreateDecoder(vpx_codec_dec_cfg_t cfg) const {
+    return CreateDecoder(cfg, 0);
  }

-  virtual Decoder* CreateDecoder(vpx_codec_dec_cfg_t cfg,
-                                 const vpx_codec_flags_t flags,
-                                 unsigned long deadline) const {  // NOLINT
+  virtual Decoder *CreateDecoder(vpx_codec_dec_cfg_t cfg,
+                                 const vpx_codec_flags_t flags) const {
 #if CONFIG_VP9_DECODER
-    return new VP9Decoder(cfg, flags, deadline);
+    return new VP9Decoder(cfg, flags);
 #else
+    (void)cfg;
+    (void)flags;
    return NULL;
 #endif
  }

-  virtual Encoder* CreateEncoder(vpx_codec_enc_cfg_t cfg,
+  virtual Encoder *CreateEncoder(vpx_codec_enc_cfg_t cfg,
                                 unsigned long deadline,
                                 const unsigned long init_flags,
                                 TwopassStatsStore *stats) const {
 #if CONFIG_VP9_ENCODER
    return new VP9Encoder(cfg, deadline, init_flags, stats);
 #else
+    (void)cfg;
+    (void)deadline;
+    (void)init_flags;
+    (void)stats;
    return NULL;
 #endif
  }
@@ -234,6 +243,8 @@ class VP9CodecFactory : public CodecFactory {
 #if CONFIG_VP9_ENCODER
    return vpx_codec_enc_config_default(&vpx_codec_vp9_cx_algo, cfg, usage);
 #else
+    (void)cfg;
+    (void)usage;
    return VPX_CODEC_INCAPABLE;
 #endif
  }
@@ -241,17 +252,16 @@ class VP9CodecFactory : public CodecFactory {

 const libvpx_test::VP9CodecFactory kVP9;

-#define VP9_INSTANTIATE_TEST_CASE(test, ...)\
-  INSTANTIATE_TEST_CASE_P(VP9, test, \
-      ::testing::Combine( \
-          ::testing::Values(static_cast<const libvpx_test::CodecFactory*>( \
-               &libvpx_test::kVP9)), \
+#define VP9_INSTANTIATE_TEST_CASE(test, ...)                                \
+  INSTANTIATE_TEST_CASE_P(                                                  \
+      VP9, test,                                                            \
+      ::testing::Combine(                                                   \
+          ::testing::Values(static_cast<const libvpx_test::CodecFactory *>( \
+              &libvpx_test::kVP9)),                                         \
          __VA_ARGS__))
 #else
 #define VP9_INSTANTIATE_TEST_CASE(test, ...)
 #endif  // CONFIG_VP9

-
 }  // namespace libvpx_test
-
 #endif  // TEST_CODEC_FACTORY_H_
--- a/test/config_test.cc
+++ b/test/config_test.cc
@@ -15,11 +15,13 @@

 namespace {

-class ConfigTest : public ::libvpx_test::EncoderTest,
-    public ::libvpx_test::CodecTestWithParam<libvpx_test::TestMode> {
+class ConfigTest
+    : public ::libvpx_test::EncoderTest,
+      public ::libvpx_test::CodecTestWithParam<libvpx_test::TestMode> {
 protected:
-  ConfigTest() : EncoderTest(GET_PARAM(0)),
-                 frame_count_in_(0), frame_count_out_(0), frame_count_max_(0) {}
+  ConfigTest()
+      : EncoderTest(GET_PARAM(0)), frame_count_in_(0), frame_count_out_(0),
+        frame_count_max_(0) {}
  virtual ~ConfigTest() {}

  virtual void SetUp() {
@@ -32,12 +34,12 @@ class ConfigTest : public ::libvpx_test::EncoderTest,
    frame_count_out_ = 0;
  }

-  virtual void PreEncodeFrameHook(libvpx_test::VideoSource* /*video*/) {
+  virtual void PreEncodeFrameHook(libvpx_test::VideoSource * /*video*/) {
    ++frame_count_in_;
    abort_ |= (frame_count_in_ >= frame_count_max_);
  }

-  virtual void FramePktHook(const vpx_codec_cx_pkt_t* /*pkt*/) {
+  virtual void FramePktHook(const vpx_codec_cx_pkt_t * /*pkt*/) {
    ++frame_count_out_;
  }

--- a/test/consistency_test.cc
+++ b/test/consistency_test.cc
@@ -8,10 +8,11 @@
 *  be found in the AUTHORS file in the root of the source tree.
 */

-
-#include <string.h>
 #include <limits.h>
 #include <stdio.h>
+#include <string.h>
+
+#include "third_party/googletest/src/include/gtest/gtest.h"

 #include "./vpx_config.h"
 #if CONFIG_VP9_ENCODER
@@ -22,16 +23,13 @@
 #include "test/clear_system_state.h"
 #include "test/register_state_check.h"
 #include "test/util.h"
-#include "third_party/googletest/src/include/gtest/gtest.h"
-#include "vp9/encoder/vp9_ssim.h"
+#include "vpx_dsp/ssim.h"
 #include "vpx_mem/vpx_mem.h"

-extern "C"
-double vp9_get_ssim_metrics(uint8_t *img1, int img1_pitch,
-                            uint8_t *img2, int img2_pitch,
-                            int width, int height,
-                            Ssimv *sv2, Metrics *m,
-                            int do_inconsistency);
+extern "C" double vpx_get_ssim_metrics(uint8_t *img1, int img1_pitch,
+                                       uint8_t *img2, int img2_pitch, int width,
+                                       int height, Ssimv *sv2, Metrics *m,
+                                       int do_inconsistency);

 using libvpx_test::ACMRandom;

@@ -41,20 +39,18 @@ class ConsistencyTestBase : public ::testing::Test {
  ConsistencyTestBase(int width, int height) : width_(width), height_(height) {}

  static void SetUpTestCase() {
-    source_data_[0] = reinterpret_cast<uint8_t*>(
+    source_data_[0] = reinterpret_cast<uint8_t *>(
        vpx_memalign(kDataAlignment, kDataBufferSize));
-    reference_data_[0] = reinterpret_cast<uint8_t*>(
+    reference_data_[0] = reinterpret_cast<uint8_t *>(
        vpx_memalign(kDataAlignment, kDataBufferSize));
-    source_data_[1] = reinterpret_cast<uint8_t*>(
+    source_data_[1] = reinterpret_cast<uint8_t *>(
        vpx_memalign(kDataAlignment, kDataBufferSize));
-    reference_data_[1] = reinterpret_cast<uint8_t*>(
+    reference_data_[1] = reinterpret_cast<uint8_t *>(
        vpx_memalign(kDataAlignment, kDataBufferSize));
    ssim_array_ = new Ssimv[kDataBufferSize / 16];
  }

-  static void ClearSsim() {
-    memset(ssim_array_, 0, kDataBufferSize / 16);
-  }
+  static void ClearSsim() { memset(ssim_array_, 0, sizeof(Ssimv) * (kDataBufferSize / 16)); }
  static void TearDownTestCase() {
    vpx_free(source_data_[0]);
    source_data_[0] = NULL;
@@ -65,17 +61,15 @@ class ConsistencyTestBase : public ::testing::Test {
    vpx_free(reference_data_[1]);
    reference_data_[1] = NULL;

-    delete ssim_array_;
+    delete[] ssim_array_;
  }

-  virtual void TearDown() {
-    libvpx_test::ClearSystemState();
-  }
+  virtual void TearDown() { libvpx_test::ClearSystemState(); }

 protected:
  // Handle frames up to 640x480
  static const int kDataAlignment = 16;
-  static const int kDataBufferSize = 640*480;
+  static const int kDataBufferSize = 640 * 480;

  virtual void SetUp() {
    source_stride_ = (width_ + 31) & ~31;
@@ -122,9 +116,9 @@ class ConsistencyTestBase : public ::testing::Test {
    }
  }
  int width_, height_;
-  static uint8_t* source_data_[2];
+  static uint8_t *source_data_[2];
  int source_stride_;
-  static uint8_t* reference_data_[2];
+  static uint8_t *reference_data_[2];
  int reference_stride_;
  static Ssimv *ssim_array_;
  Metrics metrics_;
@@ -142,18 +136,17 @@ class ConsistencyVP9Test

 protected:
  double CheckConsistency(int frame) {
-    EXPECT_LT(frame, 2)<< "Frame to check has to be less than 2.";
-    return
-        vp9_get_ssim_metrics(source_data_[frame], source_stride_,
-                             reference_data_[frame], reference_stride_,
-                             width_, height_, ssim_array_, &metrics_, 1);
+    EXPECT_LT(frame, 2) << "Frame to check has to be less than 2.";
+    return vpx_get_ssim_metrics(source_data_[frame], source_stride_,
+                                reference_data_[frame], reference_stride_,
+                                width_, height_, ssim_array_, &metrics_, 1);
  }
 };
 #endif  // CONFIG_VP9_ENCODER

-uint8_t* ConsistencyTestBase::source_data_[2] = {NULL, NULL};
-uint8_t* ConsistencyTestBase::reference_data_[2] = {NULL, NULL};
-Ssimv* ConsistencyTestBase::ssim_array_ = NULL;
+uint8_t *ConsistencyTestBase::source_data_[2] = { NULL, NULL };
+uint8_t *ConsistencyTestBase::reference_data_[2] = { NULL, NULL };
+Ssimv *ConsistencyTestBase::ssim_array_ = NULL;

 #if CONFIG_VP9_ENCODER
 TEST_P(ConsistencyVP9Test, ConsistencyIsZero) {
@@ -205,7 +198,6 @@ TEST_P(ConsistencyVP9Test, ConsistencyIsZero) {
 }
 #endif  // CONFIG_VP9_ENCODER

-
 using std::tr1::make_tuple;

 //------------------------------------------------------------------------------
@@ -213,9 +205,7 @@ using std::tr1::make_tuple;

 #if CONFIG_VP9_ENCODER
 const ConsistencyParam c_vp9_tests[] = {
-  make_tuple(320, 240),
-  make_tuple(318, 242),
-  make_tuple(318, 238),
+  make_tuple(320, 240), make_tuple(318, 242), make_tuple(318, 238),
 };
 INSTANTIATE_TEST_CASE_P(C, ConsistencyVP9Test,
                        ::testing::ValuesIn(c_vp9_tests));
--- a/test/convolve_test.cc
+++ b/test/convolve_test.cc
--- a/test/cpu_speed_test.cc
+++ b/test/cpu_speed_test.cc
@@ -23,10 +23,9 @@ class CpuSpeedTest
      public ::libvpx_test::CodecTestWith2Params<libvpx_test::TestMode, int> {
 protected:
  CpuSpeedTest()
-      : EncoderTest(GET_PARAM(0)),
-        encoding_mode_(GET_PARAM(1)),
-        set_cpu_used_(GET_PARAM(2)),
-        min_psnr_(kMaxPSNR) {}
+      : EncoderTest(GET_PARAM(0)), encoding_mode_(GET_PARAM(1)),
+        set_cpu_used_(GET_PARAM(2)), min_psnr_(kMaxPSNR),
+        tune_content_(VP9E_CONTENT_DEFAULT) {}
  virtual ~CpuSpeedTest() {}

  virtual void SetUp() {
@@ -41,14 +40,13 @@ class CpuSpeedTest
    }
  }

-  virtual void BeginPassHook(unsigned int /*pass*/) {
-    min_psnr_ = kMaxPSNR;
-  }
+  virtual void BeginPassHook(unsigned int /*pass*/) { min_psnr_ = kMaxPSNR; }

  virtual void PreEncodeFrameHook(::libvpx_test::VideoSource *video,
                                  ::libvpx_test::Encoder *encoder) {
    if (video->frame() == 1) {
      encoder->Control(VP8E_SET_CPUUSED, set_cpu_used_);
+      encoder->Control(VP9E_SET_TUNE_CONTENT, tune_content_);
      if (encoding_mode_ != ::libvpx_test::kRealTime) {
        encoder->Control(VP8E_SET_ENABLEAUTOALTREF, 1);
        encoder->Control(VP8E_SET_ARNR_MAXFRAMES, 7);
@@ -59,13 +57,13 @@ class CpuSpeedTest
  }

  virtual void PSNRPktHook(const vpx_codec_cx_pkt_t *pkt) {
-    if (pkt->data.psnr.psnr[0] < min_psnr_)
-      min_psnr_ = pkt->data.psnr.psnr[0];
+    if (pkt->data.psnr.psnr[0] < min_psnr_) min_psnr_ = pkt->data.psnr.psnr[0];
  }

  ::libvpx_test::TestMode encoding_mode_;
  int set_cpu_used_;
  double min_psnr_;
+  int tune_content_;
 };

 TEST_P(CpuSpeedTest, TestQ0) {
@@ -74,7 +72,7 @@ TEST_P(CpuSpeedTest, TestQ0) {
  // the encoder to producing lots of big partitions which will likely
  // extend into the border and test the border condition.
  cfg_.rc_2pass_vbr_minsection_pct = 5;
-  cfg_.rc_2pass_vbr_minsection_pct = 2000;
+  cfg_.rc_2pass_vbr_maxsection_pct = 2000;
  cfg_.rc_target_bitrate = 400;
  cfg_.rc_max_quantizer = 0;
  cfg_.rc_min_quantizer = 0;
@@ -92,7 +90,7 @@ TEST_P(CpuSpeedTest, TestScreencastQ0) {
  ::libvpx_test::Y4mVideoSource video("screendata.y4m", 0, 25);
  cfg_.g_timebase = video.timebase();
  cfg_.rc_2pass_vbr_minsection_pct = 5;
-  cfg_.rc_2pass_vbr_minsection_pct = 2000;
+  cfg_.rc_2pass_vbr_maxsection_pct = 2000;
  cfg_.rc_target_bitrate = 400;
  cfg_.rc_max_quantizer = 0;
  cfg_.rc_min_quantizer = 0;
@@ -103,13 +101,28 @@ TEST_P(CpuSpeedTest, TestScreencastQ0) {
  EXPECT_GE(min_psnr_, kMaxPSNR);
 }

+TEST_P(CpuSpeedTest, TestTuneScreen) {
+  ::libvpx_test::Y4mVideoSource video("screendata.y4m", 0, 25);
+  cfg_.g_timebase = video.timebase();
+  cfg_.rc_2pass_vbr_minsection_pct = 5;
+  cfg_.rc_2pass_vbr_maxsection_pct = 2000;
+  cfg_.rc_target_bitrate = 2000;
+  cfg_.rc_max_quantizer = 63;
+  cfg_.rc_min_quantizer = 0;
+  tune_content_ = VP9E_CONTENT_SCREEN;  // sent via VP9E_SET_TUNE_CONTENT
+
+  init_flags_ = VPX_CODEC_USE_PSNR;
+
+  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+}
+
 TEST_P(CpuSpeedTest, TestEncodeHighBitrate) {
  // Validate that this non multiple of 64 wide clip encodes and decodes
  // without a mismatch when passing in a very low max q.  This pushes
  // the encoder to producing lots of big partitions which will likely
  // extend into the border and test the border condition.
  cfg_.rc_2pass_vbr_minsection_pct = 5;
-  cfg_.rc_2pass_vbr_minsection_pct = 2000;
+  cfg_.rc_2pass_vbr_maxsection_pct = 2000;
  cfg_.rc_target_bitrate = 12000;
  cfg_.rc_max_quantizer = 10;
  cfg_.rc_min_quantizer = 0;
@@ -125,7 +138,7 @@ TEST_P(CpuSpeedTest, TestLowBitrate) {
  // when passing in a very high min q.  This pushes the encoder to producing
  // lots of small partitions which might will test the other condition.
  cfg_.rc_2pass_vbr_minsection_pct = 5;
-  cfg_.rc_2pass_vbr_minsection_pct = 2000;
+  cfg_.rc_2pass_vbr_maxsection_pct = 2000;
  cfg_.rc_target_bitrate = 200;
  cfg_.rc_min_quantizer = 40;

@@ -135,9 +148,9 @@ TEST_P(CpuSpeedTest, TestLowBitrate) {
  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
 }

-VP9_INSTANTIATE_TEST_CASE(
-    CpuSpeedTest,
-    ::testing::Values(::libvpx_test::kTwoPassGood, ::libvpx_test::kOnePassGood,
-                      ::libvpx_test::kRealTime),
-    ::testing::Range(0, 9));
+VP9_INSTANTIATE_TEST_CASE(CpuSpeedTest,
+                          ::testing::Values(::libvpx_test::kTwoPassGood,
+                                            ::libvpx_test::kOnePassGood,
+                                            ::libvpx_test::kRealTime),
+                          ::testing::Range(0, 9));
 }  // namespace
--- a/test/cq_test.cc
+++ b/test/cq_test.cc
@@ -24,14 +24,12 @@ const int kCQLevelStep = 8;
 const unsigned int kCQTargetBitrate = 2000;

 class CQTest : public ::libvpx_test::EncoderTest,
-    public ::libvpx_test::CodecTestWithParam<int> {
+               public ::libvpx_test::CodecTestWithParam<int> {
 public:
  // maps the cqlevel to the bitrate produced.
  typedef std::map<int, uint32_t> BitrateMap;

-  static void SetUpTestCase() {
-    bitrates_.clear();
-  }
+  static void SetUpTestCase() { bitrates_.clear(); }

  static void TearDownTestCase() {
    ASSERT_TRUE(!HasFailure())
@@ -128,7 +126,6 @@ TEST_P(CQTest, LinearPSNRIsHigherForCQLevel) {
  EXPECT_GE(cq_psnr_lin, vbr_psnr_lin);
 }

-VP8_INSTANTIATE_TEST_CASE(CQTest,
-                          ::testing::Range(kCQLevelMin, kCQLevelMax,
-                                           kCQLevelStep));
+VP8_INSTANTIATE_TEST_CASE(CQTest, ::testing::Range(kCQLevelMin, kCQLevelMax,
+                                                   kCQLevelStep));
 }  // namespace
--- a/test/vp8cx_set_ref.sh
+++ b/test/vp8cx_set_ref.sh
@@ -1,6 +1,6 @@
 #!/bin/sh
 ##
-##  Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+##  Copyright (c) 2016 The WebM project authors. All Rights Reserved.
 ##
 ##  Use of this source code is governed by a BSD-style license
 ##  that can be found in the LICENSE file in the root of the source
@@ -8,30 +8,27 @@
 ##  in the file PATENTS.  All contributing project authors may
 ##  be found in the AUTHORS file in the root of the source tree.
 ##
-##  This file tests the libvpx vp8cx_set_ref example. To add new tests to this
+##  This file tests the libvpx cx_set_ref example. To add new tests to this
 ##  file, do the following:
 ##    1. Write a shell function (this is your test).
-##    2. Add the function to vp8cx_set_ref_tests (on a new line).
+##    2. Add the function to cx_set_ref_tests (on a new line).
 ##
 . $(dirname $0)/tools_common.sh

 # Environment check: $YUV_RAW_INPUT is required.
-vp8cx_set_ref_verify_environment() {
+cx_set_ref_verify_environment() {
  if [ ! -e "${YUV_RAW_INPUT}" ]; then
    echo "Libvpx test data must exist in LIBVPX_TEST_DATA_PATH."
    return 1
  fi
 }

-# Runs vp8cx_set_ref and updates the reference frame before encoding frame 90.
-# $1 is the codec name, which vp8cx_set_ref does not support at present: It's
-# currently used only to name the output file.
-# TODO(tomfinegan): Pass the codec param once the example is updated to support
-# VP9.
+# Runs cx_set_ref and updates the reference frame before encoding frame 90.
+# $1 is the codec name.
 vpx_set_ref() {
-  local encoder="${LIBVPX_BIN_PATH}/vp8cx_set_ref${VPX_TEST_EXE_SUFFIX}"
  local codec="$1"
-  local output_file="${VPX_TEST_OUTPUT_DIR}/vp8cx_set_ref_${codec}.ivf"
+  local encoder="${LIBVPX_BIN_PATH}/${codec}cx_set_ref${VPX_TEST_EXE_SUFFIX}"
+  local output_file="${VPX_TEST_OUTPUT_DIR}/${codec}cx_set_ref_${codec}.ivf"
  local ref_frame_num=90

  if [ ! -x "${encoder}" ]; then
@@ -46,12 +43,18 @@ vpx_set_ref() {
  [ -e "${output_file}" ] || return 1
 }

-vp8cx_set_ref_vp8() {
+cx_set_ref_vp8() {
  if [ "$(vp8_encode_available)" = "yes" ]; then
    vpx_set_ref vp8 || return 1
  fi
 }

-vp8cx_set_ref_tests="vp8cx_set_ref_vp8"
+cx_set_ref_vp9() {
+  if [ "$(vp9_encode_available)" = "yes" ]; then
+    vpx_set_ref vp9 || return 1
+  fi
+}

-run_tests vp8cx_set_ref_verify_environment "${vp8cx_set_ref_tests}"
+cx_set_ref_tests="cx_set_ref_vp8 cx_set_ref_vp9"
+
+run_tests cx_set_ref_verify_environment "${cx_set_ref_tests}"
--- a/test/datarate_test.cc
+++ b/test/datarate_test.cc
--- a/test/dct16x16_test.cc
+++ b/test/dct16x16_test.cc
@@ -13,56 +13,25 @@
 #include <string.h>

 #include "third_party/googletest/src/include/gtest/gtest.h"
+
+#include "./vp9_rtcd.h"
+#include "./vpx_dsp_rtcd.h"
 #include "test/acm_random.h"
 #include "test/clear_system_state.h"
 #include "test/register_state_check.h"
 #include "test/util.h"
-
-#include "./vp9_rtcd.h"
 #include "vp9/common/vp9_entropy.h"
 #include "vp9/common/vp9_scan.h"
 #include "vpx/vpx_codec.h"
 #include "vpx/vpx_integer.h"
 #include "vpx_ports/mem.h"
+#include "vpx_ports/msvc.h"  // for round()

 using libvpx_test::ACMRandom;

 namespace {

-#ifdef _MSC_VER
-static int round(double x) {
-  if (x < 0)
-    return static_cast<int>(ceil(x - 0.5));
-  else
-    return static_cast<int>(floor(x + 0.5));
-}
-#endif
-
 const int kNumCoeffs = 256;
-const double PI = 3.1415926535898;
-void reference2_16x16_idct_2d(double *input, double *output) {
-  double x;
-  for (int l = 0; l < 16; ++l) {
-    for (int k = 0; k < 16; ++k) {
-      double s = 0;
-      for (int i = 0; i < 16; ++i) {
-        for (int j = 0; j < 16; ++j) {
-          x = cos(PI * j * (l + 0.5) / 16.0) *
-              cos(PI * i * (k + 0.5) / 16.0) *
-              input[i * 16 + j] / 256;
-          if (i != 0)
-            x *= sqrt(2.0);
-          if (j != 0)
-            x *= sqrt(2.0);
-          s += x;
-        }
-      }
-      output[k*16+l] = s;
-    }
-  }
-}
-
-
 const double C1 = 0.995184726672197;
 const double C2 = 0.98078528040323;
 const double C3 = 0.956940335732209;
@@ -85,16 +54,16 @@ void butterfly_16x16_dct_1d(double input[16], double output[16]) {
  double temp1, temp2;

  // step 1
-  step[ 0] = input[0] + input[15];
-  step[ 1] = input[1] + input[14];
-  step[ 2] = input[2] + input[13];
-  step[ 3] = input[3] + input[12];
-  step[ 4] = input[4] + input[11];
-  step[ 5] = input[5] + input[10];
-  step[ 6] = input[6] + input[ 9];
-  step[ 7] = input[7] + input[ 8];
-  step[ 8] = input[7] - input[ 8];
-  step[ 9] = input[6] - input[ 9];
+  step[0] = input[0] + input[15];
+  step[1] = input[1] + input[14];
+  step[2] = input[2] + input[13];
+  step[3] = input[3] + input[12];
+  step[4] = input[4] + input[11];
+  step[5] = input[5] + input[10];
+  step[6] = input[6] + input[9];
+  step[7] = input[7] + input[8];
+  step[8] = input[7] - input[8];
+  step[9] = input[6] - input[9];
  step[10] = input[5] - input[10];
  step[11] = input[4] - input[11];
  step[12] = input[3] - input[12];
@@ -112,13 +81,13 @@ void butterfly_16x16_dct_1d(double input[16], double output[16]) {
  output[6] = step[1] - step[6];
  output[7] = step[0] - step[7];

-  temp1 = step[ 8] * C7;
+  temp1 = step[8] * C7;
  temp2 = step[15] * C9;
-  output[ 8] = temp1 + temp2;
+  output[8] = temp1 + temp2;

-  temp1 = step[ 9] * C11;
+  temp1 = step[9] * C11;
  temp2 = step[14] * C5;
-  output[ 9] = temp1 - temp2;
+  output[9] = temp1 - temp2;

  temp1 = step[10] * C3;
  temp2 = step[13] * C13;
@@ -136,40 +105,40 @@ void butterfly_16x16_dct_1d(double input[16], double output[16]) {
  temp2 = step[13] * C3;
  output[13] = temp2 - temp1;

-  temp1 = step[ 9] * C5;
+  temp1 = step[9] * C5;
  temp2 = step[14] * C11;
  output[14] = temp2 + temp1;

-  temp1 = step[ 8] * C9;
+  temp1 = step[8] * C9;
  temp2 = step[15] * C7;
  output[15] = temp2 - temp1;

  // step 3
-  step[ 0] = output[0] + output[3];
-  step[ 1] = output[1] + output[2];
-  step[ 2] = output[1] - output[2];
-  step[ 3] = output[0] - output[3];
+  step[0] = output[0] + output[3];
+  step[1] = output[1] + output[2];
+  step[2] = output[1] - output[2];
+  step[3] = output[0] - output[3];

  temp1 = output[4] * C14;
  temp2 = output[7] * C2;
-  step[ 4] = temp1 + temp2;
+  step[4] = temp1 + temp2;

  temp1 = output[5] * C10;
  temp2 = output[6] * C6;
-  step[ 5] = temp1 + temp2;
+  step[5] = temp1 + temp2;

  temp1 = output[5] * C6;
  temp2 = output[6] * C10;
-  step[ 6] = temp2 - temp1;
+  step[6] = temp2 - temp1;

  temp1 = output[4] * C2;
  temp2 = output[7] * C14;
-  step[ 7] = temp2 - temp1;
+  step[7] = temp2 - temp1;

-  step[ 8] = output[ 8] + output[11];
-  step[ 9] = output[ 9] + output[10];
-  step[10] = output[ 9] - output[10];
-  step[11] = output[ 8] - output[11];
+  step[8] = output[8] + output[11];
+  step[9] = output[9] + output[10];
+  step[10] = output[9] - output[10];
+  step[11] = output[8] - output[11];

  step[12] = output[12] + output[15];
  step[13] = output[13] + output[14];
@@ -177,25 +146,25 @@ void butterfly_16x16_dct_1d(double input[16], double output[16]) {
  step[15] = output[12] - output[15];

  // step 4
-  output[ 0] = (step[ 0] + step[ 1]);
-  output[ 8] = (step[ 0] - step[ 1]);
+  output[0] = (step[0] + step[1]);
+  output[8] = (step[0] - step[1]);

  temp1 = step[2] * C12;
  temp2 = step[3] * C4;
  temp1 = temp1 + temp2;
-  output[ 4] = 2*(temp1 * C8);
+  output[4] = 2 * (temp1 * C8);

  temp1 = step[2] * C4;
  temp2 = step[3] * C12;
  temp1 = temp2 - temp1;
  output[12] = 2 * (temp1 * C8);

-  output[ 2] = 2 * ((step[4] + step[ 5]) * C8);
-  output[14] = 2 * ((step[7] - step[ 6]) * C8);
+  output[2] = 2 * ((step[4] + step[5]) * C8);
+  output[14] = 2 * ((step[7] - step[6]) * C8);

  temp1 = step[4] - step[5];
  temp2 = step[6] + step[7];
-  output[ 6] = (temp1 + temp2);
+  output[6] = (temp1 + temp2);
  output[10] = (temp1 - temp2);

  intermediate[8] = step[8] + step[14];
@@ -211,18 +180,18 @@ void butterfly_16x16_dct_1d(double input[16], double output[16]) {
  temp1 = temp2 + temp1;
  output[13] = 2 * (temp1 * C8);

-  output[ 9] = 2 * ((step[10] + step[11]) * C8);
+  output[9] = 2 * ((step[10] + step[11]) * C8);

  intermediate[11] = step[10] - step[11];
  intermediate[12] = step[12] + step[13];
  intermediate[13] = step[12] - step[13];
-  intermediate[14] = step[ 8] - step[14];
-  intermediate[15] = step[ 9] - step[15];
+  intermediate[14] = step[8] - step[14];
+  intermediate[15] = step[9] - step[15];

  output[15] = (intermediate[11] + intermediate[12]);
-  output[ 1] = -(intermediate[11] - intermediate[12]);
+  output[1] = -(intermediate[11] - intermediate[12]);

-  output[ 7] = 2 * (intermediate[13] * C8);
+  output[7] = 2 * (intermediate[13] * C8);

  temp1 = intermediate[14] * C12;
  temp2 = intermediate[15] * C4;
@@ -232,28 +201,24 @@ void butterfly_16x16_dct_1d(double input[16], double output[16]) {
  temp1 = intermediate[14] * C4;
  temp2 = intermediate[15] * C12;
  temp1 = temp2 + temp1;
-  output[ 5] = 2 * (temp1 * C8);
+  output[5] = 2 * (temp1 * C8);
 }

 void reference_16x16_dct_2d(int16_t input[256], double output[256]) {
  // First transform columns
  for (int i = 0; i < 16; ++i) {
    double temp_in[16], temp_out[16];
-    for (int j = 0; j < 16; ++j)
-      temp_in[j] = input[j * 16 + i];
+    for (int j = 0; j < 16; ++j) temp_in[j] = input[j * 16 + i];
    butterfly_16x16_dct_1d(temp_in, temp_out);
-    for (int j = 0; j < 16; ++j)
-      output[j * 16 + i] = temp_out[j];
+    for (int j = 0; j < 16; ++j) output[j * 16 + i] = temp_out[j];
  }
  // Then transform rows
  for (int i = 0; i < 16; ++i) {
    double temp_in[16], temp_out[16];
-    for (int j = 0; j < 16; ++j)
-      temp_in[j] = output[j + i * 16];
+    for (int j = 0; j < 16; ++j) temp_in[j] = output[j + i * 16];
    butterfly_16x16_dct_1d(temp_in, temp_out);
    // Scale by some magic number
-    for (int j = 0; j < 16; ++j)
-      output[j + i * 16] = temp_out[j]/2;
+    for (int j = 0; j < 16; ++j) output[j + i * 16] = temp_out[j] / 2;
  }
 }

@@ -271,16 +236,15 @@ typedef std::tr1::tuple<IdctFunc, IdctFunc, int, vpx_bit_depth_t>

 void fdct16x16_ref(const int16_t *in, tran_low_t *out, int stride,
                   int /*tx_type*/) {
-  vp9_fdct16x16_c(in, out, stride);
+  vpx_fdct16x16_c(in, out, stride);
 }

 void idct16x16_ref(const tran_low_t *in, uint8_t *dest, int stride,
                   int /*tx_type*/) {
-  vp9_idct16x16_256_add_c(in, dest, stride);
+  vpx_idct16x16_256_add_c(in, dest, stride);
 }

-void fht16x16_ref(const int16_t *in, tran_low_t *out, int stride,
-                  int tx_type) {
+void fht16x16_ref(const int16_t *in, tran_low_t *out, int stride, int tx_type) {
  vp9_fht16x16_c(in, out, stride, tx_type);
 }

@@ -291,20 +255,20 @@ void iht16x16_ref(const tran_low_t *in, uint8_t *dest, int stride,

 #if CONFIG_VP9_HIGHBITDEPTH
 void idct16x16_10(const tran_low_t *in, uint8_t *out, int stride) {
-  vp9_highbd_idct16x16_256_add_c(in, out, stride, 10);
+  vpx_highbd_idct16x16_256_add_c(in, out, stride, 10);
 }

 void idct16x16_12(const tran_low_t *in, uint8_t *out, int stride) {
-  vp9_highbd_idct16x16_256_add_c(in, out, stride, 12);
+  vpx_highbd_idct16x16_256_add_c(in, out, stride, 12);
 }

 void idct16x16_10_ref(const tran_low_t *in, uint8_t *out, int stride,
-                      int tx_type) {
+                      int /*tx_type*/) {
  idct16x16_10(in, out, stride);
 }

 void idct16x16_12_ref(const tran_low_t *in, uint8_t *out, int stride,
-                      int tx_type) {
+                      int /*tx_type*/) {
  idct16x16_12(in, out, stride);
 }

@@ -316,29 +280,29 @@ void iht16x16_12(const tran_low_t *in, uint8_t *out, int stride, int tx_type) {
  vp9_highbd_iht16x16_256_add_c(in, out, stride, tx_type, 12);
 }

+#if HAVE_SSE2
 void idct16x16_10_add_10_c(const tran_low_t *in, uint8_t *out, int stride) {
-  vp9_highbd_idct16x16_10_add_c(in, out, stride, 10);
+  vpx_highbd_idct16x16_10_add_c(in, out, stride, 10);
 }

 void idct16x16_10_add_12_c(const tran_low_t *in, uint8_t *out, int stride) {
-  vp9_highbd_idct16x16_10_add_c(in, out, stride, 12);
+  vpx_highbd_idct16x16_10_add_c(in, out, stride, 12);
 }

-#if HAVE_SSE2
 void idct16x16_256_add_10_sse2(const tran_low_t *in, uint8_t *out, int stride) {
-  vp9_highbd_idct16x16_256_add_sse2(in, out, stride, 10);
+  vpx_highbd_idct16x16_256_add_sse2(in, out, stride, 10);
 }

 void idct16x16_256_add_12_sse2(const tran_low_t *in, uint8_t *out, int stride) {
-  vp9_highbd_idct16x16_256_add_sse2(in, out, stride, 12);
+  vpx_highbd_idct16x16_256_add_sse2(in, out, stride, 12);
 }

 void idct16x16_10_add_10_sse2(const tran_low_t *in, uint8_t *out, int stride) {
-  vp9_highbd_idct16x16_10_add_sse2(in, out, stride, 10);
+  vpx_highbd_idct16x16_10_add_sse2(in, out, stride, 10);
 }

 void idct16x16_10_add_12_sse2(const tran_low_t *in, uint8_t *out, int stride) {
-  vp9_highbd_idct16x16_10_add_sse2(in, out, stride, 12);
+  vpx_highbd_idct16x16_10_add_sse2(in, out, stride, 12);
 }
 #endif  // HAVE_SSE2
 #endif  // CONFIG_VP9_HIGHBITDEPTH
@@ -382,11 +346,10 @@ class Trans16x16TestBase {
        }
      }

-      ASM_REGISTER_STATE_CHECK(RunFwdTxfm(test_input_block,
-                                          test_temp_block, pitch_));
+      ASM_REGISTER_STATE_CHECK(
+          RunFwdTxfm(test_input_block, test_temp_block, pitch_));
      if (bit_depth_ == VPX_BITS_8) {
-        ASM_REGISTER_STATE_CHECK(
-            RunInvTxfm(test_temp_block, dst, pitch_));
+        ASM_REGISTER_STATE_CHECK(RunInvTxfm(test_temp_block, dst, pitch_));
 #if CONFIG_VP9_HIGHBITDEPTH
      } else {
        ASM_REGISTER_STATE_CHECK(
@@ -396,19 +359,18 @@ class Trans16x16TestBase {

      for (int j = 0; j < kNumCoeffs; ++j) {
 #if CONFIG_VP9_HIGHBITDEPTH
-        const uint32_t diff =
-            bit_depth_ == VPX_BITS_8 ?  dst[j] - src[j] : dst16[j] - src16[j];
+        const int32_t diff =
+            bit_depth_ == VPX_BITS_8 ? dst[j] - src[j] : dst16[j] - src16[j];
 #else
-        const uint32_t diff = dst[j] - src[j];
+        const int32_t diff = dst[j] - src[j];
 #endif
        const uint32_t error = diff * diff;
-        if (max_error < error)
-          max_error = error;
+        if (max_error < error) max_error = error;
        total_error += error;
      }
    }

-    EXPECT_GE(1u  << 2 * (bit_depth_ - 8), max_error)
+    EXPECT_GE(1u << 2 * (bit_depth_ - 8), max_error)
        << "Error: 16x16 FHT/IHT has an individual round trip error > 1";

    EXPECT_GE(count_test_block << 2 * (bit_depth_ - 8), total_error)
@@ -424,8 +386,9 @@ class Trans16x16TestBase {

    for (int i = 0; i < count_test_block; ++i) {
      // Initialize a test block with input range [-mask_, mask_].
-      for (int j = 0; j < kNumCoeffs; ++j)
+      for (int j = 0; j < kNumCoeffs; ++j) {
        input_block[j] = (rnd.Rand16() & mask_) - (rnd.Rand16() & mask_);
+      }

      fwd_txfm_ref(input_block, output_ref_block, pitch_, tx_type_);
      ASM_REGISTER_STATE_CHECK(RunFwdTxfm(input_block, output_block, pitch_));
@@ -449,16 +412,14 @@ class Trans16x16TestBase {
        input_extreme_block[j] = rnd.Rand8() % 2 ? mask_ : -mask_;
      }
      if (i == 0) {
-        for (int j = 0; j < kNumCoeffs; ++j)
-          input_extreme_block[j] = mask_;
+        for (int j = 0; j < kNumCoeffs; ++j) input_extreme_block[j] = mask_;
      } else if (i == 1) {
-        for (int j = 0; j < kNumCoeffs; ++j)
-          input_extreme_block[j] = -mask_;
+        for (int j = 0; j < kNumCoeffs; ++j) input_extreme_block[j] = -mask_;
      }

      fwd_txfm_ref(input_extreme_block, output_ref_block, pitch_, tx_type_);
-      ASM_REGISTER_STATE_CHECK(RunFwdTxfm(input_extreme_block,
-                                          output_block, pitch_));
+      ASM_REGISTER_STATE_CHECK(
+          RunFwdTxfm(input_extreme_block, output_block, pitch_));

      // The minimum quant value is 4.
      for (int j = 0; j < kNumCoeffs; ++j) {
@@ -487,12 +448,12 @@ class Trans16x16TestBase {
      for (int j = 0; j < kNumCoeffs; ++j) {
        input_extreme_block[j] = rnd.Rand8() % 2 ? mask_ : -mask_;
      }
-      if (i == 0)
-        for (int j = 0; j < kNumCoeffs; ++j)
-          input_extreme_block[j] = mask_;
-      if (i == 1)
-        for (int j = 0; j < kNumCoeffs; ++j)
-          input_extreme_block[j] = -mask_;
+      if (i == 0) {
+        for (int j = 0; j < kNumCoeffs; ++j) input_extreme_block[j] = mask_;
+      }
+      if (i == 1) {
+        for (int j = 0; j < kNumCoeffs; ++j) input_extreme_block[j] = -mask_;
+      }

      fwd_txfm_ref(input_extreme_block, output_ref_block, pitch_, tx_type_);

@@ -506,8 +467,9 @@ class Trans16x16TestBase {

      // quantization with maximum allowed step sizes
      output_ref_block[0] = (output_ref_block[0] / dc_thred) * dc_thred;
-      for (int j = 1; j < kNumCoeffs; ++j)
+      for (int j = 1; j < kNumCoeffs; ++j) {
        output_ref_block[j] = (output_ref_block[j] / ac_thred) * ac_thred;
+      }
      if (bit_depth_ == VPX_BITS_8) {
        inv_txfm_ref(output_ref_block, ref, pitch_, tx_type_);
        ASM_REGISTER_STATE_CHECK(RunInvTxfm(output_ref_block, dst, pitch_));
@@ -515,17 +477,15 @@ class Trans16x16TestBase {
      } else {
        inv_txfm_ref(output_ref_block, CONVERT_TO_BYTEPTR(ref16), pitch_,
                     tx_type_);
-        ASM_REGISTER_STATE_CHECK(RunInvTxfm(output_ref_block,
-                                            CONVERT_TO_BYTEPTR(dst16), pitch_));
+        ASM_REGISTER_STATE_CHECK(
+            RunInvTxfm(output_ref_block, CONVERT_TO_BYTEPTR(dst16), pitch_));
 #endif
      }
      if (bit_depth_ == VPX_BITS_8) {
-        for (int j = 0; j < kNumCoeffs; ++j)
-          EXPECT_EQ(ref[j], dst[j]);
+        for (int j = 0; j < kNumCoeffs; ++j) EXPECT_EQ(ref[j], dst[j]);
 #if CONFIG_VP9_HIGHBITDEPTH
      } else {
-        for (int j = 0; j < kNumCoeffs; ++j)
-          EXPECT_EQ(ref16[j], dst16[j]);
+        for (int j = 0; j < kNumCoeffs; ++j) EXPECT_EQ(ref16[j], dst16[j]);
 #endif
      }
    }
@@ -562,15 +522,16 @@ class Trans16x16TestBase {
      }

      reference_16x16_dct_2d(in, out_r);
-      for (int j = 0; j < kNumCoeffs; ++j)
+      for (int j = 0; j < kNumCoeffs; ++j) {
        coeff[j] = static_cast<tran_low_t>(round(out_r[j]));
+      }

      if (bit_depth_ == VPX_BITS_8) {
        ASM_REGISTER_STATE_CHECK(RunInvTxfm(coeff, dst, 16));
 #if CONFIG_VP9_HIGHBITDEPTH
      } else {
-        ASM_REGISTER_STATE_CHECK(RunInvTxfm(coeff, CONVERT_TO_BYTEPTR(dst16),
-                                            16));
+        ASM_REGISTER_STATE_CHECK(
+            RunInvTxfm(coeff, CONVERT_TO_BYTEPTR(dst16), 16));
 #endif  // CONFIG_VP9_HIGHBITDEPTH
      }

@@ -582,9 +543,8 @@ class Trans16x16TestBase {
        const uint32_t diff = dst[j] - src[j];
 #endif  // CONFIG_VP9_HIGHBITDEPTH
        const uint32_t error = diff * diff;
-        EXPECT_GE(1u, error)
-            << "Error: 16x16 IDCT has error " << error
-            << " at index " << j;
+        EXPECT_GE(1u, error) << "Error: 16x16 IDCT has error " << error
+                             << " at index " << j;
      }
    }
  }
@@ -626,8 +586,8 @@ class Trans16x16TestBase {
      } else {
 #if CONFIG_VP9_HIGHBITDEPTH
        ref_txfm(coeff, CONVERT_TO_BYTEPTR(ref16), pitch_);
-        ASM_REGISTER_STATE_CHECK(RunInvTxfm(coeff, CONVERT_TO_BYTEPTR(dst16),
-                                 pitch_));
+        ASM_REGISTER_STATE_CHECK(
+            RunInvTxfm(coeff, CONVERT_TO_BYTEPTR(dst16), pitch_));
 #endif  // CONFIG_VP9_HIGHBITDEPTH
      }

@@ -639,9 +599,8 @@ class Trans16x16TestBase {
        const uint32_t diff = dst[j] - ref[j];
 #endif  // CONFIG_VP9_HIGHBITDEPTH
        const uint32_t error = diff * diff;
-        EXPECT_EQ(0u, error)
-            << "Error: 16x16 IDCT Comparison has error " << error
-            << " at index " << j;
+        EXPECT_EQ(0u, error) << "Error: 16x16 IDCT Comparison has error "
+                             << error << " at index " << j;
      }
    }
  }
@@ -654,32 +613,25 @@ class Trans16x16TestBase {
  IhtFunc inv_txfm_ref;
 };

-class Trans16x16DCT
-    : public Trans16x16TestBase,
-      public ::testing::TestWithParam<Dct16x16Param> {
+class Trans16x16DCT : public Trans16x16TestBase,
+                      public ::testing::TestWithParam<Dct16x16Param> {
 public:
  virtual ~Trans16x16DCT() {}

  virtual void SetUp() {
    fwd_txfm_ = GET_PARAM(0);
    inv_txfm_ = GET_PARAM(1);
-    tx_type_  = GET_PARAM(2);
+    tx_type_ = GET_PARAM(2);
    bit_depth_ = GET_PARAM(3);
-    pitch_    = 16;
+    pitch_ = 16;
    fwd_txfm_ref = fdct16x16_ref;
    inv_txfm_ref = idct16x16_ref;
    mask_ = (1 << bit_depth_) - 1;
 #if CONFIG_VP9_HIGHBITDEPTH
    switch (bit_depth_) {
-      case VPX_BITS_10:
-        inv_txfm_ref = idct16x16_10_ref;
-        break;
-      case VPX_BITS_12:
-        inv_txfm_ref = idct16x16_12_ref;
-        break;
-      default:
-        inv_txfm_ref = idct16x16_ref;
-        break;
+      case VPX_BITS_10: inv_txfm_ref = idct16x16_10_ref; break;
+      case VPX_BITS_12: inv_txfm_ref = idct16x16_12_ref; break;
+      default: inv_txfm_ref = idct16x16_ref; break;
    }
 #else
    inv_txfm_ref = idct16x16_ref;
@@ -699,17 +651,11 @@ class Trans16x16DCT
  IdctFunc inv_txfm_;
 };

-TEST_P(Trans16x16DCT, AccuracyCheck) {
-  RunAccuracyCheck();
-}
+TEST_P(Trans16x16DCT, AccuracyCheck) { RunAccuracyCheck(); }

-TEST_P(Trans16x16DCT, CoeffCheck) {
-  RunCoeffCheck();
-}
+TEST_P(Trans16x16DCT, CoeffCheck) { RunCoeffCheck(); }

-TEST_P(Trans16x16DCT, MemCheck) {
-  RunMemCheck();
-}
+TEST_P(Trans16x16DCT, MemCheck) { RunMemCheck(); }

 TEST_P(Trans16x16DCT, QuantCheck) {
  // Use maximally allowed quantization step sizes for DC and AC
@@ -717,36 +663,27 @@ TEST_P(Trans16x16DCT, QuantCheck) {
  RunQuantCheck(1336, 1828);
 }

-TEST_P(Trans16x16DCT, InvAccuracyCheck) {
-  RunInvAccuracyCheck();
-}
+TEST_P(Trans16x16DCT, InvAccuracyCheck) { RunInvAccuracyCheck(); }

-class Trans16x16HT
-    : public Trans16x16TestBase,
-      public ::testing::TestWithParam<Ht16x16Param> {
+class Trans16x16HT : public Trans16x16TestBase,
+                     public ::testing::TestWithParam<Ht16x16Param> {
 public:
  virtual ~Trans16x16HT() {}

  virtual void SetUp() {
    fwd_txfm_ = GET_PARAM(0);
    inv_txfm_ = GET_PARAM(1);
-    tx_type_  = GET_PARAM(2);
+    tx_type_ = GET_PARAM(2);
    bit_depth_ = GET_PARAM(3);
-    pitch_    = 16;
+    pitch_ = 16;
    fwd_txfm_ref = fht16x16_ref;
    inv_txfm_ref = iht16x16_ref;
    mask_ = (1 << bit_depth_) - 1;
 #if CONFIG_VP9_HIGHBITDEPTH
    switch (bit_depth_) {
-      case VPX_BITS_10:
-        inv_txfm_ref = iht16x16_10;
-        break;
-      case VPX_BITS_12:
-        inv_txfm_ref = iht16x16_12;
-        break;
-      default:
-        inv_txfm_ref = iht16x16_ref;
-        break;
+      case VPX_BITS_10: inv_txfm_ref = iht16x16_10; break;
+      case VPX_BITS_12: inv_txfm_ref = iht16x16_12; break;
+      default: inv_txfm_ref = iht16x16_ref; break;
    }
 #else
    inv_txfm_ref = iht16x16_ref;
@@ -766,17 +703,11 @@ class Trans16x16HT
  IhtFunc inv_txfm_;
 };

-TEST_P(Trans16x16HT, AccuracyCheck) {
-  RunAccuracyCheck();
-}
+TEST_P(Trans16x16HT, AccuracyCheck) { RunAccuracyCheck(); }

-TEST_P(Trans16x16HT, CoeffCheck) {
-  RunCoeffCheck();
-}
+TEST_P(Trans16x16HT, CoeffCheck) { RunCoeffCheck(); }

-TEST_P(Trans16x16HT, MemCheck) {
-  RunMemCheck();
-}
+TEST_P(Trans16x16HT, MemCheck) { RunMemCheck(); }

 TEST_P(Trans16x16HT, QuantCheck) {
  // The encoder skips any non-DC intra prediction modes,
@@ -784,9 +715,8 @@ TEST_P(Trans16x16HT, QuantCheck) {
  RunQuantCheck(429, 729);
 }

-class InvTrans16x16DCT
-    : public Trans16x16TestBase,
-      public ::testing::TestWithParam<Idct16x16Param> {
+class InvTrans16x16DCT : public Trans16x16TestBase,
+                         public ::testing::TestWithParam<Idct16x16Param> {
 public:
  virtual ~InvTrans16x16DCT() {}

@@ -797,11 +727,11 @@ class InvTrans16x16DCT
    bit_depth_ = GET_PARAM(3);
    pitch_ = 16;
    mask_ = (1 << bit_depth_) - 1;
-}
+  }
  virtual void TearDown() { libvpx_test::ClearSystemState(); }

 protected:
-  void RunFwdTxfm(int16_t *in, tran_low_t *out, int stride) {}
+  void RunFwdTxfm(int16_t * /*in*/, tran_low_t * /*out*/, int /*stride*/) {}
  void RunInvTxfm(tran_low_t *out, uint8_t *dst, int stride) {
    inv_txfm_(out, dst, stride);
  }
@@ -815,20 +745,80 @@ TEST_P(InvTrans16x16DCT, CompareReference) {
  CompareInvReference(ref_txfm_, thresh_);
 }

+class PartialTrans16x16Test : public ::testing::TestWithParam<
+                                  std::tr1::tuple<FdctFunc, vpx_bit_depth_t> > {
+ public:
+  virtual ~PartialTrans16x16Test() {}
+  virtual void SetUp() {
+    fwd_txfm_ = GET_PARAM(0);
+    bit_depth_ = GET_PARAM(1);
+  }
+
+  virtual void TearDown() { libvpx_test::ClearSystemState(); }
+
+ protected:
+  vpx_bit_depth_t bit_depth_;
+  FdctFunc fwd_txfm_;
+};
+
+TEST_P(PartialTrans16x16Test, Extremes) {
+#if CONFIG_VP9_HIGHBITDEPTH
+  const int16_t maxval =
+      static_cast<int16_t>(clip_pixel_highbd(1 << 30, bit_depth_));
+#else
+  const int16_t maxval = 255;
+#endif
+  const int minval = -maxval;
+  DECLARE_ALIGNED(16, int16_t, input[kNumCoeffs]);
+  DECLARE_ALIGNED(16, tran_low_t, output[kNumCoeffs]);
+
+  for (int i = 0; i < kNumCoeffs; ++i) input[i] = maxval;
+  output[0] = 0;
+  ASM_REGISTER_STATE_CHECK(fwd_txfm_(input, output, 16));
+  EXPECT_EQ((maxval * kNumCoeffs) >> 1, output[0]);
+
+  for (int i = 0; i < kNumCoeffs; ++i) input[i] = minval;
+  output[0] = 0;
+  ASM_REGISTER_STATE_CHECK(fwd_txfm_(input, output, 16));
+  EXPECT_EQ((minval * kNumCoeffs) >> 1, output[0]);
+}
+
+TEST_P(PartialTrans16x16Test, Random) {
+#if CONFIG_VP9_HIGHBITDEPTH
+  const int16_t maxval =
+      static_cast<int16_t>(clip_pixel_highbd(1 << 30, bit_depth_));
+#else
+  const int16_t maxval = 255;
+#endif
+  DECLARE_ALIGNED(16, int16_t, input[kNumCoeffs]);
+  DECLARE_ALIGNED(16, tran_low_t, output[kNumCoeffs]);
+  ACMRandom rnd(ACMRandom::DeterministicSeed());
+
+  int sum = 0;
+  for (int i = 0; i < kNumCoeffs; ++i) {
+    const int val = (i & 1) ? -rnd(maxval + 1) : rnd(maxval + 1);
+    input[i] = val;
+    sum += val;
+  }
+  output[0] = 0;
+  ASM_REGISTER_STATE_CHECK(fwd_txfm_(input, output, 16));
+  EXPECT_EQ(sum >> 1, output[0]);
+}
+
 using std::tr1::make_tuple;

 #if CONFIG_VP9_HIGHBITDEPTH
 INSTANTIATE_TEST_CASE_P(
    C, Trans16x16DCT,
    ::testing::Values(
-        make_tuple(&vp9_highbd_fdct16x16_c, &idct16x16_10, 0, VPX_BITS_10),
-        make_tuple(&vp9_highbd_fdct16x16_c, &idct16x16_12, 0, VPX_BITS_12),
-        make_tuple(&vp9_fdct16x16_c, &vp9_idct16x16_256_add_c, 0, VPX_BITS_8)));
+        make_tuple(&vpx_highbd_fdct16x16_c, &idct16x16_10, 0, VPX_BITS_10),
+        make_tuple(&vpx_highbd_fdct16x16_c, &idct16x16_12, 0, VPX_BITS_12),
+        make_tuple(&vpx_fdct16x16_c, &vpx_idct16x16_256_add_c, 0, VPX_BITS_8)));
 #else
-INSTANTIATE_TEST_CASE_P(
-    C, Trans16x16DCT,
-    ::testing::Values(
-        make_tuple(&vp9_fdct16x16_c, &vp9_idct16x16_256_add_c, 0, VPX_BITS_8)));
+INSTANTIATE_TEST_CASE_P(C, Trans16x16DCT,
+                        ::testing::Values(make_tuple(&vpx_fdct16x16_c,
+                                                     &vpx_idct16x16_256_add_c,
+                                                     0, VPX_BITS_8)));
 #endif  // CONFIG_VP9_HIGHBITDEPTH

 #if CONFIG_VP9_HIGHBITDEPTH
@@ -847,6 +837,11 @@ INSTANTIATE_TEST_CASE_P(
        make_tuple(&vp9_fht16x16_c, &vp9_iht16x16_256_add_c, 1, VPX_BITS_8),
        make_tuple(&vp9_fht16x16_c, &vp9_iht16x16_256_add_c, 2, VPX_BITS_8),
        make_tuple(&vp9_fht16x16_c, &vp9_iht16x16_256_add_c, 3, VPX_BITS_8)));
+INSTANTIATE_TEST_CASE_P(
+    C, PartialTrans16x16Test,
+    ::testing::Values(make_tuple(&vpx_highbd_fdct16x16_1_c, VPX_BITS_8),
+                      make_tuple(&vpx_highbd_fdct16x16_1_c, VPX_BITS_10),
+                      make_tuple(&vpx_highbd_fdct16x16_1_c, VPX_BITS_12)));
 #else
 INSTANTIATE_TEST_CASE_P(
    C, Trans16x16HT,
@@ -855,60 +850,53 @@ INSTANTIATE_TEST_CASE_P(
        make_tuple(&vp9_fht16x16_c, &vp9_iht16x16_256_add_c, 1, VPX_BITS_8),
        make_tuple(&vp9_fht16x16_c, &vp9_iht16x16_256_add_c, 2, VPX_BITS_8),
        make_tuple(&vp9_fht16x16_c, &vp9_iht16x16_256_add_c, 3, VPX_BITS_8)));
+INSTANTIATE_TEST_CASE_P(C, PartialTrans16x16Test,
+                        ::testing::Values(make_tuple(&vpx_fdct16x16_1_c,
+                                                     VPX_BITS_8)));
 #endif  // CONFIG_VP9_HIGHBITDEPTH

-#if HAVE_NEON_ASM && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
+#if HAVE_NEON && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
 INSTANTIATE_TEST_CASE_P(
    NEON, Trans16x16DCT,
-    ::testing::Values(
-        make_tuple(&vp9_fdct16x16_c,
-                   &vp9_idct16x16_256_add_neon, 0, VPX_BITS_8)));
+    ::testing::Values(make_tuple(&vpx_fdct16x16_c, &vpx_idct16x16_256_add_neon,
+                                 0, VPX_BITS_8)));
 #endif

 #if HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
 INSTANTIATE_TEST_CASE_P(
    SSE2, Trans16x16DCT,
-    ::testing::Values(
-        make_tuple(&vp9_fdct16x16_sse2,
-                   &vp9_idct16x16_256_add_sse2, 0, VPX_BITS_8)));
+    ::testing::Values(make_tuple(&vpx_fdct16x16_sse2,
+                                 &vpx_idct16x16_256_add_sse2, 0, VPX_BITS_8)));
 INSTANTIATE_TEST_CASE_P(
    SSE2, Trans16x16HT,
-    ::testing::Values(
-        make_tuple(&vp9_fht16x16_sse2, &vp9_iht16x16_256_add_sse2, 0,
-                   VPX_BITS_8),
-        make_tuple(&vp9_fht16x16_sse2, &vp9_iht16x16_256_add_sse2, 1,
-                   VPX_BITS_8),
-        make_tuple(&vp9_fht16x16_sse2, &vp9_iht16x16_256_add_sse2, 2,
-                   VPX_BITS_8),
-        make_tuple(&vp9_fht16x16_sse2, &vp9_iht16x16_256_add_sse2, 3,
-                   VPX_BITS_8)));
+    ::testing::Values(make_tuple(&vp9_fht16x16_sse2, &vp9_iht16x16_256_add_sse2,
+                                 0, VPX_BITS_8),
+                      make_tuple(&vp9_fht16x16_sse2, &vp9_iht16x16_256_add_sse2,
+                                 1, VPX_BITS_8),
+                      make_tuple(&vp9_fht16x16_sse2, &vp9_iht16x16_256_add_sse2,
+                                 2, VPX_BITS_8),
+                      make_tuple(&vp9_fht16x16_sse2, &vp9_iht16x16_256_add_sse2,
+                                 3, VPX_BITS_8)));
+INSTANTIATE_TEST_CASE_P(SSE2, PartialTrans16x16Test,
+                        ::testing::Values(make_tuple(&vpx_fdct16x16_1_sse2,
+                                                     VPX_BITS_8)));
 #endif  // HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE

 #if HAVE_SSE2 && CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
 INSTANTIATE_TEST_CASE_P(
    SSE2, Trans16x16DCT,
    ::testing::Values(
-        make_tuple(&vp9_highbd_fdct16x16_sse2,
-                   &idct16x16_10, 0, VPX_BITS_10),
-        make_tuple(&vp9_highbd_fdct16x16_c,
-                   &idct16x16_256_add_10_sse2, 0, VPX_BITS_10),
-        make_tuple(&vp9_highbd_fdct16x16_sse2,
-                   &idct16x16_12, 0, VPX_BITS_12),
-        make_tuple(&vp9_highbd_fdct16x16_c,
-                   &idct16x16_256_add_12_sse2, 0, VPX_BITS_12),
-        make_tuple(&vp9_fdct16x16_sse2,
-                   &vp9_idct16x16_256_add_c, 0, VPX_BITS_8)));
+        make_tuple(&vpx_highbd_fdct16x16_sse2, &idct16x16_10, 0, VPX_BITS_10),
+        make_tuple(&vpx_highbd_fdct16x16_c, &idct16x16_256_add_10_sse2, 0,
+                   VPX_BITS_10),
+        make_tuple(&vpx_highbd_fdct16x16_sse2, &idct16x16_12, 0, VPX_BITS_12),
+        make_tuple(&vpx_highbd_fdct16x16_c, &idct16x16_256_add_12_sse2, 0,
+                   VPX_BITS_12),
+        make_tuple(&vpx_fdct16x16_sse2, &vpx_idct16x16_256_add_c, 0,
+                   VPX_BITS_8)));
 INSTANTIATE_TEST_CASE_P(
    SSE2, Trans16x16HT,
    ::testing::Values(
-        make_tuple(&vp9_highbd_fht16x16_sse2, &iht16x16_10, 0, VPX_BITS_10),
-        make_tuple(&vp9_highbd_fht16x16_sse2, &iht16x16_10, 1, VPX_BITS_10),
-        make_tuple(&vp9_highbd_fht16x16_sse2, &iht16x16_10, 2, VPX_BITS_10),
-        make_tuple(&vp9_highbd_fht16x16_sse2, &iht16x16_10, 3, VPX_BITS_10),
-        make_tuple(&vp9_highbd_fht16x16_sse2, &iht16x16_12, 0, VPX_BITS_12),
-        make_tuple(&vp9_highbd_fht16x16_sse2, &iht16x16_12, 1, VPX_BITS_12),
-        make_tuple(&vp9_highbd_fht16x16_sse2, &iht16x16_12, 2, VPX_BITS_12),
-        make_tuple(&vp9_highbd_fht16x16_sse2, &iht16x16_12, 3, VPX_BITS_12),
        make_tuple(&vp9_fht16x16_sse2, &vp9_iht16x16_256_add_c, 0, VPX_BITS_8),
        make_tuple(&vp9_fht16x16_sse2, &vp9_iht16x16_256_add_c, 1, VPX_BITS_8),
        make_tuple(&vp9_fht16x16_sse2, &vp9_iht16x16_256_add_c, 2, VPX_BITS_8),
@@ -918,23 +906,24 @@ INSTANTIATE_TEST_CASE_P(
 // that to test both branches.
 INSTANTIATE_TEST_CASE_P(
    SSE2, InvTrans16x16DCT,
-    ::testing::Values(
-        make_tuple(&idct16x16_10_add_10_c,
-                   &idct16x16_10_add_10_sse2, 3167, VPX_BITS_10),
-        make_tuple(&idct16x16_10,
-                   &idct16x16_256_add_10_sse2, 3167, VPX_BITS_10),
-        make_tuple(&idct16x16_10_add_12_c,
-                   &idct16x16_10_add_12_sse2, 3167, VPX_BITS_12),
-        make_tuple(&idct16x16_12,
-                   &idct16x16_256_add_12_sse2, 3167, VPX_BITS_12)));
+    ::testing::Values(make_tuple(&idct16x16_10_add_10_c,
+                                 &idct16x16_10_add_10_sse2, 3167, VPX_BITS_10),
+                      make_tuple(&idct16x16_10, &idct16x16_256_add_10_sse2,
+                                 3167, VPX_BITS_10),
+                      make_tuple(&idct16x16_10_add_12_c,
+                                 &idct16x16_10_add_12_sse2, 3167, VPX_BITS_12),
+                      make_tuple(&idct16x16_12, &idct16x16_256_add_12_sse2,
+                                 3167, VPX_BITS_12)));
+INSTANTIATE_TEST_CASE_P(SSE2, PartialTrans16x16Test,
+                        ::testing::Values(make_tuple(&vpx_fdct16x16_1_sse2,
+                                                     VPX_BITS_8)));
 #endif  // HAVE_SSE2 && CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE

 #if HAVE_MSA && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
-INSTANTIATE_TEST_CASE_P(
-    MSA, Trans16x16DCT,
-    ::testing::Values(
-        make_tuple(&vp9_fdct16x16_msa,
-                   &vp9_idct16x16_256_add_msa, 0, VPX_BITS_8)));
+INSTANTIATE_TEST_CASE_P(MSA, Trans16x16DCT,
+                        ::testing::Values(make_tuple(&vpx_fdct16x16_msa,
+                                                     &vpx_idct16x16_256_add_msa,
+                                                     0, VPX_BITS_8)));
 INSTANTIATE_TEST_CASE_P(
    MSA, Trans16x16HT,
    ::testing::Values(
@@ -943,5 +932,8 @@ INSTANTIATE_TEST_CASE_P(
        make_tuple(&vp9_fht16x16_msa, &vp9_iht16x16_256_add_msa, 2, VPX_BITS_8),
        make_tuple(&vp9_fht16x16_msa, &vp9_iht16x16_256_add_msa, 3,
                   VPX_BITS_8)));
+INSTANTIATE_TEST_CASE_P(MSA, PartialTrans16x16Test,
+                        ::testing::Values(make_tuple(&vpx_fdct16x16_1_msa,
+                                                     VPX_BITS_8)));
 #endif  // HAVE_MSA && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
 }  // namespace
--- a/test/dct32x32_test.cc
+++ b/test/dct32x32_test.cc
@@ -13,29 +13,23 @@
 #include <string.h>

 #include "third_party/googletest/src/include/gtest/gtest.h"
+
+#include "./vp9_rtcd.h"
+#include "./vpx_config.h"
+#include "./vpx_dsp_rtcd.h"
 #include "test/acm_random.h"
 #include "test/clear_system_state.h"
 #include "test/register_state_check.h"
 #include "test/util.h"
-
-#include "./vpx_config.h"
-#include "./vp9_rtcd.h"
 #include "vp9/common/vp9_entropy.h"
 #include "vpx/vpx_codec.h"
 #include "vpx/vpx_integer.h"
 #include "vpx_ports/mem.h"
+#include "vpx_ports/msvc.h"  // for round()

 using libvpx_test::ACMRandom;

 namespace {
-#ifdef _MSC_VER
-static int round(double x) {
-  if (x < 0)
-    return static_cast<int>(ceil(x - 0.5));
-  else
-    return static_cast<int>(floor(x + 0.5));
-}
-#endif

 const int kNumCoeffs = 1024;
 const double kPi = 3.141592653589793238462643383279502884;
@@ -43,10 +37,10 @@ void reference_32x32_dct_1d(const double in[32], double out[32]) {
  const double kInvSqrt2 = 0.707106781186547524400844362104;
  for (int k = 0; k < 32; k++) {
    out[k] = 0.0;
-    for (int n = 0; n < 32; n++)
+    for (int n = 0; n < 32; n++) {
      out[k] += in[n] * cos(kPi * (2 * n + 1) * k / 64.0);
-    if (k == 0)
-      out[k] = out[k] * kInvSqrt2;
+    }
+    if (k == 0) out[k] = out[k] * kInvSqrt2;
  }
 }

@@ -55,21 +49,17 @@ void reference_32x32_dct_2d(const int16_t input[kNumCoeffs],
  // First transform columns
  for (int i = 0; i < 32; ++i) {
    double temp_in[32], temp_out[32];
-    for (int j = 0; j < 32; ++j)
-      temp_in[j] = input[j*32 + i];
+    for (int j = 0; j < 32; ++j) temp_in[j] = input[j * 32 + i];
    reference_32x32_dct_1d(temp_in, temp_out);
-    for (int j = 0; j < 32; ++j)
-      output[j * 32 + i] = temp_out[j];
+    for (int j = 0; j < 32; ++j) output[j * 32 + i] = temp_out[j];
  }
  // Then transform rows
  for (int i = 0; i < 32; ++i) {
    double temp_in[32], temp_out[32];
-    for (int j = 0; j < 32; ++j)
-      temp_in[j] = output[j + i*32];
+    for (int j = 0; j < 32; ++j) temp_in[j] = output[j + i * 32];
    reference_32x32_dct_1d(temp_in, temp_out);
    // Scale by some magic number
-    for (int j = 0; j < 32; ++j)
-      output[j + i * 32] = temp_out[j] / 4;
+    for (int j = 0; j < 32; ++j) output[j + i * 32] = temp_out[j] / 4;
  }
 }

@@ -80,16 +70,12 @@ typedef std::tr1::tuple<FwdTxfmFunc, InvTxfmFunc, int, vpx_bit_depth_t>
    Trans32x32Param;

 #if CONFIG_VP9_HIGHBITDEPTH
-void idct32x32_8(const tran_low_t *in, uint8_t *out, int stride) {
-  vp9_highbd_idct32x32_1024_add_c(in, out, stride, 8);
-}
-
 void idct32x32_10(const tran_low_t *in, uint8_t *out, int stride) {
-  vp9_highbd_idct32x32_1024_add_c(in, out, stride, 10);
+  vpx_highbd_idct32x32_1024_add_c(in, out, stride, 10);
 }

 void idct32x32_12(const tran_low_t *in, uint8_t *out, int stride) {
-  vp9_highbd_idct32x32_1024_add_c(in, out, stride, 12);
+  vpx_highbd_idct32x32_1024_add_c(in, out, stride, 12);
 }
 #endif  // CONFIG_VP9_HIGHBITDEPTH

@@ -99,8 +85,8 @@ class Trans32x32Test : public ::testing::TestWithParam<Trans32x32Param> {
  virtual void SetUp() {
    fwd_txfm_ = GET_PARAM(0);
    inv_txfm_ = GET_PARAM(1);
-    version_  = GET_PARAM(2);  // 0: high precision forward transform
-                               // 1: low precision version for rd loop
+    version_ = GET_PARAM(2);  // 0: high precision forward transform
+                              // 1: low precision version for rd loop
    bit_depth_ = GET_PARAM(3);
    mask_ = (1 << bit_depth_) - 1;
  }
@@ -150,21 +136,20 @@ TEST_P(Trans32x32Test, AccuracyCheck) {
      ASM_REGISTER_STATE_CHECK(inv_txfm_(test_temp_block, dst, 32));
 #if CONFIG_VP9_HIGHBITDEPTH
    } else {
-      ASM_REGISTER_STATE_CHECK(inv_txfm_(test_temp_block,
-                                         CONVERT_TO_BYTEPTR(dst16), 32));
+      ASM_REGISTER_STATE_CHECK(
+          inv_txfm_(test_temp_block, CONVERT_TO_BYTEPTR(dst16), 32));
 #endif
    }

    for (int j = 0; j < kNumCoeffs; ++j) {
 #if CONFIG_VP9_HIGHBITDEPTH
-      const uint32_t diff =
+      const int32_t diff =
          bit_depth_ == VPX_BITS_8 ? dst[j] - src[j] : dst16[j] - src16[j];
 #else
-      const uint32_t diff = dst[j] - src[j];
+      const int32_t diff = dst[j] - src[j];
 #endif
      const uint32_t error = diff * diff;
-      if (max_error < error)
-        max_error = error;
+      if (max_error < error) max_error = error;
      total_error += error;
    }
  }
@@ -190,11 +175,12 @@ TEST_P(Trans32x32Test, CoeffCheck) {
  DECLARE_ALIGNED(16, tran_low_t, output_block[kNumCoeffs]);

  for (int i = 0; i < count_test_block; ++i) {
-    for (int j = 0; j < kNumCoeffs; ++j)
+    for (int j = 0; j < kNumCoeffs; ++j) {
      input_block[j] = (rnd.Rand16() & mask_) - (rnd.Rand16() & mask_);
+    }

    const int stride = 32;
-    vp9_fdct32x32_c(input_block, output_ref_block, stride);
+    vpx_fdct32x32_c(input_block, output_ref_block, stride);
    ASM_REGISTER_STATE_CHECK(fwd_txfm_(input_block, output_block, stride));

    if (version_ == 0) {
@@ -223,15 +209,13 @@ TEST_P(Trans32x32Test, MemCheck) {
      input_extreme_block[j] = rnd.Rand8() & 1 ? mask_ : -mask_;
    }
    if (i == 0) {
-      for (int j = 0; j < kNumCoeffs; ++j)
-        input_extreme_block[j] = mask_;
+      for (int j = 0; j < kNumCoeffs; ++j) input_extreme_block[j] = mask_;
    } else if (i == 1) {
-      for (int j = 0; j < kNumCoeffs; ++j)
-        input_extreme_block[j] = -mask_;
+      for (int j = 0; j < kNumCoeffs; ++j) input_extreme_block[j] = -mask_;
    }

    const int stride = 32;
-    vp9_fdct32x32_c(input_extreme_block, output_ref_block, stride);
+    vpx_fdct32x32_c(input_extreme_block, output_ref_block, stride);
    ASM_REGISTER_STATE_CHECK(
        fwd_txfm_(input_extreme_block, output_block, stride));

@@ -284,8 +268,9 @@ TEST_P(Trans32x32Test, InverseAccuracy) {
    }

    reference_32x32_dct_2d(in, out_r);
-    for (int j = 0; j < kNumCoeffs; ++j)
+    for (int j = 0; j < kNumCoeffs; ++j) {
      coeff[j] = static_cast<tran_low_t>(round(out_r[j]));
+    }
    if (bit_depth_ == VPX_BITS_8) {
      ASM_REGISTER_STATE_CHECK(inv_txfm_(coeff, dst, 32));
 #if CONFIG_VP9_HIGHBITDEPTH
@@ -301,94 +286,161 @@ TEST_P(Trans32x32Test, InverseAccuracy) {
      const int diff = dst[j] - src[j];
 #endif
      const int error = diff * diff;
-      EXPECT_GE(1, error)
-          << "Error: 32x32 IDCT has error " << error
-          << " at index " << j;
+      EXPECT_GE(1, error) << "Error: 32x32 IDCT has error " << error
+                          << " at index " << j;
    }
  }
 }

+class PartialTrans32x32Test
+    : public ::testing::TestWithParam<
+          std::tr1::tuple<FwdTxfmFunc, vpx_bit_depth_t> > {
+ public:
+  virtual ~PartialTrans32x32Test() {}
+  virtual void SetUp() {
+    fwd_txfm_ = GET_PARAM(0);
+    bit_depth_ = GET_PARAM(1);
+  }
+
+  virtual void TearDown() { libvpx_test::ClearSystemState(); }
+
+ protected:
+  vpx_bit_depth_t bit_depth_;
+  FwdTxfmFunc fwd_txfm_;
+};
+
+TEST_P(PartialTrans32x32Test, Extremes) {
+#if CONFIG_VP9_HIGHBITDEPTH
+  const int16_t maxval =
+      static_cast<int16_t>(clip_pixel_highbd(1 << 30, bit_depth_));
+#else
+  const int16_t maxval = 255;
+#endif
+  const int minval = -maxval;
+  DECLARE_ALIGNED(16, int16_t, input[kNumCoeffs]);
+  DECLARE_ALIGNED(16, tran_low_t, output[kNumCoeffs]);
+
+  for (int i = 0; i < kNumCoeffs; ++i) input[i] = maxval;
+  output[0] = 0;
+  ASM_REGISTER_STATE_CHECK(fwd_txfm_(input, output, 32));
+  EXPECT_EQ((maxval * kNumCoeffs) >> 3, output[0]);
+
+  for (int i = 0; i < kNumCoeffs; ++i) input[i] = minval;
+  output[0] = 0;
+  ASM_REGISTER_STATE_CHECK(fwd_txfm_(input, output, 32));
+  EXPECT_EQ((minval * kNumCoeffs) >> 3, output[0]);
+}
+
+TEST_P(PartialTrans32x32Test, Random) {
+#if CONFIG_VP9_HIGHBITDEPTH
+  const int16_t maxval =
+      static_cast<int16_t>(clip_pixel_highbd(1 << 30, bit_depth_));
+#else
+  const int16_t maxval = 255;
+#endif
+  DECLARE_ALIGNED(16, int16_t, input[kNumCoeffs]);
+  DECLARE_ALIGNED(16, tran_low_t, output[kNumCoeffs]);
+  ACMRandom rnd(ACMRandom::DeterministicSeed());
+
+  int sum = 0;
+  for (int i = 0; i < kNumCoeffs; ++i) {
+    const int val = (i & 1) ? -rnd(maxval + 1) : rnd(maxval + 1);
+    input[i] = val;
+    sum += val;
+  }
+  output[0] = 0;
+  ASM_REGISTER_STATE_CHECK(fwd_txfm_(input, output, 32));
+  EXPECT_EQ(sum >> 3, output[0]);
+}
+
 using std::tr1::make_tuple;

 #if CONFIG_VP9_HIGHBITDEPTH
 INSTANTIATE_TEST_CASE_P(
    C, Trans32x32Test,
    ::testing::Values(
-        make_tuple(&vp9_highbd_fdct32x32_c,
-                   &idct32x32_10, 0, VPX_BITS_10),
-        make_tuple(&vp9_highbd_fdct32x32_rd_c,
-                   &idct32x32_10, 1, VPX_BITS_10),
-        make_tuple(&vp9_highbd_fdct32x32_c,
-                   &idct32x32_12, 0, VPX_BITS_12),
-        make_tuple(&vp9_highbd_fdct32x32_rd_c,
-                   &idct32x32_12, 1, VPX_BITS_12),
-        make_tuple(&vp9_fdct32x32_c,
-                   &vp9_idct32x32_1024_add_c, 0, VPX_BITS_8),
-        make_tuple(&vp9_fdct32x32_rd_c,
-                   &vp9_idct32x32_1024_add_c, 1, VPX_BITS_8)));
+        make_tuple(&vpx_highbd_fdct32x32_c, &idct32x32_10, 0, VPX_BITS_10),
+        make_tuple(&vpx_highbd_fdct32x32_rd_c, &idct32x32_10, 1, VPX_BITS_10),
+        make_tuple(&vpx_highbd_fdct32x32_c, &idct32x32_12, 0, VPX_BITS_12),
+        make_tuple(&vpx_highbd_fdct32x32_rd_c, &idct32x32_12, 1, VPX_BITS_12),
+        make_tuple(&vpx_fdct32x32_c, &vpx_idct32x32_1024_add_c, 0, VPX_BITS_8),
+        make_tuple(&vpx_fdct32x32_rd_c, &vpx_idct32x32_1024_add_c, 1,
+                   VPX_BITS_8)));
+INSTANTIATE_TEST_CASE_P(
+    C, PartialTrans32x32Test,
+    ::testing::Values(make_tuple(&vpx_highbd_fdct32x32_1_c, VPX_BITS_8),
+                      make_tuple(&vpx_highbd_fdct32x32_1_c, VPX_BITS_10),
+                      make_tuple(&vpx_highbd_fdct32x32_1_c, VPX_BITS_12)));
 #else
 INSTANTIATE_TEST_CASE_P(
    C, Trans32x32Test,
-    ::testing::Values(
-        make_tuple(&vp9_fdct32x32_c,
-                   &vp9_idct32x32_1024_add_c, 0, VPX_BITS_8),
-        make_tuple(&vp9_fdct32x32_rd_c,
-                   &vp9_idct32x32_1024_add_c, 1, VPX_BITS_8)));
+    ::testing::Values(make_tuple(&vpx_fdct32x32_c, &vpx_idct32x32_1024_add_c, 0,
+                                 VPX_BITS_8),
+                      make_tuple(&vpx_fdct32x32_rd_c, &vpx_idct32x32_1024_add_c,
+                                 1, VPX_BITS_8)));
+INSTANTIATE_TEST_CASE_P(C, PartialTrans32x32Test,
+                        ::testing::Values(make_tuple(&vpx_fdct32x32_1_c,
+                                                     VPX_BITS_8)));
 #endif  // CONFIG_VP9_HIGHBITDEPTH

-#if HAVE_NEON_ASM && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
+#if HAVE_NEON && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
 INSTANTIATE_TEST_CASE_P(
    NEON, Trans32x32Test,
-    ::testing::Values(
-        make_tuple(&vp9_fdct32x32_c,
-                   &vp9_idct32x32_1024_add_neon, 0, VPX_BITS_8),
-        make_tuple(&vp9_fdct32x32_rd_c,
-                   &vp9_idct32x32_1024_add_neon, 1, VPX_BITS_8)));
-#endif  // HAVE_NEON_ASM && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
+    ::testing::Values(make_tuple(&vpx_fdct32x32_c, &vpx_idct32x32_1024_add_neon,
+                                 0, VPX_BITS_8),
+                      make_tuple(&vpx_fdct32x32_rd_c,
+                                 &vpx_idct32x32_1024_add_neon, 1, VPX_BITS_8)));
+#endif  // HAVE_NEON && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE

 #if HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
 INSTANTIATE_TEST_CASE_P(
    SSE2, Trans32x32Test,
-    ::testing::Values(
-        make_tuple(&vp9_fdct32x32_sse2,
-                   &vp9_idct32x32_1024_add_sse2, 0, VPX_BITS_8),
-        make_tuple(&vp9_fdct32x32_rd_sse2,
-                   &vp9_idct32x32_1024_add_sse2, 1, VPX_BITS_8)));
+    ::testing::Values(make_tuple(&vpx_fdct32x32_sse2,
+                                 &vpx_idct32x32_1024_add_sse2, 0, VPX_BITS_8),
+                      make_tuple(&vpx_fdct32x32_rd_sse2,
+                                 &vpx_idct32x32_1024_add_sse2, 1, VPX_BITS_8)));
+INSTANTIATE_TEST_CASE_P(SSE2, PartialTrans32x32Test,
+                        ::testing::Values(make_tuple(&vpx_fdct32x32_1_sse2,
+                                                     VPX_BITS_8)));
 #endif  // HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE

 #if HAVE_SSE2 && CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
 INSTANTIATE_TEST_CASE_P(
    SSE2, Trans32x32Test,
    ::testing::Values(
-        make_tuple(&vp9_highbd_fdct32x32_sse2, &idct32x32_10, 0, VPX_BITS_10),
-        make_tuple(&vp9_highbd_fdct32x32_rd_sse2, &idct32x32_10, 1,
+        make_tuple(&vpx_highbd_fdct32x32_sse2, &idct32x32_10, 0, VPX_BITS_10),
+        make_tuple(&vpx_highbd_fdct32x32_rd_sse2, &idct32x32_10, 1,
                   VPX_BITS_10),
-        make_tuple(&vp9_highbd_fdct32x32_sse2, &idct32x32_12, 0, VPX_BITS_12),
-        make_tuple(&vp9_highbd_fdct32x32_rd_sse2, &idct32x32_12, 1,
+        make_tuple(&vpx_highbd_fdct32x32_sse2, &idct32x32_12, 0, VPX_BITS_12),
+        make_tuple(&vpx_highbd_fdct32x32_rd_sse2, &idct32x32_12, 1,
                   VPX_BITS_12),
-        make_tuple(&vp9_fdct32x32_sse2, &vp9_idct32x32_1024_add_c, 0,
+        make_tuple(&vpx_fdct32x32_sse2, &vpx_idct32x32_1024_add_c, 0,
                   VPX_BITS_8),
-        make_tuple(&vp9_fdct32x32_rd_sse2, &vp9_idct32x32_1024_add_c, 1,
+        make_tuple(&vpx_fdct32x32_rd_sse2, &vpx_idct32x32_1024_add_c, 1,
                   VPX_BITS_8)));
+INSTANTIATE_TEST_CASE_P(SSE2, PartialTrans32x32Test,
+                        ::testing::Values(make_tuple(&vpx_fdct32x32_1_sse2,
+                                                     VPX_BITS_8)));
 #endif  // HAVE_SSE2 && CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE

 #if HAVE_AVX2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
 INSTANTIATE_TEST_CASE_P(
    AVX2, Trans32x32Test,
-    ::testing::Values(
-        make_tuple(&vp9_fdct32x32_avx2,
-                   &vp9_idct32x32_1024_add_sse2, 0, VPX_BITS_8),
-        make_tuple(&vp9_fdct32x32_rd_avx2,
-                   &vp9_idct32x32_1024_add_sse2, 1, VPX_BITS_8)));
+    ::testing::Values(make_tuple(&vpx_fdct32x32_avx2,
+                                 &vpx_idct32x32_1024_add_sse2, 0, VPX_BITS_8),
+                      make_tuple(&vpx_fdct32x32_rd_avx2,
+                                 &vpx_idct32x32_1024_add_sse2, 1, VPX_BITS_8)));
 #endif  // HAVE_AVX2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE

 #if HAVE_MSA && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
 INSTANTIATE_TEST_CASE_P(
    MSA, Trans32x32Test,
-    ::testing::Values(
-        make_tuple(&vp9_fdct32x32_msa,
-                   &vp9_idct32x32_1024_add_msa, 0, VPX_BITS_8),
-        make_tuple(&vp9_fdct32x32_rd_msa,
-                   &vp9_idct32x32_1024_add_msa, 1, VPX_BITS_8)));
+    ::testing::Values(make_tuple(&vpx_fdct32x32_msa,
+                                 &vpx_idct32x32_1024_add_msa, 0, VPX_BITS_8),
+                      make_tuple(&vpx_fdct32x32_rd_msa,
+                                 &vpx_idct32x32_1024_add_msa, 1, VPX_BITS_8)));
+INSTANTIATE_TEST_CASE_P(MSA, PartialTrans32x32Test,
+                        ::testing::Values(make_tuple(&vpx_fdct32x32_1_msa,
+                                                     VPX_BITS_8)));
 #endif  // HAVE_MSA && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
 }  // namespace
--- a/test/decode_api_test.cc
+++ b/test/decode_api_test.cc
@@ -7,10 +7,11 @@
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
+
 #include "third_party/googletest/src/include/gtest/gtest.h"

-#include "test/ivf_video_source.h"
 #include "./vpx_config.h"
+#include "test/ivf_video_source.h"
 #include "vpx/vp8dx.h"
 #include "vpx/vpx_decoder.h"

@@ -27,7 +28,7 @@ TEST(DecodeAPI, InvalidParams) {
    &vpx_codec_vp9_dx_algo,
 #endif
  };
-  uint8_t buf[1] = {0};
+  uint8_t buf[1] = { 0 };
  vpx_codec_ctx_t dec;

  EXPECT_EQ(VPX_CODEC_INVALID_PARAM, vpx_codec_dec_init(NULL, NULL, NULL, 0));
@@ -50,8 +51,7 @@ TEST(DecodeAPI, InvalidParams) {
              vpx_codec_decode(&dec, buf, NELEMENTS(buf), NULL, 0));
    EXPECT_EQ(VPX_CODEC_INVALID_PARAM,
              vpx_codec_decode(&dec, NULL, NELEMENTS(buf), NULL, 0));
-    EXPECT_EQ(VPX_CODEC_INVALID_PARAM,
-              vpx_codec_decode(&dec, buf, 0, NULL, 0));
+    EXPECT_EQ(VPX_CODEC_INVALID_PARAM, vpx_codec_decode(&dec, buf, 0, NULL, 0));

    EXPECT_EQ(VPX_CODEC_OK, vpx_codec_destroy(&dec));
  }
@@ -76,12 +76,9 @@ TEST(DecodeAPI, OptionalParams) {
 // Test VP9 codec controls after a decode error to ensure the code doesn't
 // misbehave.
 void TestVp9Controls(vpx_codec_ctx_t *dec) {
-  static const int kControls[] = {
-    VP8D_GET_LAST_REF_UPDATES,
-    VP8D_GET_FRAME_CORRUPTED,
-    VP9D_GET_DISPLAY_SIZE,
-    VP9D_GET_FRAME_SIZE
-  };
+  static const int kControls[] = { VP8D_GET_LAST_REF_UPDATES,
+                                   VP8D_GET_FRAME_CORRUPTED,
+                                   VP9D_GET_DISPLAY_SIZE, VP9D_GET_FRAME_SIZE };
  int val[2];

  for (int i = 0; i < NELEMENTS(kControls); ++i) {
@@ -90,9 +87,7 @@ void TestVp9Controls(vpx_codec_ctx_t *dec) {
      case VP8D_GET_FRAME_CORRUPTED:
        EXPECT_EQ(VPX_CODEC_ERROR, res) << kControls[i];
        break;
-      default:
-        EXPECT_EQ(VPX_CODEC_OK, res) << kControls[i];
-        break;
+      default: EXPECT_EQ(VPX_CODEC_OK, res) << kControls[i]; break;
    }
    EXPECT_EQ(VPX_CODEC_INVALID_PARAM,
              vpx_codec_control_(dec, kControls[i], NULL));
@@ -129,14 +124,52 @@ TEST(DecodeAPI, Vp9InvalidDecode) {
  vpx_codec_ctx_t dec;
  EXPECT_EQ(VPX_CODEC_OK, vpx_codec_dec_init(&dec, codec, NULL, 0));
  const uint32_t frame_size = static_cast<uint32_t>(video.frame_size());
+#if CONFIG_VP9_HIGHBITDEPTH
  EXPECT_EQ(VPX_CODEC_MEM_ERROR,
            vpx_codec_decode(&dec, video.cxdata(), frame_size, NULL, 0));
+#else
+  EXPECT_EQ(VPX_CODEC_UNSUP_BITSTREAM,
+            vpx_codec_decode(&dec, video.cxdata(), frame_size, NULL, 0));
+#endif
  vpx_codec_iter_t iter = NULL;
  EXPECT_EQ(NULL, vpx_codec_get_frame(&dec, &iter));

  TestVp9Controls(&dec);
  EXPECT_EQ(VPX_CODEC_OK, vpx_codec_destroy(&dec));
 }
+
+TEST(DecodeAPI, Vp9PeekSI) {
+  const vpx_codec_iface_t *const codec = &vpx_codec_vp9_dx_algo;
+  // The first 9 bytes are valid and the rest are made up. Below size 10 both
+  // calls return VPX_CODEC_UNSUP_BITSTREAM; from size 10 on, decode returns
+  // VPX_CODEC_CORRUPT_FRAME while peek returns VPX_CODEC_OK.
+  const uint8_t data[32] = {
+    0x85, 0xa4, 0xc1, 0xa1, 0x38, 0x81, 0xa3, 0x49, 0x83, 0xff, 0xff,
+    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+  };
+
+  for (uint32_t data_sz = 1; data_sz <= 32; ++data_sz) {  // Each prefix.
+    // Verify behavior of vpx_codec_decode. vpx_codec_decode doesn't even get
+    // to decoder_peek_si_internal on frames of size < 8.
+    if (data_sz >= 8) {
+      vpx_codec_ctx_t dec;  // Fresh decoder context per prefix length.
+      EXPECT_EQ(VPX_CODEC_OK, vpx_codec_dec_init(&dec, codec, NULL, 0));
+      EXPECT_EQ(
+          (data_sz < 10) ? VPX_CODEC_UNSUP_BITSTREAM : VPX_CODEC_CORRUPT_FRAME,
+          vpx_codec_decode(&dec, data, data_sz, NULL, 0));
+      vpx_codec_iter_t iter = NULL;
+      EXPECT_EQ(NULL, vpx_codec_get_frame(&dec, &iter));  // No frame out.
+      EXPECT_EQ(VPX_CODEC_OK, vpx_codec_destroy(&dec));
+    }
+
+    // Verify behavior of vpx_codec_peek_stream_info.
+    vpx_codec_stream_info_t si;
+    si.sz = sizeof(si);  // sz is filled in before the peek call.
+    EXPECT_EQ((data_sz < 10) ? VPX_CODEC_UNSUP_BITSTREAM : VPX_CODEC_OK,
+              vpx_codec_peek_stream_info(codec, data, data_sz, &si));
+  }
+}
 #endif  // CONFIG_VP9_DECODER

 }  // namespace
--- a/test/decode_perf_test.cc
+++ b/test/decode_perf_test.cc
@@ -28,7 +28,6 @@ namespace {
 #define VIDEO_NAME 0
 #define THREADS 1

-const int kMaxPsnr = 100;
 const double kUsecsInSec = 1000000.0;
 const char kNewEncodeOutputFile[] = "new_encode.ivf";

@@ -70,8 +69,7 @@ const DecodePerfParam kVP9DecodePerfVectors[] = {
   power/temp/min max frame decode times/etc
 */

-class DecodePerfTest : public ::testing::TestWithParam<DecodePerfParam> {
-};
+class DecodePerfTest : public ::testing::TestWithParam<DecodePerfParam> {};

 TEST_P(DecodePerfTest, PerfTest) {
  const char *const video_name = GET_PARAM(VIDEO_NAME);
@@ -92,8 +90,7 @@ TEST_P(DecodePerfTest, PerfTest) {
  }

  vpx_usec_timer_mark(&t);
-  const double elapsed_secs = double(vpx_usec_timer_elapsed(&t))
-                              / kUsecsInSec;
+  const double elapsed_secs = double(vpx_usec_timer_elapsed(&t)) / kUsecsInSec;
  const unsigned frames = video.frame_number();
  const double fps = double(frames) / elapsed_secs;

@@ -111,17 +108,13 @@ TEST_P(DecodePerfTest, PerfTest) {
 INSTANTIATE_TEST_CASE_P(VP9, DecodePerfTest,
                        ::testing::ValuesIn(kVP9DecodePerfVectors));

-class VP9NewEncodeDecodePerfTest :
-    public ::libvpx_test::EncoderTest,
-    public ::libvpx_test::CodecTestWithParam<libvpx_test::TestMode> {
+class VP9NewEncodeDecodePerfTest
+    : public ::libvpx_test::EncoderTest,
+      public ::libvpx_test::CodecTestWithParam<libvpx_test::TestMode> {
 protected:
  VP9NewEncodeDecodePerfTest()
-      : EncoderTest(GET_PARAM(0)),
-        encoding_mode_(GET_PARAM(1)),
-        speed_(0),
-        outfile_(0),
-        out_frames_(0) {
-  }
+      : EncoderTest(GET_PARAM(0)), encoding_mode_(GET_PARAM(1)), speed_(0),
+        outfile_(0), out_frames_(0) {}

  virtual ~VP9NewEncodeDecodePerfTest() {}

@@ -160,8 +153,9 @@ class VP9NewEncodeDecodePerfTest :

  virtual void EndPassHook() {
    if (outfile_ != NULL) {
-      if (!fseek(outfile_, 0, SEEK_SET))
+      if (!fseek(outfile_, 0, SEEK_SET)) {
        ivf_write_file_header(outfile_, &cfg_, VP9_FOURCC, out_frames_);
+      }
      fclose(outfile_);
      outfile_ = NULL;
    }
@@ -171,8 +165,9 @@ class VP9NewEncodeDecodePerfTest :
    ++out_frames_;

    // Write initial file header if first frame.
-    if (pkt->data.frame.pts == 0)
+    if (pkt->data.frame.pts == 0) {
      ivf_write_file_header(outfile_, &cfg_, VP9_FOURCC, out_frames_);
+    }

    // Write frame header and data.
    ivf_write_frame_header(outfile_, out_frames_, pkt->data.frame.sz);
@@ -180,11 +175,9 @@ class VP9NewEncodeDecodePerfTest :
              pkt->data.frame.sz);
  }

-  virtual bool DoDecode() { return false; }
+  virtual bool DoDecode() const { return false; }

-  void set_speed(unsigned int speed) {
-    speed_ = speed;
-  }
+  void set_speed(unsigned int speed) { speed_ = speed; }

 private:
  libvpx_test::TestMode encoding_mode_;
@@ -196,10 +189,7 @@ class VP9NewEncodeDecodePerfTest :
 struct EncodePerfTestVideo {
  EncodePerfTestVideo(const char *name_, uint32_t width_, uint32_t height_,
                      uint32_t bitrate_, int frames_)
-      : name(name_),
-        width(width_),
-        height(height_),
-        bitrate(bitrate_),
+      : name(name_), width(width_), height(height_), bitrate(bitrate_),
        frames(frames_) {}
  const char *name;
  uint32_t width;
@@ -225,10 +215,8 @@ TEST_P(VP9NewEncodeDecodePerfTest, PerfTest) {

  const char *video_name = kVP9EncodePerfTestVectors[i].name;
  libvpx_test::I420VideoSource video(
-      video_name,
-      kVP9EncodePerfTestVectors[i].width,
-      kVP9EncodePerfTestVectors[i].height,
-      timebase.den, timebase.num, 0,
+      video_name, kVP9EncodePerfTestVectors[i].width,
+      kVP9EncodePerfTestVectors[i].height, timebase.den, timebase.num, 0,
      kVP9EncodePerfTestVectors[i].frames);
  set_speed(2);

@@ -268,6 +256,6 @@ TEST_P(VP9NewEncodeDecodePerfTest, PerfTest) {
  printf("}\n");
 }

-VP9_INSTANTIATE_TEST_CASE(
-  VP9NewEncodeDecodePerfTest, ::testing::Values(::libvpx_test::kTwoPassGood));
+VP9_INSTANTIATE_TEST_CASE(VP9NewEncodeDecodePerfTest,
+                          ::testing::Values(::libvpx_test::kTwoPassGood));
 }  // namespace
--- a/test/decode_svc_test.cc
+++ b/test/decode_svc_test.cc
@@ -0,0 +1,124 @@
+/*
+ *  Copyright (c) 2016 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <string>
+
+#include "test/codec_factory.h"
+#include "test/decode_test_driver.h"
+#include "test/ivf_video_source.h"
+#include "test/test_vectors.h"
+#include "test/util.h"
+
+namespace {
+
+const unsigned int kNumFrames = 19;  // Expected final frame_number.
+
+class DecodeSvcTest : public ::libvpx_test::DecoderTest,
+                      public ::libvpx_test::CodecTestWithParam<const char *> {
+ protected:
+  DecodeSvcTest() : DecoderTest(GET_PARAM(::libvpx_test::kCodecFactoryParam)) {}
+  virtual ~DecodeSvcTest() {}
+
+  virtual void PreDecodeFrameHook(  // Selects the spatial layer once.
+      const libvpx_test::CompressedVideoSource &video,
+      libvpx_test::Decoder *decoder) {
+    if (video.frame_number() == 0)  // Only before frame number 0.
+      decoder->Control(VP9_DECODE_SVC_SPATIAL_LAYER, spatial_layer_);
+  }
+
+  virtual void DecompressedFrameHook(const vpx_image_t &img,
+                                     const unsigned int frame_number) {
+    ASSERT_EQ(img.d_w, width_);  // Every frame must match the expected size.
+    ASSERT_EQ(img.d_h, height_);
+    total_frames_ = frame_number;  // Keeps the most recent frame_number.
+  }
+
+  int spatial_layer_;  // Sent via VP9_DECODE_SVC_SPATIAL_LAYER; set by tests.
+  unsigned int width_;  // Expected img.d_w; not set by the constructor.
+  unsigned int height_;  // Expected img.d_h; not set by the constructor.
+  unsigned int total_frames_;  // Last frame_number seen by the hook.
+};
+
+// SVC test vector is 1280x720, with 3 spatial layers, and 20 frames.
+
+// Decode the SVC test vector, which has 3 spatial layers, and decode up to
+// spatial layer 0. Verify the resolution of each decoded frame and the total
+// number of frames decoded. This results in 1/4x1/4 resolution (320x180).
+TEST_P(DecodeSvcTest, DecodeSvcTestUpToSpatialLayer0) {
+  const std::string filename = GET_PARAM(1);  // Name given to the IVF source.
+  testing::internal::scoped_ptr<libvpx_test::CompressedVideoSource> video;
+  video.reset(new libvpx_test::IVFVideoSource(filename));
+  ASSERT_TRUE(video.get() != NULL);
+  video->Init();
+  total_frames_ = 0;  // Overwritten by DecompressedFrameHook().
+  spatial_layer_ = 0;  // Applied in PreDecodeFrameHook().
+  width_ = 320;  // Expected d_w of each decoded frame.
+  height_ = 180;  // Expected d_h of each decoded frame.
+  ASSERT_NO_FATAL_FAILURE(RunLoop(video.get()));
+  ASSERT_EQ(total_frames_, kNumFrames);  // Last frame_number reported.
+}
+
+// Decode the SVC test vector, which has 3 spatial layers, and decode up to
+// spatial layer 1. Verify the resolution of each decoded frame and the total
+// number of frames decoded. This results in 1/2x1/2 resolution (640x360).
+TEST_P(DecodeSvcTest, DecodeSvcTestUpToSpatialLayer1) {
+  const std::string filename = GET_PARAM(1);  // Name given to the IVF source.
+  testing::internal::scoped_ptr<libvpx_test::CompressedVideoSource> video;
+  video.reset(new libvpx_test::IVFVideoSource(filename));
+  ASSERT_TRUE(video.get() != NULL);
+  video->Init();
+  total_frames_ = 0;  // Overwritten by DecompressedFrameHook().
+  spatial_layer_ = 1;  // Applied in PreDecodeFrameHook().
+  width_ = 640;  // Expected d_w of each decoded frame.
+  height_ = 360;  // Expected d_h of each decoded frame.
+  ASSERT_NO_FATAL_FAILURE(RunLoop(video.get()));
+  ASSERT_EQ(total_frames_, kNumFrames);  // Last frame_number reported.
+}
+
+// Decode the SVC test vector, which has 3 spatial layers, and decode up to
+// spatial layer 2. Verify the resolution of each decoded frame and the total
+// number of frames decoded. This results in the full resolution (1280x720).
+TEST_P(DecodeSvcTest, DecodeSvcTestUpToSpatialLayer2) {
+  const std::string filename = GET_PARAM(1);  // Name given to the IVF source.
+  testing::internal::scoped_ptr<libvpx_test::CompressedVideoSource> video;
+  video.reset(new libvpx_test::IVFVideoSource(filename));
+  ASSERT_TRUE(video.get() != NULL);
+  video->Init();
+  total_frames_ = 0;  // Overwritten by DecompressedFrameHook().
+  spatial_layer_ = 2;  // Applied in PreDecodeFrameHook().
+  width_ = 1280;  // Expected d_w of each decoded frame.
+  height_ = 720;  // Expected d_h of each decoded frame.
+  ASSERT_NO_FATAL_FAILURE(RunLoop(video.get()));
+  ASSERT_EQ(total_frames_, kNumFrames);  // Last frame_number reported.
+}
+
+// Decode the SVC test vector, which has 3 spatial layers, and decode up to
+// spatial layer 10. Verify the resolution of each decoded frame and the total
+// number of frames decoded. This is beyond the number of spatial layers, so
+// the decoding should result in the full resolution (1280x720).
+TEST_P(DecodeSvcTest, DecodeSvcTestUpToSpatialLayer10) {
+  const std::string filename = GET_PARAM(1);  // Name given to the IVF source.
+  testing::internal::scoped_ptr<libvpx_test::CompressedVideoSource> video;
+  video.reset(new libvpx_test::IVFVideoSource(filename));
+  ASSERT_TRUE(video.get() != NULL);
+  video->Init();
+  total_frames_ = 0;  // Overwritten by DecompressedFrameHook().
+  spatial_layer_ = 10;  // Deliberately above the vector's 3 layers.
+  width_ = 1280;  // Expected d_w of each decoded frame.
+  height_ = 720;  // Expected d_h of each decoded frame.
+  ASSERT_NO_FATAL_FAILURE(RunLoop(video.get()));
+  ASSERT_EQ(total_frames_, kNumFrames);  // Last frame_number reported.
+}
+
+VP9_INSTANTIATE_TEST_CASE(
+    DecodeSvcTest, ::testing::ValuesIn(libvpx_test::kVP9TestVectorsSvc,
+                                       libvpx_test::kVP9TestVectorsSvc +
+                                           libvpx_test::kNumVP9TestVectorsSvc));
+}  // namespace
--- a/test/decode_test_driver.cc
+++ b/test/decode_test_driver.cc
@@ -7,9 +7,11 @@
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
+
+#include "third_party/googletest/src/include/gtest/gtest.h"
+
 #include "test/codec_factory.h"
 #include "test/decode_test_driver.h"
-#include "third_party/googletest/src/include/gtest/gtest.h"
 #include "test/register_state_check.h"
 #include "test/video_source.h"

@@ -19,9 +21,8 @@ const char kVP8Name[] = "WebM Project VP8";

 vpx_codec_err_t Decoder::PeekStream(const uint8_t *cxdata, size_t size,
                                    vpx_codec_stream_info_t *stream_info) {
-  return vpx_codec_peek_stream_info(CodecInterface(),
-                                    cxdata, static_cast<unsigned int>(size),
-                                    stream_info);
+  return vpx_codec_peek_stream_info(
+      CodecInterface(), cxdata, static_cast<unsigned int>(size), stream_info);
 }

 vpx_codec_err_t Decoder::DecodeFrame(const uint8_t *cxdata, size_t size) {
@@ -33,9 +34,8 @@ vpx_codec_err_t Decoder::DecodeFrame(const uint8_t *cxdata, size_t size,
  vpx_codec_err_t res_dec;
  InitOnce();
  API_REGISTER_STATE_CHECK(
-      res_dec = vpx_codec_decode(&decoder_,
-                                 cxdata, static_cast<unsigned int>(size),
-                                 user_priv, 0));
+      res_dec = vpx_codec_decode(
+          &decoder_, cxdata, static_cast<unsigned int>(size), user_priv, 0));
  return res_dec;
 }

@@ -65,7 +65,7 @@ void DecoderTest::HandlePeekResult(Decoder *const decoder,

 void DecoderTest::RunLoop(CompressedVideoSource *video,
                          const vpx_codec_dec_cfg_t &dec_cfg) {
-  Decoder* const decoder = codec_->CreateDecoder(dec_cfg, flags_, 0);
+  Decoder *const decoder = codec_->CreateDecoder(dec_cfg, flags_);
  ASSERT_TRUE(decoder != NULL);
  bool end_of_file = false;

@@ -78,16 +78,14 @@ void DecoderTest::RunLoop(CompressedVideoSource *video,
    stream_info.sz = sizeof(stream_info);

    if (video->cxdata() != NULL) {
-      const vpx_codec_err_t res_peek = decoder->PeekStream(video->cxdata(),
-                                                           video->frame_size(),
-                                                           &stream_info);
+      const vpx_codec_err_t res_peek = decoder->PeekStream(
+          video->cxdata(), video->frame_size(), &stream_info);
      HandlePeekResult(decoder, video, res_peek);
      ASSERT_FALSE(::testing::Test::HasFailure());

-      vpx_codec_err_t res_dec = decoder->DecodeFrame(video->cxdata(),
-                                                     video->frame_size());
-      if (!HandleDecodeResult(res_dec, *video, decoder))
-        break;
+      vpx_codec_err_t res_dec =
+          decoder->DecodeFrame(video->cxdata(), video->frame_size());
+      if (!HandleDecodeResult(res_dec, *video, decoder)) break;
    } else {
      // Signal end of the file to the decoder.
      const vpx_codec_err_t res_dec = decoder->DecodeFrame(NULL, 0);
@@ -99,8 +97,9 @@ void DecoderTest::RunLoop(CompressedVideoSource *video,
    const vpx_image_t *img = NULL;

    // Get decompressed data
-    while ((img = dec_iter.Next()))
+    while ((img = dec_iter.Next())) {
      DecompressedFrameHook(*img, video->frame_number());
+    }
  }
  delete decoder;
 }
@@ -114,8 +113,6 @@ void DecoderTest::set_cfg(const vpx_codec_dec_cfg_t &dec_cfg) {
  memcpy(&cfg_, &dec_cfg, sizeof(cfg_));
 }

-void DecoderTest::set_flags(const vpx_codec_flags_t flags) {
-  flags_ = flags;
-}
+void DecoderTest::set_flags(const vpx_codec_flags_t flags) { flags_ = flags; }

 }  // namespace libvpx_test
--- a/test/decode_test_driver.h
+++ b/test/decode_test_driver.h
@@ -26,13 +26,11 @@ class DxDataIterator {
  explicit DxDataIterator(vpx_codec_ctx_t *decoder)
      : decoder_(decoder), iter_(NULL) {}

-  const vpx_image_t *Next() {
-    return vpx_codec_get_frame(decoder_, &iter_);
-  }
+  const vpx_image_t *Next() { return vpx_codec_get_frame(decoder_, &iter_); }

 private:
-  vpx_codec_ctx_t  *decoder_;
-  vpx_codec_iter_t  iter_;
+  vpx_codec_ctx_t *decoder_;
+  vpx_codec_iter_t iter_;
 };

 // Provides a simplified interface to manage one video decoding.
@@ -40,20 +38,17 @@ class DxDataIterator {
 // as more tests are added.
 class Decoder {
 public:
-  Decoder(vpx_codec_dec_cfg_t cfg, unsigned long deadline)
-      : cfg_(cfg), flags_(0), deadline_(deadline), init_done_(false) {
+  explicit Decoder(vpx_codec_dec_cfg_t cfg)
+      : cfg_(cfg), flags_(0), init_done_(false) {
    memset(&decoder_, 0, sizeof(decoder_));
  }

-  Decoder(vpx_codec_dec_cfg_t cfg, const vpx_codec_flags_t flag,
-          unsigned long deadline)  // NOLINT
-      : cfg_(cfg), flags_(flag), deadline_(deadline), init_done_(false) {
+  Decoder(vpx_codec_dec_cfg_t cfg, const vpx_codec_flags_t flag)
+      : cfg_(cfg), flags_(flag), init_done_(false) {
    memset(&decoder_, 0, sizeof(decoder_));
  }

-  virtual ~Decoder() {
-    vpx_codec_destroy(&decoder_);
-  }
+  virtual ~Decoder() { vpx_codec_destroy(&decoder_); }

  vpx_codec_err_t PeekStream(const uint8_t *cxdata, size_t size,
                             vpx_codec_stream_info_t *stream_info);
@@ -63,17 +58,9 @@ class Decoder {
  vpx_codec_err_t DecodeFrame(const uint8_t *cxdata, size_t size,
                              void *user_priv);

-  DxDataIterator GetDxData() {
-    return DxDataIterator(&decoder_);
-  }
+  DxDataIterator GetDxData() { return DxDataIterator(&decoder_); }

-  void set_deadline(unsigned long deadline) {
-    deadline_ = deadline;
-  }
-
-  void Control(int ctrl_id, int arg) {
-    Control(ctrl_id, arg, VPX_CODEC_OK);
-  }
+  void Control(int ctrl_id, int arg) { Control(ctrl_id, arg, VPX_CODEC_OK); }

  void Control(int ctrl_id, const void *arg) {
    InitOnce();
@@ -87,7 +74,7 @@ class Decoder {
    ASSERT_EQ(expected_value, res) << DecodeError();
  }

-  const char* DecodeError() {
+  const char *DecodeError() {
    const char *detail = vpx_codec_error_detail(&decoder_);
    return detail ? detail : vpx_codec_error(&decoder_);
  }
@@ -97,38 +84,34 @@ class Decoder {
      vpx_get_frame_buffer_cb_fn_t cb_get,
      vpx_release_frame_buffer_cb_fn_t cb_release, void *user_priv) {
    InitOnce();
-    return vpx_codec_set_frame_buffer_functions(
-        &decoder_, cb_get, cb_release, user_priv);
+    return vpx_codec_set_frame_buffer_functions(&decoder_, cb_get, cb_release,
+                                                user_priv);
  }

-  const char* GetDecoderName() const {
+  const char *GetDecoderName() const {
    return vpx_codec_iface_name(CodecInterface());
  }

  bool IsVP8() const;

-  vpx_codec_ctx_t * GetDecoder() {
-    return &decoder_;
-  }
+  vpx_codec_ctx_t *GetDecoder() { return &decoder_; }

 protected:
-  virtual vpx_codec_iface_t* CodecInterface() const = 0;
+  virtual vpx_codec_iface_t *CodecInterface() const = 0;

  void InitOnce() {
    if (!init_done_) {
-      const vpx_codec_err_t res = vpx_codec_dec_init(&decoder_,
-                                                     CodecInterface(),
-                                                     &cfg_, flags_);
+      const vpx_codec_err_t res =
+          vpx_codec_dec_init(&decoder_, CodecInterface(), &cfg_, flags_);
      ASSERT_EQ(VPX_CODEC_OK, res) << DecodeError();
      init_done_ = true;
    }
  }

-  vpx_codec_ctx_t     decoder_;
+  vpx_codec_ctx_t decoder_;
  vpx_codec_dec_cfg_t cfg_;
-  vpx_codec_flags_t   flags_;
-  unsigned int        deadline_;
-  bool                init_done_;
+  vpx_codec_flags_t flags_;
+  bool init_done_;
 };

 // Common test functionality for all Decoder tests.
@@ -143,37 +126,35 @@ class DecoderTest {
  virtual void set_flags(const vpx_codec_flags_t flags);

  // Hook to be called before decompressing every frame.
-  virtual void PreDecodeFrameHook(const CompressedVideoSource& /*video*/,
-                                  Decoder* /*decoder*/) {}
+  virtual void PreDecodeFrameHook(const CompressedVideoSource & /*video*/,
+                                  Decoder * /*decoder*/) {}

  // Hook to be called to handle decode result. Return true to continue.
  virtual bool HandleDecodeResult(const vpx_codec_err_t res_dec,
-                                  const CompressedVideoSource& /*video*/,
+                                  const CompressedVideoSource & /*video*/,
                                  Decoder *decoder) {
    EXPECT_EQ(VPX_CODEC_OK, res_dec) << decoder->DecodeError();
    return VPX_CODEC_OK == res_dec;
  }

  // Hook to be called on every decompressed frame.
-  virtual void DecompressedFrameHook(const vpx_image_t& /*img*/,
+  virtual void DecompressedFrameHook(const vpx_image_t & /*img*/,
                                     const unsigned int /*frame_number*/) {}

  // Hook to be called on peek result
-  virtual void HandlePeekResult(Decoder* const decoder,
+  virtual void HandlePeekResult(Decoder *const decoder,
                                CompressedVideoSource *video,
                                const vpx_codec_err_t res_peek);

 protected:
  explicit DecoderTest(const CodecFactory *codec)
-      : codec_(codec),
-        cfg_(),
-        flags_(0) {}
+      : codec_(codec), cfg_(), flags_(0) {}

  virtual ~DecoderTest() {}

  const CodecFactory *codec_;
  vpx_codec_dec_cfg_t cfg_;
-  vpx_codec_flags_t   flags_;
+  vpx_codec_flags_t flags_;
 };

 }  // namespace libvpx_test
--- a/test/encode_api_test.cc
+++ b/test/encode_api_test.cc
@@ -0,0 +1,65 @@
+/*
+ *  Copyright (c) 2016 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "third_party/googletest/src/include/gtest/gtest.h"
+
+#include "./vpx_config.h"
+#include "vpx/vp8cx.h"
+#include "vpx/vpx_encoder.h"
+
+namespace {
+
+#define NELEMENTS(x) static_cast<int>(sizeof(x) / sizeof(x[0]))
+
+TEST(EncodeAPI, InvalidParams) {
+  static const vpx_codec_iface_t *kCodecs[] = {  // Encoders compiled in.
+#if CONFIG_VP8_ENCODER
+    &vpx_codec_vp8_cx_algo,
+#endif
+#if CONFIG_VP9_ENCODER
+    &vpx_codec_vp9_cx_algo,
+#endif
+  };
+  uint8_t buf[1] = { 0 };  // Pixel data handed to vpx_img_wrap() below.
+  vpx_image_t img;
+  vpx_codec_ctx_t enc;
+  vpx_codec_enc_cfg_t cfg;
+
+  EXPECT_EQ(&img, vpx_img_wrap(&img, VPX_IMG_FMT_I420, 1, 1, 1, buf));
+
+  EXPECT_EQ(VPX_CODEC_INVALID_PARAM, vpx_codec_enc_init(NULL, NULL, NULL, 0));
+  EXPECT_EQ(VPX_CODEC_INVALID_PARAM, vpx_codec_enc_init(&enc, NULL, NULL, 0));
+  EXPECT_EQ(VPX_CODEC_INVALID_PARAM, vpx_codec_encode(NULL, NULL, 0, 0, 0, 0));
+  EXPECT_EQ(VPX_CODEC_INVALID_PARAM, vpx_codec_encode(NULL, &img, 0, 0, 0, 0));
+  EXPECT_EQ(VPX_CODEC_INVALID_PARAM, vpx_codec_destroy(NULL));
+  EXPECT_EQ(VPX_CODEC_INVALID_PARAM,
+            vpx_codec_enc_config_default(NULL, NULL, 0));
+  EXPECT_EQ(VPX_CODEC_INVALID_PARAM,
+            vpx_codec_enc_config_default(NULL, &cfg, 0));
+  EXPECT_TRUE(vpx_codec_error(NULL) != NULL);  // Still yields a string.
+
+  for (int i = 0; i < NELEMENTS(kCodecs); ++i) {  // Per-codec checks.
+    SCOPED_TRACE(vpx_codec_iface_name(kCodecs[i]));  // Name the codec.
+    EXPECT_EQ(VPX_CODEC_INVALID_PARAM,
+              vpx_codec_enc_init(NULL, kCodecs[i], NULL, 0));
+    EXPECT_EQ(VPX_CODEC_INVALID_PARAM,
+              vpx_codec_enc_init(&enc, kCodecs[i], NULL, 0));
+    EXPECT_EQ(VPX_CODEC_INVALID_PARAM,
+              vpx_codec_enc_config_default(kCodecs[i], &cfg, 1));
+
+    EXPECT_EQ(VPX_CODEC_OK, vpx_codec_enc_config_default(kCodecs[i], &cfg, 0));
+    EXPECT_EQ(VPX_CODEC_OK, vpx_codec_enc_init(&enc, kCodecs[i], &cfg, 0));
+    EXPECT_EQ(VPX_CODEC_OK, vpx_codec_encode(&enc, NULL, 0, 0, 0, 0));
+
+    EXPECT_EQ(VPX_CODEC_OK, vpx_codec_destroy(&enc));
+  }
+}
+
+}  // namespace
--- a/test/encode_perf_test.cc
+++ b/test/encode_perf_test.cc
@@ -26,10 +26,7 @@ const double kUsecsInSec = 1000000.0;
 struct EncodePerfTestVideo {
  EncodePerfTestVideo(const char *name_, uint32_t width_, uint32_t height_,
                      uint32_t bitrate_, int frames_)
-      : name(name_),
-        width(width_),
-        height(height_),
-        bitrate(bitrate_),
+      : name(name_), width(width_), height(height_), bitrate(bitrate_),
        frames(frames_) {}
  const char *name;
  uint32_t width;
@@ -45,8 +42,8 @@ const EncodePerfTestVideo kVP9EncodePerfTestVectors[] = {
  EncodePerfTestVideo("macmarcostationary_640_480_30.yuv", 640, 480, 200, 718),
  EncodePerfTestVideo("niklas_640_480_30.yuv", 640, 480, 200, 471),
  EncodePerfTestVideo("tacomanarrows_640_480_30.yuv", 640, 480, 200, 300),
-  EncodePerfTestVideo("tacomasmallcameramovement_640_480_30.yuv",
-                      640, 480, 200, 300),
+  EncodePerfTestVideo("tacomasmallcameramovement_640_480_30.yuv", 640, 480, 200,
+                      300),
  EncodePerfTestVideo("thaloundeskmtg_640_480_30.yuv", 640, 480, 200, 300),
  EncodePerfTestVideo("niklas_1280_720_30.yuv", 1280, 720, 600, 470),
 };
@@ -61,12 +58,8 @@ class VP9EncodePerfTest
      public ::libvpx_test::CodecTestWithParam<libvpx_test::TestMode> {
 protected:
  VP9EncodePerfTest()
-      : EncoderTest(GET_PARAM(0)),
-        min_psnr_(kMaxPsnr),
-        nframes_(0),
-        encoding_mode_(GET_PARAM(1)),
-        speed_(0),
-        threads_(1) {}
+      : EncoderTest(GET_PARAM(0)), min_psnr_(kMaxPsnr), nframes_(0),
+        encoding_mode_(GET_PARAM(1)), speed_(0), threads_(1) {}

  virtual ~VP9EncodePerfTest() {}

@@ -107,24 +100,18 @@ class VP9EncodePerfTest

  virtual void PSNRPktHook(const vpx_codec_cx_pkt_t *pkt) {
    if (pkt->data.psnr.psnr[0] < min_psnr_) {
-      min_psnr_= pkt->data.psnr.psnr[0];
+      min_psnr_ = pkt->data.psnr.psnr[0];
    }
  }

  // for performance reasons don't decode
-  virtual bool DoDecode() { return 0; }
+  virtual bool DoDecode() const { return false; }

-  double min_psnr() const {
-    return min_psnr_;
-  }
+  double min_psnr() const { return min_psnr_; }

-  void set_speed(unsigned int speed) {
-    speed_ = speed;
-  }
+  void set_speed(unsigned int speed) { speed_ = speed; }

-  void set_threads(unsigned int threads) {
-    threads_ = threads;
-  }
+  void set_threads(unsigned int threads) { threads_ = threads; }

 private:
  double min_psnr_;
@@ -139,11 +126,12 @@ TEST_P(VP9EncodePerfTest, PerfTest) {
    for (size_t j = 0; j < NELEMENTS(kEncodePerfTestSpeeds); ++j) {
      for (size_t k = 0; k < NELEMENTS(kEncodePerfTestThreads); ++k) {
        if (kVP9EncodePerfTestVectors[i].width < 512 &&
-            kEncodePerfTestThreads[k] > 1)
+            kEncodePerfTestThreads[k] > 1) {
          continue;
-        else if (kVP9EncodePerfTestVectors[i].width < 1024 &&
-                 kEncodePerfTestThreads[k] > 2)
+        } else if (kVP9EncodePerfTestVectors[i].width < 1024 &&
+                   kEncodePerfTestThreads[k] > 2) {
          continue;
+        }

        set_threads(kEncodePerfTestThreads[k]);
        SetUp();
@@ -157,10 +145,8 @@ TEST_P(VP9EncodePerfTest, PerfTest) {
        const unsigned frames = kVP9EncodePerfTestVectors[i].frames;
        const char *video_name = kVP9EncodePerfTestVectors[i].name;
        libvpx_test::I420VideoSource video(
-            video_name,
-            kVP9EncodePerfTestVectors[i].width,
-            kVP9EncodePerfTestVectors[i].height,
-            timebase.den, timebase.num, 0,
+            video_name, kVP9EncodePerfTestVectors[i].width,
+            kVP9EncodePerfTestVectors[i].height, timebase.den, timebase.num, 0,
            kVP9EncodePerfTestVectors[i].frames);
        set_speed(kEncodePerfTestSpeeds[j]);

@@ -197,6 +183,6 @@ TEST_P(VP9EncodePerfTest, PerfTest) {
  }
 }

-VP9_INSTANTIATE_TEST_CASE(
-    VP9EncodePerfTest, ::testing::Values(::libvpx_test::kRealTime));
+VP9_INSTANTIATE_TEST_CASE(VP9EncodePerfTest,
+                          ::testing::Values(::libvpx_test::kRealTime));
 }  // namespace
--- a/test/encode_test_driver.cc
+++ b/test/encode_test_driver.cc
@@ -10,13 +10,14 @@

 #include <string>

+#include "third_party/googletest/src/include/gtest/gtest.h"
+
 #include "./vpx_config.h"
 #include "test/codec_factory.h"
-#include "test/encode_test_driver.h"
 #include "test/decode_test_driver.h"
+#include "test/encode_test_driver.h"
 #include "test/register_state_check.h"
 #include "test/video_source.h"
-#include "third_party/googletest/src/include/gtest/gtest.h"

 namespace libvpx_test {
 void Encoder::InitEncoder(VideoSource *video) {
@@ -29,8 +30,7 @@ void Encoder::InitEncoder(VideoSource *video) {
    cfg_.g_timebase = video->timebase();
    cfg_.rc_twopass_stats_in = stats_->buf();

-    res = vpx_codec_enc_init(&encoder_, CodecInterface(), &cfg_,
-                             init_flags_);
+    res = vpx_codec_enc_init(&encoder_, CodecInterface(), &cfg_, init_flags_);
    ASSERT_EQ(VPX_CODEC_OK, res) << EncoderError();

 #if CONFIG_VP9_ENCODER
@@ -52,17 +52,17 @@ void Encoder::InitEncoder(VideoSource *video) {
 }

 void Encoder::EncodeFrame(VideoSource *video, const unsigned long frame_flags) {
-  if (video->img())
+  if (video->img()) {
    EncodeFrameInternal(*video, frame_flags);
-  else
+  } else {
    Flush();
+  }

  // Handle twopass stats
  CxDataIterator iter = GetCxData();

  while (const vpx_codec_cx_pkt_t *pkt = iter.Next()) {
-    if (pkt->kind != VPX_CODEC_STATS_PKT)
-      continue;
+    if (pkt->kind != VPX_CODEC_STATS_PKT) continue;

    stats_->Append(*pkt);
  }
@@ -82,15 +82,15 @@ void Encoder::EncodeFrameInternal(const VideoSource &video,
  }

  // Encode the frame
-  API_REGISTER_STATE_CHECK(
-      res = vpx_codec_encode(&encoder_, img, video.pts(), video.duration(),
-                             frame_flags, deadline_));
+  API_REGISTER_STATE_CHECK(res = vpx_codec_encode(&encoder_, img, video.pts(),
+                                                  video.duration(), frame_flags,
+                                                  deadline_));
  ASSERT_EQ(VPX_CODEC_OK, res) << EncoderError();
 }

 void Encoder::Flush() {
-  const vpx_codec_err_t res = vpx_codec_encode(&encoder_, NULL, 0, 0, 0,
-                                               deadline_);
+  const vpx_codec_err_t res =
+      vpx_codec_encode(&encoder_, NULL, 0, 0, 0, deadline_);
  if (!encoder_.priv)
    ASSERT_EQ(VPX_CODEC_ERROR, res) << EncoderError();
  else
@@ -105,60 +105,57 @@ void EncoderTest::InitializeConfig() {

 void EncoderTest::SetMode(TestMode mode) {
  switch (mode) {
-    case kRealTime:
-      deadline_ = VPX_DL_REALTIME;
-      break;
+    case kRealTime: deadline_ = VPX_DL_REALTIME; break;

    case kOnePassGood:
-    case kTwoPassGood:
-      deadline_ = VPX_DL_GOOD_QUALITY;
-      break;
+    case kTwoPassGood: deadline_ = VPX_DL_GOOD_QUALITY; break;

    case kOnePassBest:
-    case kTwoPassBest:
-      deadline_ = VPX_DL_BEST_QUALITY;
-      break;
+    case kTwoPassBest: deadline_ = VPX_DL_BEST_QUALITY; break;

-    default:
-      ASSERT_TRUE(false) << "Unexpected mode " << mode;
+    default: ASSERT_TRUE(false) << "Unexpected mode " << mode;
  }

-  if (mode == kTwoPassGood || mode == kTwoPassBest)
+  if (mode == kTwoPassGood || mode == kTwoPassBest) {
    passes_ = 2;
-  else
+  } else {
    passes_ = 1;
+  }
 }
 // The function should return "true" most of the time, therefore no early
 // break-out is implemented within the match checking process.
-static bool compare_img(const vpx_image_t *img1,
-                        const vpx_image_t *img2) {
-  bool match = (img1->fmt == img2->fmt) &&
-               (img1->cs == img2->cs) &&
-               (img1->d_w == img2->d_w) &&
-               (img1->d_h == img2->d_h);
+static bool compare_img(const vpx_image_t *img1, const vpx_image_t *img2) {
+  bool match = (img1->fmt == img2->fmt) && (img1->cs == img2->cs) &&
+               (img1->d_w == img2->d_w) && (img1->d_h == img2->d_h);

-  const unsigned int width_y  = img1->d_w;
+  const unsigned int width_y = img1->d_w;
  const unsigned int height_y = img1->d_h;
  unsigned int i;
-  for (i = 0; i < height_y; ++i)
+  for (i = 0; i < height_y; ++i) {
    match = (memcmp(img1->planes[VPX_PLANE_Y] + i * img1->stride[VPX_PLANE_Y],
                    img2->planes[VPX_PLANE_Y] + i * img2->stride[VPX_PLANE_Y],
-                    width_y) == 0) && match;
-  const unsigned int width_uv  = (img1->d_w + 1) >> 1;
+                    width_y) == 0) &&
+            match;
+  }
+  const unsigned int width_uv = (img1->d_w + 1) >> 1;
  const unsigned int height_uv = (img1->d_h + 1) >> 1;
-  for (i = 0; i <  height_uv; ++i)
+  for (i = 0; i < height_uv; ++i) {
    match = (memcmp(img1->planes[VPX_PLANE_U] + i * img1->stride[VPX_PLANE_U],
                    img2->planes[VPX_PLANE_U] + i * img2->stride[VPX_PLANE_U],
-                    width_uv) == 0) && match;
-  for (i = 0; i < height_uv; ++i)
+                    width_uv) == 0) &&
+            match;
+  }
+  for (i = 0; i < height_uv; ++i) {
    match = (memcmp(img1->planes[VPX_PLANE_V] + i * img1->stride[VPX_PLANE_V],
                    img2->planes[VPX_PLANE_V] + i * img2->stride[VPX_PLANE_V],
-                    width_uv) == 0) && match;
+                    width_uv) == 0) &&
+            match;
+  }
  return match;
 }

-void EncoderTest::MismatchHook(const vpx_image_t* /*img1*/,
-                               const vpx_image_t* /*img2*/) {
+void EncoderTest::MismatchHook(const vpx_image_t * /*img1*/,
+                               const vpx_image_t * /*img2*/) {
  ASSERT_TRUE(0) << "Encode/Decode mismatch found";
 }

@@ -171,33 +168,37 @@ void EncoderTest::RunLoop(VideoSource *video) {
  for (unsigned int pass = 0; pass < passes_; pass++) {
    last_pts_ = 0;

-    if (passes_ == 1)
+    if (passes_ == 1) {
      cfg_.g_pass = VPX_RC_ONE_PASS;
-    else if (pass == 0)
+    } else if (pass == 0) {
      cfg_.g_pass = VPX_RC_FIRST_PASS;
-    else
+    } else {
      cfg_.g_pass = VPX_RC_LAST_PASS;
+    }

    BeginPassHook(pass);
-    Encoder* const encoder = codec_->CreateEncoder(cfg_, deadline_, init_flags_,
-                                                   &stats_);
-    ASSERT_TRUE(encoder != NULL);
+    testing::internal::scoped_ptr<Encoder> encoder(
+        codec_->CreateEncoder(cfg_, deadline_, init_flags_, &stats_));
+    ASSERT_TRUE(encoder.get() != NULL);

-    video->Begin();
+    ASSERT_NO_FATAL_FAILURE(video->Begin());
    encoder->InitEncoder(video);
+    ASSERT_FALSE(::testing::Test::HasFatalFailure());

    unsigned long dec_init_flags = 0;  // NOLINT
    // Use fragment decoder if encoder outputs partitions.
    // NOTE: fragment decoder and partition encoder are only supported by VP8.
-    if (init_flags_ & VPX_CODEC_USE_OUTPUT_PARTITION)
+    if (init_flags_ & VPX_CODEC_USE_OUTPUT_PARTITION) {
      dec_init_flags |= VPX_CODEC_USE_INPUT_FRAGMENTS;
-    Decoder* const decoder = codec_->CreateDecoder(dec_cfg, dec_init_flags, 0);
+    }
+    testing::internal::scoped_ptr<Decoder> decoder(
+        codec_->CreateDecoder(dec_cfg, dec_init_flags));
    bool again;
    for (again = true; again; video->Next()) {
      again = (video->img() != NULL);

      PreEncodeFrameHook(video);
-      PreEncodeFrameHook(video, encoder);
+      PreEncodeFrameHook(video, encoder.get());
      encoder->EncodeFrame(video, frame_flags_);

      CxDataIterator iter = encoder->GetCxData();
@@ -210,12 +211,11 @@ void EncoderTest::RunLoop(VideoSource *video) {
        switch (pkt->kind) {
          case VPX_CODEC_CX_FRAME_PKT:
            has_cxdata = true;
-            if (decoder && DoDecode()) {
+            if (decoder.get() != NULL && DoDecode()) {
              vpx_codec_err_t res_dec = decoder->DecodeFrame(
-                  (const uint8_t*)pkt->data.frame.buf, pkt->data.frame.sz);
+                  (const uint8_t *)pkt->data.frame.buf, pkt->data.frame.sz);

-              if (!HandleDecodeResult(res_dec, *video, decoder))
-                break;
+              if (!HandleDecodeResult(res_dec, *video, decoder.get())) break;

              has_dxdata = true;
            }
@@ -224,20 +224,16 @@ void EncoderTest::RunLoop(VideoSource *video) {
            FramePktHook(pkt);
            break;

-          case VPX_CODEC_PSNR_PKT:
-            PSNRPktHook(pkt);
-            break;
+          case VPX_CODEC_PSNR_PKT: PSNRPktHook(pkt); break;

-          default:
-            break;
+          default: break;
        }
      }

      // Flush the decoder when there are no more fragments.
      if ((init_flags_ & VPX_CODEC_USE_OUTPUT_PARTITION) && has_dxdata) {
        const vpx_codec_err_t res_dec = decoder->DecodeFrame(NULL, 0);
-        if (!HandleDecodeResult(res_dec, *video, decoder))
-          break;
+        if (!HandleDecodeResult(res_dec, *video, decoder.get())) break;
      }

      if (has_dxdata && has_cxdata) {
@@ -250,21 +246,14 @@ void EncoderTest::RunLoop(VideoSource *video) {
            MismatchHook(img_enc, img_dec);
          }
        }
-        if (img_dec)
-          DecompressedFrameHook(*img_dec, video->pts());
+        if (img_dec) DecompressedFrameHook(*img_dec, video->pts());
      }
-      if (!Continue())
-        break;
+      if (!Continue()) break;
    }

    EndPassHook();

-    if (decoder)
-      delete decoder;
-    delete encoder;
-
-    if (!Continue())
-      break;
+    if (!Continue()) break;
  }
 }

--- a/test/encode_test_driver.h
+++ b/test/encode_test_driver.h
@@ -13,12 +13,13 @@
 #include <string>
 #include <vector>

-#include "./vpx_config.h"
 #include "third_party/googletest/src/include/gtest/gtest.h"
-#include "vpx/vpx_encoder.h"
+
+#include "./vpx_config.h"
 #if CONFIG_VP8_ENCODER || CONFIG_VP9_ENCODER
 #include "vpx/vp8cx.h"
 #endif
+#include "vpx/vpx_encoder.h"

 namespace libvpx_test {

@@ -32,19 +33,17 @@ enum TestMode {
  kTwoPassGood,
  kTwoPassBest
 };
-#define ALL_TEST_MODES ::testing::Values(::libvpx_test::kRealTime, \
-                                         ::libvpx_test::kOnePassGood, \
-                                         ::libvpx_test::kOnePassBest, \
-                                         ::libvpx_test::kTwoPassGood, \
-                                         ::libvpx_test::kTwoPassBest)
+#define ALL_TEST_MODES                                                        \
+  ::testing::Values(::libvpx_test::kRealTime, ::libvpx_test::kOnePassGood,    \
+                    ::libvpx_test::kOnePassBest, ::libvpx_test::kTwoPassGood, \
+                    ::libvpx_test::kTwoPassBest)

-#define ONE_PASS_TEST_MODES ::testing::Values(::libvpx_test::kRealTime, \
-                                              ::libvpx_test::kOnePassGood, \
-                                              ::libvpx_test::kOnePassBest)
-
-#define TWO_PASS_TEST_MODES ::testing::Values(::libvpx_test::kTwoPassGood, \
-                                              ::libvpx_test::kTwoPassBest)
+#define ONE_PASS_TEST_MODES                                                \
+  ::testing::Values(::libvpx_test::kRealTime, ::libvpx_test::kOnePassGood, \
+                    ::libvpx_test::kOnePassBest)

+#define TWO_PASS_TEST_MODES \
+  ::testing::Values(::libvpx_test::kTwoPassGood, ::libvpx_test::kTwoPassBest)

 // Provides an object to handle the libvpx get_cx_data() iteration pattern
 class CxDataIterator {
@@ -57,8 +56,8 @@ class CxDataIterator {
  }

 private:
-  vpx_codec_ctx_t  *encoder_;
-  vpx_codec_iter_t  iter_;
+  vpx_codec_ctx_t *encoder_;
+  vpx_codec_iter_t iter_;
 };

 // Implements an in-memory store for libvpx twopass statistics
@@ -74,15 +73,12 @@ class TwopassStatsStore {
    return buf;
  }

-  void Reset() {
-    buffer_.clear();
-  }
+  void Reset() { buffer_.clear(); }

 protected:
-  std::string  buffer_;
+  std::string buffer_;
 };

-
 // Provides a simplified interface to manage one video encoding pass, given
 // a configuration and video source.
 //
@@ -96,13 +92,9 @@ class Encoder {
    memset(&encoder_, 0, sizeof(encoder_));
  }

-  virtual ~Encoder() {
-    vpx_codec_destroy(&encoder_);
-  }
+  virtual ~Encoder() { vpx_codec_destroy(&encoder_); }

-  CxDataIterator GetCxData() {
-    return CxDataIterator(&encoder_);
-  }
+  CxDataIterator GetCxData() { return CxDataIterator(&encoder_); }

  void InitEncoder(VideoSource *video);

@@ -114,15 +106,18 @@ class Encoder {
  void EncodeFrame(VideoSource *video, const unsigned long frame_flags);

  // Convenience wrapper for EncodeFrame()
-  void EncodeFrame(VideoSource *video) {
-    EncodeFrame(video, 0);
-  }
+  void EncodeFrame(VideoSource *video) { EncodeFrame(video, 0); }

  void Control(int ctrl_id, int arg) {
    const vpx_codec_err_t res = vpx_codec_control_(&encoder_, ctrl_id, arg);
    ASSERT_EQ(VPX_CODEC_OK, res) << EncoderError();
  }

+  void Control(int ctrl_id, int *arg) {
+    const vpx_codec_err_t res = vpx_codec_control_(&encoder_, ctrl_id, arg);
+    ASSERT_EQ(VPX_CODEC_OK, res) << EncoderError();
+  }
+
  void Control(int ctrl_id, struct vpx_scaling_mode *arg) {
    const vpx_codec_err_t res = vpx_codec_control_(&encoder_, ctrl_id, arg);
    ASSERT_EQ(VPX_CODEC_OK, res) << EncoderError();
@@ -150,12 +145,10 @@ class Encoder {
    cfg_ = *cfg;
  }

-  void set_deadline(unsigned long deadline) {
-    deadline_ = deadline;
-  }
+  void set_deadline(unsigned long deadline) { deadline_ = deadline; }

 protected:
-  virtual vpx_codec_iface_t* CodecInterface() const = 0;
+  virtual vpx_codec_iface_t *CodecInterface() const = 0;

  const char *EncoderError() {
    const char *detail = vpx_codec_error_detail(&encoder_);
@@ -169,11 +162,11 @@ class Encoder {
  // Flush the encoder on EOS
  void Flush();

-  vpx_codec_ctx_t      encoder_;
-  vpx_codec_enc_cfg_t  cfg_;
-  unsigned long        deadline_;
-  unsigned long        init_flags_;
-  TwopassStatsStore   *stats_;
+  vpx_codec_ctx_t encoder_;
+  vpx_codec_enc_cfg_t cfg_;
+  unsigned long deadline_;
+  unsigned long init_flags_;
+  TwopassStatsStore *stats_;
 };

 // Common test functionality for all Encoder tests.
@@ -215,36 +208,35 @@ class EncoderTest {
  virtual void EndPassHook() {}

  // Hook to be called before encoding a frame.
-  virtual void PreEncodeFrameHook(VideoSource* /*video*/) {}
-  virtual void PreEncodeFrameHook(VideoSource* /*video*/,
-                                  Encoder* /*encoder*/) {}
+  virtual void PreEncodeFrameHook(VideoSource * /*video*/) {}
+  virtual void PreEncodeFrameHook(VideoSource * /*video*/,
+                                  Encoder * /*encoder*/) {}

  // Hook to be called on every compressed data packet.
-  virtual void FramePktHook(const vpx_codec_cx_pkt_t* /*pkt*/) {}
+  virtual void FramePktHook(const vpx_codec_cx_pkt_t * /*pkt*/) {}

  // Hook to be called on every PSNR packet.
-  virtual void PSNRPktHook(const vpx_codec_cx_pkt_t* /*pkt*/) {}
+  virtual void PSNRPktHook(const vpx_codec_cx_pkt_t * /*pkt*/) {}

  // Hook to determine whether the encode loop should continue.
  virtual bool Continue() const {
    return !(::testing::Test::HasFatalFailure() || abort_);
  }

-  const CodecFactory   *codec_;
+  const CodecFactory *codec_;
  // Hook to determine whether to decode frame after encoding
  virtual bool DoDecode() const { return 1; }

  // Hook to handle encode/decode mismatch
-  virtual void MismatchHook(const vpx_image_t *img1,
-                            const vpx_image_t *img2);
+  virtual void MismatchHook(const vpx_image_t *img1, const vpx_image_t *img2);

  // Hook to be called on every decompressed frame.
-  virtual void DecompressedFrameHook(const vpx_image_t& /*img*/,
+  virtual void DecompressedFrameHook(const vpx_image_t & /*img*/,
                                     vpx_codec_pts_t /*pts*/) {}

  // Hook to be called to handle decode result. Return true to continue.
  virtual bool HandleDecodeResult(const vpx_codec_err_t res_dec,
-                                  const VideoSource& /*video*/,
+                                  const VideoSource & /*video*/,
                                  Decoder *decoder) {
    EXPECT_EQ(VPX_CODEC_OK, res_dec) << decoder->DecodeError();
    return VPX_CODEC_OK == res_dec;
@@ -256,15 +248,15 @@ class EncoderTest {
    return pkt;
  }

-  bool                 abort_;
-  vpx_codec_enc_cfg_t  cfg_;
-  vpx_codec_dec_cfg_t  dec_cfg_;
-  unsigned int         passes_;
-  unsigned long        deadline_;
-  TwopassStatsStore    stats_;
-  unsigned long        init_flags_;
-  unsigned long        frame_flags_;
-  vpx_codec_pts_t      last_pts_;
+  bool abort_;
+  vpx_codec_enc_cfg_t cfg_;
+  vpx_codec_dec_cfg_t dec_cfg_;
+  unsigned int passes_;
+  unsigned long deadline_;
+  TwopassStatsStore stats_;
+  unsigned long init_flags_;
+  unsigned long frame_flags_;
+  vpx_codec_pts_t last_pts_;
 };

 }  // namespace libvpx_test
--- a/test/error_resilience_test.cc
+++ b/test/error_resilience_test.cc
@@ -19,15 +19,13 @@ namespace {
 const int kMaxErrorFrames = 12;
 const int kMaxDroppableFrames = 12;

-class ErrorResilienceTestLarge : public ::libvpx_test::EncoderTest,
-    public ::libvpx_test::CodecTestWithParam<libvpx_test::TestMode> {
+class ErrorResilienceTestLarge
+    : public ::libvpx_test::EncoderTest,
+      public ::libvpx_test::CodecTestWith2Params<libvpx_test::TestMode, bool> {
 protected:
  ErrorResilienceTestLarge()
-      : EncoderTest(GET_PARAM(0)),
-        psnr_(0.0),
-        nframes_(0),
-        mismatch_psnr_(0.0),
-        mismatch_nframes_(0),
+      : EncoderTest(GET_PARAM(0)), svc_support_(GET_PARAM(2)), psnr_(0.0),
+        nframes_(0), mismatch_psnr_(0.0), mismatch_nframes_(0),
        encoding_mode_(GET_PARAM(1)) {
    Reset();
  }
@@ -65,81 +63,69 @@ class ErrorResilienceTestLarge : public ::libvpx_test::EncoderTest,
  // LAST is updated on base/layer 0, GOLDEN  updated on layer 1.
  // Non-zero pattern_switch parameter means pattern will switch to
  // not using LAST for frame_num >= pattern_switch.
-  int SetFrameFlags(int frame_num,
-                    int num_temp_layers,
-                    int pattern_switch) {
+  int SetFrameFlags(int frame_num, int num_temp_layers, int pattern_switch) {
    int frame_flags = 0;
    if (num_temp_layers == 2) {
-        if (frame_num % 2 == 0) {
-          if (frame_num < pattern_switch || pattern_switch == 0) {
-            // Layer 0: predict from LAST and ARF, update LAST.
-            frame_flags = VP8_EFLAG_NO_REF_GF |
-                          VP8_EFLAG_NO_UPD_GF |
-                          VP8_EFLAG_NO_UPD_ARF;
-          } else {
-            // Layer 0: predict from GF and ARF, update GF.
-            frame_flags = VP8_EFLAG_NO_REF_LAST |
-                          VP8_EFLAG_NO_UPD_LAST |
-                          VP8_EFLAG_NO_UPD_ARF;
-          }
+      if (frame_num % 2 == 0) {
+        if (frame_num < pattern_switch || pattern_switch == 0) {
+          // Layer 0: predict from LAST and ARF, update LAST.
+          frame_flags =
+              VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF;
        } else {
-          if (frame_num < pattern_switch || pattern_switch == 0) {
-            // Layer 1: predict from L, GF, and ARF, update GF.
-            frame_flags = VP8_EFLAG_NO_UPD_ARF |
-                          VP8_EFLAG_NO_UPD_LAST;
-          } else {
-            // Layer 1: predict from GF and ARF, update GF.
-            frame_flags = VP8_EFLAG_NO_REF_LAST |
-                          VP8_EFLAG_NO_UPD_LAST |
-                          VP8_EFLAG_NO_UPD_ARF;
-          }
+          // Layer 0: predict from GF and ARF, update GF.
+          frame_flags = VP8_EFLAG_NO_REF_LAST | VP8_EFLAG_NO_UPD_LAST |
+                        VP8_EFLAG_NO_UPD_ARF;
        }
+      } else {
+        if (frame_num < pattern_switch || pattern_switch == 0) {
+          // Layer 1: predict from L, GF, and ARF, update GF.
+          frame_flags = VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_UPD_LAST;
+        } else {
+          // Layer 1: predict from GF and ARF, update GF.
+          frame_flags = VP8_EFLAG_NO_REF_LAST | VP8_EFLAG_NO_UPD_LAST |
+                        VP8_EFLAG_NO_UPD_ARF;
+        }
+      }
    }
    return frame_flags;
  }

-  virtual void PreEncodeFrameHook(libvpx_test::VideoSource *video,
-                                  ::libvpx_test::Encoder *encoder) {
-    frame_flags_ &= ~(VP8_EFLAG_NO_UPD_LAST |
-                      VP8_EFLAG_NO_UPD_GF |
-                      VP8_EFLAG_NO_UPD_ARF);
+  virtual void PreEncodeFrameHook(libvpx_test::VideoSource *video) {
+    frame_flags_ &=
+        ~(VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF);
    // For temporal layer case.
    if (cfg_.ts_number_layers > 1) {
-      frame_flags_ = SetFrameFlags(video->frame(),
-                                   cfg_.ts_number_layers,
-                                   pattern_switch_);
+      frame_flags_ =
+          SetFrameFlags(video->frame(), cfg_.ts_number_layers, pattern_switch_);
      for (unsigned int i = 0; i < droppable_nframes_; ++i) {
        if (droppable_frames_[i] == video->frame()) {
-          std::cout << "Encoding droppable frame: "
-                    << droppable_frames_[i] << "\n";
+          std::cout << "Encoding droppable frame: " << droppable_frames_[i]
+                    << "\n";
        }
      }
    } else {
-       if (droppable_nframes_ > 0 &&
-         (cfg_.g_pass == VPX_RC_LAST_PASS || cfg_.g_pass == VPX_RC_ONE_PASS)) {
-         for (unsigned int i = 0; i < droppable_nframes_; ++i) {
-           if (droppable_frames_[i] == video->frame()) {
-             std::cout << "Encoding droppable frame: "
-                       << droppable_frames_[i] << "\n";
-             frame_flags_ |= (VP8_EFLAG_NO_UPD_LAST |
-                              VP8_EFLAG_NO_UPD_GF |
-                              VP8_EFLAG_NO_UPD_ARF);
-             return;
-           }
-         }
-       }
+      if (droppable_nframes_ > 0 &&
+          (cfg_.g_pass == VPX_RC_LAST_PASS || cfg_.g_pass == VPX_RC_ONE_PASS)) {
+        for (unsigned int i = 0; i < droppable_nframes_; ++i) {
+          if (droppable_frames_[i] == video->frame()) {
+            std::cout << "Encoding droppable frame: " << droppable_frames_[i]
+                      << "\n";
+            frame_flags_ |= (VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_GF |
+                             VP8_EFLAG_NO_UPD_ARF);
+            return;
+          }
+        }
+      }
    }
  }

  double GetAveragePsnr() const {
-    if (nframes_)
-      return psnr_ / nframes_;
+    if (nframes_) return psnr_ / nframes_;
    return 0.0;
  }

  double GetAverageMismatchPsnr() const {
-    if (mismatch_nframes_)
-      return mismatch_psnr_ / mismatch_nframes_;
+    if (mismatch_nframes_) return mismatch_psnr_ / mismatch_nframes_;
    return 0.0;
  }

@@ -157,8 +143,7 @@ class ErrorResilienceTestLarge : public ::libvpx_test::EncoderTest,
    return 1;
  }

-  virtual void MismatchHook(const vpx_image_t *img1,
-                            const vpx_image_t *img2) {
+  virtual void MismatchHook(const vpx_image_t *img1, const vpx_image_t *img2) {
    double mismatch_psnr = compute_psnr(img1, img2);
    mismatch_psnr_ += mismatch_psnr;
    ++mismatch_nframes_;
@@ -166,32 +151,34 @@ class ErrorResilienceTestLarge : public ::libvpx_test::EncoderTest,
  }

  void SetErrorFrames(int num, unsigned int *list) {
-    if (num > kMaxErrorFrames)
+    if (num > kMaxErrorFrames) {
      num = kMaxErrorFrames;
-    else if (num < 0)
+    } else if (num < 0) {
      num = 0;
+    }
    error_nframes_ = num;
-    for (unsigned int i = 0; i < error_nframes_; ++i)
+    for (unsigned int i = 0; i < error_nframes_; ++i) {
      error_frames_[i] = list[i];
+    }
  }

  void SetDroppableFrames(int num, unsigned int *list) {
-    if (num > kMaxDroppableFrames)
+    if (num > kMaxDroppableFrames) {
      num = kMaxDroppableFrames;
-    else if (num < 0)
+    } else if (num < 0) {
      num = 0;
+    }
    droppable_nframes_ = num;
-    for (unsigned int i = 0; i < droppable_nframes_; ++i)
+    for (unsigned int i = 0; i < droppable_nframes_; ++i) {
      droppable_frames_[i] = list[i];
+    }
  }

-  unsigned int GetMismatchFrames() {
-    return mismatch_nframes_;
-  }
+  unsigned int GetMismatchFrames() { return mismatch_nframes_; }

-  void SetPatternSwitch(int frame_switch) {
-     pattern_switch_ = frame_switch;
-   }
+  void SetPatternSwitch(int frame_switch) { pattern_switch_ = frame_switch; }
+
+  bool svc_support_;

 private:
  double psnr_;
@@ -262,15 +249,14 @@ TEST_P(ErrorResilienceTestLarge, DropFramesWithoutRecovery) {
  // In addition to isolated loss/drop, add a long consecutive series
  // (of size 9) of dropped frames.
  unsigned int num_droppable_frames = 11;
-  unsigned int droppable_frame_list[] = {5, 16, 22, 23, 24, 25, 26, 27, 28,
-                                         29, 30};
+  unsigned int droppable_frame_list[] = { 5,  16, 22, 23, 24, 25,
+                                          26, 27, 28, 29, 30 };
  SetDroppableFrames(num_droppable_frames, droppable_frame_list);
  SetErrorFrames(num_droppable_frames, droppable_frame_list);
  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
  // Test that no mismatches have been found
-  std::cout << "             Mismatch frames: "
-            << GetMismatchFrames() << "\n";
-  EXPECT_EQ(GetMismatchFrames(), (unsigned int) 0);
+  std::cout << "             Mismatch frames: " << GetMismatchFrames() << "\n";
+  EXPECT_EQ(GetMismatchFrames(), (unsigned int)0);

  // Reset previously set of error/droppable frames.
  Reset();
@@ -302,6 +288,9 @@ TEST_P(ErrorResilienceTestLarge, DropFramesWithoutRecovery) {
 // two layer temporal pattern. The base layer does not predict from the top
 // layer, so successful decoding is expected.
 TEST_P(ErrorResilienceTestLarge, 2LayersDropEnhancement) {
+  // This test doesn't run if SVC is not supported.
+  if (!svc_support_) return;
+
  const vpx_rational timebase = { 33333333, 1000000000 };
  cfg_.g_timebase = timebase;
  cfg_.rc_target_bitrate = 500;
@@ -330,14 +319,13 @@ TEST_P(ErrorResilienceTestLarge, 2LayersDropEnhancement) {
  // The odd frames are the enhancement layer for 2 layer pattern, so set
  // those frames as droppable. Drop the last 7 frames.
  unsigned int num_droppable_frames = 7;
-  unsigned int droppable_frame_list[] = {27, 29, 31, 33, 35, 37, 39};
+  unsigned int droppable_frame_list[] = { 27, 29, 31, 33, 35, 37, 39 };
  SetDroppableFrames(num_droppable_frames, droppable_frame_list);
  SetErrorFrames(num_droppable_frames, droppable_frame_list);
  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
  // Test that no mismatches have been found
-  std::cout << "             Mismatch frames: "
-            << GetMismatchFrames() << "\n";
-  EXPECT_EQ(GetMismatchFrames(), (unsigned int) 0);
+  std::cout << "             Mismatch frames: " << GetMismatchFrames() << "\n";
+  EXPECT_EQ(GetMismatchFrames(), (unsigned int)0);

  // Reset previously set of error/droppable frames.
  Reset();
@@ -347,6 +335,9 @@ TEST_P(ErrorResilienceTestLarge, 2LayersDropEnhancement) {
 // for a two layer temporal pattern, where at some point in the
 // sequence, the LAST ref is not used anymore.
 TEST_P(ErrorResilienceTestLarge, 2LayersNoRefLast) {
+  // This test doesn't run if SVC is not supported.
+  if (!svc_support_) return;
+
  const vpx_rational timebase = { 33333333, 1000000000 };
  cfg_.g_timebase = timebase;
  cfg_.rc_target_bitrate = 500;
@@ -374,20 +365,19 @@ TEST_P(ErrorResilienceTestLarge, 2LayersNoRefLast) {

  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
  // Test that no mismatches have been found
-  std::cout << "             Mismatch frames: "
-            << GetMismatchFrames() << "\n";
-  EXPECT_EQ(GetMismatchFrames(), (unsigned int) 0);
+  std::cout << "             Mismatch frames: " << GetMismatchFrames() << "\n";
+  EXPECT_EQ(GetMismatchFrames(), (unsigned int)0);

  // Reset previously set of error/droppable frames.
  Reset();
 }

-class ErrorResilienceTestLargeCodecControls : public ::libvpx_test::EncoderTest,
-    public ::libvpx_test::CodecTestWithParam<libvpx_test::TestMode> {
+class ErrorResilienceTestLargeCodecControls
+    : public ::libvpx_test::EncoderTest,
+      public ::libvpx_test::CodecTestWithParam<libvpx_test::TestMode> {
 protected:
  ErrorResilienceTestLargeCodecControls()
-      : EncoderTest(GET_PARAM(0)),
-        encoding_mode_(GET_PARAM(1)) {
+      : EncoderTest(GET_PARAM(0)), encoding_mode_(GET_PARAM(1)) {
    Reset();
  }

@@ -426,8 +416,8 @@ class ErrorResilienceTestLargeCodecControls : public ::libvpx_test::EncoderTest,
    if (num_temp_layers == 2) {
      if (frame_num % 2 == 0) {
        // Layer 0: predict from L and ARF, update L.
-        frame_flags = VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_UPD_GF |
-                      VP8_EFLAG_NO_UPD_ARF;
+        frame_flags =
+            VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF;
      } else {
        // Layer 1: predict from L, G and ARF, and update G.
        frame_flags = VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_UPD_LAST |
@@ -440,9 +430,9 @@ class ErrorResilienceTestLargeCodecControls : public ::libvpx_test::EncoderTest,
                      VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF;
      } else if ((frame_num - 2) % 4 == 0) {
        // Layer 1: predict from L, G,  update G.
-        frame_flags = VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_UPD_LAST |
-                      VP8_EFLAG_NO_REF_ARF;
-      }  else if ((frame_num - 1) % 2 == 0) {
+        frame_flags =
+            VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_REF_ARF;
+      } else if ((frame_num - 1) % 2 == 0) {
        // Layer 2: predict from L, G, ARF; update ARG.
        frame_flags = VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_LAST;
      }
@@ -456,7 +446,7 @@ class ErrorResilienceTestLargeCodecControls : public ::libvpx_test::EncoderTest,
      if (frame_num % 2 == 0) {
        layer_id = 0;
      } else {
-         layer_id = 1;
+        layer_id = 1;
      }
    } else if (num_temp_layers == 3) {
      if (frame_num % 4 == 0) {
@@ -473,16 +463,16 @@ class ErrorResilienceTestLargeCodecControls : public ::libvpx_test::EncoderTest,
  virtual void PreEncodeFrameHook(libvpx_test::VideoSource *video,
                                  libvpx_test::Encoder *encoder) {
    if (cfg_.ts_number_layers > 1) {
-        int layer_id = SetLayerId(video->frame(), cfg_.ts_number_layers);
-        int frame_flags = SetFrameFlags(video->frame(), cfg_.ts_number_layers);
-        if (video->frame() > 0) {
-          encoder->Control(VP8E_SET_TEMPORAL_LAYER_ID, layer_id);
-          encoder->Control(VP8E_SET_FRAME_FLAGS, frame_flags);
-        }
-       const vpx_rational_t tb = video->timebase();
-       timebase_ = static_cast<double>(tb.num) / tb.den;
-       duration_ = 0;
-       return;
+      int layer_id = SetLayerId(video->frame(), cfg_.ts_number_layers);
+      int frame_flags = SetFrameFlags(video->frame(), cfg_.ts_number_layers);
+      if (video->frame() > 0) {
+        encoder->Control(VP8E_SET_TEMPORAL_LAYER_ID, layer_id);
+        encoder->Control(VP8E_SET_FRAME_FLAGS, frame_flags);
+      }
+      const vpx_rational_t tb = video->timebase();
+      timebase_ = static_cast<double>(tb.num) / tb.den;
+      duration_ = 0;
+      return;
    }
  }

@@ -508,26 +498,28 @@ class ErrorResilienceTestLargeCodecControls : public ::libvpx_test::EncoderTest,

  virtual void EndPassHook(void) {
    duration_ = (last_pts_ + 1) * timebase_;
-    if (cfg_.ts_number_layers  > 1) {
+    if (cfg_.ts_number_layers > 1) {
      for (int layer = 0; layer < static_cast<int>(cfg_.ts_number_layers);
-          ++layer) {
+           ++layer) {
        if (bits_total_[layer]) {
          // Effective file datarate:
-          effective_datarate_[layer] = (bits_total_[layer] / 1000.0) / duration_;
+          effective_datarate_[layer] =
+              (bits_total_[layer] / 1000.0) / duration_;
        }
      }
    }
  }

  double effective_datarate_[3];
-   private:
-    libvpx_test::TestMode encoding_mode_;
-    vpx_codec_pts_t last_pts_;
-    double timebase_;
-    int64_t bits_total_[3];
-    double duration_;
-    int tot_frame_number_;
-  };
+
+ private:
+  libvpx_test::TestMode encoding_mode_;
+  vpx_codec_pts_t last_pts_;
+  double timebase_;
+  int64_t bits_total_[3];
+  double duration_;
+  int tot_frame_number_;
+};

 // Check two codec controls used for:
 // (1) for setting temporal layer id, and (2) for settings encoder flags.
@@ -571,16 +563,20 @@ TEST_P(ErrorResilienceTestLargeCodecControls, CodecControl3TemporalLayers) {
    for (int j = 0; j < static_cast<int>(cfg_.ts_number_layers); ++j) {
      ASSERT_GE(effective_datarate_[j], cfg_.ts_target_bitrate[j] * 0.75)
          << " The datarate for the file is lower than target by too much, "
-              "for layer: " << j;
+             "for layer: "
+          << j;
      ASSERT_LE(effective_datarate_[j], cfg_.ts_target_bitrate[j] * 1.25)
          << " The datarate for the file is greater than target by too much, "
-              "for layer: " << j;
+             "for layer: "
+          << j;
    }
  }
 }

-VP8_INSTANTIATE_TEST_CASE(ErrorResilienceTestLarge, ONE_PASS_TEST_MODES);
+VP8_INSTANTIATE_TEST_CASE(ErrorResilienceTestLarge, ONE_PASS_TEST_MODES,
+                          ::testing::Values(true));
 VP8_INSTANTIATE_TEST_CASE(ErrorResilienceTestLargeCodecControls,
                          ONE_PASS_TEST_MODES);
-VP9_INSTANTIATE_TEST_CASE(ErrorResilienceTestLarge, ONE_PASS_TEST_MODES);
+VP9_INSTANTIATE_TEST_CASE(ErrorResilienceTestLarge, ONE_PASS_TEST_MODES,
+                          ::testing::Values(true));
 }  // namespace
--- a/test/examples.sh
+++ b/test/examples.sh
@@ -15,7 +15,7 @@
 example_tests=$(ls $(dirname $0)/*.sh)

 # List of script names to exclude.
-exclude_list="examples tools_common"
+exclude_list="examples stress tools_common"

 # Filter out the scripts in $exclude_list.
 for word in ${exclude_list}; do
--- a/test/external_frame_buffer_test.cc
+++ b/test/external_frame_buffer_test.cc
@@ -24,7 +24,6 @@
 namespace {

 const int kVideoNameParam = 1;
-const char kVP9TestFile[] = "vp90-2-02-size-lf-1920x1080.webm";

 struct ExternalFrameBuffer {
  uint8_t *data;
@@ -35,21 +34,18 @@ struct ExternalFrameBuffer {
 // Class to manipulate a list of external frame buffers.
 class ExternalFrameBufferList {
 public:
-  ExternalFrameBufferList()
-      : num_buffers_(0),
-        ext_fb_list_(NULL) {}
+  ExternalFrameBufferList() : num_buffers_(0), ext_fb_list_(NULL) {}

  virtual ~ExternalFrameBufferList() {
    for (int i = 0; i < num_buffers_; ++i) {
-      delete [] ext_fb_list_[i].data;
+      delete[] ext_fb_list_[i].data;
    }
-    delete [] ext_fb_list_;
+    delete[] ext_fb_list_;
  }

  // Creates the list to hold the external buffers. Returns true on success.
  bool CreateBufferList(int num_buffers) {
-    if (num_buffers < 0)
-      return false;
+    if (num_buffers < 0) return false;

    num_buffers_ = num_buffers;
    ext_fb_list_ = new ExternalFrameBuffer[num_buffers_];
@@ -65,11 +61,10 @@ class ExternalFrameBufferList {
  int GetFreeFrameBuffer(size_t min_size, vpx_codec_frame_buffer_t *fb) {
    EXPECT_TRUE(fb != NULL);
    const int idx = FindFreeBufferIndex();
-    if (idx == num_buffers_)
-      return -1;
+    if (idx == num_buffers_) return -1;

    if (ext_fb_list_[idx].size < min_size) {
-      delete [] ext_fb_list_[idx].data;
+      delete[] ext_fb_list_[idx].data;
      ext_fb_list_[idx].data = new uint8_t[min_size];
      memset(ext_fb_list_[idx].data, 0, min_size);
      ext_fb_list_[idx].size = min_size;
@@ -84,11 +79,10 @@ class ExternalFrameBufferList {
  int GetZeroFrameBuffer(size_t min_size, vpx_codec_frame_buffer_t *fb) {
    EXPECT_TRUE(fb != NULL);
    const int idx = FindFreeBufferIndex();
-    if (idx == num_buffers_)
-      return -1;
+    if (idx == num_buffers_) return -1;

    if (ext_fb_list_[idx].size < min_size) {
-      delete [] ext_fb_list_[idx].data;
+      delete[] ext_fb_list_[idx].data;
      ext_fb_list_[idx].data = NULL;
      ext_fb_list_[idx].size = min_size;
    }
@@ -105,7 +99,7 @@ class ExternalFrameBufferList {
      return -1;
    }
    ExternalFrameBuffer *const ext_fb =
-        reinterpret_cast<ExternalFrameBuffer*>(fb->priv);
+        reinterpret_cast<ExternalFrameBuffer *>(fb->priv);
    if (ext_fb == NULL) {
      EXPECT_TRUE(ext_fb != NULL);
      return -1;
@@ -120,7 +114,7 @@ class ExternalFrameBufferList {
  void CheckXImageFrameBuffer(const vpx_image_t *img) {
    if (img->fb_priv != NULL) {
      const struct ExternalFrameBuffer *const ext_fb =
-          reinterpret_cast<ExternalFrameBuffer*>(img->fb_priv);
+          reinterpret_cast<ExternalFrameBuffer *>(img->fb_priv);

      ASSERT_TRUE(img->planes[0] >= ext_fb->data &&
                  img->planes[0] < (ext_fb->data + ext_fb->size));
@@ -134,8 +128,7 @@ class ExternalFrameBufferList {
    int i;
    // Find a free frame buffer.
    for (i = 0; i < num_buffers_; ++i) {
-      if (!ext_fb_list_[i].in_use)
-        break;
+      if (!ext_fb_list_[i].in_use) break;
    }
    return i;
  }
@@ -155,21 +148,22 @@ class ExternalFrameBufferList {
  ExternalFrameBuffer *ext_fb_list_;
 };

+#if CONFIG_WEBM_IO
+
 // Callback used by libvpx to request the application to return a frame
 // buffer of at least |min_size| in bytes.
 int get_vp9_frame_buffer(void *user_priv, size_t min_size,
                         vpx_codec_frame_buffer_t *fb) {
  ExternalFrameBufferList *const fb_list =
-      reinterpret_cast<ExternalFrameBufferList*>(user_priv);
+      reinterpret_cast<ExternalFrameBufferList *>(user_priv);
  return fb_list->GetFreeFrameBuffer(min_size, fb);
 }

 // Callback used by libvpx to tell the application that |fb| is not needed
 // anymore.
-int release_vp9_frame_buffer(void *user_priv,
-                             vpx_codec_frame_buffer_t *fb) {
+int release_vp9_frame_buffer(void *user_priv, vpx_codec_frame_buffer_t *fb) {
  ExternalFrameBufferList *const fb_list =
-      reinterpret_cast<ExternalFrameBufferList*>(user_priv);
+      reinterpret_cast<ExternalFrameBufferList *>(user_priv);
  return fb_list->ReturnFrameBuffer(fb);
 }

@@ -177,7 +171,7 @@ int release_vp9_frame_buffer(void *user_priv,
 int get_vp9_zero_frame_buffer(void *user_priv, size_t min_size,
                              vpx_codec_frame_buffer_t *fb) {
  ExternalFrameBufferList *const fb_list =
-      reinterpret_cast<ExternalFrameBufferList*>(user_priv);
+      reinterpret_cast<ExternalFrameBufferList *>(user_priv);
  return fb_list->GetZeroFrameBuffer(min_size, fb);
 }

@@ -185,7 +179,7 @@ int get_vp9_zero_frame_buffer(void *user_priv, size_t min_size,
 int get_vp9_one_less_byte_frame_buffer(void *user_priv, size_t min_size,
                                       vpx_codec_frame_buffer_t *fb) {
  ExternalFrameBufferList *const fb_list =
-      reinterpret_cast<ExternalFrameBufferList*>(user_priv);
+      reinterpret_cast<ExternalFrameBufferList *>(user_priv);
  return fb_list->GetFreeFrameBuffer(min_size - 1, fb);
 }

@@ -197,19 +191,19 @@ int do_not_release_vp9_frame_buffer(void *user_priv,
  return 0;
 }

+#endif  // CONFIG_WEBM_IO
+
 // Class for testing passing in external frame buffers to libvpx.
 class ExternalFrameBufferMD5Test
    : public ::libvpx_test::DecoderTest,
-      public ::libvpx_test::CodecTestWithParam<const char*> {
+      public ::libvpx_test::CodecTestWithParam<const char *> {
 protected:
  ExternalFrameBufferMD5Test()
      : DecoderTest(GET_PARAM(::libvpx_test::kCodecFactoryParam)),
-        md5_file_(NULL),
-        num_buffers_(0) {}
+        md5_file_(NULL), num_buffers_(0) {}

  virtual ~ExternalFrameBufferMD5Test() {
-    if (md5_file_ != NULL)
-      fclose(md5_file_);
+    if (md5_file_ != NULL) fclose(md5_file_);
  }

  virtual void PreDecodeFrameHook(
@@ -219,15 +213,15 @@ class ExternalFrameBufferMD5Test
      // Have libvpx use frame buffers we create.
      ASSERT_TRUE(fb_list_.CreateBufferList(num_buffers_));
      ASSERT_EQ(VPX_CODEC_OK,
-                decoder->SetFrameBufferFunctions(
-                    GetVP9FrameBuffer, ReleaseVP9FrameBuffer, this));
+                decoder->SetFrameBufferFunctions(GetVP9FrameBuffer,
+                                                 ReleaseVP9FrameBuffer, this));
    }
  }

  void OpenMD5File(const std::string &md5_file_name_) {
    md5_file_ = libvpx_test::OpenTestDataFile(md5_file_name_);
    ASSERT_TRUE(md5_file_ != NULL) << "Md5 file open failed. Filename: "
-        << md5_file_name_;
+                                   << md5_file_name_;
  }

  virtual void DecompressedFrameHook(const vpx_image_t &img,
@@ -255,7 +249,7 @@ class ExternalFrameBufferMD5Test
  static int GetVP9FrameBuffer(void *user_priv, size_t min_size,
                               vpx_codec_frame_buffer_t *fb) {
    ExternalFrameBufferMD5Test *const md5Test =
-        reinterpret_cast<ExternalFrameBufferMD5Test*>(user_priv);
+        reinterpret_cast<ExternalFrameBufferMD5Test *>(user_priv);
    return md5Test->fb_list_.GetFreeFrameBuffer(min_size, fb);
  }

@@ -264,7 +258,7 @@ class ExternalFrameBufferMD5Test
  static int ReleaseVP9FrameBuffer(void *user_priv,
                                   vpx_codec_frame_buffer_t *fb) {
    ExternalFrameBufferMD5Test *const md5Test =
-        reinterpret_cast<ExternalFrameBufferMD5Test*>(user_priv);
+        reinterpret_cast<ExternalFrameBufferMD5Test *>(user_priv);
    return md5Test->fb_list_.ReturnFrameBuffer(fb);
  }

@@ -278,13 +272,12 @@ class ExternalFrameBufferMD5Test
 };

 #if CONFIG_WEBM_IO
+const char kVP9TestFile[] = "vp90-2-02-size-lf-1920x1080.webm";
+
 // Class for testing passing in external frame buffers to libvpx.
 class ExternalFrameBufferTest : public ::testing::Test {
 protected:
-  ExternalFrameBufferTest()
-      : video_(NULL),
-        decoder_(NULL),
-        num_buffers_(0) {}
+  ExternalFrameBufferTest() : video_(NULL), decoder_(NULL), num_buffers_(0) {}

  virtual void SetUp() {
    video_ = new libvpx_test::WebMVideoSource(kVP9TestFile);
@@ -304,8 +297,7 @@ class ExternalFrameBufferTest : public ::testing::Test {

  // Passes the external frame buffer information to libvpx.
  vpx_codec_err_t SetFrameBufferFunctions(
-      int num_buffers,
-      vpx_get_frame_buffer_cb_fn_t cb_get,
+      int num_buffers, vpx_get_frame_buffer_cb_fn_t cb_get,
      vpx_release_frame_buffer_cb_fn_t cb_release) {
    if (num_buffers > 0) {
      num_buffers_ = num_buffers;
@@ -319,8 +311,7 @@ class ExternalFrameBufferTest : public ::testing::Test {
    const vpx_codec_err_t res =
        decoder_->DecodeFrame(video_->cxdata(), video_->frame_size());
    CheckDecodedFrames();
-    if (res == VPX_CODEC_OK)
-      video_->Next();
+    if (res == VPX_CODEC_OK) video_->Next();
    return res;
  }

@@ -328,8 +319,7 @@ class ExternalFrameBufferTest : public ::testing::Test {
    for (; video_->cxdata() != NULL; video_->Next()) {
      const vpx_codec_err_t res =
          decoder_->DecodeFrame(video_->cxdata(), video_->frame_size());
-      if (res != VPX_CODEC_OK)
-        return res;
+      if (res != VPX_CODEC_OK) return res;
      CheckDecodedFrames();
    }
    return VPX_CODEC_OK;
@@ -360,7 +350,6 @@ class ExternalFrameBufferTest : public ::testing::Test {
 // Otherwise, the test failed.
 TEST_P(ExternalFrameBufferMD5Test, ExtFBMD5Match) {
  const std::string filename = GET_PARAM(kVideoNameParam);
-  libvpx_test::CompressedVideoSource *video = NULL;

  // Number of buffers equals #VP9_MAXIMUM_REF_BUFFERS +
  // #VPX_MAXIMUM_WORK_BUFFERS + four jitter buffers.
@@ -375,18 +364,19 @@ TEST_P(ExternalFrameBufferMD5Test, ExtFBMD5Match) {
 #endif

  // Open compressed video file.
+  testing::internal::scoped_ptr<libvpx_test::CompressedVideoSource> video;
  if (filename.substr(filename.length() - 3, 3) == "ivf") {
-    video = new libvpx_test::IVFVideoSource(filename);
+    video.reset(new libvpx_test::IVFVideoSource(filename));
  } else {
 #if CONFIG_WEBM_IO
-    video = new libvpx_test::WebMVideoSource(filename);
+    video.reset(new libvpx_test::WebMVideoSource(filename));
 #else
    fprintf(stderr, "WebM IO is disabled, skipping test vector %s\n",
            filename.c_str());
    return;
 #endif
  }
-  ASSERT_TRUE(video != NULL);
+  ASSERT_TRUE(video.get() != NULL);
  video->Init();

  // Construct md5 file name.
@@ -394,8 +384,7 @@ TEST_P(ExternalFrameBufferMD5Test, ExtFBMD5Match) {
  OpenMD5File(md5_filename);

  // Decode frame, and check the md5 matching.
-  ASSERT_NO_FATAL_FAILURE(RunLoop(video));
-  delete video;
+  ASSERT_NO_FATAL_FAILURE(RunLoop(video.get()));
 }

 #if CONFIG_WEBM_IO
@@ -404,8 +393,8 @@ TEST_F(ExternalFrameBufferTest, MinFrameBuffers) {
  // #VP9_MAXIMUM_REF_BUFFERS + #VPX_MAXIMUM_WORK_BUFFERS.
  const int num_buffers = VP9_MAXIMUM_REF_BUFFERS + VPX_MAXIMUM_WORK_BUFFERS;
  ASSERT_EQ(VPX_CODEC_OK,
-            SetFrameBufferFunctions(
-                num_buffers, get_vp9_frame_buffer, release_vp9_frame_buffer));
+            SetFrameBufferFunctions(num_buffers, get_vp9_frame_buffer,
+                                    release_vp9_frame_buffer));
  ASSERT_EQ(VPX_CODEC_OK, DecodeRemainingFrames());
 }

@@ -416,8 +405,8 @@ TEST_F(ExternalFrameBufferTest, EightJitterBuffers) {
  const int num_buffers =
      VP9_MAXIMUM_REF_BUFFERS + VPX_MAXIMUM_WORK_BUFFERS + jitter_buffers;
  ASSERT_EQ(VPX_CODEC_OK,
-            SetFrameBufferFunctions(
-                num_buffers, get_vp9_frame_buffer, release_vp9_frame_buffer));
+            SetFrameBufferFunctions(num_buffers, get_vp9_frame_buffer,
+                                    release_vp9_frame_buffer));
  ASSERT_EQ(VPX_CODEC_OK, DecodeRemainingFrames());
 }

@@ -427,8 +416,8 @@ TEST_F(ExternalFrameBufferTest, NotEnoughBuffers) {
  // only use 5 frame buffers at one time.
  const int num_buffers = 2;
  ASSERT_EQ(VPX_CODEC_OK,
-            SetFrameBufferFunctions(
-                num_buffers, get_vp9_frame_buffer, release_vp9_frame_buffer));
+            SetFrameBufferFunctions(num_buffers, get_vp9_frame_buffer,
+                                    release_vp9_frame_buffer));
  ASSERT_EQ(VPX_CODEC_OK, DecodeOneFrame());
  ASSERT_EQ(VPX_CODEC_MEM_ERROR, DecodeRemainingFrames());
 }
@@ -452,18 +441,17 @@ TEST_F(ExternalFrameBufferTest, NullRealloc) {

 TEST_F(ExternalFrameBufferTest, ReallocOneLessByte) {
  const int num_buffers = VP9_MAXIMUM_REF_BUFFERS + VPX_MAXIMUM_WORK_BUFFERS;
-  ASSERT_EQ(VPX_CODEC_OK,
-            SetFrameBufferFunctions(
-                num_buffers, get_vp9_one_less_byte_frame_buffer,
-                release_vp9_frame_buffer));
+  ASSERT_EQ(VPX_CODEC_OK, SetFrameBufferFunctions(
+                              num_buffers, get_vp9_one_less_byte_frame_buffer,
+                              release_vp9_frame_buffer));
  ASSERT_EQ(VPX_CODEC_MEM_ERROR, DecodeOneFrame());
 }

 TEST_F(ExternalFrameBufferTest, NullGetFunction) {
  const int num_buffers = VP9_MAXIMUM_REF_BUFFERS + VPX_MAXIMUM_WORK_BUFFERS;
-  ASSERT_EQ(VPX_CODEC_INVALID_PARAM,
-            SetFrameBufferFunctions(num_buffers, NULL,
-                                    release_vp9_frame_buffer));
+  ASSERT_EQ(
+      VPX_CODEC_INVALID_PARAM,
+      SetFrameBufferFunctions(num_buffers, NULL, release_vp9_frame_buffer));
 }

 TEST_F(ExternalFrameBufferTest, NullReleaseFunction) {
@@ -476,13 +464,14 @@ TEST_F(ExternalFrameBufferTest, SetAfterDecode) {
  const int num_buffers = VP9_MAXIMUM_REF_BUFFERS + VPX_MAXIMUM_WORK_BUFFERS;
  ASSERT_EQ(VPX_CODEC_OK, DecodeOneFrame());
  ASSERT_EQ(VPX_CODEC_ERROR,
-            SetFrameBufferFunctions(
-                num_buffers, get_vp9_frame_buffer, release_vp9_frame_buffer));
+            SetFrameBufferFunctions(num_buffers, get_vp9_frame_buffer,
+                                    release_vp9_frame_buffer));
 }
 #endif  // CONFIG_WEBM_IO

-VP9_INSTANTIATE_TEST_CASE(ExternalFrameBufferMD5Test,
-                          ::testing::ValuesIn(libvpx_test::kVP9TestVectors,
-                                              libvpx_test::kVP9TestVectors +
-                                              libvpx_test::kNumVP9TestVectors));
+VP9_INSTANTIATE_TEST_CASE(
+    ExternalFrameBufferMD5Test,
+    ::testing::ValuesIn(libvpx_test::kVP9TestVectors,
+                        libvpx_test::kVP9TestVectors +
+                            libvpx_test::kNumVP9TestVectors));
 }  // namespace
--- a/test/fdct4x4_test.cc
+++ b/test/fdct4x4_test.cc
@@ -13,12 +13,13 @@
 #include <string.h>

 #include "third_party/googletest/src/include/gtest/gtest.h"
+
+#include "./vp9_rtcd.h"
+#include "./vpx_dsp_rtcd.h"
 #include "test/acm_random.h"
 #include "test/clear_system_state.h"
 #include "test/register_state_check.h"
 #include "test/util.h"
-
-#include "./vp9_rtcd.h"
 #include "vp9/common/vp9_entropy.h"
 #include "vpx/vpx_codec.h"
 #include "vpx/vpx_integer.h"
@@ -39,8 +40,8 @@ typedef std::tr1::tuple<FdctFunc, IdctFunc, int, vpx_bit_depth_t> Dct4x4Param;
 typedef std::tr1::tuple<FhtFunc, IhtFunc, int, vpx_bit_depth_t> Ht4x4Param;

 void fdct4x4_ref(const int16_t *in, tran_low_t *out, int stride,
-                 int tx_type) {
-  vp9_fdct4x4_c(in, out, stride);
+                 int /*tx_type*/) {
+  vpx_fdct4x4_c(in, out, stride);
 }

 void fht4x4_ref(const int16_t *in, tran_low_t *out, int stride, int tx_type) {
@@ -48,17 +49,17 @@ void fht4x4_ref(const int16_t *in, tran_low_t *out, int stride, int tx_type) {
 }

 void fwht4x4_ref(const int16_t *in, tran_low_t *out, int stride,
-                 int tx_type) {
+                 int /*tx_type*/) {
  vp9_fwht4x4_c(in, out, stride);
 }

 #if CONFIG_VP9_HIGHBITDEPTH
 void idct4x4_10(const tran_low_t *in, uint8_t *out, int stride) {
-  vp9_highbd_idct4x4_16_add_c(in, out, stride, 10);
+  vpx_highbd_idct4x4_16_add_c(in, out, stride, 10);
 }

 void idct4x4_12(const tran_low_t *in, uint8_t *out, int stride) {
-  vp9_highbd_idct4x4_16_add_c(in, out, stride, 12);
+  vpx_highbd_idct4x4_16_add_c(in, out, stride, 12);
 }

 void iht4x4_10(const tran_low_t *in, uint8_t *out, int stride, int tx_type) {
@@ -70,20 +71,20 @@ void iht4x4_12(const tran_low_t *in, uint8_t *out, int stride, int tx_type) {
 }

 void iwht4x4_10(const tran_low_t *in, uint8_t *out, int stride) {
-  vp9_highbd_iwht4x4_16_add_c(in, out, stride, 10);
+  vpx_highbd_iwht4x4_16_add_c(in, out, stride, 10);
 }

 void iwht4x4_12(const tran_low_t *in, uint8_t *out, int stride) {
-  vp9_highbd_iwht4x4_16_add_c(in, out, stride, 12);
+  vpx_highbd_iwht4x4_16_add_c(in, out, stride, 12);
 }

 #if HAVE_SSE2
 void idct4x4_10_sse2(const tran_low_t *in, uint8_t *out, int stride) {
-  vp9_highbd_idct4x4_16_add_sse2(in, out, stride, 10);
+  vpx_highbd_idct4x4_16_add_sse2(in, out, stride, 10);
 }

 void idct4x4_12_sse2(const tran_low_t *in, uint8_t *out, int stride) {
-  vp9_highbd_idct4x4_16_add_sse2(in, out, stride, 12);
+  vpx_highbd_idct4x4_16_add_sse2(in, out, stride, 12);
 }
 #endif  // HAVE_SSE2
 #endif  // CONFIG_VP9_HIGHBITDEPTH
@@ -127,35 +128,33 @@ class Trans4x4TestBase {
        }
      }

-      ASM_REGISTER_STATE_CHECK(RunFwdTxfm(test_input_block,
-                                          test_temp_block, pitch_));
+      ASM_REGISTER_STATE_CHECK(
+          RunFwdTxfm(test_input_block, test_temp_block, pitch_));
      if (bit_depth_ == VPX_BITS_8) {
        ASM_REGISTER_STATE_CHECK(RunInvTxfm(test_temp_block, dst, pitch_));
 #if CONFIG_VP9_HIGHBITDEPTH
      } else {
-        ASM_REGISTER_STATE_CHECK(RunInvTxfm(test_temp_block,
-                                            CONVERT_TO_BYTEPTR(dst16), pitch_));
+        ASM_REGISTER_STATE_CHECK(
+            RunInvTxfm(test_temp_block, CONVERT_TO_BYTEPTR(dst16), pitch_));
 #endif
      }

      for (int j = 0; j < kNumCoeffs; ++j) {
 #if CONFIG_VP9_HIGHBITDEPTH
-        const uint32_t diff =
+        const int diff =
            bit_depth_ == VPX_BITS_8 ? dst[j] - src[j] : dst16[j] - src16[j];
 #else
        ASSERT_EQ(VPX_BITS_8, bit_depth_);
-        const uint32_t diff = dst[j] - src[j];
+        const int diff = dst[j] - src[j];
 #endif
        const uint32_t error = diff * diff;
-        if (max_error < error)
-          max_error = error;
+        if (max_error < error) max_error = error;
        total_error += error;
      }
    }

    EXPECT_GE(static_cast<uint32_t>(limit), max_error)
-        << "Error: 4x4 FHT/IHT has an individual round trip error > "
-        << limit;
+        << "Error: 4x4 FHT/IHT has an individual round trip error > " << limit;

    EXPECT_GE(count_test_block * limit, total_error)
        << "Error: 4x4 FHT/IHT has average round trip error > " << limit
@@ -171,8 +170,9 @@ class Trans4x4TestBase {

    for (int i = 0; i < count_test_block; ++i) {
      // Initialize a test block with input range [-mask_, mask_].
-      for (int j = 0; j < kNumCoeffs; ++j)
+      for (int j = 0; j < kNumCoeffs; ++j) {
        input_block[j] = (rnd.Rand16() & mask_) - (rnd.Rand16() & mask_);
+      }

      fwd_txfm_ref(input_block, output_ref_block, pitch_, tx_type_);
      ASM_REGISTER_STATE_CHECK(RunFwdTxfm(input_block, output_block, pitch_));
@@ -196,16 +196,14 @@ class Trans4x4TestBase {
        input_extreme_block[j] = rnd.Rand8() % 2 ? mask_ : -mask_;
      }
      if (i == 0) {
-        for (int j = 0; j < kNumCoeffs; ++j)
-          input_extreme_block[j] = mask_;
+        for (int j = 0; j < kNumCoeffs; ++j) input_extreme_block[j] = mask_;
      } else if (i == 1) {
-        for (int j = 0; j < kNumCoeffs; ++j)
-          input_extreme_block[j] = -mask_;
+        for (int j = 0; j < kNumCoeffs; ++j) input_extreme_block[j] = -mask_;
      }

      fwd_txfm_ref(input_extreme_block, output_ref_block, pitch_, tx_type_);
-      ASM_REGISTER_STATE_CHECK(RunFwdTxfm(input_extreme_block,
-                                          output_block, pitch_));
+      ASM_REGISTER_STATE_CHECK(
+          RunFwdTxfm(input_extreme_block, output_block, pitch_));

      // The minimum quant value is 4.
      for (int j = 0; j < kNumCoeffs; ++j) {
@@ -250,22 +248,21 @@ class Trans4x4TestBase {
        ASM_REGISTER_STATE_CHECK(RunInvTxfm(coeff, dst, pitch_));
 #if CONFIG_VP9_HIGHBITDEPTH
      } else {
-        ASM_REGISTER_STATE_CHECK(RunInvTxfm(coeff, CONVERT_TO_BYTEPTR(dst16),
-                                            pitch_));
+        ASM_REGISTER_STATE_CHECK(
+            RunInvTxfm(coeff, CONVERT_TO_BYTEPTR(dst16), pitch_));
 #endif
      }

      for (int j = 0; j < kNumCoeffs; ++j) {
 #if CONFIG_VP9_HIGHBITDEPTH
-        const uint32_t diff =
+        const int diff =
            bit_depth_ == VPX_BITS_8 ? dst[j] - src[j] : dst16[j] - src16[j];
 #else
-        const uint32_t diff = dst[j] - src[j];
+        const int diff = dst[j] - src[j];
 #endif
        const uint32_t error = diff * diff;
        EXPECT_GE(static_cast<uint32_t>(limit), error)
-            << "Error: 4x4 IDCT has error " << error
-            << " at index " << j;
+            << "Error: 4x4 IDCT has error " << error << " at index " << j;
      }
    }
  }
@@ -277,17 +274,16 @@ class Trans4x4TestBase {
  int mask_;
 };

-class Trans4x4DCT
-    : public Trans4x4TestBase,
-      public ::testing::TestWithParam<Dct4x4Param> {
+class Trans4x4DCT : public Trans4x4TestBase,
+                    public ::testing::TestWithParam<Dct4x4Param> {
 public:
  virtual ~Trans4x4DCT() {}

  virtual void SetUp() {
    fwd_txfm_ = GET_PARAM(0);
    inv_txfm_ = GET_PARAM(1);
-    tx_type_  = GET_PARAM(2);
-    pitch_    = 4;
+    tx_type_ = GET_PARAM(2);
+    pitch_ = 4;
    fwd_txfm_ref = fdct4x4_ref;
    bit_depth_ = GET_PARAM(3);
    mask_ = (1 << bit_depth_) - 1;
@@ -306,33 +302,24 @@ class Trans4x4DCT
  IdctFunc inv_txfm_;
 };

-TEST_P(Trans4x4DCT, AccuracyCheck) {
-  RunAccuracyCheck(1);
-}
+TEST_P(Trans4x4DCT, AccuracyCheck) { RunAccuracyCheck(1); }

-TEST_P(Trans4x4DCT, CoeffCheck) {
-  RunCoeffCheck();
-}
+TEST_P(Trans4x4DCT, CoeffCheck) { RunCoeffCheck(); }

-TEST_P(Trans4x4DCT, MemCheck) {
-  RunMemCheck();
-}
+TEST_P(Trans4x4DCT, MemCheck) { RunMemCheck(); }

-TEST_P(Trans4x4DCT, InvAccuracyCheck) {
-  RunInvAccuracyCheck(1);
-}
+TEST_P(Trans4x4DCT, InvAccuracyCheck) { RunInvAccuracyCheck(1); }

-class Trans4x4HT
-    : public Trans4x4TestBase,
-      public ::testing::TestWithParam<Ht4x4Param> {
+class Trans4x4HT : public Trans4x4TestBase,
+                   public ::testing::TestWithParam<Ht4x4Param> {
 public:
  virtual ~Trans4x4HT() {}

  virtual void SetUp() {
    fwd_txfm_ = GET_PARAM(0);
    inv_txfm_ = GET_PARAM(1);
-    tx_type_  = GET_PARAM(2);
-    pitch_    = 4;
+    tx_type_ = GET_PARAM(2);
+    pitch_ = 4;
    fwd_txfm_ref = fht4x4_ref;
    bit_depth_ = GET_PARAM(3);
    mask_ = (1 << bit_depth_) - 1;
@@ -352,33 +339,24 @@ class Trans4x4HT
  IhtFunc inv_txfm_;
 };

-TEST_P(Trans4x4HT, AccuracyCheck) {
-  RunAccuracyCheck(1);
-}
+TEST_P(Trans4x4HT, AccuracyCheck) { RunAccuracyCheck(1); }

-TEST_P(Trans4x4HT, CoeffCheck) {
-  RunCoeffCheck();
-}
+TEST_P(Trans4x4HT, CoeffCheck) { RunCoeffCheck(); }

-TEST_P(Trans4x4HT, MemCheck) {
-  RunMemCheck();
-}
+TEST_P(Trans4x4HT, MemCheck) { RunMemCheck(); }

-TEST_P(Trans4x4HT, InvAccuracyCheck) {
-  RunInvAccuracyCheck(1);
-}
+TEST_P(Trans4x4HT, InvAccuracyCheck) { RunInvAccuracyCheck(1); }

-class Trans4x4WHT
-    : public Trans4x4TestBase,
-      public ::testing::TestWithParam<Dct4x4Param> {
+class Trans4x4WHT : public Trans4x4TestBase,
+                    public ::testing::TestWithParam<Dct4x4Param> {
 public:
  virtual ~Trans4x4WHT() {}

  virtual void SetUp() {
    fwd_txfm_ = GET_PARAM(0);
    inv_txfm_ = GET_PARAM(1);
-    tx_type_  = GET_PARAM(2);
-    pitch_    = 4;
+    tx_type_ = GET_PARAM(2);
+    pitch_ = 4;
    fwd_txfm_ref = fwht4x4_ref;
    bit_depth_ = GET_PARAM(3);
    mask_ = (1 << bit_depth_) - 1;
@@ -397,35 +375,27 @@ class Trans4x4WHT
  IdctFunc inv_txfm_;
 };

-TEST_P(Trans4x4WHT, AccuracyCheck) {
-  RunAccuracyCheck(0);
-}
+TEST_P(Trans4x4WHT, AccuracyCheck) { RunAccuracyCheck(0); }

-TEST_P(Trans4x4WHT, CoeffCheck) {
-  RunCoeffCheck();
-}
+TEST_P(Trans4x4WHT, CoeffCheck) { RunCoeffCheck(); }

-TEST_P(Trans4x4WHT, MemCheck) {
-  RunMemCheck();
-}
+TEST_P(Trans4x4WHT, MemCheck) { RunMemCheck(); }

-TEST_P(Trans4x4WHT, InvAccuracyCheck) {
-  RunInvAccuracyCheck(0);
-}
+TEST_P(Trans4x4WHT, InvAccuracyCheck) { RunInvAccuracyCheck(0); }
 using std::tr1::make_tuple;

 #if CONFIG_VP9_HIGHBITDEPTH
 INSTANTIATE_TEST_CASE_P(
    C, Trans4x4DCT,
    ::testing::Values(
-        make_tuple(&vp9_highbd_fdct4x4_c, &idct4x4_10, 0, VPX_BITS_10),
-        make_tuple(&vp9_highbd_fdct4x4_c, &idct4x4_12, 0, VPX_BITS_12),
-        make_tuple(&vp9_fdct4x4_c, &vp9_idct4x4_16_add_c, 0, VPX_BITS_8)));
+        make_tuple(&vpx_highbd_fdct4x4_c, &idct4x4_10, 0, VPX_BITS_10),
+        make_tuple(&vpx_highbd_fdct4x4_c, &idct4x4_12, 0, VPX_BITS_12),
+        make_tuple(&vpx_fdct4x4_c, &vpx_idct4x4_16_add_c, 0, VPX_BITS_8)));
 #else
-INSTANTIATE_TEST_CASE_P(
-    C, Trans4x4DCT,
-    ::testing::Values(
-        make_tuple(&vp9_fdct4x4_c, &vp9_idct4x4_16_add_c, 0, VPX_BITS_8)));
+INSTANTIATE_TEST_CASE_P(C, Trans4x4DCT,
+                        ::testing::Values(make_tuple(&vpx_fdct4x4_c,
+                                                     &vpx_idct4x4_16_add_c, 0,
+                                                     VPX_BITS_8)));
 #endif  // CONFIG_VP9_HIGHBITDEPTH

 #if CONFIG_VP9_HIGHBITDEPTH
@@ -460,23 +430,20 @@ INSTANTIATE_TEST_CASE_P(
    ::testing::Values(
        make_tuple(&vp9_highbd_fwht4x4_c, &iwht4x4_10, 0, VPX_BITS_10),
        make_tuple(&vp9_highbd_fwht4x4_c, &iwht4x4_12, 0, VPX_BITS_12),
-        make_tuple(&vp9_fwht4x4_c, &vp9_iwht4x4_16_add_c, 0, VPX_BITS_8)));
+        make_tuple(&vp9_fwht4x4_c, &vpx_iwht4x4_16_add_c, 0, VPX_BITS_8)));
 #else
-INSTANTIATE_TEST_CASE_P(
-    C, Trans4x4WHT,
-    ::testing::Values(
-        make_tuple(&vp9_fwht4x4_c, &vp9_iwht4x4_16_add_c, 0, VPX_BITS_8)));
+INSTANTIATE_TEST_CASE_P(C, Trans4x4WHT,
+                        ::testing::Values(make_tuple(&vp9_fwht4x4_c,
+                                                     &vpx_iwht4x4_16_add_c, 0,
+                                                     VPX_BITS_8)));
 #endif  // CONFIG_VP9_HIGHBITDEPTH

-#if HAVE_NEON_ASM && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
-INSTANTIATE_TEST_CASE_P(
-    NEON, Trans4x4DCT,
-    ::testing::Values(
-        make_tuple(&vp9_fdct4x4_c,
-                   &vp9_idct4x4_16_add_neon, 0, VPX_BITS_8)));
-#endif  // HAVE_NEON_ASM && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
-
-#if HAVE_NEON && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
+#if HAVE_NEON && !CONFIG_EMULATE_HARDWARE
+INSTANTIATE_TEST_CASE_P(NEON, Trans4x4DCT,
+                        ::testing::Values(make_tuple(&vpx_fdct4x4_c,
+                                                     &vpx_idct4x4_16_add_neon,
+                                                     0, VPX_BITS_8)));
+#if !CONFIG_VP9_HIGHBITDEPTH
 INSTANTIATE_TEST_CASE_P(
    NEON, Trans4x4HT,
    ::testing::Values(
@@ -484,22 +451,22 @@ INSTANTIATE_TEST_CASE_P(
        make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_neon, 1, VPX_BITS_8),
        make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_neon, 2, VPX_BITS_8),
        make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_neon, 3, VPX_BITS_8)));
-#endif  // HAVE_NEON && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
+#endif  // !CONFIG_VP9_HIGHBITDEPTH
+#endif  // HAVE_NEON && !CONFIG_EMULATE_HARDWARE

-#if CONFIG_USE_X86INC && HAVE_MMX && !CONFIG_VP9_HIGHBITDEPTH && \
-    !CONFIG_EMULATE_HARDWARE
+#if HAVE_SSE2 && !CONFIG_EMULATE_HARDWARE
 INSTANTIATE_TEST_CASE_P(
-    MMX, Trans4x4WHT,
+    SSE2, Trans4x4WHT,
    ::testing::Values(
-        make_tuple(&vp9_fwht4x4_mmx, &vp9_iwht4x4_16_add_c, 0, VPX_BITS_8)));
+        make_tuple(&vp9_fwht4x4_sse2, &vpx_iwht4x4_16_add_c, 0, VPX_BITS_8),
+        make_tuple(&vp9_fwht4x4_c, &vpx_iwht4x4_16_add_sse2, 0, VPX_BITS_8)));
 #endif

 #if HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
-INSTANTIATE_TEST_CASE_P(
-    SSE2, Trans4x4DCT,
-    ::testing::Values(
-        make_tuple(&vp9_fdct4x4_sse2,
-                   &vp9_idct4x4_16_add_sse2, 0, VPX_BITS_8)));
+INSTANTIATE_TEST_CASE_P(SSE2, Trans4x4DCT,
+                        ::testing::Values(make_tuple(&vpx_fdct4x4_sse2,
+                                                     &vpx_idct4x4_16_add_sse2,
+                                                     0, VPX_BITS_8)));
 INSTANTIATE_TEST_CASE_P(
    SSE2, Trans4x4HT,
    ::testing::Values(
@@ -513,24 +480,15 @@ INSTANTIATE_TEST_CASE_P(
 INSTANTIATE_TEST_CASE_P(
    SSE2, Trans4x4DCT,
    ::testing::Values(
-        make_tuple(&vp9_highbd_fdct4x4_c,    &idct4x4_10_sse2, 0, VPX_BITS_10),
-        make_tuple(&vp9_highbd_fdct4x4_sse2, &idct4x4_10_sse2, 0, VPX_BITS_10),
-        make_tuple(&vp9_highbd_fdct4x4_c,    &idct4x4_12_sse2, 0, VPX_BITS_12),
-        make_tuple(&vp9_highbd_fdct4x4_sse2, &idct4x4_12_sse2, 0, VPX_BITS_12),
-        make_tuple(&vp9_fdct4x4_sse2,      &vp9_idct4x4_16_add_c, 0,
-                   VPX_BITS_8)));
+        make_tuple(&vpx_highbd_fdct4x4_c, &idct4x4_10_sse2, 0, VPX_BITS_10),
+        make_tuple(&vpx_highbd_fdct4x4_sse2, &idct4x4_10_sse2, 0, VPX_BITS_10),
+        make_tuple(&vpx_highbd_fdct4x4_c, &idct4x4_12_sse2, 0, VPX_BITS_12),
+        make_tuple(&vpx_highbd_fdct4x4_sse2, &idct4x4_12_sse2, 0, VPX_BITS_12),
+        make_tuple(&vpx_fdct4x4_sse2, &vpx_idct4x4_16_add_c, 0, VPX_BITS_8)));

 INSTANTIATE_TEST_CASE_P(
    SSE2, Trans4x4HT,
    ::testing::Values(
-        make_tuple(&vp9_highbd_fht4x4_sse2, &iht4x4_10, 0, VPX_BITS_10),
-        make_tuple(&vp9_highbd_fht4x4_sse2, &iht4x4_10, 1, VPX_BITS_10),
-        make_tuple(&vp9_highbd_fht4x4_sse2, &iht4x4_10, 2, VPX_BITS_10),
-        make_tuple(&vp9_highbd_fht4x4_sse2, &iht4x4_10, 3, VPX_BITS_10),
-        make_tuple(&vp9_highbd_fht4x4_sse2, &iht4x4_12, 0, VPX_BITS_12),
-        make_tuple(&vp9_highbd_fht4x4_sse2, &iht4x4_12, 1, VPX_BITS_12),
-        make_tuple(&vp9_highbd_fht4x4_sse2, &iht4x4_12, 2, VPX_BITS_12),
-        make_tuple(&vp9_highbd_fht4x4_sse2, &iht4x4_12, 3, VPX_BITS_12),
        make_tuple(&vp9_fht4x4_sse2, &vp9_iht4x4_16_add_c, 0, VPX_BITS_8),
        make_tuple(&vp9_fht4x4_sse2, &vp9_iht4x4_16_add_c, 1, VPX_BITS_8),
        make_tuple(&vp9_fht4x4_sse2, &vp9_iht4x4_16_add_c, 2, VPX_BITS_8),
@@ -538,10 +496,10 @@ INSTANTIATE_TEST_CASE_P(
 #endif  // HAVE_SSE2 && CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE

 #if HAVE_MSA && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
-INSTANTIATE_TEST_CASE_P(
-    MSA, Trans4x4DCT,
-    ::testing::Values(
-        make_tuple(&vp9_fdct4x4_msa, &vp9_idct4x4_16_add_msa, 0, VPX_BITS_8)));
+INSTANTIATE_TEST_CASE_P(MSA, Trans4x4DCT,
+                        ::testing::Values(make_tuple(&vpx_fdct4x4_msa,
+                                                     &vpx_idct4x4_16_add_msa, 0,
+                                                     VPX_BITS_8)));
 INSTANTIATE_TEST_CASE_P(
    MSA, Trans4x4HT,
    ::testing::Values(
--- a/test/fdct8x8_test.cc
+++ b/test/fdct8x8_test.cc
@@ -13,12 +13,13 @@
 #include <string.h>

 #include "third_party/googletest/src/include/gtest/gtest.h"
+
+#include "./vp9_rtcd.h"
+#include "./vpx_dsp_rtcd.h"
 #include "test/acm_random.h"
 #include "test/clear_system_state.h"
 #include "test/register_state_check.h"
 #include "test/util.h"
-
-#include "./vp9_rtcd.h"
 #include "vp9/common/vp9_entropy.h"
 #include "vp9/common/vp9_scan.h"
 #include "vpx/vpx_codec.h"
@@ -46,14 +47,14 @@ typedef std::tr1::tuple<FdctFunc, IdctFunc, int, vpx_bit_depth_t> Dct8x8Param;
 typedef std::tr1::tuple<FhtFunc, IhtFunc, int, vpx_bit_depth_t> Ht8x8Param;
 typedef std::tr1::tuple<IdctFunc, IdctFunc, int, vpx_bit_depth_t> Idct8x8Param;

-void reference_8x8_dct_1d(const double in[8], double out[8], int stride) {
+void reference_8x8_dct_1d(const double in[8], double out[8]) {
  const double kInvSqrt2 = 0.707106781186547524400844362104;
  for (int k = 0; k < 8; k++) {
    out[k] = 0.0;
-    for (int n = 0; n < 8; n++)
+    for (int n = 0; n < 8; n++) {
      out[k] += in[n] * cos(kPi * (2 * n + 1) * k / 16.0);
-    if (k == 0)
-      out[k] = out[k] * kInvSqrt2;
+    }
+    if (k == 0) out[k] = out[k] * kInvSqrt2;
  }
 }

@@ -62,27 +63,23 @@ void reference_8x8_dct_2d(const int16_t input[kNumCoeffs],
  // First transform columns
  for (int i = 0; i < 8; ++i) {
    double temp_in[8], temp_out[8];
-    for (int j = 0; j < 8; ++j)
-      temp_in[j] = input[j*8 + i];
-    reference_8x8_dct_1d(temp_in, temp_out, 1);
-    for (int j = 0; j < 8; ++j)
-      output[j * 8 + i] = temp_out[j];
+    for (int j = 0; j < 8; ++j) temp_in[j] = input[j * 8 + i];
+    reference_8x8_dct_1d(temp_in, temp_out);
+    for (int j = 0; j < 8; ++j) output[j * 8 + i] = temp_out[j];
  }
  // Then transform rows
  for (int i = 0; i < 8; ++i) {
    double temp_in[8], temp_out[8];
-    for (int j = 0; j < 8; ++j)
-      temp_in[j] = output[j + i*8];
-    reference_8x8_dct_1d(temp_in, temp_out, 1);
+    for (int j = 0; j < 8; ++j) temp_in[j] = output[j + i * 8];
+    reference_8x8_dct_1d(temp_in, temp_out);
    // Scale by some magic number
-    for (int j = 0; j < 8; ++j)
-      output[j + i * 8] = temp_out[j] * 2;
+    for (int j = 0; j < 8; ++j) output[j + i * 8] = temp_out[j] * 2;
  }
 }

-
-void fdct8x8_ref(const int16_t *in, tran_low_t *out, int stride, int tx_type) {
-  vp9_fdct8x8_c(in, out, stride);
+void fdct8x8_ref(const int16_t *in, tran_low_t *out, int stride,
+                 int /*tx_type*/) {
+  vpx_fdct8x8_c(in, out, stride);
 }

 void fht8x8_ref(const int16_t *in, tran_low_t *out, int stride, int tx_type) {
@@ -91,11 +88,11 @@ void fht8x8_ref(const int16_t *in, tran_low_t *out, int stride, int tx_type) {

 #if CONFIG_VP9_HIGHBITDEPTH
 void idct8x8_10(const tran_low_t *in, uint8_t *out, int stride) {
-  vp9_highbd_idct8x8_64_add_c(in, out, stride, 10);
+  vpx_highbd_idct8x8_64_add_c(in, out, stride, 10);
 }

 void idct8x8_12(const tran_low_t *in, uint8_t *out, int stride) {
-  vp9_highbd_idct8x8_64_add_c(in, out, stride, 12);
+  vpx_highbd_idct8x8_64_add_c(in, out, stride, 12);
 }

 void iht8x8_10(const tran_low_t *in, uint8_t *out, int stride, int tx_type) {
@@ -106,29 +103,30 @@ void iht8x8_12(const tran_low_t *in, uint8_t *out, int stride, int tx_type) {
  vp9_highbd_iht8x8_64_add_c(in, out, stride, tx_type, 12);
 }

-void idct8x8_10_add_10_c(const tran_low_t *in, uint8_t *out, int stride) {
-  vp9_highbd_idct8x8_10_add_c(in, out, stride, 10);
-}
-
-void idct8x8_10_add_12_c(const tran_low_t *in, uint8_t *out, int stride) {
-  vp9_highbd_idct8x8_10_add_c(in, out, stride, 12);
-}
-
 #if HAVE_SSE2
-void idct8x8_10_add_10_sse2(const tran_low_t *in, uint8_t *out, int stride) {
-  vp9_highbd_idct8x8_10_add_sse2(in, out, stride, 10);
+
+void idct8x8_12_add_10_c(const tran_low_t *in, uint8_t *out, int stride) {
+  vpx_highbd_idct8x8_12_add_c(in, out, stride, 10);
 }

-void idct8x8_10_add_12_sse2(const tran_low_t *in, uint8_t *out, int stride) {
-  vp9_highbd_idct8x8_10_add_sse2(in, out, stride, 12);
+void idct8x8_12_add_12_c(const tran_low_t *in, uint8_t *out, int stride) {
+  vpx_highbd_idct8x8_12_add_c(in, out, stride, 12);
+}
+
+void idct8x8_12_add_10_sse2(const tran_low_t *in, uint8_t *out, int stride) {
+  vpx_highbd_idct8x8_12_add_sse2(in, out, stride, 10);
+}
+
+void idct8x8_12_add_12_sse2(const tran_low_t *in, uint8_t *out, int stride) {
+  vpx_highbd_idct8x8_12_add_sse2(in, out, stride, 12);
 }

 void idct8x8_64_add_10_sse2(const tran_low_t *in, uint8_t *out, int stride) {
-  vp9_highbd_idct8x8_64_add_sse2(in, out, stride, 10);
+  vpx_highbd_idct8x8_64_add_sse2(in, out, stride, 10);
 }

 void idct8x8_64_add_12_sse2(const tran_low_t *in, uint8_t *out, int stride) {
-  vp9_highbd_idct8x8_64_add_sse2(in, out, stride, 12);
+  vpx_highbd_idct8x8_64_add_sse2(in, out, stride, 12);
 }
 #endif  // HAVE_SSE2
 #endif  // CONFIG_VP9_HIGHBITDEPTH
@@ -152,17 +150,19 @@ class FwdTrans8x8TestBase {

    for (int i = 0; i < count_test_block; ++i) {
      // Initialize a test block with input range [-255, 255].
-      for (int j = 0; j < 64; ++j)
+      for (int j = 0; j < 64; ++j) {
        test_input_block[j] = ((rnd.Rand16() >> (16 - bit_depth_)) & mask_) -
                              ((rnd.Rand16() >> (16 - bit_depth_)) & mask_);
+      }
      ASM_REGISTER_STATE_CHECK(
          RunFwdTxfm(test_input_block, test_output_block, pitch_));

      for (int j = 0; j < 64; ++j) {
-        if (test_output_block[j] < 0)
+        if (test_output_block[j] < 0) {
          ++count_sign_block[j][0];
-        else if (test_output_block[j] > 0)
+        } else if (test_output_block[j] > 0) {
          ++count_sign_block[j][1];
+        }
      }
    }

@@ -174,25 +174,26 @@ class FwdTrans8x8TestBase {
          << 1. * max_diff / count_test_block * 100 << "%"
          << " for input range [-255, 255] at index " << j
          << " count0: " << count_sign_block[j][0]
-          << " count1: " << count_sign_block[j][1]
-          << " diff: " << diff;
+          << " count1: " << count_sign_block[j][1] << " diff: " << diff;
    }

    memset(count_sign_block, 0, sizeof(count_sign_block));

    for (int i = 0; i < count_test_block; ++i) {
      // Initialize a test block with input range [-mask_ / 16, mask_ / 16].
-      for (int j = 0; j < 64; ++j)
-        test_input_block[j] = ((rnd.Rand16() & mask_) >> 4) -
-                              ((rnd.Rand16() & mask_) >> 4);
+      for (int j = 0; j < 64; ++j) {
+        test_input_block[j] =
+            ((rnd.Rand16() & mask_) >> 4) - ((rnd.Rand16() & mask_) >> 4);
+      }
      ASM_REGISTER_STATE_CHECK(
          RunFwdTxfm(test_input_block, test_output_block, pitch_));

      for (int j = 0; j < 64; ++j) {
-        if (test_output_block[j] < 0)
+        if (test_output_block[j] < 0) {
          ++count_sign_block[j][0];
-        else if (test_output_block[j] > 0)
+        } else if (test_output_block[j] > 0) {
          ++count_sign_block[j][1];
+        }
      }
    }

@@ -204,8 +205,7 @@ class FwdTrans8x8TestBase {
          << 1. * max_diff / count_test_block * 100 << "%"
          << " for input range [-15, 15] at index " << j
          << " count0: " << count_sign_block[j][0]
-          << " count1: " << count_sign_block[j][1]
-          << " diff: " << diff;
+          << " count1: " << count_sign_block[j][1] << " diff: " << diff;
    }
  }

@@ -242,19 +242,18 @@ class FwdTrans8x8TestBase {
      ASM_REGISTER_STATE_CHECK(
          RunFwdTxfm(test_input_block, test_temp_block, pitch_));
      for (int j = 0; j < 64; ++j) {
-          if (test_temp_block[j] > 0) {
-            test_temp_block[j] += 2;
-            test_temp_block[j] /= 4;
-            test_temp_block[j] *= 4;
-          } else {
-            test_temp_block[j] -= 2;
-            test_temp_block[j] /= 4;
-            test_temp_block[j] *= 4;
-          }
+        if (test_temp_block[j] > 0) {
+          test_temp_block[j] += 2;
+          test_temp_block[j] /= 4;
+          test_temp_block[j] *= 4;
+        } else {
+          test_temp_block[j] -= 2;
+          test_temp_block[j] /= 4;
+          test_temp_block[j] *= 4;
+        }
      }
      if (bit_depth_ == VPX_BITS_8) {
-        ASM_REGISTER_STATE_CHECK(
-            RunInvTxfm(test_temp_block, dst, pitch_));
+        ASM_REGISTER_STATE_CHECK(RunInvTxfm(test_temp_block, dst, pitch_));
 #if CONFIG_VP9_HIGHBITDEPTH
      } else {
        ASM_REGISTER_STATE_CHECK(
@@ -270,19 +269,18 @@ class FwdTrans8x8TestBase {
        const int diff = dst[j] - src[j];
 #endif
        const int error = diff * diff;
-        if (max_error < error)
-          max_error = error;
+        if (max_error < error) max_error = error;
        total_error += error;
      }
    }

    EXPECT_GE(1 << 2 * (bit_depth_ - 8), max_error)
-      << "Error: 8x8 FDCT/IDCT or FHT/IHT has an individual"
-      << " roundtrip error > 1";
+        << "Error: 8x8 FDCT/IDCT or FHT/IHT has an individual"
+        << " roundtrip error > 1";

-    EXPECT_GE((count_test_block << 2 * (bit_depth_ - 8))/5, total_error)
-      << "Error: 8x8 FDCT/IDCT or FHT/IHT has average roundtrip "
-      << "error > 1/5 per block";
+    EXPECT_GE((count_test_block << 2 * (bit_depth_ - 8)) / 5, total_error)
+        << "Error: 8x8 FDCT/IDCT or FHT/IHT has average roundtrip "
+        << "error > 1/5 per block";
  }

  void RunExtremalCheck() {
@@ -338,8 +336,7 @@ class FwdTrans8x8TestBase {
      ASM_REGISTER_STATE_CHECK(
          fwd_txfm_ref(test_input_block, ref_temp_block, pitch_, tx_type_));
      if (bit_depth_ == VPX_BITS_8) {
-        ASM_REGISTER_STATE_CHECK(
-            RunInvTxfm(test_temp_block, dst, pitch_));
+        ASM_REGISTER_STATE_CHECK(RunInvTxfm(test_temp_block, dst, pitch_));
 #if CONFIG_VP9_HIGHBITDEPTH
      } else {
        ASM_REGISTER_STATE_CHECK(
@@ -355,8 +352,7 @@ class FwdTrans8x8TestBase {
        const int diff = dst[j] - src[j];
 #endif
        const int error = diff * diff;
-        if (max_error < error)
-          max_error = error;
+        if (max_error < error) max_error = error;
        total_error += error;

        const int coeff_diff = test_temp_block[j] - ref_temp_block[j];
@@ -367,7 +363,7 @@ class FwdTrans8x8TestBase {
          << "Error: Extremal 8x8 FDCT/IDCT or FHT/IHT has"
          << "an individual roundtrip error > 1";

-      EXPECT_GE((count_test_block << 2 * (bit_depth_ - 8))/5, total_error)
+      EXPECT_GE((count_test_block << 2 * (bit_depth_ - 8)) / 5, total_error)
          << "Error: Extremal 8x8 FDCT/IDCT or FHT/IHT has average"
          << " roundtrip error > 1/5 per block";

@@ -408,29 +404,29 @@ class FwdTrans8x8TestBase {
      }

      reference_8x8_dct_2d(in, out_r);
-      for (int j = 0; j < kNumCoeffs; ++j)
+      for (int j = 0; j < kNumCoeffs; ++j) {
        coeff[j] = static_cast<tran_low_t>(round(out_r[j]));
+      }

      if (bit_depth_ == VPX_BITS_8) {
        ASM_REGISTER_STATE_CHECK(RunInvTxfm(coeff, dst, pitch_));
 #if CONFIG_VP9_HIGHBITDEPTH
      } else {
-        ASM_REGISTER_STATE_CHECK(RunInvTxfm(coeff, CONVERT_TO_BYTEPTR(dst16),
-                                            pitch_));
+        ASM_REGISTER_STATE_CHECK(
+            RunInvTxfm(coeff, CONVERT_TO_BYTEPTR(dst16), pitch_));
 #endif
      }

      for (int j = 0; j < kNumCoeffs; ++j) {
 #if CONFIG_VP9_HIGHBITDEPTH
-        const uint32_t diff =
+        const int diff =
            bit_depth_ == VPX_BITS_8 ? dst[j] - src[j] : dst16[j] - src16[j];
 #else
-        const uint32_t diff = dst[j] - src[j];
+        const int diff = dst[j] - src[j];
 #endif
        const uint32_t error = diff * diff;
        EXPECT_GE(1u << 2 * (bit_depth_ - 8), error)
-            << "Error: 8x8 IDCT has error " << error
-            << " at index " << j;
+            << "Error: 8x8 IDCT has error " << error << " at index " << j;
      }
    }
  }
@@ -446,25 +442,26 @@ class FwdTrans8x8TestBase {
      double out_r[kNumCoeffs];

      // Initialize a test block with input range [-mask_, mask_].
-      for (int j = 0; j < kNumCoeffs; ++j)
+      for (int j = 0; j < kNumCoeffs; ++j) {
        in[j] = rnd.Rand8() % 2 == 0 ? mask_ : -mask_;
+      }

      RunFwdTxfm(in, coeff, pitch_);
      reference_8x8_dct_2d(in, out_r);
-      for (int j = 0; j < kNumCoeffs; ++j)
+      for (int j = 0; j < kNumCoeffs; ++j) {
        coeff_r[j] = static_cast<tran_low_t>(round(out_r[j]));
+      }

      for (int j = 0; j < kNumCoeffs; ++j) {
-        const uint32_t diff = coeff[j] - coeff_r[j];
+        const int32_t diff = coeff[j] - coeff_r[j];
        const uint32_t error = diff * diff;
        EXPECT_GE(9u << 2 * (bit_depth_ - 8), error)
-            << "Error: 8x8 DCT has error " << error
-            << " at index " << j;
+            << "Error: 8x8 DCT has error " << error << " at index " << j;
      }
    }
  }

-void CompareInvReference(IdctFunc ref_txfm, int thresh) {
+  void CompareInvReference(IdctFunc ref_txfm, int thresh) {
    ACMRandom rnd(ACMRandom::DeterministicSeed());
    const int count_test_block = 10000;
    const int eob = 12;
@@ -481,7 +478,7 @@ void CompareInvReference(IdctFunc ref_txfm, int thresh) {
      for (int j = 0; j < kNumCoeffs; ++j) {
        if (j < eob) {
          // Random values less than the threshold, either positive or negative
-          coeff[scan[j]] = rnd(thresh) * (1-2*(i%2));
+          coeff[scan[j]] = rnd(thresh) * (1 - 2 * (i % 2));
        } else {
          coeff[scan[j]] = 0;
        }
@@ -501,22 +498,21 @@ void CompareInvReference(IdctFunc ref_txfm, int thresh) {
 #if CONFIG_VP9_HIGHBITDEPTH
      } else {
        ref_txfm(coeff, CONVERT_TO_BYTEPTR(ref16), pitch_);
-        ASM_REGISTER_STATE_CHECK(RunInvTxfm(coeff, CONVERT_TO_BYTEPTR(dst16),
-                                            pitch_));
+        ASM_REGISTER_STATE_CHECK(
+            RunInvTxfm(coeff, CONVERT_TO_BYTEPTR(dst16), pitch_));
 #endif
      }

      for (int j = 0; j < kNumCoeffs; ++j) {
 #if CONFIG_VP9_HIGHBITDEPTH
-        const uint32_t diff =
+        const int diff =
            bit_depth_ == VPX_BITS_8 ? dst[j] - ref[j] : dst16[j] - ref16[j];
 #else
-        const uint32_t diff = dst[j] - ref[j];
+        const int diff = dst[j] - ref[j];
 #endif
        const uint32_t error = diff * diff;
-        EXPECT_EQ(0u, error)
-            << "Error: 8x8 IDCT has error " << error
-            << " at index " << j;
+        EXPECT_EQ(0u, error) << "Error: 8x8 IDCT has error " << error
+                             << " at index " << j;
      }
    }
  }
@@ -527,17 +523,16 @@ void CompareInvReference(IdctFunc ref_txfm, int thresh) {
  int mask_;
 };

-class FwdTrans8x8DCT
-    : public FwdTrans8x8TestBase,
-      public ::testing::TestWithParam<Dct8x8Param> {
+class FwdTrans8x8DCT : public FwdTrans8x8TestBase,
+                       public ::testing::TestWithParam<Dct8x8Param> {
 public:
  virtual ~FwdTrans8x8DCT() {}

  virtual void SetUp() {
    fwd_txfm_ = GET_PARAM(0);
    inv_txfm_ = GET_PARAM(1);
-    tx_type_  = GET_PARAM(2);
-    pitch_    = 8;
+    tx_type_ = GET_PARAM(2);
+    pitch_ = 8;
    fwd_txfm_ref = fdct8x8_ref;
    bit_depth_ = GET_PARAM(3);
    mask_ = (1 << bit_depth_) - 1;
@@ -557,37 +552,26 @@ class FwdTrans8x8DCT
  IdctFunc inv_txfm_;
 };

-TEST_P(FwdTrans8x8DCT, SignBiasCheck) {
-  RunSignBiasCheck();
-}
+TEST_P(FwdTrans8x8DCT, SignBiasCheck) { RunSignBiasCheck(); }

-TEST_P(FwdTrans8x8DCT, RoundTripErrorCheck) {
-  RunRoundTripErrorCheck();
-}
+TEST_P(FwdTrans8x8DCT, RoundTripErrorCheck) { RunRoundTripErrorCheck(); }

-TEST_P(FwdTrans8x8DCT, ExtremalCheck) {
-  RunExtremalCheck();
-}
+TEST_P(FwdTrans8x8DCT, ExtremalCheck) { RunExtremalCheck(); }

-TEST_P(FwdTrans8x8DCT, FwdAccuracyCheck) {
-  RunFwdAccuracyCheck();
-}
+TEST_P(FwdTrans8x8DCT, FwdAccuracyCheck) { RunFwdAccuracyCheck(); }

-TEST_P(FwdTrans8x8DCT, InvAccuracyCheck) {
-  RunInvAccuracyCheck();
-}
+TEST_P(FwdTrans8x8DCT, InvAccuracyCheck) { RunInvAccuracyCheck(); }

-class FwdTrans8x8HT
-    : public FwdTrans8x8TestBase,
-      public ::testing::TestWithParam<Ht8x8Param> {
+class FwdTrans8x8HT : public FwdTrans8x8TestBase,
+                      public ::testing::TestWithParam<Ht8x8Param> {
 public:
  virtual ~FwdTrans8x8HT() {}

  virtual void SetUp() {
    fwd_txfm_ = GET_PARAM(0);
    inv_txfm_ = GET_PARAM(1);
-    tx_type_  = GET_PARAM(2);
-    pitch_    = 8;
+    tx_type_ = GET_PARAM(2);
+    pitch_ = 8;
    fwd_txfm_ref = fht8x8_ref;
    bit_depth_ = GET_PARAM(3);
    mask_ = (1 << bit_depth_) - 1;
@@ -607,21 +591,14 @@ class FwdTrans8x8HT
  IhtFunc inv_txfm_;
 };

-TEST_P(FwdTrans8x8HT, SignBiasCheck) {
-  RunSignBiasCheck();
-}
+TEST_P(FwdTrans8x8HT, SignBiasCheck) { RunSignBiasCheck(); }

-TEST_P(FwdTrans8x8HT, RoundTripErrorCheck) {
-  RunRoundTripErrorCheck();
-}
+TEST_P(FwdTrans8x8HT, RoundTripErrorCheck) { RunRoundTripErrorCheck(); }

-TEST_P(FwdTrans8x8HT, ExtremalCheck) {
-  RunExtremalCheck();
-}
+TEST_P(FwdTrans8x8HT, ExtremalCheck) { RunExtremalCheck(); }

-class InvTrans8x8DCT
-    : public FwdTrans8x8TestBase,
-      public ::testing::TestWithParam<Idct8x8Param> {
+class InvTrans8x8DCT : public FwdTrans8x8TestBase,
+                       public ::testing::TestWithParam<Idct8x8Param> {
 public:
  virtual ~InvTrans8x8DCT() {}

@@ -640,7 +617,7 @@ class InvTrans8x8DCT
  void RunInvTxfm(tran_low_t *out, uint8_t *dst, int stride) {
    inv_txfm_(out, dst, stride);
  }
-  void RunFwdTxfm(int16_t *out, tran_low_t *dst, int stride) {}
+  void RunFwdTxfm(int16_t * /*out*/, tran_low_t * /*dst*/, int /*stride*/) {}

  IdctFunc ref_txfm_;
  IdctFunc inv_txfm_;
@@ -657,14 +634,14 @@ using std::tr1::make_tuple;
 INSTANTIATE_TEST_CASE_P(
    C, FwdTrans8x8DCT,
    ::testing::Values(
-        make_tuple(&vp9_fdct8x8_c, &vp9_idct8x8_64_add_c, 0, VPX_BITS_8),
-        make_tuple(&vp9_highbd_fdct8x8_c, &idct8x8_10, 0, VPX_BITS_10),
-        make_tuple(&vp9_highbd_fdct8x8_c, &idct8x8_12, 0, VPX_BITS_12)));
+        make_tuple(&vpx_fdct8x8_c, &vpx_idct8x8_64_add_c, 0, VPX_BITS_8),
+        make_tuple(&vpx_highbd_fdct8x8_c, &idct8x8_10, 0, VPX_BITS_10),
+        make_tuple(&vpx_highbd_fdct8x8_c, &idct8x8_12, 0, VPX_BITS_12)));
 #else
-INSTANTIATE_TEST_CASE_P(
-    C, FwdTrans8x8DCT,
-    ::testing::Values(
-        make_tuple(&vp9_fdct8x8_c, &vp9_idct8x8_64_add_c, 0, VPX_BITS_8)));
+INSTANTIATE_TEST_CASE_P(C, FwdTrans8x8DCT,
+                        ::testing::Values(make_tuple(&vpx_fdct8x8_c,
+                                                     &vpx_idct8x8_64_add_c, 0,
+                                                     VPX_BITS_8)));
 #endif  // CONFIG_VP9_HIGHBITDEPTH

 #if CONFIG_VP9_HIGHBITDEPTH
@@ -684,8 +661,6 @@ INSTANTIATE_TEST_CASE_P(
        make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_c, 2, VPX_BITS_8),
        make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_c, 3, VPX_BITS_8)));
 #else
-// TODO(jingning): re-enable after this handles the expanded range [0, 65535]
-// returned from Rand16().
 INSTANTIATE_TEST_CASE_P(
    C, FwdTrans8x8HT,
    ::testing::Values(
@@ -695,17 +670,17 @@ INSTANTIATE_TEST_CASE_P(
        make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_c, 3, VPX_BITS_8)));
 #endif  // CONFIG_VP9_HIGHBITDEPTH

-#if HAVE_NEON_ASM && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
-// TODO(jingning): re-enable after this handles the expanded range [0, 65535]
-// returned from Rand16().
-INSTANTIATE_TEST_CASE_P(
-    NEON, FwdTrans8x8DCT,
-    ::testing::Values(
-        make_tuple(&vp9_fdct8x8_neon, &vp9_idct8x8_64_add_neon, 0,
-                   VPX_BITS_8)));
-#endif  // HAVE_NEON_ASM && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
-
-#if HAVE_NEON && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
+#if HAVE_NEON && !CONFIG_EMULATE_HARDWARE
+#if CONFIG_VP9_HIGHBITDEPTH
+INSTANTIATE_TEST_CASE_P(NEON, FwdTrans8x8DCT,
+                        ::testing::Values(make_tuple(&vpx_fdct8x8_c,
+                                                     &vpx_idct8x8_64_add_neon,
+                                                     0, VPX_BITS_8)));
+#else   // !CONFIG_VP9_HIGHBITDEPTH
+INSTANTIATE_TEST_CASE_P(NEON, FwdTrans8x8DCT,
+                        ::testing::Values(make_tuple(&vpx_fdct8x8_neon,
+                                                     &vpx_idct8x8_64_add_neon,
+                                                     0, VPX_BITS_8)));
 INSTANTIATE_TEST_CASE_P(
    NEON, FwdTrans8x8HT,
    ::testing::Values(
@@ -713,16 +688,14 @@ INSTANTIATE_TEST_CASE_P(
        make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_neon, 1, VPX_BITS_8),
        make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_neon, 2, VPX_BITS_8),
        make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_neon, 3, VPX_BITS_8)));
+#endif  // CONFIG_VP9_HIGHBITDEPTH
 #endif  // HAVE_NEON && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE

 #if HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
-// TODO(jingning): re-enable after these handle the expanded range [0, 65535]
-// returned from Rand16().
-INSTANTIATE_TEST_CASE_P(
-    SSE2, FwdTrans8x8DCT,
-    ::testing::Values(
-        make_tuple(&vp9_fdct8x8_sse2, &vp9_idct8x8_64_add_sse2, 0,
-                   VPX_BITS_8)));
+INSTANTIATE_TEST_CASE_P(SSE2, FwdTrans8x8DCT,
+                        ::testing::Values(make_tuple(&vpx_fdct8x8_sse2,
+                                                     &vpx_idct8x8_64_add_sse2,
+                                                     0, VPX_BITS_8)));
 INSTANTIATE_TEST_CASE_P(
    SSE2, FwdTrans8x8HT,
    ::testing::Values(
@@ -735,19 +708,17 @@ INSTANTIATE_TEST_CASE_P(
 #if HAVE_SSE2 && CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
 INSTANTIATE_TEST_CASE_P(
    SSE2, FwdTrans8x8DCT,
-    ::testing::Values(
-        make_tuple(&vp9_fdct8x8_sse2, &vp9_idct8x8_64_add_c, 0, VPX_BITS_8),
-        make_tuple(&vp9_highbd_fdct8x8_c,
-                   &idct8x8_64_add_10_sse2, 12, VPX_BITS_10),
-        make_tuple(&vp9_highbd_fdct8x8_sse2,
-                   &idct8x8_64_add_10_sse2, 12, VPX_BITS_10),
-        make_tuple(&vp9_highbd_fdct8x8_c,
-                   &idct8x8_64_add_12_sse2, 12, VPX_BITS_12),
-        make_tuple(&vp9_highbd_fdct8x8_sse2,
-                   &idct8x8_64_add_12_sse2, 12, VPX_BITS_12)));
+    ::testing::Values(make_tuple(&vpx_fdct8x8_sse2, &vpx_idct8x8_64_add_c, 0,
+                                 VPX_BITS_8),
+                      make_tuple(&vpx_highbd_fdct8x8_c, &idct8x8_64_add_10_sse2,
+                                 12, VPX_BITS_10),
+                      make_tuple(&vpx_highbd_fdct8x8_sse2,
+                                 &idct8x8_64_add_10_sse2, 12, VPX_BITS_10),
+                      make_tuple(&vpx_highbd_fdct8x8_c, &idct8x8_64_add_12_sse2,
+                                 12, VPX_BITS_12),
+                      make_tuple(&vpx_highbd_fdct8x8_sse2,
+                                 &idct8x8_64_add_12_sse2, 12, VPX_BITS_12)));

-// TODO(jingning): re-enable after these handle the expanded range [0, 65535]
-// returned from Rand16().
 INSTANTIATE_TEST_CASE_P(
    SSE2, FwdTrans8x8HT,
    ::testing::Values(
@@ -761,32 +732,27 @@ INSTANTIATE_TEST_CASE_P(
 INSTANTIATE_TEST_CASE_P(
    SSE2, InvTrans8x8DCT,
    ::testing::Values(
-        make_tuple(&idct8x8_10_add_10_c,
-                   &idct8x8_10_add_10_sse2, 6225, VPX_BITS_10),
-        make_tuple(&idct8x8_10,
-                   &idct8x8_64_add_10_sse2, 6225, VPX_BITS_10),
-        make_tuple(&idct8x8_10_add_12_c,
-                   &idct8x8_10_add_12_sse2, 6225, VPX_BITS_12),
-        make_tuple(&idct8x8_12,
-                   &idct8x8_64_add_12_sse2, 6225, VPX_BITS_12)));
+        make_tuple(&idct8x8_12_add_10_c, &idct8x8_12_add_10_sse2, 6225,
+                   VPX_BITS_10),
+        make_tuple(&idct8x8_10, &idct8x8_64_add_10_sse2, 6225, VPX_BITS_10),
+        make_tuple(&idct8x8_12_add_12_c, &idct8x8_12_add_12_sse2, 6225,
+                   VPX_BITS_12),
+        make_tuple(&idct8x8_12, &idct8x8_64_add_12_sse2, 6225, VPX_BITS_12)));
 #endif  // HAVE_SSE2 && CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE

 #if HAVE_SSSE3 && ARCH_X86_64 && !CONFIG_VP9_HIGHBITDEPTH && \
    !CONFIG_EMULATE_HARDWARE
-// TODO(jingning): re-enable after this handles the expanded range [0, 65535]
-// returned from Rand16().
-INSTANTIATE_TEST_CASE_P(
-    SSSE3, FwdTrans8x8DCT,
-    ::testing::Values(
-        make_tuple(&vp9_fdct8x8_ssse3, &vp9_idct8x8_64_add_ssse3, 0,
-                   VPX_BITS_8)));
+INSTANTIATE_TEST_CASE_P(SSSE3, FwdTrans8x8DCT,
+                        ::testing::Values(make_tuple(&vpx_fdct8x8_ssse3,
+                                                     &vpx_idct8x8_64_add_ssse3,
+                                                     0, VPX_BITS_8)));
 #endif

 #if HAVE_MSA && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
-INSTANTIATE_TEST_CASE_P(
-    MSA, FwdTrans8x8DCT,
-    ::testing::Values(
-        make_tuple(&vp9_fdct8x8_msa, &vp9_idct8x8_64_add_msa, 0, VPX_BITS_8)));
+INSTANTIATE_TEST_CASE_P(MSA, FwdTrans8x8DCT,
+                        ::testing::Values(make_tuple(&vpx_fdct8x8_msa,
+                                                     &vpx_idct8x8_64_add_msa, 0,
+                                                     VPX_BITS_8)));
 INSTANTIATE_TEST_CASE_P(
    MSA, FwdTrans8x8HT,
    ::testing::Values(
--- a/test/frame_size_tests.cc
+++ b/test/frame_size_tests.cc
@@ -13,12 +13,11 @@

 namespace {

-class VP9FrameSizeTestsLarge
-    : public ::libvpx_test::EncoderTest,
-      public ::testing::Test {
+class VP9FrameSizeTestsLarge : public ::libvpx_test::EncoderTest,
+                               public ::testing::Test {
 protected:
-  VP9FrameSizeTestsLarge() : EncoderTest(&::libvpx_test::kVP9),
-                             expected_res_(VPX_CODEC_OK) {}
+  VP9FrameSizeTestsLarge()
+      : EncoderTest(&::libvpx_test::kVP9), expected_res_(VPX_CODEC_OK) {}
  virtual ~VP9FrameSizeTestsLarge() {}

  virtual void SetUp() {
@@ -27,7 +26,7 @@ class VP9FrameSizeTestsLarge
  }

  virtual bool HandleDecodeResult(const vpx_codec_err_t res_dec,
-                                  const libvpx_test::VideoSource& /*video*/,
+                                  const libvpx_test::VideoSource & /*video*/,
                                  libvpx_test::Decoder *decoder) {
    EXPECT_EQ(expected_res_, res_dec) << decoder->DecodeError();
    return !::testing::Test::HasFailure();
@@ -67,14 +66,14 @@ TEST_F(VP9FrameSizeTestsLarge, ValidSizes) {
  expected_res_ = VPX_CODEC_OK;
  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
 #else
-  // This test produces a pretty large single frame allocation,  (roughly
-  // 25 megabits). The encoder allocates a good number of these frames
-  // one for each lag in frames (for 2 pass), and then one for each possible
-  // reference buffer (8) - we can end up with up to 30 buffers of roughly this
-  // size or almost 1 gig of memory.
-  // In total the allocations will exceed 2GiB which may cause a failure with
-  // mingw + wine, use a smaller size in that case.
-#if defined(_WIN32) && !defined(_WIN64)
+// This test produces a pretty large single frame allocation (roughly
+// 25 megabytes). The encoder allocates a good number of these frames:
+// one for each lag in frames (for 2 pass), and then one for each possible
+// reference buffer (8) - we can end up with up to 30 buffers of roughly this
+// size or almost 1 gig of memory.
+// In total the allocations will exceed 2GiB which may cause a failure with
+// mingw + wine; use a smaller size in that case.
+#if defined(_WIN32) && !defined(_WIN64) || defined(__OS2__)
  video.SetSize(4096, 3072);
 #else
  video.SetSize(4096, 4096);
--- a/test/hadamard_test.cc
+++ b/test/hadamard_test.cc
@@ -0,0 +1,220 @@
+/*
+ *  Copyright (c) 2016 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <algorithm>
+
+#include "third_party/googletest/src/include/gtest/gtest.h"
+
+#include "./vpx_dsp_rtcd.h"
+
+#include "test/acm_random.h"
+#include "test/register_state_check.h"
+
+namespace {
+
+using ::libvpx_test::ACMRandom;
+
+typedef void (*HadamardFunc)(const int16_t *a, int a_stride, int16_t *b);
+
+void hadamard_loop(const int16_t *a, int a_stride, int16_t *out) {
+  int16_t b[8];
+  for (int i = 0; i < 8; i += 2) {
+    b[i + 0] = a[i * a_stride] + a[(i + 1) * a_stride];
+    b[i + 1] = a[i * a_stride] - a[(i + 1) * a_stride];
+  }
+  int16_t c[8];
+  for (int i = 0; i < 8; i += 4) {
+    c[i + 0] = b[i + 0] + b[i + 2];
+    c[i + 1] = b[i + 1] + b[i + 3];
+    c[i + 2] = b[i + 0] - b[i + 2];
+    c[i + 3] = b[i + 1] - b[i + 3];
+  }
+  out[0] = c[0] + c[4];
+  out[7] = c[1] + c[5];
+  out[3] = c[2] + c[6];
+  out[4] = c[3] + c[7];
+  out[2] = c[0] - c[4];
+  out[6] = c[1] - c[5];
+  out[1] = c[2] - c[6];
+  out[5] = c[3] - c[7];
+}
+
+void reference_hadamard8x8(const int16_t *a, int a_stride, int16_t *b) {
+  int16_t buf[64];
+  for (int i = 0; i < 8; ++i) {
+    hadamard_loop(a + i, a_stride, buf + i * 8);
+  }
+
+  for (int i = 0; i < 8; ++i) {
+    hadamard_loop(buf + i, 8, b + i * 8);
+  }
+}
+
+void reference_hadamard16x16(const int16_t *a, int a_stride, int16_t *b) {
+  /* The source is a 16x16 block. The destination is rearranged to 8x32.
+   * Input is 9 bit. */
+  reference_hadamard8x8(a + 0 + 0 * a_stride, a_stride, b + 0);
+  reference_hadamard8x8(a + 8 + 0 * a_stride, a_stride, b + 64);
+  reference_hadamard8x8(a + 0 + 8 * a_stride, a_stride, b + 128);
+  reference_hadamard8x8(a + 8 + 8 * a_stride, a_stride, b + 192);
+
+  /* Overlay the 8x8 blocks and combine. */
+  for (int i = 0; i < 64; ++i) {
+    /* 8x8 steps the range up to 15 bits. */
+    const int16_t a0 = b[0];
+    const int16_t a1 = b[64];
+    const int16_t a2 = b[128];
+    const int16_t a3 = b[192];
+
+    /* Prevent the result from escaping int16_t. */
+    const int16_t b0 = (a0 + a1) >> 1;
+    const int16_t b1 = (a0 - a1) >> 1;
+    const int16_t b2 = (a2 + a3) >> 1;
+    const int16_t b3 = (a2 - a3) >> 1;
+
+    /* Store a 16 bit value. */
+    b[0] = b0 + b2;
+    b[64] = b1 + b3;
+    b[128] = b0 - b2;
+    b[192] = b1 - b3;
+
+    ++b;
+  }
+}
+
+class HadamardTestBase : public ::testing::TestWithParam<HadamardFunc> {
+ public:
+  virtual void SetUp() {
+    h_func_ = GetParam();
+    rnd_.Reset(ACMRandom::DeterministicSeed());
+  }
+
+ protected:
+  HadamardFunc h_func_;
+  ACMRandom rnd_;
+};
+
+class Hadamard8x8Test : public HadamardTestBase {};
+
+TEST_P(Hadamard8x8Test, CompareReferenceRandom) {
+  DECLARE_ALIGNED(16, int16_t, a[64]);
+  DECLARE_ALIGNED(16, int16_t, b[64]);
+  int16_t b_ref[64];
+  for (int i = 0; i < 64; ++i) {
+    a[i] = rnd_.Rand9Signed();
+  }
+  memset(b, 0, sizeof(b));
+  memset(b_ref, 0, sizeof(b_ref));
+
+  reference_hadamard8x8(a, 8, b_ref);
+  ASM_REGISTER_STATE_CHECK(h_func_(a, 8, b));
+
+  // The order of the output is not important. Sort before checking.
+  std::sort(b, b + 64);
+  std::sort(b_ref, b_ref + 64);
+  EXPECT_EQ(0, memcmp(b, b_ref, sizeof(b)));
+}
+
+TEST_P(Hadamard8x8Test, VaryStride) {
+  DECLARE_ALIGNED(16, int16_t, a[64 * 8]);
+  DECLARE_ALIGNED(16, int16_t, b[64]);
+  int16_t b_ref[64];
+  for (int i = 0; i < 64 * 8; ++i) {
+    a[i] = rnd_.Rand9Signed();
+  }
+
+  for (int i = 8; i < 64; i += 8) {
+    memset(b, 0, sizeof(b));
+    memset(b_ref, 0, sizeof(b_ref));
+
+    reference_hadamard8x8(a, i, b_ref);
+    ASM_REGISTER_STATE_CHECK(h_func_(a, i, b));
+
+    // The order of the output is not important. Sort before checking.
+    std::sort(b, b + 64);
+    std::sort(b_ref, b_ref + 64);
+    EXPECT_EQ(0, memcmp(b, b_ref, sizeof(b)));
+  }
+}
+
+INSTANTIATE_TEST_CASE_P(C, Hadamard8x8Test,
+                        ::testing::Values(&vpx_hadamard_8x8_c));
+
+#if HAVE_SSE2
+INSTANTIATE_TEST_CASE_P(SSE2, Hadamard8x8Test,
+                        ::testing::Values(&vpx_hadamard_8x8_sse2));
+#endif  // HAVE_SSE2
+
+#if HAVE_SSSE3 && ARCH_X86_64
+INSTANTIATE_TEST_CASE_P(SSSE3, Hadamard8x8Test,
+                        ::testing::Values(&vpx_hadamard_8x8_ssse3));
+#endif  // HAVE_SSSE3 && ARCH_X86_64
+
+#if HAVE_NEON
+INSTANTIATE_TEST_CASE_P(NEON, Hadamard8x8Test,
+                        ::testing::Values(&vpx_hadamard_8x8_neon));
+#endif  // HAVE_NEON
+
+class Hadamard16x16Test : public HadamardTestBase {};
+
+TEST_P(Hadamard16x16Test, CompareReferenceRandom) {
+  DECLARE_ALIGNED(16, int16_t, a[16 * 16]);
+  DECLARE_ALIGNED(16, int16_t, b[16 * 16]);
+  int16_t b_ref[16 * 16];
+  for (int i = 0; i < 16 * 16; ++i) {
+    a[i] = rnd_.Rand9Signed();
+  }
+  memset(b, 0, sizeof(b));
+  memset(b_ref, 0, sizeof(b_ref));
+
+  reference_hadamard16x16(a, 16, b_ref);
+  ASM_REGISTER_STATE_CHECK(h_func_(a, 16, b));
+
+  // The order of the output is not important. Sort before checking.
+  std::sort(b, b + 16 * 16);
+  std::sort(b_ref, b_ref + 16 * 16);
+  EXPECT_EQ(0, memcmp(b, b_ref, sizeof(b)));
+}
+
+TEST_P(Hadamard16x16Test, VaryStride) {
+  DECLARE_ALIGNED(16, int16_t, a[16 * 16 * 8]);
+  DECLARE_ALIGNED(16, int16_t, b[16 * 16]);
+  int16_t b_ref[16 * 16];
+  for (int i = 0; i < 16 * 16 * 8; ++i) {
+    a[i] = rnd_.Rand9Signed();
+  }
+
+  for (int i = 8; i < 64; i += 8) {
+    memset(b, 0, sizeof(b));
+    memset(b_ref, 0, sizeof(b_ref));
+
+    reference_hadamard16x16(a, i, b_ref);
+    ASM_REGISTER_STATE_CHECK(h_func_(a, i, b));
+
+    // The order of the output is not important. Sort before checking.
+    std::sort(b, b + 16 * 16);
+    std::sort(b_ref, b_ref + 16 * 16);
+    EXPECT_EQ(0, memcmp(b, b_ref, sizeof(b)));
+  }
+}
+
+INSTANTIATE_TEST_CASE_P(C, Hadamard16x16Test,
+                        ::testing::Values(&vpx_hadamard_16x16_c));
+
+#if HAVE_SSE2
+INSTANTIATE_TEST_CASE_P(SSE2, Hadamard16x16Test,
+                        ::testing::Values(&vpx_hadamard_16x16_sse2));
+#endif  // HAVE_SSE2
+
+#if HAVE_NEON
+INSTANTIATE_TEST_CASE_P(NEON, Hadamard16x16Test,
+                        ::testing::Values(&vpx_hadamard_16x16_neon));
+#endif  // HAVE_NEON
+}  // namespace
--- a/test/i420_video_source.h
+++ b/test/i420_video_source.h
@@ -21,14 +21,11 @@ namespace libvpx_test {
 // so that we can do actual file encodes.
 class I420VideoSource : public YUVVideoSource {
 public:
-  I420VideoSource(const std::string &file_name,
-                  unsigned int width, unsigned int height,
-                  int rate_numerator, int rate_denominator,
+  I420VideoSource(const std::string &file_name, unsigned int width,
+                  unsigned int height, int rate_numerator, int rate_denominator,
                  unsigned int start, int limit)
-      : YUVVideoSource(file_name, VPX_IMG_FMT_I420,
-                       width, height,
-                       rate_numerator, rate_denominator,
-                       start, limit) {}
+      : YUVVideoSource(file_name, VPX_IMG_FMT_I420, width, height,
+                       rate_numerator, rate_denominator, start, limit) {}
 };

 }  // namespace libvpx_test
--- a/test/idct8x8_test.cc
+++ b/test/idct8x8_test.cc
@@ -14,33 +14,24 @@

 #include "third_party/googletest/src/include/gtest/gtest.h"

-#include "./vp9_rtcd.h"
-
+#include "./vpx_dsp_rtcd.h"
 #include "test/acm_random.h"
 #include "vpx/vpx_integer.h"
+#include "vpx_ports/msvc.h"  // for round()

 using libvpx_test::ACMRandom;

 namespace {

-#ifdef _MSC_VER
-static int round(double x) {
-  if (x < 0)
-    return static_cast<int>(ceil(x - 0.5));
-  else
-    return static_cast<int>(floor(x + 0.5));
-}
-#endif
-
 void reference_dct_1d(double input[8], double output[8]) {
  const double kPi = 3.141592653589793238462643383279502884;
  const double kInvSqrt2 = 0.707106781186547524400844362104;
  for (int k = 0; k < 8; k++) {
    output[k] = 0.0;
-    for (int n = 0; n < 8; n++)
-      output[k] += input[n]*cos(kPi*(2*n+1)*k/16.0);
-    if (k == 0)
-      output[k] = output[k]*kInvSqrt2;
+    for (int n = 0; n < 8; n++) {
+      output[k] += input[n] * cos(kPi * (2 * n + 1) * k / 16.0);
+    }
+    if (k == 0) output[k] = output[k] * kInvSqrt2;
  }
 }

@@ -48,61 +39,19 @@ void reference_dct_2d(int16_t input[64], double output[64]) {
  // First transform columns
  for (int i = 0; i < 8; ++i) {
    double temp_in[8], temp_out[8];
-    for (int j = 0; j < 8; ++j)
-      temp_in[j] = input[j*8 + i];
+    for (int j = 0; j < 8; ++j) temp_in[j] = input[j * 8 + i];
    reference_dct_1d(temp_in, temp_out);
-    for (int j = 0; j < 8; ++j)
-      output[j*8 + i] = temp_out[j];
+    for (int j = 0; j < 8; ++j) output[j * 8 + i] = temp_out[j];
  }
  // Then transform rows
  for (int i = 0; i < 8; ++i) {
    double temp_in[8], temp_out[8];
-    for (int j = 0; j < 8; ++j)
-      temp_in[j] = output[j + i*8];
+    for (int j = 0; j < 8; ++j) temp_in[j] = output[j + i * 8];
    reference_dct_1d(temp_in, temp_out);
-    for (int j = 0; j < 8; ++j)
-      output[j + i*8] = temp_out[j];
+    for (int j = 0; j < 8; ++j) output[j + i * 8] = temp_out[j];
  }
  // Scale by some magic number
-  for (int i = 0; i < 64; ++i)
-    output[i] *= 2;
-}
-
-void reference_idct_1d(double input[8], double output[8]) {
-  const double kPi = 3.141592653589793238462643383279502884;
-  const double kSqrt2 = 1.414213562373095048801688724209698;
-  for (int k = 0; k < 8; k++) {
-    output[k] = 0.0;
-    for (int n = 0; n < 8; n++) {
-      output[k] += input[n]*cos(kPi*(2*k+1)*n/16.0);
-      if (n == 0)
-        output[k] = output[k]/kSqrt2;
-    }
-  }
-}
-
-void reference_idct_2d(double input[64], int16_t output[64]) {
-  double out[64], out2[64];
-  // First transform rows
-  for (int i = 0; i < 8; ++i) {
-    double temp_in[8], temp_out[8];
-    for (int j = 0; j < 8; ++j)
-      temp_in[j] = input[j + i*8];
-    reference_idct_1d(temp_in, temp_out);
-    for (int j = 0; j < 8; ++j)
-      out[j + i*8] = temp_out[j];
-  }
-  // Then transform columns
-  for (int i = 0; i < 8; ++i) {
-    double temp_in[8], temp_out[8];
-    for (int j = 0; j < 8; ++j)
-      temp_in[j] = out[j*8 + i];
-    reference_idct_1d(temp_in, temp_out);
-    for (int j = 0; j < 8; ++j)
-      out2[j*8 + i] = temp_out[j];
-  }
-  for (int i = 0; i < 64; ++i)
-    output[i] = round(out2[i]/32);
+  for (int i = 0; i < 64; ++i) output[i] *= 2;
 }

 TEST(VP9Idct8x8Test, AccuracyCheck) {
@@ -119,19 +68,18 @@ TEST(VP9Idct8x8Test, AccuracyCheck) {
      dst[j] = rnd.Rand8();
    }
    // Initialize a test block with input range [-255, 255].
-    for (int j = 0; j < 64; ++j)
-      input[j] = src[j] - dst[j];
+    for (int j = 0; j < 64; ++j) input[j] = src[j] - dst[j];

    reference_dct_2d(input, output_r);
-    for (int j = 0; j < 64; ++j)
-      coeff[j] = round(output_r[j]);
-    vp9_idct8x8_64_add_c(coeff, dst, 8);
+    for (int j = 0; j < 64; ++j) {
+      coeff[j] = static_cast<tran_low_t>(round(output_r[j]));
+    }
+    vpx_idct8x8_64_add_c(coeff, dst, 8);
    for (int j = 0; j < 64; ++j) {
      const int diff = dst[j] - src[j];
      const int error = diff * diff;
-      EXPECT_GE(1, error)
-          << "Error: 8x8 FDCT/IDCT has error " << error
-          << " at index " << j;
+      EXPECT_GE(1, error) << "Error: 8x8 FDCT/IDCT has error " << error
+                          << " at index " << j;
    }
  }
 }
--- a/test/idct_test.cc
+++ b/test/idct_test.cc
@@ -10,10 +10,11 @@

 #include "./vpx_config.h"
 #include "./vp8_rtcd.h"
-#include "test/clear_system_state.h"
-#include "test/register_state_check.h"
+
 #include "third_party/googletest/src/include/gtest/gtest.h"

+#include "test/clear_system_state.h"
+#include "test/register_state_check.h"
 #include "vpx/vpx_integer.h"

 typedef void (*IdctFunc)(int16_t *input, unsigned char *pred_ptr,
@@ -42,11 +43,12 @@ class IDCTTest : public ::testing::TestWithParam<IdctFunc> {
 TEST_P(IDCTTest, TestGuardBlocks) {
  int i;

-  for (i = 0; i < 256; i++)
+  for (i = 0; i < 256; i++) {
    if ((i & 0xF) < 4 && i < 64)
      EXPECT_EQ(0, output[i]) << i;
    else
      EXPECT_EQ(255, output[i]);
+  }
 }

 TEST_P(IDCTTest, TestAllZeros) {
@@ -54,11 +56,12 @@ TEST_P(IDCTTest, TestAllZeros) {

  ASM_REGISTER_STATE_CHECK(UUT(input, output, 16, output, 16));

-  for (i = 0; i < 256; i++)
+  for (i = 0; i < 256; i++) {
    if ((i & 0xF) < 4 && i < 64)
      EXPECT_EQ(0, output[i]) << "i==" << i;
    else
      EXPECT_EQ(255, output[i]) << "i==" << i;
+  }
 }

 TEST_P(IDCTTest, TestAllOnes) {
@@ -67,11 +70,12 @@ TEST_P(IDCTTest, TestAllOnes) {
  input[0] = 4;
  ASM_REGISTER_STATE_CHECK(UUT(input, output, 16, output, 16));

-  for (i = 0; i < 256; i++)
+  for (i = 0; i < 256; i++) {
    if ((i & 0xF) < 4 && i < 64)
      EXPECT_EQ(1, output[i]) << "i==" << i;
    else
      EXPECT_EQ(255, output[i]) << "i==" << i;
+  }
 }

 TEST_P(IDCTTest, TestAddOne) {
@@ -81,11 +85,12 @@ TEST_P(IDCTTest, TestAddOne) {
  input[0] = 4;
  ASM_REGISTER_STATE_CHECK(UUT(input, predict, 16, output, 16));

-  for (i = 0; i < 256; i++)
+  for (i = 0; i < 256; i++) {
    if ((i & 0xF) < 4 && i < 64)
      EXPECT_EQ(i + 1, output[i]) << "i==" << i;
    else
      EXPECT_EQ(255, output[i]) << "i==" << i;
+  }
 }

 TEST_P(IDCTTest, TestWithData) {
@@ -95,7 +100,7 @@ TEST_P(IDCTTest, TestWithData) {

  ASM_REGISTER_STATE_CHECK(UUT(input, output, 16, output, 16));

-  for (i = 0; i < 256; i++)
+  for (i = 0; i < 256; i++) {
    if ((i & 0xF) > 3 || i > 63)
      EXPECT_EQ(255, output[i]) << "i==" << i;
    else if (i == 0)
@@ -106,11 +111,20 @@ TEST_P(IDCTTest, TestWithData) {
      EXPECT_EQ(3, output[i]) << "i==" << i;
    else
      EXPECT_EQ(0, output[i]) << "i==" << i;
+  }
 }

 INSTANTIATE_TEST_CASE_P(C, IDCTTest, ::testing::Values(vp8_short_idct4x4llm_c));
+#if HAVE_NEON
+INSTANTIATE_TEST_CASE_P(NEON, IDCTTest,
+                        ::testing::Values(vp8_short_idct4x4llm_neon));
+#endif
 #if HAVE_MMX
 INSTANTIATE_TEST_CASE_P(MMX, IDCTTest,
                        ::testing::Values(vp8_short_idct4x4llm_mmx));
 #endif
+#if HAVE_MSA
+INSTANTIATE_TEST_CASE_P(MSA, IDCTTest,
+                        ::testing::Values(vp8_short_idct4x4llm_msa));
+#endif
 }
--- a/test/intrapred_test.cc
+++ b/test/intrapred_test.cc
@@ -1,396 +0,0 @@
-/*
- *  Copyright (c) 2012 The WebM project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-
-#include <string.h>
-#include "test/acm_random.h"
-#include "test/clear_system_state.h"
-#include "test/register_state_check.h"
-#include "third_party/googletest/src/include/gtest/gtest.h"
-
-#include "./vpx_config.h"
-#include "./vp8_rtcd.h"
-#include "vp8/common/blockd.h"
-#include "vpx_mem/vpx_mem.h"
-
-namespace {
-
-using libvpx_test::ACMRandom;
-
-class IntraPredBase {
- public:
-  virtual ~IntraPredBase() { libvpx_test::ClearSystemState(); }
-
- protected:
-  void SetupMacroblock(MACROBLOCKD *mbptr,
-                       MODE_INFO *miptr,
-                       uint8_t *data,
-                       int block_size,
-                       int stride,
-                       int num_planes) {
-    mbptr_ = mbptr;
-    miptr_ = miptr;
-    mbptr_->up_available = 1;
-    mbptr_->left_available = 1;
-    mbptr_->mode_info_context = miptr_;
-    stride_ = stride;
-    block_size_ = block_size;
-    num_planes_ = num_planes;
-    for (int p = 0; p < num_planes; p++)
-      data_ptr_[p] = data + stride * (block_size + 1) * p +
-                     stride + block_size;
-  }
-
-  void FillRandom() {
-    // Fill edges with random data
-    ACMRandom rnd(ACMRandom::DeterministicSeed());
-    for (int p = 0; p < num_planes_; p++) {
-      for (int x = -1 ; x <= block_size_; x++)
-        data_ptr_[p][x - stride_] = rnd.Rand8();
-      for (int y = 0; y < block_size_; y++)
-        data_ptr_[p][y * stride_ - 1] = rnd.Rand8();
-    }
-  }
-
-  virtual void Predict(MB_PREDICTION_MODE mode) = 0;
-
-  void SetLeftUnavailable() {
-    mbptr_->left_available = 0;
-    for (int p = 0; p < num_planes_; p++)
-      for (int i = -1; i < block_size_; ++i)
-        data_ptr_[p][stride_ * i - 1] = 129;
-  }
-
-  void SetTopUnavailable() {
-    mbptr_->up_available = 0;
-    for (int p = 0; p < num_planes_; p++)
-      memset(&data_ptr_[p][-1 - stride_], 127, block_size_ + 2);
-  }
-
-  void SetTopLeftUnavailable() {
-    SetLeftUnavailable();
-    SetTopUnavailable();
-  }
-
-  int BlockSizeLog2Min1() const {
-    switch (block_size_) {
-      case 16:
-        return 3;
-      case 8:
-        return 2;
-      default:
-        return 0;
-    }
-  }
-
-  // check DC prediction output against a reference
-  void CheckDCPrediction() const {
-    for (int p = 0; p < num_planes_; p++) {
-      // calculate expected DC
-      int expected;
-      if (mbptr_->up_available || mbptr_->left_available) {
-        int sum = 0, shift = BlockSizeLog2Min1() + mbptr_->up_available +
-                             mbptr_->left_available;
-        if (mbptr_->up_available)
-          for (int x = 0; x < block_size_; x++)
-            sum += data_ptr_[p][x - stride_];
-        if (mbptr_->left_available)
-          for (int y = 0; y < block_size_; y++)
-            sum += data_ptr_[p][y * stride_ - 1];
-        expected = (sum + (1 << (shift - 1))) >> shift;
-      } else {
-        expected = 0x80;
-      }
-      // check that all subsequent lines are equal to the first
-      for (int y = 1; y < block_size_; ++y)
-        ASSERT_EQ(0, memcmp(data_ptr_[p], &data_ptr_[p][y * stride_],
-                            block_size_));
-      // within the first line, ensure that each pixel has the same value
-      for (int x = 1; x < block_size_; ++x)
-        ASSERT_EQ(data_ptr_[p][0], data_ptr_[p][x]);
-      // now ensure that that pixel has the expected (DC) value
-      ASSERT_EQ(expected, data_ptr_[p][0]);
-    }
-  }
-
-  // check V prediction output against a reference
-  void CheckVPrediction() const {
-    // check that all lines equal the top border
-    for (int p = 0; p < num_planes_; p++)
-      for (int y = 0; y < block_size_; y++)
-        ASSERT_EQ(0, memcmp(&data_ptr_[p][-stride_],
-                            &data_ptr_[p][y * stride_], block_size_));
-  }
-
-  // check H prediction output against a reference
-  void CheckHPrediction() const {
-    // for each line, ensure that each pixel is equal to the left border
-    for (int p = 0; p < num_planes_; p++)
-      for (int y = 0; y < block_size_; y++)
-        for (int x = 0; x < block_size_; x++)
-          ASSERT_EQ(data_ptr_[p][-1 + y * stride_],
-                    data_ptr_[p][x + y * stride_]);
-  }
-
-  static int ClipByte(int value) {
-    if (value > 255)
-      return 255;
-    else if (value < 0)
-      return 0;
-    return value;
-  }
-
-  // check TM prediction output against a reference
-  void CheckTMPrediction() const {
-    for (int p = 0; p < num_planes_; p++)
-      for (int y = 0; y < block_size_; y++)
-        for (int x = 0; x < block_size_; x++) {
-          const int expected = ClipByte(data_ptr_[p][x - stride_]
-                                      + data_ptr_[p][stride_ * y - 1]
-                                      - data_ptr_[p][-1 - stride_]);
-          ASSERT_EQ(expected, data_ptr_[p][y * stride_ + x]);
-       }
-  }
-
-  // Actual test
-  void RunTest() {
-    {
-      SCOPED_TRACE("DC_PRED");
-      FillRandom();
-      Predict(DC_PRED);
-      CheckDCPrediction();
-    }
-    {
-      SCOPED_TRACE("DC_PRED LEFT");
-      FillRandom();
-      SetLeftUnavailable();
-      Predict(DC_PRED);
-      CheckDCPrediction();
-    }
-    {
-      SCOPED_TRACE("DC_PRED TOP");
-      FillRandom();
-      SetTopUnavailable();
-      Predict(DC_PRED);
-      CheckDCPrediction();
-    }
-    {
-      SCOPED_TRACE("DC_PRED TOP_LEFT");
-      FillRandom();
-      SetTopLeftUnavailable();
-      Predict(DC_PRED);
-      CheckDCPrediction();
-    }
-    {
-      SCOPED_TRACE("H_PRED");
-      FillRandom();
-      Predict(H_PRED);
-      CheckHPrediction();
-    }
-    {
-      SCOPED_TRACE("V_PRED");
-      FillRandom();
-      Predict(V_PRED);
-      CheckVPrediction();
-    }
-    {
-      SCOPED_TRACE("TM_PRED");
-      FillRandom();
-      Predict(TM_PRED);
-      CheckTMPrediction();
-    }
-  }
-
-  MACROBLOCKD *mbptr_;
-  MODE_INFO *miptr_;
-  uint8_t *data_ptr_[2];  // in the case of Y, only [0] is used
-  int stride_;
-  int block_size_;
-  int num_planes_;
-};
-
-typedef void (*IntraPredYFunc)(MACROBLOCKD *x,
-                               uint8_t *yabove_row,
-                               uint8_t *yleft,
-                               int left_stride,
-                               uint8_t *ypred_ptr,
-                               int y_stride);
-
-class IntraPredYTest
-    : public IntraPredBase,
-      public ::testing::TestWithParam<IntraPredYFunc> {
- public:
-  static void SetUpTestCase() {
-    mb_ = reinterpret_cast<MACROBLOCKD*>(
-        vpx_memalign(32, sizeof(MACROBLOCKD)));
-    mi_ = reinterpret_cast<MODE_INFO*>(
-        vpx_memalign(32, sizeof(MODE_INFO)));
-    data_array_ = reinterpret_cast<uint8_t*>(
-        vpx_memalign(kDataAlignment, kDataBufferSize));
-  }
-
-  static void TearDownTestCase() {
-    vpx_free(data_array_);
-    vpx_free(mi_);
-    vpx_free(mb_);
-    data_array_ = NULL;
-  }
-
- protected:
-  static const int kBlockSize = 16;
-  static const int kDataAlignment = 16;
-  static const int kStride = kBlockSize * 3;
-  // We use 48 so that the data pointer of the first pixel in each row of
-  // each macroblock is 16-byte aligned, and this gives us access to the
-  // top-left and top-right corner pixels belonging to the top-left/right
-  // macroblocks.
-  // We use 17 lines so we have one line above us for top-prediction.
-  static const int kDataBufferSize = kStride * (kBlockSize + 1);
-
-  virtual void SetUp() {
-    pred_fn_ = GetParam();
-    SetupMacroblock(mb_, mi_, data_array_, kBlockSize, kStride, 1);
-  }
-
-  virtual void Predict(MB_PREDICTION_MODE mode) {
-    mbptr_->mode_info_context->mbmi.mode = mode;
-    ASM_REGISTER_STATE_CHECK(pred_fn_(mbptr_,
-                                      data_ptr_[0] - kStride,
-                                      data_ptr_[0] - 1, kStride,
-                                      data_ptr_[0], kStride));
-  }
-
-  IntraPredYFunc pred_fn_;
-  static uint8_t* data_array_;
-  static MACROBLOCKD * mb_;
-  static MODE_INFO *mi_;
-};
-
-MACROBLOCKD* IntraPredYTest::mb_ = NULL;
-MODE_INFO* IntraPredYTest::mi_ = NULL;
-uint8_t* IntraPredYTest::data_array_ = NULL;
-
-TEST_P(IntraPredYTest, IntraPredTests) {
-  RunTest();
-}
-
-INSTANTIATE_TEST_CASE_P(C, IntraPredYTest,
-                        ::testing::Values(
-                            vp8_build_intra_predictors_mby_s_c));
-#if HAVE_SSE2
-INSTANTIATE_TEST_CASE_P(SSE2, IntraPredYTest,
-                        ::testing::Values(
-                            vp8_build_intra_predictors_mby_s_sse2));
-#endif
-#if HAVE_SSSE3
-INSTANTIATE_TEST_CASE_P(SSSE3, IntraPredYTest,
-                        ::testing::Values(
-                            vp8_build_intra_predictors_mby_s_ssse3));
-#endif
-#if HAVE_NEON
-INSTANTIATE_TEST_CASE_P(NEON, IntraPredYTest,
-                        ::testing::Values(
-                            vp8_build_intra_predictors_mby_s_neon));
-#endif
-
-typedef void (*IntraPredUvFunc)(MACROBLOCKD *x,
-                                uint8_t *uabove_row,
-                                uint8_t *vabove_row,
-                                uint8_t *uleft,
-                                uint8_t *vleft,
-                                int left_stride,
-                                uint8_t *upred_ptr,
-                                uint8_t *vpred_ptr,
-                                int pred_stride);
-
-class IntraPredUVTest
-    : public IntraPredBase,
-      public ::testing::TestWithParam<IntraPredUvFunc> {
- public:
-  static void SetUpTestCase() {
-    mb_ = reinterpret_cast<MACROBLOCKD*>(
-        vpx_memalign(32, sizeof(MACROBLOCKD)));
-    mi_ = reinterpret_cast<MODE_INFO*>(
-        vpx_memalign(32, sizeof(MODE_INFO)));
-    data_array_ = reinterpret_cast<uint8_t*>(
-        vpx_memalign(kDataAlignment, kDataBufferSize));
-  }
-
-  static void TearDownTestCase() {
-    vpx_free(data_array_);
-    vpx_free(mi_);
-    vpx_free(mb_);
-    data_array_ = NULL;
-  }
-
- protected:
-  static const int kBlockSize = 8;
-  static const int kDataAlignment = 8;
-  static const int kStride = kBlockSize * 3;
-  // We use 24 so that the data pointer of the first pixel in each row of
-  // each macroblock is 8-byte aligned, and this gives us access to the
-  // top-left and top-right corner pixels belonging to the top-left/right
-  // macroblocks.
-  // We use 9 lines so we have one line above us for top-prediction.
-  // [0] = U, [1] = V
-  static const int kDataBufferSize = 2 * kStride * (kBlockSize + 1);
-
-  virtual void SetUp() {
-    pred_fn_ = GetParam();
-    SetupMacroblock(mb_, mi_, data_array_, kBlockSize, kStride, 2);
-  }
-
-  virtual void Predict(MB_PREDICTION_MODE mode) {
-    mbptr_->mode_info_context->mbmi.uv_mode = mode;
-    pred_fn_(mbptr_, data_ptr_[0] - kStride, data_ptr_[1] - kStride,
-             data_ptr_[0] - 1, data_ptr_[1] - 1, kStride,
-             data_ptr_[0], data_ptr_[1], kStride);
-  }
-
-  IntraPredUvFunc pred_fn_;
-  // We use 24 so that the data pointer of the first pixel in each row of
-  // each macroblock is 8-byte aligned, and this gives us access to the
-  // top-left and top-right corner pixels belonging to the top-left/right
-  // macroblocks.
-  // We use 9 lines so we have one line above us for top-prediction.
-  // [0] = U, [1] = V
-  static uint8_t* data_array_;
-  static MACROBLOCKD* mb_;
-  static MODE_INFO* mi_;
-};
-
-MACROBLOCKD* IntraPredUVTest::mb_ = NULL;
-MODE_INFO* IntraPredUVTest::mi_ = NULL;
-uint8_t* IntraPredUVTest::data_array_ = NULL;
-
-TEST_P(IntraPredUVTest, IntraPredTests) {
-  RunTest();
-}
-
-INSTANTIATE_TEST_CASE_P(C, IntraPredUVTest,
-                        ::testing::Values(
-                            vp8_build_intra_predictors_mbuv_s_c));
-#if HAVE_SSE2
-INSTANTIATE_TEST_CASE_P(SSE2, IntraPredUVTest,
-                        ::testing::Values(
-                            vp8_build_intra_predictors_mbuv_s_sse2));
-#endif
-#if HAVE_SSSE3
-INSTANTIATE_TEST_CASE_P(SSSE3, IntraPredUVTest,
-                        ::testing::Values(
-                            vp8_build_intra_predictors_mbuv_s_ssse3));
-#endif
-#if HAVE_NEON
-INSTANTIATE_TEST_CASE_P(NEON, IntraPredUVTest,
-                        ::testing::Values(
-                            vp8_build_intra_predictors_mbuv_s_neon));
-#endif
-
-}  // namespace
--- a/test/invalid_file_test.cc
+++ b/test/invalid_file_test.cc
@@ -34,21 +34,19 @@ std::ostream &operator<<(std::ostream &os, const DecodeParam &dp) {
  return os << "threads: " << dp.threads << " file: " << dp.filename;
 }

-class InvalidFileTest
-    : public ::libvpx_test::DecoderTest,
-      public ::libvpx_test::CodecTestWithParam<DecodeParam> {
+class InvalidFileTest : public ::libvpx_test::DecoderTest,
+                        public ::libvpx_test::CodecTestWithParam<DecodeParam> {
 protected:
  InvalidFileTest() : DecoderTest(GET_PARAM(0)), res_file_(NULL) {}

  virtual ~InvalidFileTest() {
-    if (res_file_ != NULL)
-      fclose(res_file_);
+    if (res_file_ != NULL) fclose(res_file_);
  }

  void OpenResFile(const std::string &res_file_name_) {
    res_file_ = libvpx_test::OpenTestDataFile(res_file_name_);
    ASSERT_TRUE(res_file_ != NULL) << "Result file open failed. Filename: "
-        << res_file_name_;
+                                   << res_file_name_;
  }

  virtual bool HandleDecodeResult(
@@ -63,32 +61,47 @@ class InvalidFileTest
    EXPECT_NE(res, EOF) << "Read result data failed";

    // Check results match.
-    EXPECT_EQ(expected_res_dec, res_dec)
-        << "Results don't match: frame number = " << video.frame_number()
-        << ". (" << decoder->DecodeError() << ")";
+    const DecodeParam input = GET_PARAM(1);
+    if (input.threads > 1) {
+      // The serial decode check is too strict for tile-threaded decoding as
+      // there is no guarantee on the decode order nor which specific error
+      // will take precedence. Currently a tile-level error is not forwarded so
+      // the frame will simply be marked corrupt.
+      EXPECT_TRUE(res_dec == expected_res_dec ||
+                  res_dec == VPX_CODEC_CORRUPT_FRAME)
+          << "Results don't match: frame number = " << video.frame_number()
+          << ". (" << decoder->DecodeError()
+          << "). Expected: " << expected_res_dec << " or "
+          << VPX_CODEC_CORRUPT_FRAME;
+    } else {
+      EXPECT_EQ(expected_res_dec, res_dec)
+          << "Results don't match: frame number = " << video.frame_number()
+          << ". (" << decoder->DecodeError() << ")";
+    }

    return !HasFailure();
  }

  void RunTest() {
    const DecodeParam input = GET_PARAM(1);
-    libvpx_test::CompressedVideoSource *video = NULL;
    vpx_codec_dec_cfg_t cfg = vpx_codec_dec_cfg_t();
    cfg.threads = input.threads;
    const std::string filename = input.filename;

    // Open compressed video file.
+    testing::internal::scoped_ptr<libvpx_test::CompressedVideoSource> video;
    if (filename.substr(filename.length() - 3, 3) == "ivf") {
-      video = new libvpx_test::IVFVideoSource(filename);
+      video.reset(new libvpx_test::IVFVideoSource(filename));
    } else if (filename.substr(filename.length() - 4, 4) == "webm") {
 #if CONFIG_WEBM_IO
-      video = new libvpx_test::WebMVideoSource(filename);
+      video.reset(new libvpx_test::WebMVideoSource(filename));
 #else
      fprintf(stderr, "WebM IO is disabled, skipping test vector %s\n",
              filename.c_str());
      return;
 #endif
    }
+    ASSERT_TRUE(video.get() != NULL);
    video->Init();

    // Construct result file name. The file holds a list of expected integer
@@ -98,35 +111,45 @@ class InvalidFileTest
    OpenResFile(res_filename);

    // Decode frame, and check the md5 matching.
-    ASSERT_NO_FATAL_FAILURE(RunLoop(video, cfg));
-    delete video;
+    ASSERT_NO_FATAL_FAILURE(RunLoop(video.get(), cfg));
  }

 private:
  FILE *res_file_;
 };

-TEST_P(InvalidFileTest, ReturnCode) {
-  RunTest();
-}
+TEST_P(InvalidFileTest, ReturnCode) { RunTest(); }

+#if CONFIG_VP9_DECODER
 const DecodeParam kVP9InvalidFileTests[] = {
-  {1, "invalid-vp90-02-v2.webm"},
-  {1, "invalid-vp90-2-00-quantizer-00.webm.ivf.s5861_r01-05_b6-.v2.ivf"},
-  {1, "invalid-vp90-03-v3.webm"},
-  {1, "invalid-vp90-2-00-quantizer-11.webm.ivf.s52984_r01-05_b6-.ivf"},
-  {1, "invalid-vp90-2-00-quantizer-11.webm.ivf.s52984_r01-05_b6-z.ivf"},
-  {1, "invalid-vp90-2-12-droppable_1.ivf.s3676_r01-05_b6-.ivf"},
-  {1, "invalid-vp90-2-05-resize.ivf.s59293_r01-05_b6-.ivf"},
-  {1, "invalid-vp90-2-09-subpixel-00.ivf.s20492_r01-05_b6-.v2.ivf"},
-  {1, "invalid-vp91-2-mixedrefcsp-444to420.ivf"},
-  {1, "invalid-vp90-2-12-droppable_1.ivf.s73804_r01-05_b6-.ivf"},
-  {1, "invalid-vp90-2-03-size-224x196.webm.ivf.s44156_r01-05_b6-.ivf"},
-  {1, "invalid-vp90-2-03-size-202x210.webm.ivf.s113306_r01-05_b6-.ivf"},
+  { 1, "invalid-vp90-02-v2.webm" },
+#if CONFIG_VP9_HIGHBITDEPTH
+  { 1, "invalid-vp90-2-00-quantizer-00.webm.ivf.s5861_r01-05_b6-.v2.ivf" },
+#endif
+  { 1, "invalid-vp90-03-v3.webm" },
+  { 1, "invalid-vp90-2-00-quantizer-11.webm.ivf.s52984_r01-05_b6-.ivf" },
+  { 1, "invalid-vp90-2-00-quantizer-11.webm.ivf.s52984_r01-05_b6-z.ivf" },
+// This file will cause a large allocation which is expected to fail in 32-bit
+// environments. Test x86 for coverage purposes as the allocation failure will
+// be in platform agnostic code.
+#if ARCH_X86
+  { 1, "invalid-vp90-2-00-quantizer-63.ivf.kf_65527x61446.ivf" },
+#endif
+  { 1, "invalid-vp90-2-12-droppable_1.ivf.s3676_r01-05_b6-.ivf" },
+  { 1, "invalid-vp90-2-05-resize.ivf.s59293_r01-05_b6-.ivf" },
+  { 1, "invalid-vp90-2-09-subpixel-00.ivf.s20492_r01-05_b6-.v2.ivf" },
+  { 1, "invalid-vp91-2-mixedrefcsp-444to420.ivf" },
+  { 1, "invalid-vp90-2-12-droppable_1.ivf.s73804_r01-05_b6-.ivf" },
+  { 1, "invalid-vp90-2-03-size-224x196.webm.ivf.s44156_r01-05_b6-.ivf" },
+  { 1, "invalid-vp90-2-03-size-202x210.webm.ivf.s113306_r01-05_b6-.ivf" },
+  { 1,
+    "invalid-vp90-2-10-show-existing-frame.webm.ivf.s180315_r01-05_b6-.ivf" },
+  { 1, "invalid-crbug-667044.webm" },
 };

 VP9_INSTANTIATE_TEST_CASE(InvalidFileTest,
                          ::testing::ValuesIn(kVP9InvalidFileTests));
+#endif  // CONFIG_VP9_DECODER

 // This class will include test vectors that are expected to fail
 // peek. However they are still expected to have no fatal failures.
@@ -134,34 +157,46 @@ class InvalidFileInvalidPeekTest : public InvalidFileTest {
 protected:
  InvalidFileInvalidPeekTest() : InvalidFileTest() {}
  virtual void HandlePeekResult(libvpx_test::Decoder *const /*decoder*/,
-                                libvpx_test::CompressedVideoSource* /*video*/,
+                                libvpx_test::CompressedVideoSource * /*video*/,
                                const vpx_codec_err_t /*res_peek*/) {}
 };

-TEST_P(InvalidFileInvalidPeekTest, ReturnCode) {
-  RunTest();
-}
+TEST_P(InvalidFileInvalidPeekTest, ReturnCode) { RunTest(); }

+#if CONFIG_VP8_DECODER
+const DecodeParam kVP8InvalidFileTests[] = {
+  { 1, "invalid-vp80-00-comprehensive-018.ivf.2kf_0x6.ivf" },
+};
+
+VP8_INSTANTIATE_TEST_CASE(InvalidFileInvalidPeekTest,
+                          ::testing::ValuesIn(kVP8InvalidFileTests));
+#endif  // CONFIG_VP8_DECODER
+
+#if CONFIG_VP9_DECODER
 const DecodeParam kVP9InvalidFileInvalidPeekTests[] = {
-  {1, "invalid-vp90-01-v2.webm"},
+  { 1, "invalid-vp90-01-v3.webm" },
 };

 VP9_INSTANTIATE_TEST_CASE(InvalidFileInvalidPeekTest,
                          ::testing::ValuesIn(kVP9InvalidFileInvalidPeekTests));

 const DecodeParam kMultiThreadedVP9InvalidFileTests[] = {
-  {4, "invalid-vp90-2-08-tile_1x4_frame_parallel_all_key.webm"},
-  {4, "invalid-"
-      "vp90-2-08-tile_1x2_frame_parallel.webm.ivf.s47039_r01-05_b6-.ivf"},
-  {4, "invalid-vp90-2-08-tile_1x8_frame_parallel.webm.ivf.s288_r01-05_b6-.ivf"},
-  {2, "invalid-vp90-2-09-aq2.webm.ivf.s3984_r01-05_b6-.v2.ivf"},
-  {4, "invalid-vp90-2-09-subpixel-00.ivf.s19552_r01-05_b6-.v2.ivf"},
+  { 4, "invalid-vp90-2-08-tile_1x4_frame_parallel_all_key.webm" },
+  { 4,
+    "invalid-"
+    "vp90-2-08-tile_1x2_frame_parallel.webm.ivf.s47039_r01-05_b6-.ivf" },
+  { 4,
+    "invalid-vp90-2-08-tile_1x8_frame_parallel.webm.ivf.s288_r01-05_b6-.ivf" },
+  { 2, "invalid-vp90-2-09-aq2.webm.ivf.s3984_r01-05_b6-.v2.ivf" },
+  { 4, "invalid-vp90-2-09-subpixel-00.ivf.s19552_r01-05_b6-.v2.ivf" },
+  { 2, "invalid-crbug-629481.webm" },
 };

 INSTANTIATE_TEST_CASE_P(
    VP9MultiThreaded, InvalidFileTest,
    ::testing::Combine(
        ::testing::Values(
-            static_cast<const libvpx_test::CodecFactory*>(&libvpx_test::kVP9)),
+            static_cast<const libvpx_test::CodecFactory *>(&libvpx_test::kVP9)),
        ::testing::ValuesIn(kMultiThreadedVP9InvalidFileTests)));
+#endif  // CONFIG_VP9_DECODER
 }  // namespace
--- a/test/ivf_video_source.h
+++ b/test/ivf_video_source.h
@@ -29,19 +29,13 @@ static unsigned int MemGetLe32(const uint8_t *mem) {
 class IVFVideoSource : public CompressedVideoSource {
 public:
  explicit IVFVideoSource(const std::string &file_name)
-      : file_name_(file_name),
-        input_file_(NULL),
-        compressed_frame_buf_(NULL),
-        frame_sz_(0),
-        frame_(0),
-        end_of_file_(false) {
-  }
+      : file_name_(file_name), input_file_(NULL), compressed_frame_buf_(NULL),
+        frame_sz_(0), frame_(0), end_of_file_(false) {}

  virtual ~IVFVideoSource() {
    delete[] compressed_frame_buf_;

-    if (input_file_)
-      fclose(input_file_);
+    if (input_file_) fclose(input_file_);
  }

  virtual void Init() {
@@ -54,15 +48,16 @@ class IVFVideoSource : public CompressedVideoSource {
  virtual void Begin() {
    input_file_ = OpenTestDataFile(file_name_);
    ASSERT_TRUE(input_file_ != NULL) << "Input file open failed. Filename: "
-        << file_name_;
+                                     << file_name_;

    // Read file header
    uint8_t file_hdr[kIvfFileHdrSize];
    ASSERT_EQ(kIvfFileHdrSize, fread(file_hdr, 1, kIvfFileHdrSize, input_file_))
        << "File header read failed.";
    // Check file header
-    ASSERT_TRUE(file_hdr[0] == 'D' && file_hdr[1] == 'K' && file_hdr[2] == 'I'
-                && file_hdr[3] == 'F') << "Input is not an IVF file.";
+    ASSERT_TRUE(file_hdr[0] == 'D' && file_hdr[1] == 'K' &&
+                file_hdr[2] == 'I' && file_hdr[3] == 'F')
+        << "Input is not an IVF file.";

    FillFrame();
  }
@@ -76,8 +71,8 @@ class IVFVideoSource : public CompressedVideoSource {
    ASSERT_TRUE(input_file_ != NULL);
    uint8_t frame_hdr[kIvfFrameHdrSize];
    // Check frame header and read a frame from input_file.
-    if (fread(frame_hdr, 1, kIvfFrameHdrSize, input_file_)
-        != kIvfFrameHdrSize) {
+    if (fread(frame_hdr, 1, kIvfFrameHdrSize, input_file_) !=
+        kIvfFrameHdrSize) {
      end_of_file_ = true;
    } else {
      end_of_file_ = false;
--- a/test/keyframe_test.cc
+++ b/test/keyframe_test.cc
@@ -17,8 +17,9 @@

 namespace {

-class KeyframeTest : public ::libvpx_test::EncoderTest,
-    public ::libvpx_test::CodecTestWithParam<libvpx_test::TestMode> {
+class KeyframeTest
+    : public ::libvpx_test::EncoderTest,
+      public ::libvpx_test::CodecTestWithParam<libvpx_test::TestMode> {
 protected:
  KeyframeTest() : EncoderTest(GET_PARAM(0)) {}
  virtual ~KeyframeTest() {}
@@ -34,10 +35,12 @@ class KeyframeTest : public ::libvpx_test::EncoderTest,

  virtual void PreEncodeFrameHook(::libvpx_test::VideoSource *video,
                                  ::libvpx_test::Encoder *encoder) {
-    if (kf_do_force_kf_)
+    if (kf_do_force_kf_) {
      frame_flags_ = (video->frame() % 3) ? 0 : VPX_EFLAG_FORCE_KF;
-    if (set_cpu_used_ && video->frame() == 1)
+    }
+    if (set_cpu_used_ && video->frame() == 1) {
      encoder->Control(VP8E_SET_CPUUSED, set_cpu_used_);
+    }
  }

  virtual void FramePktHook(const vpx_codec_cx_pkt_t *pkt) {
@@ -65,8 +68,7 @@ TEST_P(KeyframeTest, TestRandomVideoSource) {

  // In realtime mode - auto placed keyframes are exceedingly rare,  don't
  // bother with this check   if(GetParam() > 0)
-  if (GET_PARAM(1) > 0)
-    EXPECT_GT(kf_count_, 1);
+  if (GET_PARAM(1) > 0) EXPECT_GT(kf_count_, 1);
 }

 TEST_P(KeyframeTest, TestDisableKeyframes) {
@@ -114,8 +116,7 @@ TEST_P(KeyframeTest, TestAutoKeyframe) {
  // may not produce a keyframe like we expect. This is necessary when running
  // on very slow environments (like Valgrind). The step -11 was determined
  // experimentally as the fastest mode that still throws the keyframe.
-  if (deadline_ == VPX_DL_REALTIME)
-    set_cpu_used_ = -11;
+  if (deadline_ == VPX_DL_REALTIME) set_cpu_used_ = -11;

  // This clip has a cut scene every 30 frames -> Frame 0, 30, 60, 90, 120.
  // I check only the first 40 frames to make sure there's a keyframe at frame
@@ -135,7 +136,7 @@ TEST_P(KeyframeTest, TestAutoKeyframe) {
       iter != kf_pts_list_.end(); ++iter) {
    if (deadline_ == VPX_DL_REALTIME && *iter > 0)
      EXPECT_EQ(0, (*iter - 1) % 30) << "Unexpected keyframe at frame "
-        << *iter;
+                                     << *iter;
    else
      EXPECT_EQ(0, *iter % 30) << "Unexpected keyframe at frame " << *iter;
  }
--- a/test/level_test.cc
+++ b/test/level_test.cc
@@ -0,0 +1,147 @@
+/*
+ *  Copyright (c) 2016 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+#include "third_party/googletest/src/include/gtest/gtest.h"
+#include "test/codec_factory.h"
+#include "test/encode_test_driver.h"
+#include "test/i420_video_source.h"
+#include "test/util.h"
+
+namespace {
+class LevelTest  // Encoder fixture: sets a target level, reads back the level.
+    : public ::libvpx_test::EncoderTest,
+      public ::libvpx_test::CodecTestWith2Params<libvpx_test::TestMode, int> {
+ protected:
+  LevelTest()
+      : EncoderTest(GET_PARAM(0)), encoding_mode_(GET_PARAM(1)),
+        cpu_used_(GET_PARAM(2)), min_gf_internal_(24), target_level_(0),
+        level_(0) {}
+  virtual ~LevelTest() {}
+
+  virtual void SetUp() {  // Builds the encoder config for the selected mode.
+    InitializeConfig();
+    SetMode(encoding_mode_);
+    if (encoding_mode_ != ::libvpx_test::kRealTime) {
+      cfg_.g_lag_in_frames = 25;  // Non-real-time modes: lagged VBR.
+      cfg_.rc_end_usage = VPX_VBR;
+    } else {
+      cfg_.g_lag_in_frames = 0;  // Real-time mode: no lag, CBR.
+      cfg_.rc_end_usage = VPX_CBR;
+    }
+    cfg_.rc_2pass_vbr_minsection_pct = 5;
+    cfg_.rc_2pass_vbr_maxsection_pct = 2000;
+    cfg_.rc_target_bitrate = 400;  // Default; some tests override it.
+    cfg_.rc_max_quantizer = 63;
+    cfg_.rc_min_quantizer = 0;
+  }
+
+  virtual void PreEncodeFrameHook(::libvpx_test::VideoSource *video,
+                                  ::libvpx_test::Encoder *encoder) {
+    if (video->frame() == 0) {  // One-time encoder controls on frame 0.
+      encoder->Control(VP8E_SET_CPUUSED, cpu_used_);
+      encoder->Control(VP9E_SET_TARGET_LEVEL, target_level_);
+      encoder->Control(VP9E_SET_MIN_GF_INTERVAL, min_gf_internal_);
+      if (encoding_mode_ != ::libvpx_test::kRealTime) {
+        encoder->Control(VP8E_SET_ENABLEAUTOALTREF, 1);
+        encoder->Control(VP8E_SET_ARNR_MAXFRAMES, 7);
+        encoder->Control(VP8E_SET_ARNR_STRENGTH, 5);
+        encoder->Control(VP8E_SET_ARNR_TYPE, 3);
+      }
+    }
+    encoder->Control(VP9E_GET_LEVEL, &level_);  // Runs on every call.
+    ASSERT_LE(level_, 51);  // Reported level must stay within [0, 51].
+    ASSERT_GE(level_, 0);
+  }
+
+  ::libvpx_test::TestMode encoding_mode_;  // From GET_PARAM(1).
+  int cpu_used_;  // From GET_PARAM(2); sent as VP8E_SET_CPUUSED.
+  int min_gf_internal_;  // Passed to VP9E_SET_MIN_GF_INTERVAL; starts at 24.
+  int target_level_;  // Passed to VP9E_SET_TARGET_LEVEL; starts at 0.
+  int level_;  // Last value fetched with VP9E_GET_LEVEL.
+};
+
+TEST_P(LevelTest, TestTargetLevel11) {
+  ASSERT_NE(encoding_mode_, ::libvpx_test::kRealTime);  // Not for real-time.
+  ::libvpx_test::I420VideoSource video("hantro_odd.yuv", 208, 144, 30, 1, 0,
+                                       90);
+  target_level_ = 11;
+  cfg_.rc_target_bitrate = 150;  // Replaces the 400 set in SetUp().
+  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+  ASSERT_EQ(target_level_, level_);  // Reported level must equal the target.
+}
+
+TEST_P(LevelTest, TestTargetLevel20) {
+  ASSERT_NE(encoding_mode_, ::libvpx_test::kRealTime);  // Not for real-time.
+  ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
+                                       30, 1, 0, 90);
+  target_level_ = 20;
+  cfg_.rc_target_bitrate = 1200;  // Replaces the 400 set in SetUp().
+  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+  ASSERT_EQ(target_level_, level_);  // Reported level must equal the target.
+}
+
+TEST_P(LevelTest, TestTargetLevel31) {
+  ASSERT_NE(encoding_mode_, ::libvpx_test::kRealTime);  // Not for real-time.
+  ::libvpx_test::I420VideoSource video("niklas_1280_720_30.y4m", 1280, 720, 30,
+                                       1, 0, 60);
+  target_level_ = 31;
+  cfg_.rc_target_bitrate = 8000;  // Replaces the 400 set in SetUp().
+  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+  ASSERT_EQ(target_level_, level_);  // Reported level must equal the target.
+}
+
+// Test for keeping level stats only
+TEST_P(LevelTest, TestTargetLevel0) {
+  ::libvpx_test::I420VideoSource video("hantro_odd.yuv", 208, 144, 30, 1, 0,
+                                       40);
+  target_level_ = 0;
+  min_gf_internal_ = 4;  // Replaces the constructor default of 24.
+  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+  ASSERT_EQ(11, level_);  // Expected with the 400 bitrate from SetUp().
+
+  cfg_.rc_target_bitrate = 1600;  // Second run, same clip, higher bitrate.
+  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+  ASSERT_EQ(20, level_);  // Expected after raising the bitrate to 1600.
+}
+
+// Test for level control being turned off
+TEST_P(LevelTest, TestTargetLevel255) {
+  ::libvpx_test::I420VideoSource video("hantro_odd.yuv", 208, 144, 30, 1, 0,
+                                       30);
+  target_level_ = 255;
+  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));  // No final level check here.
+}
+
+TEST_P(LevelTest, TestTargetLevelApi) {  // Accepted vs. rejected level values.
+  ::libvpx_test::I420VideoSource video("hantro_odd.yuv", 208, 144, 30, 1, 0, 1);
+  static const vpx_codec_iface_t *codec = &vpx_codec_vp9_cx_algo;
+  vpx_codec_ctx_t enc;  // Only valid once vpx_codec_enc_init() succeeds.
+  vpx_codec_enc_cfg_t cfg;  // Filled in by vpx_codec_enc_config_default().
+  ASSERT_EQ(VPX_CODEC_OK, vpx_codec_enc_config_default(codec, &cfg, 0));
+  cfg.rc_target_bitrate = 100;
+  ASSERT_EQ(VPX_CODEC_OK, vpx_codec_enc_init(&enc, codec, &cfg, 0));
+  for (int level = 0; level <= 256; ++level) {  // 0..256 inclusive.
+    if (level == 10 || level == 11 || level == 20 || level == 21 ||
+        level == 30 || level == 31 || level == 40 || level == 41 ||
+        level == 50 || level == 51 || level == 52 || level == 60 ||
+        level == 61 || level == 62 || level == 0 || level == 255)
+      EXPECT_EQ(VPX_CODEC_OK,
+                vpx_codec_control(&enc, VP9E_SET_TARGET_LEVEL, level));
+    else  // Every other value must be rejected.
+      EXPECT_EQ(VPX_CODEC_INVALID_PARAM,
+                vpx_codec_control(&enc, VP9E_SET_TARGET_LEVEL, level));
+  }
+  EXPECT_EQ(VPX_CODEC_OK, vpx_codec_destroy(&enc));
+}
+
+VP9_INSTANTIATE_TEST_CASE(LevelTest,
+                          ::testing::Values(::libvpx_test::kTwoPassGood,
+                                            ::libvpx_test::kOnePassGood),
+                          ::testing::Range(0, 9));  // cpu_used_ in [0, 9).
+}  // namespace
--- a/test/lpf_8_test.cc
+++ b/test/lpf_8_test.cc
@@ -1,716 +0,0 @@
-/*
- *  Copyright (c) 2014 The WebM project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-#include <cmath>
-#include <cstdlib>
-#include <string>
-
-#include "third_party/googletest/src/include/gtest/gtest.h"
-#include "test/acm_random.h"
-#include "test/clear_system_state.h"
-#include "test/register_state_check.h"
-#include "test/util.h"
-
-#include "./vpx_config.h"
-#include "./vp9_rtcd.h"
-#include "vp9/common/vp9_entropy.h"
-#include "vp9/common/vp9_loopfilter.h"
-#include "vpx/vpx_integer.h"
-
-using libvpx_test::ACMRandom;
-
-namespace {
-// Horizontally and Vertically need 32x32: 8  Coeffs preceeding filtered section
-//                                         16 Coefs within filtered section
-//                                         8  Coeffs following filtered section
-const int kNumCoeffs = 1024;
-
-const int number_of_iterations = 10000;
-
-#if CONFIG_VP9_HIGHBITDEPTH
-typedef void (*loop_op_t)(uint16_t *s, int p, const uint8_t *blimit,
-                          const uint8_t *limit, const uint8_t *thresh,
-                          int count, int bd);
-typedef void (*dual_loop_op_t)(uint16_t *s, int p, const uint8_t *blimit0,
-                               const uint8_t *limit0, const uint8_t *thresh0,
-                               const uint8_t *blimit1, const uint8_t *limit1,
-                               const uint8_t *thresh1, int bd);
-#else
-typedef void (*loop_op_t)(uint8_t *s, int p, const uint8_t *blimit,
-                          const uint8_t *limit, const uint8_t *thresh,
-                          int count);
-typedef void (*dual_loop_op_t)(uint8_t *s, int p, const uint8_t *blimit0,
-                               const uint8_t *limit0, const uint8_t *thresh0,
-                               const uint8_t *blimit1, const uint8_t *limit1,
-                               const uint8_t *thresh1);
-#endif  // CONFIG_VP9_HIGHBITDEPTH
-
-typedef std::tr1::tuple<loop_op_t, loop_op_t, int, int> loop8_param_t;
-typedef std::tr1::tuple<dual_loop_op_t, dual_loop_op_t, int> dualloop8_param_t;
-
-#if HAVE_SSE2
-#if CONFIG_VP9_HIGHBITDEPTH
-void wrapper_vertical_16_sse2(uint16_t *s, int p, const uint8_t *blimit,
-                              const uint8_t *limit, const uint8_t *thresh,
-                              int count, int bd) {
-  vp9_highbd_lpf_vertical_16_sse2(s, p, blimit, limit, thresh, bd);
-}
-
-void wrapper_vertical_16_c(uint16_t *s, int p, const uint8_t *blimit,
-                           const uint8_t *limit, const uint8_t *thresh,
-                           int count, int bd) {
-  vp9_highbd_lpf_vertical_16_c(s, p, blimit, limit, thresh, bd);
-}
-
-void wrapper_vertical_16_dual_sse2(uint16_t *s, int p, const uint8_t *blimit,
-                                   const uint8_t *limit, const uint8_t *thresh,
-                                   int count, int bd) {
-  vp9_highbd_lpf_vertical_16_dual_sse2(s, p, blimit, limit, thresh, bd);
-}
-
-void wrapper_vertical_16_dual_c(uint16_t *s, int p, const uint8_t *blimit,
-                                const uint8_t *limit, const uint8_t *thresh,
-                                int count, int bd) {
-  vp9_highbd_lpf_vertical_16_dual_c(s, p, blimit, limit, thresh, bd);
-}
-#else
-void wrapper_vertical_16_sse2(uint8_t *s, int p, const uint8_t *blimit,
-                              const uint8_t *limit, const uint8_t *thresh,
-                              int count) {
-  vp9_lpf_vertical_16_sse2(s, p, blimit, limit, thresh);
-}
-
-void wrapper_vertical_16_c(uint8_t *s, int p, const uint8_t *blimit,
-                           const uint8_t *limit, const uint8_t *thresh,
-                           int count) {
-  vp9_lpf_vertical_16_c(s, p, blimit, limit, thresh);
-}
-
-void wrapper_vertical_16_dual_sse2(uint8_t *s, int p, const uint8_t *blimit,
-                                   const uint8_t *limit, const uint8_t *thresh,
-                                   int count) {
-  vp9_lpf_vertical_16_dual_sse2(s, p, blimit, limit, thresh);
-}
-
-void wrapper_vertical_16_dual_c(uint8_t *s, int p, const uint8_t *blimit,
-                                const uint8_t *limit, const uint8_t *thresh,
-                                int count) {
-  vp9_lpf_vertical_16_dual_c(s, p, blimit, limit, thresh);
-}
-#endif  // CONFIG_VP9_HIGHBITDEPTH
-#endif  // HAVE_SSE2
-
-#if HAVE_NEON_ASM
-#if CONFIG_VP9_HIGHBITDEPTH
-// No neon high bitdepth functions.
-#else
-void wrapper_vertical_16_neon(uint8_t *s, int p, const uint8_t *blimit,
-                              const uint8_t *limit, const uint8_t *thresh,
-                              int count) {
-  vp9_lpf_vertical_16_neon(s, p, blimit, limit, thresh);
-}
-
-void wrapper_vertical_16_c(uint8_t *s, int p, const uint8_t *blimit,
-                           const uint8_t *limit, const uint8_t *thresh,
-                           int count) {
-  vp9_lpf_vertical_16_c(s, p, blimit, limit, thresh);
-}
-
-void wrapper_vertical_16_dual_neon(uint8_t *s, int p, const uint8_t *blimit,
-                                   const uint8_t *limit, const uint8_t *thresh,
-                                   int count) {
-  vp9_lpf_vertical_16_dual_neon(s, p, blimit, limit, thresh);
-}
-
-void wrapper_vertical_16_dual_c(uint8_t *s, int p, const uint8_t *blimit,
-                                const uint8_t *limit, const uint8_t *thresh,
-                                int count) {
-  vp9_lpf_vertical_16_dual_c(s, p, blimit, limit, thresh);
-}
-#endif  // CONFIG_VP9_HIGHBITDEPTH
-#endif  // HAVE_NEON_ASM
-
-#if HAVE_MSA && (!CONFIG_VP9_HIGHBITDEPTH)
-void wrapper_vertical_16_msa(uint8_t *s, int p, const uint8_t *blimit,
-                             const uint8_t *limit, const uint8_t *thresh,
-                             int count) {
-  vp9_lpf_vertical_16_msa(s, p, blimit, limit, thresh);
-}
-
-void wrapper_vertical_16_c(uint8_t *s, int p, const uint8_t *blimit,
-                           const uint8_t *limit, const uint8_t *thresh,
-                           int count) {
-  vp9_lpf_vertical_16_c(s, p, blimit, limit, thresh);
-}
-#endif  // HAVE_MSA && (!CONFIG_VP9_HIGHBITDEPTH)
-
-class Loop8Test6Param : public ::testing::TestWithParam<loop8_param_t> {
- public:
-  virtual ~Loop8Test6Param() {}
-  virtual void SetUp() {
-    loopfilter_op_ = GET_PARAM(0);
-    ref_loopfilter_op_ = GET_PARAM(1);
-    bit_depth_ = GET_PARAM(2);
-    count_ = GET_PARAM(3);
-    mask_ = (1 << bit_depth_) - 1;
-  }
-
-  virtual void TearDown() { libvpx_test::ClearSystemState(); }
-
- protected:
-  int bit_depth_;
-  int count_;
-  int mask_;
-  loop_op_t loopfilter_op_;
-  loop_op_t ref_loopfilter_op_;
-};
-
-class Loop8Test9Param : public ::testing::TestWithParam<dualloop8_param_t> {
- public:
-  virtual ~Loop8Test9Param() {}
-  virtual void SetUp() {
-    loopfilter_op_ = GET_PARAM(0);
-    ref_loopfilter_op_ = GET_PARAM(1);
-    bit_depth_ = GET_PARAM(2);
-    mask_ = (1 << bit_depth_) - 1;
-  }
-
-  virtual void TearDown() { libvpx_test::ClearSystemState(); }
-
- protected:
-  int bit_depth_;
-  int mask_;
-  dual_loop_op_t loopfilter_op_;
-  dual_loop_op_t ref_loopfilter_op_;
-};
-
-TEST_P(Loop8Test6Param, OperationCheck) {
-  ACMRandom rnd(ACMRandom::DeterministicSeed());
-  const int count_test_block = number_of_iterations;
-#if CONFIG_VP9_HIGHBITDEPTH
-  int32_t bd = bit_depth_;
-  DECLARE_ALIGNED(16, uint16_t, s[kNumCoeffs]);
-  DECLARE_ALIGNED(16, uint16_t, ref_s[kNumCoeffs]);
-#else
-  DECLARE_ALIGNED(8, uint8_t, s[kNumCoeffs]);
-  DECLARE_ALIGNED(8, uint8_t, ref_s[kNumCoeffs]);
-#endif  // CONFIG_VP9_HIGHBITDEPTH
-  int err_count_total = 0;
-  int first_failure = -1;
-  for (int i = 0; i < count_test_block; ++i) {
-    int err_count = 0;
-    uint8_t tmp = static_cast<uint8_t>(rnd(3 * MAX_LOOP_FILTER + 4));
-    DECLARE_ALIGNED(16, const uint8_t, blimit[16]) = {
-        tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
-        tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp
-    };
-    tmp = static_cast<uint8_t>(rnd(MAX_LOOP_FILTER));
-    DECLARE_ALIGNED(16, const uint8_t, limit[16])  = {
-        tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
-        tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp
-    };
-    tmp = rnd.Rand8();
-    DECLARE_ALIGNED(16, const uint8_t, thresh[16]) = {
-        tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
-        tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp
-    };
-    int32_t p = kNumCoeffs/32;
-
-    uint16_t tmp_s[kNumCoeffs];
-    int j = 0;
-    while (j < kNumCoeffs) {
-      uint8_t val = rnd.Rand8();
-      if (val & 0x80) {  // 50% chance to choose a new value.
-        tmp_s[j] = rnd.Rand16();
-        j++;
-      } else {  // 50% chance to repeat previous value in row X times
-        int k = 0;
-        while (k++ < ((val & 0x1f) + 1) && j < kNumCoeffs) {
-          if (j < 1) {
-            tmp_s[j] = rnd.Rand16();
-          } else if (val & 0x20) {  // Increment by an value within the limit
-            tmp_s[j] = (tmp_s[j - 1] + (*limit - 1));
-          } else {  // Decrement by an value within the limit
-            tmp_s[j] = (tmp_s[j - 1] - (*limit - 1));
-          }
-          j++;
-        }
-      }
-    }
-    for (j = 0; j < kNumCoeffs; j++) {
-      if (i % 2) {
-        s[j] = tmp_s[j] & mask_;
-      } else {
-        s[j] = tmp_s[p * (j % p) + j / p] & mask_;
-      }
-      ref_s[j] = s[j];
-    }
-#if CONFIG_VP9_HIGHBITDEPTH
-    ref_loopfilter_op_(ref_s + 8 + p * 8, p, blimit, limit, thresh, count_, bd);
-    ASM_REGISTER_STATE_CHECK(
-        loopfilter_op_(s + 8 + p * 8, p, blimit, limit, thresh, count_, bd));
-#else
-    ref_loopfilter_op_(ref_s+8+p*8, p, blimit, limit, thresh, count_);
-    ASM_REGISTER_STATE_CHECK(
-        loopfilter_op_(s + 8 + p * 8, p, blimit, limit, thresh, count_));
-#endif  // CONFIG_VP9_HIGHBITDEPTH
-
-    for (int j = 0; j < kNumCoeffs; ++j) {
-      err_count += ref_s[j] != s[j];
-    }
-    if (err_count && !err_count_total) {
-      first_failure = i;
-    }
-    err_count_total += err_count;
-  }
-  EXPECT_EQ(0, err_count_total)
-      << "Error: Loop8Test6Param, C output doesn't match SSE2 "
-         "loopfilter output. "
-      << "First failed at test case " << first_failure;
-}
-
-TEST_P(Loop8Test6Param, ValueCheck) {
-  ACMRandom rnd(ACMRandom::DeterministicSeed());
-  const int count_test_block = number_of_iterations;
-#if CONFIG_VP9_HIGHBITDEPTH
-  const int32_t bd = bit_depth_;
-  DECLARE_ALIGNED(16, uint16_t, s[kNumCoeffs]);
-  DECLARE_ALIGNED(16, uint16_t, ref_s[kNumCoeffs]);
-#else
-  DECLARE_ALIGNED(8, uint8_t, s[kNumCoeffs]);
-  DECLARE_ALIGNED(8, uint8_t, ref_s[kNumCoeffs]);
-#endif  // CONFIG_VP9_HIGHBITDEPTH
-  int err_count_total = 0;
-  int first_failure = -1;
-
-  // NOTE: The code in vp9_loopfilter.c:update_sharpness computes mblim as a
-  // function of sharpness_lvl and the loopfilter lvl as:
-  // block_inside_limit = lvl >> ((sharpness_lvl > 0) + (sharpness_lvl > 4));
-  // ...
-  // memset(lfi->lfthr[lvl].mblim, (2 * (lvl + 2) + block_inside_limit),
-  //        SIMD_WIDTH);
-  // This means that the largest value for mblim will occur when sharpness_lvl
-  // is equal to 0, and lvl is equal to its greatest value (MAX_LOOP_FILTER).
-  // In this case block_inside_limit will be equal to MAX_LOOP_FILTER and
-  // therefore mblim will be equal to (2 * (lvl + 2) + block_inside_limit) =
-  // 2 * (MAX_LOOP_FILTER + 2) + MAX_LOOP_FILTER = 3 * MAX_LOOP_FILTER + 4
-
-  for (int i = 0; i < count_test_block; ++i) {
-    int err_count = 0;
-    uint8_t tmp = static_cast<uint8_t>(rnd(3 * MAX_LOOP_FILTER + 4));
-    DECLARE_ALIGNED(16, const uint8_t, blimit[16]) = {
-        tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
-        tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp
-    };
-    tmp = static_cast<uint8_t>(rnd(MAX_LOOP_FILTER));
-    DECLARE_ALIGNED(16, const uint8_t, limit[16])  = {
-        tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
-        tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp
-    };
-    tmp = rnd.Rand8();
-    DECLARE_ALIGNED(16, const uint8_t, thresh[16]) = {
-        tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
-        tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp
-    };
-    int32_t p = kNumCoeffs / 32;
-    for (int j = 0; j < kNumCoeffs; ++j) {
-      s[j] = rnd.Rand16() & mask_;
-      ref_s[j] = s[j];
-    }
-#if CONFIG_VP9_HIGHBITDEPTH
-    ref_loopfilter_op_(ref_s + 8 + p * 8, p, blimit, limit, thresh, count_, bd);
-    ASM_REGISTER_STATE_CHECK(
-        loopfilter_op_(s + 8 + p * 8, p, blimit, limit, thresh, count_, bd));
-#else
-    ref_loopfilter_op_(ref_s+8+p*8, p, blimit, limit, thresh, count_);
-    ASM_REGISTER_STATE_CHECK(
-        loopfilter_op_(s + 8 + p * 8, p, blimit, limit, thresh, count_));
-#endif  // CONFIG_VP9_HIGHBITDEPTH
-    for (int j = 0; j < kNumCoeffs; ++j) {
-      err_count += ref_s[j] != s[j];
-    }
-    if (err_count && !err_count_total) {
-      first_failure = i;
-    }
-    err_count_total += err_count;
-  }
-  EXPECT_EQ(0, err_count_total)
-      << "Error: Loop8Test6Param, C output doesn't match SSE2 "
-         "loopfilter output. "
-      << "First failed at test case " << first_failure;
-}
-
-TEST_P(Loop8Test9Param, OperationCheck) {
-  ACMRandom rnd(ACMRandom::DeterministicSeed());
-  const int count_test_block = number_of_iterations;
-#if CONFIG_VP9_HIGHBITDEPTH
-  const int32_t bd = bit_depth_;
-  DECLARE_ALIGNED(16, uint16_t, s[kNumCoeffs]);
-  DECLARE_ALIGNED(16, uint16_t, ref_s[kNumCoeffs]);
-#else
-  DECLARE_ALIGNED(8,  uint8_t,  s[kNumCoeffs]);
-  DECLARE_ALIGNED(8,  uint8_t,  ref_s[kNumCoeffs]);
-#endif  // CONFIG_VP9_HIGHBITDEPTH
-  int err_count_total = 0;
-  int first_failure = -1;
-  for (int i = 0; i < count_test_block; ++i) {
-    int err_count = 0;
-    uint8_t tmp = static_cast<uint8_t>(rnd(3 * MAX_LOOP_FILTER + 4));
-    DECLARE_ALIGNED(16, const uint8_t, blimit0[16]) = {
-        tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
-        tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp
-    };
-    tmp = static_cast<uint8_t>(rnd(MAX_LOOP_FILTER));
-    DECLARE_ALIGNED(16, const uint8_t, limit0[16])  = {
-        tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
-        tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp
-    };
-    tmp = rnd.Rand8();
-    DECLARE_ALIGNED(16, const uint8_t, thresh0[16]) = {
-        tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
-        tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp
-    };
-    tmp = static_cast<uint8_t>(rnd(3 * MAX_LOOP_FILTER + 4));
-    DECLARE_ALIGNED(16, const uint8_t, blimit1[16]) = {
-        tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
-        tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp
-    };
-    tmp = static_cast<uint8_t>(rnd(MAX_LOOP_FILTER));
-    DECLARE_ALIGNED(16, const uint8_t, limit1[16])  = {
-        tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
-        tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp
-    };
-    tmp = rnd.Rand8();
-    DECLARE_ALIGNED(16, const uint8_t, thresh1[16]) = {
-        tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
-        tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp
-    };
-    int32_t p = kNumCoeffs / 32;
-    uint16_t tmp_s[kNumCoeffs];
-    int j = 0;
-    const uint8_t limit = *limit0 < *limit1 ? *limit0 : *limit1;
-    while (j < kNumCoeffs) {
-      uint8_t val = rnd.Rand8();
-      if (val & 0x80) {  // 50% chance to choose a new value.
-        tmp_s[j] = rnd.Rand16();
-        j++;
-      } else {  // 50% chance to repeat previous value in row X times.
-        int k = 0;
-        while (k++ < ((val & 0x1f) + 1) && j < kNumCoeffs) {
-          if (j < 1) {
-            tmp_s[j] = rnd.Rand16();
-          } else if (val & 0x20) {  // Increment by a value within the limit.
-            tmp_s[j] = (tmp_s[j - 1] + (limit - 1));
-          } else {  // Decrement by an value within the limit.
-            tmp_s[j] = (tmp_s[j - 1] - (limit - 1));
-          }
-          j++;
-        }
-      }
-    }
-    for (j = 0; j < kNumCoeffs; j++) {
-      if (i % 2) {
-        s[j] = tmp_s[j] & mask_;
-      } else {
-        s[j] = tmp_s[p * (j % p) + j / p] & mask_;
-      }
-      ref_s[j] = s[j];
-    }
-#if CONFIG_VP9_HIGHBITDEPTH
-    ref_loopfilter_op_(ref_s + 8 + p * 8, p, blimit0, limit0, thresh0,
-                       blimit1, limit1, thresh1, bd);
-    ASM_REGISTER_STATE_CHECK(
-        loopfilter_op_(s + 8 + p * 8, p, blimit0, limit0, thresh0,
-                       blimit1, limit1, thresh1, bd));
-#else
-    ref_loopfilter_op_(ref_s + 8 + p * 8, p, blimit0, limit0, thresh0,
-                       blimit1, limit1, thresh1);
-    ASM_REGISTER_STATE_CHECK(
-        loopfilter_op_(s + 8 + p * 8, p, blimit0, limit0, thresh0,
-                       blimit1, limit1, thresh1));
-#endif  // CONFIG_VP9_HIGHBITDEPTH
-    for (int j = 0; j < kNumCoeffs; ++j) {
-      err_count += ref_s[j] != s[j];
-    }
-    if (err_count && !err_count_total) {
-      first_failure = i;
-    }
-    err_count_total += err_count;
-  }
-  EXPECT_EQ(0, err_count_total)
-      << "Error: Loop8Test9Param, C output doesn't match SSE2 "
-         "loopfilter output. "
-      << "First failed at test case " << first_failure;
-}
-
-TEST_P(Loop8Test9Param, ValueCheck) {
-  ACMRandom rnd(ACMRandom::DeterministicSeed());
-  const int count_test_block = number_of_iterations;
-#if CONFIG_VP9_HIGHBITDEPTH
-  DECLARE_ALIGNED(16, uint16_t, s[kNumCoeffs]);
-  DECLARE_ALIGNED(16, uint16_t, ref_s[kNumCoeffs]);
-#else
-  DECLARE_ALIGNED(8,  uint8_t, s[kNumCoeffs]);
-  DECLARE_ALIGNED(8,  uint8_t, ref_s[kNumCoeffs]);
-#endif  // CONFIG_VP9_HIGHBITDEPTH
-  int err_count_total = 0;
-  int first_failure = -1;
-  for (int i = 0; i < count_test_block; ++i) {
-    int err_count = 0;
-    uint8_t tmp = static_cast<uint8_t>(rnd(3 * MAX_LOOP_FILTER + 4));
-    DECLARE_ALIGNED(16, const uint8_t, blimit0[16]) = {
-        tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
-        tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp
-    };
-    tmp = static_cast<uint8_t>(rnd(MAX_LOOP_FILTER));
-    DECLARE_ALIGNED(16, const uint8_t, limit0[16])  = {
-        tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
-        tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp
-    };
-    tmp = rnd.Rand8();
-    DECLARE_ALIGNED(16, const uint8_t, thresh0[16]) = {
-        tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
-        tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp
-    };
-    tmp = static_cast<uint8_t>(rnd(3 * MAX_LOOP_FILTER + 4));
-    DECLARE_ALIGNED(16, const uint8_t, blimit1[16]) = {
-        tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
-        tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp
-    };
-    tmp = static_cast<uint8_t>(rnd(MAX_LOOP_FILTER));
-    DECLARE_ALIGNED(16, const uint8_t, limit1[16])  = {
-        tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
-        tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp
-    };
-    tmp = rnd.Rand8();
-    DECLARE_ALIGNED(16, const uint8_t, thresh1[16]) = {
-        tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
-        tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp
-    };
-    int32_t p = kNumCoeffs / 32;  // TODO(pdlf) can we have non-square here?
-    for (int j = 0; j < kNumCoeffs; ++j) {
-      s[j] = rnd.Rand16() & mask_;
-      ref_s[j] = s[j];
-    }
-#if CONFIG_VP9_HIGHBITDEPTH
-    const int32_t bd = bit_depth_;
-    ref_loopfilter_op_(ref_s + 8 + p * 8, p, blimit0, limit0, thresh0,
-                       blimit1, limit1, thresh1, bd);
-    ASM_REGISTER_STATE_CHECK(
-        loopfilter_op_(s + 8 + p * 8, p, blimit0, limit0,
-                       thresh0, blimit1, limit1, thresh1, bd));
-#else
-    ref_loopfilter_op_(ref_s + 8 + p * 8, p, blimit0, limit0, thresh0,
-                       blimit1, limit1, thresh1);
-    ASM_REGISTER_STATE_CHECK(
-        loopfilter_op_(s + 8 + p * 8, p, blimit0, limit0, thresh0,
-                       blimit1, limit1, thresh1));
-#endif  // CONFIG_VP9_HIGHBITDEPTH
-    for (int j = 0; j < kNumCoeffs; ++j) {
-      err_count += ref_s[j] != s[j];
-    }
-    if (err_count && !err_count_total) {
-      first_failure = i;
-    }
-    err_count_total += err_count;
-  }
-  EXPECT_EQ(0, err_count_total)
-      << "Error: Loop8Test9Param, C output doesn't match SSE2"
-         "loopfilter output. "
-      << "First failed at test case " << first_failure;
-}
-
-using std::tr1::make_tuple;
-
-#if HAVE_SSE2
-#if CONFIG_VP9_HIGHBITDEPTH
-INSTANTIATE_TEST_CASE_P(
-    SSE2, Loop8Test6Param,
-    ::testing::Values(
-        make_tuple(&vp9_highbd_lpf_horizontal_4_sse2,
-                   &vp9_highbd_lpf_horizontal_4_c, 8, 1),
-        make_tuple(&vp9_highbd_lpf_vertical_4_sse2,
-                   &vp9_highbd_lpf_vertical_4_c, 8, 1),
-        make_tuple(&vp9_highbd_lpf_horizontal_8_sse2,
-                   &vp9_highbd_lpf_horizontal_8_c, 8, 1),
-        make_tuple(&vp9_highbd_lpf_horizontal_16_sse2,
-                   &vp9_highbd_lpf_horizontal_16_c, 8, 1),
-        make_tuple(&vp9_highbd_lpf_horizontal_16_sse2,
-                   &vp9_highbd_lpf_horizontal_16_c, 8, 2),
-        make_tuple(&vp9_highbd_lpf_vertical_8_sse2,
-                   &vp9_highbd_lpf_vertical_8_c, 8, 1),
-        make_tuple(&wrapper_vertical_16_sse2,
-                   &wrapper_vertical_16_c, 8, 1),
-        make_tuple(&vp9_highbd_lpf_horizontal_4_sse2,
-                   &vp9_highbd_lpf_horizontal_4_c, 10, 1),
-        make_tuple(&vp9_highbd_lpf_vertical_4_sse2,
-                   &vp9_highbd_lpf_vertical_4_c, 10, 1),
-        make_tuple(&vp9_highbd_lpf_horizontal_8_sse2,
-                   &vp9_highbd_lpf_horizontal_8_c, 10, 1),
-        make_tuple(&vp9_highbd_lpf_horizontal_16_sse2,
-                   &vp9_highbd_lpf_horizontal_16_c, 10, 1),
-        make_tuple(&vp9_highbd_lpf_horizontal_16_sse2,
-                   &vp9_highbd_lpf_horizontal_16_c, 10, 2),
-        make_tuple(&vp9_highbd_lpf_vertical_8_sse2,
-                   &vp9_highbd_lpf_vertical_8_c, 10, 1),
-        make_tuple(&wrapper_vertical_16_sse2,
-                   &wrapper_vertical_16_c, 10, 1),
-        make_tuple(&vp9_highbd_lpf_horizontal_4_sse2,
-                   &vp9_highbd_lpf_horizontal_4_c, 12, 1),
-        make_tuple(&vp9_highbd_lpf_vertical_4_sse2,
-                   &vp9_highbd_lpf_vertical_4_c, 12, 1),
-        make_tuple(&vp9_highbd_lpf_horizontal_8_sse2,
-                   &vp9_highbd_lpf_horizontal_8_c, 12, 1),
-        make_tuple(&vp9_highbd_lpf_horizontal_16_sse2,
-                   &vp9_highbd_lpf_horizontal_16_c, 12, 1),
-        make_tuple(&vp9_highbd_lpf_horizontal_16_sse2,
-                   &vp9_highbd_lpf_horizontal_16_c, 12, 2),
-        make_tuple(&vp9_highbd_lpf_vertical_8_sse2,
-                   &vp9_highbd_lpf_vertical_8_c, 12, 1),
-        make_tuple(&wrapper_vertical_16_sse2,
-                   &wrapper_vertical_16_c, 12, 1),
-        make_tuple(&wrapper_vertical_16_dual_sse2,
-                   &wrapper_vertical_16_dual_c, 8, 1),
-        make_tuple(&wrapper_vertical_16_dual_sse2,
-                   &wrapper_vertical_16_dual_c, 10, 1),
-        make_tuple(&wrapper_vertical_16_dual_sse2,
-                   &wrapper_vertical_16_dual_c, 12, 1)));
-#else
-INSTANTIATE_TEST_CASE_P(
-    SSE2, Loop8Test6Param,
-    ::testing::Values(
-        make_tuple(&vp9_lpf_horizontal_8_sse2, &vp9_lpf_horizontal_8_c, 8, 1),
-        make_tuple(&vp9_lpf_horizontal_16_sse2, &vp9_lpf_horizontal_16_c, 8, 1),
-        make_tuple(&vp9_lpf_horizontal_16_sse2, &vp9_lpf_horizontal_16_c, 8, 2),
-        make_tuple(&vp9_lpf_vertical_8_sse2, &vp9_lpf_vertical_8_c, 8, 1),
-        make_tuple(&wrapper_vertical_16_sse2, &wrapper_vertical_16_c, 8, 1)));
-#endif  // CONFIG_VP9_HIGHBITDEPTH
-#endif
-
-#if HAVE_AVX2 && (!CONFIG_VP9_HIGHBITDEPTH)
-INSTANTIATE_TEST_CASE_P(
-    AVX2, Loop8Test6Param,
-    ::testing::Values(
-        make_tuple(&vp9_lpf_horizontal_16_avx2, &vp9_lpf_horizontal_16_c, 8, 1),
-        make_tuple(&vp9_lpf_horizontal_16_avx2, &vp9_lpf_horizontal_16_c, 8,
-                   2)));
-#endif
-
-#if HAVE_SSE2
-#if CONFIG_VP9_HIGHBITDEPTH
-INSTANTIATE_TEST_CASE_P(
-    SSE2, Loop8Test9Param,
-    ::testing::Values(
-        make_tuple(&vp9_highbd_lpf_horizontal_4_dual_sse2,
-                   &vp9_highbd_lpf_horizontal_4_dual_c, 8),
-        make_tuple(&vp9_highbd_lpf_horizontal_8_dual_sse2,
-                   &vp9_highbd_lpf_horizontal_8_dual_c, 8),
-        make_tuple(&vp9_highbd_lpf_vertical_4_dual_sse2,
-                   &vp9_highbd_lpf_vertical_4_dual_c, 8),
-        make_tuple(&vp9_highbd_lpf_vertical_8_dual_sse2,
-                   &vp9_highbd_lpf_vertical_8_dual_c, 8),
-        make_tuple(&vp9_highbd_lpf_horizontal_4_dual_sse2,
-                   &vp9_highbd_lpf_horizontal_4_dual_c, 10),
-        make_tuple(&vp9_highbd_lpf_horizontal_8_dual_sse2,
-                   &vp9_highbd_lpf_horizontal_8_dual_c, 10),
-        make_tuple(&vp9_highbd_lpf_vertical_4_dual_sse2,
-                   &vp9_highbd_lpf_vertical_4_dual_c, 10),
-        make_tuple(&vp9_highbd_lpf_vertical_8_dual_sse2,
-                   &vp9_highbd_lpf_vertical_8_dual_c, 10),
-        make_tuple(&vp9_highbd_lpf_horizontal_4_dual_sse2,
-                   &vp9_highbd_lpf_horizontal_4_dual_c, 12),
-        make_tuple(&vp9_highbd_lpf_horizontal_8_dual_sse2,
-                   &vp9_highbd_lpf_horizontal_8_dual_c, 12),
-        make_tuple(&vp9_highbd_lpf_vertical_4_dual_sse2,
-                   &vp9_highbd_lpf_vertical_4_dual_c, 12),
-        make_tuple(&vp9_highbd_lpf_vertical_8_dual_sse2,
-                   &vp9_highbd_lpf_vertical_8_dual_c, 12)));
-#else
-INSTANTIATE_TEST_CASE_P(
-    SSE2, Loop8Test9Param,
-    ::testing::Values(
-        make_tuple(&vp9_lpf_horizontal_4_dual_sse2,
-                   &vp9_lpf_horizontal_4_dual_c, 8),
-        make_tuple(&vp9_lpf_horizontal_8_dual_sse2,
-                   &vp9_lpf_horizontal_8_dual_c, 8),
-        make_tuple(&vp9_lpf_vertical_4_dual_sse2,
-                   &vp9_lpf_vertical_4_dual_c, 8),
-        make_tuple(&vp9_lpf_vertical_8_dual_sse2,
-                   &vp9_lpf_vertical_8_dual_c, 8)));
-#endif  // CONFIG_VP9_HIGHBITDEPTH
-#endif
-
-#if HAVE_NEON
-#if CONFIG_VP9_HIGHBITDEPTH
-// No neon high bitdepth functions.
-#else
-INSTANTIATE_TEST_CASE_P(
-    NEON, Loop8Test6Param,
-    ::testing::Values(
-#if HAVE_NEON_ASM
-// Using #if inside the macro is unsupported on MSVS but the tests are not
-// currently built for MSVS with ARM and NEON.
-        make_tuple(&vp9_lpf_horizontal_16_neon,
-                   &vp9_lpf_horizontal_16_c, 8, 1),
-        make_tuple(&vp9_lpf_horizontal_16_neon,
-                   &vp9_lpf_horizontal_16_c, 8, 2),
-        make_tuple(&wrapper_vertical_16_neon,
-                   &wrapper_vertical_16_c, 8, 1),
-        make_tuple(&wrapper_vertical_16_dual_neon,
-                   &wrapper_vertical_16_dual_c, 8, 1),
-        make_tuple(&vp9_lpf_horizontal_8_neon,
-                   &vp9_lpf_horizontal_8_c, 8, 1),
-        make_tuple(&vp9_lpf_vertical_8_neon,
-                   &vp9_lpf_vertical_8_c, 8, 1),
-#endif  // HAVE_NEON_ASM
-        make_tuple(&vp9_lpf_horizontal_4_neon,
-                   &vp9_lpf_horizontal_4_c, 8, 1),
-        make_tuple(&vp9_lpf_vertical_4_neon,
-                   &vp9_lpf_vertical_4_c, 8, 1)));
-INSTANTIATE_TEST_CASE_P(
-    NEON, Loop8Test9Param,
-    ::testing::Values(
-#if HAVE_NEON_ASM
-        make_tuple(&vp9_lpf_horizontal_8_dual_neon,
-                   &vp9_lpf_horizontal_8_dual_c, 8),
-        make_tuple(&vp9_lpf_vertical_8_dual_neon,
-                   &vp9_lpf_vertical_8_dual_c, 8),
-#endif  // HAVE_NEON_ASM
-        make_tuple(&vp9_lpf_horizontal_4_dual_neon,
-                   &vp9_lpf_horizontal_4_dual_c, 8),
-        make_tuple(&vp9_lpf_vertical_4_dual_neon,
-                   &vp9_lpf_vertical_4_dual_c, 8)));
-#endif  // CONFIG_VP9_HIGHBITDEPTH
-#endif  // HAVE_NEON
-
-#if HAVE_MSA && (!CONFIG_VP9_HIGHBITDEPTH)
-INSTANTIATE_TEST_CASE_P(
-    MSA, Loop8Test6Param,
-    ::testing::Values(
-        make_tuple(&vp9_lpf_horizontal_8_msa, &vp9_lpf_horizontal_8_c, 8, 1),
-        make_tuple(&vp9_lpf_horizontal_16_msa, &vp9_lpf_horizontal_16_c, 8, 1),
-        make_tuple(&vp9_lpf_horizontal_16_msa, &vp9_lpf_horizontal_16_c, 8, 2),
-        make_tuple(&vp9_lpf_vertical_8_msa, &vp9_lpf_vertical_8_c, 8, 1),
-        make_tuple(&wrapper_vertical_16_msa, &wrapper_vertical_16_c, 8, 1)));
-
-INSTANTIATE_TEST_CASE_P(
-    MSA, Loop8Test9Param,
-    ::testing::Values(
-        make_tuple(&vp9_lpf_horizontal_4_dual_msa,
-                   &vp9_lpf_horizontal_4_dual_c, 8),
-        make_tuple(&vp9_lpf_horizontal_8_dual_msa,
-                   &vp9_lpf_horizontal_8_dual_c, 8),
-        make_tuple(&vp9_lpf_vertical_4_dual_msa,
-                   &vp9_lpf_vertical_4_dual_c, 8),
-        make_tuple(&vp9_lpf_vertical_8_dual_msa,
-                   &vp9_lpf_vertical_8_dual_c, 8)));
-#endif  // HAVE_MSA && (!CONFIG_VP9_HIGHBITDEPTH)
-
-}  // namespace
--- a/test/lpf_test.cc
+++ b/test/lpf_test.cc
@@ -0,0 +1,672 @@
+/*
+ *  Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <cmath>
+#include <cstdlib>
+#include <string>
+
+#include "third_party/googletest/src/include/gtest/gtest.h"
+
+#include "./vpx_config.h"
+#include "./vpx_dsp_rtcd.h"
+#include "test/acm_random.h"
+#include "test/clear_system_state.h"
+#include "test/register_state_check.h"
+#include "test/util.h"
+#include "vp9/common/vp9_entropy.h"
+#include "vp9/common/vp9_loopfilter.h"
+#include "vpx/vpx_integer.h"
+
+using libvpx_test::ACMRandom;
+
+namespace {
+// Horizontally and Vertically need 32x32: 8  Coeffs preceding filtered section
+//                                         16 Coeffs within filtered section
+//                                         8  Coeffs following filtered section
+const int kNumCoeffs = 1024;
+
+const int number_of_iterations = 10000;  // Loop iterations run by each test.
+
+#if CONFIG_VP9_HIGHBITDEPTH
+typedef uint16_t Pixel;  // Sample type of the filtered buffers.
+#define PIXEL_WIDTH 16  // Passed to DECLARE_ALIGNED for the Pixel buffers.
+// Filter signatures; the high-bitdepth variants take a trailing bit depth.
+typedef void (*loop_op_t)(Pixel *s, int p, const uint8_t *blimit,
+                          const uint8_t *limit, const uint8_t *thresh, int bd);
+typedef void (*dual_loop_op_t)(Pixel *s, int p, const uint8_t *blimit0,
+                               const uint8_t *limit0, const uint8_t *thresh0,
+                               const uint8_t *blimit1, const uint8_t *limit1,
+                               const uint8_t *thresh1, int bd);
+#else
+typedef uint8_t Pixel;  // Sample type of the filtered buffers.
+#define PIXEL_WIDTH 8  // Passed to DECLARE_ALIGNED for the Pixel buffers.
+// Filter signatures for the build without a bit-depth argument.
+typedef void (*loop_op_t)(Pixel *s, int p, const uint8_t *blimit,
+                          const uint8_t *limit, const uint8_t *thresh);
+typedef void (*dual_loop_op_t)(Pixel *s, int p, const uint8_t *blimit0,
+                               const uint8_t *limit0, const uint8_t *thresh0,
+                               const uint8_t *blimit1, const uint8_t *limit1,
+                               const uint8_t *thresh1);
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+// Test parameters: (function under test, reference function, bit depth).
+typedef std::tr1::tuple<loop_op_t, loop_op_t, int> loop8_param_t;
+typedef std::tr1::tuple<dual_loop_op_t, dual_loop_op_t, int> dualloop8_param_t;
+// Fills s and ref_s with identical, partly correlated pseudo-random samples.
+void InitInput(Pixel *s, Pixel *ref_s, ACMRandom *rnd, const uint8_t limit,
+               const int mask, const int32_t p, const int i) {
+  uint16_t tmp_s[kNumCoeffs];
+  // Pass 1: linear walk; fresh random values or runs stepping by limit - 1.
+  for (int j = 0; j < kNumCoeffs;) {
+    const uint8_t val = rnd->Rand8();
+    if (val & 0x80) {  // 50% chance to choose a new value.
+      tmp_s[j] = rnd->Rand16();
+      j++;
+    } else {  // 50% chance to repeat previous value in row X times.
+      int k = 0;
+      while (k++ < ((val & 0x1f) + 1) && j < kNumCoeffs) {  // Up to 32 steps.
+        if (j < 1) {
+          tmp_s[j] = rnd->Rand16();
+        } else if (val & 0x20) {  // Increment by a value within the limit.
+          tmp_s[j] = tmp_s[j - 1] + (limit - 1);
+        } else {  // Decrement by a value within the limit.
+          tmp_s[j] = tmp_s[j - 1] - (limit - 1);
+        }
+        j++;
+      }
+    }
+  }
+  // Pass 2: same run scheme, but addressed through a 32x32 transposed index.
+  for (int j = 0; j < kNumCoeffs;) {
+    const uint8_t val = rnd->Rand8();
+    if (val & 0x80) {  // 50% chance to keep the value written by pass 1.
+      j++;
+    } else {  // 50% chance to repeat previous value in column X times.
+      int k = 0;
+      while (k++ < ((val & 0x1f) + 1) && j < kNumCoeffs) {  // Up to 32 steps.
+        if (j < 1) {
+          tmp_s[j] = rnd->Rand16();
+        } else if (val & 0x20) {  // Increment by a value within the limit.
+          tmp_s[(j % 32) * 32 + j / 32] =
+              tmp_s[((j - 1) % 32) * 32 + (j - 1) / 32] + (limit - 1);
+        } else {  // Decrement by a value within the limit.
+          tmp_s[(j % 32) * 32 + j / 32] =
+              tmp_s[((j - 1) % 32) * 32 + (j - 1) / 32] - (limit - 1);
+        }
+        j++;
+      }
+    }
+  }
+  // Copy out: odd i takes tmp_s as is, even i reads it transposed (using p).
+  for (int j = 0; j < kNumCoeffs; j++) {
+    if (i % 2) {
+      s[j] = tmp_s[j] & mask;
+    } else {
+      s[j] = tmp_s[p * (j % p) + j / p] & mask;
+    }
+    ref_s[j] = s[j];  // Both buffers start out identical.
+  }
+}
+// Fixture for filters that take a single blimit/limit/thresh set (loop_op_t).
+class Loop8Test6Param : public ::testing::TestWithParam<loop8_param_t> {
+ public:
+  virtual ~Loop8Test6Param() {}
+  virtual void SetUp() {
+    loopfilter_op_ = GET_PARAM(0);
+    ref_loopfilter_op_ = GET_PARAM(1);
+    bit_depth_ = GET_PARAM(2);
+    mask_ = (1 << bit_depth_) - 1;  // All-ones mask of bit_depth_ bits.
+  }
+
+  virtual void TearDown() { libvpx_test::ClearSystemState(); }
+
+ protected:
+  int bit_depth_;                // Bit depth taken from the test parameter.
+  int mask_;                     // Applied to every generated sample.
+  loop_op_t loopfilter_op_;      // Implementation under test.
+  loop_op_t ref_loopfilter_op_;  // Reference implementation.
+};
+// Fixture for filters that take two blimit/limit/thresh sets (dual_loop_op_t).
+class Loop8Test9Param : public ::testing::TestWithParam<dualloop8_param_t> {
+ public:
+  virtual ~Loop8Test9Param() {}
+  virtual void SetUp() {
+    loopfilter_op_ = GET_PARAM(0);
+    ref_loopfilter_op_ = GET_PARAM(1);
+    bit_depth_ = GET_PARAM(2);
+    mask_ = (1 << bit_depth_) - 1;  // All-ones mask of bit_depth_ bits.
+  }
+
+  virtual void TearDown() { libvpx_test::ClearSystemState(); }
+
+ protected:
+  int bit_depth_;                     // Bit depth taken from the test parameter.
+  int mask_;                          // Applied to every generated sample.
+  dual_loop_op_t loopfilter_op_;      // Implementation under test.
+  dual_loop_op_t ref_loopfilter_op_;  // Reference implementation.
+};
+
+TEST_P(Loop8Test6Param, OperationCheck) {  // Uses InitInput() test data.
+  ACMRandom rnd(ACMRandom::DeterministicSeed());
+  const int count_test_block = number_of_iterations;
+  const int32_t p = kNumCoeffs / 32;  // Passed as the filters' p argument.
+  DECLARE_ALIGNED(PIXEL_WIDTH, Pixel, s[kNumCoeffs]);
+  DECLARE_ALIGNED(PIXEL_WIDTH, Pixel, ref_s[kNumCoeffs]);
+  int err_count_total = 0;
+  int first_failure = -1;  // Index of the first mismatching iteration, if any.
+  for (int i = 0; i < count_test_block; ++i) {
+    int err_count = 0;
+    uint8_t tmp = static_cast<uint8_t>(rnd(3 * MAX_LOOP_FILTER + 4));
+    DECLARE_ALIGNED(16, const uint8_t,
+                    blimit[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
+                                    tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };
+    tmp = static_cast<uint8_t>(rnd(MAX_LOOP_FILTER));
+    DECLARE_ALIGNED(16, const uint8_t,
+                    limit[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
+                                   tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };
+    tmp = rnd.Rand8();
+    DECLARE_ALIGNED(16, const uint8_t,
+                    thresh[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
+                                    tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };
+    InitInput(s, ref_s, &rnd, *limit, mask_, p, i);
+#if CONFIG_VP9_HIGHBITDEPTH
+    ref_loopfilter_op_(ref_s + 8 + p * 8, p, blimit, limit, thresh, bit_depth_);
+    ASM_REGISTER_STATE_CHECK(
+        loopfilter_op_(s + 8 + p * 8, p, blimit, limit, thresh, bit_depth_));
+#else
+    ref_loopfilter_op_(ref_s + 8 + p * 8, p, blimit, limit, thresh);
+    ASM_REGISTER_STATE_CHECK(
+        loopfilter_op_(s + 8 + p * 8, p, blimit, limit, thresh));
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+    // Count the samples on which the two outputs disagree.
+    for (int j = 0; j < kNumCoeffs; ++j) {
+      err_count += ref_s[j] != s[j];
+    }
+    if (err_count && !err_count_total) {  // First iteration with a mismatch.
+      first_failure = i;
+    }
+    err_count_total += err_count;
+  }
+  EXPECT_EQ(0, err_count_total)
+      << "Error: Loop8Test6Param, C output doesn't match SSE2 "
+         "loopfilter output. "
+      << "First failed at test case " << first_failure;
+}
+
+TEST_P(Loop8Test6Param, ValueCheck) {  // Uses unstructured random samples.
+  ACMRandom rnd(ACMRandom::DeterministicSeed());
+  const int count_test_block = number_of_iterations;
+  DECLARE_ALIGNED(PIXEL_WIDTH, Pixel, s[kNumCoeffs]);
+  DECLARE_ALIGNED(PIXEL_WIDTH, Pixel, ref_s[kNumCoeffs]);
+  int err_count_total = 0;
+  int first_failure = -1;  // Index of the first mismatching iteration, if any.
+
+  // NOTE: The code in vp9_loopfilter.c:update_sharpness computes mblim as a
+  // function of sharpness_lvl and the loopfilter lvl as:
+  // block_inside_limit = lvl >> ((sharpness_lvl > 0) + (sharpness_lvl > 4));
+  // ...
+  // memset(lfi->lfthr[lvl].mblim, (2 * (lvl + 2) + block_inside_limit),
+  //        SIMD_WIDTH);
+  // This means that the largest value for mblim will occur when sharpness_lvl
+  // is equal to 0, and lvl is equal to its greatest value (MAX_LOOP_FILTER).
+  // In this case block_inside_limit will be equal to MAX_LOOP_FILTER and
+  // therefore mblim will be equal to (2 * (lvl + 2) + block_inside_limit) =
+  // 2 * (MAX_LOOP_FILTER + 2) + MAX_LOOP_FILTER = 3 * MAX_LOOP_FILTER + 4
+
+  for (int i = 0; i < count_test_block; ++i) {
+    int err_count = 0;
+    uint8_t tmp = static_cast<uint8_t>(rnd(3 * MAX_LOOP_FILTER + 4));
+    DECLARE_ALIGNED(16, const uint8_t,
+                    blimit[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
+                                    tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };
+    tmp = static_cast<uint8_t>(rnd(MAX_LOOP_FILTER));
+    DECLARE_ALIGNED(16, const uint8_t,
+                    limit[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
+                                   tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };
+    tmp = rnd.Rand8();
+    DECLARE_ALIGNED(16, const uint8_t,
+                    thresh[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
+                                    tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };
+    int32_t p = kNumCoeffs / 32;  // Passed as the filters' p argument.
+    for (int j = 0; j < kNumCoeffs; ++j) {
+      s[j] = rnd.Rand16() & mask_;
+      ref_s[j] = s[j];  // Both buffers start out identical.
+    }
+#if CONFIG_VP9_HIGHBITDEPTH
+    ref_loopfilter_op_(ref_s + 8 + p * 8, p, blimit, limit, thresh, bit_depth_);
+    ASM_REGISTER_STATE_CHECK(
+        loopfilter_op_(s + 8 + p * 8, p, blimit, limit, thresh, bit_depth_));
+#else
+    ref_loopfilter_op_(ref_s + 8 + p * 8, p, blimit, limit, thresh);
+    ASM_REGISTER_STATE_CHECK(
+        loopfilter_op_(s + 8 + p * 8, p, blimit, limit, thresh));
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+    // Count the samples on which the two outputs disagree.
+    for (int j = 0; j < kNumCoeffs; ++j) {
+      err_count += ref_s[j] != s[j];
+    }
+    if (err_count && !err_count_total) {  // First iteration with a mismatch.
+      first_failure = i;
+    }
+    err_count_total += err_count;
+  }
+  EXPECT_EQ(0, err_count_total)
+      << "Error: Loop8Test6Param, C output doesn't match SSE2 "
+         "loopfilter output. "
+      << "First failed at test case " << first_failure;
+}
+
+TEST_P(Loop8Test9Param, OperationCheck) {  // Uses InitInput() test data.
+  ACMRandom rnd(ACMRandom::DeterministicSeed());
+  const int count_test_block = number_of_iterations;
+  DECLARE_ALIGNED(PIXEL_WIDTH, Pixel, s[kNumCoeffs]);
+  DECLARE_ALIGNED(PIXEL_WIDTH, Pixel, ref_s[kNumCoeffs]);
+  int err_count_total = 0;
+  int first_failure = -1;  // Index of the first mismatching iteration, if any.
+  for (int i = 0; i < count_test_block; ++i) {
+    int err_count = 0;
+    uint8_t tmp = static_cast<uint8_t>(rnd(3 * MAX_LOOP_FILTER + 4));
+    DECLARE_ALIGNED(16, const uint8_t,
+                    blimit0[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
+                                     tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };
+    tmp = static_cast<uint8_t>(rnd(MAX_LOOP_FILTER));
+    DECLARE_ALIGNED(16, const uint8_t,
+                    limit0[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
+                                    tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };
+    tmp = rnd.Rand8();
+    DECLARE_ALIGNED(16, const uint8_t,
+                    thresh0[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
+                                     tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };
+    tmp = static_cast<uint8_t>(rnd(3 * MAX_LOOP_FILTER + 4));
+    DECLARE_ALIGNED(16, const uint8_t,
+                    blimit1[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
+                                     tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };
+    tmp = static_cast<uint8_t>(rnd(MAX_LOOP_FILTER));
+    DECLARE_ALIGNED(16, const uint8_t,
+                    limit1[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
+                                    tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };
+    tmp = rnd.Rand8();
+    DECLARE_ALIGNED(16, const uint8_t,
+                    thresh1[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
+                                     tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };
+    int32_t p = kNumCoeffs / 32;  // Passed as the filters' p argument.
+    const uint8_t limit = *limit0 < *limit1 ? *limit0 : *limit1;
+    InitInput(s, ref_s, &rnd, limit, mask_, p, i);
+#if CONFIG_VP9_HIGHBITDEPTH
+    ref_loopfilter_op_(ref_s + 8 + p * 8, p, blimit0, limit0, thresh0, blimit1,
+                       limit1, thresh1, bit_depth_);
+    ASM_REGISTER_STATE_CHECK(loopfilter_op_(s + 8 + p * 8, p, blimit0, limit0,
+                                            thresh0, blimit1, limit1, thresh1,
+                                            bit_depth_));
+#else
+    ref_loopfilter_op_(ref_s + 8 + p * 8, p, blimit0, limit0, thresh0, blimit1,
+                       limit1, thresh1);
+    ASM_REGISTER_STATE_CHECK(loopfilter_op_(s + 8 + p * 8, p, blimit0, limit0,
+                                            thresh0, blimit1, limit1, thresh1));
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+    // Count the samples on which the two outputs disagree.
+    for (int j = 0; j < kNumCoeffs; ++j) {
+      err_count += ref_s[j] != s[j];
+    }
+    if (err_count && !err_count_total) {  // First iteration with a mismatch.
+      first_failure = i;
+    }
+    err_count_total += err_count;
+  }
+  EXPECT_EQ(0, err_count_total)
+      << "Error: Loop8Test9Param, C output doesn't match SSE2 "
+         "loopfilter output. "
+      << "First failed at test case " << first_failure;
+}
+
+TEST_P(Loop8Test9Param, ValueCheck) {  // Uses unstructured random samples.
+  ACMRandom rnd(ACMRandom::DeterministicSeed());
+  const int count_test_block = number_of_iterations;
+  DECLARE_ALIGNED(PIXEL_WIDTH, Pixel, s[kNumCoeffs]);
+  DECLARE_ALIGNED(PIXEL_WIDTH, Pixel, ref_s[kNumCoeffs]);
+  int err_count_total = 0;
+  int first_failure = -1;  // Index of the first mismatching iteration, if any.
+  for (int i = 0; i < count_test_block; ++i) {
+    int err_count = 0;
+    uint8_t tmp = static_cast<uint8_t>(rnd(3 * MAX_LOOP_FILTER + 4));
+    DECLARE_ALIGNED(16, const uint8_t,
+                    blimit0[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
+                                     tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };
+    tmp = static_cast<uint8_t>(rnd(MAX_LOOP_FILTER));
+    DECLARE_ALIGNED(16, const uint8_t,
+                    limit0[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
+                                    tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };
+    tmp = rnd.Rand8();
+    DECLARE_ALIGNED(16, const uint8_t,
+                    thresh0[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
+                                     tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };
+    tmp = static_cast<uint8_t>(rnd(3 * MAX_LOOP_FILTER + 4));
+    DECLARE_ALIGNED(16, const uint8_t,
+                    blimit1[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
+                                     tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };
+    tmp = static_cast<uint8_t>(rnd(MAX_LOOP_FILTER));
+    DECLARE_ALIGNED(16, const uint8_t,
+                    limit1[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
+                                    tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };
+    tmp = rnd.Rand8();
+    DECLARE_ALIGNED(16, const uint8_t,
+                    thresh1[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
+                                     tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };
+    int32_t p = kNumCoeffs / 32;  // TODO(pdlf) can we have non-square here?
+    for (int j = 0; j < kNumCoeffs; ++j) {
+      s[j] = rnd.Rand16() & mask_;
+      ref_s[j] = s[j];  // Both buffers start out identical.
+    }
+#if CONFIG_VP9_HIGHBITDEPTH
+    ref_loopfilter_op_(ref_s + 8 + p * 8, p, blimit0, limit0, thresh0, blimit1,
+                       limit1, thresh1, bit_depth_);
+    ASM_REGISTER_STATE_CHECK(loopfilter_op_(s + 8 + p * 8, p, blimit0, limit0,
+                                            thresh0, blimit1, limit1, thresh1,
+                                            bit_depth_));
+#else
+    ref_loopfilter_op_(ref_s + 8 + p * 8, p, blimit0, limit0, thresh0, blimit1,
+                       limit1, thresh1);
+    ASM_REGISTER_STATE_CHECK(loopfilter_op_(s + 8 + p * 8, p, blimit0, limit0,
+                                            thresh0, blimit1, limit1, thresh1));
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+    // Count the samples on which the two outputs disagree.
+    for (int j = 0; j < kNumCoeffs; ++j) {
+      err_count += ref_s[j] != s[j];
+    }
+    if (err_count && !err_count_total) {  // First iteration with a mismatch.
+      first_failure = i;
+    }
+    err_count_total += err_count;
+  }
+  // The first literal needs its trailing space: adjacent literals are joined.
+  EXPECT_EQ(0, err_count_total)
+      << "Error: Loop8Test9Param, C output doesn't match SSE2 "
+         "loopfilter output. "
+      << "First failed at test case " << first_failure;
+}
+
+using std::tr1::make_tuple;
+
+#if HAVE_SSE2  // Pair each SSE2 loop_op_t filter with its C reference.
+#if CONFIG_VP9_HIGHBITDEPTH
+INSTANTIATE_TEST_CASE_P(
+    SSE2, Loop8Test6Param,
+    ::testing::Values(make_tuple(&vpx_highbd_lpf_horizontal_4_sse2,
+                                 &vpx_highbd_lpf_horizontal_4_c, 8),
+                      make_tuple(&vpx_highbd_lpf_vertical_4_sse2,
+                                 &vpx_highbd_lpf_vertical_4_c, 8),
+                      make_tuple(&vpx_highbd_lpf_horizontal_8_sse2,
+                                 &vpx_highbd_lpf_horizontal_8_c, 8),
+                      make_tuple(&vpx_highbd_lpf_horizontal_16_sse2,
+                                 &vpx_highbd_lpf_horizontal_16_c, 8),
+                      make_tuple(&vpx_highbd_lpf_horizontal_16_dual_sse2,
+                                 &vpx_highbd_lpf_horizontal_16_dual_c, 8),
+                      make_tuple(&vpx_highbd_lpf_vertical_8_sse2,
+                                 &vpx_highbd_lpf_vertical_8_c, 8),
+                      make_tuple(&vpx_highbd_lpf_vertical_16_sse2,
+                                 &vpx_highbd_lpf_vertical_16_c, 8),
+                      make_tuple(&vpx_highbd_lpf_horizontal_4_sse2,
+                                 &vpx_highbd_lpf_horizontal_4_c, 10),
+                      make_tuple(&vpx_highbd_lpf_vertical_4_sse2,
+                                 &vpx_highbd_lpf_vertical_4_c, 10),
+                      make_tuple(&vpx_highbd_lpf_horizontal_8_sse2,
+                                 &vpx_highbd_lpf_horizontal_8_c, 10),
+                      make_tuple(&vpx_highbd_lpf_horizontal_16_sse2,
+                                 &vpx_highbd_lpf_horizontal_16_c, 10),
+                      make_tuple(&vpx_highbd_lpf_horizontal_16_dual_sse2,
+                                 &vpx_highbd_lpf_horizontal_16_dual_c, 10),
+                      make_tuple(&vpx_highbd_lpf_vertical_8_sse2,
+                                 &vpx_highbd_lpf_vertical_8_c, 10),
+                      make_tuple(&vpx_highbd_lpf_vertical_16_sse2,
+                                 &vpx_highbd_lpf_vertical_16_c, 10),
+                      make_tuple(&vpx_highbd_lpf_horizontal_4_sse2,
+                                 &vpx_highbd_lpf_horizontal_4_c, 12),
+                      make_tuple(&vpx_highbd_lpf_vertical_4_sse2,
+                                 &vpx_highbd_lpf_vertical_4_c, 12),
+                      make_tuple(&vpx_highbd_lpf_horizontal_8_sse2,
+                                 &vpx_highbd_lpf_horizontal_8_c, 12),
+                      make_tuple(&vpx_highbd_lpf_horizontal_16_sse2,
+                                 &vpx_highbd_lpf_horizontal_16_c, 12),
+                      make_tuple(&vpx_highbd_lpf_horizontal_16_dual_sse2,
+                                 &vpx_highbd_lpf_horizontal_16_dual_c, 12),
+                      make_tuple(&vpx_highbd_lpf_vertical_8_sse2,
+                                 &vpx_highbd_lpf_vertical_8_c, 12),
+                      make_tuple(&vpx_highbd_lpf_vertical_16_sse2,
+                                 &vpx_highbd_lpf_vertical_16_c, 12),
+                      make_tuple(&vpx_highbd_lpf_vertical_16_dual_sse2,
+                                 &vpx_highbd_lpf_vertical_16_dual_c, 8),
+                      make_tuple(&vpx_highbd_lpf_vertical_16_dual_sse2,
+                                 &vpx_highbd_lpf_vertical_16_dual_c, 10),
+                      make_tuple(&vpx_highbd_lpf_vertical_16_dual_sse2,
+                                 &vpx_highbd_lpf_vertical_16_dual_c, 12)));
+#else
+INSTANTIATE_TEST_CASE_P(
+    SSE2, Loop8Test6Param,
+    ::testing::Values(
+        make_tuple(&vpx_lpf_horizontal_4_sse2, &vpx_lpf_horizontal_4_c, 8),
+        make_tuple(&vpx_lpf_horizontal_8_sse2, &vpx_lpf_horizontal_8_c, 8),
+        make_tuple(&vpx_lpf_horizontal_16_sse2, &vpx_lpf_horizontal_16_c, 8),
+        make_tuple(&vpx_lpf_horizontal_16_dual_sse2,
+                   &vpx_lpf_horizontal_16_dual_c, 8),
+        make_tuple(&vpx_lpf_vertical_4_sse2, &vpx_lpf_vertical_4_c, 8),
+        make_tuple(&vpx_lpf_vertical_8_sse2, &vpx_lpf_vertical_8_c, 8),
+        make_tuple(&vpx_lpf_vertical_16_sse2, &vpx_lpf_vertical_16_c, 8),
+        make_tuple(&vpx_lpf_vertical_16_dual_sse2, &vpx_lpf_vertical_16_dual_c,
+                   8)));
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+#endif  // HAVE_SSE2
+
+#if HAVE_AVX2 && (!CONFIG_VP9_HIGHBITDEPTH)
+INSTANTIATE_TEST_CASE_P(
+    AVX2, Loop8Test6Param,
+    ::testing::Values(make_tuple(&vpx_lpf_horizontal_16_avx2,
+                                 &vpx_lpf_horizontal_16_c, 8),
+                      make_tuple(&vpx_lpf_horizontal_16_dual_avx2,
+                                 &vpx_lpf_horizontal_16_dual_c, 8)));
+#endif  // HAVE_AVX2 && (!CONFIG_VP9_HIGHBITDEPTH)
+
+#if HAVE_SSE2  // Pair each SSE2 dual_loop_op_t filter with its C reference.
+#if CONFIG_VP9_HIGHBITDEPTH
+INSTANTIATE_TEST_CASE_P(
+    SSE2, Loop8Test9Param,
+    ::testing::Values(make_tuple(&vpx_highbd_lpf_horizontal_4_dual_sse2,
+                                 &vpx_highbd_lpf_horizontal_4_dual_c, 8),
+                      make_tuple(&vpx_highbd_lpf_horizontal_8_dual_sse2,
+                                 &vpx_highbd_lpf_horizontal_8_dual_c, 8),
+                      make_tuple(&vpx_highbd_lpf_vertical_4_dual_sse2,
+                                 &vpx_highbd_lpf_vertical_4_dual_c, 8),
+                      make_tuple(&vpx_highbd_lpf_vertical_8_dual_sse2,
+                                 &vpx_highbd_lpf_vertical_8_dual_c, 8),
+                      make_tuple(&vpx_highbd_lpf_horizontal_4_dual_sse2,
+                                 &vpx_highbd_lpf_horizontal_4_dual_c, 10),
+                      make_tuple(&vpx_highbd_lpf_horizontal_8_dual_sse2,
+                                 &vpx_highbd_lpf_horizontal_8_dual_c, 10),
+                      make_tuple(&vpx_highbd_lpf_vertical_4_dual_sse2,
+                                 &vpx_highbd_lpf_vertical_4_dual_c, 10),
+                      make_tuple(&vpx_highbd_lpf_vertical_8_dual_sse2,
+                                 &vpx_highbd_lpf_vertical_8_dual_c, 10),
+                      make_tuple(&vpx_highbd_lpf_horizontal_4_dual_sse2,
+                                 &vpx_highbd_lpf_horizontal_4_dual_c, 12),
+                      make_tuple(&vpx_highbd_lpf_horizontal_8_dual_sse2,
+                                 &vpx_highbd_lpf_horizontal_8_dual_c, 12),
+                      make_tuple(&vpx_highbd_lpf_vertical_4_dual_sse2,
+                                 &vpx_highbd_lpf_vertical_4_dual_c, 12),
+                      make_tuple(&vpx_highbd_lpf_vertical_8_dual_sse2,
+                                 &vpx_highbd_lpf_vertical_8_dual_c, 12)));
+#else
+INSTANTIATE_TEST_CASE_P(
+    SSE2, Loop8Test9Param,
+    ::testing::Values(make_tuple(&vpx_lpf_horizontal_4_dual_sse2,
+                                 &vpx_lpf_horizontal_4_dual_c, 8),
+                      make_tuple(&vpx_lpf_horizontal_8_dual_sse2,
+                                 &vpx_lpf_horizontal_8_dual_c, 8),
+                      make_tuple(&vpx_lpf_vertical_4_dual_sse2,
+                                 &vpx_lpf_vertical_4_dual_c, 8),
+                      make_tuple(&vpx_lpf_vertical_8_dual_sse2,
+                                 &vpx_lpf_vertical_8_dual_c, 8)));
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+#endif  // HAVE_SSE2
+
+#if HAVE_NEON
+#if CONFIG_VP9_HIGHBITDEPTH
+INSTANTIATE_TEST_CASE_P(
+    NEON, Loop8Test6Param,
+    ::testing::Values(make_tuple(&vpx_highbd_lpf_horizontal_4_neon,
+                                 &vpx_highbd_lpf_horizontal_4_c, 8),
+                      make_tuple(&vpx_highbd_lpf_horizontal_4_neon,
+                                 &vpx_highbd_lpf_horizontal_4_c, 10),
+                      make_tuple(&vpx_highbd_lpf_horizontal_4_neon,
+                                 &vpx_highbd_lpf_horizontal_4_c, 12),
+                      make_tuple(&vpx_highbd_lpf_horizontal_8_neon,
+                                 &vpx_highbd_lpf_horizontal_8_c, 8),
+                      make_tuple(&vpx_highbd_lpf_horizontal_8_neon,
+                                 &vpx_highbd_lpf_horizontal_8_c, 10),
+                      make_tuple(&vpx_highbd_lpf_horizontal_8_neon,
+                                 &vpx_highbd_lpf_horizontal_8_c, 12),
+                      make_tuple(&vpx_highbd_lpf_horizontal_16_neon,
+                                 &vpx_highbd_lpf_horizontal_16_c, 8),
+                      make_tuple(&vpx_highbd_lpf_horizontal_16_neon,
+                                 &vpx_highbd_lpf_horizontal_16_c, 10),
+                      make_tuple(&vpx_highbd_lpf_horizontal_16_neon,
+                                 &vpx_highbd_lpf_horizontal_16_c, 12),
+                      make_tuple(&vpx_highbd_lpf_horizontal_16_dual_neon,
+                                 &vpx_highbd_lpf_horizontal_16_dual_c, 8),
+                      make_tuple(&vpx_highbd_lpf_horizontal_16_dual_neon,
+                                 &vpx_highbd_lpf_horizontal_16_dual_c, 10),
+                      make_tuple(&vpx_highbd_lpf_horizontal_16_dual_neon,
+                                 &vpx_highbd_lpf_horizontal_16_dual_c, 12),
+                      make_tuple(&vpx_highbd_lpf_vertical_4_neon,
+                                 &vpx_highbd_lpf_vertical_4_c, 8),
+                      make_tuple(&vpx_highbd_lpf_vertical_4_neon,
+                                 &vpx_highbd_lpf_vertical_4_c, 10),
+                      make_tuple(&vpx_highbd_lpf_vertical_4_neon,
+                                 &vpx_highbd_lpf_vertical_4_c, 12),
+                      make_tuple(&vpx_highbd_lpf_vertical_8_neon,
+                                 &vpx_highbd_lpf_vertical_8_c, 8),
+                      make_tuple(&vpx_highbd_lpf_vertical_8_neon,
+                                 &vpx_highbd_lpf_vertical_8_c, 10),
+                      make_tuple(&vpx_highbd_lpf_vertical_8_neon,
+                                 &vpx_highbd_lpf_vertical_8_c, 12),
+                      make_tuple(&vpx_highbd_lpf_vertical_16_neon,
+                                 &vpx_highbd_lpf_vertical_16_c, 8),
+                      make_tuple(&vpx_highbd_lpf_vertical_16_neon,
+                                 &vpx_highbd_lpf_vertical_16_c, 10),
+                      make_tuple(&vpx_highbd_lpf_vertical_16_neon,
+                                 &vpx_highbd_lpf_vertical_16_c, 12),
+                      make_tuple(&vpx_highbd_lpf_vertical_16_dual_neon,
+                                 &vpx_highbd_lpf_vertical_16_dual_c, 8),
+                      make_tuple(&vpx_highbd_lpf_vertical_16_dual_neon,
+                                 &vpx_highbd_lpf_vertical_16_dual_c, 10),
+                      make_tuple(&vpx_highbd_lpf_vertical_16_dual_neon,
+                                 &vpx_highbd_lpf_vertical_16_dual_c, 12)));
+INSTANTIATE_TEST_CASE_P(
+    NEON, Loop8Test9Param,  // Each tuple: highbd dual NEON filter, its C counterpart, then 8/10/12 (presumably the bit depth).
+    ::testing::Values(make_tuple(&vpx_highbd_lpf_horizontal_4_dual_neon,
+                                 &vpx_highbd_lpf_horizontal_4_dual_c, 8),
+                      make_tuple(&vpx_highbd_lpf_horizontal_4_dual_neon,
+                                 &vpx_highbd_lpf_horizontal_4_dual_c, 10),
+                      make_tuple(&vpx_highbd_lpf_horizontal_4_dual_neon,
+                                 &vpx_highbd_lpf_horizontal_4_dual_c, 12),
+                      make_tuple(&vpx_highbd_lpf_horizontal_8_dual_neon,
+                                 &vpx_highbd_lpf_horizontal_8_dual_c, 8),
+                      make_tuple(&vpx_highbd_lpf_horizontal_8_dual_neon,
+                                 &vpx_highbd_lpf_horizontal_8_dual_c, 10),
+                      make_tuple(&vpx_highbd_lpf_horizontal_8_dual_neon,
+                                 &vpx_highbd_lpf_horizontal_8_dual_c, 12),
+                      make_tuple(&vpx_highbd_lpf_vertical_4_dual_neon,
+                                 &vpx_highbd_lpf_vertical_4_dual_c, 8),
+                      make_tuple(&vpx_highbd_lpf_vertical_4_dual_neon,
+                                 &vpx_highbd_lpf_vertical_4_dual_c, 10),
+                      make_tuple(&vpx_highbd_lpf_vertical_4_dual_neon,
+                                 &vpx_highbd_lpf_vertical_4_dual_c, 12),
+                      make_tuple(&vpx_highbd_lpf_vertical_8_dual_neon,
+                                 &vpx_highbd_lpf_vertical_8_dual_c, 8),
+                      make_tuple(&vpx_highbd_lpf_vertical_8_dual_neon,
+                                 &vpx_highbd_lpf_vertical_8_dual_c, 10),
+                      make_tuple(&vpx_highbd_lpf_vertical_8_dual_neon,
+                                 &vpx_highbd_lpf_vertical_8_dual_c, 12)));
+#else
+INSTANTIATE_TEST_CASE_P(
+    NEON, Loop8Test6Param,  // #else branch of CONFIG_VP9_HIGHBITDEPTH: non-highbd NEON filters vs. C.
+    ::testing::Values(  // Each tuple: NEON filter, its C counterpart, then 8 (presumably the bit depth).
+        make_tuple(&vpx_lpf_horizontal_16_neon, &vpx_lpf_horizontal_16_c, 8),
+        make_tuple(&vpx_lpf_horizontal_16_dual_neon,
+                   &vpx_lpf_horizontal_16_dual_c, 8),
+        make_tuple(&vpx_lpf_vertical_16_neon, &vpx_lpf_vertical_16_c, 8),
+        make_tuple(&vpx_lpf_vertical_16_dual_neon, &vpx_lpf_vertical_16_dual_c,
+                   8),
+        make_tuple(&vpx_lpf_horizontal_8_neon, &vpx_lpf_horizontal_8_c, 8),
+        make_tuple(&vpx_lpf_vertical_8_neon, &vpx_lpf_vertical_8_c, 8),
+        make_tuple(&vpx_lpf_horizontal_4_neon, &vpx_lpf_horizontal_4_c, 8),
+        make_tuple(&vpx_lpf_vertical_4_neon, &vpx_lpf_vertical_4_c, 8)));
+INSTANTIATE_TEST_CASE_P(
+    NEON, Loop8Test9Param,  // Non-highbd 4- and 8-wide dual NEON filters paired with their C counterparts.
+    ::testing::Values(make_tuple(&vpx_lpf_horizontal_8_dual_neon,
+                                 &vpx_lpf_horizontal_8_dual_c, 8),
+                      make_tuple(&vpx_lpf_vertical_8_dual_neon,
+                                 &vpx_lpf_vertical_8_dual_c, 8),
+                      make_tuple(&vpx_lpf_horizontal_4_dual_neon,
+                                 &vpx_lpf_horizontal_4_dual_c, 8),
+                      make_tuple(&vpx_lpf_vertical_4_dual_neon,
+                                 &vpx_lpf_vertical_4_dual_c, 8)));
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+#endif  // HAVE_NEON
+
+#if HAVE_DSPR2 && !CONFIG_VP9_HIGHBITDEPTH
+INSTANTIATE_TEST_CASE_P(
+    DSPR2, Loop8Test6Param,  // Only built when HAVE_DSPR2 is set and CONFIG_VP9_HIGHBITDEPTH is not.
+    ::testing::Values(  // Each tuple: DSPR2 filter, its C counterpart, then 8 (presumably the bit depth).
+        make_tuple(&vpx_lpf_horizontal_4_dspr2, &vpx_lpf_horizontal_4_c, 8),
+        make_tuple(&vpx_lpf_horizontal_8_dspr2, &vpx_lpf_horizontal_8_c, 8),
+        make_tuple(&vpx_lpf_horizontal_16_dspr2, &vpx_lpf_horizontal_16_c, 8),
+        make_tuple(&vpx_lpf_horizontal_16_dual_dspr2,
+                   &vpx_lpf_horizontal_16_dual_c, 8),
+        make_tuple(&vpx_lpf_vertical_4_dspr2, &vpx_lpf_vertical_4_c, 8),
+        make_tuple(&vpx_lpf_vertical_8_dspr2, &vpx_lpf_vertical_8_c, 8),
+        make_tuple(&vpx_lpf_vertical_16_dspr2, &vpx_lpf_vertical_16_c, 8),
+        make_tuple(&vpx_lpf_vertical_16_dual_dspr2, &vpx_lpf_vertical_16_dual_c,
+                   8)));
+
+INSTANTIATE_TEST_CASE_P(
+    DSPR2, Loop8Test9Param,  // 4- and 8-wide dual DSPR2 filters paired with their C counterparts.
+    ::testing::Values(make_tuple(&vpx_lpf_horizontal_4_dual_dspr2,
+                                 &vpx_lpf_horizontal_4_dual_c, 8),
+                      make_tuple(&vpx_lpf_horizontal_8_dual_dspr2,
+                                 &vpx_lpf_horizontal_8_dual_c, 8),
+                      make_tuple(&vpx_lpf_vertical_4_dual_dspr2,
+                                 &vpx_lpf_vertical_4_dual_c, 8),
+                      make_tuple(&vpx_lpf_vertical_8_dual_dspr2,
+                                 &vpx_lpf_vertical_8_dual_c, 8)));
+#endif  // HAVE_DSPR2 && !CONFIG_VP9_HIGHBITDEPTH
+
+#if HAVE_MSA && (!CONFIG_VP9_HIGHBITDEPTH)
+INSTANTIATE_TEST_CASE_P(
+    MSA, Loop8Test6Param,  // Only built when HAVE_MSA is set and CONFIG_VP9_HIGHBITDEPTH is not.
+    ::testing::Values(  // Each tuple: MSA filter, its C counterpart, then 8 (presumably the bit depth).
+        make_tuple(&vpx_lpf_horizontal_4_msa, &vpx_lpf_horizontal_4_c, 8),
+        make_tuple(&vpx_lpf_horizontal_8_msa, &vpx_lpf_horizontal_8_c, 8),
+        make_tuple(&vpx_lpf_horizontal_16_msa, &vpx_lpf_horizontal_16_c, 8),
+        make_tuple(&vpx_lpf_horizontal_16_dual_msa,
+                   &vpx_lpf_horizontal_16_dual_c, 8),
+        make_tuple(&vpx_lpf_vertical_4_msa, &vpx_lpf_vertical_4_c, 8),
+        make_tuple(&vpx_lpf_vertical_8_msa, &vpx_lpf_vertical_8_c, 8),
+        make_tuple(&vpx_lpf_vertical_16_msa, &vpx_lpf_vertical_16_c, 8)));  // NOTE(review): no vertical_16_dual entry, unlike the NEON and DSPR2 lists; confirm this is intentional.
+
+INSTANTIATE_TEST_CASE_P(
+    MSA, Loop8Test9Param,  // 4- and 8-wide dual MSA filters paired with their C counterparts.
+    ::testing::Values(make_tuple(&vpx_lpf_horizontal_4_dual_msa,
+                                 &vpx_lpf_horizontal_4_dual_c, 8),
+                      make_tuple(&vpx_lpf_horizontal_8_dual_msa,
+                                 &vpx_lpf_horizontal_8_dual_c, 8),
+                      make_tuple(&vpx_lpf_vertical_4_dual_msa,
+                                 &vpx_lpf_vertical_4_dual_c, 8),
+                      make_tuple(&vpx_lpf_vertical_8_dual_msa,
+                                 &vpx_lpf_vertical_8_dual_c, 8)));
+#endif  // HAVE_MSA && (!CONFIG_VP9_HIGHBITDEPTH)
+
+}  // namespace
--- a/test/md5_helper.h
+++ b/test/md5_helper.h
@@ -17,9 +17,7 @@
 namespace libvpx_test {
 class MD5 {
 public:
-  MD5() {
-    MD5Init(&md5_);
-  }
+  MD5() { MD5Init(&md5_); }

  void Add(const vpx_image_t *img) {
    for (int plane = 0; plane < 3; ++plane) {
@@ -30,10 +28,13 @@ class MD5 {
      // This works only for chroma_shift of 0 and 1.
      const int bytes_per_sample =
          (img->fmt & VPX_IMG_FMT_HIGHBITDEPTH) ? 2 : 1;
-      const int h = plane ? (img->d_h + img->y_chroma_shift) >>
-                    img->y_chroma_shift : img->d_h;
-      const int w = (plane ? (img->d_w + img->x_chroma_shift) >>
-                     img->x_chroma_shift : img->d_w) * bytes_per_sample;
+      const int h =
+          plane ? (img->d_h + img->y_chroma_shift) >> img->y_chroma_shift
+                : img->d_h;
+      const int w =
+          (plane ? (img->d_w + img->x_chroma_shift) >> img->x_chroma_shift
+                 : img->d_w) *
+          bytes_per_sample;

      for (int y = 0; y < h; ++y) {
        MD5Update(&md5_, buf, w);
@@ -56,8 +57,8 @@ class MD5 {

    MD5Final(tmp, &ctx_tmp);
    for (int i = 0; i < 16; i++) {
-      res_[i * 2 + 0]  = hex[tmp[i] >> 4];
-      res_[i * 2 + 1]  = hex[tmp[i] & 0xf];
+      res_[i * 2 + 0] = hex[tmp[i] >> 4];
+      res_[i * 2 + 1] = hex[tmp[i] & 0xf];
    }
    res_[32] = 0;

--- a/test/minmax_test.cc
+++ b/test/minmax_test.cc
@@ -0,0 +1,130 @@
+/*
+ *  Copyright (c) 2016 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <stdlib.h>
+#include <string.h>
+
+#include "third_party/googletest/src/include/gtest/gtest.h"
+
+#include "./vpx_dsp_rtcd.h"
+#include "vpx/vpx_integer.h"
+
+#include "test/acm_random.h"
+#include "test/register_state_check.h"
+
+namespace {
+
+using ::libvpx_test::ACMRandom;
+
+typedef void (*MinMaxFunc)(const uint8_t *a, int a_stride, const uint8_t *b,  // Signature of the min/max functions under test.
+                           int b_stride, int *min, int *max);  // Results are written through min and max.
+
+class MinMaxTest : public ::testing::TestWithParam<MinMaxFunc> {  // Fixture parameterized on the function under test.
+ public:
+  virtual void SetUp() {
+    mm_func_ = GetParam();  // The function supplied by INSTANTIATE_TEST_CASE_P.
+    rnd_.Reset(ACMRandom::DeterministicSeed());  // Reset the generator to its deterministic seed before every test.
+  }
+
+ protected:
+  MinMaxFunc mm_func_;  // Implementation being exercised.
+  ACMRandom rnd_;  // Source of random pixel values for the comparison tests.
+};
+
+void reference_minmax(const uint8_t *a, int a_stride, const uint8_t *b,  // Reference: min and max of |a - b| over an 8x8 block.
+                      int b_stride, int *min_ret, int *max_ret) {  // Results are written to *min_ret and *max_ret.
+  int min = 255;  // Largest possible absolute difference of two uint8_t values.
+  int max = 0;  // Smallest possible absolute difference.
+  for (int i = 0; i < 8; i++) {  // i is the row; each buffer advances by its own stride.
+    for (int j = 0; j < 8; j++) {  // j is the column within the row.
+      const int diff = abs(a[i * a_stride + j] - b[i * b_stride + j]);
+      if (min > diff) min = diff;
+      if (max < diff) max = diff;
+    }
+  }
+
+  *min_ret = min;
+  *max_ret = max;
+}
+
+TEST_P(MinMaxTest, MinValue) {  // A single smaller difference must be reported as the minimum, wherever it sits.
+  for (int i = 0; i < 64; i++) {  // Move the special element through all 64 positions of the 8x8 block.
+    uint8_t a[64], b[64];
+    memset(a, 0, sizeof(a));
+    memset(b, 255, sizeof(b));  // Every difference is 255 except the one overwritten below.
+    b[i] = i;  // Set a minimum difference of i.
+
+    int min, max;
+    ASM_REGISTER_STATE_CHECK(mm_func_(a, 8, b, 8, &min, &max));
+    EXPECT_EQ(255, max);  // The other 63 elements still differ by 255.
+    EXPECT_EQ(i, min);
+  }
+}
+
+TEST_P(MinMaxTest, MaxValue) {  // A single larger difference must be reported as the maximum, wherever it sits.
+  for (int i = 0; i < 64; i++) {  // Move the special element through all 64 positions of the 8x8 block.
+    uint8_t a[64], b[64];
+    memset(a, 0, sizeof(a));
+    memset(b, 0, sizeof(b));  // Every difference is 0 except the one overwritten below.
+    b[i] = i;  // Set a maximum difference of i.
+
+    int min, max;
+    ASM_REGISTER_STATE_CHECK(mm_func_(a, 8, b, 8, &min, &max));
+    EXPECT_EQ(i, max);
+    EXPECT_EQ(0, min);  // At least one other element (or b[0] when i == 0) has zero difference.
+  }
+}
+
+TEST_P(MinMaxTest, CompareReference) {  // Random 8x8 blocks: the function under test must match reference_minmax.
+  uint8_t a[64], b[64];
+  for (int j = 0; j < 64; j++) {
+    a[j] = rnd_.Rand8();
+    b[j] = rnd_.Rand8();
+  }
+
+  int min_ref, max_ref, min, max;
+  reference_minmax(a, 8, b, 8, &min_ref, &max_ref);  // Stride 8: the blocks are tightly packed.
+  ASM_REGISTER_STATE_CHECK(mm_func_(a, 8, b, 8, &min, &max));
+  EXPECT_EQ(max_ref, max);
+  EXPECT_EQ(min_ref, min);
+}
+
+TEST_P(MinMaxTest, CompareReferenceAndVaryStride) {  // Same comparison as CompareReference, across stride combinations.
+  uint8_t a[8 * 64], b[8 * 64];  // Room for 8 rows at the largest stride used below (64).
+  for (int i = 0; i < 8 * 64; i++) {
+    a[i] = rnd_.Rand8();
+    b[i] = rnd_.Rand8();
+  }
+  for (int a_stride = 8; a_stride <= 64; a_stride += 8) {  // Strides 8, 16, ..., 64 for a.
+    for (int b_stride = 8; b_stride <= 64; b_stride += 8) {  // Independently, the same strides for b.
+      int min_ref, max_ref, min, max;
+      reference_minmax(a, a_stride, b, b_stride, &min_ref, &max_ref);
+      ASM_REGISTER_STATE_CHECK(mm_func_(a, a_stride, b, b_stride, &min, &max));
+      EXPECT_EQ(max_ref, max) << "when a_stride = " << a_stride
+                              << " and b_stride = " << b_stride;
+      EXPECT_EQ(min_ref, min) << "when a_stride = " << a_stride
+                              << " and b_stride = " << b_stride;
+    }
+  }
+}
+
+INSTANTIATE_TEST_CASE_P(C, MinMaxTest, ::testing::Values(&vpx_minmax_8x8_c));  // The C version is always instantiated.
+
+#if HAVE_SSE2
+INSTANTIATE_TEST_CASE_P(SSE2, MinMaxTest,  // Instantiated only when HAVE_SSE2 is set.
+                        ::testing::Values(&vpx_minmax_8x8_sse2));
+#endif
+
+#if HAVE_NEON
+INSTANTIATE_TEST_CASE_P(NEON, MinMaxTest,  // Instantiated only when HAVE_NEON is set.
+                        ::testing::Values(&vpx_minmax_8x8_neon));
+#endif
+
+}  // namespace
--- a/test/partial_idct_test.cc
+++ b/test/partial_idct_test.cc
@@ -12,331 +12,681 @@
 #include <stdlib.h>
 #include <string.h>

+#include <limits>
+
 #include "third_party/googletest/src/include/gtest/gtest.h"
+
+#include "./vp9_rtcd.h"
+#include "./vpx_dsp_rtcd.h"
 #include "test/acm_random.h"
 #include "test/clear_system_state.h"
 #include "test/register_state_check.h"
 #include "test/util.h"
-
-#include "./vp9_rtcd.h"
 #include "vp9/common/vp9_blockd.h"
 #include "vp9/common/vp9_scan.h"
 #include "vpx/vpx_integer.h"
+#include "vpx_ports/vpx_timer.h"

 using libvpx_test::ACMRandom;

 namespace {
+
 typedef void (*FwdTxfmFunc)(const int16_t *in, tran_low_t *out, int stride);
 typedef void (*InvTxfmFunc)(const tran_low_t *in, uint8_t *out, int stride);
-typedef std::tr1::tuple<FwdTxfmFunc,
-                        InvTxfmFunc,
-                        InvTxfmFunc,
-                        TX_SIZE, int> PartialInvTxfmParam;
+typedef void (*InvTxfmWithBdFunc)(const tran_low_t *in, uint8_t *out,  // Common inverse-transform signature used by the fixture.
+                                  int stride, int bd);  // Same as InvTxfmFunc plus a trailing bd argument.
+
+template <InvTxfmFunc fn>  // fn: a 3-argument inverse transform chosen at compile time.
+void wrapper(const tran_low_t *in, uint8_t *out, int stride, int bd) {  // Adapts fn to the InvTxfmWithBdFunc signature.
+  (void)bd;  // bd is accepted only to match the signature; fn does not take it.
+  fn(in, out, stride);
+}
+
+#if CONFIG_VP9_HIGHBITDEPTH
+template <InvTxfmWithBdFunc fn>  // fn: an inverse transform that already takes a bd argument.
+void highbd_wrapper(const tran_low_t *in, uint8_t *out, int stride, int bd) {  // Forwards to fn with a converted output pointer.
+  fn(in, CONVERT_TO_BYTEPTR(out), stride, bd);  // out goes through CONVERT_TO_BYTEPTR (defined elsewhere) before the call.
+}
+#endif
+
+typedef std::tr1::tuple<FwdTxfmFunc, InvTxfmWithBdFunc, InvTxfmWithBdFunc,  // Fields 0-2: forward, full inverse, partial inverse transform.
+                        TX_SIZE, int, int, int>  // Fields 3-6: tx_size_, last_nonzero_, bit_depth_, pixel_size_ (as read in SetUp).
+    PartialInvTxfmParam;
 const int kMaxNumCoeffs = 1024;
+const int kCountTestBlock = 1000;
+
+// https://bugs.chromium.org/p/webm/issues/detail?id=1332
+// The functions specified do not pass with INT16_MIN/MAX. They fail at the
+// value specified, but pass when 1 is added/subtracted.
+int16_t MaxSupportedCoeff(InvTxfmWithBdFunc a) {  // Largest coefficient SingleExtremeCoeff feeds to function a.
+#if HAVE_SSSE3 && ARCH_X86_64 && !CONFIG_EMULATE_HARDWARE
+  if (a == &wrapper<vpx_idct8x8_64_add_ssse3> ||
+      a == &wrapper<vpx_idct8x8_12_add_ssse3>) {
+    return 23625 - 1;  // One below the value at which these functions fail (see the comment above this function).
+  }
+#else
+  (void)a;  // a is only inspected in the SSSE3 branch; this marks it as used otherwise.
+#endif
+  return std::numeric_limits<int16_t>::max();  // Every other function is tested with the full int16_t maximum.
+}
+
+int16_t MinSupportedCoeff(InvTxfmWithBdFunc a) {  // Smallest coefficient SingleExtremeCoeff feeds to function a.
+#if HAVE_SSSE3 && ARCH_X86_64 && !CONFIG_EMULATE_HARDWARE
+  if (a == &wrapper<vpx_idct8x8_64_add_ssse3> ||
+      a == &wrapper<vpx_idct8x8_12_add_ssse3>) {
+    return -23625 + 1;  // One above the negative value at which these functions fail.
+  }
+#else
+  (void)a;  // a is only inspected in the SSSE3 branch; this marks it as used otherwise.
+#endif
+  return std::numeric_limits<int16_t>::min();  // Every other function is tested with the full int16_t minimum.
+}
+
 class PartialIDctTest : public ::testing::TestWithParam<PartialInvTxfmParam> {
 public:
  virtual ~PartialIDctTest() {}
  virtual void SetUp() {
+    rnd_.Reset(ACMRandom::DeterministicSeed());  // Reset the generator to its deterministic seed before every test.
    ftxfm_ = GET_PARAM(0);
    full_itxfm_ = GET_PARAM(1);
    partial_itxfm_ = GET_PARAM(2);
-    tx_size_  = GET_PARAM(3);
+    tx_size_ = GET_PARAM(3);
    last_nonzero_ = GET_PARAM(4);
+    bit_depth_ = GET_PARAM(5);
+    pixel_size_ = GET_PARAM(6);  // Bytes per output sample; InitMem handles 1 and 2.
+    mask_ = (1 << bit_depth_) - 1;  // Largest value that fits in bit_depth_ bits.
+
+    switch (tx_size_) {  // Translate the TX_SIZE enumerator into the block's side length.
+      case TX_4X4: size_ = 4; break;
+      case TX_8X8: size_ = 8; break;
+      case TX_16X16: size_ = 16; break;
+      case TX_32X32: size_ = 32; break;
+      default: FAIL() << "Wrong Size!"; break;
+    }
+
+    // Randomize stride_ to a value less than or equal to 1024
+    stride_ = rnd_(1024) + 1;
+    if (stride_ < size_) {  // A row must be able to hold size_ samples.
+      stride_ = size_;
+    }
+    // Align stride_ to 16 if it's bigger than 16.
+    if (stride_ > 16) {
+      stride_ &= ~15;  // Clears the low four bits, i.e. rounds down to a multiple of 16.
+    }
+
+    input_block_size_ = size_ * size_;  // Number of coefficients in one block.
+    output_block_size_ = size_ * stride_;  // size_ rows, each stride_ samples apart.
+
+    input_block_ = reinterpret_cast<tran_low_t *>(
+        vpx_memalign(16, sizeof(*input_block_) * input_block_size_));
+    output_block_ = reinterpret_cast<uint8_t *>(
+        vpx_memalign(16, pixel_size_ * output_block_size_));
+    output_block_ref_ = reinterpret_cast<uint8_t *>(
+        vpx_memalign(16, pixel_size_ * output_block_size_));  // NOTE(review): none of the three allocations is checked for NULL.
  }

-  virtual void TearDown() { libvpx_test::ClearSystemState(); }
+  virtual void TearDown() {  // Frees the buffers allocated in SetUp, then clears system state.
+    vpx_free(input_block_);
+    input_block_ = NULL;
+    vpx_free(output_block_);
+    output_block_ = NULL;
+    vpx_free(output_block_ref_);
+    output_block_ref_ = NULL;
+    libvpx_test::ClearSystemState();
+  }
+
+  void InitMem() {  // Zeroes the coefficients and gives both output buffers identical random contents.
+    memset(input_block_, 0, sizeof(*input_block_) * input_block_size_);
+    if (pixel_size_ == 1) {
+      for (int j = 0; j < output_block_size_; ++j) {
+        output_block_[j] = output_block_ref_[j] = rnd_.Rand16() & mask_;  // Same masked random value in both buffers.
+      }
+    } else {
+      ASSERT_EQ(2, pixel_size_);  // Only 1- and 2-byte samples are handled.
+      uint16_t *const output = reinterpret_cast<uint16_t *>(output_block_);  // View the byte buffers as 16-bit samples.
+      uint16_t *const output_ref =
+          reinterpret_cast<uint16_t *>(output_block_ref_);
+      for (int j = 0; j < output_block_size_; ++j) {
+        output[j] = output_ref[j] = rnd_.Rand16() & mask_;  // Same masked random value in both buffers.
+      }
+    }
+  }
+
+  void InitInput() {  // Writes random coefficients to the first last_nonzero_ scan positions.
+    const int max_coeff = 32766 / 4;
+    int max_energy_leftover = max_coeff * max_coeff;  // Budget for the sum of squared coefficients.
+    for (int j = 0; j < last_nonzero_; ++j) {
+      int16_t coeff = static_cast<int16_t>(sqrt(1.0 * max_energy_leftover) *
+                                           (rnd_.Rand16() - 32768) / 65536);
+      max_energy_leftover -= coeff * coeff;  // Spend this coefficient's share of the budget.
+      if (max_energy_leftover < 0) {  // Budget exhausted: this position gets zero instead.
+        max_energy_leftover = 0;
+        coeff = 0;
+      }
+      input_block_[vp9_default_scan_orders[tx_size_].scan[j]] = coeff;  // Place it at the j-th position of the default scan order.
+    }
+  }

 protected:
  int last_nonzero_;
  TX_SIZE tx_size_;
+  tran_low_t *input_block_;  // Coefficient block passed to both inverse transforms.
+  uint8_t *output_block_;  // Destination of the partial inverse transform.
+  uint8_t *output_block_ref_;  // Destination of the full inverse transform.
+  int size_;  // Side length of the transform block.
+  int stride_;  // Output stride passed to the inverse transforms.
+  int pixel_size_;  // Bytes per output sample.
+  int input_block_size_;  // size_ * size_.
+  int output_block_size_;  // size_ * stride_.
+  int bit_depth_;
+  int mask_;  // (1 << bit_depth_) - 1.
  FwdTxfmFunc ftxfm_;
-  InvTxfmFunc full_itxfm_;
-  InvTxfmFunc partial_itxfm_;
+  InvTxfmWithBdFunc full_itxfm_;
+  InvTxfmWithBdFunc partial_itxfm_;
+  ACMRandom rnd_;
 };

 TEST_P(PartialIDctTest, RunQuantCheck) {
-  ACMRandom rnd(ACMRandom::DeterministicSeed());
-  int size;
-  switch (tx_size_) {
-    case TX_4X4:
-      size = 4;
-      break;
-    case TX_8X8:
-      size = 8;
-      break;
-    case TX_16X16:
-      size = 16;
-      break;
-    case TX_32X32:
-      size = 32;
-      break;
-    default:
-      FAIL() << "Wrong Size!";
-      break;
-  }
-  DECLARE_ALIGNED(16, tran_low_t, test_coef_block1[kMaxNumCoeffs]);
-  DECLARE_ALIGNED(16, tran_low_t, test_coef_block2[kMaxNumCoeffs]);
-  DECLARE_ALIGNED(16, uint8_t, dst1[kMaxNumCoeffs]);
-  DECLARE_ALIGNED(16, uint8_t, dst2[kMaxNumCoeffs]);
-
-  const int count_test_block = 1000;
-  const int block_size = size * size;
-
  DECLARE_ALIGNED(16, int16_t, input_extreme_block[kMaxNumCoeffs]);
  DECLARE_ALIGNED(16, tran_low_t, output_ref_block[kMaxNumCoeffs]);

-  int max_error = 0;
-  for (int i = 0; i < count_test_block; ++i) {
-    // clear out destination buffer
-    memset(dst1, 0, sizeof(*dst1) * block_size);
-    memset(dst2, 0, sizeof(*dst2) * block_size);
-    memset(test_coef_block1, 0, sizeof(*test_coef_block1) * block_size);
-    memset(test_coef_block2, 0, sizeof(*test_coef_block2) * block_size);
-
-    ACMRandom rnd(ACMRandom::DeterministicSeed());
-
-    for (int i = 0; i < count_test_block; ++i) {
-      // Initialize a test block with input range [-255, 255].
-      if (i == 0) {
-        for (int j = 0; j < block_size; ++j)
-          input_extreme_block[j] = 255;
-      } else if (i == 1) {
-        for (int j = 0; j < block_size; ++j)
-          input_extreme_block[j] = -255;
-      } else {
-        for (int j = 0; j < block_size; ++j) {
-          input_extreme_block[j] = rnd.Rand8() % 2 ? 255 : -255;
-        }
+  InitMem();  // NOTE(review): called once only, so both output buffers carry over between iterations of the loop below.
+  for (int i = 0; i < kCountTestBlock * kCountTestBlock; ++i) {  // 1000 * 1000 rounds of extreme-valued input.
+    // Initialize a test block with input range [-mask_, mask_].
+    if (i == 0) {
+      for (int k = 0; k < input_block_size_; ++k) {
+        input_extreme_block[k] = mask_;  // First round: every sample at the positive extreme.
+      }
+    } else if (i == 1) {
+      for (int k = 0; k < input_block_size_; ++k) {
+        input_extreme_block[k] = -mask_;  // Second round: every sample at the negative extreme.
+      }
+    } else {
+      for (int k = 0; k < input_block_size_; ++k) {
+        input_extreme_block[k] = rnd_.Rand8() % 2 ? mask_ : -mask_;  // Later rounds: random mix of the two extremes.
       }
-
-      ftxfm_(input_extreme_block, output_ref_block, size);
-
-      // quantization with maximum allowed step sizes
-      test_coef_block1[0] = (output_ref_block[0] / 1336) * 1336;
-      for (int j = 1; j < last_nonzero_; ++j)
-        test_coef_block1[vp9_default_scan_orders[tx_size_].scan[j]]
-                         = (output_ref_block[j] / 1828) * 1828;
    }

-    ASM_REGISTER_STATE_CHECK(full_itxfm_(test_coef_block1, dst1, size));
-    ASM_REGISTER_STATE_CHECK(partial_itxfm_(test_coef_block1, dst2, size));
+    ftxfm_(input_extreme_block, output_ref_block, size_);  // Forward transform of the tightly packed block (stride size_).

-    for (int j = 0; j < block_size; ++j) {
-      const int diff = dst1[j] - dst2[j];
-      const int error = diff * diff;
-      if (max_error < error)
-        max_error = error;
+    // quantization with minimum allowed step sizes
+    input_block_[0] = (output_ref_block[0] / 4) * 4;  // Integer division: truncates to a multiple of 4.
+    for (int k = 1; k < last_nonzero_; ++k) {  // Only the first last_nonzero_ scan positions are populated.
+      const int pos = vp9_default_scan_orders[tx_size_].scan[k];
+      input_block_[pos] = (output_ref_block[pos] / 4) * 4;
    }
+
+    ASM_REGISTER_STATE_CHECK(
+        full_itxfm_(input_block_, output_block_ref_, stride_, bit_depth_));
+    ASM_REGISTER_STATE_CHECK(
+        partial_itxfm_(input_block_, output_block_, stride_, bit_depth_));
+    ASSERT_EQ(0, memcmp(output_block_ref_, output_block_,
+                        pixel_size_ * output_block_size_))
+        << "Error: partial inverse transform produces different results";
  }
-
-  EXPECT_EQ(0, max_error)
-      << "Error: partial inverse transform produces different results";
 }

 TEST_P(PartialIDctTest, ResultsMatch) {
-  ACMRandom rnd(ACMRandom::DeterministicSeed());
-  int size;
-  switch (tx_size_) {
-    case TX_4X4:
-      size = 4;
-      break;
-    case TX_8X8:
-      size = 8;
-      break;
-    case TX_16X16:
-      size = 16;
-      break;
-    case TX_32X32:
-      size = 32;
-      break;
-    default:
-      FAIL() << "Wrong Size!";
-      break;
+  for (int i = 0; i < kCountTestBlock; ++i) {  // Full and partial transforms must agree on random coefficient blocks.
+    InitMem();  // Fresh, identical output buffers and zeroed coefficients for every round.
+    InitInput();  // Random coefficients in the first last_nonzero_ scan positions.
+
+    ASM_REGISTER_STATE_CHECK(
+        full_itxfm_(input_block_, output_block_ref_, stride_, bit_depth_));
+    ASM_REGISTER_STATE_CHECK(
+        partial_itxfm_(input_block_, output_block_, stride_, bit_depth_));
+    ASSERT_EQ(0, memcmp(output_block_ref_, output_block_,
+                        pixel_size_ * output_block_size_))
+        << "Error: partial inverse transform produces different results";
  }
-  DECLARE_ALIGNED(16, tran_low_t, test_coef_block1[kMaxNumCoeffs]);
-  DECLARE_ALIGNED(16, tran_low_t, test_coef_block2[kMaxNumCoeffs]);
-  DECLARE_ALIGNED(16, uint8_t, dst1[kMaxNumCoeffs]);
-  DECLARE_ALIGNED(16, uint8_t, dst2[kMaxNumCoeffs]);
-  const int count_test_block = 1000;
-  const int max_coeff = 32766 / 4;
-  const int block_size = size * size;
-  int max_error = 0;
-  for (int i = 0; i < count_test_block; ++i) {
-    // clear out destination buffer
-    memset(dst1, 0, sizeof(*dst1) * block_size);
-    memset(dst2, 0, sizeof(*dst2) * block_size);
-    memset(test_coef_block1, 0, sizeof(*test_coef_block1) * block_size);
-    memset(test_coef_block2, 0, sizeof(*test_coef_block2) * block_size);
-    int max_energy_leftover = max_coeff * max_coeff;
+}
+
+TEST_P(PartialIDctTest, AddOutputBlock) {  // Small fixed coefficients on top of random output contents.
+  for (int i = 0; i < kCountTestBlock; ++i) {
+    InitMem();  // Both output buffers start from the same random contents.
    for (int j = 0; j < last_nonzero_; ++j) {
-      int16_t coef = static_cast<int16_t>(sqrt(1.0 * max_energy_leftover) *
-                                          (rnd.Rand16() - 32768) / 65536);
-      max_energy_leftover -= coef * coef;
-      if (max_energy_leftover < 0) {
-        max_energy_leftover = 0;
-        coef = 0;
-      }
-      test_coef_block1[vp9_default_scan_orders[tx_size_].scan[j]] = coef;
+      input_block_[vp9_default_scan_orders[tx_size_].scan[j]] = 10;  // Constant coefficient at each of the first last_nonzero_ scan positions.
    }

-    memcpy(test_coef_block2, test_coef_block1,
-           sizeof(*test_coef_block2) * block_size);
+    ASM_REGISTER_STATE_CHECK(
+        full_itxfm_(input_block_, output_block_ref_, stride_, bit_depth_));
+    ASM_REGISTER_STATE_CHECK(
+        partial_itxfm_(input_block_, output_block_, stride_, bit_depth_));
+    ASSERT_EQ(0, memcmp(output_block_ref_, output_block_,
+                        pixel_size_ * output_block_size_))
+        << "Error: Transform results are not correctly added to output.";
+  }
+}

-    ASM_REGISTER_STATE_CHECK(full_itxfm_(test_coef_block1, dst1, size));
-    ASM_REGISTER_STATE_CHECK(partial_itxfm_(test_coef_block2, dst2, size));
+TEST_P(PartialIDctTest, SingleExtremeCoeff) {  // One extreme coefficient at a time, at each tested scan position.
+  const int16_t max_coeff = MaxSupportedCoeff(partial_itxfm_);  // Limits depend on the partial transform under test.
+  const int16_t min_coeff = MinSupportedCoeff(partial_itxfm_);
+  for (int i = 0; i < last_nonzero_; ++i) {
+    memset(input_block_, 0, sizeof(*input_block_) * input_block_size_);  // Clear the coefficient left by the previous position.
+    // Run once for min and once for max.
+    for (int j = 0; j < 2; ++j) {
+      const int coeff = j ? min_coeff : max_coeff;  // j == 0 uses max, j == 1 uses min.

-    for (int j = 0; j < block_size; ++j) {
-      const int diff = dst1[j] - dst2[j];
-      const int error = diff * diff;
-      if (max_error < error)
-        max_error = error;
+      memset(output_block_, 0, pixel_size_ * output_block_size_);  // Outputs start from zero here rather than from InitMem's random values.
+      memset(output_block_ref_, 0, pixel_size_ * output_block_size_);
+      input_block_[vp9_default_scan_orders[tx_size_].scan[i]] = coeff;  // The only nonzero coefficient in the block.
+
+      ASM_REGISTER_STATE_CHECK(
+          full_itxfm_(input_block_, output_block_ref_, stride_, bit_depth_));
+      ASM_REGISTER_STATE_CHECK(
+          partial_itxfm_(input_block_, output_block_, stride_, bit_depth_));
+      ASSERT_EQ(0, memcmp(output_block_ref_, output_block_,
+                          pixel_size_ * output_block_size_))
+          << "Error: Fails with single coeff of " << coeff << " at " << i
+          << ".";
    }
  }
+}

-  EXPECT_EQ(0, max_error)
+TEST_P(PartialIDctTest, DISABLED_Speed) {  // Times the partial transform; the DISABLED_ prefix keeps gtest from running it by default.
+  // Keep runtime stable with transform size.
+  const int kCountSpeedTestBlock = 500000000 / input_block_size_;  // Fewer iterations for larger blocks.
+  InitMem();
+  InitInput();
+
+  for (int i = 0; i < kCountSpeedTestBlock; ++i) {  // Untimed: builds the reference output with the full transform.
+    ASM_REGISTER_STATE_CHECK(
+        full_itxfm_(input_block_, output_block_ref_, stride_, bit_depth_));
+  }
+  vpx_usec_timer timer;
+  vpx_usec_timer_start(&timer);
+  for (int i = 0; i < kCountSpeedTestBlock; ++i) {  // Timed: the same number of partial-transform calls.
+    partial_itxfm_(input_block_, output_block_, stride_, bit_depth_);  // Called directly, without ASM_REGISTER_STATE_CHECK.
+  }
+  libvpx_test::ClearSystemState();
+  vpx_usec_timer_mark(&timer);
+  const int elapsed_time =
+      static_cast<int>(vpx_usec_timer_elapsed(&timer) / 1000);  // Presumably microseconds to milliseconds, matching the "ms" label below.
+  printf("idct%dx%d_%d (bitdepth %d) time: %5d ms ", size_, size_,
+         last_nonzero_, bit_depth_, elapsed_time);
+
+  ASSERT_EQ(0, memcmp(output_block_ref_, output_block_,
+                      pixel_size_ * output_block_size_))
      << "Error: partial inverse transform produces different results";
 }
+
 using std::tr1::make_tuple;

-INSTANTIATE_TEST_CASE_P(
-    C, PartialIDctTest,
-    ::testing::Values(
-        make_tuple(&vp9_fdct32x32_c,
-                   &vp9_idct32x32_1024_add_c,
-                   &vp9_idct32x32_34_add_c,
-                   TX_32X32, 34),
-        make_tuple(&vp9_fdct32x32_c,
-                   &vp9_idct32x32_1024_add_c,
-                   &vp9_idct32x32_1_add_c,
-                   TX_32X32, 1),
-        make_tuple(&vp9_fdct16x16_c,
-                   &vp9_idct16x16_256_add_c,
-                   &vp9_idct16x16_10_add_c,
-                   TX_16X16, 10),
-        make_tuple(&vp9_fdct16x16_c,
-                   &vp9_idct16x16_256_add_c,
-                   &vp9_idct16x16_1_add_c,
-                   TX_16X16, 1),
-        make_tuple(&vp9_fdct8x8_c,
-                   &vp9_idct8x8_64_add_c,
-                   &vp9_idct8x8_12_add_c,
-                   TX_8X8, 12),
-        make_tuple(&vp9_fdct8x8_c,
-                   &vp9_idct8x8_64_add_c,
-                   &vp9_idct8x8_1_add_c,
-                   TX_8X8, 1),
-        make_tuple(&vp9_fdct4x4_c,
-                   &vp9_idct4x4_16_add_c,
-                   &vp9_idct4x4_1_add_c,
-                   TX_4X4, 1)));
+const PartialInvTxfmParam c_partial_idct_tests[] = {
+#if CONFIG_VP9_HIGHBITDEPTH
+  make_tuple(
+      &vpx_highbd_fdct32x32_c, &highbd_wrapper<vpx_highbd_idct32x32_1024_add_c>,
+      &highbd_wrapper<vpx_highbd_idct32x32_1024_add_c>, TX_32X32, 1024, 8, 2),
+  make_tuple(
+      &vpx_highbd_fdct32x32_c, &highbd_wrapper<vpx_highbd_idct32x32_1024_add_c>,
+      &highbd_wrapper<vpx_highbd_idct32x32_1024_add_c>, TX_32X32, 1024, 10, 2),
+  make_tuple(
+      &vpx_highbd_fdct32x32_c, &highbd_wrapper<vpx_highbd_idct32x32_1024_add_c>,
+      &highbd_wrapper<vpx_highbd_idct32x32_1024_add_c>, TX_32X32, 1024, 12, 2),
+  make_tuple(
+      &vpx_highbd_fdct32x32_c, &highbd_wrapper<vpx_highbd_idct32x32_1024_add_c>,
+      &highbd_wrapper<vpx_highbd_idct32x32_34_add_c>, TX_32X32, 34, 8, 2),
+  make_tuple(
+      &vpx_highbd_fdct32x32_c, &highbd_wrapper<vpx_highbd_idct32x32_1024_add_c>,
+      &highbd_wrapper<vpx_highbd_idct32x32_34_add_c>, TX_32X32, 34, 10, 2),
+  make_tuple(
+      &vpx_highbd_fdct32x32_c, &highbd_wrapper<vpx_highbd_idct32x32_1024_add_c>,
+      &highbd_wrapper<vpx_highbd_idct32x32_34_add_c>, TX_32X32, 34, 12, 2),
+  make_tuple(&vpx_highbd_fdct32x32_c,
+             &highbd_wrapper<vpx_highbd_idct32x32_1024_add_c>,
+             &highbd_wrapper<vpx_highbd_idct32x32_1_add_c>, TX_32X32, 1, 8, 2),
+  make_tuple(&vpx_highbd_fdct32x32_c,
+             &highbd_wrapper<vpx_highbd_idct32x32_1024_add_c>,
+             &highbd_wrapper<vpx_highbd_idct32x32_1_add_c>, TX_32X32, 1, 10, 2),
+  make_tuple(&vpx_highbd_fdct32x32_c,
+             &highbd_wrapper<vpx_highbd_idct32x32_1024_add_c>,
+             &highbd_wrapper<vpx_highbd_idct32x32_1_add_c>, TX_32X32, 1, 12, 2),
+  make_tuple(
+      &vpx_highbd_fdct16x16_c, &highbd_wrapper<vpx_highbd_idct16x16_256_add_c>,
+      &highbd_wrapper<vpx_highbd_idct16x16_256_add_c>, TX_16X16, 256, 8, 2),
+  make_tuple(
+      &vpx_highbd_fdct16x16_c, &highbd_wrapper<vpx_highbd_idct16x16_256_add_c>,
+      &highbd_wrapper<vpx_highbd_idct16x16_256_add_c>, TX_16X16, 256, 10, 2),
+  make_tuple(
+      &vpx_highbd_fdct16x16_c, &highbd_wrapper<vpx_highbd_idct16x16_256_add_c>,
+      &highbd_wrapper<vpx_highbd_idct16x16_256_add_c>, TX_16X16, 256, 12, 2),
+  make_tuple(
+      &vpx_highbd_fdct16x16_c, &highbd_wrapper<vpx_highbd_idct16x16_256_add_c>,
+      &highbd_wrapper<vpx_highbd_idct16x16_10_add_c>, TX_16X16, 10, 8, 2),
+  make_tuple(
+      &vpx_highbd_fdct16x16_c, &highbd_wrapper<vpx_highbd_idct16x16_256_add_c>,
+      &highbd_wrapper<vpx_highbd_idct16x16_10_add_c>, TX_16X16, 10, 10, 2),
+  make_tuple(
+      &vpx_highbd_fdct16x16_c, &highbd_wrapper<vpx_highbd_idct16x16_256_add_c>,
+      &highbd_wrapper<vpx_highbd_idct16x16_10_add_c>, TX_16X16, 10, 12, 2),
+  make_tuple(&vpx_highbd_fdct16x16_c,
+             &highbd_wrapper<vpx_highbd_idct16x16_256_add_c>,
+             &highbd_wrapper<vpx_highbd_idct16x16_1_add_c>, TX_16X16, 1, 8, 2),
+  make_tuple(&vpx_highbd_fdct16x16_c,
+             &highbd_wrapper<vpx_highbd_idct16x16_256_add_c>,
+             &highbd_wrapper<vpx_highbd_idct16x16_1_add_c>, TX_16X16, 1, 10, 2),
+  make_tuple(&vpx_highbd_fdct16x16_c,
+             &highbd_wrapper<vpx_highbd_idct16x16_256_add_c>,
+             &highbd_wrapper<vpx_highbd_idct16x16_1_add_c>, TX_16X16, 1, 12, 2),
+  make_tuple(&vpx_highbd_fdct8x8_c,
+             &highbd_wrapper<vpx_highbd_idct8x8_64_add_c>,
+             &highbd_wrapper<vpx_highbd_idct8x8_64_add_c>, TX_8X8, 64, 8, 2),
+  make_tuple(&vpx_highbd_fdct8x8_c,
+             &highbd_wrapper<vpx_highbd_idct8x8_64_add_c>,
+             &highbd_wrapper<vpx_highbd_idct8x8_64_add_c>, TX_8X8, 64, 10, 2),
+  make_tuple(&vpx_highbd_fdct8x8_c,
+             &highbd_wrapper<vpx_highbd_idct8x8_64_add_c>,
+             &highbd_wrapper<vpx_highbd_idct8x8_64_add_c>, TX_8X8, 64, 12, 2),
+  make_tuple(&vpx_highbd_fdct8x8_c,
+             &highbd_wrapper<vpx_highbd_idct8x8_64_add_c>,
+             &highbd_wrapper<vpx_highbd_idct8x8_12_add_c>, TX_8X8, 12, 8, 2),
+  make_tuple(&vpx_highbd_fdct8x8_c,
+             &highbd_wrapper<vpx_highbd_idct8x8_64_add_c>,
+             &highbd_wrapper<vpx_highbd_idct8x8_12_add_c>, TX_8X8, 12, 10, 2),
+  make_tuple(&vpx_highbd_fdct8x8_c,
+             &highbd_wrapper<vpx_highbd_idct8x8_64_add_c>,
+             &highbd_wrapper<vpx_highbd_idct8x8_12_add_c>, TX_8X8, 12, 12, 2),
+  make_tuple(&vpx_highbd_fdct8x8_c,
+             &highbd_wrapper<vpx_highbd_idct8x8_64_add_c>,
+             &highbd_wrapper<vpx_highbd_idct8x8_1_add_c>, TX_8X8, 1, 8, 2),
+  make_tuple(&vpx_highbd_fdct8x8_c,
+             &highbd_wrapper<vpx_highbd_idct8x8_64_add_c>,
+             &highbd_wrapper<vpx_highbd_idct8x8_1_add_c>, TX_8X8, 1, 10, 2),
+  make_tuple(&vpx_highbd_fdct8x8_c,
+             &highbd_wrapper<vpx_highbd_idct8x8_64_add_c>,
+             &highbd_wrapper<vpx_highbd_idct8x8_1_add_c>, TX_8X8, 1, 12, 2),
+  make_tuple(&vpx_highbd_fdct4x4_c,
+             &highbd_wrapper<vpx_highbd_idct4x4_16_add_c>,
+             &highbd_wrapper<vpx_highbd_idct4x4_16_add_c>, TX_4X4, 16, 8, 2),
+  make_tuple(&vpx_highbd_fdct4x4_c,
+             &highbd_wrapper<vpx_highbd_idct4x4_16_add_c>,
+             &highbd_wrapper<vpx_highbd_idct4x4_16_add_c>, TX_4X4, 16, 10, 2),
+  make_tuple(&vpx_highbd_fdct4x4_c,
+             &highbd_wrapper<vpx_highbd_idct4x4_16_add_c>,
+             &highbd_wrapper<vpx_highbd_idct4x4_16_add_c>, TX_4X4, 16, 12, 2),
+  make_tuple(&vpx_highbd_fdct4x4_c,
+             &highbd_wrapper<vpx_highbd_idct4x4_16_add_c>,
+             &highbd_wrapper<vpx_highbd_idct4x4_1_add_c>, TX_4X4, 1, 8, 2),
+  make_tuple(&vpx_highbd_fdct4x4_c,
+             &highbd_wrapper<vpx_highbd_idct4x4_16_add_c>,
+             &highbd_wrapper<vpx_highbd_idct4x4_1_add_c>, TX_4X4, 1, 10, 2),
+  make_tuple(&vpx_highbd_fdct4x4_c,
+             &highbd_wrapper<vpx_highbd_idct4x4_16_add_c>,
+             &highbd_wrapper<vpx_highbd_idct4x4_1_add_c>, TX_4X4, 1, 12, 2),
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+  make_tuple(&vpx_fdct32x32_c, &wrapper<vpx_idct32x32_1024_add_c>,
+             &wrapper<vpx_idct32x32_1024_add_c>, TX_32X32, 1024, 8, 1),
+  make_tuple(&vpx_fdct32x32_c, &wrapper<vpx_idct32x32_1024_add_c>,
+             &wrapper<vpx_idct32x32_135_add_c>, TX_32X32, 135, 8, 1),
+  make_tuple(&vpx_fdct32x32_c, &wrapper<vpx_idct32x32_1024_add_c>,
+             &wrapper<vpx_idct32x32_34_add_c>, TX_32X32, 34, 8, 1),
+  make_tuple(&vpx_fdct32x32_c, &wrapper<vpx_idct32x32_1024_add_c>,
+             &wrapper<vpx_idct32x32_1_add_c>, TX_32X32, 1, 8, 1),
+  make_tuple(&vpx_fdct16x16_c, &wrapper<vpx_idct16x16_256_add_c>,
+             &wrapper<vpx_idct16x16_256_add_c>, TX_16X16, 256, 8, 1),
+  make_tuple(&vpx_fdct16x16_c, &wrapper<vpx_idct16x16_256_add_c>,
+             &wrapper<vpx_idct16x16_10_add_c>, TX_16X16, 10, 8, 1),
+  make_tuple(&vpx_fdct16x16_c, &wrapper<vpx_idct16x16_256_add_c>,
+             &wrapper<vpx_idct16x16_1_add_c>, TX_16X16, 1, 8, 1),
+  make_tuple(&vpx_fdct8x8_c, &wrapper<vpx_idct8x8_64_add_c>,
+             &wrapper<vpx_idct8x8_64_add_c>, TX_8X8, 64, 8, 1),
+  make_tuple(&vpx_fdct8x8_c, &wrapper<vpx_idct8x8_64_add_c>,
+             &wrapper<vpx_idct8x8_12_add_c>, TX_8X8, 12, 8, 1),
+  make_tuple(&vpx_fdct8x8_c, &wrapper<vpx_idct8x8_64_add_c>,
+             &wrapper<vpx_idct8x8_1_add_c>, TX_8X8, 1, 8, 1),
+  make_tuple(&vpx_fdct4x4_c, &wrapper<vpx_idct4x4_16_add_c>,
+             &wrapper<vpx_idct4x4_16_add_c>, TX_4X4, 16, 8, 1),
+  make_tuple(&vpx_fdct4x4_c, &wrapper<vpx_idct4x4_16_add_c>,
+             &wrapper<vpx_idct4x4_1_add_c>, TX_4X4, 1, 8, 1)
+};

-#if HAVE_NEON && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
-INSTANTIATE_TEST_CASE_P(
-    NEON, PartialIDctTest,
-    ::testing::Values(
-        make_tuple(&vp9_fdct32x32_c,
-                   &vp9_idct32x32_1024_add_c,
-                   &vp9_idct32x32_1_add_neon,
-                   TX_32X32, 1),
-        make_tuple(&vp9_fdct16x16_c,
-                   &vp9_idct16x16_256_add_c,
-                   &vp9_idct16x16_10_add_neon,
-                   TX_16X16, 10),
-        make_tuple(&vp9_fdct16x16_c,
-                   &vp9_idct16x16_256_add_c,
-                   &vp9_idct16x16_1_add_neon,
-                   TX_16X16, 1),
-        make_tuple(&vp9_fdct8x8_c,
-                   &vp9_idct8x8_64_add_c,
-                   &vp9_idct8x8_12_add_neon,
-                   TX_8X8, 12),
-        make_tuple(&vp9_fdct8x8_c,
-                   &vp9_idct8x8_64_add_c,
-                   &vp9_idct8x8_1_add_neon,
-                   TX_8X8, 1),
-        make_tuple(&vp9_fdct4x4_c,
-                   &vp9_idct4x4_16_add_c,
-                   &vp9_idct4x4_1_add_neon,
-                   TX_4X4, 1)));
-#endif  // HAVE_NEON && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
+INSTANTIATE_TEST_CASE_P(C, PartialIDctTest,
+                        ::testing::ValuesIn(c_partial_idct_tests));

-#if HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
-INSTANTIATE_TEST_CASE_P(
-    SSE2, PartialIDctTest,
-    ::testing::Values(
-        make_tuple(&vp9_fdct32x32_c,
-                   &vp9_idct32x32_1024_add_c,
-                   &vp9_idct32x32_34_add_sse2,
-                   TX_32X32, 34),
-        make_tuple(&vp9_fdct32x32_c,
-                   &vp9_idct32x32_1024_add_c,
-                   &vp9_idct32x32_1_add_sse2,
-                   TX_32X32, 1),
-        make_tuple(&vp9_fdct16x16_c,
-                   &vp9_idct16x16_256_add_c,
-                   &vp9_idct16x16_10_add_sse2,
-                   TX_16X16, 10),
-        make_tuple(&vp9_fdct16x16_c,
-                   &vp9_idct16x16_256_add_c,
-                   &vp9_idct16x16_1_add_sse2,
-                   TX_16X16, 1),
-        make_tuple(&vp9_fdct8x8_c,
-                   &vp9_idct8x8_64_add_c,
-                   &vp9_idct8x8_12_add_sse2,
-                   TX_8X8, 12),
-        make_tuple(&vp9_fdct8x8_c,
-                   &vp9_idct8x8_64_add_c,
-                   &vp9_idct8x8_1_add_sse2,
-                   TX_8X8, 1),
-        make_tuple(&vp9_fdct4x4_c,
-                   &vp9_idct4x4_16_add_c,
-                   &vp9_idct4x4_1_add_sse2,
-                   TX_4X4, 1)));
-#endif
+#if HAVE_NEON && !CONFIG_EMULATE_HARDWARE
+const PartialInvTxfmParam neon_partial_idct_tests[] = {
+#if CONFIG_VP9_HIGHBITDEPTH
+  make_tuple(&vpx_highbd_fdct8x8_c,
+             &highbd_wrapper<vpx_highbd_idct8x8_64_add_c>,
+             &highbd_wrapper<vpx_highbd_idct8x8_64_add_neon>, TX_8X8, 64, 8, 2),
+  make_tuple(
+      &vpx_highbd_fdct8x8_c, &highbd_wrapper<vpx_highbd_idct8x8_64_add_c>,
+      &highbd_wrapper<vpx_highbd_idct8x8_64_add_neon>, TX_8X8, 64, 10, 2),
+  make_tuple(
+      &vpx_highbd_fdct8x8_c, &highbd_wrapper<vpx_highbd_idct8x8_64_add_c>,
+      &highbd_wrapper<vpx_highbd_idct8x8_64_add_neon>, TX_8X8, 64, 12, 2),
+  make_tuple(&vpx_highbd_fdct8x8_c,
+             &highbd_wrapper<vpx_highbd_idct8x8_12_add_c>,
+             &highbd_wrapper<vpx_highbd_idct8x8_12_add_neon>, TX_8X8, 12, 8, 2),
+  make_tuple(
+      &vpx_highbd_fdct8x8_c, &highbd_wrapper<vpx_highbd_idct8x8_12_add_c>,
+      &highbd_wrapper<vpx_highbd_idct8x8_12_add_neon>, TX_8X8, 12, 10, 2),
+  make_tuple(
+      &vpx_highbd_fdct8x8_c, &highbd_wrapper<vpx_highbd_idct8x8_12_add_c>,
+      &highbd_wrapper<vpx_highbd_idct8x8_12_add_neon>, TX_8X8, 12, 12, 2),
+  make_tuple(&vpx_highbd_fdct8x8_c, &highbd_wrapper<vpx_highbd_idct8x8_1_add_c>,
+             &highbd_wrapper<vpx_highbd_idct8x8_1_add_neon>, TX_8X8, 1, 8, 2),
+  make_tuple(&vpx_highbd_fdct8x8_c, &highbd_wrapper<vpx_highbd_idct8x8_1_add_c>,
+             &highbd_wrapper<vpx_highbd_idct8x8_1_add_neon>, TX_8X8, 1, 10, 2),
+  make_tuple(&vpx_highbd_fdct8x8_c, &highbd_wrapper<vpx_highbd_idct8x8_1_add_c>,
+             &highbd_wrapper<vpx_highbd_idct8x8_1_add_neon>, TX_8X8, 1, 12, 2),
+  make_tuple(&vpx_highbd_fdct4x4_c,
+             &highbd_wrapper<vpx_highbd_idct4x4_16_add_c>,
+             &highbd_wrapper<vpx_highbd_idct4x4_16_add_neon>, TX_4X4, 16, 8, 2),
+  make_tuple(
+      &vpx_highbd_fdct4x4_c, &highbd_wrapper<vpx_highbd_idct4x4_16_add_c>,
+      &highbd_wrapper<vpx_highbd_idct4x4_16_add_neon>, TX_4X4, 16, 10, 2),
+  make_tuple(
+      &vpx_highbd_fdct4x4_c, &highbd_wrapper<vpx_highbd_idct4x4_16_add_c>,
+      &highbd_wrapper<vpx_highbd_idct4x4_16_add_neon>, TX_4X4, 16, 12, 2),
+  make_tuple(&vpx_highbd_fdct4x4_c, &highbd_wrapper<vpx_highbd_idct4x4_1_add_c>,
+             &highbd_wrapper<vpx_highbd_idct4x4_1_add_neon>, TX_4X4, 1, 8, 2),
+  make_tuple(&vpx_highbd_fdct4x4_c, &highbd_wrapper<vpx_highbd_idct4x4_1_add_c>,
+             &highbd_wrapper<vpx_highbd_idct4x4_1_add_neon>, TX_4X4, 1, 10, 2),
+  make_tuple(&vpx_highbd_fdct4x4_c, &highbd_wrapper<vpx_highbd_idct4x4_1_add_c>,
+             &highbd_wrapper<vpx_highbd_idct4x4_1_add_neon>, TX_4X4, 1, 12, 2),
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+  make_tuple(&vpx_fdct32x32_c, &wrapper<vpx_idct32x32_1024_add_c>,
+             &wrapper<vpx_idct32x32_1024_add_neon>, TX_32X32, 1024, 8, 1),
+  make_tuple(&vpx_fdct32x32_c, &wrapper<vpx_idct32x32_1024_add_c>,
+             &wrapper<vpx_idct32x32_135_add_neon>, TX_32X32, 135, 8, 1),
+  make_tuple(&vpx_fdct32x32_c, &wrapper<vpx_idct32x32_1024_add_c>,
+             &wrapper<vpx_idct32x32_34_add_neon>, TX_32X32, 34, 8, 1),
+  make_tuple(&vpx_fdct32x32_c, &wrapper<vpx_idct32x32_1024_add_c>,
+             &wrapper<vpx_idct32x32_1_add_neon>, TX_32X32, 1, 8, 1),
+  make_tuple(&vpx_fdct16x16_c, &wrapper<vpx_idct16x16_256_add_c>,
+             &wrapper<vpx_idct16x16_256_add_neon>, TX_16X16, 256, 8, 1),
+  make_tuple(&vpx_fdct16x16_c, &wrapper<vpx_idct16x16_256_add_c>,
+             &wrapper<vpx_idct16x16_10_add_neon>, TX_16X16, 10, 8, 1),
+  make_tuple(&vpx_fdct16x16_c, &wrapper<vpx_idct16x16_256_add_c>,
+             &wrapper<vpx_idct16x16_1_add_neon>, TX_16X16, 1, 8, 1),
+  make_tuple(&vpx_fdct8x8_c, &wrapper<vpx_idct8x8_64_add_c>,
+             &wrapper<vpx_idct8x8_64_add_neon>, TX_8X8, 64, 8, 1),
+  make_tuple(&vpx_fdct8x8_c, &wrapper<vpx_idct8x8_64_add_c>,
+             &wrapper<vpx_idct8x8_12_add_neon>, TX_8X8, 12, 8, 1),
+  make_tuple(&vpx_fdct8x8_c, &wrapper<vpx_idct8x8_64_add_c>,
+             &wrapper<vpx_idct8x8_1_add_neon>, TX_8X8, 1, 8, 1),
+  make_tuple(&vpx_fdct4x4_c, &wrapper<vpx_idct4x4_16_add_c>,
+             &wrapper<vpx_idct4x4_16_add_neon>, TX_4X4, 16, 8, 1),
+  make_tuple(&vpx_fdct4x4_c, &wrapper<vpx_idct4x4_16_add_c>,
+             &wrapper<vpx_idct4x4_1_add_neon>, TX_4X4, 1, 8, 1)
+};

-#if HAVE_SSSE3 && ARCH_X86_64 && !CONFIG_VP9_HIGHBITDEPTH && \
-    !CONFIG_EMULATE_HARDWARE
-INSTANTIATE_TEST_CASE_P(
-    SSSE3_64, PartialIDctTest,
-    ::testing::Values(
-        make_tuple(&vp9_fdct8x8_c,
-                   &vp9_idct8x8_64_add_c,
-                   &vp9_idct8x8_12_add_ssse3,
-                   TX_8X8, 12)));
-#endif
+INSTANTIATE_TEST_CASE_P(NEON, PartialIDctTest,
+                        ::testing::ValuesIn(neon_partial_idct_tests));
+#endif  // HAVE_NEON && !CONFIG_EMULATE_HARDWARE

-#if HAVE_MSA && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
-INSTANTIATE_TEST_CASE_P(
-    MSA, PartialIDctTest,
-    ::testing::Values(
-        make_tuple(&vp9_fdct32x32_c,
-                   &vp9_idct32x32_1024_add_c,
-                   &vp9_idct32x32_34_add_msa,
-                   TX_32X32, 34),
-        make_tuple(&vp9_fdct32x32_c,
-                   &vp9_idct32x32_1024_add_c,
-                   &vp9_idct32x32_1_add_msa,
-                   TX_32X32, 1),
-        make_tuple(&vp9_fdct16x16_c,
-                   &vp9_idct16x16_256_add_c,
-                   &vp9_idct16x16_10_add_msa,
-                   TX_16X16, 10),
-        make_tuple(&vp9_fdct16x16_c,
-                   &vp9_idct16x16_256_add_c,
-                   &vp9_idct16x16_1_add_msa,
-                   TX_16X16, 1),
-        make_tuple(&vp9_fdct8x8_c,
-                   &vp9_idct8x8_64_add_c,
-                   &vp9_idct8x8_12_add_msa,
-                   TX_8X8, 10),
-        make_tuple(&vp9_fdct8x8_c,
-                   &vp9_idct8x8_64_add_c,
-                   &vp9_idct8x8_1_add_msa,
-                   TX_8X8, 1),
-        make_tuple(&vp9_fdct4x4_c,
-                   &vp9_idct4x4_16_add_c,
-                   &vp9_idct4x4_1_add_msa,
-                   TX_4X4, 1)));
-#endif  // HAVE_MSA && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
+#if HAVE_SSE2 && !CONFIG_EMULATE_HARDWARE
+// idct32x32_135 is implemented using the 1024 version, which is used below.
+const PartialInvTxfmParam sse2_partial_idct_tests[] = {
+#if CONFIG_VP9_HIGHBITDEPTH
+  make_tuple(
+      &vpx_highbd_fdct32x32_c, &highbd_wrapper<vpx_highbd_idct32x32_1024_add_c>,
+      &highbd_wrapper<vpx_highbd_idct32x32_1_add_sse2>, TX_32X32, 1, 8, 2),
+  make_tuple(
+      &vpx_highbd_fdct32x32_c, &highbd_wrapper<vpx_highbd_idct32x32_1024_add_c>,
+      &highbd_wrapper<vpx_highbd_idct32x32_1_add_sse2>, TX_32X32, 1, 10, 2),
+  make_tuple(
+      &vpx_highbd_fdct32x32_c, &highbd_wrapper<vpx_highbd_idct32x32_1024_add_c>,
+      &highbd_wrapper<vpx_highbd_idct32x32_1_add_sse2>, TX_32X32, 1, 12, 2),
+  make_tuple(
+      &vpx_highbd_fdct16x16_c, &highbd_wrapper<vpx_highbd_idct16x16_256_add_c>,
+      &highbd_wrapper<vpx_highbd_idct16x16_256_add_sse2>, TX_16X16, 256, 8, 2),
+  make_tuple(
+      &vpx_highbd_fdct16x16_c, &highbd_wrapper<vpx_highbd_idct16x16_256_add_c>,
+      &highbd_wrapper<vpx_highbd_idct16x16_256_add_sse2>, TX_16X16, 256, 10, 2),
+  make_tuple(
+      &vpx_highbd_fdct16x16_c, &highbd_wrapper<vpx_highbd_idct16x16_256_add_c>,
+      &highbd_wrapper<vpx_highbd_idct16x16_256_add_sse2>, TX_16X16, 256, 12, 2),
+  make_tuple(
+      &vpx_highbd_fdct16x16_c, &highbd_wrapper<vpx_highbd_idct16x16_256_add_c>,
+      &highbd_wrapper<vpx_highbd_idct16x16_10_add_sse2>, TX_16X16, 10, 8, 2),
+  make_tuple(
+      &vpx_highbd_fdct16x16_c, &highbd_wrapper<vpx_highbd_idct16x16_256_add_c>,
+      &highbd_wrapper<vpx_highbd_idct16x16_10_add_sse2>, TX_16X16, 10, 10, 2),
+  make_tuple(
+      &vpx_highbd_fdct16x16_c, &highbd_wrapper<vpx_highbd_idct16x16_256_add_c>,
+      &highbd_wrapper<vpx_highbd_idct16x16_10_add_sse2>, TX_16X16, 10, 12, 2),
+  make_tuple(&vpx_highbd_fdct8x8_c,
+             &highbd_wrapper<vpx_highbd_idct8x8_64_add_c>,
+             &highbd_wrapper<vpx_highbd_idct8x8_64_add_sse2>, TX_8X8, 64, 8, 2),
+  make_tuple(
+      &vpx_highbd_fdct8x8_c, &highbd_wrapper<vpx_highbd_idct8x8_64_add_c>,
+      &highbd_wrapper<vpx_highbd_idct8x8_64_add_sse2>, TX_8X8, 64, 10, 2),
+  make_tuple(
+      &vpx_highbd_fdct8x8_c, &highbd_wrapper<vpx_highbd_idct8x8_64_add_c>,
+      &highbd_wrapper<vpx_highbd_idct8x8_64_add_sse2>, TX_8X8, 64, 12, 2),
+  make_tuple(&vpx_highbd_fdct8x8_c,
+             &highbd_wrapper<vpx_highbd_idct8x8_64_add_c>,
+             &highbd_wrapper<vpx_highbd_idct8x8_12_add_sse2>, TX_8X8, 12, 8, 2),
+  make_tuple(
+      &vpx_highbd_fdct8x8_c, &highbd_wrapper<vpx_highbd_idct8x8_64_add_c>,
+      &highbd_wrapper<vpx_highbd_idct8x8_12_add_sse2>, TX_8X8, 12, 10, 2),
+  make_tuple(
+      &vpx_highbd_fdct8x8_c, &highbd_wrapper<vpx_highbd_idct8x8_64_add_c>,
+      &highbd_wrapper<vpx_highbd_idct8x8_12_add_sse2>, TX_8X8, 12, 12, 2),
+  make_tuple(&vpx_highbd_fdct4x4_c,
+             &highbd_wrapper<vpx_highbd_idct4x4_16_add_c>,
+             &highbd_wrapper<vpx_highbd_idct4x4_16_add_sse2>, TX_4X4, 16, 8, 2),
+  make_tuple(
+      &vpx_highbd_fdct4x4_c, &highbd_wrapper<vpx_highbd_idct4x4_16_add_c>,
+      &highbd_wrapper<vpx_highbd_idct4x4_16_add_sse2>, TX_4X4, 16, 10, 2),
+  make_tuple(
+      &vpx_highbd_fdct4x4_c, &highbd_wrapper<vpx_highbd_idct4x4_16_add_c>,
+      &highbd_wrapper<vpx_highbd_idct4x4_16_add_sse2>, TX_4X4, 16, 12, 2),
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+  make_tuple(&vpx_fdct32x32_c, &wrapper<vpx_idct32x32_1024_add_c>,
+             &wrapper<vpx_idct32x32_1024_add_sse2>, TX_32X32, 1024, 8, 1),
+  make_tuple(&vpx_fdct32x32_c, &wrapper<vpx_idct32x32_1024_add_c>,
+             &wrapper<vpx_idct32x32_1024_add_sse2>, TX_32X32, 135, 8, 1),
+  make_tuple(&vpx_fdct32x32_c, &wrapper<vpx_idct32x32_1024_add_c>,
+             &wrapper<vpx_idct32x32_34_add_sse2>, TX_32X32, 34, 8, 1),
+  make_tuple(&vpx_fdct32x32_c, &wrapper<vpx_idct32x32_1024_add_c>,
+             &wrapper<vpx_idct32x32_1_add_sse2>, TX_32X32, 1, 8, 1),
+  make_tuple(&vpx_fdct16x16_c, &wrapper<vpx_idct16x16_256_add_c>,
+             &wrapper<vpx_idct16x16_256_add_sse2>, TX_16X16, 256, 8, 1),
+  make_tuple(&vpx_fdct16x16_c, &wrapper<vpx_idct16x16_256_add_c>,
+             &wrapper<vpx_idct16x16_10_add_sse2>, TX_16X16, 10, 8, 1),
+  make_tuple(&vpx_fdct16x16_c, &wrapper<vpx_idct16x16_256_add_c>,
+             &wrapper<vpx_idct16x16_1_add_sse2>, TX_16X16, 1, 8, 1),
+  make_tuple(&vpx_fdct8x8_c, &wrapper<vpx_idct8x8_64_add_c>,
+             &wrapper<vpx_idct8x8_64_add_sse2>, TX_8X8, 64, 8, 1),
+  make_tuple(&vpx_fdct8x8_c, &wrapper<vpx_idct8x8_64_add_c>,
+             &wrapper<vpx_idct8x8_12_add_sse2>, TX_8X8, 12, 8, 1),
+  make_tuple(&vpx_fdct8x8_c, &wrapper<vpx_idct8x8_64_add_c>,
+             &wrapper<vpx_idct8x8_1_add_sse2>, TX_8X8, 1, 8, 1),
+  make_tuple(&vpx_fdct4x4_c, &wrapper<vpx_idct4x4_16_add_c>,
+             &wrapper<vpx_idct4x4_16_add_sse2>, TX_4X4, 16, 8, 1),
+  make_tuple(&vpx_fdct4x4_c, &wrapper<vpx_idct4x4_16_add_c>,
+             &wrapper<vpx_idct4x4_1_add_sse2>, TX_4X4, 1, 8, 1)
+};
+
+INSTANTIATE_TEST_CASE_P(SSE2, PartialIDctTest,
+                        ::testing::ValuesIn(sse2_partial_idct_tests));
+
+#endif  // HAVE_SSE2 && !CONFIG_EMULATE_HARDWARE
+
+#if HAVE_SSSE3 && ARCH_X86_64 && !CONFIG_EMULATE_HARDWARE
+const PartialInvTxfmParam ssse3_partial_idct_tests[] = {
+  make_tuple(&vpx_fdct32x32_c, &wrapper<vpx_idct32x32_1024_add_c>,
+             &wrapper<vpx_idct32x32_1024_add_ssse3>, TX_32X32, 1024, 8, 1),
+  make_tuple(&vpx_fdct32x32_c, &wrapper<vpx_idct32x32_1024_add_c>,
+             &wrapper<vpx_idct32x32_135_add_ssse3>, TX_32X32, 135, 8, 1),
+  make_tuple(&vpx_fdct32x32_c, &wrapper<vpx_idct32x32_1024_add_c>,
+             &wrapper<vpx_idct32x32_34_add_ssse3>, TX_32X32, 34, 8, 1),
+  make_tuple(&vpx_fdct8x8_c, &wrapper<vpx_idct8x8_64_add_c>,
+             &wrapper<vpx_idct8x8_64_add_ssse3>, TX_8X8, 64, 8, 1),
+  make_tuple(&vpx_fdct8x8_c, &wrapper<vpx_idct8x8_64_add_c>,
+             &wrapper<vpx_idct8x8_12_add_ssse3>, TX_8X8, 12, 8, 1)
+};
+
+INSTANTIATE_TEST_CASE_P(SSSE3, PartialIDctTest,
+                        ::testing::ValuesIn(ssse3_partial_idct_tests));
+#endif  // HAVE_SSSE3 && ARCH_X86_64 && !CONFIG_EMULATE_HARDWARE
+
+#if HAVE_DSPR2 && !CONFIG_EMULATE_HARDWARE && !CONFIG_VP9_HIGHBITDEPTH
+const PartialInvTxfmParam dspr2_partial_idct_tests[] = {
+  make_tuple(&vpx_fdct32x32_c, &wrapper<vpx_idct32x32_1024_add_c>,
+             &wrapper<vpx_idct32x32_1024_add_dspr2>, TX_32X32, 1024, 8, 1),
+  make_tuple(&vpx_fdct32x32_c, &wrapper<vpx_idct32x32_1024_add_c>,
+             &wrapper<vpx_idct32x32_1024_add_dspr2>, TX_32X32, 135, 8, 1),
+  make_tuple(&vpx_fdct32x32_c, &wrapper<vpx_idct32x32_1024_add_c>,
+             &wrapper<vpx_idct32x32_34_add_dspr2>, TX_32X32, 34, 8, 1),
+  make_tuple(&vpx_fdct32x32_c, &wrapper<vpx_idct32x32_1024_add_c>,
+             &wrapper<vpx_idct32x32_1_add_dspr2>, TX_32X32, 1, 8, 1),
+  make_tuple(&vpx_fdct16x16_c, &wrapper<vpx_idct16x16_256_add_c>,
+             &wrapper<vpx_idct16x16_256_add_dspr2>, TX_16X16, 256, 8, 1),
+  make_tuple(&vpx_fdct16x16_c, &wrapper<vpx_idct16x16_256_add_c>,
+             &wrapper<vpx_idct16x16_10_add_dspr2>, TX_16X16, 10, 8, 1),
+  make_tuple(&vpx_fdct16x16_c, &wrapper<vpx_idct16x16_256_add_c>,
+             &wrapper<vpx_idct16x16_1_add_dspr2>, TX_16X16, 1, 8, 1),
+  make_tuple(&vpx_fdct8x8_c, &wrapper<vpx_idct8x8_64_add_c>,
+             &wrapper<vpx_idct8x8_64_add_dspr2>, TX_8X8, 64, 8, 1),
+  make_tuple(&vpx_fdct8x8_c, &wrapper<vpx_idct8x8_64_add_c>,
+             &wrapper<vpx_idct8x8_12_add_dspr2>, TX_8X8, 12, 8, 1),
+  make_tuple(&vpx_fdct8x8_c, &wrapper<vpx_idct8x8_64_add_c>,
+             &wrapper<vpx_idct8x8_1_add_dspr2>, TX_8X8, 1, 8, 1),
+  make_tuple(&vpx_fdct4x4_c, &wrapper<vpx_idct4x4_16_add_c>,
+             &wrapper<vpx_idct4x4_16_add_dspr2>, TX_4X4, 16, 8, 1),
+  make_tuple(&vpx_fdct4x4_c, &wrapper<vpx_idct4x4_16_add_c>,
+             &wrapper<vpx_idct4x4_1_add_dspr2>, TX_4X4, 1, 8, 1)
+};
+
+INSTANTIATE_TEST_CASE_P(DSPR2, PartialIDctTest,
+                        ::testing::ValuesIn(dspr2_partial_idct_tests));
+#endif  // HAVE_DSPR2 && !CONFIG_EMULATE_HARDWARE && !CONFIG_VP9_HIGHBITDEPTH
+
+#if HAVE_MSA && !CONFIG_EMULATE_HARDWARE && !CONFIG_VP9_HIGHBITDEPTH
+// idct32x32_135 is implemented using the 1024 version, which is used below.
+const PartialInvTxfmParam msa_partial_idct_tests[] = {
+  make_tuple(&vpx_fdct32x32_c, &wrapper<vpx_idct32x32_1024_add_c>,
+             &wrapper<vpx_idct32x32_1024_add_msa>, TX_32X32, 1024, 8, 1),
+  make_tuple(&vpx_fdct32x32_c, &wrapper<vpx_idct32x32_1024_add_c>,
+             &wrapper<vpx_idct32x32_1024_add_msa>, TX_32X32, 135, 8, 1),
+  make_tuple(&vpx_fdct32x32_c, &wrapper<vpx_idct32x32_1024_add_c>,
+             &wrapper<vpx_idct32x32_34_add_msa>, TX_32X32, 34, 8, 1),
+  make_tuple(&vpx_fdct32x32_c, &wrapper<vpx_idct32x32_1024_add_c>,
+             &wrapper<vpx_idct32x32_1_add_msa>, TX_32X32, 1, 8, 1),
+  make_tuple(&vpx_fdct16x16_c, &wrapper<vpx_idct16x16_256_add_c>,
+             &wrapper<vpx_idct16x16_256_add_msa>, TX_16X16, 256, 8, 1),
+  make_tuple(&vpx_fdct16x16_c, &wrapper<vpx_idct16x16_256_add_c>,
+             &wrapper<vpx_idct16x16_10_add_msa>, TX_16X16, 10, 8, 1),
+  make_tuple(&vpx_fdct16x16_c, &wrapper<vpx_idct16x16_256_add_c>,
+             &wrapper<vpx_idct16x16_1_add_msa>, TX_16X16, 1, 8, 1),
+  make_tuple(&vpx_fdct8x8_c, &wrapper<vpx_idct8x8_64_add_c>,
+             &wrapper<vpx_idct8x8_64_add_msa>, TX_8X8, 64, 8, 1),
+  make_tuple(&vpx_fdct8x8_c, &wrapper<vpx_idct8x8_64_add_c>,
+             &wrapper<vpx_idct8x8_12_add_msa>, TX_8X8, 12, 8, 1),
+  make_tuple(&vpx_fdct8x8_c, &wrapper<vpx_idct8x8_64_add_c>,
+             &wrapper<vpx_idct8x8_1_add_msa>, TX_8X8, 1, 8, 1),
+  make_tuple(&vpx_fdct4x4_c, &wrapper<vpx_idct4x4_16_add_c>,
+             &wrapper<vpx_idct4x4_16_add_msa>, TX_4X4, 16, 8, 1),
+  make_tuple(&vpx_fdct4x4_c, &wrapper<vpx_idct4x4_16_add_c>,
+             &wrapper<vpx_idct4x4_1_add_msa>, TX_4X4, 1, 8, 1)
+};
+
+INSTANTIATE_TEST_CASE_P(MSA, PartialIDctTest,
+                        ::testing::ValuesIn(msa_partial_idct_tests));
+#endif  // HAVE_MSA && !CONFIG_EMULATE_HARDWARE && !CONFIG_VP9_HIGHBITDEPTH

 }  // namespace
--- a/test/pp_filter_test.cc
+++ b/test/pp_filter_test.cc
@@ -7,38 +7,50 @@
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
+#include <limits.h>
+#include "./vpx_config.h"
+#include "./vpx_dsp_rtcd.h"
+#include "test/acm_random.h"
 #include "test/clear_system_state.h"
 #include "test/register_state_check.h"
 #include "third_party/googletest/src/include/gtest/gtest.h"
-#include "./vpx_config.h"
-#include "./vp8_rtcd.h"
 #include "vpx/vpx_integer.h"
 #include "vpx_mem/vpx_mem.h"

-typedef void (*PostProcFunc)(unsigned char *src_ptr,
-                             unsigned char *dst_ptr,
-                             int src_pixels_per_line,
-                             int dst_pixels_per_line,
-                             int cols,
-                             unsigned char *flimit,
-                             int size);
+using libvpx_test::ACMRandom;
+
+typedef void (*VpxPostProcDownAndAcrossMbRowFunc)(
+    unsigned char *src_ptr, unsigned char *dst_ptr, int src_pixels_per_line,
+    int dst_pixels_per_line, int cols, unsigned char *flimit, int size);
+
+typedef void (*VpxMbPostProcAcrossIpFunc)(unsigned char *src, int pitch,
+                                          int rows, int cols, int flimit);
+
+typedef void (*VpxMbPostProcDownFunc)(unsigned char *dst, int pitch, int rows,
+                                      int cols, int flimit);

 namespace {

-class VP8PostProcessingFilterTest
-    : public ::testing::TestWithParam<PostProcFunc> {
+// Map a loop filter strength (floored at 20) to the post proc filter level.
+int q2mbl(int x) {
+  const int strength = (x < 20) ? 20 : x;
+  // Scale the distance from 50 by 10/8, then return the square over 3.
+  const int scaled = 50 + (strength - 50) * 10 / 8;
+  return scaled * scaled / 3;
+}
+
+class VpxPostProcDownAndAcrossMbRowTest
+    : public ::testing::TestWithParam<VpxPostProcDownAndAcrossMbRowFunc> {
 public:
-  virtual void TearDown() {
-    libvpx_test::ClearSystemState();
-  }
+  virtual void TearDown() { libvpx_test::ClearSystemState(); }
 };

-// Test routine for the VP8 post-processing function
-// vp8_post_proc_down_and_across_mb_row_c.
+// Test routine for the VPx post-processing function
+// vpx_post_proc_down_and_across_mb_row_c.

-TEST_P(VP8PostProcessingFilterTest, FilterOutputCheck) {
+TEST_P(VpxPostProcDownAndAcrossMbRowTest, CheckFilterOutput) {
  // Size of the underlying data block that will be filtered.
-  const int block_width  = 16;
+  const int block_width = 16;
  const int block_height = 16;

  // 5-tap filter needs 2 padding rows above and below the block in the input.
@@ -53,14 +65,20 @@ TEST_P(VP8PostProcessingFilterTest, FilterOutputCheck) {
  const int output_stride = output_width;
  const int output_size = output_width * output_height;

-  uint8_t *const src_image =
-      reinterpret_cast<uint8_t*>(vpx_calloc(input_size, 1));
-  uint8_t *const dst_image =
-      reinterpret_cast<uint8_t*>(vpx_calloc(output_size, 1));
+  uint8_t *const src_image = new uint8_t[input_size];
+  ASSERT_TRUE(src_image != NULL);
+
+  // Though the left padding is only 8 bytes, the assembly code tries to
+  // read 16 bytes before the pointer.
+  uint8_t *const dst_image = new uint8_t[output_size + 8];
+  ASSERT_TRUE(dst_image != NULL);

  // Pointers to top-left pixel of block in the input and output images.
  uint8_t *const src_image_ptr = src_image + (input_stride << 1);
-  uint8_t *const dst_image_ptr = dst_image + 8;
+
+  // The assembly works in increments of 16. The first read may be offset by
+  // this amount.
+  uint8_t *const dst_image_ptr = dst_image + 16;
  uint8_t *const flimits =
      reinterpret_cast<uint8_t *>(vpx_memalign(16, block_width));
  (void)memset(flimits, 255, block_width);
@@ -80,34 +98,518 @@ TEST_P(VP8PostProcessingFilterTest, FilterOutputCheck) {
  // Initialize pixels in the output to 99.
  (void)memset(dst_image, 99, output_size);

-  ASM_REGISTER_STATE_CHECK(
-      GetParam()(src_image_ptr, dst_image_ptr, input_stride,
-                 output_stride, block_width, flimits, 16));
+  ASM_REGISTER_STATE_CHECK(GetParam()(src_image_ptr, dst_image_ptr,
+                                      input_stride, output_stride, block_width,
+                                      flimits, 16));

-  static const uint8_t expected_data[block_height] = {
+  static const uint8_t kExpectedOutput[block_height] = {
    4, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 4
  };

  pixel_ptr = dst_image_ptr;
  for (int i = 0; i < block_height; ++i) {
    for (int j = 0; j < block_width; ++j) {
-      EXPECT_EQ(expected_data[i], pixel_ptr[j])
-          << "VP8PostProcessingFilterTest failed with invalid filter output";
+      ASSERT_EQ(kExpectedOutput[i], pixel_ptr[j]) << "at (" << i << ", " << j
+                                                  << ")";
    }
    pixel_ptr += output_stride;
  }

-  vpx_free(src_image);
-  vpx_free(dst_image);
+  delete[] src_image;
+  delete[] dst_image;
  vpx_free(flimits);
 };

-INSTANTIATE_TEST_CASE_P(C, VP8PostProcessingFilterTest,
-    ::testing::Values(vp8_post_proc_down_and_across_mb_row_c));
+TEST_P(VpxPostProcDownAndAcrossMbRowTest, CheckCvsAssembly) {
+  // Size of the underlying data block that will be filtered.
+  // Y blocks are always a multiple of 16 wide and exactly 16 high. U and V
+  // blocks are always a multiple of 8 wide and exactly 8 high.
+  const int block_width = 136;
+  const int block_height = 16;
+
+  // 5-tap filter needs 2 padding rows above and below the block in the input.
+  // SSE2 reads in blocks of 16. Pad an extra 8 in case the width is not %16.
+  const int input_width = block_width;
+  const int input_height = block_height + 4 + 8;
+  const int input_stride = input_width;
+  const int input_size = input_stride * input_height;
+
+  // Filter extends output block by 8 samples at left and right edges.
+  // SSE2 reads in blocks of 16. Pad an extra 8 in case the width is not %16.
+  const int output_width = block_width + 24;
+  const int output_height = block_height;
+  const int output_stride = output_width;
+  const int output_size = output_stride * output_height;
+
+  uint8_t *const src_image = new uint8_t[input_size];
+  ASSERT_TRUE(src_image != NULL);
+
+  // Though the left padding is only 8 bytes, the assembly code tries to
+  // read 16 bytes before the pointer.
+  uint8_t *const dst_image = new uint8_t[output_size + 8];
+  ASSERT_TRUE(dst_image != NULL);
+  uint8_t *const dst_image_ref = new uint8_t[output_size + 8];
+  ASSERT_TRUE(dst_image_ref != NULL);
+
+  // Pointers to top-left pixel of block in the input and output images.
+  uint8_t *const src_image_ptr = src_image + (input_stride << 1);
+
+  // The assembly works in increments of 16. The first read may be offset by
+  // this amount. The C reference writes into its own, separate buffer.
+  uint8_t *const dst_image_ptr = dst_image + 16;
+  uint8_t *const dst_image_ref_ptr = dst_image_ref + 16;
+
+  // Filter values are set in blocks of 16 for Y and 8 for U/V. Each macroblock
+  // can have a different filter. SSE2 assembly reads flimits in blocks of 16 so
+  // it must be padded out.
+  const int flimits_width = block_width % 16 ? block_width + 8 : block_width;
+  uint8_t *const flimits =
+      reinterpret_cast<uint8_t *>(vpx_memalign(16, flimits_width));
+
+  ACMRandom rnd;
+  rnd.Reset(ACMRandom::DeterministicSeed());
+  // Initialize pixels in the input:
+  //   block pixels to random values.
+  //   border pixels to value 10.
+  (void)memset(src_image, 10, input_size);
+  uint8_t *pixel_ptr = src_image_ptr;
+  for (int i = 0; i < block_height; ++i) {
+    for (int j = 0; j < block_width; ++j) {
+      pixel_ptr[j] = rnd.Rand8();
+    }
+    pixel_ptr += input_stride;
+  }
+  // For each 8-wide group and each limit 0..254, compare against the C output.
+  for (int blocks = 0; blocks < block_width; blocks += 8) {
+    (void)memset(flimits, 0, sizeof(*flimits) * flimits_width);
+
+    for (int f = 0; f < 255; f++) {
+      (void)memset(flimits + blocks, f, sizeof(*flimits) * 8);
+
+      (void)memset(dst_image, 0, output_size);
+      (void)memset(dst_image_ref, 0, output_size);
+
+      vpx_post_proc_down_and_across_mb_row_c(
+          src_image_ptr, dst_image_ref_ptr, input_stride, output_stride,
+          block_width, flimits, block_height);
+      ASM_REGISTER_STATE_CHECK(GetParam()(src_image_ptr, dst_image_ptr,
+                                          input_stride, output_stride,
+                                          block_width, flimits, block_height));
+
+      for (int i = 0; i < block_height; ++i) {
+        for (int j = 0; j < block_width; ++j) {
+          ASSERT_EQ(dst_image_ref_ptr[j + i * output_stride],
+                    dst_image_ptr[j + i * output_stride])
+              << "at (" << i << ", " << j << ")";
+        }
+      }
+    }
+  }
+
+  delete[] src_image;
+  delete[] dst_image;
+  delete[] dst_image_ref;
+  vpx_free(flimits);
+}
+
+class VpxMbPostProcAcrossIpTest
+    : public ::testing::TestWithParam<VpxMbPostProcAcrossIpFunc> {
+ public:
+  virtual void TearDown() { libvpx_test::ClearSystemState(); }
+
+ protected:
+  // Fills |rows| rows of |s|, |src_width| apart, with the ramp 0..cols-1.
+  void SetCols(unsigned char *s, int rows, int cols, int src_width) {
+    unsigned char *row = s;
+    for (int y = 0; y < rows; ++y, row += src_width) {
+      for (int x = 0; x < cols; ++x) row[x] = x;
+    }
+  }
+
+  // Asserts each row of |src_c| matches the single row |expected_output|.
+  void RunComparison(const unsigned char *expected_output, unsigned char *src_c,
+                     int rows, int cols, int src_pitch) {
+    for (int y = 0; y < rows; ++y, src_c += src_pitch) {
+      for (int x = 0; x < cols; ++x) {
+        ASSERT_EQ(expected_output[x], src_c[x]) << "at (" << y << ", " << x
+                                                << ")";
+      }
+    }
+  }
+
+  // Runs the function under test on |s|, then checks the rows of |s|.
+  void RunFilterLevel(unsigned char *s, int rows, int cols, int src_width,
+                      int filter_level, const unsigned char *expected_output) {
+    ASM_REGISTER_STATE_CHECK(
+        GetParam()(s, src_width, rows, cols, filter_level));
+    RunComparison(expected_output, s, rows, cols, src_width);
+  }
+};
+
+TEST_P(VpxMbPostProcAcrossIpTest, CheckLowFilterOutput) {
+  // 16x16 block; each row has 8 bytes of padding before and 17 after it.
+  const int rows = 16;
+  const int cols = 16;
+  const int src_left_padding = 8;
+  const int src_right_padding = 17;
+  const int src_width = src_left_padding + cols + src_right_padding;
+  const int src_size = src_width * rows;
+  // Padding bytes are 10; the block itself holds a 0..15 ramp in every row.
+  unsigned char *const src = new unsigned char[src_size];
+  ASSERT_TRUE(src != NULL);
+  memset(src, 10, src_size);
+  unsigned char *const block = src + src_left_padding;
+  SetCols(block, rows, cols, src_width);
+  // At this level the output is expected to equal the unfiltered ramp.
+  unsigned char *const unfiltered = new unsigned char[rows * cols];
+  ASSERT_TRUE(unfiltered != NULL);
+  SetCols(unfiltered, rows, cols, cols);
+  RunFilterLevel(block, rows, cols, src_width, q2mbl(0), unfiltered);
+  delete[] unfiltered;
+  delete[] src;
+}
+
+TEST_P(VpxMbPostProcAcrossIpTest, CheckMediumFilterOutput) {
+  // 16x16 block; each row has 8 bytes of padding before and 17 after it.
+  const int rows = 16;
+  const int cols = 16;
+  const int src_left_padding = 8;
+  const int src_right_padding = 17;
+  const int src_width = src_left_padding + cols + src_right_padding;
+  const int src_size = src_width * rows;
+
+  // Expected contents of every filtered row at filter level q2mbl(70).
+  static const unsigned char kExpectedOutput[cols] = {
+    2, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 13
+  };
+  // Padding bytes are 10; the block itself holds a 0..15 ramp in every row.
+  unsigned char *const src = new unsigned char[src_size];
+  ASSERT_TRUE(src != NULL);
+  memset(src, 10, src_size);
+  unsigned char *const block = src + src_left_padding;
+  SetCols(block, rows, cols, src_width);
+  RunFilterLevel(block, rows, cols, src_width, q2mbl(70), kExpectedOutput);
+  delete[] src;
+}
+
+// Runs the filter twice over a 16x16 ramp image, once at INT_MAX and once at
+// the level given by q2mbl(100), and expects the same output row both times.
+TEST_P(VpxMbPostProcAcrossIpTest, CheckHighFilterOutput) {
+  const int rows = 16;
+  const int cols = 16;
+  // Each row carries 8 padding bytes before and 17 after the visible pixels.
+  const int src_left_padding = 8;
+  const int src_right_padding = 17;
+  const int src_width = cols + src_left_padding + src_right_padding;
+  const int src_size = rows * src_width;
+
+  unsigned char *const src = new unsigned char[src_size];
+  ASSERT_TRUE(src != NULL);
+  unsigned char *const s = src + src_left_padding;
+
+  memset(src, 10, src_size);
+  SetCols(s, rows, cols, src_width);
+  // A single row of expected values; RunComparison() checks every output row
+  // against it.
+  static const unsigned char kExpectedOutput[cols] = {
+    2, 2, 3, 4, 4, 5, 6, 7, 8, 9, 10, 11, 11, 12, 13, 13
+  };
+
+  RunFilterLevel(s, rows, cols, src_width, INT_MAX, kExpectedOutput);
+
+  // The first run wrote its result into |src|, so rebuild the input before
+  // the second run.
+  memset(src, 10, src_size);
+  SetCols(s, rows, cols, src_width);
+  RunFilterLevel(s, rows, cols, src_width, q2mbl(100), kExpectedOutput);
+
+  delete[] src;
+}
+
+// Verifies that the implementation under test produces the same output as
+// vpx_mbpost_proc_across_ip_c() for every level in [0, 100).
+TEST_P(VpxMbPostProcAcrossIpTest, CheckCvsAssembly) {
+  const int rows = 16;
+  const int cols = 16;
+  const int src_left_padding = 8;
+  const int src_right_padding = 17;
+  const int src_width = cols + src_left_padding + src_right_padding;
+  const int src_size = rows * src_width;
+
+  unsigned char *const c_mem = new unsigned char[src_size];
+  unsigned char *const asm_mem = new unsigned char[src_size];
+  ASSERT_TRUE(c_mem != NULL);
+  ASSERT_TRUE(asm_mem != NULL);
+  unsigned char *const src_c = c_mem + src_left_padding;
+  unsigned char *const src_asm = asm_mem + src_left_padding;
+
+  // When level >= 100, the filter behaves the same as the level = INT_MAX
+  // When level < 20, it behaves the same as the level = 0
+  for (int level = 0; level < 100; level++) {
+    // Start both buffers from the same state: padding of 10 around the ramp.
+    memset(c_mem, 10, src_size);
+    memset(asm_mem, 10, src_size);
+    SetCols(src_c, rows, cols, src_width);
+    SetCols(src_asm, rows, cols, src_width);
+
+    vpx_mbpost_proc_across_ip_c(src_c, src_width, rows, cols, q2mbl(level));
+    ASM_REGISTER_STATE_CHECK(
+        GetParam()(src_asm, src_width, rows, cols, q2mbl(level)));
+
+    // Compare the two outputs row against matching row. RunComparison() is not
+    // suitable here: it reads only the first row of its reference argument, so
+    // rows 1..rows - 1 of the C output would never be examined.
+    for (int r = 0; r < rows; r++) {
+      EXPECT_EQ(0, memcmp(src_c + r * src_width, src_asm + r * src_width,
+                          cols))
+          << "level " << level << ": row " << r << " does not match.";
+    }
+  }
+
+  delete[] c_mem;
+  delete[] asm_mem;
+}
+
+// Test fixture for the column-wise ("down") macroblock post-processing
+// filter. The implementation under test is delivered as the test parameter.
+class VpxMbPostProcDownTest
+    : public ::testing::TestWithParam<VpxMbPostProcDownFunc> {
+ public:
+  virtual void TearDown() { libvpx_test::ClearSystemState(); }
+
+ protected:
+  // Fills row r of a tightly packed rows x cols image (pitch == cols) with
+  // the value r.
+  void SetRows(unsigned char *src_c, int rows, int cols) {
+    for (int r = 0; r < rows; r++) {
+      memset(src_c, r, cols);
+      src_c += cols;
+    }
+  }
+
+  // Writes identical content to |src_c| and |src_asm|: each pixel is its row
+  // index plus rnd(4). The generator is reseeded with
+  // ACMRandom::DeterministicSeed() on every call.
+  void SetRandom(unsigned char *src_c, unsigned char *src_asm, int rows,
+                 int cols, int src_pitch) {
+    ACMRandom rnd;
+    rnd.Reset(ACMRandom::DeterministicSeed());
+
+    // Add some random noise to the input
+    for (int r = 0; r < rows; r++) {
+      for (int c = 0; c < cols; c++) {
+        const int noise = rnd(4);
+        src_c[c] = r + noise;
+        src_asm[c] = r + noise;
+      }
+      src_c += src_pitch;
+      src_asm += src_pitch;
+    }
+  }
+
+  // Same as SetRandom() except that the noise term is 3 * rnd(2).
+  // NOTE(review): the name suggests this input is meant to exercise
+  // saturating arithmetic in the filter; confirm against the implementations.
+  void SetRandomSaturation(unsigned char *src_c, unsigned char *src_asm,
+                           int rows, int cols, int src_pitch) {
+    ACMRandom rnd;
+    rnd.Reset(ACMRandom::DeterministicSeed());
+
+    // Add some random noise to the input
+    for (int r = 0; r < rows; r++) {
+      for (int c = 0; c < cols; c++) {
+        const int noise = 3 * rnd(2);
+        src_c[c] = r + noise;
+        src_asm[c] = r + noise;
+      }
+      src_c += src_pitch;
+      src_asm += src_pitch;
+    }
+  }
+
+  // Asserts that the rows x cols image at |src_c| (rows |src_pitch| bytes
+  // apart) equals |expected_output|, a tightly packed rows x cols image in
+  // row-major order. The first mismatch is reported with its coordinates and
+  // ends the check.
+  void RunComparison(const unsigned char *expected_output, unsigned char *src_c,
+                     int rows, int cols, int src_pitch) {
+    for (int r = 0; r < rows; r++) {
+      for (int c = 0; c < cols; c++) {
+        // The expected image has |cols| values per row, so the row offset is
+        // r * cols (r * rows is only correct when rows == cols).
+        ASSERT_EQ(expected_output[r * cols + c], src_c[c]) << "at (" << r
+                                                           << ", " << c << ")";
+      }
+      src_c += src_pitch;
+    }
+  }
+
+  // Asserts that two rows x cols images sharing the pitch |src_pitch| are
+  // identical. Selected by overload resolution when the first argument is a
+  // pointer to non-const data.
+  void RunComparison(unsigned char *src_c, unsigned char *src_asm, int rows,
+                     int cols, int src_pitch) {
+    for (int r = 0; r < rows; r++) {
+      for (int c = 0; c < cols; c++) {
+        ASSERT_EQ(src_c[c], src_asm[c]) << "at (" << r << ", " << c << ")";
+      }
+      src_c += src_pitch;
+      src_asm += src_pitch;
+    }
+  }
+
+  // Applies the parameterized filter to |s| at |filter_level|, then checks the
+  // contents of |s| against the packed image |expected_output|.
+  void RunFilterLevel(unsigned char *s, int rows, int cols, int src_width,
+                      int filter_level, const unsigned char *expected_output) {
+    ASM_REGISTER_STATE_CHECK(
+        GetParam()(s, src_width, rows, cols, filter_level));
+    RunComparison(expected_output, s, rows, cols, src_width);
+  }
+};
+
+// Runs the filter twice over a 16x16 image whose row r holds the value r, once
+// at INT_MAX and once at the level given by q2mbl(100), and expects the same
+// output image both times.
+TEST_P(VpxMbPostProcDownTest, CheckHighFilterOutput) {
+  const int rows = 16;
+  const int cols = 16;
+  const int src_pitch = cols;
+  // The buffer holds 8 extra rows above and 17 below the visible image.
+  const int src_top_padding = 8;
+  const int src_bottom_padding = 17;
+
+  const int src_size = cols * (rows + src_top_padding + src_bottom_padding);
+  unsigned char *const c_mem = new unsigned char[src_size];
+  ASSERT_TRUE(c_mem != NULL);
+  // Padding rows keep the value 10; SetRows() overwrites the visible rows.
+  memset(c_mem, 10, src_size);
+  unsigned char *const src_c = c_mem + src_top_padding * src_pitch;
+
+  SetRows(src_c, rows, cols);
+
+  // Expected output for the whole 16x16 image.
+  static const unsigned char kExpectedOutput[rows * cols] = {
+    2,  2,  1,  1,  2,  2,  2,  2,  2,  2,  1,  1,  2,  2,  2,  2,  2,  2,  2,
+    2,  3,  2,  2,  2,  2,  2,  2,  2,  3,  2,  2,  2,  3,  3,  3,  3,  3,  3,
+    3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  4,  4,  3,  4,  4,  3,  3,  3,
+    4,  4,  3,  4,  4,  3,  3,  4,  5,  4,  4,  4,  4,  4,  4,  4,  5,  4,  4,
+    4,  4,  4,  4,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,
+    5,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  7,  7,
+    7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  8,  8,  8,  8,  8,
+    8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  9,  8,  9,  9,  8,  8,  8,  9,
+    9,  8,  9,  9,  8,  8,  8,  9,  9,  10, 10, 9,  9,  9,  10, 10, 9,  10, 10,
+    9,  9,  9,  10, 10, 10, 11, 10, 10, 10, 11, 10, 11, 10, 11, 10, 10, 10, 11,
+    10, 11, 11, 11, 11, 11, 11, 11, 12, 11, 11, 11, 11, 11, 11, 11, 12, 11, 12,
+    12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 13, 12,
+    13, 12, 13, 12, 12, 12, 13, 12, 13, 12, 13, 12, 13, 13, 13, 14, 13, 13, 13,
+    13, 13, 13, 13, 14, 13, 13, 13, 13
+  };
+
+  RunFilterLevel(src_c, rows, cols, src_pitch, INT_MAX, kExpectedOutput);
+
+  // The first run wrote its result into |c_mem|, so rebuild the input before
+  // the second run.
+  memset(c_mem, 10, src_size);
+  SetRows(src_c, rows, cols);
+  RunFilterLevel(src_c, rows, cols, src_pitch, q2mbl(100), kExpectedOutput);
+
+  delete[] c_mem;
+}
+
+// Runs the filter at the level given by q2mbl(70) over a 16x16 image whose
+// row r holds the value r and compares the result with kExpectedOutput.
+TEST_P(VpxMbPostProcDownTest, CheckMediumFilterOutput) {
+  const int rows = 16;
+  const int cols = 16;
+  const int src_pitch = cols;
+  // The buffer holds 8 extra rows above and 17 below the visible image.
+  const int src_top_padding = 8;
+  const int src_bottom_padding = 17;
+
+  const int src_size = cols * (rows + src_top_padding + src_bottom_padding);
+  unsigned char *const c_mem = new unsigned char[src_size];
+  ASSERT_TRUE(c_mem != NULL);
+  // Padding rows keep the value 10; SetRows() overwrites the visible rows.
+  memset(c_mem, 10, src_size);
+  unsigned char *const src_c = c_mem + src_top_padding * src_pitch;
+
+  SetRows(src_c, rows, cols);
+
+  // Expected output for the whole 16x16 image.
+  static const unsigned char kExpectedOutput[rows * cols] = {
+    2,  2,  1,  1,  2,  2,  2,  2,  2,  2,  1,  1,  2,  2,  2,  2,  2,  2,  2,
+    2,  3,  2,  2,  2,  2,  2,  2,  2,  3,  2,  2,  2,  2,  2,  2,  2,  2,  2,
+    2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  3,  3,  3,  3,  3,  3,  3,  3,  3,
+    3,  3,  3,  3,  3,  3,  3,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
+    4,  4,  4,  4,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,
+    5,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  7,  7,
+    7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  8,  8,  8,  8,  8,
+    8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  9,  9,  9,  9,  9,  9,  9,  9,
+    9,  9,  9,  9,  9,  9,  9,  9,  10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
+    10, 10, 10, 10, 10, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+    11, 11, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 13,
+    13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 12, 12, 13, 12,
+    13, 12, 13, 12, 12, 12, 13, 12, 13, 12, 13, 12, 13, 13, 13, 14, 13, 13, 13,
+    13, 13, 13, 13, 14, 13, 13, 13, 13
+  };
+
+  RunFilterLevel(src_c, rows, cols, src_pitch, q2mbl(70), kExpectedOutput);
+
+  delete[] c_mem;
+}
+
+// At the level given by q2mbl(0) the output is expected to equal the input
+// image, in which row r holds the value r.
+TEST_P(VpxMbPostProcDownTest, CheckLowFilterOutput) {
+  const int rows = 16;
+  const int cols = 16;
+  const int src_pitch = cols;
+  const int src_top_padding = 8;
+  const int src_bottom_padding = 17;
+  const int padded_rows = src_top_padding + rows + src_bottom_padding;
+  const int src_size = padded_rows * cols;
+
+  // Reference image: identical to the unfiltered input, without padding rows.
+  unsigned char *expected_output = new unsigned char[rows * cols];
+  ASSERT_TRUE(expected_output != NULL);
+  SetRows(expected_output, rows, cols);
+
+  // Working image: padding rows hold 10, visible rows hold their row index.
+  unsigned char *const c_mem = new unsigned char[src_size];
+  ASSERT_TRUE(c_mem != NULL);
+  memset(c_mem, 10, src_size);
+  unsigned char *const src_c = &c_mem[src_top_padding * src_pitch];
+  SetRows(src_c, rows, cols);
+
+  RunFilterLevel(src_c, rows, cols, src_pitch, q2mbl(0), expected_output);
+
+  delete[] expected_output;
+  delete[] c_mem;
+}
+
+// Verifies that the implementation under test produces the same output as
+// vpx_mbpost_proc_down_c() for every level in [0, 100), using two different
+// noisy inputs per level.
+TEST_P(VpxMbPostProcDownTest, CheckCvsAssembly) {
+  const int rows = 16;
+  const int cols = 16;
+  const int src_pitch = cols;
+  // Each buffer holds 8 extra rows above and 17 below the visible image.
+  const int src_top_padding = 8;
+  const int src_bottom_padding = 17;
+  const int src_size = cols * (rows + src_top_padding + src_bottom_padding);
+  unsigned char *const c_mem = new unsigned char[src_size];
+  unsigned char *const asm_mem = new unsigned char[src_size];
+  ASSERT_TRUE(c_mem != NULL);
+  ASSERT_TRUE(asm_mem != NULL);
+  unsigned char *const src_c = c_mem + src_top_padding * src_pitch;
+  unsigned char *const src_asm = asm_mem + src_top_padding * src_pitch;
+
+  for (int level = 0; level < 100; level++) {
+    // First input: row index plus the noise written by SetRandom().
+    memset(c_mem, 10, src_size);
+    memset(asm_mem, 10, src_size);
+    SetRandom(src_c, src_asm, rows, cols, src_pitch);
+    vpx_mbpost_proc_down_c(src_c, src_pitch, rows, cols, q2mbl(level));
+    ASM_REGISTER_STATE_CHECK(
+        GetParam()(src_asm, src_pitch, rows, cols, q2mbl(level)));
+    // Both arguments are non-const, so this resolves to the overload that
+    // compares the two images row against row.
+    RunComparison(src_c, src_asm, rows, cols, src_pitch);
+
+    // Second input: rebuilt from scratch with SetRandomSaturation().
+    memset(c_mem, 10, src_size);
+    memset(asm_mem, 10, src_size);
+    SetRandomSaturation(src_c, src_asm, rows, cols, src_pitch);
+    vpx_mbpost_proc_down_c(src_c, src_pitch, rows, cols, q2mbl(level));
+    ASM_REGISTER_STATE_CHECK(
+        GetParam()(src_asm, src_pitch, rows, cols, q2mbl(level)));
+    RunComparison(src_c, src_asm, rows, cols, src_pitch);
+  }
+
+  delete[] c_mem;
+  delete[] asm_mem;
+}
+
+// C implementations: one instantiation per fixture.
+INSTANTIATE_TEST_CASE_P(
+    C, VpxPostProcDownAndAcrossMbRowTest,
+    ::testing::Values(vpx_post_proc_down_and_across_mb_row_c));
+
+INSTANTIATE_TEST_CASE_P(C, VpxMbPostProcAcrossIpTest,
+                        ::testing::Values(vpx_mbpost_proc_across_ip_c));
+
+INSTANTIATE_TEST_CASE_P(C, VpxMbPostProcDownTest,
+                        ::testing::Values(vpx_mbpost_proc_down_c));

 #if HAVE_SSE2
-INSTANTIATE_TEST_CASE_P(SSE2, VP8PostProcessingFilterTest,
-    ::testing::Values(vp8_post_proc_down_and_across_mb_row_sse2));
-#endif
+// SSE2 implementations: one instantiation per fixture.
+INSTANTIATE_TEST_CASE_P(
+    SSE2, VpxPostProcDownAndAcrossMbRowTest,
+    ::testing::Values(vpx_post_proc_down_and_across_mb_row_sse2));
+
+INSTANTIATE_TEST_CASE_P(SSE2, VpxMbPostProcAcrossIpTest,
+                        ::testing::Values(vpx_mbpost_proc_across_ip_sse2));
+
+INSTANTIATE_TEST_CASE_P(SSE2, VpxMbPostProcDownTest,
+                        ::testing::Values(vpx_mbpost_proc_down_sse2));
+#endif  // HAVE_SSE2
+
+#if HAVE_NEON
+// NEON implementations. VpxMbPostProcDownTest has no NEON instantiation here.
+INSTANTIATE_TEST_CASE_P(
+    NEON, VpxPostProcDownAndAcrossMbRowTest,
+    ::testing::Values(vpx_post_proc_down_and_across_mb_row_neon));
+
+INSTANTIATE_TEST_CASE_P(NEON, VpxMbPostProcAcrossIpTest,
+                        ::testing::Values(vpx_mbpost_proc_across_ip_neon));
+#endif  // HAVE_NEON
+
+#if HAVE_MSA
+// MSA implementations: one instantiation per fixture.
+INSTANTIATE_TEST_CASE_P(
+    MSA, VpxPostProcDownAndAcrossMbRowTest,
+    ::testing::Values(vpx_post_proc_down_and_across_mb_row_msa));
+
+INSTANTIATE_TEST_CASE_P(MSA, VpxMbPostProcAcrossIpTest,
+                        ::testing::Values(vpx_mbpost_proc_across_ip_msa));
+
+INSTANTIATE_TEST_CASE_P(MSA, VpxMbPostProcDownTest,
+                        ::testing::Values(vpx_mbpost_proc_down_msa));
+#endif  // HAVE_MSA

 }  // namespace
--- a/test/predict_test.cc
+++ b/test/predict_test.cc
@@ -0,0 +1,376 @@
+/*
+ *  Copyright (c) 2013 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <stdlib.h>
+#include <string.h>
+
+#include "third_party/googletest/src/include/gtest/gtest.h"
+
+#include "./vp8_rtcd.h"
+#include "./vpx_config.h"
+#include "test/acm_random.h"
+#include "test/clear_system_state.h"
+#include "test/register_state_check.h"
+#include "test/util.h"
+#include "vpx/vpx_integer.h"
+#include "vpx_mem/vpx_mem.h"
+
+namespace {
+
+using libvpx_test::ACMRandom;
+using std::tr1::make_tuple;
+
+// Signature shared by the prediction functions under test: a source pointer
+// and stride, an x/y offset pair (the tests in this file use values in
+// [0, 8)), and a destination pointer and stride.
+typedef void (*PredictFunc)(uint8_t *src_ptr, int src_pixels_per_line,
+                            int xoffset, int yoffset, uint8_t *dst_ptr,
+                            int dst_pitch);
+
+// Test parameter: (block width, block height, function under test).
+typedef std::tr1::tuple<int, int, PredictFunc> PredictParam;
+
+class PredictTestBase : public ::testing::TestWithParam<PredictParam> {
+ public:
+  // Unpacks (width, height, function) from the test parameter and nulls the
+  // buffer pointers. Buffers, dst_stride_ and padded_dst_size_ are set later
+  // in SetUp().
+  PredictTestBase()
+      : width_(GET_PARAM(0)), height_(GET_PARAM(1)), predict_(GET_PARAM(2)),
+        src_(NULL), padded_dst_(NULL), dst_(NULL), dst_c_(NULL) {}
+
+  // Allocates and initializes the three working buffers: the source block,
+  // the guarded destination and the 16x16 reference output.
+  virtual void SetUp() {
+    // Source pixels, zeroed until a test fills them in.
+    src_ = new uint8_t[kSrcSize];
+    ASSERT_TRUE(src_ != NULL);
+    memset(src_, 0, kSrcSize);
+
+    // padded_dst_ surrounds the destination block with kBorderSize pixels on
+    // every side so that out of bounds writes can be detected. The whole
+    // buffer starts out as 128.
+    const int padded_width = width_ + 2 * kBorderSize;
+    const int padded_height = height_ + 2 * kBorderSize;
+    dst_stride_ = padded_width;
+    padded_dst_size_ = padded_width * padded_height;
+    padded_dst_ =
+        reinterpret_cast<uint8_t *>(vpx_memalign(16, padded_dst_size_));
+    ASSERT_TRUE(padded_dst_ != NULL);
+    memset(padded_dst_, 128, padded_dst_size_);
+    // dst_ addresses the first pixel inside the guard band.
+    dst_ = &padded_dst_[kBorderSize * dst_stride_ + kBorderSize];
+
+    // Output of the reference function, always 16x16 and zeroed.
+    dst_c_ = new uint8_t[16 * 16];
+    ASSERT_TRUE(dst_c_ != NULL);
+    memset(dst_c_, 0, 16 * 16);
+  }
+
+  // Releases the buffers acquired in SetUp() and resets the pointers.
+  virtual void TearDown() {
+    delete[] dst_c_;
+    dst_c_ = NULL;
+
+    // dst_ points into padded_dst_, so it is only reset, never freed.
+    vpx_free(padded_dst_);
+    padded_dst_ = NULL;
+    dst_ = NULL;
+
+    delete[] src_;
+    src_ = NULL;
+
+    libvpx_test::ClearSystemState();
+  }
+
+ protected:
+  // Make reference arrays big enough for 16x16 functions. Six-tap filters need
+  // 5 extra pixels outside of the macroblock.
+  static const int kSrcStride = 21;
+  static const int kSrcSize = kSrcStride * kSrcStride;
+  // Width, in pixels, of the guard band placed on every side of the
+  // destination block inside padded_dst_.
+  static const int kBorderSize = 16;
+
+  // Block dimensions and function under test, taken from the test parameter.
+  int width_;
+  int height_;
+  PredictFunc predict_;
+  // Source pixels (kSrcSize bytes).
+  uint8_t *src_;
+  // Guarded destination buffer, the block inside it, and the buffer's size in
+  // bytes.
+  uint8_t *padded_dst_;
+  uint8_t *dst_;
+  int padded_dst_size_;
+  // 16x16 output of the reference function.
+  uint8_t *dst_c_;
+  // Distance in bytes between rows of dst_.
+  int dst_stride_;
+
+  // Compares the top-left width_ x height_ region of 'a' and 'b' one row at a
+  // time, recording a non-fatal failure for each row that differs. Returns
+  // false if the current test has recorded any failure, including failures
+  // from before this call.
+  bool CompareBuffers(const uint8_t *a, int a_stride, const uint8_t *b,
+                      int b_stride) const {
+    for (int height = 0; height < height_; ++height) {
+      EXPECT_EQ(0, memcmp(a + height * a_stride, b + height * b_stride,
+                          sizeof(*a) * width_))
+          << "Row " << height << " does not match.";
+    }
+
+    return !HasFailure();
+  }
+
+  // Given a block of memory 'a' with size 'a_size', determine if all regions
+  // excepting block 'b' described by 'b_stride', 'b_height', and 'b_width'
+  // match pixel value 'c'.
+  //
+  // The space between consecutive rows of 'b' (b_stride - b_width bytes) is
+  // split into a left and a right border; when that space is odd the extra
+  // byte goes to the right border. Every mismatch is recorded as a non-fatal
+  // failure. Returns false if the current test has recorded any failure,
+  // including failures from before this call.
+  bool CheckBorder(const uint8_t *a, int a_size, const uint8_t *b, int b_width,
+                   int b_height, int b_stride, uint8_t c) const {
+    const uint8_t *a_end = a + a_size;
+    // Offset from 'b' to one past the last pixel of its final row. The final
+    // row contributes only 'b_width' pixels, so just (b_height - 1) full
+    // strides precede it.
+    const int b_size = (b_stride * (b_height - 1)) + b_width;
+    const uint8_t *b_end = b + b_size;
+    const int left_border = (b_stride - b_width) / 2;
+    const int right_border = left_border + ((b_stride - b_width) % 2);
+
+    EXPECT_GE(b - left_border, a) << "'b' does not start within 'a'";
+    EXPECT_LE(b_end + right_border, a_end) << "'b' does not end within 'a'";
+
+    // Top border.
+    for (int pixel = 0; pixel < b - a - left_border; ++pixel) {
+      EXPECT_EQ(c, a[pixel]) << "Mismatch at " << pixel << " in top border.";
+    }
+
+    // Left border.
+    for (int height = 0; height < b_height; ++height) {
+      for (int width = left_border; width > 0; --width) {
+        EXPECT_EQ(c, b[height * b_stride - width])
+            << "Mismatch at row " << height << " column " << left_border - width
+            << " in left border.";
+      }
+    }
+
+    // Right border.
+    for (int height = 0; height < b_height; ++height) {
+      for (int width = b_width; width < b_width + right_border; ++width) {
+        EXPECT_EQ(c, b[height * b_stride + width])
+            << "Mismatch at row " << height << " column " << width - b_width
+            << " in right border.";
+      }
+    }
+
+    // Bottom border: everything that follows the right border of the last
+    // row, including the pixels directly below the block.
+    const int bottom_start = static_cast<int>(b_end + right_border - a);
+    for (int pixel = bottom_start; pixel < a_size; ++pixel) {
+      EXPECT_EQ(c, a[pixel]) << "Mismatch at " << pixel << " in bottom border.";
+    }
+
+    return !HasFailure();
+  }
+
+  // Feeds fresh random source pixels to both 'reference' and the function
+  // under test for each sub-pixel offset pair, then requires matching output
+  // and an untouched guard band around the destination.
+  void TestWithRandomData(PredictFunc reference) {
+    ACMRandom rnd(ACMRandom::DeterministicSeed());
+    uint8_t *const src = &src_[kSrcStride * 2 + 2];
+
+    // Run tests for almost all possible offsets. The pair (0, 0) represents a
+    // copy which is not required to be handled by this module, so the inner
+    // loop starts at 1 when xoffset is 0.
+    for (int xoffset = 0; xoffset < 8; ++xoffset) {
+      for (int yoffset = (xoffset == 0) ? 1 : 0; yoffset < 8; ++yoffset) {
+        for (uint8_t *p = src_; p != src_ + kSrcSize; ++p) *p = rnd.Rand8();
+
+        reference(src, kSrcStride, xoffset, yoffset, dst_c_, 16);
+        ASM_REGISTER_STATE_CHECK(
+            predict_(src, kSrcStride, xoffset, yoffset, dst_, dst_stride_));
+
+        ASSERT_TRUE(CompareBuffers(dst_c_, 16, dst_, dst_stride_));
+        ASSERT_TRUE(CheckBorder(padded_dst_, padded_dst_size_, dst_, width_,
+                                height_, dst_stride_, 128));
+      }
+    }
+  }
+
+  // For 4x4 blocks, repeats the random-data comparison with the destination
+  // pointer and stride both shifted by 1, 2 and 3 bytes. Other block sizes are
+  // skipped.
+  void TestWithUnalignedDst(PredictFunc reference) {
+    ACMRandom rnd(ACMRandom::DeterministicSeed());
+
+    // Only the 4x4 need to be able to handle unaligned writes.
+    if (width_ != 4 || height_ != 4) return;
+
+    uint8_t *const src = &src_[kSrcStride * 2 + 2];
+    for (int xoffset = 0; xoffset < 8; ++xoffset) {
+      // Offset (0, 0) is skipped: the inner loop starts at 1 when xoffset is 0.
+      for (int yoffset = (xoffset == 0) ? 1 : 0; yoffset < 8; ++yoffset) {
+        for (uint8_t *p = src_; p != src_ + kSrcSize; ++p) *p = rnd.Rand8();
+        reference(src, kSrcStride, xoffset, yoffset, dst_c_, 16);
+
+        for (int i = 1; i < 4; ++i) {
+          // Restore the guard value so each shift is checked independently.
+          memset(padded_dst_, 128, padded_dst_size_);
+
+          ASM_REGISTER_STATE_CHECK(predict_(src, kSrcStride, xoffset, yoffset,
+                                            dst_ + i, dst_stride_ + i));
+
+          ASSERT_TRUE(CompareBuffers(dst_c_, 16, dst_ + i, dst_stride_ + i));
+          ASSERT_TRUE(CheckBorder(padded_dst_, padded_dst_size_, dst_ + i,
+                                  width_, height_, dst_stride_ + i, 128));
+        }
+      }
+    }
+  }
+};
+
+// Six-tap prediction tests. vp8_sixtap_predict16x16_c is the reference for
+// every parameterized block size; only the top-left width_ x height_ region of
+// its 16x16 output is compared with the function under test.
+class SixtapPredictTest : public PredictTestBase {};
+
+TEST_P(SixtapPredictTest, TestWithRandomData) {
+  TestWithRandomData(vp8_sixtap_predict16x16_c);
+}
+TEST_P(SixtapPredictTest, TestWithUnalignedDst) {
+  TestWithUnalignedDst(vp8_sixtap_predict16x16_c);
+}
+
+// Runs the function under test on a fixed source block with offsets (2, 2)
+// and compares the top-left width_ x height_ region of its output with a
+// stored 16x16 result.
+TEST_P(SixtapPredictTest, TestWithPresetData) {
+  // Test input
+  static const uint8_t kTestData[kSrcSize] = {
+    184, 4,   191, 82,  92,  41,  0,   1,   226, 236, 172, 20,  182, 42,  226,
+    177, 79,  94,  77,  179, 203, 206, 198, 22,  192, 19,  75,  17,  192, 44,
+    233, 120, 48,  168, 203, 141, 210, 203, 143, 180, 184, 59,  201, 110, 102,
+    171, 32,  182, 10,  109, 105, 213, 60,  47,  236, 253, 67,  55,  14,  3,
+    99,  247, 124, 148, 159, 71,  34,  114, 19,  177, 38,  203, 237, 239, 58,
+    83,  155, 91,  10,  166, 201, 115, 124, 5,   163, 104, 2,   231, 160, 16,
+    234, 4,   8,   103, 153, 167, 174, 187, 26,  193, 109, 64,  141, 90,  48,
+    200, 174, 204, 36,  184, 114, 237, 43,  238, 242, 207, 86,  245, 182, 247,
+    6,   161, 251, 14,  8,   148, 182, 182, 79,  208, 120, 188, 17,  6,   23,
+    65,  206, 197, 13,  242, 126, 128, 224, 170, 110, 211, 121, 197, 200, 47,
+    188, 207, 208, 184, 221, 216, 76,  148, 143, 156, 100, 8,   89,  117, 14,
+    112, 183, 221, 54,  197, 208, 180, 69,  176, 94,  180, 131, 215, 121, 76,
+    7,   54,  28,  216, 238, 249, 176, 58,  142, 64,  215, 242, 72,  49,  104,
+    87,  161, 32,  52,  216, 230, 4,   141, 44,  181, 235, 224, 57,  195, 89,
+    134, 203, 144, 162, 163, 126, 156, 84,  185, 42,  148, 145, 29,  221, 194,
+    134, 52,  100, 166, 105, 60,  140, 110, 201, 184, 35,  181, 153, 93,  121,
+    243, 227, 68,  131, 134, 232, 2,   35,  60,  187, 77,  209, 76,  106, 174,
+    15,  241, 227, 115, 151, 77,  175, 36,  187, 121, 221, 223, 47,  118, 61,
+    168, 105, 32,  237, 236, 167, 213, 238, 202, 17,  170, 24,  226, 247, 131,
+    145, 6,   116, 117, 121, 11,  194, 41,  48,  126, 162, 13,  93,  209, 131,
+    154, 122, 237, 187, 103, 217, 99,  60,  200, 45,  78,  115, 69,  49,  106,
+    200, 194, 112, 60,  56,  234, 72,  251, 19,  120, 121, 182, 134, 215, 135,
+    10,  114, 2,   247, 46,  105, 209, 145, 165, 153, 191, 243, 12,  5,   36,
+    119, 206, 231, 231, 11,  32,  209, 83,  27,  229, 204, 149, 155, 83,  109,
+    35,  93,  223, 37,  84,  14,  142, 37,  160, 52,  191, 96,  40,  204, 101,
+    77,  67,  52,  53,  43,  63,  85,  253, 147, 113, 226, 96,  6,   125, 179,
+    115, 161, 17,  83,  198, 101, 98,  85,  139, 3,   137, 75,  99,  178, 23,
+    201, 255, 91,  253, 52,  134, 60,  138, 131, 208, 251, 101, 48,  2,   227,
+    228, 118, 132, 245, 202, 75,  91,  44,  160, 231, 47,  41,  50,  147, 220,
+    74,  92,  219, 165, 89,  16
+  };
+
+  // Expected results for xoffset = 2 and yoffset = 2.
+  static const int kExpectedDstStride = 16;
+  static const uint8_t kExpectedDst[256] = {
+    117, 102, 74,  135, 42,  98,  175, 206, 70,  73,  222, 197, 50,  24,  39,
+    49,  38,  105, 90,  47,  169, 40,  171, 215, 200, 73,  109, 141, 53,  85,
+    177, 164, 79,  208, 124, 89,  212, 18,  81,  145, 151, 164, 217, 153, 91,
+    154, 102, 102, 159, 75,  164, 152, 136, 51,  213, 219, 186, 116, 193, 224,
+    186, 36,  231, 208, 84,  211, 155, 167, 35,  59,  42,  76,  216, 149, 73,
+    201, 78,  149, 184, 100, 96,  196, 189, 198, 188, 235, 195, 117, 129, 120,
+    129, 49,  25,  133, 113, 69,  221, 114, 70,  143, 99,  157, 108, 189, 140,
+    78,  6,   55,  65,  240, 255, 245, 184, 72,  90,  100, 116, 131, 39,  60,
+    234, 167, 33,  160, 88,  185, 200, 157, 159, 176, 127, 151, 138, 102, 168,
+    106, 170, 86,  82,  219, 189, 76,  33,  115, 197, 106, 96,  198, 136, 97,
+    141, 237, 151, 98,  137, 191, 185, 2,   57,  95,  142, 91,  255, 185, 97,
+    137, 76,  162, 94,  173, 131, 193, 161, 81,  106, 72,  135, 222, 234, 137,
+    66,  137, 106, 243, 210, 147, 95,  15,  137, 110, 85,  66,  16,  96,  167,
+    147, 150, 173, 203, 140, 118, 196, 84,  147, 160, 19,  95,  101, 123, 74,
+    132, 202, 82,  166, 12,  131, 166, 189, 170, 159, 85,  79,  66,  57,  152,
+    132, 203, 194, 0,   1,   56,  146, 180, 224, 156, 28,  83,  181, 79,  76,
+    80,  46,  160, 175, 59,  106, 43,  87,  75,  136, 85,  189, 46,  71,  200,
+    90
+  };
+
+  // The function signature takes a non-const source pointer, hence the
+  // const_cast on the constant input table.
+  ASM_REGISTER_STATE_CHECK(
+      predict_(const_cast<uint8_t *>(kTestData) + kSrcStride * 2 + 2,
+               kSrcStride, 2, 2, dst_, dst_stride_));
+
+  ASSERT_TRUE(
+      CompareBuffers(kExpectedDst, kExpectedDstStride, dst_, dst_stride_));
+}
+
+// Six-tap instantiations. Each tuple is (width, height, function); the sets
+// below are grouped by instruction set and not every set covers every size.
+INSTANTIATE_TEST_CASE_P(
+    C, SixtapPredictTest,
+    ::testing::Values(make_tuple(16, 16, &vp8_sixtap_predict16x16_c),
+                      make_tuple(8, 8, &vp8_sixtap_predict8x8_c),
+                      make_tuple(8, 4, &vp8_sixtap_predict8x4_c),
+                      make_tuple(4, 4, &vp8_sixtap_predict4x4_c)));
+#if HAVE_NEON
+INSTANTIATE_TEST_CASE_P(
+    NEON, SixtapPredictTest,
+    ::testing::Values(make_tuple(16, 16, &vp8_sixtap_predict16x16_neon),
+                      make_tuple(8, 8, &vp8_sixtap_predict8x8_neon),
+                      make_tuple(8, 4, &vp8_sixtap_predict8x4_neon),
+                      make_tuple(4, 4, &vp8_sixtap_predict4x4_neon)));
+#endif
+#if HAVE_MMX
+// MMX: 4x4 only.
+INSTANTIATE_TEST_CASE_P(
+    MMX, SixtapPredictTest,
+    ::testing::Values(make_tuple(4, 4, &vp8_sixtap_predict4x4_mmx)));
+#endif
+#if HAVE_SSE2
+// SSE2: no 4x4 entry.
+INSTANTIATE_TEST_CASE_P(
+    SSE2, SixtapPredictTest,
+    ::testing::Values(make_tuple(16, 16, &vp8_sixtap_predict16x16_sse2),
+                      make_tuple(8, 8, &vp8_sixtap_predict8x8_sse2),
+                      make_tuple(8, 4, &vp8_sixtap_predict8x4_sse2)));
+#endif
+#if HAVE_SSSE3
+INSTANTIATE_TEST_CASE_P(
+    SSSE3, SixtapPredictTest,
+    ::testing::Values(make_tuple(16, 16, &vp8_sixtap_predict16x16_ssse3),
+                      make_tuple(8, 8, &vp8_sixtap_predict8x8_ssse3),
+                      make_tuple(8, 4, &vp8_sixtap_predict8x4_ssse3),
+                      make_tuple(4, 4, &vp8_sixtap_predict4x4_ssse3)));
+#endif
+#if HAVE_MSA
+INSTANTIATE_TEST_CASE_P(
+    MSA, SixtapPredictTest,
+    ::testing::Values(make_tuple(16, 16, &vp8_sixtap_predict16x16_msa),
+                      make_tuple(8, 8, &vp8_sixtap_predict8x8_msa),
+                      make_tuple(8, 4, &vp8_sixtap_predict8x4_msa),
+                      make_tuple(4, 4, &vp8_sixtap_predict4x4_msa)));
+#endif
+
+// Bilinear prediction tests. vp8_bilinear_predict16x16_c is the reference for
+// every parameterized block size; only the top-left width_ x height_ region of
+// its 16x16 output is compared with the function under test.
+class BilinearPredictTest : public PredictTestBase {};
+
+TEST_P(BilinearPredictTest, TestWithRandomData) {
+  TestWithRandomData(vp8_bilinear_predict16x16_c);
+}
+TEST_P(BilinearPredictTest, TestWithUnalignedDst) {
+  TestWithUnalignedDst(vp8_bilinear_predict16x16_c);
+}
+
+// Bilinear instantiations. Each tuple is (width, height, function); the sets
+// below are grouped by instruction set and not every set covers every size.
+INSTANTIATE_TEST_CASE_P(
+    C, BilinearPredictTest,
+    ::testing::Values(make_tuple(16, 16, &vp8_bilinear_predict16x16_c),
+                      make_tuple(8, 8, &vp8_bilinear_predict8x8_c),
+                      make_tuple(8, 4, &vp8_bilinear_predict8x4_c),
+                      make_tuple(4, 4, &vp8_bilinear_predict4x4_c)));
+#if HAVE_NEON
+INSTANTIATE_TEST_CASE_P(
+    NEON, BilinearPredictTest,
+    ::testing::Values(make_tuple(16, 16, &vp8_bilinear_predict16x16_neon),
+                      make_tuple(8, 8, &vp8_bilinear_predict8x8_neon),
+                      make_tuple(8, 4, &vp8_bilinear_predict8x4_neon),
+                      make_tuple(4, 4, &vp8_bilinear_predict4x4_neon)));
+#endif
+#if HAVE_MMX
+// MMX: 8x4 and 4x4 only.
+INSTANTIATE_TEST_CASE_P(
+    MMX, BilinearPredictTest,
+    ::testing::Values(make_tuple(8, 4, &vp8_bilinear_predict8x4_mmx),
+                      make_tuple(4, 4, &vp8_bilinear_predict4x4_mmx)));
+#endif
+#if HAVE_SSE2
+// SSE2: 16x16 and 8x8 only.
+INSTANTIATE_TEST_CASE_P(
+    SSE2, BilinearPredictTest,
+    ::testing::Values(make_tuple(16, 16, &vp8_bilinear_predict16x16_sse2),
+                      make_tuple(8, 8, &vp8_bilinear_predict8x8_sse2)));
+#endif
+#if HAVE_SSSE3
+// SSSE3: 16x16 and 8x8 only.
+INSTANTIATE_TEST_CASE_P(
+    SSSE3, BilinearPredictTest,
+    ::testing::Values(make_tuple(16, 16, &vp8_bilinear_predict16x16_ssse3),
+                      make_tuple(8, 8, &vp8_bilinear_predict8x8_ssse3)));
+#endif
+#if HAVE_MSA
+INSTANTIATE_TEST_CASE_P(
+    MSA, BilinearPredictTest,
+    ::testing::Values(make_tuple(16, 16, &vp8_bilinear_predict16x16_msa),
+                      make_tuple(8, 8, &vp8_bilinear_predict8x8_msa),
+                      make_tuple(8, 4, &vp8_bilinear_predict8x4_msa),
+                      make_tuple(4, 4, &vp8_bilinear_predict4x4_msa)));
+#endif
+}  // namespace
--- a/test/quantize_test.cc
+++ b/test/quantize_test.cc
@@ -11,13 +11,13 @@
 #include <string.h>

 #include "third_party/googletest/src/include/gtest/gtest.h"
+
+#include "./vpx_config.h"
+#include "./vp8_rtcd.h"
 #include "test/acm_random.h"
 #include "test/clear_system_state.h"
 #include "test/register_state_check.h"
 #include "test/util.h"
-
-#include "./vpx_config.h"
-#include "./vp8_rtcd.h"
 #include "vp8/common/blockd.h"
 #include "vp8/common/onyx.h"
 #include "vp8/encoder/block.h"
@@ -192,4 +192,12 @@ INSTANTIATE_TEST_CASE_P(NEON, QuantizeTest,
                        ::testing::Values(make_tuple(&vp8_fast_quantize_b_neon,
                                                     &vp8_fast_quantize_b_c)));
 #endif  // HAVE_NEON
+
+#if HAVE_MSA
+// MSA quantizers, each paired in its tuple with the corresponding C function.
+INSTANTIATE_TEST_CASE_P(
+    MSA, QuantizeTest,
+    ::testing::Values(
+        make_tuple(&vp8_fast_quantize_b_msa, &vp8_fast_quantize_b_c),
+        make_tuple(&vp8_regular_quantize_b_msa, &vp8_regular_quantize_b_c)));
+#endif  // HAVE_MSA
 }  // namespace
--- a/test/realtime_test.cc
+++ b/test/realtime_test.cc
@@ -0,0 +1,63 @@
+/*
+ *  Copyright (c) 2016 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+#include "test/codec_factory.h"
+#include "test/encode_test_driver.h"
+#include "test/util.h"
+#include "test/video_source.h"
+#include "third_party/googletest/src/include/gtest/gtest.h"
+
+namespace {
+
+// Dimensions of the generated source video and the number of frames fed to
+// the encoder by the test below.
+const int kVideoSourceWidth = 320;
+const int kVideoSourceHeight = 240;
+const int kFramesToEncode = 2;
+
+// Encoder test fixture that selects real-time mode, forces the configured
+// pass to VPX_RC_FIRST_PASS at the start of each pass, and counts the frame
+// packets reported through FramePktHook().
+class RealtimeTest
+    : public ::libvpx_test::EncoderTest,
+      public ::libvpx_test::CodecTestWithParam<libvpx_test::TestMode> {
+ protected:
+  RealtimeTest() : EncoderTest(GET_PARAM(0)), frame_packets_(0) {}
+  virtual ~RealtimeTest() {}
+
+  // Initializes the encoder configuration, sets the frame lag to zero and
+  // selects real-time mode.
+  virtual void SetUp() {
+    InitializeConfig();
+    cfg_.g_lag_in_frames = 0;
+    SetMode(::libvpx_test::kRealTime);
+  }
+
+  virtual void BeginPassHook(unsigned int /*pass*/) {
+    // TODO(tomfinegan): We're changing the pass value here to make sure
+    // we get frames when real time mode is combined with |g_pass| set to
+    // VPX_RC_FIRST_PASS. This is necessary because EncoderTest::RunLoop() sets
+    // the pass value based on the mode passed into EncoderTest::SetMode(),
+    // which overrides the one specified in SetUp() above.
+    cfg_.g_pass = VPX_RC_FIRST_PASS;
+  }
+  // Counts each frame packet handed to this hook.
+  virtual void FramePktHook(const vpx_codec_cx_pkt_t * /*pkt*/) {
+    frame_packets_++;
+  }
+
+  // Number of frame packets seen so far.
+  int frame_packets_;
+};
+
+// Encodes kFramesToEncode frames of random video and expects exactly one
+// frame packet per input frame.
+TEST_P(RealtimeTest, RealtimeFirstPassProducesFrames) {
+  ::libvpx_test::RandomVideoSource video;
+  video.SetSize(kVideoSourceWidth, kVideoSourceHeight);
+  video.set_limit(kFramesToEncode);
+  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+  EXPECT_EQ(kFramesToEncode, frame_packets_);
+}
+
+// Instantiate the test for both VP8 and VP9, in real-time mode only.
+VP8_INSTANTIATE_TEST_CASE(RealtimeTest,
+                          ::testing::Values(::libvpx_test::kRealTime));
+VP9_INSTANTIATE_TEST_CASE(RealtimeTest,
+                          ::testing::Values(::libvpx_test::kRealTime));
+
+}  // namespace
--- a/Show More
+++ b/Show More