Precompute token tab

Change-Id: I85e172352dacf7a990d8d9296e071f9d603fcb7b
Write tokens with rans
2015-10-14 18:03:36 -07:00 · 2015-10-14 17:57:56 -07:00 · 2015-09-14 11:40:34 -07:00 · 2015-09-11 16:57:15 -07:00 · 2015-09-11 16:37:01 -07:00 · 2015-09-11 14:17:59 -07:00
728 changed files with 113704 additions and 38866 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -30,14 +30,17 @@
 /examples/decode_with_partial_drops
 /examples/example_xma
 /examples/postproc
+/examples/resize_util
 /examples/set_maps
 /examples/simple_decoder
 /examples/simple_encoder
 /examples/twopass_encoder
 /examples/vp8_multi_resolution_encoder
 /examples/vp8cx_set_ref
+/examples/vp9_lossless_encoder
 /examples/vp9_spatial_scalable_encoder
 /examples/vpx_temporal_scalable_patterns
+/examples/vpx_temporal_svc_encoder
 /ivfdec
 /ivfdec.dox
 /ivfenc
@@ -45,12 +48,14 @@
 /libvpx.so*
 /libvpx.ver
 /samples.dox
+/test_intra_pred_speed
 /test_libvpx
 /vp8_api1_migration.dox
 /vp[89x]_rtcd.h
 /vpx.pc
 /vpx_config.c
 /vpx_config.h
+/vpx_dsp_rtcd.h
 /vpx_scale_rtcd.h
 /vpx_version.h
 /vpxdec
--- a/README
+++ b/README
@@ -59,6 +59,7 @@ COMPILING THE APPLICATIONS/LIBRARIES:
    armv7-none-rvct
    armv7-win32-vs11
    armv7-win32-vs12
+    armv7-win32-vs14
    armv7s-darwin-gcc
    mips32-linux-gcc
    mips64-linux-gcc
@@ -85,6 +86,8 @@ COMPILING THE APPLICATIONS/LIBRARIES:
    x86-win32-vs10
    x86-win32-vs11
    x86-win32-vs12
+    x86-win32-vs14
+    x86_64-android-gcc
    x86_64-darwin9-gcc
    x86_64-darwin10-gcc
    x86_64-darwin11-gcc
@@ -101,6 +104,7 @@ COMPILING THE APPLICATIONS/LIBRARIES:
    x86_64-win64-vs10
    x86_64-win64-vs11
    x86_64-win64-vs12
+    x86_64-win64-vs14
    generic-gnu

  The generic-gnu target, in conjunction with the CROSS environment variable,
--- a/build/make/Android.mk
+++ b/build/make/Android.mk
@@ -67,6 +67,8 @@ else ifeq  ($(TARGET_ARCH_ABI),arm64-v8a)
  LOCAL_ARM_MODE := arm
 else ifeq ($(TARGET_ARCH_ABI),x86)
  include $(CONFIG_DIR)libs-x86-android-gcc.mk
+else ifeq ($(TARGET_ARCH_ABI),x86_64)
+  include $(CONFIG_DIR)libs-x86_64-android-gcc.mk
 else ifeq ($(TARGET_ARCH_ABI),mips)
  include $(CONFIG_DIR)libs-mips-android-gcc.mk
 else
@@ -164,17 +166,22 @@ endif

 # Add a dependency to force generation of the RTCD files.
 define rtcd_dep_template
+rtcd_dep_template_SRCS := $(addprefix $(LOCAL_PATH)/, $(LOCAL_SRC_FILES))
+rtcd_dep_template_SRCS := $$(rtcd_dep_template_SRCS:.neon=)
 ifeq ($(CONFIG_VP8), yes)
-$(foreach file, $(LOCAL_SRC_FILES), $(LOCAL_PATH)/$(file)): vp8_rtcd.h
+$$(rtcd_dep_template_SRCS): vp8_rtcd.h
 endif
 ifeq ($(CONFIG_VP9), yes)
-$(foreach file, $(LOCAL_SRC_FILES), $(LOCAL_PATH)/$(file)): vp9_rtcd.h
+$$(rtcd_dep_template_SRCS): vp9_rtcd.h
 endif
-$(foreach file, $(LOCAL_SRC_FILES), $(LOCAL_PATH)/$(file)): vpx_scale_rtcd.h
-$(foreach file, $(LOCAL_SRC_FILES), $(LOCAL_PATH)/$(file)): vpx_dsp_rtcd.h
+ifeq ($(CONFIG_VP10), yes)
+$$(rtcd_dep_template_SRCS): vp10_rtcd.h
+endif
+$$(rtcd_dep_template_SRCS): vpx_scale_rtcd.h
+$$(rtcd_dep_template_SRCS): vpx_dsp_rtcd.h

-ifeq ($(TARGET_ARCH_ABI),x86)
-$(foreach file, $(LOCAL_SRC_FILES), $(LOCAL_PATH)/$(file)): vpx_config.asm
+ifneq ($(findstring $(TARGET_ARCH_ABI),x86 x86_64),)
+$$(rtcd_dep_template_SRCS): vpx_config.asm
 endif
 endef

--- a/build/make/Makefile
+++ b/build/make/Makefile
@@ -140,6 +140,8 @@ $(BUILD_PFX)%_avx.c.d: CFLAGS += -mavx $(STACKREALIGN)
 $(BUILD_PFX)%_avx.c.o: CFLAGS += -mavx $(STACKREALIGN)
 $(BUILD_PFX)%_avx2.c.d: CFLAGS += -mavx2 $(STACKREALIGN)
 $(BUILD_PFX)%_avx2.c.o: CFLAGS += -mavx2 $(STACKREALIGN)
+$(BUILD_PFX)%vp9_reconintra.c.d: CFLAGS += $(STACKREALIGN)
+$(BUILD_PFX)%vp9_reconintra.c.o: CFLAGS += $(STACKREALIGN)

 $(BUILD_PFX)%.c.d: %.c
 	$(if $(quiet),@echo "    [DEP] $@")
--- a/build/make/configure.sh
+++ b/build/make/configure.sh
@@ -1081,7 +1081,9 @@ EOF
          CROSS=${CROSS:-g}
          ;;
        os2)
+          disable_feature pic
          AS=${AS:-nasm}
+          add_ldflags -Zhigh-mem
          ;;
      esac

@@ -1323,12 +1325,6 @@ EOF
    add_cflags -D_LARGEFILE_SOURCE
    add_cflags -D_FILE_OFFSET_BITS=64
  fi
-
-  # append any user defined extra cflags
-  if [ -n "${extra_cflags}" ] ; then
-    check_add_cflags ${extra_cflags} || \
-    die "Requested extra CFLAGS '${extra_cflags}' not supported by compiler"
-  fi
 }

 process_toolchain() {
--- a/build/make/gen_msvs_proj.sh
+++ b/build/make/gen_msvs_proj.sh
@@ -73,6 +73,10 @@ generate_filter() {
                open_tag File RelativePath="$f"

                if [ "$pat" == "asm" ] && $asm_use_custom_step; then
+                    # Avoid object file name collisions, i.e. vpx_config.c and
+                    # vpx_config.asm produce the same object file without
+                    # this additional suffix.
+                    objf=${objf%.obj}_asm.obj
                    for plat in "${platforms[@]}"; do
                        for cfg in Debug Release; do
                            open_tag FileConfiguration \
--- a/build/make/gen_msvs_sln.sh
+++ b/build/make/gen_msvs_sln.sh
@@ -19,13 +19,13 @@ show_help() {
    cat <<EOF
 Usage: ${self_basename} [options] file1 [file2 ...]

-This script generates a Visual Studio 2005 solution file from a list of project
+This script generates a Visual Studio solution file from a list of project
 files.

 Options:
    --help                      Print this message
    --out=outfile               Redirect output to a file
-    --ver=version               Version (7,8,9,10,11) of visual studio to generate for
+    --ver=version               Version (7,8,9,10,11,12,14) of visual studio to generate for
    --target=isa-os-cc          Target specifier
 EOF
    exit 1
@@ -255,7 +255,7 @@ for opt in "$@"; do
    ;;
    --ver=*) vs_ver="$optval"
             case $optval in
-             [789]|10|11|12)
+             [789]|10|11|12|14)
             ;;
             *) die Unrecognized Visual Studio Version in $opt
             ;;
@@ -300,12 +300,15 @@ case "${vs_ver:-8}" in
    12) sln_vers="12.00"
       sln_vers_str="Visual Studio 2013"
    ;;
+    14) sln_vers="14.00"
+       sln_vers_str="Visual Studio 2015"
+    ;;
 esac
 case "${vs_ver:-8}" in
    [789])
    sfx=vcproj
    ;;
-    10|11|12)
+    10|11|12|14)
    sfx=vcxproj
    ;;
 esac
--- a/build/make/gen_msvs_vcxproj.sh
+++ b/build/make/gen_msvs_vcxproj.sh
@@ -34,7 +34,7 @@ Options:
    --name=project_name         Name of the project (required)
    --proj-guid=GUID            GUID to use for the project
    --module-def=filename       File containing export definitions (for DLLs)
-    --ver=version               Version (10,11,12) of visual studio to generate for
+    --ver=version               Version (10,11,12,14) of visual studio to generate for
    --src-path-bare=dir         Path to root of source tree
    -Ipath/to/include           Additional include directories
    -DFLAG[=value]              Preprocessor macros to define
@@ -168,7 +168,7 @@ for opt in "$@"; do
        --ver=*)
            vs_ver="$optval"
            case "$optval" in
-                10|11|12)
+                10|11|12|14)
                ;;
                *) die Unrecognized Visual Studio Version in $opt
                ;;
@@ -218,7 +218,7 @@ guid=${guid:-`generate_uuid`}
 asm_use_custom_step=false
 uses_asm=${uses_asm:-false}
 case "${vs_ver:-11}" in
-    10|11|12)
+    10|11|12|14)
       asm_use_custom_step=$uses_asm
    ;;
 esac
@@ -344,6 +344,9 @@ generate_vcxproj() {
                # has to enable AppContainerApplication as well.
                tag_content PlatformToolset v120
            fi
+            if [ "$vs_ver" = "14" ]; then
+                tag_content PlatformToolset v140
+            fi
            tag_content CharacterSet Unicode
            if [ "$config" = "Release" ]; then
                tag_content WholeProgramOptimization true
--- a/build/make/rtcd.pl
+++ b/build/make/rtcd.pl
@@ -319,13 +319,14 @@ EOF

  print <<EOF;
 #if HAVE_DSPR2
+void vpx_dsputil_static_init();
 #if CONFIG_VP8
 void dsputil_static_init();
-dsputil_static_init();
 #endif
-#if CONFIG_VP9
-void vp9_dsputil_static_init();
-vp9_dsputil_static_init();
+
+vpx_dsputil_static_init();
+#if CONFIG_VP8
+dsputil_static_init();
 #endif
 #endif
 }
--- a/codereview.settings
+++ b/codereview.settings
@@ -0,0 +1,4 @@
+# This file is used by gcl to get repository specific information.
+GERRIT_HOST: chromium-review.googlesource.com
+GERRIT_PORT: 29418
+CODE_REVIEW_SERVER: chromium-review.googlesource.com
--- a/configure
+++ b/configure
@@ -37,6 +37,7 @@ Advanced options:
  ${toggle_vp9_highbitdepth}      use VP9 high bit depth (10/12) profiles
  ${toggle_vp8}                   VP8 codec support
  ${toggle_vp9}                   VP9 codec support
+  ${toggle_vp10}                  VP10 codec support
  ${toggle_internal_stats}        output of encoder internal stats for debug, if supported (encoders)
  ${toggle_postproc}              postprocessing
  ${toggle_vp9_postproc}          vp9 specific postprocessing
@@ -106,6 +107,7 @@ all_platforms="${all_platforms} armv7-linux-gcc"     #neon Cortex-A8
 all_platforms="${all_platforms} armv7-none-rvct"     #neon Cortex-A8
 all_platforms="${all_platforms} armv7-win32-vs11"
 all_platforms="${all_platforms} armv7-win32-vs12"
+all_platforms="${all_platforms} armv7-win32-vs14"
 all_platforms="${all_platforms} armv7s-darwin-gcc"
 all_platforms="${all_platforms} mips32-linux-gcc"
 all_platforms="${all_platforms} mips64-linux-gcc"
@@ -132,6 +134,8 @@ all_platforms="${all_platforms} x86-win32-vs9"
 all_platforms="${all_platforms} x86-win32-vs10"
 all_platforms="${all_platforms} x86-win32-vs11"
 all_platforms="${all_platforms} x86-win32-vs12"
+all_platforms="${all_platforms} x86-win32-vs14"
+all_platforms="${all_platforms} x86_64-android-gcc"
 all_platforms="${all_platforms} x86_64-darwin9-gcc"
 all_platforms="${all_platforms} x86_64-darwin10-gcc"
 all_platforms="${all_platforms} x86_64-darwin11-gcc"
@@ -148,6 +152,7 @@ all_platforms="${all_platforms} x86_64-win64-vs9"
 all_platforms="${all_platforms} x86_64-win64-vs10"
 all_platforms="${all_platforms} x86_64-win64-vs11"
 all_platforms="${all_platforms} x86_64-win64-vs12"
+all_platforms="${all_platforms} x86_64-win64-vs14"
 all_platforms="${all_platforms} generic-gnu"

 # all_targets is a list of all targets that can be configured
@@ -187,6 +192,10 @@ fi
 # disable codecs when their source directory does not exist
 [ -d "${source_path}/vp8" ] || disable_feature vp8
 [ -d "${source_path}/vp9" ] || disable_feature vp9
+[ -d "${source_path}/vp10" ] || disable_feature vp10
+
+# disable vp10 codec by default
+disable_feature vp10

 # install everything except the sources, by default. sources will have
 # to be enabled when doing dist builds, since that's no longer a common
@@ -208,10 +217,13 @@ CODECS="
    vp8_decoder
    vp9_encoder
    vp9_decoder
+    vp10_encoder
+    vp10_decoder
 "
 CODEC_FAMILIES="
    vp8
    vp9
+    vp10
 "

 ARCH_LIST="
@@ -638,7 +650,7 @@ process_toolchain() {
                 VCPROJ_SFX=vcproj
                 gen_vcproj_cmd=${source_path}/build/make/gen_msvs_proj.sh
                 ;;
-             10|11|12)
+             10|11|12|14)
                 VCPROJ_SFX=vcxproj
                 gen_vcproj_cmd=${source_path}/build/make/gen_msvs_vcxproj.sh
                 enabled werror && gen_vcproj_cmd="${gen_vcproj_cmd} --enable-werror"
@@ -704,6 +716,12 @@ EOF
    esac
    # libwebm needs to be linked with C++ standard library
    enabled webm_io && LD=${CXX}
+
+    # append any user defined extra cflags
+    if [ -n "${extra_cflags}" ] ; then
+        check_add_cflags ${extra_cflags} || \
+        die "Requested extra CFLAGS '${extra_cflags}' not supported by compiler"
+    fi
 }


--- a/examples.mk
+++ b/examples.mk
@@ -22,17 +22,18 @@ LIBYUV_SRCS +=  third_party/libyuv/include/libyuv/basic_types.h  \
                third_party/libyuv/source/planar_functions.cc \
                third_party/libyuv/source/row_any.cc \
                third_party/libyuv/source/row_common.cc \
+                third_party/libyuv/source/row_gcc.cc \
                third_party/libyuv/source/row_mips.cc \
                third_party/libyuv/source/row_neon.cc \
                third_party/libyuv/source/row_neon64.cc \
-                third_party/libyuv/source/row_posix.cc \
                third_party/libyuv/source/row_win.cc \
                third_party/libyuv/source/scale.cc \
+                third_party/libyuv/source/scale_any.cc \
                third_party/libyuv/source/scale_common.cc \
+                third_party/libyuv/source/scale_gcc.cc \
                third_party/libyuv/source/scale_mips.cc \
                third_party/libyuv/source/scale_neon.cc \
                third_party/libyuv/source/scale_neon64.cc \
-                third_party/libyuv/source/scale_posix.cc \
                third_party/libyuv/source/scale_win.cc \

 LIBWEBM_MUXER_SRCS += third_party/libwebm/mkvmuxer.cpp \
@@ -324,8 +325,8 @@ endif
 # the makefiles). We may want to revisit this.
 define vcproj_template
 $(1): $($(1:.$(VCPROJ_SFX)=).SRCS) vpx.$(VCPROJ_SFX)
-	@echo "    [vcproj] $$@"
-	$$(GEN_VCPROJ)\
+	$(if $(quiet),@echo "    [vcproj] $$@")
+	$(qexec)$$(GEN_VCPROJ)\
            --exe\
            --target=$$(TOOLCHAIN)\
            --name=$$(@:.$(VCPROJ_SFX)=)\
--- a/examples/vp9_spatial_svc_encoder.c
+++ b/examples/vp9_spatial_svc_encoder.c
@@ -25,6 +25,7 @@
 #include "../tools_common.h"
 #include "../video_writer.h"

+#include "../vpx_ports/vpx_timer.h"
 #include "vpx/svc_context.h"
 #include "vpx/vp8cx.h"
 #include "vpx/vpx_encoder.h"
@@ -564,6 +565,8 @@ int main(int argc, const char **argv) {
  double sum_bitrate2 = 0.0;
  double framerate  = 30.0;
 #endif
+  struct vpx_usec_timer timer;
+  int64_t cx_time = 0;
  memset(&svc_ctx, 0, sizeof(svc_ctx));
  svc_ctx.log_print = 1;
  exec_name = argv[0];
@@ -632,6 +635,9 @@ int main(int argc, const char **argv) {
    vpx_codec_control(&codec, VP8E_SET_CPUUSED, svc_ctx.speed);
  if (svc_ctx.threads)
    vpx_codec_control(&codec, VP9E_SET_TILE_COLUMNS, (svc_ctx.threads >> 1));
+  if (svc_ctx.speed >= 5)
+    vpx_codec_control(&codec, VP9E_SET_AQ_MODE, 3);
+

  // Encode frames
  while (!end_of_stream) {
@@ -643,9 +649,12 @@ int main(int argc, const char **argv) {
      end_of_stream = 1;
    }

+    vpx_usec_timer_start(&timer);
    res = vpx_svc_encode(&svc_ctx, &codec, (end_of_stream ? NULL : &raw),
                         pts, frame_duration, svc_ctx.speed >= 5 ?
                         VPX_DL_REALTIME : VPX_DL_GOOD_QUALITY);
+    vpx_usec_timer_mark(&timer);
+    cx_time += vpx_usec_timer_elapsed(&timer);

    printf("%s", vpx_svc_get_message(&svc_ctx));
    if (res != VPX_CODEC_OK) {
@@ -784,6 +793,11 @@ int main(int argc, const char **argv) {
    }
  }
 #endif
+  // Scale in double: frame_cnt * 1000000 overflows int beyond 2147 frames.
+  printf("Frame cnt and encoding time/FPS stats for encoding: %d %f %f \n",
+         frame_cnt,
+         1000 * (double)cx_time / ((double)frame_cnt * 1000000),
+         1000000 * (double)frame_cnt / (double)cx_time);
  vpx_img_free(&raw);
  // display average size, psnr
  printf("%s", vpx_svc_dump_statistics(&svc_ctx));
--- a/examples/vpx_temporal_svc_encoder.c
+++ b/examples/vpx_temporal_svc_encoder.c
@@ -692,6 +692,7 @@ int main(int argc, char **argv) {
    vpx_codec_control(&codec, VP9E_SET_FRAME_PERIODIC_BOOST, 0);
    vpx_codec_control(&codec, VP9E_SET_NOISE_SENSITIVITY, 0);
    vpx_codec_control(&codec, VP8E_SET_STATIC_THRESHOLD, 0);
+    vpx_codec_control(&codec, VP9E_SET_TUNE_CONTENT, 0);
    vpx_codec_control(&codec, VP9E_SET_TILE_COLUMNS, (cfg.g_threads >> 1));
    if (vpx_codec_control(&codec, VP9E_SET_SVC, layering_mode > 0 ? 1: 0))
      die_codec(&codec, "Failed to set SVC");
--- a/libs.mk
+++ b/libs.mk
@@ -50,7 +50,10 @@ CODEC_SRCS-yes += $(addprefix vpx_ports/,$(call enabled,PORTS_SRCS))
 include $(SRC_PATH_BARE)/vpx_dsp/vpx_dsp.mk
 CODEC_SRCS-yes += $(addprefix vpx_dsp/,$(call enabled,DSP_SRCS))

-ifneq ($(CONFIG_VP8_ENCODER)$(CONFIG_VP8_DECODER),)
+include $(SRC_PATH_BARE)/vpx_util/vpx_util.mk
+CODEC_SRCS-yes += $(addprefix vpx_util/,$(call enabled,UTIL_SRCS))
+
+ifeq ($(CONFIG_VP8),yes)
  VP8_PREFIX=vp8/
  include $(SRC_PATH_BARE)/$(VP8_PREFIX)vp8_common.mk
 endif
@@ -73,7 +76,7 @@ ifeq ($(CONFIG_VP8_DECODER),yes)
  CODEC_DOC_SECTIONS += vp8 vp8_decoder
 endif

-ifneq ($(CONFIG_VP9_ENCODER)$(CONFIG_VP9_DECODER),)
+ifeq ($(CONFIG_VP9),yes)
  VP9_PREFIX=vp9/
  include $(SRC_PATH_BARE)/$(VP9_PREFIX)vp9_common.mk
 endif
@@ -106,6 +109,40 @@ endif
 VP9_PREFIX=vp9/
 $(BUILD_PFX)$(VP9_PREFIX)%.c.o: CFLAGS += -Wextra

+#  VP10 make file
+ifeq ($(CONFIG_VP10),yes)
+  VP10_PREFIX=vp10/
+  include $(SRC_PATH_BARE)/$(VP10_PREFIX)vp10_common.mk
+endif
+
+ifeq ($(CONFIG_VP10_ENCODER),yes)
+  VP10_PREFIX=vp10/
+  include $(SRC_PATH_BARE)/$(VP10_PREFIX)vp10cx.mk
+  CODEC_SRCS-yes += $(addprefix $(VP10_PREFIX),$(call enabled,VP10_CX_SRCS))
+  CODEC_EXPORTS-yes += $(addprefix $(VP10_PREFIX),$(VP10_CX_EXPORTS))
+  CODEC_SRCS-yes += $(VP10_PREFIX)vp10cx.mk vpx/vp8.h vpx/vp8cx.h
+  INSTALL-LIBS-yes += include/vpx/vp8.h include/vpx/vp8cx.h
+  INSTALL-LIBS-$(CONFIG_SPATIAL_SVC) += include/vpx/svc_context.h
+  INSTALL_MAPS += include/vpx/% $(SRC_PATH_BARE)/$(VP10_PREFIX)/%
+  CODEC_DOC_SRCS += vpx/vp8.h vpx/vp8cx.h
+  CODEC_DOC_SECTIONS += vp9 vp9_encoder
+endif
+
+ifeq ($(CONFIG_VP10_DECODER),yes)
+  VP10_PREFIX=vp10/
+  include $(SRC_PATH_BARE)/$(VP10_PREFIX)vp10dx.mk
+  CODEC_SRCS-yes += $(addprefix $(VP10_PREFIX),$(call enabled,VP10_DX_SRCS))
+  CODEC_EXPORTS-yes += $(addprefix $(VP10_PREFIX),$(VP10_DX_EXPORTS))
+  CODEC_SRCS-yes += $(VP10_PREFIX)vp10dx.mk vpx/vp8.h vpx/vp8dx.h
+  INSTALL-LIBS-yes += include/vpx/vp8.h include/vpx/vp8dx.h
+  INSTALL_MAPS += include/vpx/% $(SRC_PATH_BARE)/$(VP10_PREFIX)/%
+  CODEC_DOC_SRCS += vpx/vp8.h vpx/vp8dx.h
+  CODEC_DOC_SECTIONS += vp9 vp9_decoder
+endif
+
+VP10_PREFIX=vp10/
+$(BUILD_PFX)$(VP10_PREFIX)%.c.o: CFLAGS += -Wextra
+
 ifeq ($(CONFIG_ENCODERS),yes)
  CODEC_DOC_SECTIONS += encoder
 endif
--- a/test/blockiness_test.cc
+++ b/test/blockiness_test.cc
@@ -8,10 +8,11 @@
 *  be found in the AUTHORS file in the root of the source tree.
 */

-
-#include <string.h>
 #include <limits.h>
 #include <stdio.h>
+#include <string.h>
+
+#include "third_party/googletest/src/include/gtest/gtest.h"

 #include "./vpx_config.h"
 #if CONFIG_VP9_ENCODER
@@ -22,7 +23,6 @@
 #include "test/clear_system_state.h"
 #include "test/register_state_check.h"
 #include "test/util.h"
-#include "third_party/googletest/src/include/gtest/gtest.h"

 #include "vpx_mem/vpx_mem.h"

--- a/test/borders_test.cc
+++ b/test/borders_test.cc
@@ -80,4 +80,7 @@ TEST_P(BordersTest, TestLowBitrate) {

 VP9_INSTANTIATE_TEST_CASE(BordersTest, ::testing::Values(
    ::libvpx_test::kTwoPassGood));
+
+VP10_INSTANTIATE_TEST_CASE(BordersTest, ::testing::Values(
+    ::libvpx_test::kTwoPassGood));
 }  // namespace
--- a/test/codec_factory.h
+++ b/test/codec_factory.h
@@ -13,10 +13,10 @@
 #include "./vpx_config.h"
 #include "vpx/vpx_decoder.h"
 #include "vpx/vpx_encoder.h"
-#if CONFIG_VP8_ENCODER || CONFIG_VP9_ENCODER
+#if CONFIG_VP8_ENCODER || CONFIG_VP9_ENCODER || CONFIG_VP10_ENCODER
 #include "vpx/vp8cx.h"
 #endif
-#if CONFIG_VP8_DECODER || CONFIG_VP9_DECODER
+#if CONFIG_VP8_DECODER || CONFIG_VP9_DECODER || CONFIG_VP10_DECODER
 #include "vpx/vp8dx.h"
 #endif

@@ -233,6 +233,8 @@ class VP9CodecFactory : public CodecFactory {
                                               int usage) const {
 #if CONFIG_VP9_ENCODER
    return vpx_codec_enc_config_default(&vpx_codec_vp9_cx_algo, cfg, usage);
+#elif CONFIG_VP10_ENCODER
+    return vpx_codec_enc_config_default(&vpx_codec_vp10_cx_algo, cfg, usage);
 #else
    return VPX_CODEC_INCAPABLE;
 #endif
@@ -251,7 +253,96 @@ const libvpx_test::VP9CodecFactory kVP9;
 #define VP9_INSTANTIATE_TEST_CASE(test, ...)
 #endif  // CONFIG_VP9

+/*
+ * VP10 Codec Definitions
+ */
+#if CONFIG_VP10
+class VP10Decoder : public Decoder {
+ public:
+  VP10Decoder(vpx_codec_dec_cfg_t cfg, unsigned long deadline)
+      : Decoder(cfg, deadline) {}
+
+  VP10Decoder(vpx_codec_dec_cfg_t cfg, const vpx_codec_flags_t flag,
+              unsigned long deadline)  // NOLINT
+      : Decoder(cfg, flag, deadline) {}
+
+ protected:
+  virtual vpx_codec_iface_t* CodecInterface() const {
+#if CONFIG_VP10_DECODER
+    return &vpx_codec_vp10_dx_algo;
+#else
+    return NULL;
+#endif
+  }
+};
+
+class VP10Encoder : public Encoder {
+ public:
+  VP10Encoder(vpx_codec_enc_cfg_t cfg, unsigned long deadline,
+              const unsigned long init_flags, TwopassStatsStore *stats)
+      : Encoder(cfg, deadline, init_flags, stats) {}
+
+ protected:
+  virtual vpx_codec_iface_t* CodecInterface() const {
+#if CONFIG_VP10_ENCODER
+    return &vpx_codec_vp10_cx_algo;
+#else
+    return NULL;
+#endif
+  }
+};
+
+class VP10CodecFactory : public CodecFactory {
+ public:
+  VP10CodecFactory() : CodecFactory() {}
+
+  virtual Decoder* CreateDecoder(vpx_codec_dec_cfg_t cfg,
+                                 unsigned long deadline) const {
+    return CreateDecoder(cfg, 0, deadline);
+  }
+
+  virtual Decoder* CreateDecoder(vpx_codec_dec_cfg_t cfg,
+                                 const vpx_codec_flags_t flags,
+                                 unsigned long deadline) const {  // NOLINT
+#if CONFIG_VP10_DECODER
+    return new VP10Decoder(cfg, flags, deadline);
+#else
+    return NULL;
+#endif
+  }
+
+  virtual Encoder* CreateEncoder(vpx_codec_enc_cfg_t cfg,
+                                 unsigned long deadline,
+                                 const unsigned long init_flags,
+                                 TwopassStatsStore *stats) const {
+#if CONFIG_VP10_ENCODER
+    return new VP10Encoder(cfg, deadline, init_flags, stats);
+#else
+    return NULL;
+#endif
+  }
+
+  virtual vpx_codec_err_t DefaultEncoderConfig(vpx_codec_enc_cfg_t *cfg,
+                                               int usage) const {
+#if CONFIG_VP10_ENCODER
+    return vpx_codec_enc_config_default(&vpx_codec_vp10_cx_algo, cfg, usage);
+#else
+    return VPX_CODEC_INCAPABLE;
+#endif
+  }
+};
+
+const libvpx_test::VP10CodecFactory kVP10;
+
+#define VP10_INSTANTIATE_TEST_CASE(test, ...)\
+  INSTANTIATE_TEST_CASE_P(VP10, test, \
+      ::testing::Combine( \
+          ::testing::Values(static_cast<const libvpx_test::CodecFactory*>( \
+               &libvpx_test::kVP10)), \
+          __VA_ARGS__))
+#else
+#define VP10_INSTANTIATE_TEST_CASE(test, ...)
+#endif  // CONFIG_VP10

 }  // namespace libvpx_test
-
 #endif  // TEST_CODEC_FACTORY_H_
--- a/test/consistency_test.cc
+++ b/test/consistency_test.cc
@@ -8,10 +8,11 @@
 *  be found in the AUTHORS file in the root of the source tree.
 */

-
-#include <string.h>
 #include <limits.h>
 #include <stdio.h>
+#include <string.h>
+
+#include "third_party/googletest/src/include/gtest/gtest.h"

 #include "./vpx_config.h"
 #if CONFIG_VP9_ENCODER
@@ -22,12 +23,11 @@
 #include "test/clear_system_state.h"
 #include "test/register_state_check.h"
 #include "test/util.h"
-#include "third_party/googletest/src/include/gtest/gtest.h"
-#include "vp9/encoder/vp9_ssim.h"
+#include "vpx_dsp/ssim.h"
 #include "vpx_mem/vpx_mem.h"

 extern "C"
-double vp9_get_ssim_metrics(uint8_t *img1, int img1_pitch,
+double vpx_get_ssim_metrics(uint8_t *img1, int img1_pitch,
                            uint8_t *img2, int img2_pitch,
                            int width, int height,
                            Ssimv *sv2, Metrics *m,
@@ -65,7 +65,7 @@ class ConsistencyTestBase : public ::testing::Test {
    vpx_free(reference_data_[1]);
    reference_data_[1] = NULL;

-    delete ssim_array_;
+    delete[] ssim_array_;
  }

  virtual void TearDown() {
@@ -144,7 +144,7 @@ class ConsistencyVP9Test
  double CheckConsistency(int frame) {
    EXPECT_LT(frame, 2)<< "Frame to check has to be less than 2.";
    return
-        vp9_get_ssim_metrics(source_data_[frame], source_stride_,
+        vpx_get_ssim_metrics(source_data_[frame], source_stride_,
                             reference_data_[frame], reference_stride_,
                             width_, height_, ssim_array_, &metrics_, 1);
  }
--- a/test/convolve_test.cc
+++ b/test/convolve_test.cc
@@ -9,16 +9,20 @@
 */

 #include <string.h>
-#include "test/acm_random.h"
-#include "test/clear_system_state.h"
-#include "test/register_state_check.h"
-#include "test/util.h"
+
 #include "third_party/googletest/src/include/gtest/gtest.h"

 #include "./vpx_config.h"
 #include "./vp9_rtcd.h"
+#include "./vpx_dsp_rtcd.h"
+#include "test/acm_random.h"
+#include "test/clear_system_state.h"
+#include "test/register_state_check.h"
+#include "test/util.h"
 #include "vp9/common/vp9_common.h"
 #include "vp9/common/vp9_filter.h"
+#include "vpx_dsp/vpx_dsp_common.h"
+#include "vpx_dsp/vpx_filter.h"
 #include "vpx_mem/vpx_mem.h"
 #include "vpx_ports/mem.h"

@@ -37,9 +41,14 @@ struct ConvolveFunctions {
                    ConvolveFunc h8, ConvolveFunc h8_avg,
                    ConvolveFunc v8, ConvolveFunc v8_avg,
                    ConvolveFunc hv8, ConvolveFunc hv8_avg,
+                    ConvolveFunc sh8, ConvolveFunc sh8_avg,
+                    ConvolveFunc sv8, ConvolveFunc sv8_avg,
+                    ConvolveFunc shv8, ConvolveFunc shv8_avg,
                    int bd)
      : copy_(copy), avg_(avg), h8_(h8), v8_(v8), hv8_(hv8), h8_avg_(h8_avg),
-        v8_avg_(v8_avg), hv8_avg_(hv8_avg), use_highbd_(bd) {}
+        v8_avg_(v8_avg), hv8_avg_(hv8_avg), sh8_(sh8), sv8_(sv8), shv8_(shv8),
+        sh8_avg_(sh8_avg), sv8_avg_(sv8_avg), shv8_avg_(shv8_avg),
+        use_highbd_(bd) {}

  ConvolveFunc copy_;
  ConvolveFunc avg_;
@@ -49,6 +58,12 @@ struct ConvolveFunctions {
  ConvolveFunc h8_avg_;
  ConvolveFunc v8_avg_;
  ConvolveFunc hv8_avg_;
+  ConvolveFunc sh8_;        // scaled horiz
+  ConvolveFunc sv8_;        // scaled vert
+  ConvolveFunc shv8_;       // scaled horiz/vert
+  ConvolveFunc sh8_avg_;    // scaled avg horiz
+  ConvolveFunc sv8_avg_;    // scaled avg vert
+  ConvolveFunc shv8_avg_;   // scaled avg horiz/vert
  int use_highbd_;  // 0 if high bitdepth not used, else the actual bit depth.
 };

@@ -593,8 +608,8 @@ TEST_P(ConvolveTest, CopyHoriz) {
  DECLARE_ALIGNED(256, const int16_t, filter8[8]) = {0, 0, 0, 128, 0, 0, 0, 0};

  ASM_REGISTER_STATE_CHECK(
-      UUT_->h8_(in, kInputStride, out, kOutputStride, filter8, 16, filter8, 16,
-                Width(), Height()));
+      UUT_->sh8_(in, kInputStride, out, kOutputStride, filter8, 16, filter8, 16,
+                 Width(), Height()));

  CheckGuardBlocks();

@@ -611,8 +626,8 @@ TEST_P(ConvolveTest, CopyVert) {
  DECLARE_ALIGNED(256, const int16_t, filter8[8]) = {0, 0, 0, 128, 0, 0, 0, 0};

  ASM_REGISTER_STATE_CHECK(
-      UUT_->v8_(in, kInputStride, out, kOutputStride, filter8, 16, filter8, 16,
-                Width(), Height()));
+      UUT_->sv8_(in, kInputStride, out, kOutputStride, filter8, 16, filter8, 16,
+                 Width(), Height()));

  CheckGuardBlocks();

@@ -629,8 +644,8 @@ TEST_P(ConvolveTest, Copy2D) {
  DECLARE_ALIGNED(256, const int16_t, filter8[8]) = {0, 0, 0, 128, 0, 0, 0, 0};

  ASM_REGISTER_STATE_CHECK(
-      UUT_->hv8_(in, kInputStride, out, kOutputStride, filter8, 16, filter8, 16,
-                 Width(), Height()));
+      UUT_->shv8_(in, kInputStride, out, kOutputStride, filter8, 16, filter8,
+                  16, Width(), Height()));

  CheckGuardBlocks();

@@ -647,7 +662,7 @@ const int kNumFilters = 16;
 TEST(ConvolveTest, FiltersWontSaturateWhenAddedPairwise) {
  for (int filter_bank = 0; filter_bank < kNumFilterBanks; ++filter_bank) {
    const InterpKernel *filters =
-        vp9_get_interp_kernel(static_cast<INTERP_FILTER>(filter_bank));
+        vp9_filter_kernels[static_cast<INTERP_FILTER>(filter_bank)];
    for (int i = 0; i < kNumFilters; i++) {
      const int p0 = filters[i][0] + filters[i][1];
      const int p1 = filters[i][2] + filters[i][3];
@@ -685,9 +700,7 @@ TEST_P(ConvolveTest, MatchesReferenceSubpixelFilter) {

  for (int filter_bank = 0; filter_bank < kNumFilterBanks; ++filter_bank) {
    const InterpKernel *filters =
-        vp9_get_interp_kernel(static_cast<INTERP_FILTER>(filter_bank));
-    const InterpKernel *const eighttap_smooth =
-        vp9_get_interp_kernel(EIGHTTAP_SMOOTH);
+        vp9_filter_kernels[static_cast<INTERP_FILTER>(filter_bank)];

    for (int filter_x = 0; filter_x < kNumFilters; ++filter_x) {
      for (int filter_y = 0; filter_y < kNumFilters; ++filter_y) {
@@ -696,7 +709,7 @@ TEST_P(ConvolveTest, MatchesReferenceSubpixelFilter) {
                                   ref, kOutputStride,
                                   Width(), Height());

-        if (filters == eighttap_smooth || (filter_x && filter_y))
+        if (filter_x && filter_y)
          ASM_REGISTER_STATE_CHECK(
              UUT_->hv8_(in, kInputStride, out, kOutputStride,
                         filters[filter_x], 16, filters[filter_y], 16,
@@ -706,11 +719,16 @@ TEST_P(ConvolveTest, MatchesReferenceSubpixelFilter) {
              UUT_->v8_(in, kInputStride, out, kOutputStride,
                        kInvalidFilter, 16, filters[filter_y], 16,
                        Width(), Height()));
-        else
+        else if (filter_x)
          ASM_REGISTER_STATE_CHECK(
              UUT_->h8_(in, kInputStride, out, kOutputStride,
                        filters[filter_x], 16, kInvalidFilter, 16,
                        Width(), Height()));
+        else
+          ASM_REGISTER_STATE_CHECK(
+              UUT_->copy_(in, kInputStride, out, kOutputStride,
+                          kInvalidFilter, 0, kInvalidFilter, 0,
+                          Width(), Height()));

        CheckGuardBlocks();

@@ -764,9 +782,7 @@ TEST_P(ConvolveTest, MatchesReferenceAveragingSubpixelFilter) {

  for (int filter_bank = 0; filter_bank < kNumFilterBanks; ++filter_bank) {
    const InterpKernel *filters =
-        vp9_get_interp_kernel(static_cast<INTERP_FILTER>(filter_bank));
-    const InterpKernel *const eighttap_smooth =
-        vp9_get_interp_kernel(EIGHTTAP_SMOOTH);
+        vp9_filter_kernels[static_cast<INTERP_FILTER>(filter_bank)];

    for (int filter_x = 0; filter_x < kNumFilters; ++filter_x) {
      for (int filter_y = 0; filter_y < kNumFilters; ++filter_y) {
@@ -775,7 +791,7 @@ TEST_P(ConvolveTest, MatchesReferenceAveragingSubpixelFilter) {
                                           ref, kOutputStride,
                                           Width(), Height());

-        if (filters == eighttap_smooth || (filter_x && filter_y))
+        if (filter_x && filter_y)
          ASM_REGISTER_STATE_CHECK(
              UUT_->hv8_avg_(in, kInputStride, out, kOutputStride,
                             filters[filter_x], 16, filters[filter_y], 16,
@@ -783,13 +799,18 @@ TEST_P(ConvolveTest, MatchesReferenceAveragingSubpixelFilter) {
        else if (filter_y)
          ASM_REGISTER_STATE_CHECK(
              UUT_->v8_avg_(in, kInputStride, out, kOutputStride,
-                            filters[filter_x], 16, filters[filter_y], 16,
+                            kInvalidFilter, 16, filters[filter_y], 16,
+                            Width(), Height()));
+        else if (filter_x)
+          ASM_REGISTER_STATE_CHECK(
+              UUT_->h8_avg_(in, kInputStride, out, kOutputStride,
+                            filters[filter_x], 16, kInvalidFilter, 16,
                            Width(), Height()));
        else
          ASM_REGISTER_STATE_CHECK(
-              UUT_->h8_avg_(in, kInputStride, out, kOutputStride,
-                            filters[filter_x], 16, filters[filter_y], 16,
-                            Width(), Height()));
+              UUT_->avg_(in, kInputStride, out, kOutputStride,
+                          kInvalidFilter, 0, kInvalidFilter, 0,
+                          Width(), Height()));

        CheckGuardBlocks();

@@ -863,16 +884,14 @@ TEST_P(ConvolveTest, FilterExtremes) {

      for (int filter_bank = 0; filter_bank < kNumFilterBanks; ++filter_bank) {
        const InterpKernel *filters =
-            vp9_get_interp_kernel(static_cast<INTERP_FILTER>(filter_bank));
-        const InterpKernel *const eighttap_smooth =
-            vp9_get_interp_kernel(EIGHTTAP_SMOOTH);
+            vp9_filter_kernels[static_cast<INTERP_FILTER>(filter_bank)];
        for (int filter_x = 0; filter_x < kNumFilters; ++filter_x) {
          for (int filter_y = 0; filter_y < kNumFilters; ++filter_y) {
            wrapper_filter_block2d_8_c(in, kInputStride,
                                       filters[filter_x], filters[filter_y],
                                       ref, kOutputStride,
                                       Width(), Height());
-            if (filters == eighttap_smooth || (filter_x && filter_y))
+            if (filter_x && filter_y)
              ASM_REGISTER_STATE_CHECK(
                  UUT_->hv8_(in, kInputStride, out, kOutputStride,
                             filters[filter_x], 16, filters[filter_y], 16,
@@ -882,11 +901,16 @@ TEST_P(ConvolveTest, FilterExtremes) {
                  UUT_->v8_(in, kInputStride, out, kOutputStride,
                            kInvalidFilter, 16, filters[filter_y], 16,
                            Width(), Height()));
-            else
+            else if (filter_x)
              ASM_REGISTER_STATE_CHECK(
                  UUT_->h8_(in, kInputStride, out, kOutputStride,
                            filters[filter_x], 16, kInvalidFilter, 16,
                            Width(), Height()));
+            else
+              ASM_REGISTER_STATE_CHECK(
+                  UUT_->copy_(in, kInputStride, out, kOutputStride,
+                              kInvalidFilter, 0, kInvalidFilter, 0,
+                              Width(), Height()));

            for (int y = 0; y < Height(); ++y)
              for (int x = 0; x < Width(); ++x)
@@ -902,132 +926,22 @@ TEST_P(ConvolveTest, FilterExtremes) {
  }
 }

-DECLARE_ALIGNED(256, const int16_t, kChangeFilters[16][8]) = {
-    { 0,   0,   0,   0,   0,   0,   0, 128},
-    { 0,   0,   0,   0,   0,   0, 128},
-    { 0,   0,   0,   0,   0, 128},
-    { 0,   0,   0,   0, 128},
-    { 0,   0,   0, 128},
-    { 0,   0, 128},
-    { 0, 128},
-    { 128},
-    { 0,   0,   0,   0,   0,   0,   0, 128},
-    { 0,   0,   0,   0,   0,   0, 128},
-    { 0,   0,   0,   0,   0, 128},
-    { 0,   0,   0,   0, 128},
-    { 0,   0,   0, 128},
-    { 0,   0, 128},
-    { 0, 128},
-    { 128}
-};
-
-/* This test exercises the horizontal and vertical filter functions. */
-TEST_P(ConvolveTest, ChangeFilterWorks) {
-  uint8_t* const in = input();
-  uint8_t* const out = output();
-
-  /* Assume that the first input sample is at the 8/16th position. */
-  const int kInitialSubPelOffset = 8;
-
-  /* Filters are 8-tap, so the first filter tap will be applied to the pixel
-   * at position -3 with respect to the current filtering position. Since
-   * kInitialSubPelOffset is set to 8, we first select sub-pixel filter 8,
-   * which is non-zero only in the last tap. So, applying the filter at the
-   * current input position will result in an output equal to the pixel at
-   * offset +4 (-3 + 7) with respect to the current filtering position.
-   */
-  const int kPixelSelected = 4;
-
-  /* Assume that each output pixel requires us to step on by 17/16th pixels in
-   * the input.
-   */
-  const int kInputPixelStep = 17;
-
-  /* The filters are setup in such a way that the expected output produces
-   * sets of 8 identical output samples. As the filter position moves to the
-   * next 1/16th pixel position the only active (=128) filter tap moves one
-   * position to the left, resulting in the same input pixel being replicated
-   * in to the output for 8 consecutive samples. After each set of 8 positions
-   * the filters select a different input pixel. kFilterPeriodAdjust below
-   * computes which input pixel is written to the output for a specified
-   * x or y position.
-   */
-
-  /* Test the horizontal filter. */
-  ASM_REGISTER_STATE_CHECK(
-      UUT_->h8_(in, kInputStride, out, kOutputStride,
-                kChangeFilters[kInitialSubPelOffset],
-                kInputPixelStep, NULL, 0, Width(), Height()));
-
-  for (int x = 0; x < Width(); ++x) {
-    const int kFilterPeriodAdjust = (x >> 3) << 3;
-    const int ref_x =
-        kPixelSelected + ((kInitialSubPelOffset
-            + kFilterPeriodAdjust * kInputPixelStep)
-                          >> SUBPEL_BITS);
-    ASSERT_EQ(lookup(in, ref_x), lookup(out, x))
-        << "x == " << x << "width = " << Width();
-  }
-
-  /* Test the vertical filter. */
-  ASM_REGISTER_STATE_CHECK(
-      UUT_->v8_(in, kInputStride, out, kOutputStride,
-                NULL, 0, kChangeFilters[kInitialSubPelOffset],
-                kInputPixelStep, Width(), Height()));
-
-  for (int y = 0; y < Height(); ++y) {
-    const int kFilterPeriodAdjust = (y >> 3) << 3;
-    const int ref_y =
-        kPixelSelected + ((kInitialSubPelOffset
-            + kFilterPeriodAdjust * kInputPixelStep)
-                          >> SUBPEL_BITS);
-    ASSERT_EQ(lookup(in, ref_y * kInputStride), lookup(out, y * kInputStride))
-        << "y == " << y;
-  }
-
-  /* Test the horizontal and vertical filters in combination. */
-  ASM_REGISTER_STATE_CHECK(
-      UUT_->hv8_(in, kInputStride, out, kOutputStride,
-                 kChangeFilters[kInitialSubPelOffset], kInputPixelStep,
-                 kChangeFilters[kInitialSubPelOffset], kInputPixelStep,
-                 Width(), Height()));
-
-  for (int y = 0; y < Height(); ++y) {
-    const int kFilterPeriodAdjustY = (y >> 3) << 3;
-    const int ref_y =
-        kPixelSelected + ((kInitialSubPelOffset
-            + kFilterPeriodAdjustY * kInputPixelStep)
-                          >> SUBPEL_BITS);
-    for (int x = 0; x < Width(); ++x) {
-      const int kFilterPeriodAdjustX = (x >> 3) << 3;
-      const int ref_x =
-          kPixelSelected + ((kInitialSubPelOffset
-              + kFilterPeriodAdjustX * kInputPixelStep)
-                            >> SUBPEL_BITS);
-
-      ASSERT_EQ(lookup(in, ref_y * kInputStride + ref_x),
-                lookup(out, y * kOutputStride + x))
-          << "x == " << x << ", y == " << y;
-    }
-  }
-}
-
 /* This test exercises that enough rows and columns are filtered with every
   possible initial fractional positions and scaling steps. */
 TEST_P(ConvolveTest, CheckScalingFiltering) {
  uint8_t* const in = input();
  uint8_t* const out = output();
-  const InterpKernel *const eighttap = vp9_get_interp_kernel(EIGHTTAP);
+  const InterpKernel *const eighttap = vp9_filter_kernels[EIGHTTAP];

  SetConstantInput(127);

  for (int frac = 0; frac < 16; ++frac) {
    for (int step = 1; step <= 32; ++step) {
      /* Test the horizontal and vertical filters in combination. */
-      ASM_REGISTER_STATE_CHECK(UUT_->hv8_(in, kInputStride, out, kOutputStride,
-                                          eighttap[frac], step,
-                                          eighttap[frac], step,
-                                          Width(), Height()));
+      ASM_REGISTER_STATE_CHECK(UUT_->shv8_(in, kInputStride, out, kOutputStride,
+                                           eighttap[frac], step,
+                                           eighttap[frac], step,
+                                           Width(), Height()));

      CheckGuardBlocks();

@@ -1054,7 +968,7 @@ void wrap_convolve8_horiz_sse2_8(const uint8_t *src, ptrdiff_t src_stride,
                                 const int16_t *filter_y,
                                 int filter_y_stride,
                                 int w, int h) {
-  vp9_highbd_convolve8_horiz_sse2(src, src_stride, dst, dst_stride, filter_x,
+  vpx_highbd_convolve8_horiz_sse2(src, src_stride, dst, dst_stride, filter_x,
                                  filter_x_stride, filter_y, filter_y_stride,
                                  w, h, 8);
 }
@@ -1066,7 +980,7 @@ void wrap_convolve8_avg_horiz_sse2_8(const uint8_t *src, ptrdiff_t src_stride,
                                     const int16_t *filter_y,
                                     int filter_y_stride,
                                     int w, int h) {
-  vp9_highbd_convolve8_avg_horiz_sse2(src, src_stride, dst, dst_stride,
+  vpx_highbd_convolve8_avg_horiz_sse2(src, src_stride, dst, dst_stride,
                                      filter_x, filter_x_stride,
                                      filter_y, filter_y_stride, w, h, 8);
 }
@@ -1078,7 +992,7 @@ void wrap_convolve8_vert_sse2_8(const uint8_t *src, ptrdiff_t src_stride,
                                const int16_t *filter_y,
                                int filter_y_stride,
                                int w, int h) {
-  vp9_highbd_convolve8_vert_sse2(src, src_stride, dst, dst_stride,
+  vpx_highbd_convolve8_vert_sse2(src, src_stride, dst, dst_stride,
                                 filter_x, filter_x_stride,
                                 filter_y, filter_y_stride, w, h, 8);
 }
@@ -1090,7 +1004,7 @@ void wrap_convolve8_avg_vert_sse2_8(const uint8_t *src, ptrdiff_t src_stride,
                                    const int16_t *filter_y,
                                    int filter_y_stride,
                                    int w, int h) {
-  vp9_highbd_convolve8_avg_vert_sse2(src, src_stride, dst, dst_stride,
+  vpx_highbd_convolve8_avg_vert_sse2(src, src_stride, dst, dst_stride,
                                     filter_x, filter_x_stride,
                                     filter_y, filter_y_stride, w, h, 8);
 }
@@ -1102,7 +1016,7 @@ void wrap_convolve8_sse2_8(const uint8_t *src, ptrdiff_t src_stride,
                           const int16_t *filter_y,
                           int filter_y_stride,
                           int w, int h) {
-  vp9_highbd_convolve8_sse2(src, src_stride, dst, dst_stride,
+  vpx_highbd_convolve8_sse2(src, src_stride, dst, dst_stride,
                            filter_x, filter_x_stride,
                            filter_y, filter_y_stride, w, h, 8);
 }
@@ -1114,7 +1028,7 @@ void wrap_convolve8_avg_sse2_8(const uint8_t *src, ptrdiff_t src_stride,
                               const int16_t *filter_y,
                               int filter_y_stride,
                               int w, int h) {
-  vp9_highbd_convolve8_avg_sse2(src, src_stride, dst, dst_stride,
+  vpx_highbd_convolve8_avg_sse2(src, src_stride, dst, dst_stride,
                                filter_x, filter_x_stride,
                                filter_y, filter_y_stride, w, h, 8);
 }
@@ -1126,7 +1040,7 @@ void wrap_convolve8_horiz_sse2_10(const uint8_t *src, ptrdiff_t src_stride,
                                  const int16_t *filter_y,
                                  int filter_y_stride,
                                  int w, int h) {
-  vp9_highbd_convolve8_horiz_sse2(src, src_stride, dst, dst_stride,
+  vpx_highbd_convolve8_horiz_sse2(src, src_stride, dst, dst_stride,
                                  filter_x, filter_x_stride,
                                  filter_y, filter_y_stride, w, h, 10);
 }
@@ -1138,7 +1052,7 @@ void wrap_convolve8_avg_horiz_sse2_10(const uint8_t *src, ptrdiff_t src_stride,
                                      const int16_t *filter_y,
                                      int filter_y_stride,
                                      int w, int h) {
-  vp9_highbd_convolve8_avg_horiz_sse2(src, src_stride, dst, dst_stride,
+  vpx_highbd_convolve8_avg_horiz_sse2(src, src_stride, dst, dst_stride,
                                      filter_x, filter_x_stride,
                                      filter_y, filter_y_stride, w, h, 10);
 }
@@ -1150,7 +1064,7 @@ void wrap_convolve8_vert_sse2_10(const uint8_t *src, ptrdiff_t src_stride,
                                 const int16_t *filter_y,
                                 int filter_y_stride,
                                 int w, int h) {
-  vp9_highbd_convolve8_vert_sse2(src, src_stride, dst, dst_stride,
+  vpx_highbd_convolve8_vert_sse2(src, src_stride, dst, dst_stride,
                                 filter_x, filter_x_stride,
                                 filter_y, filter_y_stride, w, h, 10);
 }
@@ -1162,7 +1076,7 @@ void wrap_convolve8_avg_vert_sse2_10(const uint8_t *src, ptrdiff_t src_stride,
                                     const int16_t *filter_y,
                                     int filter_y_stride,
                                     int w, int h) {
-  vp9_highbd_convolve8_avg_vert_sse2(src, src_stride, dst, dst_stride,
+  vpx_highbd_convolve8_avg_vert_sse2(src, src_stride, dst, dst_stride,
                                     filter_x, filter_x_stride,
                                     filter_y, filter_y_stride, w, h, 10);
 }
@@ -1174,7 +1088,7 @@ void wrap_convolve8_sse2_10(const uint8_t *src, ptrdiff_t src_stride,
                            const int16_t *filter_y,
                            int filter_y_stride,
                            int w, int h) {
-  vp9_highbd_convolve8_sse2(src, src_stride, dst, dst_stride,
+  vpx_highbd_convolve8_sse2(src, src_stride, dst, dst_stride,
                            filter_x, filter_x_stride,
                            filter_y, filter_y_stride, w, h, 10);
 }
@@ -1186,7 +1100,7 @@ void wrap_convolve8_avg_sse2_10(const uint8_t *src, ptrdiff_t src_stride,
                                const int16_t *filter_y,
                                int filter_y_stride,
                                int w, int h) {
-  vp9_highbd_convolve8_avg_sse2(src, src_stride, dst, dst_stride,
+  vpx_highbd_convolve8_avg_sse2(src, src_stride, dst, dst_stride,
                                filter_x, filter_x_stride,
                                filter_y, filter_y_stride, w, h, 10);
 }
@@ -1198,7 +1112,7 @@ void wrap_convolve8_horiz_sse2_12(const uint8_t *src, ptrdiff_t src_stride,
                                  const int16_t *filter_y,
                                  int filter_y_stride,
                                  int w, int h) {
-  vp9_highbd_convolve8_horiz_sse2(src, src_stride, dst, dst_stride,
+  vpx_highbd_convolve8_horiz_sse2(src, src_stride, dst, dst_stride,
                                  filter_x, filter_x_stride,
                                  filter_y, filter_y_stride, w, h, 12);
 }
@@ -1210,7 +1124,7 @@ void wrap_convolve8_avg_horiz_sse2_12(const uint8_t *src, ptrdiff_t src_stride,
                                      const int16_t *filter_y,
                                      int filter_y_stride,
                                      int w, int h) {
-  vp9_highbd_convolve8_avg_horiz_sse2(src, src_stride, dst, dst_stride,
+  vpx_highbd_convolve8_avg_horiz_sse2(src, src_stride, dst, dst_stride,
                                      filter_x, filter_x_stride,
                                      filter_y, filter_y_stride, w, h, 12);
 }
@@ -1222,7 +1136,7 @@ void wrap_convolve8_vert_sse2_12(const uint8_t *src, ptrdiff_t src_stride,
                                 const int16_t *filter_y,
                                 int filter_y_stride,
                                 int w, int h) {
-  vp9_highbd_convolve8_vert_sse2(src, src_stride, dst, dst_stride,
+  vpx_highbd_convolve8_vert_sse2(src, src_stride, dst, dst_stride,
                                 filter_x, filter_x_stride,
                                 filter_y, filter_y_stride, w, h, 12);
 }
@@ -1234,7 +1148,7 @@ void wrap_convolve8_avg_vert_sse2_12(const uint8_t *src, ptrdiff_t src_stride,
                                     const int16_t *filter_y,
                                     int filter_y_stride,
                                     int w, int h) {
-  vp9_highbd_convolve8_avg_vert_sse2(src, src_stride, dst, dst_stride,
+  vpx_highbd_convolve8_avg_vert_sse2(src, src_stride, dst, dst_stride,
                                     filter_x, filter_x_stride,
                                     filter_y, filter_y_stride, w, h, 12);
 }
@@ -1246,7 +1160,7 @@ void wrap_convolve8_sse2_12(const uint8_t *src, ptrdiff_t src_stride,
                            const int16_t *filter_y,
                            int filter_y_stride,
                            int w, int h) {
-  vp9_highbd_convolve8_sse2(src, src_stride, dst, dst_stride,
+  vpx_highbd_convolve8_sse2(src, src_stride, dst, dst_stride,
                            filter_x, filter_x_stride,
                            filter_y, filter_y_stride, w, h, 12);
 }
@@ -1258,7 +1172,7 @@ void wrap_convolve8_avg_sse2_12(const uint8_t *src, ptrdiff_t src_stride,
                                const int16_t *filter_y,
                                int filter_y_stride,
                                int w, int h) {
-  vp9_highbd_convolve8_avg_sse2(src, src_stride, dst, dst_stride,
+  vpx_highbd_convolve8_avg_sse2(src, src_stride, dst, dst_stride,
                                filter_x, filter_x_stride,
                                filter_y, filter_y_stride, w, h, 12);
 }
@@ -1271,7 +1185,7 @@ void wrap_convolve_copy_c_8(const uint8_t *src, ptrdiff_t src_stride,
                            const int16_t *filter_y,
                            int filter_y_stride,
                            int w, int h) {
-  vp9_highbd_convolve_copy_c(src, src_stride, dst, dst_stride,
+  vpx_highbd_convolve_copy_c(src, src_stride, dst, dst_stride,
                             filter_x, filter_x_stride,
                             filter_y, filter_y_stride, w, h, 8);
 }
@@ -1283,7 +1197,7 @@ void wrap_convolve_avg_c_8(const uint8_t *src, ptrdiff_t src_stride,
                           const int16_t *filter_y,
                           int filter_y_stride,
                           int w, int h) {
-  vp9_highbd_convolve_avg_c(src, src_stride, dst, dst_stride,
+  vpx_highbd_convolve_avg_c(src, src_stride, dst, dst_stride,
                            filter_x, filter_x_stride,
                            filter_y, filter_y_stride, w, h, 8);
 }
@@ -1295,7 +1209,7 @@ void wrap_convolve8_horiz_c_8(const uint8_t *src, ptrdiff_t src_stride,
                              const int16_t *filter_y,
                              int filter_y_stride,
                              int w, int h) {
-  vp9_highbd_convolve8_horiz_c(src, src_stride, dst, dst_stride,
+  vpx_highbd_convolve8_horiz_c(src, src_stride, dst, dst_stride,
                               filter_x, filter_x_stride,
                               filter_y, filter_y_stride, w, h, 8);
 }
@@ -1307,7 +1221,7 @@ void wrap_convolve8_avg_horiz_c_8(const uint8_t *src, ptrdiff_t src_stride,
                                  const int16_t *filter_y,
                                  int filter_y_stride,
                                  int w, int h) {
-  vp9_highbd_convolve8_avg_horiz_c(src, src_stride, dst, dst_stride,
+  vpx_highbd_convolve8_avg_horiz_c(src, src_stride, dst, dst_stride,
                                   filter_x, filter_x_stride,
                                   filter_y, filter_y_stride, w, h, 8);
 }
@@ -1319,7 +1233,7 @@ void wrap_convolve8_vert_c_8(const uint8_t *src, ptrdiff_t src_stride,
                             const int16_t *filter_y,
                             int filter_y_stride,
                             int w, int h) {
-  vp9_highbd_convolve8_vert_c(src, src_stride, dst, dst_stride,
+  vpx_highbd_convolve8_vert_c(src, src_stride, dst, dst_stride,
                              filter_x, filter_x_stride,
                              filter_y, filter_y_stride, w, h, 8);
 }
@@ -1331,7 +1245,7 @@ void wrap_convolve8_avg_vert_c_8(const uint8_t *src, ptrdiff_t src_stride,
                                 const int16_t *filter_y,
                                 int filter_y_stride,
                                 int w, int h) {
-  vp9_highbd_convolve8_avg_vert_c(src, src_stride, dst, dst_stride,
+  vpx_highbd_convolve8_avg_vert_c(src, src_stride, dst, dst_stride,
                                  filter_x, filter_x_stride,
                                  filter_y, filter_y_stride, w, h, 8);
 }
@@ -1343,7 +1257,7 @@ void wrap_convolve8_c_8(const uint8_t *src, ptrdiff_t src_stride,
                        const int16_t *filter_y,
                        int filter_y_stride,
                        int w, int h) {
-  vp9_highbd_convolve8_c(src, src_stride, dst, dst_stride,
+  vpx_highbd_convolve8_c(src, src_stride, dst, dst_stride,
                         filter_x, filter_x_stride,
                         filter_y, filter_y_stride, w, h, 8);
 }
@@ -1355,7 +1269,7 @@ void wrap_convolve8_avg_c_8(const uint8_t *src, ptrdiff_t src_stride,
                            const int16_t *filter_y,
                            int filter_y_stride,
                            int w, int h) {
-  vp9_highbd_convolve8_avg_c(src, src_stride, dst, dst_stride,
+  vpx_highbd_convolve8_avg_c(src, src_stride, dst, dst_stride,
                             filter_x, filter_x_stride,
                             filter_y, filter_y_stride, w, h, 8);
 }
@@ -1367,7 +1281,7 @@ void wrap_convolve_copy_c_10(const uint8_t *src, ptrdiff_t src_stride,
                             const int16_t *filter_y,
                             int filter_y_stride,
                             int w, int h) {
-  vp9_highbd_convolve_copy_c(src, src_stride, dst, dst_stride,
+  vpx_highbd_convolve_copy_c(src, src_stride, dst, dst_stride,
                             filter_x, filter_x_stride,
                             filter_y, filter_y_stride, w, h, 10);
 }
@@ -1379,7 +1293,7 @@ void wrap_convolve_avg_c_10(const uint8_t *src, ptrdiff_t src_stride,
                            const int16_t *filter_y,
                            int filter_y_stride,
                            int w, int h) {
-  vp9_highbd_convolve_avg_c(src, src_stride, dst, dst_stride,
+  vpx_highbd_convolve_avg_c(src, src_stride, dst, dst_stride,
                            filter_x, filter_x_stride,
                            filter_y, filter_y_stride, w, h, 10);
 }
@@ -1391,7 +1305,7 @@ void wrap_convolve8_horiz_c_10(const uint8_t *src, ptrdiff_t src_stride,
                               const int16_t *filter_y,
                               int filter_y_stride,
                               int w, int h) {
-  vp9_highbd_convolve8_horiz_c(src, src_stride, dst, dst_stride,
+  vpx_highbd_convolve8_horiz_c(src, src_stride, dst, dst_stride,
                               filter_x, filter_x_stride,
                               filter_y, filter_y_stride, w, h, 10);
 }
@@ -1403,7 +1317,7 @@ void wrap_convolve8_avg_horiz_c_10(const uint8_t *src, ptrdiff_t src_stride,
                                   const int16_t *filter_y,
                                   int filter_y_stride,
                                   int w, int h) {
-  vp9_highbd_convolve8_avg_horiz_c(src, src_stride, dst, dst_stride,
+  vpx_highbd_convolve8_avg_horiz_c(src, src_stride, dst, dst_stride,
                                   filter_x, filter_x_stride,
                                   filter_y, filter_y_stride, w, h, 10);
 }
@@ -1415,7 +1329,7 @@ void wrap_convolve8_vert_c_10(const uint8_t *src, ptrdiff_t src_stride,
                              const int16_t *filter_y,
                              int filter_y_stride,
                              int w, int h) {
-  vp9_highbd_convolve8_vert_c(src, src_stride, dst, dst_stride,
+  vpx_highbd_convolve8_vert_c(src, src_stride, dst, dst_stride,
                              filter_x, filter_x_stride,
                              filter_y, filter_y_stride, w, h, 10);
 }
@@ -1427,7 +1341,7 @@ void wrap_convolve8_avg_vert_c_10(const uint8_t *src, ptrdiff_t src_stride,
                                  const int16_t *filter_y,
                                  int filter_y_stride,
                                  int w, int h) {
-  vp9_highbd_convolve8_avg_vert_c(src, src_stride, dst, dst_stride,
+  vpx_highbd_convolve8_avg_vert_c(src, src_stride, dst, dst_stride,
                                  filter_x, filter_x_stride,
                                  filter_y, filter_y_stride, w, h, 10);
 }
@@ -1439,7 +1353,7 @@ void wrap_convolve8_c_10(const uint8_t *src, ptrdiff_t src_stride,
                         const int16_t *filter_y,
                         int filter_y_stride,
                         int w, int h) {
-  vp9_highbd_convolve8_c(src, src_stride, dst, dst_stride,
+  vpx_highbd_convolve8_c(src, src_stride, dst, dst_stride,
                         filter_x, filter_x_stride,
                         filter_y, filter_y_stride, w, h, 10);
 }
@@ -1451,7 +1365,7 @@ void wrap_convolve8_avg_c_10(const uint8_t *src, ptrdiff_t src_stride,
                             const int16_t *filter_y,
                             int filter_y_stride,
                             int w, int h) {
-  vp9_highbd_convolve8_avg_c(src, src_stride, dst, dst_stride,
+  vpx_highbd_convolve8_avg_c(src, src_stride, dst, dst_stride,
                             filter_x, filter_x_stride,
                             filter_y, filter_y_stride, w, h, 10);
 }
@@ -1463,7 +1377,7 @@ void wrap_convolve_copy_c_12(const uint8_t *src, ptrdiff_t src_stride,
                             const int16_t *filter_y,
                             int filter_y_stride,
                             int w, int h) {
-  vp9_highbd_convolve_copy_c(src, src_stride, dst, dst_stride,
+  vpx_highbd_convolve_copy_c(src, src_stride, dst, dst_stride,
                             filter_x, filter_x_stride,
                             filter_y, filter_y_stride, w, h, 12);
 }
@@ -1475,7 +1389,7 @@ void wrap_convolve_avg_c_12(const uint8_t *src, ptrdiff_t src_stride,
                            const int16_t *filter_y,
                            int filter_y_stride,
                            int w, int h) {
-  vp9_highbd_convolve_avg_c(src, src_stride, dst, dst_stride,
+  vpx_highbd_convolve_avg_c(src, src_stride, dst, dst_stride,
                            filter_x, filter_x_stride,
                            filter_y, filter_y_stride, w, h, 12);
 }
@@ -1487,7 +1401,7 @@ void wrap_convolve8_horiz_c_12(const uint8_t *src, ptrdiff_t src_stride,
                               const int16_t *filter_y,
                               int filter_y_stride,
                               int w, int h) {
-  vp9_highbd_convolve8_horiz_c(src, src_stride, dst, dst_stride,
+  vpx_highbd_convolve8_horiz_c(src, src_stride, dst, dst_stride,
                               filter_x, filter_x_stride,
                               filter_y, filter_y_stride, w, h, 12);
 }
@@ -1499,7 +1413,7 @@ void wrap_convolve8_avg_horiz_c_12(const uint8_t *src, ptrdiff_t src_stride,
                                   const int16_t *filter_y,
                                   int filter_y_stride,
                                   int w, int h) {
-  vp9_highbd_convolve8_avg_horiz_c(src, src_stride, dst, dst_stride,
+  vpx_highbd_convolve8_avg_horiz_c(src, src_stride, dst, dst_stride,
                                   filter_x, filter_x_stride,
                                   filter_y, filter_y_stride, w, h, 12);
 }
@@ -1511,7 +1425,7 @@ void wrap_convolve8_vert_c_12(const uint8_t *src, ptrdiff_t src_stride,
                              const int16_t *filter_y,
                              int filter_y_stride,
                              int w, int h) {
-  vp9_highbd_convolve8_vert_c(src, src_stride, dst, dst_stride,
+  vpx_highbd_convolve8_vert_c(src, src_stride, dst, dst_stride,
                              filter_x, filter_x_stride,
                              filter_y, filter_y_stride, w, h, 12);
 }
@@ -1523,7 +1437,7 @@ void wrap_convolve8_avg_vert_c_12(const uint8_t *src, ptrdiff_t src_stride,
                                  const int16_t *filter_y,
                                  int filter_y_stride,
                                  int w, int h) {
-  vp9_highbd_convolve8_avg_vert_c(src, src_stride, dst, dst_stride,
+  vpx_highbd_convolve8_avg_vert_c(src, src_stride, dst, dst_stride,
                                  filter_x, filter_x_stride,
                                  filter_y, filter_y_stride, w, h, 12);
 }
@@ -1535,7 +1449,7 @@ void wrap_convolve8_c_12(const uint8_t *src, ptrdiff_t src_stride,
                         const int16_t *filter_y,
                         int filter_y_stride,
                         int w, int h) {
-  vp9_highbd_convolve8_c(src, src_stride, dst, dst_stride,
+  vpx_highbd_convolve8_c(src, src_stride, dst, dst_stride,
                         filter_x, filter_x_stride,
                         filter_y, filter_y_stride, w, h, 12);
 }
@@ -1547,7 +1461,7 @@ void wrap_convolve8_avg_c_12(const uint8_t *src, ptrdiff_t src_stride,
                             const int16_t *filter_y,
                             int filter_y_stride,
                             int w, int h) {
-  vp9_highbd_convolve8_avg_c(src, src_stride, dst, dst_stride,
+  vpx_highbd_convolve8_avg_c(src, src_stride, dst, dst_stride,
                             filter_x, filter_x_stride,
                             filter_y, filter_y_stride, w, h, 12);
 }
@@ -1556,6 +1470,9 @@ const ConvolveFunctions convolve8_c(
    wrap_convolve_copy_c_8, wrap_convolve_avg_c_8,
    wrap_convolve8_horiz_c_8, wrap_convolve8_avg_horiz_c_8,
    wrap_convolve8_vert_c_8, wrap_convolve8_avg_vert_c_8,
+    wrap_convolve8_c_8, wrap_convolve8_avg_c_8,
+    wrap_convolve8_horiz_c_8, wrap_convolve8_avg_horiz_c_8,
+    wrap_convolve8_vert_c_8, wrap_convolve8_avg_vert_c_8,
    wrap_convolve8_c_8, wrap_convolve8_avg_c_8, 8);
 INSTANTIATE_TEST_CASE_P(C_8, ConvolveTest, ::testing::Values(
    make_tuple(4, 4, &convolve8_c),
@@ -1575,6 +1492,9 @@ const ConvolveFunctions convolve10_c(
    wrap_convolve_copy_c_10, wrap_convolve_avg_c_10,
    wrap_convolve8_horiz_c_10, wrap_convolve8_avg_horiz_c_10,
    wrap_convolve8_vert_c_10, wrap_convolve8_avg_vert_c_10,
+    wrap_convolve8_c_10, wrap_convolve8_avg_c_10,
+    wrap_convolve8_horiz_c_10, wrap_convolve8_avg_horiz_c_10,
+    wrap_convolve8_vert_c_10, wrap_convolve8_avg_vert_c_10,
    wrap_convolve8_c_10, wrap_convolve8_avg_c_10, 10);
 INSTANTIATE_TEST_CASE_P(C_10, ConvolveTest, ::testing::Values(
    make_tuple(4, 4, &convolve10_c),
@@ -1594,6 +1514,9 @@ const ConvolveFunctions convolve12_c(
    wrap_convolve_copy_c_12, wrap_convolve_avg_c_12,
    wrap_convolve8_horiz_c_12, wrap_convolve8_avg_horiz_c_12,
    wrap_convolve8_vert_c_12, wrap_convolve8_avg_vert_c_12,
+    wrap_convolve8_c_12, wrap_convolve8_avg_c_12,
+    wrap_convolve8_horiz_c_12, wrap_convolve8_avg_horiz_c_12,
+    wrap_convolve8_vert_c_12, wrap_convolve8_avg_vert_c_12,
    wrap_convolve8_c_12, wrap_convolve8_avg_c_12, 12);
 INSTANTIATE_TEST_CASE_P(C_12, ConvolveTest, ::testing::Values(
    make_tuple(4, 4, &convolve12_c),
@@ -1613,10 +1536,13 @@ INSTANTIATE_TEST_CASE_P(C_12, ConvolveTest, ::testing::Values(
 #else

 const ConvolveFunctions convolve8_c(
-    vp9_convolve_copy_c, vp9_convolve_avg_c,
-    vp9_convolve8_horiz_c, vp9_convolve8_avg_horiz_c,
-    vp9_convolve8_vert_c, vp9_convolve8_avg_vert_c,
-    vp9_convolve8_c, vp9_convolve8_avg_c, 0);
+    vpx_convolve_copy_c, vpx_convolve_avg_c,
+    vpx_convolve8_horiz_c, vpx_convolve8_avg_horiz_c,
+    vpx_convolve8_vert_c, vpx_convolve8_avg_vert_c,
+    vpx_convolve8_c, vpx_convolve8_avg_c,
+    vpx_scaled_horiz_c, vpx_scaled_avg_horiz_c,
+    vpx_scaled_vert_c, vpx_scaled_avg_vert_c,
+    vpx_scaled_2d_c, vpx_scaled_avg_2d_c, 0);

 INSTANTIATE_TEST_CASE_P(C, ConvolveTest, ::testing::Values(
    make_tuple(4, 4, &convolve8_c),
@@ -1640,16 +1566,25 @@ const ConvolveFunctions convolve8_sse2(
    wrap_convolve_copy_c_8, wrap_convolve_avg_c_8,
    wrap_convolve8_horiz_sse2_8, wrap_convolve8_avg_horiz_sse2_8,
    wrap_convolve8_vert_sse2_8, wrap_convolve8_avg_vert_sse2_8,
+    wrap_convolve8_sse2_8, wrap_convolve8_avg_sse2_8,
+    wrap_convolve8_horiz_sse2_8, wrap_convolve8_avg_horiz_sse2_8,
+    wrap_convolve8_vert_sse2_8, wrap_convolve8_avg_vert_sse2_8,
    wrap_convolve8_sse2_8, wrap_convolve8_avg_sse2_8, 8);
 const ConvolveFunctions convolve10_sse2(
    wrap_convolve_copy_c_10, wrap_convolve_avg_c_10,
    wrap_convolve8_horiz_sse2_10, wrap_convolve8_avg_horiz_sse2_10,
    wrap_convolve8_vert_sse2_10, wrap_convolve8_avg_vert_sse2_10,
+    wrap_convolve8_sse2_10, wrap_convolve8_avg_sse2_10,
+    wrap_convolve8_horiz_sse2_10, wrap_convolve8_avg_horiz_sse2_10,
+    wrap_convolve8_vert_sse2_10, wrap_convolve8_avg_vert_sse2_10,
    wrap_convolve8_sse2_10, wrap_convolve8_avg_sse2_10, 10);
 const ConvolveFunctions convolve12_sse2(
    wrap_convolve_copy_c_12, wrap_convolve_avg_c_12,
    wrap_convolve8_horiz_sse2_12, wrap_convolve8_avg_horiz_sse2_12,
    wrap_convolve8_vert_sse2_12, wrap_convolve8_avg_vert_sse2_12,
+    wrap_convolve8_sse2_12, wrap_convolve8_avg_sse2_12,
+    wrap_convolve8_horiz_sse2_12, wrap_convolve8_avg_horiz_sse2_12,
+    wrap_convolve8_vert_sse2_12, wrap_convolve8_avg_vert_sse2_12,
    wrap_convolve8_sse2_12, wrap_convolve8_avg_sse2_12, 12);
 INSTANTIATE_TEST_CASE_P(SSE2, ConvolveTest, ::testing::Values(
    make_tuple(4, 4, &convolve8_sse2),
@@ -1693,10 +1628,17 @@ INSTANTIATE_TEST_CASE_P(SSE2, ConvolveTest, ::testing::Values(
    make_tuple(64, 64, &convolve12_sse2)));
 #else
 const ConvolveFunctions convolve8_sse2(
-    vp9_convolve_copy_sse2, vp9_convolve_avg_sse2,
-    vp9_convolve8_horiz_sse2, vp9_convolve8_avg_horiz_sse2,
-    vp9_convolve8_vert_sse2, vp9_convolve8_avg_vert_sse2,
-    vp9_convolve8_sse2, vp9_convolve8_avg_sse2, 0);
+#if CONFIG_USE_X86INC
+    vpx_convolve_copy_sse2, vpx_convolve_avg_sse2,
+#else
+    vpx_convolve_copy_c, vpx_convolve_avg_c,
+#endif  // CONFIG_USE_X86INC
+    vpx_convolve8_horiz_sse2, vpx_convolve8_avg_horiz_sse2,
+    vpx_convolve8_vert_sse2, vpx_convolve8_avg_vert_sse2,
+    vpx_convolve8_sse2, vpx_convolve8_avg_sse2,
+    vpx_scaled_horiz_c, vpx_scaled_avg_horiz_c,
+    vpx_scaled_vert_c, vpx_scaled_avg_vert_c,
+    vpx_scaled_2d_c, vpx_scaled_avg_2d_c, 0);

 INSTANTIATE_TEST_CASE_P(SSE2, ConvolveTest, ::testing::Values(
    make_tuple(4, 4, &convolve8_sse2),
@@ -1717,10 +1659,13 @@ INSTANTIATE_TEST_CASE_P(SSE2, ConvolveTest, ::testing::Values(

 #if HAVE_SSSE3
 const ConvolveFunctions convolve8_ssse3(
-    vp9_convolve_copy_c, vp9_convolve_avg_c,
-    vp9_convolve8_horiz_ssse3, vp9_convolve8_avg_horiz_ssse3,
-    vp9_convolve8_vert_ssse3, vp9_convolve8_avg_vert_ssse3,
-    vp9_convolve8_ssse3, vp9_convolve8_avg_ssse3, 0);
+    vpx_convolve_copy_c, vpx_convolve_avg_c,
+    vpx_convolve8_horiz_ssse3, vpx_convolve8_avg_horiz_ssse3,
+    vpx_convolve8_vert_ssse3, vpx_convolve8_avg_vert_ssse3,
+    vpx_convolve8_ssse3, vpx_convolve8_avg_ssse3,
+    vpx_scaled_horiz_c, vpx_scaled_avg_horiz_c,
+    vpx_scaled_vert_c, vpx_scaled_avg_vert_c,
+    vpx_scaled_2d_c, vpx_scaled_avg_2d_c, 0);

 INSTANTIATE_TEST_CASE_P(SSSE3, ConvolveTest, ::testing::Values(
    make_tuple(4, 4, &convolve8_ssse3),
@@ -1740,10 +1685,13 @@ INSTANTIATE_TEST_CASE_P(SSSE3, ConvolveTest, ::testing::Values(

 #if HAVE_AVX2 && HAVE_SSSE3
 const ConvolveFunctions convolve8_avx2(
-    vp9_convolve_copy_c, vp9_convolve_avg_c,
-    vp9_convolve8_horiz_avx2, vp9_convolve8_avg_horiz_ssse3,
-    vp9_convolve8_vert_avx2, vp9_convolve8_avg_vert_ssse3,
-    vp9_convolve8_avx2, vp9_convolve8_avg_ssse3, 0);
+    vpx_convolve_copy_c, vpx_convolve_avg_c,
+    vpx_convolve8_horiz_avx2, vpx_convolve8_avg_horiz_ssse3,
+    vpx_convolve8_vert_avx2, vpx_convolve8_avg_vert_ssse3,
+    vpx_convolve8_avx2, vpx_convolve8_avg_ssse3,
+    vpx_scaled_horiz_c, vpx_scaled_avg_horiz_c,
+    vpx_scaled_vert_c, vpx_scaled_avg_vert_c,
+    vpx_scaled_2d_c, vpx_scaled_avg_2d_c, 0);

 INSTANTIATE_TEST_CASE_P(AVX2, ConvolveTest, ::testing::Values(
    make_tuple(4, 4, &convolve8_avx2),
@@ -1764,16 +1712,22 @@ INSTANTIATE_TEST_CASE_P(AVX2, ConvolveTest, ::testing::Values(
 #if HAVE_NEON
 #if HAVE_NEON_ASM
 const ConvolveFunctions convolve8_neon(
-    vp9_convolve_copy_neon, vp9_convolve_avg_neon,
-    vp9_convolve8_horiz_neon, vp9_convolve8_avg_horiz_neon,
-    vp9_convolve8_vert_neon, vp9_convolve8_avg_vert_neon,
-    vp9_convolve8_neon, vp9_convolve8_avg_neon, 0);
+    vpx_convolve_copy_neon, vpx_convolve_avg_neon,
+    vpx_convolve8_horiz_neon, vpx_convolve8_avg_horiz_neon,
+    vpx_convolve8_vert_neon, vpx_convolve8_avg_vert_neon,
+    vpx_convolve8_neon, vpx_convolve8_avg_neon,
+    vpx_scaled_horiz_c, vpx_scaled_avg_horiz_c,
+    vpx_scaled_vert_c, vpx_scaled_avg_vert_c,
+    vpx_scaled_2d_c, vpx_scaled_avg_2d_c, 0);
 #else  // HAVE_NEON
 const ConvolveFunctions convolve8_neon(
-    vp9_convolve_copy_neon, vp9_convolve_avg_neon,
-    vp9_convolve8_horiz_neon, vp9_convolve8_avg_horiz_neon,
-    vp9_convolve8_vert_neon, vp9_convolve8_avg_vert_neon,
-    vp9_convolve8_neon, vp9_convolve8_avg_neon, 0);
+    vpx_convolve_copy_neon, vpx_convolve_avg_neon,
+    vpx_convolve8_horiz_neon, vpx_convolve8_avg_horiz_neon,
+    vpx_convolve8_vert_neon, vpx_convolve8_avg_vert_neon,
+    vpx_convolve8_neon, vpx_convolve8_avg_neon,
+    vpx_scaled_horiz_c, vpx_scaled_avg_horiz_c,
+    vpx_scaled_vert_c, vpx_scaled_avg_vert_c,
+    vpx_scaled_2d_c, vpx_scaled_avg_2d_c, 0);
 #endif  // HAVE_NEON_ASM

 INSTANTIATE_TEST_CASE_P(NEON, ConvolveTest, ::testing::Values(
@@ -1794,10 +1748,13 @@ INSTANTIATE_TEST_CASE_P(NEON, ConvolveTest, ::testing::Values(

 #if HAVE_DSPR2
 const ConvolveFunctions convolve8_dspr2(
-    vp9_convolve_copy_dspr2, vp9_convolve_avg_dspr2,
-    vp9_convolve8_horiz_dspr2, vp9_convolve8_avg_horiz_dspr2,
-    vp9_convolve8_vert_dspr2, vp9_convolve8_avg_vert_dspr2,
-    vp9_convolve8_dspr2, vp9_convolve8_avg_dspr2, 0);
+    vpx_convolve_copy_dspr2, vpx_convolve_avg_dspr2,
+    vpx_convolve8_horiz_dspr2, vpx_convolve8_avg_horiz_dspr2,
+    vpx_convolve8_vert_dspr2, vpx_convolve8_avg_vert_dspr2,
+    vpx_convolve8_dspr2, vpx_convolve8_avg_dspr2,
+    vpx_scaled_horiz_c, vpx_scaled_avg_horiz_c,
+    vpx_scaled_vert_c, vpx_scaled_avg_vert_c,
+    vpx_scaled_2d_c, vpx_scaled_avg_2d_c, 0);

 INSTANTIATE_TEST_CASE_P(DSPR2, ConvolveTest, ::testing::Values(
    make_tuple(4, 4, &convolve8_dspr2),
@@ -1817,10 +1774,13 @@ INSTANTIATE_TEST_CASE_P(DSPR2, ConvolveTest, ::testing::Values(

 #if HAVE_MSA
 const ConvolveFunctions convolve8_msa(
-    vp9_convolve_copy_msa, vp9_convolve_avg_msa,
-    vp9_convolve8_horiz_msa, vp9_convolve8_avg_horiz_msa,
-    vp9_convolve8_vert_msa, vp9_convolve8_avg_vert_msa,
-    vp9_convolve8_msa, vp9_convolve8_avg_msa, 0);
+    vpx_convolve_copy_msa, vpx_convolve_avg_msa,
+    vpx_convolve8_horiz_msa, vpx_convolve8_avg_horiz_msa,
+    vpx_convolve8_vert_msa, vpx_convolve8_avg_vert_msa,
+    vpx_convolve8_msa, vpx_convolve8_avg_msa,
+    vpx_scaled_horiz_c, vpx_scaled_avg_horiz_c,
+    vpx_scaled_vert_c, vpx_scaled_avg_vert_c,
+    vpx_scaled_2d_c, vpx_scaled_avg_2d_c, 0);

 INSTANTIATE_TEST_CASE_P(MSA, ConvolveTest, ::testing::Values(
    make_tuple(4, 4, &convolve8_msa),
--- a/test/cpu_speed_test.cc
+++ b/test/cpu_speed_test.cc
@@ -140,4 +140,9 @@ VP9_INSTANTIATE_TEST_CASE(
    ::testing::Values(::libvpx_test::kTwoPassGood, ::libvpx_test::kOnePassGood,
                      ::libvpx_test::kRealTime),
    ::testing::Range(0, 9));
+
+VP10_INSTANTIATE_TEST_CASE(
+    CpuSpeedTest,
+    ::testing::Values(::libvpx_test::kTwoPassGood, ::libvpx_test::kOnePassGood),
+    ::testing::Range(0, 3));  // Half-open range: yields 0, 1 and 2.
 }  // namespace
--- a/test/datarate_test.cc
+++ b/test/datarate_test.cc
@@ -761,6 +761,8 @@ class DatarateOnePassCbrSvc : public ::libvpx_test::EncoderTest,
    first_drop_ = 0;
    bits_total_ = 0;
    duration_ = 0.0;
+    mismatch_psnr_ = 0.0;
+    mismatch_nframes_ = 0;
  }
  virtual void BeginPassHook(unsigned int /*pass*/) {
  }
@@ -781,6 +783,7 @@ class DatarateOnePassCbrSvc : public ::libvpx_test::EncoderTest,
      encoder->Control(VP8E_SET_CPUUSED, speed_setting_);
      encoder->Control(VP9E_SET_TILE_COLUMNS, 0);
      encoder->Control(VP8E_SET_MAX_INTRA_BITRATE_PCT, 300);
+      encoder->Control(VP9E_SET_TILE_COLUMNS, (cfg_.g_threads >> 1));
    }
    const vpx_rational_t tb = video->timebase();
    timebase_ = static_cast<double>(tb.num) / tb.den;
@@ -816,6 +819,18 @@ class DatarateOnePassCbrSvc : public ::libvpx_test::EncoderTest,
      file_datarate_ = file_size_in_kb / duration_;
    }
  }
+
+  // Accumulates compute_psnr(img1, img2) and counts the mismatching frame.
+  virtual void MismatchHook(const vpx_image_t *img1,
+                            const vpx_image_t *img2) {
+    mismatch_psnr_ += compute_psnr(img1, img2);
+    ++mismatch_nframes_;
+  }
+
+  unsigned int GetMismatchFrames() const {  // frames counted by MismatchHook()
+    return static_cast<unsigned int>(mismatch_nframes_);
+  }
+
  vpx_codec_pts_t last_pts_;
  int64_t bits_in_buffer_model_;
  double timebase_;
@@ -828,6 +843,8 @@ class DatarateOnePassCbrSvc : public ::libvpx_test::EncoderTest,
  size_t bits_in_last_frame_;
  vpx_svc_extra_cfg_t svc_params_;
  int speed_setting_;
+  double mismatch_psnr_;
+  int mismatch_nframes_;
 };
 static void assign_layer_bitrates(vpx_codec_enc_cfg_t *const enc_cfg,
    const vpx_svc_extra_cfg_t *svc_params,
@@ -867,7 +884,7 @@ static void assign_layer_bitrates(vpx_codec_enc_cfg_t *const enc_cfg,
 }

 // Check basic rate targeting for 1 pass CBR SVC: 2 spatial layers and
-// 3 temporal layers.
+// 3 temporal layers. Run CIF clip with 1 thread.
 TEST_P(DatarateOnePassCbrSvc, OnePassCbrSvc) {
  cfg_.rc_buf_initial_sz = 500;
  cfg_.rc_buf_optimal_sz = 500;
@@ -882,6 +899,7 @@ TEST_P(DatarateOnePassCbrSvc, OnePassCbrSvc) {
  cfg_.ts_rate_decimator[1] = 2;
  cfg_.ts_rate_decimator[2] = 1;
  cfg_.g_error_resilient = 1;
+  cfg_.g_threads = 1;
  cfg_.temporal_layering_mode = 3;
  svc_params_.scaling_factor_num[0] = 144;
  svc_params_.scaling_factor_den[0] = 288;
@@ -905,9 +923,50 @@ TEST_P(DatarateOnePassCbrSvc, OnePassCbrSvc) {
            << " The datarate for the file exceeds the target by too much!";
    ASSERT_LE(cfg_.rc_target_bitrate, file_datarate_ * 1.15)
        << " The datarate for the file is lower than the target by too much!";
+    EXPECT_EQ(GetMismatchFrames(), (unsigned int) 0);
  }
 }

+// Rate targeting for 1 pass CBR SVC (2 spatial, 3 temporal layers), 720p clip,
+// 4 threads. NOTE(review): a .y4m file is read via I420VideoSource -- confirm.
+TEST_P(DatarateOnePassCbrSvc, OnePassCbrSvc4threads) {
+  cfg_.rc_buf_initial_sz = 500;
+  cfg_.rc_buf_optimal_sz = 500;
+  cfg_.rc_buf_sz = 1000;
+  cfg_.rc_min_quantizer = 0;
+  cfg_.rc_max_quantizer = 63;
+  cfg_.rc_end_usage = VPX_CBR;
+  cfg_.g_lag_in_frames = 0;
+  cfg_.ss_number_layers = 2;
+  cfg_.ts_number_layers = 3;
+  cfg_.ts_rate_decimator[0] = 4;
+  cfg_.ts_rate_decimator[1] = 2;
+  cfg_.ts_rate_decimator[2] = 1;
+  cfg_.g_error_resilient = 1;
+  cfg_.g_threads = 4;
+  cfg_.temporal_layering_mode = 3;
+  svc_params_.scaling_factor_num[0] = 144;
+  svc_params_.scaling_factor_den[0] = 288;
+  svc_params_.scaling_factor_num[1] = 288;
+  svc_params_.scaling_factor_den[1] = 288;
+  // TODO(wonkap/marpan): No frame drop for now, we need to implement correct
+  // frame dropping for SVC.
+  cfg_.rc_dropframe_thresh = 0;
+  ::libvpx_test::I420VideoSource video("niklas_1280_720_30.y4m", 1280, 720,
+                                       30, 1, 0, 300);
+  cfg_.rc_target_bitrate = 800;
+  ResetModel();
+  assign_layer_bitrates(&cfg_, &svc_params_, cfg_.ss_number_layers,
+      cfg_.ts_number_layers, cfg_.temporal_layering_mode,
+      cfg_.rc_target_bitrate);
+  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+  ASSERT_GE(cfg_.rc_target_bitrate, effective_datarate_ * 0.85)
+          << " The datarate for the file exceeds the target by too much!";
+  ASSERT_LE(cfg_.rc_target_bitrate, file_datarate_ * 1.15)
+      << " The datarate for the file is lower than the target by too much!";
+  EXPECT_EQ(GetMismatchFrames(), (unsigned int) 0);
+}
+
 VP8_INSTANTIATE_TEST_CASE(DatarateTestLarge, ALL_TEST_MODES);
 VP9_INSTANTIATE_TEST_CASE(DatarateTestVP9Large,
                          ::testing::Values(::libvpx_test::kOnePassGood,
--- a/test/dct16x16_test.cc
+++ b/test/dct16x16_test.cc
@@ -13,12 +13,13 @@
 #include <string.h>

 #include "third_party/googletest/src/include/gtest/gtest.h"
+
+#include "./vp9_rtcd.h"
+#include "./vpx_dsp_rtcd.h"
 #include "test/acm_random.h"
 #include "test/clear_system_state.h"
 #include "test/register_state_check.h"
 #include "test/util.h"
-
-#include "./vp9_rtcd.h"
 #include "vp9/common/vp9_entropy.h"
 #include "vp9/common/vp9_scan.h"
 #include "vpx/vpx_codec.h"
@@ -39,30 +40,6 @@ static int round(double x) {
 #endif

 const int kNumCoeffs = 256;
-const double PI = 3.1415926535898;
-void reference2_16x16_idct_2d(double *input, double *output) {
-  double x;
-  for (int l = 0; l < 16; ++l) {
-    for (int k = 0; k < 16; ++k) {
-      double s = 0;
-      for (int i = 0; i < 16; ++i) {
-        for (int j = 0; j < 16; ++j) {
-          x = cos(PI * j * (l + 0.5) / 16.0) *
-              cos(PI * i * (k + 0.5) / 16.0) *
-              input[i * 16 + j] / 256;
-          if (i != 0)
-            x *= sqrt(2.0);
-          if (j != 0)
-            x *= sqrt(2.0);
-          s += x;
-        }
-      }
-      output[k*16+l] = s;
-    }
-  }
-}
-
-
 const double C1 = 0.995184726672197;
 const double C2 = 0.98078528040323;
 const double C3 = 0.956940335732209;
@@ -271,12 +248,12 @@ typedef std::tr1::tuple<IdctFunc, IdctFunc, int, vpx_bit_depth_t>

 void fdct16x16_ref(const int16_t *in, tran_low_t *out, int stride,
                   int /*tx_type*/) {
-  vp9_fdct16x16_c(in, out, stride);
+  vpx_fdct16x16_c(in, out, stride);
 }

 void idct16x16_ref(const tran_low_t *in, uint8_t *dest, int stride,
                   int /*tx_type*/) {
-  vp9_idct16x16_256_add_c(in, dest, stride);
+  vpx_idct16x16_256_add_c(in, dest, stride);
 }

 void fht16x16_ref(const int16_t *in, tran_low_t *out, int stride,
@@ -291,11 +268,11 @@ void iht16x16_ref(const tran_low_t *in, uint8_t *dest, int stride,

 #if CONFIG_VP9_HIGHBITDEPTH
 void idct16x16_10(const tran_low_t *in, uint8_t *out, int stride) {
-  vp9_highbd_idct16x16_256_add_c(in, out, stride, 10);
+  vpx_highbd_idct16x16_256_add_c(in, out, stride, 10);
 }

 void idct16x16_12(const tran_low_t *in, uint8_t *out, int stride) {
-  vp9_highbd_idct16x16_256_add_c(in, out, stride, 12);
+  vpx_highbd_idct16x16_256_add_c(in, out, stride, 12);
 }

 void idct16x16_10_ref(const tran_low_t *in, uint8_t *out, int stride,
@@ -317,28 +294,28 @@ void iht16x16_12(const tran_low_t *in, uint8_t *out, int stride, int tx_type) {
 }

 void idct16x16_10_add_10_c(const tran_low_t *in, uint8_t *out, int stride) {
-  vp9_highbd_idct16x16_10_add_c(in, out, stride, 10);
+  vpx_highbd_idct16x16_10_add_c(in, out, stride, 10);
 }

 void idct16x16_10_add_12_c(const tran_low_t *in, uint8_t *out, int stride) {
-  vp9_highbd_idct16x16_10_add_c(in, out, stride, 12);
+  vpx_highbd_idct16x16_10_add_c(in, out, stride, 12);
 }

 #if HAVE_SSE2
 void idct16x16_256_add_10_sse2(const tran_low_t *in, uint8_t *out, int stride) {
-  vp9_highbd_idct16x16_256_add_sse2(in, out, stride, 10);
+  vpx_highbd_idct16x16_256_add_sse2(in, out, stride, 10);
 }

 void idct16x16_256_add_12_sse2(const tran_low_t *in, uint8_t *out, int stride) {
-  vp9_highbd_idct16x16_256_add_sse2(in, out, stride, 12);
+  vpx_highbd_idct16x16_256_add_sse2(in, out, stride, 12);
 }

 void idct16x16_10_add_10_sse2(const tran_low_t *in, uint8_t *out, int stride) {
-  vp9_highbd_idct16x16_10_add_sse2(in, out, stride, 10);
+  vpx_highbd_idct16x16_10_add_sse2(in, out, stride, 10);
 }

 void idct16x16_10_add_12_sse2(const tran_low_t *in, uint8_t *out, int stride) {
-  vp9_highbd_idct16x16_10_add_sse2(in, out, stride, 12);
+  vpx_highbd_idct16x16_10_add_sse2(in, out, stride, 12);
 }
 #endif  // HAVE_SSE2
 #endif  // CONFIG_VP9_HIGHBITDEPTH
@@ -821,14 +798,14 @@ using std::tr1::make_tuple;
 INSTANTIATE_TEST_CASE_P(
    C, Trans16x16DCT,
    ::testing::Values(
-        make_tuple(&vp9_highbd_fdct16x16_c, &idct16x16_10, 0, VPX_BITS_10),
-        make_tuple(&vp9_highbd_fdct16x16_c, &idct16x16_12, 0, VPX_BITS_12),
-        make_tuple(&vp9_fdct16x16_c, &vp9_idct16x16_256_add_c, 0, VPX_BITS_8)));
+        make_tuple(&vpx_highbd_fdct16x16_c, &idct16x16_10, 0, VPX_BITS_10),
+        make_tuple(&vpx_highbd_fdct16x16_c, &idct16x16_12, 0, VPX_BITS_12),
+        make_tuple(&vpx_fdct16x16_c, &vpx_idct16x16_256_add_c, 0, VPX_BITS_8)));
 #else
 INSTANTIATE_TEST_CASE_P(
    C, Trans16x16DCT,
    ::testing::Values(
-        make_tuple(&vp9_fdct16x16_c, &vp9_idct16x16_256_add_c, 0, VPX_BITS_8)));
+        make_tuple(&vpx_fdct16x16_c, &vpx_idct16x16_256_add_c, 0, VPX_BITS_8)));
 #endif  // CONFIG_VP9_HIGHBITDEPTH

 #if CONFIG_VP9_HIGHBITDEPTH
@@ -861,16 +838,16 @@ INSTANTIATE_TEST_CASE_P(
 INSTANTIATE_TEST_CASE_P(
    NEON, Trans16x16DCT,
    ::testing::Values(
-        make_tuple(&vp9_fdct16x16_c,
-                   &vp9_idct16x16_256_add_neon, 0, VPX_BITS_8)));
+        make_tuple(&vpx_fdct16x16_c,
+                   &vpx_idct16x16_256_add_neon, 0, VPX_BITS_8)));
 #endif

 #if HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
 INSTANTIATE_TEST_CASE_P(
    SSE2, Trans16x16DCT,
    ::testing::Values(
-        make_tuple(&vp9_fdct16x16_sse2,
-                   &vp9_idct16x16_256_add_sse2, 0, VPX_BITS_8)));
+        make_tuple(&vpx_fdct16x16_sse2,
+                   &vpx_idct16x16_256_add_sse2, 0, VPX_BITS_8)));
 INSTANTIATE_TEST_CASE_P(
    SSE2, Trans16x16HT,
    ::testing::Values(
@@ -888,27 +865,19 @@ INSTANTIATE_TEST_CASE_P(
 INSTANTIATE_TEST_CASE_P(
    SSE2, Trans16x16DCT,
    ::testing::Values(
-        make_tuple(&vp9_highbd_fdct16x16_sse2,
+        make_tuple(&vpx_highbd_fdct16x16_sse2,
                   &idct16x16_10, 0, VPX_BITS_10),
-        make_tuple(&vp9_highbd_fdct16x16_c,
+        make_tuple(&vpx_highbd_fdct16x16_c,
                   &idct16x16_256_add_10_sse2, 0, VPX_BITS_10),
-        make_tuple(&vp9_highbd_fdct16x16_sse2,
+        make_tuple(&vpx_highbd_fdct16x16_sse2,
                   &idct16x16_12, 0, VPX_BITS_12),
-        make_tuple(&vp9_highbd_fdct16x16_c,
+        make_tuple(&vpx_highbd_fdct16x16_c,
                   &idct16x16_256_add_12_sse2, 0, VPX_BITS_12),
-        make_tuple(&vp9_fdct16x16_sse2,
-                   &vp9_idct16x16_256_add_c, 0, VPX_BITS_8)));
+        make_tuple(&vpx_fdct16x16_sse2,
+                   &vpx_idct16x16_256_add_c, 0, VPX_BITS_8)));
 INSTANTIATE_TEST_CASE_P(
    SSE2, Trans16x16HT,
    ::testing::Values(
-        make_tuple(&vp9_highbd_fht16x16_sse2, &iht16x16_10, 0, VPX_BITS_10),
-        make_tuple(&vp9_highbd_fht16x16_sse2, &iht16x16_10, 1, VPX_BITS_10),
-        make_tuple(&vp9_highbd_fht16x16_sse2, &iht16x16_10, 2, VPX_BITS_10),
-        make_tuple(&vp9_highbd_fht16x16_sse2, &iht16x16_10, 3, VPX_BITS_10),
-        make_tuple(&vp9_highbd_fht16x16_sse2, &iht16x16_12, 0, VPX_BITS_12),
-        make_tuple(&vp9_highbd_fht16x16_sse2, &iht16x16_12, 1, VPX_BITS_12),
-        make_tuple(&vp9_highbd_fht16x16_sse2, &iht16x16_12, 2, VPX_BITS_12),
-        make_tuple(&vp9_highbd_fht16x16_sse2, &iht16x16_12, 3, VPX_BITS_12),
        make_tuple(&vp9_fht16x16_sse2, &vp9_iht16x16_256_add_c, 0, VPX_BITS_8),
        make_tuple(&vp9_fht16x16_sse2, &vp9_iht16x16_256_add_c, 1, VPX_BITS_8),
        make_tuple(&vp9_fht16x16_sse2, &vp9_iht16x16_256_add_c, 2, VPX_BITS_8),
@@ -933,8 +902,8 @@ INSTANTIATE_TEST_CASE_P(
 INSTANTIATE_TEST_CASE_P(
    MSA, Trans16x16DCT,
    ::testing::Values(
-        make_tuple(&vp9_fdct16x16_msa,
-                   &vp9_idct16x16_256_add_msa, 0, VPX_BITS_8)));
+        make_tuple(&vpx_fdct16x16_msa,
+                   &vpx_idct16x16_256_add_msa, 0, VPX_BITS_8)));
 INSTANTIATE_TEST_CASE_P(
    MSA, Trans16x16HT,
    ::testing::Values(
--- a/test/dct32x32_test.cc
+++ b/test/dct32x32_test.cc
@@ -13,13 +13,14 @@
 #include <string.h>

 #include "third_party/googletest/src/include/gtest/gtest.h"
+
+#include "./vp9_rtcd.h"
+#include "./vpx_config.h"
+#include "./vpx_dsp_rtcd.h"
 #include "test/acm_random.h"
 #include "test/clear_system_state.h"
 #include "test/register_state_check.h"
 #include "test/util.h"
-
-#include "./vpx_config.h"
-#include "./vp9_rtcd.h"
 #include "vp9/common/vp9_entropy.h"
 #include "vpx/vpx_codec.h"
 #include "vpx/vpx_integer.h"
@@ -81,15 +82,15 @@ typedef std::tr1::tuple<FwdTxfmFunc, InvTxfmFunc, int, vpx_bit_depth_t>

 #if CONFIG_VP9_HIGHBITDEPTH
 void idct32x32_8(const tran_low_t *in, uint8_t *out, int stride) {
-  vp9_highbd_idct32x32_1024_add_c(in, out, stride, 8);
+  vpx_highbd_idct32x32_1024_add_c(in, out, stride, 8);
 }

 void idct32x32_10(const tran_low_t *in, uint8_t *out, int stride) {
-  vp9_highbd_idct32x32_1024_add_c(in, out, stride, 10);
+  vpx_highbd_idct32x32_1024_add_c(in, out, stride, 10);
 }

 void idct32x32_12(const tran_low_t *in, uint8_t *out, int stride) {
-  vp9_highbd_idct32x32_1024_add_c(in, out, stride, 12);
+  vpx_highbd_idct32x32_1024_add_c(in, out, stride, 12);
 }
 #endif  // CONFIG_VP9_HIGHBITDEPTH

@@ -194,7 +195,7 @@ TEST_P(Trans32x32Test, CoeffCheck) {
      input_block[j] = (rnd.Rand16() & mask_) - (rnd.Rand16() & mask_);

    const int stride = 32;
-    vp9_fdct32x32_c(input_block, output_ref_block, stride);
+    vpx_fdct32x32_c(input_block, output_ref_block, stride);
    ASM_REGISTER_STATE_CHECK(fwd_txfm_(input_block, output_block, stride));

    if (version_ == 0) {
@@ -231,7 +232,7 @@ TEST_P(Trans32x32Test, MemCheck) {
    }

    const int stride = 32;
-    vp9_fdct32x32_c(input_extreme_block, output_ref_block, stride);
+    vpx_fdct32x32_c(input_extreme_block, output_ref_block, stride);
    ASM_REGISTER_STATE_CHECK(
        fwd_txfm_(input_extreme_block, output_block, stride));

@@ -314,61 +315,61 @@ using std::tr1::make_tuple;
 INSTANTIATE_TEST_CASE_P(
    C, Trans32x32Test,
    ::testing::Values(
-        make_tuple(&vp9_highbd_fdct32x32_c,
+        make_tuple(&vpx_highbd_fdct32x32_c,
                   &idct32x32_10, 0, VPX_BITS_10),
-        make_tuple(&vp9_highbd_fdct32x32_rd_c,
+        make_tuple(&vpx_highbd_fdct32x32_rd_c,
                   &idct32x32_10, 1, VPX_BITS_10),
-        make_tuple(&vp9_highbd_fdct32x32_c,
+        make_tuple(&vpx_highbd_fdct32x32_c,
                   &idct32x32_12, 0, VPX_BITS_12),
-        make_tuple(&vp9_highbd_fdct32x32_rd_c,
+        make_tuple(&vpx_highbd_fdct32x32_rd_c,
                   &idct32x32_12, 1, VPX_BITS_12),
-        make_tuple(&vp9_fdct32x32_c,
-                   &vp9_idct32x32_1024_add_c, 0, VPX_BITS_8),
-        make_tuple(&vp9_fdct32x32_rd_c,
-                   &vp9_idct32x32_1024_add_c, 1, VPX_BITS_8)));
+        make_tuple(&vpx_fdct32x32_c,
+                   &vpx_idct32x32_1024_add_c, 0, VPX_BITS_8),
+        make_tuple(&vpx_fdct32x32_rd_c,
+                   &vpx_idct32x32_1024_add_c, 1, VPX_BITS_8)));
 #else
 INSTANTIATE_TEST_CASE_P(
    C, Trans32x32Test,
    ::testing::Values(
-        make_tuple(&vp9_fdct32x32_c,
-                   &vp9_idct32x32_1024_add_c, 0, VPX_BITS_8),
-        make_tuple(&vp9_fdct32x32_rd_c,
-                   &vp9_idct32x32_1024_add_c, 1, VPX_BITS_8)));
+        make_tuple(&vpx_fdct32x32_c,
+                   &vpx_idct32x32_1024_add_c, 0, VPX_BITS_8),
+        make_tuple(&vpx_fdct32x32_rd_c,
+                   &vpx_idct32x32_1024_add_c, 1, VPX_BITS_8)));
 #endif  // CONFIG_VP9_HIGHBITDEPTH

 #if HAVE_NEON_ASM && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
 INSTANTIATE_TEST_CASE_P(
    NEON, Trans32x32Test,
    ::testing::Values(
-        make_tuple(&vp9_fdct32x32_c,
-                   &vp9_idct32x32_1024_add_neon, 0, VPX_BITS_8),
-        make_tuple(&vp9_fdct32x32_rd_c,
-                   &vp9_idct32x32_1024_add_neon, 1, VPX_BITS_8)));
+        make_tuple(&vpx_fdct32x32_c,
+                   &vpx_idct32x32_1024_add_neon, 0, VPX_BITS_8),
+        make_tuple(&vpx_fdct32x32_rd_c,
+                   &vpx_idct32x32_1024_add_neon, 1, VPX_BITS_8)));
 #endif  // HAVE_NEON_ASM && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE

 #if HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
 INSTANTIATE_TEST_CASE_P(
    SSE2, Trans32x32Test,
    ::testing::Values(
-        make_tuple(&vp9_fdct32x32_sse2,
-                   &vp9_idct32x32_1024_add_sse2, 0, VPX_BITS_8),
-        make_tuple(&vp9_fdct32x32_rd_sse2,
-                   &vp9_idct32x32_1024_add_sse2, 1, VPX_BITS_8)));
+        make_tuple(&vpx_fdct32x32_sse2,
+                   &vpx_idct32x32_1024_add_sse2, 0, VPX_BITS_8),
+        make_tuple(&vpx_fdct32x32_rd_sse2,
+                   &vpx_idct32x32_1024_add_sse2, 1, VPX_BITS_8)));
 #endif  // HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE

 #if HAVE_SSE2 && CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
 INSTANTIATE_TEST_CASE_P(
    SSE2, Trans32x32Test,
    ::testing::Values(
-        make_tuple(&vp9_highbd_fdct32x32_sse2, &idct32x32_10, 0, VPX_BITS_10),
-        make_tuple(&vp9_highbd_fdct32x32_rd_sse2, &idct32x32_10, 1,
+        make_tuple(&vpx_highbd_fdct32x32_sse2, &idct32x32_10, 0, VPX_BITS_10),
+        make_tuple(&vpx_highbd_fdct32x32_rd_sse2, &idct32x32_10, 1,
                   VPX_BITS_10),
-        make_tuple(&vp9_highbd_fdct32x32_sse2, &idct32x32_12, 0, VPX_BITS_12),
-        make_tuple(&vp9_highbd_fdct32x32_rd_sse2, &idct32x32_12, 1,
+        make_tuple(&vpx_highbd_fdct32x32_sse2, &idct32x32_12, 0, VPX_BITS_12),
+        make_tuple(&vpx_highbd_fdct32x32_rd_sse2, &idct32x32_12, 1,
                   VPX_BITS_12),
-        make_tuple(&vp9_fdct32x32_sse2, &vp9_idct32x32_1024_add_c, 0,
+        make_tuple(&vpx_fdct32x32_sse2, &vpx_idct32x32_1024_add_c, 0,
                   VPX_BITS_8),
-        make_tuple(&vp9_fdct32x32_rd_sse2, &vp9_idct32x32_1024_add_c, 1,
+        make_tuple(&vpx_fdct32x32_rd_sse2, &vpx_idct32x32_1024_add_c, 1,
                   VPX_BITS_8)));
 #endif  // HAVE_SSE2 && CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE

@@ -376,19 +377,19 @@ INSTANTIATE_TEST_CASE_P(
 INSTANTIATE_TEST_CASE_P(
    AVX2, Trans32x32Test,
    ::testing::Values(
-        make_tuple(&vp9_fdct32x32_avx2,
-                   &vp9_idct32x32_1024_add_sse2, 0, VPX_BITS_8),
-        make_tuple(&vp9_fdct32x32_rd_avx2,
-                   &vp9_idct32x32_1024_add_sse2, 1, VPX_BITS_8)));
+        make_tuple(&vpx_fdct32x32_avx2,
+                   &vpx_idct32x32_1024_add_sse2, 0, VPX_BITS_8),
+        make_tuple(&vpx_fdct32x32_rd_avx2,
+                   &vpx_idct32x32_1024_add_sse2, 1, VPX_BITS_8)));
 #endif  // HAVE_AVX2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE

 #if HAVE_MSA && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
 INSTANTIATE_TEST_CASE_P(
    MSA, Trans32x32Test,
    ::testing::Values(
-        make_tuple(&vp9_fdct32x32_msa,
-                   &vp9_idct32x32_1024_add_msa, 0, VPX_BITS_8),
-        make_tuple(&vp9_fdct32x32_rd_msa,
-                   &vp9_idct32x32_1024_add_msa, 1, VPX_BITS_8)));
+        make_tuple(&vpx_fdct32x32_msa,
+                   &vpx_idct32x32_1024_add_msa, 0, VPX_BITS_8),
+        make_tuple(&vpx_fdct32x32_rd_msa,
+                   &vpx_idct32x32_1024_add_msa, 1, VPX_BITS_8)));
 #endif  // HAVE_MSA && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
 }  // namespace
--- a/test/decode_api_test.cc
+++ b/test/decode_api_test.cc
@@ -7,10 +7,11 @@
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
+
 #include "third_party/googletest/src/include/gtest/gtest.h"

-#include "test/ivf_video_source.h"
 #include "./vpx_config.h"
+#include "test/ivf_video_source.h"
 #include "vpx/vp8dx.h"
 #include "vpx/vpx_decoder.h"

@@ -25,6 +26,9 @@ TEST(DecodeAPI, InvalidParams) {
 #endif
 #if CONFIG_VP9_DECODER
    &vpx_codec_vp9_dx_algo,
+#endif
+#if CONFIG_VP10_DECODER
+    &vpx_codec_vp10_dx_algo,
 #endif
  };
  uint8_t buf[1] = {0};
@@ -129,8 +133,13 @@ TEST(DecodeAPI, Vp9InvalidDecode) {
  vpx_codec_ctx_t dec;
  EXPECT_EQ(VPX_CODEC_OK, vpx_codec_dec_init(&dec, codec, NULL, 0));
  const uint32_t frame_size = static_cast<uint32_t>(video.frame_size());
+#if CONFIG_VP9_HIGHBITDEPTH
  EXPECT_EQ(VPX_CODEC_MEM_ERROR,
            vpx_codec_decode(&dec, video.cxdata(), frame_size, NULL, 0));
+#else
+  EXPECT_EQ(VPX_CODEC_UNSUP_BITSTREAM,
+            vpx_codec_decode(&dec, video.cxdata(), frame_size, NULL, 0));
+#endif
  vpx_codec_iter_t iter = NULL;
  EXPECT_EQ(NULL, vpx_codec_get_frame(&dec, &iter));

--- a/test/decode_test_driver.cc
+++ b/test/decode_test_driver.cc
@@ -7,9 +7,11 @@
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
+
+#include "third_party/googletest/src/include/gtest/gtest.h"
+
 #include "test/codec_factory.h"
 #include "test/decode_test_driver.h"
-#include "third_party/googletest/src/include/gtest/gtest.h"
 #include "test/register_state_check.h"
 #include "test/video_source.h"

--- a/test/encode_test_driver.cc
+++ b/test/encode_test_driver.cc
@@ -10,13 +10,14 @@

 #include <string>

+#include "third_party/googletest/src/include/gtest/gtest.h"
+
 #include "./vpx_config.h"
 #include "test/codec_factory.h"
-#include "test/encode_test_driver.h"
 #include "test/decode_test_driver.h"
+#include "test/encode_test_driver.h"
 #include "test/register_state_check.h"
 #include "test/video_source.h"
-#include "third_party/googletest/src/include/gtest/gtest.h"

 namespace libvpx_test {
 void Encoder::InitEncoder(VideoSource *video) {
@@ -41,6 +42,15 @@ void Encoder::InitEncoder(VideoSource *video) {
                               log2_tile_columns);
      ASSERT_EQ(VPX_CODEC_OK, res) << EncoderError();
    } else
+#endif
+#if CONFIG_VP10_ENCODER
+    if (CodecInterface() == &vpx_codec_vp10_cx_algo) {
+      // Default to 1 tile column for VP10.
+      const int log2_tile_columns = 0;
+      res = vpx_codec_control_(&encoder_, VP9E_SET_TILE_COLUMNS,
+                               log2_tile_columns);
+      ASSERT_EQ(VPX_CODEC_OK, res) << EncoderError();
+    } else
 #endif
    {
 #if CONFIG_VP8_ENCODER
@@ -185,6 +195,7 @@ void EncoderTest::RunLoop(VideoSource *video) {

    video->Begin();
    encoder->InitEncoder(video);
+    ASSERT_FALSE(::testing::Test::HasFatalFailure());

    unsigned long dec_init_flags = 0;  // NOLINT
    // Use fragment decoder if encoder outputs partitions.
--- a/test/encode_test_driver.h
+++ b/test/encode_test_driver.h
@@ -13,12 +13,13 @@
 #include <string>
 #include <vector>

-#include "./vpx_config.h"
 #include "third_party/googletest/src/include/gtest/gtest.h"
-#include "vpx/vpx_encoder.h"
-#if CONFIG_VP8_ENCODER || CONFIG_VP9_ENCODER
+
+#include "./vpx_config.h"
+#if CONFIG_VP8_ENCODER || CONFIG_VP9_ENCODER || CONFIG_VP10_ENCODER
 #include "vpx/vp8cx.h"
 #endif
+#include "vpx/vpx_encoder.h"

 namespace libvpx_test {

@@ -137,7 +138,7 @@ class Encoder {
    const vpx_codec_err_t res = vpx_codec_control_(&encoder_, ctrl_id, arg);
    ASSERT_EQ(VPX_CODEC_OK, res) << EncoderError();
  }
-#if CONFIG_VP8_ENCODER || CONFIG_VP9_ENCODER
+#if CONFIG_VP8_ENCODER || CONFIG_VP9_ENCODER || CONFIG_VP10_ENCODER
  void Control(int ctrl_id, vpx_active_map_t *arg) {
    const vpx_codec_err_t res = vpx_codec_control_(&encoder_, ctrl_id, arg);
    ASSERT_EQ(VPX_CODEC_OK, res) << EncoderError();
--- a/test/error_resilience_test.cc
+++ b/test/error_resilience_test.cc
@@ -20,10 +20,11 @@ const int kMaxErrorFrames = 12;
 const int kMaxDroppableFrames = 12;

 class ErrorResilienceTestLarge : public ::libvpx_test::EncoderTest,
-    public ::libvpx_test::CodecTestWithParam<libvpx_test::TestMode> {
+    public ::libvpx_test::CodecTestWith2Params<libvpx_test::TestMode, bool> {
 protected:
  ErrorResilienceTestLarge()
      : EncoderTest(GET_PARAM(0)),
+        svc_support_(GET_PARAM(2)),
        psnr_(0.0),
        nframes_(0),
        mismatch_psnr_(0.0),
@@ -193,6 +194,8 @@ class ErrorResilienceTestLarge : public ::libvpx_test::EncoderTest,
     pattern_switch_ = frame_switch;
   }

+  bool svc_support_;
+
 private:
  double psnr_;
  unsigned int nframes_;
@@ -302,6 +305,10 @@ TEST_P(ErrorResilienceTestLarge, DropFramesWithoutRecovery) {
 // two layer temporal pattern. The base layer does not predict from the top
 // layer, so successful decoding is expected.
 TEST_P(ErrorResilienceTestLarge, 2LayersDropEnhancement) {
+  // This test doesn't run if SVC is not supported.
+  if (!svc_support_)
+    return;
+
  const vpx_rational timebase = { 33333333, 1000000000 };
  cfg_.g_timebase = timebase;
  cfg_.rc_target_bitrate = 500;
@@ -347,6 +354,10 @@ TEST_P(ErrorResilienceTestLarge, 2LayersDropEnhancement) {
 // for a two layer temporal pattern, where at some point in the
 // sequence, the LAST ref is not used anymore.
 TEST_P(ErrorResilienceTestLarge, 2LayersNoRefLast) {
+  // This test doesn't run if SVC is not supported.
+  if (!svc_support_)
+    return;
+
  const vpx_rational timebase = { 33333333, 1000000000 };
  cfg_.g_timebase = timebase;
  cfg_.rc_target_bitrate = 500;
@@ -579,8 +590,13 @@ TEST_P(ErrorResilienceTestLargeCodecControls, CodecControl3TemporalLayers) {
  }
 }

-VP8_INSTANTIATE_TEST_CASE(ErrorResilienceTestLarge, ONE_PASS_TEST_MODES);
+VP8_INSTANTIATE_TEST_CASE(ErrorResilienceTestLarge, ONE_PASS_TEST_MODES,
+                          ::testing::Values(true));
 VP8_INSTANTIATE_TEST_CASE(ErrorResilienceTestLargeCodecControls,
                          ONE_PASS_TEST_MODES);
-VP9_INSTANTIATE_TEST_CASE(ErrorResilienceTestLarge, ONE_PASS_TEST_MODES);
+VP9_INSTANTIATE_TEST_CASE(ErrorResilienceTestLarge, ONE_PASS_TEST_MODES,
+                          ::testing::Values(true));
+// SVC-related tests don't run for VP10 since SVC is not supported.
+VP10_INSTANTIATE_TEST_CASE(ErrorResilienceTestLarge, ONE_PASS_TEST_MODES,
+                           ::testing::Values(false));
 }  // namespace
--- a/test/fdct4x4_test.cc
+++ b/test/fdct4x4_test.cc
@@ -13,12 +13,13 @@
 #include <string.h>

 #include "third_party/googletest/src/include/gtest/gtest.h"
+
+#include "./vp9_rtcd.h"
+#include "./vpx_dsp_rtcd.h"
 #include "test/acm_random.h"
 #include "test/clear_system_state.h"
 #include "test/register_state_check.h"
 #include "test/util.h"
-
-#include "./vp9_rtcd.h"
 #include "vp9/common/vp9_entropy.h"
 #include "vpx/vpx_codec.h"
 #include "vpx/vpx_integer.h"
@@ -40,7 +41,7 @@ typedef std::tr1::tuple<FhtFunc, IhtFunc, int, vpx_bit_depth_t> Ht4x4Param;

 void fdct4x4_ref(const int16_t *in, tran_low_t *out, int stride,
                 int tx_type) {
-  vp9_fdct4x4_c(in, out, stride);
+  vpx_fdct4x4_c(in, out, stride);
 }

 void fht4x4_ref(const int16_t *in, tran_low_t *out, int stride, int tx_type) {
@@ -54,11 +55,11 @@ void fwht4x4_ref(const int16_t *in, tran_low_t *out, int stride,

 #if CONFIG_VP9_HIGHBITDEPTH
 void idct4x4_10(const tran_low_t *in, uint8_t *out, int stride) {
-  vp9_highbd_idct4x4_16_add_c(in, out, stride, 10);
+  vpx_highbd_idct4x4_16_add_c(in, out, stride, 10);
 }

 void idct4x4_12(const tran_low_t *in, uint8_t *out, int stride) {
-  vp9_highbd_idct4x4_16_add_c(in, out, stride, 12);
+  vpx_highbd_idct4x4_16_add_c(in, out, stride, 12);
 }

 void iht4x4_10(const tran_low_t *in, uint8_t *out, int stride, int tx_type) {
@@ -70,20 +71,20 @@ void iht4x4_12(const tran_low_t *in, uint8_t *out, int stride, int tx_type) {
 }

 void iwht4x4_10(const tran_low_t *in, uint8_t *out, int stride) {
-  vp9_highbd_iwht4x4_16_add_c(in, out, stride, 10);
+  vpx_highbd_iwht4x4_16_add_c(in, out, stride, 10);
 }

 void iwht4x4_12(const tran_low_t *in, uint8_t *out, int stride) {
-  vp9_highbd_iwht4x4_16_add_c(in, out, stride, 12);
+  vpx_highbd_iwht4x4_16_add_c(in, out, stride, 12);
 }

 #if HAVE_SSE2
 void idct4x4_10_sse2(const tran_low_t *in, uint8_t *out, int stride) {
-  vp9_highbd_idct4x4_16_add_sse2(in, out, stride, 10);
+  vpx_highbd_idct4x4_16_add_sse2(in, out, stride, 10);
 }

 void idct4x4_12_sse2(const tran_low_t *in, uint8_t *out, int stride) {
-  vp9_highbd_idct4x4_16_add_sse2(in, out, stride, 12);
+  vpx_highbd_idct4x4_16_add_sse2(in, out, stride, 12);
 }
 #endif  // HAVE_SSE2
 #endif  // CONFIG_VP9_HIGHBITDEPTH
@@ -418,14 +419,14 @@ using std::tr1::make_tuple;
 INSTANTIATE_TEST_CASE_P(
    C, Trans4x4DCT,
    ::testing::Values(
-        make_tuple(&vp9_highbd_fdct4x4_c, &idct4x4_10, 0, VPX_BITS_10),
-        make_tuple(&vp9_highbd_fdct4x4_c, &idct4x4_12, 0, VPX_BITS_12),
-        make_tuple(&vp9_fdct4x4_c, &vp9_idct4x4_16_add_c, 0, VPX_BITS_8)));
+        make_tuple(&vpx_highbd_fdct4x4_c, &idct4x4_10, 0, VPX_BITS_10),
+        make_tuple(&vpx_highbd_fdct4x4_c, &idct4x4_12, 0, VPX_BITS_12),
+        make_tuple(&vpx_fdct4x4_c, &vpx_idct4x4_16_add_c, 0, VPX_BITS_8)));
 #else
 INSTANTIATE_TEST_CASE_P(
    C, Trans4x4DCT,
    ::testing::Values(
-        make_tuple(&vp9_fdct4x4_c, &vp9_idct4x4_16_add_c, 0, VPX_BITS_8)));
+        make_tuple(&vpx_fdct4x4_c, &vpx_idct4x4_16_add_c, 0, VPX_BITS_8)));
 #endif  // CONFIG_VP9_HIGHBITDEPTH

 #if CONFIG_VP9_HIGHBITDEPTH
@@ -460,20 +461,20 @@ INSTANTIATE_TEST_CASE_P(
    ::testing::Values(
        make_tuple(&vp9_highbd_fwht4x4_c, &iwht4x4_10, 0, VPX_BITS_10),
        make_tuple(&vp9_highbd_fwht4x4_c, &iwht4x4_12, 0, VPX_BITS_12),
-        make_tuple(&vp9_fwht4x4_c, &vp9_iwht4x4_16_add_c, 0, VPX_BITS_8)));
+        make_tuple(&vp9_fwht4x4_c, &vpx_iwht4x4_16_add_c, 0, VPX_BITS_8)));
 #else
 INSTANTIATE_TEST_CASE_P(
    C, Trans4x4WHT,
    ::testing::Values(
-        make_tuple(&vp9_fwht4x4_c, &vp9_iwht4x4_16_add_c, 0, VPX_BITS_8)));
+        make_tuple(&vp9_fwht4x4_c, &vpx_iwht4x4_16_add_c, 0, VPX_BITS_8)));
 #endif  // CONFIG_VP9_HIGHBITDEPTH

 #if HAVE_NEON_ASM && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
 INSTANTIATE_TEST_CASE_P(
    NEON, Trans4x4DCT,
    ::testing::Values(
-        make_tuple(&vp9_fdct4x4_c,
-                   &vp9_idct4x4_16_add_neon, 0, VPX_BITS_8)));
+        make_tuple(&vpx_fdct4x4_c,
+                   &vpx_idct4x4_16_add_neon, 0, VPX_BITS_8)));
 #endif  // HAVE_NEON_ASM && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE

 #if HAVE_NEON && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
@@ -491,15 +492,23 @@ INSTANTIATE_TEST_CASE_P(
 INSTANTIATE_TEST_CASE_P(
    MMX, Trans4x4WHT,
    ::testing::Values(
-        make_tuple(&vp9_fwht4x4_mmx, &vp9_iwht4x4_16_add_c, 0, VPX_BITS_8)));
+        make_tuple(&vp9_fwht4x4_mmx, &vpx_iwht4x4_16_add_c, 0, VPX_BITS_8)));
+#endif
+
+#if CONFIG_USE_X86INC && HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH && \
+    !CONFIG_EMULATE_HARDWARE
+INSTANTIATE_TEST_CASE_P(
+    SSE2, Trans4x4WHT,
+    ::testing::Values(
+        make_tuple(&vp9_fwht4x4_c, &vpx_iwht4x4_16_add_sse2, 0, VPX_BITS_8)));
 #endif

 #if HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
 INSTANTIATE_TEST_CASE_P(
    SSE2, Trans4x4DCT,
    ::testing::Values(
-        make_tuple(&vp9_fdct4x4_sse2,
-                   &vp9_idct4x4_16_add_sse2, 0, VPX_BITS_8)));
+        make_tuple(&vpx_fdct4x4_sse2,
+                   &vpx_idct4x4_16_add_sse2, 0, VPX_BITS_8)));
 INSTANTIATE_TEST_CASE_P(
    SSE2, Trans4x4HT,
    ::testing::Values(
@@ -513,24 +522,16 @@ INSTANTIATE_TEST_CASE_P(
 INSTANTIATE_TEST_CASE_P(
    SSE2, Trans4x4DCT,
    ::testing::Values(
-        make_tuple(&vp9_highbd_fdct4x4_c,    &idct4x4_10_sse2, 0, VPX_BITS_10),
-        make_tuple(&vp9_highbd_fdct4x4_sse2, &idct4x4_10_sse2, 0, VPX_BITS_10),
-        make_tuple(&vp9_highbd_fdct4x4_c,    &idct4x4_12_sse2, 0, VPX_BITS_12),
-        make_tuple(&vp9_highbd_fdct4x4_sse2, &idct4x4_12_sse2, 0, VPX_BITS_12),
-        make_tuple(&vp9_fdct4x4_sse2,      &vp9_idct4x4_16_add_c, 0,
+        make_tuple(&vpx_highbd_fdct4x4_c,    &idct4x4_10_sse2, 0, VPX_BITS_10),
+        make_tuple(&vpx_highbd_fdct4x4_sse2, &idct4x4_10_sse2, 0, VPX_BITS_10),
+        make_tuple(&vpx_highbd_fdct4x4_c,    &idct4x4_12_sse2, 0, VPX_BITS_12),
+        make_tuple(&vpx_highbd_fdct4x4_sse2, &idct4x4_12_sse2, 0, VPX_BITS_12),
+        make_tuple(&vpx_fdct4x4_sse2,      &vpx_idct4x4_16_add_c, 0,
                   VPX_BITS_8)));

 INSTANTIATE_TEST_CASE_P(
    SSE2, Trans4x4HT,
    ::testing::Values(
-        make_tuple(&vp9_highbd_fht4x4_sse2, &iht4x4_10, 0, VPX_BITS_10),
-        make_tuple(&vp9_highbd_fht4x4_sse2, &iht4x4_10, 1, VPX_BITS_10),
-        make_tuple(&vp9_highbd_fht4x4_sse2, &iht4x4_10, 2, VPX_BITS_10),
-        make_tuple(&vp9_highbd_fht4x4_sse2, &iht4x4_10, 3, VPX_BITS_10),
-        make_tuple(&vp9_highbd_fht4x4_sse2, &iht4x4_12, 0, VPX_BITS_12),
-        make_tuple(&vp9_highbd_fht4x4_sse2, &iht4x4_12, 1, VPX_BITS_12),
-        make_tuple(&vp9_highbd_fht4x4_sse2, &iht4x4_12, 2, VPX_BITS_12),
-        make_tuple(&vp9_highbd_fht4x4_sse2, &iht4x4_12, 3, VPX_BITS_12),
        make_tuple(&vp9_fht4x4_sse2, &vp9_iht4x4_16_add_c, 0, VPX_BITS_8),
        make_tuple(&vp9_fht4x4_sse2, &vp9_iht4x4_16_add_c, 1, VPX_BITS_8),
        make_tuple(&vp9_fht4x4_sse2, &vp9_iht4x4_16_add_c, 2, VPX_BITS_8),
@@ -541,7 +542,7 @@ INSTANTIATE_TEST_CASE_P(
 INSTANTIATE_TEST_CASE_P(
    MSA, Trans4x4DCT,
    ::testing::Values(
-        make_tuple(&vp9_fdct4x4_msa, &vp9_idct4x4_16_add_msa, 0, VPX_BITS_8)));
+        make_tuple(&vpx_fdct4x4_msa, &vpx_idct4x4_16_add_msa, 0, VPX_BITS_8)));
 INSTANTIATE_TEST_CASE_P(
    MSA, Trans4x4HT,
    ::testing::Values(
--- a/test/fdct8x8_test.cc
+++ b/test/fdct8x8_test.cc
@@ -13,12 +13,13 @@
 #include <string.h>

 #include "third_party/googletest/src/include/gtest/gtest.h"
+
+#include "./vp9_rtcd.h"
+#include "./vpx_dsp_rtcd.h"
 #include "test/acm_random.h"
 #include "test/clear_system_state.h"
 #include "test/register_state_check.h"
 #include "test/util.h"
-
-#include "./vp9_rtcd.h"
 #include "vp9/common/vp9_entropy.h"
 #include "vp9/common/vp9_scan.h"
 #include "vpx/vpx_codec.h"
@@ -82,7 +83,7 @@ void reference_8x8_dct_2d(const int16_t input[kNumCoeffs],


 void fdct8x8_ref(const int16_t *in, tran_low_t *out, int stride, int tx_type) {
-  vp9_fdct8x8_c(in, out, stride);
+  vpx_fdct8x8_c(in, out, stride);
 }

 void fht8x8_ref(const int16_t *in, tran_low_t *out, int stride, int tx_type) {
@@ -91,11 +92,11 @@ void fht8x8_ref(const int16_t *in, tran_low_t *out, int stride, int tx_type) {

 #if CONFIG_VP9_HIGHBITDEPTH
 void idct8x8_10(const tran_low_t *in, uint8_t *out, int stride) {
-  vp9_highbd_idct8x8_64_add_c(in, out, stride, 10);
+  vpx_highbd_idct8x8_64_add_c(in, out, stride, 10);
 }

 void idct8x8_12(const tran_low_t *in, uint8_t *out, int stride) {
-  vp9_highbd_idct8x8_64_add_c(in, out, stride, 12);
+  vpx_highbd_idct8x8_64_add_c(in, out, stride, 12);
 }

 void iht8x8_10(const tran_low_t *in, uint8_t *out, int stride, int tx_type) {
@@ -107,28 +108,28 @@ void iht8x8_12(const tran_low_t *in, uint8_t *out, int stride, int tx_type) {
 }

 void idct8x8_10_add_10_c(const tran_low_t *in, uint8_t *out, int stride) {
-  vp9_highbd_idct8x8_10_add_c(in, out, stride, 10);
+  vpx_highbd_idct8x8_10_add_c(in, out, stride, 10);
 }

 void idct8x8_10_add_12_c(const tran_low_t *in, uint8_t *out, int stride) {
-  vp9_highbd_idct8x8_10_add_c(in, out, stride, 12);
+  vpx_highbd_idct8x8_10_add_c(in, out, stride, 12);
 }

 #if HAVE_SSE2
 void idct8x8_10_add_10_sse2(const tran_low_t *in, uint8_t *out, int stride) {
-  vp9_highbd_idct8x8_10_add_sse2(in, out, stride, 10);
+  vpx_highbd_idct8x8_10_add_sse2(in, out, stride, 10);
 }

 void idct8x8_10_add_12_sse2(const tran_low_t *in, uint8_t *out, int stride) {
-  vp9_highbd_idct8x8_10_add_sse2(in, out, stride, 12);
+  vpx_highbd_idct8x8_10_add_sse2(in, out, stride, 12);
 }

 void idct8x8_64_add_10_sse2(const tran_low_t *in, uint8_t *out, int stride) {
-  vp9_highbd_idct8x8_64_add_sse2(in, out, stride, 10);
+  vpx_highbd_idct8x8_64_add_sse2(in, out, stride, 10);
 }

 void idct8x8_64_add_12_sse2(const tran_low_t *in, uint8_t *out, int stride) {
-  vp9_highbd_idct8x8_64_add_sse2(in, out, stride, 12);
+  vpx_highbd_idct8x8_64_add_sse2(in, out, stride, 12);
 }
 #endif  // HAVE_SSE2
 #endif  // CONFIG_VP9_HIGHBITDEPTH
@@ -657,14 +658,14 @@ using std::tr1::make_tuple;
 INSTANTIATE_TEST_CASE_P(
    C, FwdTrans8x8DCT,
    ::testing::Values(
-        make_tuple(&vp9_fdct8x8_c, &vp9_idct8x8_64_add_c, 0, VPX_BITS_8),
-        make_tuple(&vp9_highbd_fdct8x8_c, &idct8x8_10, 0, VPX_BITS_10),
-        make_tuple(&vp9_highbd_fdct8x8_c, &idct8x8_12, 0, VPX_BITS_12)));
+        make_tuple(&vpx_fdct8x8_c, &vpx_idct8x8_64_add_c, 0, VPX_BITS_8),
+        make_tuple(&vpx_highbd_fdct8x8_c, &idct8x8_10, 0, VPX_BITS_10),
+        make_tuple(&vpx_highbd_fdct8x8_c, &idct8x8_12, 0, VPX_BITS_12)));
 #else
 INSTANTIATE_TEST_CASE_P(
    C, FwdTrans8x8DCT,
    ::testing::Values(
-        make_tuple(&vp9_fdct8x8_c, &vp9_idct8x8_64_add_c, 0, VPX_BITS_8)));
+        make_tuple(&vpx_fdct8x8_c, &vpx_idct8x8_64_add_c, 0, VPX_BITS_8)));
 #endif  // CONFIG_VP9_HIGHBITDEPTH

 #if CONFIG_VP9_HIGHBITDEPTH
@@ -684,8 +685,6 @@ INSTANTIATE_TEST_CASE_P(
        make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_c, 2, VPX_BITS_8),
        make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_c, 3, VPX_BITS_8)));
 #else
-// TODO(jingning): re-enable after this handles the expanded range [0, 65535]
-// returned from Rand16().
 INSTANTIATE_TEST_CASE_P(
    C, FwdTrans8x8HT,
    ::testing::Values(
@@ -696,12 +695,10 @@ INSTANTIATE_TEST_CASE_P(
 #endif  // CONFIG_VP9_HIGHBITDEPTH

 #if HAVE_NEON_ASM && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
-// TODO(jingning): re-enable after this handles the expanded range [0, 65535]
-// returned from Rand16().
 INSTANTIATE_TEST_CASE_P(
    NEON, FwdTrans8x8DCT,
    ::testing::Values(
-        make_tuple(&vp9_fdct8x8_neon, &vp9_idct8x8_64_add_neon, 0,
+        make_tuple(&vpx_fdct8x8_neon, &vpx_idct8x8_64_add_neon, 0,
                   VPX_BITS_8)));
 #endif  // HAVE_NEON_ASM && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE

@@ -716,12 +713,10 @@ INSTANTIATE_TEST_CASE_P(
 #endif  // HAVE_NEON && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE

 #if HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
-// TODO(jingning): re-enable after these handle the expanded range [0, 65535]
-// returned from Rand16().
 INSTANTIATE_TEST_CASE_P(
    SSE2, FwdTrans8x8DCT,
    ::testing::Values(
-        make_tuple(&vp9_fdct8x8_sse2, &vp9_idct8x8_64_add_sse2, 0,
+        make_tuple(&vpx_fdct8x8_sse2, &vpx_idct8x8_64_add_sse2, 0,
                   VPX_BITS_8)));
 INSTANTIATE_TEST_CASE_P(
    SSE2, FwdTrans8x8HT,
@@ -736,18 +731,16 @@ INSTANTIATE_TEST_CASE_P(
 INSTANTIATE_TEST_CASE_P(
    SSE2, FwdTrans8x8DCT,
    ::testing::Values(
-        make_tuple(&vp9_fdct8x8_sse2, &vp9_idct8x8_64_add_c, 0, VPX_BITS_8),
-        make_tuple(&vp9_highbd_fdct8x8_c,
+        make_tuple(&vpx_fdct8x8_sse2, &vpx_idct8x8_64_add_c, 0, VPX_BITS_8),
+        make_tuple(&vpx_highbd_fdct8x8_c,
                   &idct8x8_64_add_10_sse2, 12, VPX_BITS_10),
-        make_tuple(&vp9_highbd_fdct8x8_sse2,
+        make_tuple(&vpx_highbd_fdct8x8_sse2,
                   &idct8x8_64_add_10_sse2, 12, VPX_BITS_10),
-        make_tuple(&vp9_highbd_fdct8x8_c,
+        make_tuple(&vpx_highbd_fdct8x8_c,
                   &idct8x8_64_add_12_sse2, 12, VPX_BITS_12),
-        make_tuple(&vp9_highbd_fdct8x8_sse2,
+        make_tuple(&vpx_highbd_fdct8x8_sse2,
                   &idct8x8_64_add_12_sse2, 12, VPX_BITS_12)));

-// TODO(jingning): re-enable after these handle the expanded range [0, 65535]
-// returned from Rand16().
 INSTANTIATE_TEST_CASE_P(
    SSE2, FwdTrans8x8HT,
    ::testing::Values(
@@ -771,14 +764,12 @@ INSTANTIATE_TEST_CASE_P(
                   &idct8x8_64_add_12_sse2, 6225, VPX_BITS_12)));
 #endif  // HAVE_SSE2 && CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE

-#if HAVE_SSSE3 && ARCH_X86_64 && !CONFIG_VP9_HIGHBITDEPTH && \
-    !CONFIG_EMULATE_HARDWARE
-// TODO(jingning): re-enable after this handles the expanded range [0, 65535]
-// returned from Rand16().
+#if HAVE_SSSE3 && CONFIG_USE_X86INC && ARCH_X86_64 && \
+    !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
 INSTANTIATE_TEST_CASE_P(
    SSSE3, FwdTrans8x8DCT,
    ::testing::Values(
-        make_tuple(&vp9_fdct8x8_ssse3, &vp9_idct8x8_64_add_ssse3, 0,
+        make_tuple(&vpx_fdct8x8_ssse3, &vpx_idct8x8_64_add_ssse3, 0,
                   VPX_BITS_8)));
 #endif

@@ -786,7 +777,7 @@ INSTANTIATE_TEST_CASE_P(
 INSTANTIATE_TEST_CASE_P(
    MSA, FwdTrans8x8DCT,
    ::testing::Values(
-        make_tuple(&vp9_fdct8x8_msa, &vp9_idct8x8_64_add_msa, 0, VPX_BITS_8)));
+        make_tuple(&vpx_fdct8x8_msa, &vpx_idct8x8_64_add_msa, 0, VPX_BITS_8)));
 INSTANTIATE_TEST_CASE_P(
    MSA, FwdTrans8x8HT,
    ::testing::Values(
--- a/test/frame_size_tests.cc
+++ b/test/frame_size_tests.cc
@@ -74,7 +74,7 @@ TEST_F(VP9FrameSizeTestsLarge, ValidSizes) {
  // size or almost 1 gig of memory.
  // In total the allocations will exceed 2GiB which may cause a failure with
  // mingw + wine, use a smaller size in that case.
-#if defined(_WIN32) && !defined(_WIN64)
+#if defined(_WIN32) && !defined(_WIN64) || defined(__OS2__)
  video.SetSize(4096, 3072);
 #else
  video.SetSize(4096, 4096);
--- a/test/idct8x8_test.cc
+++ b/test/idct8x8_test.cc
@@ -14,8 +14,7 @@

 #include "third_party/googletest/src/include/gtest/gtest.h"

-#include "./vp9_rtcd.h"
-
+#include "./vpx_dsp_rtcd.h"
 #include "test/acm_random.h"
 #include "vpx/vpx_integer.h"

@@ -68,43 +67,6 @@ void reference_dct_2d(int16_t input[64], double output[64]) {
    output[i] *= 2;
 }

-void reference_idct_1d(double input[8], double output[8]) {
-  const double kPi = 3.141592653589793238462643383279502884;
-  const double kSqrt2 = 1.414213562373095048801688724209698;
-  for (int k = 0; k < 8; k++) {
-    output[k] = 0.0;
-    for (int n = 0; n < 8; n++) {
-      output[k] += input[n]*cos(kPi*(2*k+1)*n/16.0);
-      if (n == 0)
-        output[k] = output[k]/kSqrt2;
-    }
-  }
-}
-
-void reference_idct_2d(double input[64], int16_t output[64]) {
-  double out[64], out2[64];
-  // First transform rows
-  for (int i = 0; i < 8; ++i) {
-    double temp_in[8], temp_out[8];
-    for (int j = 0; j < 8; ++j)
-      temp_in[j] = input[j + i*8];
-    reference_idct_1d(temp_in, temp_out);
-    for (int j = 0; j < 8; ++j)
-      out[j + i*8] = temp_out[j];
-  }
-  // Then transform columns
-  for (int i = 0; i < 8; ++i) {
-    double temp_in[8], temp_out[8];
-    for (int j = 0; j < 8; ++j)
-      temp_in[j] = out[j*8 + i];
-    reference_idct_1d(temp_in, temp_out);
-    for (int j = 0; j < 8; ++j)
-      out2[j*8 + i] = temp_out[j];
-  }
-  for (int i = 0; i < 64; ++i)
-    output[i] = round(out2[i]/32);
-}
-
 TEST(VP9Idct8x8Test, AccuracyCheck) {
  ACMRandom rnd(ACMRandom::DeterministicSeed());
  const int count_test_block = 10000;
@@ -125,7 +87,7 @@ TEST(VP9Idct8x8Test, AccuracyCheck) {
    reference_dct_2d(input, output_r);
    for (int j = 0; j < 64; ++j)
      coeff[j] = round(output_r[j]);
-    vp9_idct8x8_64_add_c(coeff, dst, 8);
+    vpx_idct8x8_64_add_c(coeff, dst, 8);
    for (int j = 0; j < 64; ++j) {
      const int diff = dst[j] - src[j];
      const int error = diff * diff;
--- a/test/idct_test.cc
+++ b/test/idct_test.cc
@@ -10,10 +10,11 @@

 #include "./vpx_config.h"
 #include "./vp8_rtcd.h"
-#include "test/clear_system_state.h"
-#include "test/register_state_check.h"
+
 #include "third_party/googletest/src/include/gtest/gtest.h"

+#include "test/clear_system_state.h"
+#include "test/register_state_check.h"
 #include "vpx/vpx_integer.h"

 typedef void (*IdctFunc)(int16_t *input, unsigned char *pred_ptr,
@@ -113,4 +114,8 @@ INSTANTIATE_TEST_CASE_P(C, IDCTTest, ::testing::Values(vp8_short_idct4x4llm_c));
 INSTANTIATE_TEST_CASE_P(MMX, IDCTTest,
                        ::testing::Values(vp8_short_idct4x4llm_mmx));
 #endif
+#if HAVE_MSA
+INSTANTIATE_TEST_CASE_P(MSA, IDCTTest,
+                        ::testing::Values(vp8_short_idct4x4llm_msa));
+#endif
 }
--- a/test/intrapred_test.cc
+++ b/test/intrapred_test.cc
@@ -8,15 +8,15 @@
 *  be found in the AUTHORS file in the root of the source tree.
 */

-
 #include <string.h>
-#include "test/acm_random.h"
-#include "test/clear_system_state.h"
-#include "test/register_state_check.h"
+
 #include "third_party/googletest/src/include/gtest/gtest.h"

 #include "./vpx_config.h"
 #include "./vp8_rtcd.h"
+#include "test/acm_random.h"
+#include "test/clear_system_state.h"
+#include "test/register_state_check.h"
 #include "vp8/common/blockd.h"
 #include "vpx_mem/vpx_mem.h"

@@ -299,6 +299,11 @@ INSTANTIATE_TEST_CASE_P(NEON, IntraPredYTest,
                        ::testing::Values(
                            vp8_build_intra_predictors_mby_s_neon));
 #endif
+#if HAVE_MSA
+INSTANTIATE_TEST_CASE_P(MSA, IntraPredYTest,
+                        ::testing::Values(
+                            vp8_build_intra_predictors_mby_s_msa));
+#endif

 typedef void (*IntraPredUvFunc)(MACROBLOCKD *x,
                                uint8_t *uabove_row,
@@ -392,5 +397,10 @@ INSTANTIATE_TEST_CASE_P(NEON, IntraPredUVTest,
                        ::testing::Values(
                            vp8_build_intra_predictors_mbuv_s_neon));
 #endif
+#if HAVE_MSA
+INSTANTIATE_TEST_CASE_P(MSA, IntraPredUVTest,
+                        ::testing::Values(
+                            vp8_build_intra_predictors_mbuv_s_msa));
+#endif

 }  // namespace
--- a/test/invalid_file_test.cc
+++ b/test/invalid_file_test.cc
@@ -112,7 +112,9 @@ TEST_P(InvalidFileTest, ReturnCode) {

 const DecodeParam kVP9InvalidFileTests[] = {
  {1, "invalid-vp90-02-v2.webm"},
+#if CONFIG_VP9_HIGHBITDEPTH
  {1, "invalid-vp90-2-00-quantizer-00.webm.ivf.s5861_r01-05_b6-.v2.ivf"},
+#endif
  {1, "invalid-vp90-03-v3.webm"},
  {1, "invalid-vp90-2-00-quantizer-11.webm.ivf.s52984_r01-05_b6-.ivf"},
  {1, "invalid-vp90-2-00-quantizer-11.webm.ivf.s52984_r01-05_b6-z.ivf"},
@@ -143,7 +145,7 @@ TEST_P(InvalidFileInvalidPeekTest, ReturnCode) {
 }

 const DecodeParam kVP9InvalidFileInvalidPeekTests[] = {
-  {1, "invalid-vp90-01-v2.webm"},
+  {1, "invalid-vp90-01-v3.webm"},
 };

 VP9_INSTANTIATE_TEST_CASE(InvalidFileInvalidPeekTest,
--- a/test/lpf_8_test.cc
+++ b/test/lpf_8_test.cc
@@ -13,13 +13,13 @@
 #include <string>

 #include "third_party/googletest/src/include/gtest/gtest.h"
+
+#include "./vpx_config.h"
+#include "./vpx_dsp_rtcd.h"
 #include "test/acm_random.h"
 #include "test/clear_system_state.h"
 #include "test/register_state_check.h"
 #include "test/util.h"
-
-#include "./vpx_config.h"
-#include "./vp9_rtcd.h"
 #include "vp9/common/vp9_entropy.h"
 #include "vp9/common/vp9_loopfilter.h"
 #include "vpx/vpx_integer.h"
@@ -60,49 +60,49 @@ typedef std::tr1::tuple<dual_loop_op_t, dual_loop_op_t, int> dualloop8_param_t;
 void wrapper_vertical_16_sse2(uint16_t *s, int p, const uint8_t *blimit,
                              const uint8_t *limit, const uint8_t *thresh,
                              int count, int bd) {
-  vp9_highbd_lpf_vertical_16_sse2(s, p, blimit, limit, thresh, bd);
+  vpx_highbd_lpf_vertical_16_sse2(s, p, blimit, limit, thresh, bd);
 }

 void wrapper_vertical_16_c(uint16_t *s, int p, const uint8_t *blimit,
                           const uint8_t *limit, const uint8_t *thresh,
                           int count, int bd) {
-  vp9_highbd_lpf_vertical_16_c(s, p, blimit, limit, thresh, bd);
+  vpx_highbd_lpf_vertical_16_c(s, p, blimit, limit, thresh, bd);
 }

 void wrapper_vertical_16_dual_sse2(uint16_t *s, int p, const uint8_t *blimit,
                                   const uint8_t *limit, const uint8_t *thresh,
                                   int count, int bd) {
-  vp9_highbd_lpf_vertical_16_dual_sse2(s, p, blimit, limit, thresh, bd);
+  vpx_highbd_lpf_vertical_16_dual_sse2(s, p, blimit, limit, thresh, bd);
 }

 void wrapper_vertical_16_dual_c(uint16_t *s, int p, const uint8_t *blimit,
                                const uint8_t *limit, const uint8_t *thresh,
                                int count, int bd) {
-  vp9_highbd_lpf_vertical_16_dual_c(s, p, blimit, limit, thresh, bd);
+  vpx_highbd_lpf_vertical_16_dual_c(s, p, blimit, limit, thresh, bd);
 }
 #else
 void wrapper_vertical_16_sse2(uint8_t *s, int p, const uint8_t *blimit,
                              const uint8_t *limit, const uint8_t *thresh,
                              int count) {
-  vp9_lpf_vertical_16_sse2(s, p, blimit, limit, thresh);
+  vpx_lpf_vertical_16_sse2(s, p, blimit, limit, thresh);
 }

 void wrapper_vertical_16_c(uint8_t *s, int p, const uint8_t *blimit,
                           const uint8_t *limit, const uint8_t *thresh,
                           int count) {
-  vp9_lpf_vertical_16_c(s, p, blimit, limit, thresh);
+  vpx_lpf_vertical_16_c(s, p, blimit, limit, thresh);
 }

 void wrapper_vertical_16_dual_sse2(uint8_t *s, int p, const uint8_t *blimit,
                                   const uint8_t *limit, const uint8_t *thresh,
                                   int count) {
-  vp9_lpf_vertical_16_dual_sse2(s, p, blimit, limit, thresh);
+  vpx_lpf_vertical_16_dual_sse2(s, p, blimit, limit, thresh);
 }

 void wrapper_vertical_16_dual_c(uint8_t *s, int p, const uint8_t *blimit,
                                const uint8_t *limit, const uint8_t *thresh,
                                int count) {
-  vp9_lpf_vertical_16_dual_c(s, p, blimit, limit, thresh);
+  vpx_lpf_vertical_16_dual_c(s, p, blimit, limit, thresh);
 }
 #endif  // CONFIG_VP9_HIGHBITDEPTH
 #endif  // HAVE_SSE2
@@ -114,25 +114,25 @@ void wrapper_vertical_16_dual_c(uint8_t *s, int p, const uint8_t *blimit,
 void wrapper_vertical_16_neon(uint8_t *s, int p, const uint8_t *blimit,
                              const uint8_t *limit, const uint8_t *thresh,
                              int count) {
-  vp9_lpf_vertical_16_neon(s, p, blimit, limit, thresh);
+  vpx_lpf_vertical_16_neon(s, p, blimit, limit, thresh);
 }

 void wrapper_vertical_16_c(uint8_t *s, int p, const uint8_t *blimit,
                           const uint8_t *limit, const uint8_t *thresh,
                           int count) {
-  vp9_lpf_vertical_16_c(s, p, blimit, limit, thresh);
+  vpx_lpf_vertical_16_c(s, p, blimit, limit, thresh);
 }

 void wrapper_vertical_16_dual_neon(uint8_t *s, int p, const uint8_t *blimit,
                                   const uint8_t *limit, const uint8_t *thresh,
                                   int count) {
-  vp9_lpf_vertical_16_dual_neon(s, p, blimit, limit, thresh);
+  vpx_lpf_vertical_16_dual_neon(s, p, blimit, limit, thresh);
 }

 void wrapper_vertical_16_dual_c(uint8_t *s, int p, const uint8_t *blimit,
                                const uint8_t *limit, const uint8_t *thresh,
                                int count) {
-  vp9_lpf_vertical_16_dual_c(s, p, blimit, limit, thresh);
+  vpx_lpf_vertical_16_dual_c(s, p, blimit, limit, thresh);
 }
 #endif  // CONFIG_VP9_HIGHBITDEPTH
 #endif  // HAVE_NEON_ASM
@@ -141,13 +141,13 @@ void wrapper_vertical_16_dual_c(uint8_t *s, int p, const uint8_t *blimit,
 void wrapper_vertical_16_msa(uint8_t *s, int p, const uint8_t *blimit,
                             const uint8_t *limit, const uint8_t *thresh,
                             int count) {
-  vp9_lpf_vertical_16_msa(s, p, blimit, limit, thresh);
+  vpx_lpf_vertical_16_msa(s, p, blimit, limit, thresh);
 }

 void wrapper_vertical_16_c(uint8_t *s, int p, const uint8_t *blimit,
                           const uint8_t *limit, const uint8_t *thresh,
                           int count) {
-  vp9_lpf_vertical_16_c(s, p, blimit, limit, thresh);
+  vpx_lpf_vertical_16_c(s, p, blimit, limit, thresh);
 }
 #endif  // HAVE_MSA && (!CONFIG_VP9_HIGHBITDEPTH)

@@ -534,46 +534,46 @@ using std::tr1::make_tuple;
 INSTANTIATE_TEST_CASE_P(
    SSE2, Loop8Test6Param,
    ::testing::Values(
-        make_tuple(&vp9_highbd_lpf_horizontal_4_sse2,
-                   &vp9_highbd_lpf_horizontal_4_c, 8, 1),
-        make_tuple(&vp9_highbd_lpf_vertical_4_sse2,
-                   &vp9_highbd_lpf_vertical_4_c, 8, 1),
-        make_tuple(&vp9_highbd_lpf_horizontal_8_sse2,
-                   &vp9_highbd_lpf_horizontal_8_c, 8, 1),
-        make_tuple(&vp9_highbd_lpf_horizontal_16_sse2,
-                   &vp9_highbd_lpf_horizontal_16_c, 8, 1),
-        make_tuple(&vp9_highbd_lpf_horizontal_16_sse2,
-                   &vp9_highbd_lpf_horizontal_16_c, 8, 2),
-        make_tuple(&vp9_highbd_lpf_vertical_8_sse2,
-                   &vp9_highbd_lpf_vertical_8_c, 8, 1),
+        make_tuple(&vpx_highbd_lpf_horizontal_4_sse2,
+                   &vpx_highbd_lpf_horizontal_4_c, 8, 1),
+        make_tuple(&vpx_highbd_lpf_vertical_4_sse2,
+                   &vpx_highbd_lpf_vertical_4_c, 8, 1),
+        make_tuple(&vpx_highbd_lpf_horizontal_8_sse2,
+                   &vpx_highbd_lpf_horizontal_8_c, 8, 1),
+        make_tuple(&vpx_highbd_lpf_horizontal_16_sse2,
+                   &vpx_highbd_lpf_horizontal_16_c, 8, 1),
+        make_tuple(&vpx_highbd_lpf_horizontal_16_sse2,
+                   &vpx_highbd_lpf_horizontal_16_c, 8, 2),
+        make_tuple(&vpx_highbd_lpf_vertical_8_sse2,
+                   &vpx_highbd_lpf_vertical_8_c, 8, 1),
        make_tuple(&wrapper_vertical_16_sse2,
                   &wrapper_vertical_16_c, 8, 1),
-        make_tuple(&vp9_highbd_lpf_horizontal_4_sse2,
-                   &vp9_highbd_lpf_horizontal_4_c, 10, 1),
-        make_tuple(&vp9_highbd_lpf_vertical_4_sse2,
-                   &vp9_highbd_lpf_vertical_4_c, 10, 1),
-        make_tuple(&vp9_highbd_lpf_horizontal_8_sse2,
-                   &vp9_highbd_lpf_horizontal_8_c, 10, 1),
-        make_tuple(&vp9_highbd_lpf_horizontal_16_sse2,
-                   &vp9_highbd_lpf_horizontal_16_c, 10, 1),
-        make_tuple(&vp9_highbd_lpf_horizontal_16_sse2,
-                   &vp9_highbd_lpf_horizontal_16_c, 10, 2),
-        make_tuple(&vp9_highbd_lpf_vertical_8_sse2,
-                   &vp9_highbd_lpf_vertical_8_c, 10, 1),
+        make_tuple(&vpx_highbd_lpf_horizontal_4_sse2,
+                   &vpx_highbd_lpf_horizontal_4_c, 10, 1),
+        make_tuple(&vpx_highbd_lpf_vertical_4_sse2,
+                   &vpx_highbd_lpf_vertical_4_c, 10, 1),
+        make_tuple(&vpx_highbd_lpf_horizontal_8_sse2,
+                   &vpx_highbd_lpf_horizontal_8_c, 10, 1),
+        make_tuple(&vpx_highbd_lpf_horizontal_16_sse2,
+                   &vpx_highbd_lpf_horizontal_16_c, 10, 1),
+        make_tuple(&vpx_highbd_lpf_horizontal_16_sse2,
+                   &vpx_highbd_lpf_horizontal_16_c, 10, 2),
+        make_tuple(&vpx_highbd_lpf_vertical_8_sse2,
+                   &vpx_highbd_lpf_vertical_8_c, 10, 1),
        make_tuple(&wrapper_vertical_16_sse2,
                   &wrapper_vertical_16_c, 10, 1),
-        make_tuple(&vp9_highbd_lpf_horizontal_4_sse2,
-                   &vp9_highbd_lpf_horizontal_4_c, 12, 1),
-        make_tuple(&vp9_highbd_lpf_vertical_4_sse2,
-                   &vp9_highbd_lpf_vertical_4_c, 12, 1),
-        make_tuple(&vp9_highbd_lpf_horizontal_8_sse2,
-                   &vp9_highbd_lpf_horizontal_8_c, 12, 1),
-        make_tuple(&vp9_highbd_lpf_horizontal_16_sse2,
-                   &vp9_highbd_lpf_horizontal_16_c, 12, 1),
-        make_tuple(&vp9_highbd_lpf_horizontal_16_sse2,
-                   &vp9_highbd_lpf_horizontal_16_c, 12, 2),
-        make_tuple(&vp9_highbd_lpf_vertical_8_sse2,
-                   &vp9_highbd_lpf_vertical_8_c, 12, 1),
+        make_tuple(&vpx_highbd_lpf_horizontal_4_sse2,
+                   &vpx_highbd_lpf_horizontal_4_c, 12, 1),
+        make_tuple(&vpx_highbd_lpf_vertical_4_sse2,
+                   &vpx_highbd_lpf_vertical_4_c, 12, 1),
+        make_tuple(&vpx_highbd_lpf_horizontal_8_sse2,
+                   &vpx_highbd_lpf_horizontal_8_c, 12, 1),
+        make_tuple(&vpx_highbd_lpf_horizontal_16_sse2,
+                   &vpx_highbd_lpf_horizontal_16_c, 12, 1),
+        make_tuple(&vpx_highbd_lpf_horizontal_16_sse2,
+                   &vpx_highbd_lpf_horizontal_16_c, 12, 2),
+        make_tuple(&vpx_highbd_lpf_vertical_8_sse2,
+                   &vpx_highbd_lpf_vertical_8_c, 12, 1),
        make_tuple(&wrapper_vertical_16_sse2,
                   &wrapper_vertical_16_c, 12, 1),
        make_tuple(&wrapper_vertical_16_dual_sse2,
@@ -586,11 +586,13 @@ INSTANTIATE_TEST_CASE_P(
 INSTANTIATE_TEST_CASE_P(
    SSE2, Loop8Test6Param,
    ::testing::Values(
-        make_tuple(&vp9_lpf_horizontal_8_sse2, &vp9_lpf_horizontal_8_c, 8, 1),
-        make_tuple(&vp9_lpf_horizontal_16_sse2, &vp9_lpf_horizontal_16_c, 8, 1),
-        make_tuple(&vp9_lpf_horizontal_16_sse2, &vp9_lpf_horizontal_16_c, 8, 2),
-        make_tuple(&vp9_lpf_vertical_8_sse2, &vp9_lpf_vertical_8_c, 8, 1),
-        make_tuple(&wrapper_vertical_16_sse2, &wrapper_vertical_16_c, 8, 1)));
+        make_tuple(&vpx_lpf_horizontal_8_sse2, &vpx_lpf_horizontal_8_c, 8, 1),
+        make_tuple(&vpx_lpf_horizontal_16_sse2, &vpx_lpf_horizontal_16_c, 8, 1),
+        make_tuple(&vpx_lpf_horizontal_16_sse2, &vpx_lpf_horizontal_16_c, 8, 2),
+        make_tuple(&vpx_lpf_vertical_8_sse2, &vpx_lpf_vertical_8_c, 8, 1),
+        make_tuple(&wrapper_vertical_16_sse2, &wrapper_vertical_16_c, 8, 1),
+        make_tuple(&wrapper_vertical_16_dual_sse2,
+                   &wrapper_vertical_16_dual_c, 8, 1)));
 #endif  // CONFIG_VP9_HIGHBITDEPTH
 #endif

@@ -598,8 +600,8 @@ INSTANTIATE_TEST_CASE_P(
 INSTANTIATE_TEST_CASE_P(
    AVX2, Loop8Test6Param,
    ::testing::Values(
-        make_tuple(&vp9_lpf_horizontal_16_avx2, &vp9_lpf_horizontal_16_c, 8, 1),
-        make_tuple(&vp9_lpf_horizontal_16_avx2, &vp9_lpf_horizontal_16_c, 8,
+        make_tuple(&vpx_lpf_horizontal_16_avx2, &vpx_lpf_horizontal_16_c, 8, 1),
+        make_tuple(&vpx_lpf_horizontal_16_avx2, &vpx_lpf_horizontal_16_c, 8,
                   2)));
 #endif

@@ -608,42 +610,42 @@ INSTANTIATE_TEST_CASE_P(
 INSTANTIATE_TEST_CASE_P(
    SSE2, Loop8Test9Param,
    ::testing::Values(
-        make_tuple(&vp9_highbd_lpf_horizontal_4_dual_sse2,
-                   &vp9_highbd_lpf_horizontal_4_dual_c, 8),
-        make_tuple(&vp9_highbd_lpf_horizontal_8_dual_sse2,
-                   &vp9_highbd_lpf_horizontal_8_dual_c, 8),
-        make_tuple(&vp9_highbd_lpf_vertical_4_dual_sse2,
-                   &vp9_highbd_lpf_vertical_4_dual_c, 8),
-        make_tuple(&vp9_highbd_lpf_vertical_8_dual_sse2,
-                   &vp9_highbd_lpf_vertical_8_dual_c, 8),
-        make_tuple(&vp9_highbd_lpf_horizontal_4_dual_sse2,
-                   &vp9_highbd_lpf_horizontal_4_dual_c, 10),
-        make_tuple(&vp9_highbd_lpf_horizontal_8_dual_sse2,
-                   &vp9_highbd_lpf_horizontal_8_dual_c, 10),
-        make_tuple(&vp9_highbd_lpf_vertical_4_dual_sse2,
-                   &vp9_highbd_lpf_vertical_4_dual_c, 10),
-        make_tuple(&vp9_highbd_lpf_vertical_8_dual_sse2,
-                   &vp9_highbd_lpf_vertical_8_dual_c, 10),
-        make_tuple(&vp9_highbd_lpf_horizontal_4_dual_sse2,
-                   &vp9_highbd_lpf_horizontal_4_dual_c, 12),
-        make_tuple(&vp9_highbd_lpf_horizontal_8_dual_sse2,
-                   &vp9_highbd_lpf_horizontal_8_dual_c, 12),
-        make_tuple(&vp9_highbd_lpf_vertical_4_dual_sse2,
-                   &vp9_highbd_lpf_vertical_4_dual_c, 12),
-        make_tuple(&vp9_highbd_lpf_vertical_8_dual_sse2,
-                   &vp9_highbd_lpf_vertical_8_dual_c, 12)));
+        make_tuple(&vpx_highbd_lpf_horizontal_4_dual_sse2,
+                   &vpx_highbd_lpf_horizontal_4_dual_c, 8),
+        make_tuple(&vpx_highbd_lpf_horizontal_8_dual_sse2,
+                   &vpx_highbd_lpf_horizontal_8_dual_c, 8),
+        make_tuple(&vpx_highbd_lpf_vertical_4_dual_sse2,
+                   &vpx_highbd_lpf_vertical_4_dual_c, 8),
+        make_tuple(&vpx_highbd_lpf_vertical_8_dual_sse2,
+                   &vpx_highbd_lpf_vertical_8_dual_c, 8),
+        make_tuple(&vpx_highbd_lpf_horizontal_4_dual_sse2,
+                   &vpx_highbd_lpf_horizontal_4_dual_c, 10),
+        make_tuple(&vpx_highbd_lpf_horizontal_8_dual_sse2,
+                   &vpx_highbd_lpf_horizontal_8_dual_c, 10),
+        make_tuple(&vpx_highbd_lpf_vertical_4_dual_sse2,
+                   &vpx_highbd_lpf_vertical_4_dual_c, 10),
+        make_tuple(&vpx_highbd_lpf_vertical_8_dual_sse2,
+                   &vpx_highbd_lpf_vertical_8_dual_c, 10),
+        make_tuple(&vpx_highbd_lpf_horizontal_4_dual_sse2,
+                   &vpx_highbd_lpf_horizontal_4_dual_c, 12),
+        make_tuple(&vpx_highbd_lpf_horizontal_8_dual_sse2,
+                   &vpx_highbd_lpf_horizontal_8_dual_c, 12),
+        make_tuple(&vpx_highbd_lpf_vertical_4_dual_sse2,
+                   &vpx_highbd_lpf_vertical_4_dual_c, 12),
+        make_tuple(&vpx_highbd_lpf_vertical_8_dual_sse2,
+                   &vpx_highbd_lpf_vertical_8_dual_c, 12)));
 #else
 INSTANTIATE_TEST_CASE_P(
    SSE2, Loop8Test9Param,
    ::testing::Values(
-        make_tuple(&vp9_lpf_horizontal_4_dual_sse2,
-                   &vp9_lpf_horizontal_4_dual_c, 8),
-        make_tuple(&vp9_lpf_horizontal_8_dual_sse2,
-                   &vp9_lpf_horizontal_8_dual_c, 8),
-        make_tuple(&vp9_lpf_vertical_4_dual_sse2,
-                   &vp9_lpf_vertical_4_dual_c, 8),
-        make_tuple(&vp9_lpf_vertical_8_dual_sse2,
-                   &vp9_lpf_vertical_8_dual_c, 8)));
+        make_tuple(&vpx_lpf_horizontal_4_dual_sse2,
+                   &vpx_lpf_horizontal_4_dual_c, 8),
+        make_tuple(&vpx_lpf_horizontal_8_dual_sse2,
+                   &vpx_lpf_horizontal_8_dual_c, 8),
+        make_tuple(&vpx_lpf_vertical_4_dual_sse2,
+                   &vpx_lpf_vertical_4_dual_c, 8),
+        make_tuple(&vpx_lpf_vertical_8_dual_sse2,
+                   &vpx_lpf_vertical_8_dual_c, 8)));
 #endif  // CONFIG_VP9_HIGHBITDEPTH
 #endif

@@ -657,36 +659,36 @@ INSTANTIATE_TEST_CASE_P(
 #if HAVE_NEON_ASM
 // Using #if inside the macro is unsupported on MSVS but the tests are not
 // currently built for MSVS with ARM and NEON.
-        make_tuple(&vp9_lpf_horizontal_16_neon,
-                   &vp9_lpf_horizontal_16_c, 8, 1),
-        make_tuple(&vp9_lpf_horizontal_16_neon,
-                   &vp9_lpf_horizontal_16_c, 8, 2),
+        make_tuple(&vpx_lpf_horizontal_16_neon,
+                   &vpx_lpf_horizontal_16_c, 8, 1),
+        make_tuple(&vpx_lpf_horizontal_16_neon,
+                   &vpx_lpf_horizontal_16_c, 8, 2),
        make_tuple(&wrapper_vertical_16_neon,
                   &wrapper_vertical_16_c, 8, 1),
        make_tuple(&wrapper_vertical_16_dual_neon,
                   &wrapper_vertical_16_dual_c, 8, 1),
-        make_tuple(&vp9_lpf_horizontal_8_neon,
-                   &vp9_lpf_horizontal_8_c, 8, 1),
-        make_tuple(&vp9_lpf_vertical_8_neon,
-                   &vp9_lpf_vertical_8_c, 8, 1),
 #endif  // HAVE_NEON_ASM
-        make_tuple(&vp9_lpf_horizontal_4_neon,
-                   &vp9_lpf_horizontal_4_c, 8, 1),
-        make_tuple(&vp9_lpf_vertical_4_neon,
-                   &vp9_lpf_vertical_4_c, 8, 1)));
+        make_tuple(&vpx_lpf_horizontal_8_neon,
+                   &vpx_lpf_horizontal_8_c, 8, 1),
+        make_tuple(&vpx_lpf_vertical_8_neon,
+                   &vpx_lpf_vertical_8_c, 8, 1),
+        make_tuple(&vpx_lpf_horizontal_4_neon,
+                   &vpx_lpf_horizontal_4_c, 8, 1),
+        make_tuple(&vpx_lpf_vertical_4_neon,
+                   &vpx_lpf_vertical_4_c, 8, 1)));
 INSTANTIATE_TEST_CASE_P(
    NEON, Loop8Test9Param,
    ::testing::Values(
 #if HAVE_NEON_ASM
-        make_tuple(&vp9_lpf_horizontal_8_dual_neon,
-                   &vp9_lpf_horizontal_8_dual_c, 8),
-        make_tuple(&vp9_lpf_vertical_8_dual_neon,
-                   &vp9_lpf_vertical_8_dual_c, 8),
+        make_tuple(&vpx_lpf_horizontal_8_dual_neon,
+                   &vpx_lpf_horizontal_8_dual_c, 8),
+        make_tuple(&vpx_lpf_vertical_8_dual_neon,
+                   &vpx_lpf_vertical_8_dual_c, 8),
 #endif  // HAVE_NEON_ASM
-        make_tuple(&vp9_lpf_horizontal_4_dual_neon,
-                   &vp9_lpf_horizontal_4_dual_c, 8),
-        make_tuple(&vp9_lpf_vertical_4_dual_neon,
-                   &vp9_lpf_vertical_4_dual_c, 8)));
+        make_tuple(&vpx_lpf_horizontal_4_dual_neon,
+                   &vpx_lpf_horizontal_4_dual_c, 8),
+        make_tuple(&vpx_lpf_vertical_4_dual_neon,
+                   &vpx_lpf_vertical_4_dual_c, 8)));
 #endif  // CONFIG_VP9_HIGHBITDEPTH
 #endif  // HAVE_NEON

@@ -694,23 +696,23 @@ INSTANTIATE_TEST_CASE_P(
 INSTANTIATE_TEST_CASE_P(
    MSA, Loop8Test6Param,
    ::testing::Values(
-        make_tuple(&vp9_lpf_horizontal_8_msa, &vp9_lpf_horizontal_8_c, 8, 1),
-        make_tuple(&vp9_lpf_horizontal_16_msa, &vp9_lpf_horizontal_16_c, 8, 1),
-        make_tuple(&vp9_lpf_horizontal_16_msa, &vp9_lpf_horizontal_16_c, 8, 2),
-        make_tuple(&vp9_lpf_vertical_8_msa, &vp9_lpf_vertical_8_c, 8, 1),
+        make_tuple(&vpx_lpf_horizontal_8_msa, &vpx_lpf_horizontal_8_c, 8, 1),
+        make_tuple(&vpx_lpf_horizontal_16_msa, &vpx_lpf_horizontal_16_c, 8, 1),
+        make_tuple(&vpx_lpf_horizontal_16_msa, &vpx_lpf_horizontal_16_c, 8, 2),
+        make_tuple(&vpx_lpf_vertical_8_msa, &vpx_lpf_vertical_8_c, 8, 1),
        make_tuple(&wrapper_vertical_16_msa, &wrapper_vertical_16_c, 8, 1)));

 INSTANTIATE_TEST_CASE_P(
    MSA, Loop8Test9Param,
    ::testing::Values(
-        make_tuple(&vp9_lpf_horizontal_4_dual_msa,
-                   &vp9_lpf_horizontal_4_dual_c, 8),
-        make_tuple(&vp9_lpf_horizontal_8_dual_msa,
-                   &vp9_lpf_horizontal_8_dual_c, 8),
-        make_tuple(&vp9_lpf_vertical_4_dual_msa,
-                   &vp9_lpf_vertical_4_dual_c, 8),
-        make_tuple(&vp9_lpf_vertical_8_dual_msa,
-                   &vp9_lpf_vertical_8_dual_c, 8)));
+        make_tuple(&vpx_lpf_horizontal_4_dual_msa,
+                   &vpx_lpf_horizontal_4_dual_c, 8),
+        make_tuple(&vpx_lpf_horizontal_8_dual_msa,
+                   &vpx_lpf_horizontal_8_dual_c, 8),
+        make_tuple(&vpx_lpf_vertical_4_dual_msa,
+                   &vpx_lpf_vertical_4_dual_c, 8),
+        make_tuple(&vpx_lpf_vertical_8_dual_msa,
+                   &vpx_lpf_vertical_8_dual_c, 8)));
 #endif  // HAVE_MSA && (!CONFIG_VP9_HIGHBITDEPTH)

 }  // namespace
--- a/test/partial_idct_test.cc
+++ b/test/partial_idct_test.cc
@@ -13,12 +13,13 @@
 #include <string.h>

 #include "third_party/googletest/src/include/gtest/gtest.h"
+
+#include "./vp9_rtcd.h"
+#include "./vpx_dsp_rtcd.h"
 #include "test/acm_random.h"
 #include "test/clear_system_state.h"
 #include "test/register_state_check.h"
 #include "test/util.h"
-
-#include "./vp9_rtcd.h"
 #include "vp9/common/vp9_blockd.h"
 #include "vp9/common/vp9_scan.h"
 #include "vpx/vpx_integer.h"
@@ -201,62 +202,62 @@ using std::tr1::make_tuple;
 INSTANTIATE_TEST_CASE_P(
    C, PartialIDctTest,
    ::testing::Values(
-        make_tuple(&vp9_fdct32x32_c,
-                   &vp9_idct32x32_1024_add_c,
-                   &vp9_idct32x32_34_add_c,
+        make_tuple(&vpx_fdct32x32_c,
+                   &vpx_idct32x32_1024_add_c,
+                   &vpx_idct32x32_34_add_c,
                   TX_32X32, 34),
-        make_tuple(&vp9_fdct32x32_c,
-                   &vp9_idct32x32_1024_add_c,
-                   &vp9_idct32x32_1_add_c,
+        make_tuple(&vpx_fdct32x32_c,
+                   &vpx_idct32x32_1024_add_c,
+                   &vpx_idct32x32_1_add_c,
                   TX_32X32, 1),
-        make_tuple(&vp9_fdct16x16_c,
-                   &vp9_idct16x16_256_add_c,
-                   &vp9_idct16x16_10_add_c,
+        make_tuple(&vpx_fdct16x16_c,
+                   &vpx_idct16x16_256_add_c,
+                   &vpx_idct16x16_10_add_c,
                   TX_16X16, 10),
-        make_tuple(&vp9_fdct16x16_c,
-                   &vp9_idct16x16_256_add_c,
-                   &vp9_idct16x16_1_add_c,
+        make_tuple(&vpx_fdct16x16_c,
+                   &vpx_idct16x16_256_add_c,
+                   &vpx_idct16x16_1_add_c,
                   TX_16X16, 1),
-        make_tuple(&vp9_fdct8x8_c,
-                   &vp9_idct8x8_64_add_c,
-                   &vp9_idct8x8_12_add_c,
+        make_tuple(&vpx_fdct8x8_c,
+                   &vpx_idct8x8_64_add_c,
+                   &vpx_idct8x8_12_add_c,
                   TX_8X8, 12),
-        make_tuple(&vp9_fdct8x8_c,
-                   &vp9_idct8x8_64_add_c,
-                   &vp9_idct8x8_1_add_c,
+        make_tuple(&vpx_fdct8x8_c,
+                   &vpx_idct8x8_64_add_c,
+                   &vpx_idct8x8_1_add_c,
                   TX_8X8, 1),
-        make_tuple(&vp9_fdct4x4_c,
-                   &vp9_idct4x4_16_add_c,
-                   &vp9_idct4x4_1_add_c,
+        make_tuple(&vpx_fdct4x4_c,
+                   &vpx_idct4x4_16_add_c,
+                   &vpx_idct4x4_1_add_c,
                   TX_4X4, 1)));

 #if HAVE_NEON && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
 INSTANTIATE_TEST_CASE_P(
    NEON, PartialIDctTest,
    ::testing::Values(
-        make_tuple(&vp9_fdct32x32_c,
-                   &vp9_idct32x32_1024_add_c,
-                   &vp9_idct32x32_1_add_neon,
+        make_tuple(&vpx_fdct32x32_c,
+                   &vpx_idct32x32_1024_add_c,
+                   &vpx_idct32x32_1_add_neon,
                   TX_32X32, 1),
-        make_tuple(&vp9_fdct16x16_c,
-                   &vp9_idct16x16_256_add_c,
-                   &vp9_idct16x16_10_add_neon,
+        make_tuple(&vpx_fdct16x16_c,
+                   &vpx_idct16x16_256_add_c,
+                   &vpx_idct16x16_10_add_neon,
                   TX_16X16, 10),
-        make_tuple(&vp9_fdct16x16_c,
-                   &vp9_idct16x16_256_add_c,
-                   &vp9_idct16x16_1_add_neon,
+        make_tuple(&vpx_fdct16x16_c,
+                   &vpx_idct16x16_256_add_c,
+                   &vpx_idct16x16_1_add_neon,
                   TX_16X16, 1),
-        make_tuple(&vp9_fdct8x8_c,
-                   &vp9_idct8x8_64_add_c,
-                   &vp9_idct8x8_12_add_neon,
+        make_tuple(&vpx_fdct8x8_c,
+                   &vpx_idct8x8_64_add_c,
+                   &vpx_idct8x8_12_add_neon,
                   TX_8X8, 12),
-        make_tuple(&vp9_fdct8x8_c,
-                   &vp9_idct8x8_64_add_c,
-                   &vp9_idct8x8_1_add_neon,
+        make_tuple(&vpx_fdct8x8_c,
+                   &vpx_idct8x8_64_add_c,
+                   &vpx_idct8x8_1_add_neon,
                   TX_8X8, 1),
-        make_tuple(&vp9_fdct4x4_c,
-                   &vp9_idct4x4_16_add_c,
-                   &vp9_idct4x4_1_add_neon,
+        make_tuple(&vpx_fdct4x4_c,
+                   &vpx_idct4x4_16_add_c,
+                   &vpx_idct4x4_1_add_neon,
                   TX_4X4, 1)));
 #endif  // HAVE_NEON && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE

@@ -264,44 +265,44 @@ INSTANTIATE_TEST_CASE_P(
 INSTANTIATE_TEST_CASE_P(
    SSE2, PartialIDctTest,
    ::testing::Values(
-        make_tuple(&vp9_fdct32x32_c,
-                   &vp9_idct32x32_1024_add_c,
-                   &vp9_idct32x32_34_add_sse2,
+        make_tuple(&vpx_fdct32x32_c,
+                   &vpx_idct32x32_1024_add_c,
+                   &vpx_idct32x32_34_add_sse2,
                   TX_32X32, 34),
-        make_tuple(&vp9_fdct32x32_c,
-                   &vp9_idct32x32_1024_add_c,
-                   &vp9_idct32x32_1_add_sse2,
+        make_tuple(&vpx_fdct32x32_c,
+                   &vpx_idct32x32_1024_add_c,
+                   &vpx_idct32x32_1_add_sse2,
                   TX_32X32, 1),
-        make_tuple(&vp9_fdct16x16_c,
-                   &vp9_idct16x16_256_add_c,
-                   &vp9_idct16x16_10_add_sse2,
+        make_tuple(&vpx_fdct16x16_c,
+                   &vpx_idct16x16_256_add_c,
+                   &vpx_idct16x16_10_add_sse2,
                   TX_16X16, 10),
-        make_tuple(&vp9_fdct16x16_c,
-                   &vp9_idct16x16_256_add_c,
-                   &vp9_idct16x16_1_add_sse2,
+        make_tuple(&vpx_fdct16x16_c,
+                   &vpx_idct16x16_256_add_c,
+                   &vpx_idct16x16_1_add_sse2,
                   TX_16X16, 1),
-        make_tuple(&vp9_fdct8x8_c,
-                   &vp9_idct8x8_64_add_c,
-                   &vp9_idct8x8_12_add_sse2,
+        make_tuple(&vpx_fdct8x8_c,
+                   &vpx_idct8x8_64_add_c,
+                   &vpx_idct8x8_12_add_sse2,
                   TX_8X8, 12),
-        make_tuple(&vp9_fdct8x8_c,
-                   &vp9_idct8x8_64_add_c,
-                   &vp9_idct8x8_1_add_sse2,
+        make_tuple(&vpx_fdct8x8_c,
+                   &vpx_idct8x8_64_add_c,
+                   &vpx_idct8x8_1_add_sse2,
                   TX_8X8, 1),
-        make_tuple(&vp9_fdct4x4_c,
-                   &vp9_idct4x4_16_add_c,
-                   &vp9_idct4x4_1_add_sse2,
+        make_tuple(&vpx_fdct4x4_c,
+                   &vpx_idct4x4_16_add_c,
+                   &vpx_idct4x4_1_add_sse2,
                   TX_4X4, 1)));
 #endif

-#if HAVE_SSSE3 && ARCH_X86_64 && !CONFIG_VP9_HIGHBITDEPTH && \
-    !CONFIG_EMULATE_HARDWARE
+#if HAVE_SSSE3 && CONFIG_USE_X86INC && ARCH_X86_64 && \
+    !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
 INSTANTIATE_TEST_CASE_P(
    SSSE3_64, PartialIDctTest,
    ::testing::Values(
-        make_tuple(&vp9_fdct8x8_c,
-                   &vp9_idct8x8_64_add_c,
-                   &vp9_idct8x8_12_add_ssse3,
+        make_tuple(&vpx_fdct8x8_c,
+                   &vpx_idct8x8_64_add_c,
+                   &vpx_idct8x8_12_add_ssse3,
                   TX_8X8, 12)));
 #endif

@@ -309,33 +310,33 @@ INSTANTIATE_TEST_CASE_P(
 INSTANTIATE_TEST_CASE_P(
    MSA, PartialIDctTest,
    ::testing::Values(
-        make_tuple(&vp9_fdct32x32_c,
-                   &vp9_idct32x32_1024_add_c,
-                   &vp9_idct32x32_34_add_msa,
+        make_tuple(&vpx_fdct32x32_c,
+                   &vpx_idct32x32_1024_add_c,
+                   &vpx_idct32x32_34_add_msa,
                   TX_32X32, 34),
-        make_tuple(&vp9_fdct32x32_c,
-                   &vp9_idct32x32_1024_add_c,
-                   &vp9_idct32x32_1_add_msa,
+        make_tuple(&vpx_fdct32x32_c,
+                   &vpx_idct32x32_1024_add_c,
+                   &vpx_idct32x32_1_add_msa,
                   TX_32X32, 1),
-        make_tuple(&vp9_fdct16x16_c,
-                   &vp9_idct16x16_256_add_c,
-                   &vp9_idct16x16_10_add_msa,
+        make_tuple(&vpx_fdct16x16_c,
+                   &vpx_idct16x16_256_add_c,
+                   &vpx_idct16x16_10_add_msa,
                   TX_16X16, 10),
-        make_tuple(&vp9_fdct16x16_c,
-                   &vp9_idct16x16_256_add_c,
-                   &vp9_idct16x16_1_add_msa,
+        make_tuple(&vpx_fdct16x16_c,
+                   &vpx_idct16x16_256_add_c,
+                   &vpx_idct16x16_1_add_msa,
                   TX_16X16, 1),
-        make_tuple(&vp9_fdct8x8_c,
-                   &vp9_idct8x8_64_add_c,
-                   &vp9_idct8x8_12_add_msa,
+        make_tuple(&vpx_fdct8x8_c,
+                   &vpx_idct8x8_64_add_c,
+                   &vpx_idct8x8_12_add_msa,
                   TX_8X8, 10),
-        make_tuple(&vp9_fdct8x8_c,
-                   &vp9_idct8x8_64_add_c,
-                   &vp9_idct8x8_1_add_msa,
+        make_tuple(&vpx_fdct8x8_c,
+                   &vpx_idct8x8_64_add_c,
+                   &vpx_idct8x8_1_add_msa,
                   TX_8X8, 1),
-        make_tuple(&vp9_fdct4x4_c,
-                   &vp9_idct4x4_16_add_c,
-                   &vp9_idct4x4_1_add_msa,
+        make_tuple(&vpx_fdct4x4_c,
+                   &vpx_idct4x4_16_add_c,
+                   &vpx_idct4x4_1_add_msa,
                   TX_4X4, 1)));
 #endif  // HAVE_MSA && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE

--- a/test/pp_filter_test.cc
+++ b/test/pp_filter_test.cc
@@ -110,4 +110,9 @@ INSTANTIATE_TEST_CASE_P(SSE2, VP8PostProcessingFilterTest,
    ::testing::Values(vp8_post_proc_down_and_across_mb_row_sse2));
 #endif

+#if HAVE_MSA
+INSTANTIATE_TEST_CASE_P(MSA, VP8PostProcessingFilterTest,
+    ::testing::Values(vp8_post_proc_down_and_across_mb_row_msa));
+#endif
+
 }  // namespace
--- a/test/quantize_test.cc
+++ b/test/quantize_test.cc
@@ -11,13 +11,13 @@
 #include <string.h>

 #include "third_party/googletest/src/include/gtest/gtest.h"
+
+#include "./vpx_config.h"
+#include "./vp8_rtcd.h"
 #include "test/acm_random.h"
 #include "test/clear_system_state.h"
 #include "test/register_state_check.h"
 #include "test/util.h"
-
-#include "./vpx_config.h"
-#include "./vp8_rtcd.h"
 #include "vp8/common/blockd.h"
 #include "vp8/common/onyx.h"
 #include "vp8/encoder/block.h"
@@ -192,4 +192,12 @@ INSTANTIATE_TEST_CASE_P(NEON, QuantizeTest,
                        ::testing::Values(make_tuple(&vp8_fast_quantize_b_neon,
                                                     &vp8_fast_quantize_b_c)));
 #endif  // HAVE_NEON
+
+#if HAVE_MSA
+INSTANTIATE_TEST_CASE_P(
+    MSA, QuantizeTest,
+    ::testing::Values(
+        make_tuple(&vp8_fast_quantize_b_msa, &vp8_fast_quantize_b_c),
+        make_tuple(&vp8_regular_quantize_b_msa, &vp8_regular_quantize_b_c)));
+#endif  // HAVE_MSA
 }  // namespace
--- a/test/register_state_check.h
+++ b/test/register_state_check.h
@@ -96,7 +96,7 @@ class RegisterStateCheck {

 extern "C" {
 // Save the d8-d15 registers into store.
-void vp9_push_neon(int64_t *store);
+void vpx_push_neon(int64_t *store);
 }

 namespace libvpx_test {
@@ -111,7 +111,7 @@ class RegisterStateCheck {

 private:
  static bool StoreRegisters(int64_t store[8]) {
-    vp9_push_neon(store);
+    vpx_push_neon(store);
    return true;
  }

@@ -119,7 +119,7 @@ class RegisterStateCheck {
  bool Check() const {
    if (!initialized_) return false;
    int64_t post_store[8];
-    vp9_push_neon(post_store);
+    vpx_push_neon(post_store);
    for (int i = 0; i < 8; ++i) {
      EXPECT_EQ(pre_store_[i], post_store[i]) << "d"
          << i + 8 << " has been modified";
--- a/test/sad_test.cc
+++ b/test/sad_test.cc
@@ -13,18 +13,17 @@
 #include <limits.h>
 #include <stdio.h>

+#include "third_party/googletest/src/include/gtest/gtest.h"
+
 #include "./vpx_config.h"
 #include "./vpx_dsp_rtcd.h"
-#include "vpx_mem/vpx_mem.h"
-#include "vpx_ports/mem.h"
-
 #include "test/acm_random.h"
 #include "test/clear_system_state.h"
 #include "test/register_state_check.h"
 #include "test/util.h"
-#include "third_party/googletest/src/include/gtest/gtest.h"
 #include "vpx/vpx_codec.h"
-
+#include "vpx_mem/vpx_mem.h"
+#include "vpx_ports/mem.h"

 typedef unsigned int (*SadMxNFunc)(const uint8_t *src_ptr,
                                   int src_stride,
@@ -1114,4 +1113,98 @@ const SadMxNx4Param x4d_avx2_tests[] = {
 INSTANTIATE_TEST_CASE_P(AVX2, SADx4Test, ::testing::ValuesIn(x4d_avx2_tests));
 #endif  // HAVE_AVX2

+//------------------------------------------------------------------------------
+// MIPS functions
+#if HAVE_MSA
+const SadMxNFunc sad64x64_msa = vpx_sad64x64_msa;
+const SadMxNFunc sad64x32_msa = vpx_sad64x32_msa;
+const SadMxNFunc sad32x64_msa = vpx_sad32x64_msa;
+const SadMxNFunc sad32x32_msa = vpx_sad32x32_msa;
+const SadMxNFunc sad32x16_msa = vpx_sad32x16_msa;
+const SadMxNFunc sad16x32_msa = vpx_sad16x32_msa;
+const SadMxNFunc sad16x16_msa = vpx_sad16x16_msa;
+const SadMxNFunc sad16x8_msa = vpx_sad16x8_msa;
+const SadMxNFunc sad8x16_msa = vpx_sad8x16_msa;
+const SadMxNFunc sad8x8_msa = vpx_sad8x8_msa;
+const SadMxNFunc sad8x4_msa = vpx_sad8x4_msa;
+const SadMxNFunc sad4x8_msa = vpx_sad4x8_msa;
+const SadMxNFunc sad4x4_msa = vpx_sad4x4_msa;
+const SadMxNParam msa_tests[] = {
+  make_tuple(64, 64, sad64x64_msa, -1),
+  make_tuple(64, 32, sad64x32_msa, -1),
+  make_tuple(32, 64, sad32x64_msa, -1),
+  make_tuple(32, 32, sad32x32_msa, -1),
+  make_tuple(32, 16, sad32x16_msa, -1),
+  make_tuple(16, 32, sad16x32_msa, -1),
+  make_tuple(16, 16, sad16x16_msa, -1),
+  make_tuple(16, 8, sad16x8_msa, -1),
+  make_tuple(8, 16, sad8x16_msa, -1),
+  make_tuple(8, 8, sad8x8_msa, -1),
+  make_tuple(8, 4, sad8x4_msa, -1),
+  make_tuple(4, 8, sad4x8_msa, -1),
+  make_tuple(4, 4, sad4x4_msa, -1),
+};
+INSTANTIATE_TEST_CASE_P(MSA, SADTest, ::testing::ValuesIn(msa_tests));
+
+const SadMxNAvgFunc sad64x64_avg_msa = vpx_sad64x64_avg_msa;
+const SadMxNAvgFunc sad64x32_avg_msa = vpx_sad64x32_avg_msa;
+const SadMxNAvgFunc sad32x64_avg_msa = vpx_sad32x64_avg_msa;
+const SadMxNAvgFunc sad32x32_avg_msa = vpx_sad32x32_avg_msa;
+const SadMxNAvgFunc sad32x16_avg_msa = vpx_sad32x16_avg_msa;
+const SadMxNAvgFunc sad16x32_avg_msa = vpx_sad16x32_avg_msa;
+const SadMxNAvgFunc sad16x16_avg_msa = vpx_sad16x16_avg_msa;
+const SadMxNAvgFunc sad16x8_avg_msa = vpx_sad16x8_avg_msa;
+const SadMxNAvgFunc sad8x16_avg_msa = vpx_sad8x16_avg_msa;
+const SadMxNAvgFunc sad8x8_avg_msa = vpx_sad8x8_avg_msa;
+const SadMxNAvgFunc sad8x4_avg_msa = vpx_sad8x4_avg_msa;
+const SadMxNAvgFunc sad4x8_avg_msa = vpx_sad4x8_avg_msa;
+const SadMxNAvgFunc sad4x4_avg_msa = vpx_sad4x4_avg_msa;
+const SadMxNAvgParam avg_msa_tests[] = {
+  make_tuple(64, 64, sad64x64_avg_msa, -1),
+  make_tuple(64, 32, sad64x32_avg_msa, -1),
+  make_tuple(32, 64, sad32x64_avg_msa, -1),
+  make_tuple(32, 32, sad32x32_avg_msa, -1),
+  make_tuple(32, 16, sad32x16_avg_msa, -1),
+  make_tuple(16, 32, sad16x32_avg_msa, -1),
+  make_tuple(16, 16, sad16x16_avg_msa, -1),
+  make_tuple(16, 8, sad16x8_avg_msa, -1),
+  make_tuple(8, 16, sad8x16_avg_msa, -1),
+  make_tuple(8, 8, sad8x8_avg_msa, -1),
+  make_tuple(8, 4, sad8x4_avg_msa, -1),
+  make_tuple(4, 8, sad4x8_avg_msa, -1),
+  make_tuple(4, 4, sad4x4_avg_msa, -1),
+};
+INSTANTIATE_TEST_CASE_P(MSA, SADavgTest, ::testing::ValuesIn(avg_msa_tests));
+
+const SadMxNx4Func sad64x64x4d_msa = vpx_sad64x64x4d_msa;
+const SadMxNx4Func sad64x32x4d_msa = vpx_sad64x32x4d_msa;
+const SadMxNx4Func sad32x64x4d_msa = vpx_sad32x64x4d_msa;
+const SadMxNx4Func sad32x32x4d_msa = vpx_sad32x32x4d_msa;
+const SadMxNx4Func sad32x16x4d_msa = vpx_sad32x16x4d_msa;
+const SadMxNx4Func sad16x32x4d_msa = vpx_sad16x32x4d_msa;
+const SadMxNx4Func sad16x16x4d_msa = vpx_sad16x16x4d_msa;
+const SadMxNx4Func sad16x8x4d_msa = vpx_sad16x8x4d_msa;
+const SadMxNx4Func sad8x16x4d_msa = vpx_sad8x16x4d_msa;
+const SadMxNx4Func sad8x8x4d_msa = vpx_sad8x8x4d_msa;
+const SadMxNx4Func sad8x4x4d_msa = vpx_sad8x4x4d_msa;
+const SadMxNx4Func sad4x8x4d_msa = vpx_sad4x8x4d_msa;
+const SadMxNx4Func sad4x4x4d_msa = vpx_sad4x4x4d_msa;
+const SadMxNx4Param x4d_msa_tests[] = {
+  make_tuple(64, 64, sad64x64x4d_msa, -1),
+  make_tuple(64, 32, sad64x32x4d_msa, -1),
+  make_tuple(32, 64, sad32x64x4d_msa, -1),
+  make_tuple(32, 32, sad32x32x4d_msa, -1),
+  make_tuple(32, 16, sad32x16x4d_msa, -1),
+  make_tuple(16, 32, sad16x32x4d_msa, -1),
+  make_tuple(16, 16, sad16x16x4d_msa, -1),
+  make_tuple(16, 8, sad16x8x4d_msa, -1),
+  make_tuple(8, 16, sad8x16x4d_msa, -1),
+  make_tuple(8, 8, sad8x8x4d_msa, -1),
+  make_tuple(8, 4, sad8x4x4d_msa, -1),
+  make_tuple(4, 8, sad4x8x4d_msa, -1),
+  make_tuple(4, 4, sad4x4x4d_msa, -1),
+};
+INSTANTIATE_TEST_CASE_P(MSA, SADx4Test, ::testing::ValuesIn(x4d_msa_tests));
+#endif  // HAVE_MSA
+
 }  // namespace
--- a/test/sixtap_predict_test.cc
+++ b/test/sixtap_predict_test.cc
@@ -11,13 +11,15 @@
 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
+
+#include "third_party/googletest/src/include/gtest/gtest.h"
+
+#include "./vpx_config.h"
+#include "./vp8_rtcd.h"
 #include "test/acm_random.h"
 #include "test/clear_system_state.h"
 #include "test/register_state_check.h"
 #include "test/util.h"
-#include "third_party/googletest/src/include/gtest/gtest.h"
-#include "./vpx_config.h"
-#include "./vp8_rtcd.h"
 #include "vpx/vpx_integer.h"
 #include "vpx_mem/vpx_mem.h"

@@ -238,4 +240,16 @@ INSTANTIATE_TEST_CASE_P(
        make_tuple(8, 4, sixtap_8x4_ssse3),
        make_tuple(4, 4, sixtap_4x4_ssse3)));
 #endif
+#if HAVE_MSA
+const SixtapPredictFunc sixtap_16x16_msa = vp8_sixtap_predict16x16_msa;
+const SixtapPredictFunc sixtap_8x8_msa = vp8_sixtap_predict8x8_msa;
+const SixtapPredictFunc sixtap_8x4_msa = vp8_sixtap_predict8x4_msa;
+const SixtapPredictFunc sixtap_4x4_msa = vp8_sixtap_predict4x4_msa;
+INSTANTIATE_TEST_CASE_P(
+    MSA, SixtapPredictTest, ::testing::Values(
+        make_tuple(16, 16, sixtap_16x16_msa),
+        make_tuple(8, 8, sixtap_8x8_msa),
+        make_tuple(8, 4, sixtap_8x4_msa),
+        make_tuple(4, 4, sixtap_4x4_msa)));
+#endif
 }  // namespace
--- a/test/subtract_test.cc
+++ b/test/subtract_test.cc
@@ -1,123 +0,0 @@
-/*
- *  Copyright (c) 2012 The WebM project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-#include "third_party/googletest/src/include/gtest/gtest.h"
-#include "test/acm_random.h"
-#include "test/clear_system_state.h"
-#include "test/register_state_check.h"
-#include "./vpx_config.h"
-#include "./vp8_rtcd.h"
-#include "vp8/common/blockd.h"
-#include "vp8/encoder/block.h"
-#include "vpx_mem/vpx_mem.h"
-
-typedef void (*SubtractBlockFunc)(BLOCK *be, BLOCKD *bd, int pitch);
-
-namespace {
-
-class SubtractBlockTest : public ::testing::TestWithParam<SubtractBlockFunc> {
- public:
-  virtual void TearDown() {
-    libvpx_test::ClearSystemState();
-  }
-};
-
-using libvpx_test::ACMRandom;
-
-TEST_P(SubtractBlockTest, SimpleSubtract) {
-  ACMRandom rnd(ACMRandom::DeterministicSeed());
-  BLOCK be;
-  BLOCKD bd;
-  // in libvpx, this stride is always 16
-  const int kDiffPredStride = 16;
-  const int kSrcStride[] = {32, 16, 8, 4, 0};
-  const int kBlockWidth = 4;
-  const int kBlockHeight = 4;
-
-  // Allocate... align to 16 for mmx/sse tests
-  uint8_t *source = reinterpret_cast<uint8_t*>(
-      vpx_memalign(16, kBlockHeight * kSrcStride[0] * sizeof(*source)));
-  be.src_diff = reinterpret_cast<int16_t*>(
-      vpx_memalign(16, kBlockHeight * kDiffPredStride * sizeof(*be.src_diff)));
-  bd.predictor = reinterpret_cast<unsigned char*>(
-      vpx_memalign(16, kBlockHeight * kDiffPredStride * sizeof(*bd.predictor)));
-
-  for (int i = 0; kSrcStride[i] > 0; ++i) {
-    // start at block0
-    be.src = 0;
-    be.base_src = &source;
-    be.src_stride = kSrcStride[i];
-
-    // set difference
-    int16_t *src_diff = be.src_diff;
-    for (int r = 0; r < kBlockHeight; ++r) {
-      for (int c = 0; c < kBlockWidth; ++c) {
-        src_diff[c] = static_cast<int16_t>(0xa5a5u);
-      }
-      src_diff += kDiffPredStride;
-    }
-
-    // set destination
-    uint8_t *base_src = *be.base_src;
-    for (int r = 0; r < kBlockHeight; ++r) {
-      for (int c = 0; c < kBlockWidth; ++c) {
-        base_src[c] = rnd.Rand8();
-      }
-      base_src += be.src_stride;
-    }
-
-    // set predictor
-    uint8_t *predictor = bd.predictor;
-    for (int r = 0; r < kBlockHeight; ++r) {
-      for (int c = 0; c < kBlockWidth; ++c) {
-        predictor[c] = rnd.Rand8();
-      }
-      predictor += kDiffPredStride;
-    }
-
-    ASM_REGISTER_STATE_CHECK(GetParam()(&be, &bd, kDiffPredStride));
-
-    base_src = *be.base_src;
-    src_diff = be.src_diff;
-    predictor = bd.predictor;
-    for (int r = 0; r < kBlockHeight; ++r) {
-      for (int c = 0; c < kBlockWidth; ++c) {
-        EXPECT_EQ(base_src[c], (src_diff[c] + predictor[c])) << "r = " << r
-                                                             << ", c = " << c;
-      }
-      src_diff += kDiffPredStride;
-      predictor += kDiffPredStride;
-      base_src += be.src_stride;
-    }
-  }
-  vpx_free(be.src_diff);
-  vpx_free(source);
-  vpx_free(bd.predictor);
-}
-
-INSTANTIATE_TEST_CASE_P(C, SubtractBlockTest,
-                        ::testing::Values(vp8_subtract_b_c));
-
-#if HAVE_NEON
-INSTANTIATE_TEST_CASE_P(NEON, SubtractBlockTest,
-                        ::testing::Values(vp8_subtract_b_neon));
-#endif
-
-#if HAVE_MMX
-INSTANTIATE_TEST_CASE_P(MMX, SubtractBlockTest,
-                        ::testing::Values(vp8_subtract_b_mmx));
-#endif
-
-#if HAVE_SSE2
-INSTANTIATE_TEST_CASE_P(SSE2, SubtractBlockTest,
-                        ::testing::Values(vp8_subtract_b_sse2));
-#endif
-
-}  // namespace
--- a/test/superframe_test.cc
+++ b/test/superframe_test.cc
@@ -94,4 +94,7 @@ TEST_P(SuperframeTest, TestSuperframeIndexIsOptional) {

 VP9_INSTANTIATE_TEST_CASE(SuperframeTest, ::testing::Values(
    ::libvpx_test::kTwoPassGood));
+
+VP10_INSTANTIATE_TEST_CASE(SuperframeTest, ::testing::Values(
+    ::libvpx_test::kTwoPassGood));
 }  // namespace
--- a/test/test-data.mk
+++ b/test/test-data.mk
@@ -687,8 +687,8 @@ LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp93-2-20-12bit-yuv444.webm.md5
 endif  # CONFIG_VP9_HIGHBITDEPTH

 # Invalid files for testing libvpx error checking.
-LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-01-v2.webm
-LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-01-v2.webm.res
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-01-v3.webm
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-01-v3.webm.res
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-02-v2.webm
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-02-v2.webm.res
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-03-v3.webm
--- a/test/test-data.sha1
+++ b/test/test-data.sha1
@@ -6,8 +6,8 @@ b87815bf86020c592ccc7a846ba2e28ec8043902 *hantro_odd.yuv
 456d1493e52d32a5c30edf44a27debc1fa6b253a *invalid-vp90-2-00-quantizer-11.webm.ivf.s52984_r01-05_b6-.ivf.res
 c123d1f9f02fb4143abb5e271916e3a3080de8f6 *invalid-vp90-2-00-quantizer-11.webm.ivf.s52984_r01-05_b6-z.ivf
 456d1493e52d32a5c30edf44a27debc1fa6b253a *invalid-vp90-2-00-quantizer-11.webm.ivf.s52984_r01-05_b6-z.ivf.res
-fe346136b9b8c1e6f6084cc106485706915795e4 *invalid-vp90-01-v2.webm
-25751f5d3b05ff03f0719ad42cd625348eb8961e *invalid-vp90-01-v2.webm.res
+fe346136b9b8c1e6f6084cc106485706915795e4 *invalid-vp90-01-v3.webm
+5d9474c0309b7ca09a182d888f73b37a8fe1362c *invalid-vp90-01-v3.webm.res
 d78e2fceba5ac942246503ec8366f879c4775ca5 *invalid-vp90-02-v2.webm
 8e2eff4af87d2b561cce2365713269e301457ef3 *invalid-vp90-02-v2.webm.res
 df1a1453feb3c00d7d89746c7003b4163523bff3 *invalid-vp90-03-v3.webm
--- a/test/test.mk
+++ b/test/test.mk
@@ -91,6 +91,7 @@ endif
 ## shared library builds don't make these functions accessible.
 ##
 ifeq ($(CONFIG_SHARED),)
+LIBVPX_TEST_SRCS-$(CONFIG_VP9)         += lpf_8_test.cc

 ## VP8
 ifneq ($(CONFIG_VP8_ENCODER)$(CONFIG_VP8_DECODER),)
@@ -104,7 +105,6 @@ endif
 LIBVPX_TEST_SRCS-$(CONFIG_POSTPROC)    += pp_filter_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_VP8_DECODER) += vp8_decrypt_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += set_roi.cc
-LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += subtract_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += variance_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += vp8_fdct4x4_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += quantize_test.cc
@@ -143,7 +143,6 @@ LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += fdct4x4_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += fdct8x8_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += variance_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += vp9_subtract_test.cc
-LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += lpf_8_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += vp9_avg_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += vp9_error_block_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += vp9_quantize_test.cc
@@ -159,13 +158,14 @@ endif
 ifeq ($(CONFIG_VP9_ENCODER)$(CONFIG_VP9_TEMPORAL_DENOISING),yesyes)
 LIBVPX_TEST_SRCS-$(HAVE_SSE2) += vp9_denoiser_sse2_test.cc
 endif
+LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += vp9_arf_freq_test.cc

 endif # VP9

 LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS)    += sad_test.cc

-TEST_INTRA_PRED_SPEED_SRCS-$(CONFIG_VP9_DECODER) := test_intra_pred_speed.cc
-TEST_INTRA_PRED_SPEED_SRCS-$(CONFIG_VP9_DECODER) += ../md5_utils.h ../md5_utils.c
+TEST_INTRA_PRED_SPEED_SRCS-$(CONFIG_VP9) := test_intra_pred_speed.cc
+TEST_INTRA_PRED_SPEED_SRCS-$(CONFIG_VP9) += ../md5_utils.h ../md5_utils.c

 endif # CONFIG_SHARED

--- a/test/test_intra_pred_speed.cc
+++ b/test/test_intra_pred_speed.cc
@@ -7,14 +7,14 @@
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
-//  Test and time VP9 intra-predictor functions
+//  Test and time VPX intra-predictor functions

 #include <stdio.h>
 #include <string.h>

 #include "third_party/googletest/src/include/gtest/gtest.h"

-#include "./vp9_rtcd.h"
+#include "./vpx_dsp_rtcd.h"
 #include "test/acm_random.h"
 #include "test/clear_system_state.h"
 #include "test/md5_helper.h"
@@ -170,215 +170,215 @@ void TestIntraPred32(VpxPredFunc const *pred_funcs) {
 #define INTRA_PRED_TEST(arch, test_func, dc, dc_left, dc_top, dc_128, v, h, \
                        d45, d135, d117, d153, d207, d63, tm)               \
  TEST(arch, test_func) {                                                   \
-    static const VpxPredFunc vp9_intra_pred[] = {                           \
+    static const VpxPredFunc vpx_intra_pred[] = {                           \
        dc,   dc_left, dc_top, dc_128, v,   h, d45,                         \
        d135, d117,    d153,   d207,   d63, tm};                            \
-    test_func(vp9_intra_pred);                                              \
+    test_func(vpx_intra_pred);                                              \
  }

 // -----------------------------------------------------------------------------
 // 4x4

-INTRA_PRED_TEST(C, TestIntraPred4, vp9_dc_predictor_4x4_c,
-                vp9_dc_left_predictor_4x4_c, vp9_dc_top_predictor_4x4_c,
-                vp9_dc_128_predictor_4x4_c, vp9_v_predictor_4x4_c,
-                vp9_h_predictor_4x4_c, vp9_d45_predictor_4x4_c,
-                vp9_d135_predictor_4x4_c, vp9_d117_predictor_4x4_c,
-                vp9_d153_predictor_4x4_c, vp9_d207_predictor_4x4_c,
-                vp9_d63_predictor_4x4_c, vp9_tm_predictor_4x4_c)
+INTRA_PRED_TEST(C, TestIntraPred4, vpx_dc_predictor_4x4_c,
+                vpx_dc_left_predictor_4x4_c, vpx_dc_top_predictor_4x4_c,
+                vpx_dc_128_predictor_4x4_c, vpx_v_predictor_4x4_c,
+                vpx_h_predictor_4x4_c, vpx_d45_predictor_4x4_c,
+                vpx_d135_predictor_4x4_c, vpx_d117_predictor_4x4_c,
+                vpx_d153_predictor_4x4_c, vpx_d207_predictor_4x4_c,
+                vpx_d63_predictor_4x4_c, vpx_tm_predictor_4x4_c)

-#if HAVE_SSE
-INTRA_PRED_TEST(SSE, TestIntraPred4, vp9_dc_predictor_4x4_sse,
-                vp9_dc_left_predictor_4x4_sse, vp9_dc_top_predictor_4x4_sse,
-                vp9_dc_128_predictor_4x4_sse, vp9_v_predictor_4x4_sse, NULL,
-                NULL, NULL, NULL, NULL, NULL, NULL, vp9_tm_predictor_4x4_sse)
-#endif  // HAVE_SSE
+#if HAVE_SSE && CONFIG_USE_X86INC
+INTRA_PRED_TEST(SSE, TestIntraPred4, vpx_dc_predictor_4x4_sse,
+                vpx_dc_left_predictor_4x4_sse, vpx_dc_top_predictor_4x4_sse,
+                vpx_dc_128_predictor_4x4_sse, vpx_v_predictor_4x4_sse, NULL,
+                NULL, NULL, NULL, NULL, NULL, NULL, vpx_tm_predictor_4x4_sse)
+#endif  // HAVE_SSE && CONFIG_USE_X86INC

-#if HAVE_SSSE3
+#if HAVE_SSSE3 && CONFIG_USE_X86INC
 INTRA_PRED_TEST(SSSE3, TestIntraPred4, NULL, NULL, NULL, NULL, NULL,
-                vp9_h_predictor_4x4_ssse3, vp9_d45_predictor_4x4_ssse3, NULL,
-                NULL, vp9_d153_predictor_4x4_ssse3,
-                vp9_d207_predictor_4x4_ssse3, vp9_d63_predictor_4x4_ssse3, NULL)
-#endif  // HAVE_SSSE3
+                vpx_h_predictor_4x4_ssse3, vpx_d45_predictor_4x4_ssse3, NULL,
+                NULL, vpx_d153_predictor_4x4_ssse3,
+                vpx_d207_predictor_4x4_ssse3, vpx_d63_predictor_4x4_ssse3, NULL)
+#endif  // HAVE_SSSE3 && CONFIG_USE_X86INC

 #if HAVE_DSPR2
-INTRA_PRED_TEST(DSPR2, TestIntraPred4, vp9_dc_predictor_4x4_dspr2, NULL, NULL,
-                NULL, NULL, vp9_h_predictor_4x4_dspr2, NULL, NULL, NULL, NULL,
-                NULL, NULL, vp9_tm_predictor_4x4_dspr2)
+INTRA_PRED_TEST(DSPR2, TestIntraPred4, vpx_dc_predictor_4x4_dspr2, NULL, NULL,
+                NULL, NULL, vpx_h_predictor_4x4_dspr2, NULL, NULL, NULL, NULL,
+                NULL, NULL, vpx_tm_predictor_4x4_dspr2)
 #endif  // HAVE_DSPR2

 #if HAVE_NEON
-INTRA_PRED_TEST(NEON, TestIntraPred4, vp9_dc_predictor_4x4_neon,
-                vp9_dc_left_predictor_4x4_neon, vp9_dc_top_predictor_4x4_neon,
-                vp9_dc_128_predictor_4x4_neon, vp9_v_predictor_4x4_neon,
-                vp9_h_predictor_4x4_neon, vp9_d45_predictor_4x4_neon,
-                vp9_d135_predictor_4x4_neon, NULL, NULL, NULL, NULL,
-                vp9_tm_predictor_4x4_neon)
+INTRA_PRED_TEST(NEON, TestIntraPred4, vpx_dc_predictor_4x4_neon,
+                vpx_dc_left_predictor_4x4_neon, vpx_dc_top_predictor_4x4_neon,
+                vpx_dc_128_predictor_4x4_neon, vpx_v_predictor_4x4_neon,
+                vpx_h_predictor_4x4_neon, vpx_d45_predictor_4x4_neon,
+                vpx_d135_predictor_4x4_neon, NULL, NULL, NULL, NULL,
+                vpx_tm_predictor_4x4_neon)
 #endif  // HAVE_NEON

 #if HAVE_MSA
-INTRA_PRED_TEST(MSA, TestIntraPred4, vp9_dc_predictor_4x4_msa,
-                vp9_dc_left_predictor_4x4_msa, vp9_dc_top_predictor_4x4_msa,
-                vp9_dc_128_predictor_4x4_msa, vp9_v_predictor_4x4_msa,
-                vp9_h_predictor_4x4_msa, NULL, NULL, NULL, NULL, NULL,
-                NULL, vp9_tm_predictor_4x4_msa)
+INTRA_PRED_TEST(MSA, TestIntraPred4, vpx_dc_predictor_4x4_msa,
+                vpx_dc_left_predictor_4x4_msa, vpx_dc_top_predictor_4x4_msa,
+                vpx_dc_128_predictor_4x4_msa, vpx_v_predictor_4x4_msa,
+                vpx_h_predictor_4x4_msa, NULL, NULL, NULL, NULL, NULL,
+                NULL, vpx_tm_predictor_4x4_msa)
 #endif  // HAVE_MSA

 // -----------------------------------------------------------------------------
 // 8x8

-INTRA_PRED_TEST(C, TestIntraPred8, vp9_dc_predictor_8x8_c,
-                vp9_dc_left_predictor_8x8_c, vp9_dc_top_predictor_8x8_c,
-                vp9_dc_128_predictor_8x8_c, vp9_v_predictor_8x8_c,
-                vp9_h_predictor_8x8_c, vp9_d45_predictor_8x8_c,
-                vp9_d135_predictor_8x8_c, vp9_d117_predictor_8x8_c,
-                vp9_d153_predictor_8x8_c, vp9_d207_predictor_8x8_c,
-                vp9_d63_predictor_8x8_c, vp9_tm_predictor_8x8_c)
+INTRA_PRED_TEST(C, TestIntraPred8, vpx_dc_predictor_8x8_c,
+                vpx_dc_left_predictor_8x8_c, vpx_dc_top_predictor_8x8_c,
+                vpx_dc_128_predictor_8x8_c, vpx_v_predictor_8x8_c,
+                vpx_h_predictor_8x8_c, vpx_d45_predictor_8x8_c,
+                vpx_d135_predictor_8x8_c, vpx_d117_predictor_8x8_c,
+                vpx_d153_predictor_8x8_c, vpx_d207_predictor_8x8_c,
+                vpx_d63_predictor_8x8_c, vpx_tm_predictor_8x8_c)

-#if HAVE_SSE
-INTRA_PRED_TEST(SSE, TestIntraPred8, vp9_dc_predictor_8x8_sse,
-                vp9_dc_left_predictor_8x8_sse, vp9_dc_top_predictor_8x8_sse,
-                vp9_dc_128_predictor_8x8_sse, vp9_v_predictor_8x8_sse, NULL,
+#if HAVE_SSE && CONFIG_USE_X86INC
+INTRA_PRED_TEST(SSE, TestIntraPred8, vpx_dc_predictor_8x8_sse,
+                vpx_dc_left_predictor_8x8_sse, vpx_dc_top_predictor_8x8_sse,
+                vpx_dc_128_predictor_8x8_sse, vpx_v_predictor_8x8_sse, NULL,
                NULL, NULL, NULL, NULL, NULL, NULL, NULL)
-#endif  // HAVE_SSE
+#endif  // HAVE_SSE && CONFIG_USE_X86INC

-#if HAVE_SSE2
+#if HAVE_SSE2 && CONFIG_USE_X86INC
 INTRA_PRED_TEST(SSE2, TestIntraPred8, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
-                NULL, NULL, NULL, NULL, NULL, vp9_tm_predictor_8x8_sse2)
-#endif  // HAVE_SSE2
+                NULL, NULL, NULL, NULL, NULL, vpx_tm_predictor_8x8_sse2)
+#endif  // HAVE_SSE2 && CONFIG_USE_X86INC

-#if HAVE_SSSE3
+#if HAVE_SSSE3 && CONFIG_USE_X86INC
 INTRA_PRED_TEST(SSSE3, TestIntraPred8, NULL, NULL, NULL, NULL, NULL,
-                vp9_h_predictor_8x8_ssse3, vp9_d45_predictor_8x8_ssse3, NULL,
-                NULL, vp9_d153_predictor_8x8_ssse3,
-                vp9_d207_predictor_8x8_ssse3, vp9_d63_predictor_8x8_ssse3, NULL)
-#endif  // HAVE_SSSE3
+                vpx_h_predictor_8x8_ssse3, vpx_d45_predictor_8x8_ssse3, NULL,
+                NULL, vpx_d153_predictor_8x8_ssse3,
+                vpx_d207_predictor_8x8_ssse3, vpx_d63_predictor_8x8_ssse3, NULL)
+#endif  // HAVE_SSSE3 && CONFIG_USE_X86INC

 #if HAVE_DSPR2
-INTRA_PRED_TEST(DSPR2, TestIntraPred8, vp9_dc_predictor_8x8_dspr2, NULL, NULL,
-                NULL, NULL, vp9_h_predictor_8x8_dspr2, NULL, NULL, NULL, NULL,
-                NULL, NULL, vp9_tm_predictor_8x8_c)
+INTRA_PRED_TEST(DSPR2, TestIntraPred8, vpx_dc_predictor_8x8_dspr2, NULL, NULL,
+                NULL, NULL, vpx_h_predictor_8x8_dspr2, NULL, NULL, NULL, NULL,
+                NULL, NULL, vpx_tm_predictor_8x8_c)
 #endif  // HAVE_DSPR2

 #if HAVE_NEON
-INTRA_PRED_TEST(NEON, TestIntraPred8, vp9_dc_predictor_8x8_neon,
-                vp9_dc_left_predictor_8x8_neon, vp9_dc_top_predictor_8x8_neon,
-                vp9_dc_128_predictor_8x8_neon, vp9_v_predictor_8x8_neon,
-                vp9_h_predictor_8x8_neon, vp9_d45_predictor_8x8_neon, NULL,
-                NULL, NULL, NULL, NULL, vp9_tm_predictor_8x8_neon)
+INTRA_PRED_TEST(NEON, TestIntraPred8, vpx_dc_predictor_8x8_neon,
+                vpx_dc_left_predictor_8x8_neon, vpx_dc_top_predictor_8x8_neon,
+                vpx_dc_128_predictor_8x8_neon, vpx_v_predictor_8x8_neon,
+                vpx_h_predictor_8x8_neon, vpx_d45_predictor_8x8_neon, NULL,
+                NULL, NULL, NULL, NULL, vpx_tm_predictor_8x8_neon)

 #endif  // HAVE_NEON

 #if HAVE_MSA
-INTRA_PRED_TEST(MSA, TestIntraPred8, vp9_dc_predictor_8x8_msa,
-                vp9_dc_left_predictor_8x8_msa, vp9_dc_top_predictor_8x8_msa,
-                vp9_dc_128_predictor_8x8_msa, vp9_v_predictor_8x8_msa,
-                vp9_h_predictor_8x8_msa, NULL, NULL, NULL, NULL, NULL,
-                NULL, vp9_tm_predictor_8x8_msa)
+INTRA_PRED_TEST(MSA, TestIntraPred8, vpx_dc_predictor_8x8_msa,
+                vpx_dc_left_predictor_8x8_msa, vpx_dc_top_predictor_8x8_msa,
+                vpx_dc_128_predictor_8x8_msa, vpx_v_predictor_8x8_msa,
+                vpx_h_predictor_8x8_msa, NULL, NULL, NULL, NULL, NULL,
+                NULL, vpx_tm_predictor_8x8_msa)
 #endif  // HAVE_MSA

 // -----------------------------------------------------------------------------
 // 16x16

-INTRA_PRED_TEST(C, TestIntraPred16, vp9_dc_predictor_16x16_c,
-                vp9_dc_left_predictor_16x16_c, vp9_dc_top_predictor_16x16_c,
-                vp9_dc_128_predictor_16x16_c, vp9_v_predictor_16x16_c,
-                vp9_h_predictor_16x16_c, vp9_d45_predictor_16x16_c,
-                vp9_d135_predictor_16x16_c, vp9_d117_predictor_16x16_c,
-                vp9_d153_predictor_16x16_c, vp9_d207_predictor_16x16_c,
-                vp9_d63_predictor_16x16_c, vp9_tm_predictor_16x16_c)
+INTRA_PRED_TEST(C, TestIntraPred16, vpx_dc_predictor_16x16_c,
+                vpx_dc_left_predictor_16x16_c, vpx_dc_top_predictor_16x16_c,
+                vpx_dc_128_predictor_16x16_c, vpx_v_predictor_16x16_c,
+                vpx_h_predictor_16x16_c, vpx_d45_predictor_16x16_c,
+                vpx_d135_predictor_16x16_c, vpx_d117_predictor_16x16_c,
+                vpx_d153_predictor_16x16_c, vpx_d207_predictor_16x16_c,
+                vpx_d63_predictor_16x16_c, vpx_tm_predictor_16x16_c)

-#if HAVE_SSE2
-INTRA_PRED_TEST(SSE2, TestIntraPred16, vp9_dc_predictor_16x16_sse2,
-                vp9_dc_left_predictor_16x16_sse2,
-                vp9_dc_top_predictor_16x16_sse2,
-                vp9_dc_128_predictor_16x16_sse2, vp9_v_predictor_16x16_sse2,
+#if HAVE_SSE2 && CONFIG_USE_X86INC
+INTRA_PRED_TEST(SSE2, TestIntraPred16, vpx_dc_predictor_16x16_sse2,
+                vpx_dc_left_predictor_16x16_sse2,
+                vpx_dc_top_predictor_16x16_sse2,
+                vpx_dc_128_predictor_16x16_sse2, vpx_v_predictor_16x16_sse2,
                NULL, NULL, NULL, NULL, NULL, NULL, NULL,
-                vp9_tm_predictor_16x16_sse2)
-#endif  // HAVE_SSE2
+                vpx_tm_predictor_16x16_sse2)
+#endif  // HAVE_SSE2 && CONFIG_USE_X86INC

-#if HAVE_SSSE3
+#if HAVE_SSSE3 && CONFIG_USE_X86INC
 INTRA_PRED_TEST(SSSE3, TestIntraPred16, NULL, NULL, NULL, NULL, NULL,
-                vp9_h_predictor_16x16_ssse3, vp9_d45_predictor_16x16_ssse3,
-                NULL, NULL, vp9_d153_predictor_16x16_ssse3,
-                vp9_d207_predictor_16x16_ssse3, vp9_d63_predictor_16x16_ssse3,
+                vpx_h_predictor_16x16_ssse3, vpx_d45_predictor_16x16_ssse3,
+                NULL, NULL, vpx_d153_predictor_16x16_ssse3,
+                vpx_d207_predictor_16x16_ssse3, vpx_d63_predictor_16x16_ssse3,
                NULL)
-#endif  // HAVE_SSSE3
+#endif  // HAVE_SSSE3 && CONFIG_USE_X86INC

 #if HAVE_DSPR2
-INTRA_PRED_TEST(DSPR2, TestIntraPred16, vp9_dc_predictor_16x16_dspr2, NULL,
-                NULL, NULL, NULL, vp9_h_predictor_16x16_dspr2, NULL, NULL, NULL,
+INTRA_PRED_TEST(DSPR2, TestIntraPred16, vpx_dc_predictor_16x16_dspr2, NULL,
+                NULL, NULL, NULL, vpx_h_predictor_16x16_dspr2, NULL, NULL, NULL,
                NULL, NULL, NULL, NULL)
 #endif  // HAVE_DSPR2

 #if HAVE_NEON
-INTRA_PRED_TEST(NEON, TestIntraPred16, vp9_dc_predictor_16x16_neon,
-                vp9_dc_left_predictor_16x16_neon,
-                vp9_dc_top_predictor_16x16_neon,
-                vp9_dc_128_predictor_16x16_neon, vp9_v_predictor_16x16_neon,
-                vp9_h_predictor_16x16_neon, vp9_d45_predictor_16x16_neon, NULL,
-                NULL, NULL, NULL, NULL, vp9_tm_predictor_16x16_neon)
+INTRA_PRED_TEST(NEON, TestIntraPred16, vpx_dc_predictor_16x16_neon,
+                vpx_dc_left_predictor_16x16_neon,
+                vpx_dc_top_predictor_16x16_neon,
+                vpx_dc_128_predictor_16x16_neon, vpx_v_predictor_16x16_neon,
+                vpx_h_predictor_16x16_neon, vpx_d45_predictor_16x16_neon, NULL,
+                NULL, NULL, NULL, NULL, vpx_tm_predictor_16x16_neon)
 #endif  // HAVE_NEON

 #if HAVE_MSA
-INTRA_PRED_TEST(MSA, TestIntraPred16, vp9_dc_predictor_16x16_msa,
-                vp9_dc_left_predictor_16x16_msa, vp9_dc_top_predictor_16x16_msa,
-                vp9_dc_128_predictor_16x16_msa, vp9_v_predictor_16x16_msa,
-                vp9_h_predictor_16x16_msa, NULL, NULL, NULL, NULL, NULL,
-                NULL, vp9_tm_predictor_16x16_msa)
+INTRA_PRED_TEST(MSA, TestIntraPred16, vpx_dc_predictor_16x16_msa,
+                vpx_dc_left_predictor_16x16_msa, vpx_dc_top_predictor_16x16_msa,
+                vpx_dc_128_predictor_16x16_msa, vpx_v_predictor_16x16_msa,
+                vpx_h_predictor_16x16_msa, NULL, NULL, NULL, NULL, NULL,
+                NULL, vpx_tm_predictor_16x16_msa)
 #endif  // HAVE_MSA

 // -----------------------------------------------------------------------------
 // 32x32

-INTRA_PRED_TEST(C, TestIntraPred32, vp9_dc_predictor_32x32_c,
-                vp9_dc_left_predictor_32x32_c, vp9_dc_top_predictor_32x32_c,
-                vp9_dc_128_predictor_32x32_c, vp9_v_predictor_32x32_c,
-                vp9_h_predictor_32x32_c, vp9_d45_predictor_32x32_c,
-                vp9_d135_predictor_32x32_c, vp9_d117_predictor_32x32_c,
-                vp9_d153_predictor_32x32_c, vp9_d207_predictor_32x32_c,
-                vp9_d63_predictor_32x32_c, vp9_tm_predictor_32x32_c)
+INTRA_PRED_TEST(C, TestIntraPred32, vpx_dc_predictor_32x32_c,
+                vpx_dc_left_predictor_32x32_c, vpx_dc_top_predictor_32x32_c,
+                vpx_dc_128_predictor_32x32_c, vpx_v_predictor_32x32_c,
+                vpx_h_predictor_32x32_c, vpx_d45_predictor_32x32_c,
+                vpx_d135_predictor_32x32_c, vpx_d117_predictor_32x32_c,
+                vpx_d153_predictor_32x32_c, vpx_d207_predictor_32x32_c,
+                vpx_d63_predictor_32x32_c, vpx_tm_predictor_32x32_c)

-#if HAVE_SSE2
+#if HAVE_SSE2 && CONFIG_USE_X86INC
 #if ARCH_X86_64
-INTRA_PRED_TEST(SSE2, TestIntraPred32, vp9_dc_predictor_32x32_sse2,
-                vp9_dc_left_predictor_32x32_sse2,
-                vp9_dc_top_predictor_32x32_sse2,
-                vp9_dc_128_predictor_32x32_sse2, vp9_v_predictor_32x32_sse2,
+INTRA_PRED_TEST(SSE2, TestIntraPred32, vpx_dc_predictor_32x32_sse2,
+                vpx_dc_left_predictor_32x32_sse2,
+                vpx_dc_top_predictor_32x32_sse2,
+                vpx_dc_128_predictor_32x32_sse2, vpx_v_predictor_32x32_sse2,
                NULL, NULL, NULL, NULL, NULL, NULL, NULL,
-                vp9_tm_predictor_32x32_sse2)
+                vpx_tm_predictor_32x32_sse2)
 #else
-INTRA_PRED_TEST(SSE2, TestIntraPred32, vp9_dc_predictor_32x32_sse2,
-                vp9_dc_left_predictor_32x32_sse2,
-                vp9_dc_top_predictor_32x32_sse2,
-                vp9_dc_128_predictor_32x32_sse2, vp9_v_predictor_32x32_sse2,
+INTRA_PRED_TEST(SSE2, TestIntraPred32, vpx_dc_predictor_32x32_sse2,
+                vpx_dc_left_predictor_32x32_sse2,
+                vpx_dc_top_predictor_32x32_sse2,
+                vpx_dc_128_predictor_32x32_sse2, vpx_v_predictor_32x32_sse2,
                NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL)
 #endif  // ARCH_X86_64
-#endif  // HAVE_SSE2
+#endif  // HAVE_SSE2 && CONFIG_USE_X86INC

-#if HAVE_SSSE3
+#if HAVE_SSSE3 && CONFIG_USE_X86INC
 INTRA_PRED_TEST(SSSE3, TestIntraPred32, NULL, NULL, NULL, NULL, NULL,
-                vp9_h_predictor_32x32_ssse3, vp9_d45_predictor_32x32_ssse3,
-                NULL, NULL, vp9_d153_predictor_32x32_ssse3,
-                vp9_d207_predictor_32x32_ssse3, vp9_d63_predictor_32x32_ssse3,
+                vpx_h_predictor_32x32_ssse3, vpx_d45_predictor_32x32_ssse3,
+                NULL, NULL, vpx_d153_predictor_32x32_ssse3,
+                vpx_d207_predictor_32x32_ssse3, vpx_d63_predictor_32x32_ssse3,
                NULL)
-#endif  // HAVE_SSSE3
+#endif  // HAVE_SSSE3 && CONFIG_USE_X86INC

 #if HAVE_NEON
-INTRA_PRED_TEST(NEON, TestIntraPred32, vp9_dc_predictor_32x32_neon,
-                vp9_dc_left_predictor_32x32_neon,
-                vp9_dc_top_predictor_32x32_neon,
-                vp9_dc_128_predictor_32x32_neon, vp9_v_predictor_32x32_neon,
-                vp9_h_predictor_32x32_neon, NULL, NULL, NULL, NULL, NULL, NULL,
-                vp9_tm_predictor_32x32_neon)
+INTRA_PRED_TEST(NEON, TestIntraPred32, vpx_dc_predictor_32x32_neon,
+                vpx_dc_left_predictor_32x32_neon,
+                vpx_dc_top_predictor_32x32_neon,
+                vpx_dc_128_predictor_32x32_neon, vpx_v_predictor_32x32_neon,
+                vpx_h_predictor_32x32_neon, NULL, NULL, NULL, NULL, NULL, NULL,
+                vpx_tm_predictor_32x32_neon)
 #endif  // HAVE_NEON

 #if HAVE_MSA
-INTRA_PRED_TEST(MSA, TestIntraPred32, vp9_dc_predictor_32x32_msa,
-                vp9_dc_left_predictor_32x32_msa, vp9_dc_top_predictor_32x32_msa,
-                vp9_dc_128_predictor_32x32_msa, vp9_v_predictor_32x32_msa,
-                vp9_h_predictor_32x32_msa, NULL, NULL, NULL, NULL, NULL,
-                NULL, vp9_tm_predictor_32x32_msa)
+INTRA_PRED_TEST(MSA, TestIntraPred32, vpx_dc_predictor_32x32_msa,
+                vpx_dc_left_predictor_32x32_msa, vpx_dc_top_predictor_32x32_msa,
+                vpx_dc_128_predictor_32x32_msa, vpx_v_predictor_32x32_msa,
+                vpx_h_predictor_32x32_msa, NULL, NULL, NULL, NULL, NULL,
+                NULL, vpx_tm_predictor_32x32_msa)
 #endif  // HAVE_MSA

 #include "test/test_libvpx.cc"
--- a/test/test_libvpx.cc
+++ b/test/test_libvpx.cc
@@ -8,6 +8,9 @@
 *  be found in the AUTHORS file in the root of the source tree.
 */
 #include <string>
+
+#include "third_party/googletest/src/include/gtest/gtest.h"
+
 #include "./vpx_config.h"
 #if ARCH_X86 || ARCH_X86_64
 #include "vpx_ports/x86.h"
@@ -22,8 +25,8 @@ extern void vp9_rtcd();
 extern void vpx_dsp_rtcd();
 extern void vpx_scale_rtcd();
 }
-#include "third_party/googletest/src/include/gtest/gtest.h"

+#if ARCH_X86 || ARCH_X86_64
 static void append_negative_gtest_filter(const char *str) {
  std::string filter = ::testing::FLAGS_gtest_filter;
  // Negative patterns begin with one '-' followed by a ':' separated list.
@@ -31,6 +34,7 @@ static void append_negative_gtest_filter(const char *str) {
  filter += str;
  ::testing::FLAGS_gtest_filter = filter;
 }
+#endif  // ARCH_X86 || ARCH_X86_64

 int main(int argc, char **argv) {
  ::testing::InitGoogleTest(&argc, argv);
@@ -53,7 +57,7 @@ int main(int argc, char **argv) {
    append_negative_gtest_filter(":AVX.*:AVX/*");
  if (!(simd_caps & HAS_AVX2))
    append_negative_gtest_filter(":AVX2.*:AVX2/*");
-#endif
+#endif  // ARCH_X86 || ARCH_X86_64

 #if !CONFIG_SHARED
 // Shared library builds don't support whitebox tests
--- a/test/test_vector_test.cc
+++ b/test/test_vector_test.cc
@@ -135,6 +135,7 @@ TEST_P(TestVectorTest, MD5Match) {

 // Test VP8 decode in serial mode with single thread.
 // NOTE: VP8 only support serial mode.
+#if CONFIG_VP8_DECODER
 VP8_INSTANTIATE_TEST_CASE(
    TestVectorTest,
    ::testing::Combine(
@@ -143,8 +144,10 @@ VP8_INSTANTIATE_TEST_CASE(
        ::testing::ValuesIn(libvpx_test::kVP8TestVectors,
                            libvpx_test::kVP8TestVectors +
                                libvpx_test::kNumVP8TestVectors)));
+#endif  // CONFIG_VP8_DECODER

 // Test VP9 decode in serial mode with single thread.
+#if CONFIG_VP9_DECODER
 VP9_INSTANTIATE_TEST_CASE(
    TestVectorTest,
    ::testing::Combine(
@@ -154,8 +157,6 @@ VP9_INSTANTIATE_TEST_CASE(
                            libvpx_test::kVP9TestVectors +
                                libvpx_test::kNumVP9TestVectors)));

-
-#if CONFIG_VP9_DECODER
 // Test VP9 decode in frame parallel mode with different number of threads.
 INSTANTIATE_TEST_CASE_P(
    VP9MultiThreadedFrameParallel, TestVectorTest,
--- a/test/tile_independence_test.cc
+++ b/test/tile_independence_test.cc
@@ -104,4 +104,5 @@ TEST_P(TileIndependenceTest, MD5Match) {

 VP9_INSTANTIATE_TEST_CASE(TileIndependenceTest, ::testing::Range(0, 2, 1));

+VP10_INSTANTIATE_TEST_CASE(TileIndependenceTest, ::testing::Range(0, 2, 1));
 }  // namespace
--- a/test/util.h
+++ b/test/util.h
@@ -19,8 +19,7 @@
 // Macros
 #define GET_PARAM(k) std::tr1::get< k >(GetParam())

-static double compute_psnr(const vpx_image_t *img1,
-                           const vpx_image_t *img2) {
+inline double compute_psnr(const vpx_image_t *img1, const vpx_image_t *img2) {
  assert((img1->fmt == img2->fmt) &&
         (img1->d_w == img2->d_w) &&
         (img1->d_h == img2->d_h));
--- a/test/variance_test.cc
+++ b/test/variance_test.cc
--- a/test/video_source.h
+++ b/test/video_source.h
@@ -48,7 +48,7 @@ static std::string GetDataPath() {
 #undef TO_STRING
 #undef STRINGIFY

-static FILE *OpenTestDataFile(const std::string& file_name) {
+inline FILE *OpenTestDataFile(const std::string& file_name) {
  const std::string path_to_source = GetDataPath() + "/" + file_name;
  return fopen(path_to_source.c_str(), "rb");
 }
--- a/test/vp8_boolcoder_test.cc
+++ b/test/vp8_boolcoder_test.cc
@@ -16,12 +16,12 @@
 #include <string.h>
 #include <sys/types.h>

-#include "test/acm_random.h"
 #include "third_party/googletest/src/include/gtest/gtest.h"
-#include "vpx/vpx_integer.h"

-#include "vp8/encoder/boolhuff.h"
+#include "test/acm_random.h"
 #include "vp8/decoder/dboolhuff.h"
+#include "vp8/encoder/boolhuff.h"
+#include "vpx/vpx_integer.h"

 namespace {
 const int num_tests = 10;
--- a/test/vp8_fdct4x4_test.cc
+++ b/test/vp8_fdct4x4_test.cc
@@ -15,10 +15,10 @@
 #include <string.h>
 #include <sys/types.h>

-#include "./vp8_rtcd.h"
-
-#include "test/acm_random.h"
 #include "third_party/googletest/src/include/gtest/gtest.h"
+
+#include "./vp8_rtcd.h"
+#include "test/acm_random.h"
 #include "vpx/vpx_integer.h"

 namespace {
--- a/test/vp9_arf_freq_test.cc
+++ b/test/vp9_arf_freq_test.cc
@@ -0,0 +1,238 @@
+/*
+ *  Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "third_party/googletest/src/include/gtest/gtest.h"
+
+#include "test/codec_factory.h"
+#include "test/encode_test_driver.h"
+#include "test/util.h"
+#include "test/y4m_video_source.h"
+#include "test/yuv_video_source.h"
+#include "vp9/encoder/vp9_ratectrl.h"
+
+namespace {
+
+const unsigned int kFrames = 100;  // Video source frame limit (presumed).
+const int kBitrate = 500;          // Assigned to cfg_.rc_target_bitrate.
+
+#define ARF_NOT_SEEN               1000001  // No 2-frame packet seen yet.
+#define ARF_SEEN_ONCE              1000000  // Exactly one 2-frame packet seen.
+
+typedef struct {
+  const char *filename;          // A ".y4m" suffix selects the Y4M source.
+  unsigned int width;
+  unsigned int height;
+  unsigned int framerate_num;    // Frame rate = framerate_num / framerate_den.
+  unsigned int framerate_den;
+  unsigned int input_bit_depth;  // Copied to cfg_.g_input_bit_depth.
+  vpx_img_fmt fmt;               // Passed to YUVVideoSource only.
+  vpx_bit_depth_t bit_depth;     // Copied to cfg_.g_bit_depth.
+  unsigned int profile;          // Copied to cfg_.g_profile.
+} TestVideoParam;
+
+typedef struct {
+  libvpx_test::TestMode mode;  // Passed to SetMode().
+  int cpu_used;                // Value for the VP8E_SET_CPUUSED control.
+} TestEncodeParam;
+
+const TestVideoParam kTestVectors[] = {  // Fields in TestVideoParam order.
+  // Artificially high frame rate (5000/1) to trigger the default check.
+  {"hantro_collage_w352h288.yuv", 352, 288, 5000, 1,
+    8, VPX_IMG_FMT_I420, VPX_BITS_8, 0},
+  {"hantro_collage_w352h288.yuv", 352, 288, 30, 1,
+    8, VPX_IMG_FMT_I420, VPX_BITS_8, 0},
+  {"rush_hour_444.y4m", 352, 288, 30, 1,
+    8, VPX_IMG_FMT_I444, VPX_BITS_8, 1},
+#if CONFIG_VP9_HIGHBITDEPTH
+  // Add list of profile 2/3 test videos here ...
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+};
+
+const TestEncodeParam kEncodeVectors[] = {  // {mode, cpu_used}
+  {::libvpx_test::kOnePassGood, 2},
+  {::libvpx_test::kOnePassGood, 5},
+  {::libvpx_test::kTwoPassGood, 1},
+  {::libvpx_test::kTwoPassGood, 2},
+  {::libvpx_test::kTwoPassGood, 5},
+  {::libvpx_test::kRealTime, 5},
+};
+
+const int kMinArfVectors[] = {  // Values for VP9E_SET_MIN_GF_INTERVAL.
+  // NOTE: 0 refers to the default built-in logic in:
+  //       vp9_rc_get_default_min_gf_interval(...)
+  0, 4, 8, 12, 15
+};
+
+// Returns 1 if |filename| ends in a ".y4m" extension, 0 otherwise.
+int is_extension_y4m(const char *filename) {
+  const char *const ext = strrchr(filename, '.');
+  // No dot at all, or a name that begins at its last dot, has no extension.
+  const bool has_ext = ext && ext != filename;
+  return (has_ext && strcmp(ext, ".y4m") == 0) ? 1 : 0;
+}
+
+// Encoder test recording the shortest run of single-frame packets between
+// two-frame (ARF-carrying) packets, to compare with the requested minimum.
+class ArfFreqTest
+    : public ::libvpx_test::EncoderTest,
+      public ::libvpx_test::CodecTestWith3Params<TestVideoParam,
+                                                 TestEncodeParam, int> {
+ protected:
+  ArfFreqTest()
+      : EncoderTest(GET_PARAM(0)),
+        test_video_param_(GET_PARAM(1)),
+        test_encode_param_(GET_PARAM(2)),
+        min_arf_requested_(GET_PARAM(3)),
+        min_run_(ARF_NOT_SEEN),
+        run_of_visible_frames_(0) {}
+
+  virtual ~ArfFreqTest() {}
+
+  // Lagged VBR for the non-realtime modes, zero-lag CBR for realtime.
+  virtual void SetUp() {
+    InitializeConfig();
+    SetMode(test_encode_param_.mode);
+    if (test_encode_param_.mode != ::libvpx_test::kRealTime) {
+      cfg_.g_lag_in_frames = 25;
+      cfg_.rc_end_usage = VPX_VBR;
+    } else {
+      cfg_.g_lag_in_frames = 0;
+      cfg_.rc_end_usage = VPX_CBR;
+      cfg_.rc_buf_sz = 1000;
+      cfg_.rc_buf_initial_sz = 500;
+      cfg_.rc_buf_optimal_sz = 600;
+    }
+    dec_cfg_.threads = 4;
+  }
+
+  // Restarts the run bookkeeping at the beginning of every pass.
+  virtual void BeginPassHook(unsigned int) {
+    min_run_ = ARF_NOT_SEEN;
+    run_of_visible_frames_ = 0;
+  }
+
+  // Returns the frame count from |pkt|'s superframe index, or 1 when there is
+  // no valid index. Assumes only one frame of a superframe is visible.
+  int GetNumFramesInPkt(const vpx_codec_cx_pkt_t *pkt) {
+    const size_t sz = pkt->data.frame.sz;
+    if (sz == 0) return 1;  // No marker byte to read at buffer[sz - 1].
+    const uint8_t *buffer = reinterpret_cast<uint8_t*>(pkt->data.frame.buf);
+    const uint8_t marker = buffer[sz - 1];
+    const int mag = ((marker >> 3) & 3) + 1;
+    const int frames = (marker & 0x7) + 1;
+    const unsigned int index_sz = 2 + mag * frames;
+    // A valid index repeats the 0b110xxxxx marker byte at its first byte.
+    const bool has_index = (marker & 0xe0) == 0xc0 && sz >= index_sz &&
+                           buffer[sz - index_sz] == marker;
+    return has_index ? frames : 1;
+  }
+
+  // Updates the minimum visible-frame run from one compressed-frame packet.
+  virtual void FramePktHook(const vpx_codec_cx_pkt_t *pkt) {
+    if (pkt->kind != VPX_CODEC_CX_FRAME_PKT) return;
+    const int frames = GetNumFramesInPkt(pkt);
+    if (frames == 1) {
+      run_of_visible_frames_++;
+    } else if (frames == 2) {
+      if (min_run_ == ARF_NOT_SEEN) {
+        min_run_ = ARF_SEEN_ONCE;  // The first run is not measured.
+      } else if (min_run_ == ARF_SEEN_ONCE ||
+                 run_of_visible_frames_ < min_run_) {
+        min_run_ = run_of_visible_frames_;
+      }
+      run_of_visible_frames_ = 1;
+    } else {  // Neither one nor two frames in the packet.
+      min_run_ = 0;
+      run_of_visible_frames_ = 1;
+    }
+  }
+
+  // Applies the encoder controls once, before the first frame is encoded.
+  virtual void PreEncodeFrameHook(::libvpx_test::VideoSource *video,
+                                  ::libvpx_test::Encoder *encoder) {
+    if (video->frame() != 0) return;
+    encoder->Control(VP9E_SET_FRAME_PARALLEL_DECODING, 1);
+    encoder->Control(VP9E_SET_TILE_COLUMNS, 4);
+    encoder->Control(VP8E_SET_CPUUSED, test_encode_param_.cpu_used);
+    encoder->Control(VP9E_SET_MIN_GF_INTERVAL, min_arf_requested_);
+    if (test_encode_param_.mode != ::libvpx_test::kRealTime) {
+      encoder->Control(VP8E_SET_ENABLEAUTOALTREF, 1);
+      encoder->Control(VP8E_SET_ARNR_MAXFRAMES, 7);
+      encoder->Control(VP8E_SET_ARNR_STRENGTH, 5);
+      encoder->Control(VP8E_SET_ARNR_TYPE, 3);
+    }
+  }
+
+  int GetMinVisibleRun() const { return min_run_; }
+
+  // A request of 0 selects the library default for this clip size and rate.
+  int GetMinArfDistanceRequested() const {
+    if (min_arf_requested_) return min_arf_requested_;
+    return vp9_rc_get_default_min_gf_interval(
+        test_video_param_.width, test_video_param_.height,
+        static_cast<double>(test_video_param_.framerate_num) /
+            test_video_param_.framerate_den);
+  }
+
+  TestVideoParam test_video_param_;
+  TestEncodeParam test_encode_param_;
+
+ private:
+  int min_arf_requested_;
+  int min_run_;
+  int run_of_visible_frames_;
+};
+
+// Checks that the observed minimum ARF distance honors the requested one.
+TEST_P(ArfFreqTest, MinArfFreqTest) {
+  cfg_.rc_target_bitrate = kBitrate;
+  cfg_.g_error_resilient = 0;
+  cfg_.g_profile = test_video_param_.profile;
+  cfg_.g_input_bit_depth = test_video_param_.input_bit_depth;
+  cfg_.g_bit_depth = test_video_param_.bit_depth;
+  init_flags_ = VPX_CODEC_USE_PSNR;
+  if (cfg_.g_bit_depth > 8)
+    init_flags_ |= VPX_CODEC_USE_HIGHBITDEPTH;
+
+  // ASSERT_NO_FATAL_FAILURE returns from the test body on failure, so the
+  // source is held by a scoped pointer that releases it on every exit path.
+  testing::internal::scoped_ptr<libvpx_test::VideoSource> video;
+  if (is_extension_y4m(test_video_param_.filename)) {
+    video.reset(new libvpx_test::Y4mVideoSource(test_video_param_.filename,
+                                                0, kFrames));
+  } else {
+    video.reset(new libvpx_test::YUVVideoSource(
+        test_video_param_.filename, test_video_param_.fmt,
+        test_video_param_.width, test_video_param_.height,
+        test_video_param_.framerate_num, test_video_param_.framerate_den,
+        0, kFrames));
+  }
+
+  ASSERT_NO_FATAL_FAILURE(RunLoop(video.get()));
+  const int min_run = GetMinVisibleRun();
+  const int min_arf_dist_requested = GetMinArfDistanceRequested();
+  if (min_run != ARF_NOT_SEEN && min_run != ARF_SEEN_ONCE) {
+    const int min_arf_dist = min_run + 1;
+    EXPECT_GE(min_arf_dist, min_arf_dist_requested);
+  }
+}
+
+VP9_INSTANTIATE_TEST_CASE(
+    ArfFreqTest,
+    ::testing::ValuesIn(kTestVectors),
+    ::testing::ValuesIn(kEncodeVectors),
+    ::testing::ValuesIn(kMinArfVectors));
+
+VP10_INSTANTIATE_TEST_CASE(
+    ArfFreqTest,
+    ::testing::ValuesIn(kTestVectors),
+    ::testing::ValuesIn(kEncodeVectors),
+    ::testing::ValuesIn(kMinArfVectors));
+}  // namespace
--- a/test/vp9_avg_test.cc
+++ b/test/vp9_avg_test.cc
@@ -8,22 +8,22 @@
 *  be found in the AUTHORS file in the root of the source tree.
 */

-
-#include <string.h>
 #include <limits.h>
 #include <stdio.h>
+#include <string.h>
+
+#include "third_party/googletest/src/include/gtest/gtest.h"

 #include "./vpx_config.h"
 #if CONFIG_VP9_ENCODER
 #include "./vp9_rtcd.h"
 #endif
-#include "vpx_mem/vpx_mem.h"

 #include "test/acm_random.h"
 #include "test/clear_system_state.h"
 #include "test/register_state_check.h"
 #include "test/util.h"
-#include "third_party/googletest/src/include/gtest/gtest.h"
+#include "vpx_mem/vpx_mem.h"

 using libvpx_test::ACMRandom;

@@ -286,6 +286,17 @@ INSTANTIATE_TEST_CASE_P(
        make_tuple(16, 16, 5, 8, &vp9_avg_8x8_neon),
        make_tuple(32, 32, 15, 8, &vp9_avg_8x8_neon)));

+INSTANTIATE_TEST_CASE_P(
+    NEON, IntProRowTest, ::testing::Values(
+        make_tuple(16, &vp9_int_pro_row_neon, &vp9_int_pro_row_c),
+        make_tuple(32, &vp9_int_pro_row_neon, &vp9_int_pro_row_c),
+        make_tuple(64, &vp9_int_pro_row_neon, &vp9_int_pro_row_c)));
+
+INSTANTIATE_TEST_CASE_P(
+    NEON, IntProColTest, ::testing::Values(
+        make_tuple(16, &vp9_int_pro_col_neon, &vp9_int_pro_col_c),
+        make_tuple(32, &vp9_int_pro_col_neon, &vp9_int_pro_col_c),
+        make_tuple(64, &vp9_int_pro_col_neon, &vp9_int_pro_col_c)));
 #endif

 #if HAVE_MSA
--- a/test/vp9_boolcoder_test.cc
+++ b/test/vp9_boolcoder_test.cc
@@ -14,11 +14,10 @@

 #include "third_party/googletest/src/include/gtest/gtest.h"

-#include "vp9/decoder/vp9_reader.h"
-#include "vp9/encoder/vp9_writer.h"
-
 #include "test/acm_random.h"
 #include "vpx/vpx_integer.h"
+#include "vpx_dsp/bitreader.h"
+#include "vpx_dsp/bitwriter.h"

 using libvpx_test::ACMRandom;

@@ -50,9 +49,9 @@ TEST(VP9, TestBitIO) {
        const int random_seed = 6432;
        const int kBufferSize = 10000;
        ACMRandom bit_rnd(random_seed);
-        vp9_writer bw;
+        vpx_writer bw;
        uint8_t bw_buffer[kBufferSize];
-        vp9_start_encode(&bw, bw_buffer);
+        vpx_start_encode(&bw, bw_buffer);

        int bit = (bit_method == 0) ? 0 : (bit_method == 1) ? 1 : 0;
        for (int i = 0; i < kBitsToTest; ++i) {
@@ -61,16 +60,16 @@ TEST(VP9, TestBitIO) {
          } else if (bit_method == 3) {
            bit = bit_rnd(2);
          }
-          vp9_write(&bw, bit, static_cast<int>(probas[i]));
+          vpx_write(&bw, bit, static_cast<int>(probas[i]));
        }

-        vp9_stop_encode(&bw);
+        vpx_stop_encode(&bw);

        // First bit should be zero
        GTEST_ASSERT_EQ(bw_buffer[0] & 0x80, 0);

-        vp9_reader br;
-        vp9_reader_init(&br, bw_buffer, kBufferSize, NULL, NULL);
+        vpx_reader br;
+        vpx_reader_init(&br, bw_buffer, kBufferSize, NULL, NULL);
        bit_rnd.Reset(random_seed);
        for (int i = 0; i < kBitsToTest; ++i) {
          if (bit_method == 2) {
@@ -78,7 +77,7 @@ TEST(VP9, TestBitIO) {
          } else if (bit_method == 3) {
            bit = bit_rnd(2);
          }
-          GTEST_ASSERT_EQ(vp9_read(&br, probas[i]), bit)
+          GTEST_ASSERT_EQ(vpx_read(&br, probas[i]), bit)
              << "pos: " << i << " / " << kBitsToTest
              << " bit_method: " << bit_method
              << " method: " << method;
--- a/test/vp9_encoder_parms_get_to_decoder.cc
+++ b/test/vp9_encoder_parms_get_to_decoder.cc
@@ -8,12 +8,13 @@
 *  be found in the AUTHORS file in the root of the source tree.
 */

+#include "third_party/googletest/src/include/gtest/gtest.h"
+
 #include "test/codec_factory.h"
 #include "test/encode_test_driver.h"
+#include "test/util.h"
 #include "test/y4m_video_source.h"
 #include "test/yuv_video_source.h"
-#include "test/util.h"
-#include "third_party/googletest/src/include/gtest/gtest.h"
 #include "vp9/decoder/vp9_decoder.h"

 typedef vpx_codec_stream_info_t vp9_stream_info_t;
@@ -85,17 +86,17 @@ int is_extension_y4m(const char *filename) {
    return !strcmp(dot, ".y4m");
 }

-class Vp9EncoderParmsGetToDecoder
+class VpxEncoderParmsGetToDecoder
    : public ::libvpx_test::EncoderTest,
      public ::libvpx_test::CodecTestWith2Params<EncodeParameters, \
                                                 EncodePerfTestVideo> {
 protected:
-  Vp9EncoderParmsGetToDecoder()
+  VpxEncoderParmsGetToDecoder()
      : EncoderTest(GET_PARAM(0)),
        encode_parms(GET_PARAM(1)) {
  }

-  virtual ~Vp9EncoderParmsGetToDecoder() {}
+  virtual ~VpxEncoderParmsGetToDecoder() {}

  virtual void SetUp() {
    InitializeConfig();
@@ -165,7 +166,7 @@ class Vp9EncoderParmsGetToDecoder

 // TODO(hkuang): This test conflicts with frame parallel decode. So disable it
 // for now until fix.
-TEST_P(Vp9EncoderParmsGetToDecoder, DISABLED_BitstreamParms) {
+TEST_P(VpxEncoderParmsGetToDecoder, DISABLED_BitstreamParms) {
  init_flags_ = VPX_CODEC_USE_PSNR;

  libvpx_test::VideoSource *video;
@@ -186,8 +187,12 @@ TEST_P(Vp9EncoderParmsGetToDecoder, DISABLED_BitstreamParms) {
 }

 VP9_INSTANTIATE_TEST_CASE(
-    Vp9EncoderParmsGetToDecoder,
+    VpxEncoderParmsGetToDecoder,
    ::testing::ValuesIn(kVP9EncodeParameterSet),
    ::testing::ValuesIn(kVP9EncodePerfTestVectors));

+VP10_INSTANTIATE_TEST_CASE(
+    VpxEncoderParmsGetToDecoder,
+    ::testing::ValuesIn(kVP9EncodeParameterSet),
+    ::testing::ValuesIn(kVP9EncodePerfTestVectors));
 }  // namespace
--- a/test/vp9_end_to_end_test.cc
+++ b/test/vp9_end_to_end_test.cc
@@ -8,12 +8,13 @@
 *  be found in the AUTHORS file in the root of the source tree.
 */

+#include "third_party/googletest/src/include/gtest/gtest.h"
+
 #include "test/codec_factory.h"
 #include "test/encode_test_driver.h"
+#include "test/util.h"
 #include "test/y4m_video_source.h"
 #include "test/yuv_video_source.h"
-#include "test/util.h"
-#include "third_party/googletest/src/include/gtest/gtest.h"

 namespace {

@@ -186,4 +187,9 @@ VP9_INSTANTIATE_TEST_CASE(
    ::testing::ValuesIn(kTestVectors),
    ::testing::ValuesIn(kCpuUsedVectors));

+VP10_INSTANTIATE_TEST_CASE(
+    EndToEndTestLarge,
+    ::testing::ValuesIn(kEncodingModeVectors),
+    ::testing::ValuesIn(kTestVectors),
+    ::testing::ValuesIn(kCpuUsedVectors));
 }  // namespace
--- a/test/vp9_error_block_test.cc
+++ b/test/vp9_error_block_test.cc
@@ -14,12 +14,12 @@

 #include "third_party/googletest/src/include/gtest/gtest.h"

+#include "./vpx_config.h"
+#include "./vp9_rtcd.h"
 #include "test/acm_random.h"
 #include "test/clear_system_state.h"
 #include "test/register_state_check.h"
 #include "test/util.h"
-#include "./vpx_config.h"
-#include "./vp9_rtcd.h"
 #include "vp9/common/vp9_entropy.h"
 #include "vpx/vpx_codec.h"
 #include "vpx/vpx_integer.h"
--- a/test/vp9_ethread_test.cc
+++ b/test/vp9_ethread_test.cc
@@ -18,11 +18,11 @@
 #include "test/y4m_video_source.h"

 namespace {
-class VP9EncoderThreadTest
+class VPxEncoderThreadTest
    : public ::libvpx_test::EncoderTest,
      public ::libvpx_test::CodecTestWith2Params<libvpx_test::TestMode, int> {
 protected:
-  VP9EncoderThreadTest()
+  VPxEncoderThreadTest()
      : EncoderTest(GET_PARAM(0)),
        encoder_initialized_(false),
        tiles_(2),
@@ -36,7 +36,7 @@ class VP9EncoderThreadTest

    md5_.clear();
  }
-  virtual ~VP9EncoderThreadTest() {
+  virtual ~VPxEncoderThreadTest() {
    delete decoder_;
  }

@@ -105,7 +105,7 @@ class VP9EncoderThreadTest
  std::vector<std::string> md5_;
 };

-TEST_P(VP9EncoderThreadTest, EncoderResultTest) {
+TEST_P(VPxEncoderThreadTest, EncoderResultTest) {
  std::vector<std::string> single_thr_md5, multi_thr_md5;

  ::libvpx_test::Y4mVideoSource video("niklas_1280_720_30.y4m", 15, 20);
@@ -130,8 +130,13 @@ TEST_P(VP9EncoderThreadTest, EncoderResultTest) {
 }

 VP9_INSTANTIATE_TEST_CASE(
-    VP9EncoderThreadTest,
+    VPxEncoderThreadTest,
    ::testing::Values(::libvpx_test::kTwoPassGood, ::libvpx_test::kOnePassGood,
                      ::libvpx_test::kRealTime),
    ::testing::Range(1, 9));
+
+VP10_INSTANTIATE_TEST_CASE(
+    VPxEncoderThreadTest,
+    ::testing::Values(::libvpx_test::kTwoPassGood, ::libvpx_test::kOnePassGood),
+    ::testing::Range(1, 3));
 }  // namespace
--- a/test/vp9_intrapred_test.cc
+++ b/test/vp9_intrapred_test.cc
@@ -10,17 +10,17 @@

 #include <string>

-#include "test/acm_random.h"
-#include "test/clear_system_state.h"
-#include "test/register_state_check.h"
 #include "third_party/googletest/src/include/gtest/gtest.h"

 #include "./vpx_config.h"
-#include "./vp9_rtcd.h"
+#include "./vpx_dsp_rtcd.h"
+#include "test/acm_random.h"
+#include "test/clear_system_state.h"
+#include "test/register_state_check.h"
+#include "test/util.h"
 #include "vp9/common/vp9_blockd.h"
 #include "vp9/common/vp9_pred_common.h"
 #include "vpx_mem/vpx_mem.h"
-#include "test/util.h"

 namespace {

@@ -131,168 +131,171 @@ using std::tr1::make_tuple;

 #if HAVE_SSE2
 #if CONFIG_VP9_HIGHBITDEPTH
+#if CONFIG_USE_X86INC
 #if ARCH_X86_64
 INSTANTIATE_TEST_CASE_P(SSE2_TO_C_8, VP9IntraPredTest,
                        ::testing::Values(
-                            make_tuple(&vp9_highbd_dc_predictor_32x32_sse2,
-                                       &vp9_highbd_dc_predictor_32x32_c, 32, 8),
-                            make_tuple(&vp9_highbd_tm_predictor_16x16_sse2,
-                                       &vp9_highbd_tm_predictor_16x16_c, 16, 8),
-                            make_tuple(&vp9_highbd_tm_predictor_32x32_sse2,
-                                       &vp9_highbd_tm_predictor_32x32_c, 32, 8),
-                            make_tuple(&vp9_highbd_dc_predictor_4x4_sse,
-                                       &vp9_highbd_dc_predictor_4x4_c, 4, 8),
-                            make_tuple(&vp9_highbd_dc_predictor_8x8_sse2,
-                                       &vp9_highbd_dc_predictor_8x8_c, 8, 8),
-                            make_tuple(&vp9_highbd_dc_predictor_16x16_sse2,
-                                       &vp9_highbd_dc_predictor_16x16_c, 16, 8),
-                            make_tuple(&vp9_highbd_v_predictor_4x4_sse,
-                                       &vp9_highbd_v_predictor_4x4_c, 4, 8),
-                            make_tuple(&vp9_highbd_v_predictor_8x8_sse2,
-                                       &vp9_highbd_v_predictor_8x8_c, 8, 8),
-                            make_tuple(&vp9_highbd_v_predictor_16x16_sse2,
-                                       &vp9_highbd_v_predictor_16x16_c, 16, 8),
-                            make_tuple(&vp9_highbd_v_predictor_32x32_sse2,
-                                       &vp9_highbd_v_predictor_32x32_c, 32, 8),
-                            make_tuple(&vp9_highbd_tm_predictor_4x4_sse,
-                                       &vp9_highbd_tm_predictor_4x4_c, 4, 8),
-                            make_tuple(&vp9_highbd_tm_predictor_8x8_sse2,
-                                       &vp9_highbd_tm_predictor_8x8_c, 8, 8)));
+                            make_tuple(&vpx_highbd_dc_predictor_32x32_sse2,
+                                       &vpx_highbd_dc_predictor_32x32_c, 32, 8),
+                            make_tuple(&vpx_highbd_tm_predictor_16x16_sse2,
+                                       &vpx_highbd_tm_predictor_16x16_c, 16, 8),
+                            make_tuple(&vpx_highbd_tm_predictor_32x32_sse2,
+                                       &vpx_highbd_tm_predictor_32x32_c, 32, 8),
+                            make_tuple(&vpx_highbd_dc_predictor_4x4_sse,
+                                       &vpx_highbd_dc_predictor_4x4_c, 4, 8),
+                            make_tuple(&vpx_highbd_dc_predictor_8x8_sse2,
+                                       &vpx_highbd_dc_predictor_8x8_c, 8, 8),
+                            make_tuple(&vpx_highbd_dc_predictor_16x16_sse2,
+                                       &vpx_highbd_dc_predictor_16x16_c, 16, 8),
+                            make_tuple(&vpx_highbd_v_predictor_4x4_sse,
+                                       &vpx_highbd_v_predictor_4x4_c, 4, 8),
+                            make_tuple(&vpx_highbd_v_predictor_8x8_sse2,
+                                       &vpx_highbd_v_predictor_8x8_c, 8, 8),
+                            make_tuple(&vpx_highbd_v_predictor_16x16_sse2,
+                                       &vpx_highbd_v_predictor_16x16_c, 16, 8),
+                            make_tuple(&vpx_highbd_v_predictor_32x32_sse2,
+                                       &vpx_highbd_v_predictor_32x32_c, 32, 8),
+                            make_tuple(&vpx_highbd_tm_predictor_4x4_sse,
+                                       &vpx_highbd_tm_predictor_4x4_c, 4, 8),
+                            make_tuple(&vpx_highbd_tm_predictor_8x8_sse2,
+                                       &vpx_highbd_tm_predictor_8x8_c, 8, 8)));
 #else
 INSTANTIATE_TEST_CASE_P(SSE2_TO_C_8, VP9IntraPredTest,
                        ::testing::Values(
-                            make_tuple(&vp9_highbd_dc_predictor_4x4_sse,
-                                       &vp9_highbd_dc_predictor_4x4_c, 4, 8),
-                            make_tuple(&vp9_highbd_dc_predictor_8x8_sse2,
-                                       &vp9_highbd_dc_predictor_8x8_c, 8, 8),
-                            make_tuple(&vp9_highbd_dc_predictor_16x16_sse2,
-                                       &vp9_highbd_dc_predictor_16x16_c, 16, 8),
-                            make_tuple(&vp9_highbd_v_predictor_4x4_sse,
-                                       &vp9_highbd_v_predictor_4x4_c, 4, 8),
-                            make_tuple(&vp9_highbd_v_predictor_8x8_sse2,
-                                       &vp9_highbd_v_predictor_8x8_c, 8, 8),
-                            make_tuple(&vp9_highbd_v_predictor_16x16_sse2,
-                                       &vp9_highbd_v_predictor_16x16_c, 16, 8),
-                            make_tuple(&vp9_highbd_v_predictor_32x32_sse2,
-                                       &vp9_highbd_v_predictor_32x32_c, 32, 8),
-                            make_tuple(&vp9_highbd_tm_predictor_4x4_sse,
-                                       &vp9_highbd_tm_predictor_4x4_c, 4, 8),
-                            make_tuple(&vp9_highbd_tm_predictor_8x8_sse2,
-                                       &vp9_highbd_tm_predictor_8x8_c, 8, 8)));
-#endif
+                            make_tuple(&vpx_highbd_dc_predictor_4x4_sse,
+                                       &vpx_highbd_dc_predictor_4x4_c, 4, 8),
+                            make_tuple(&vpx_highbd_dc_predictor_8x8_sse2,
+                                       &vpx_highbd_dc_predictor_8x8_c, 8, 8),
+                            make_tuple(&vpx_highbd_dc_predictor_16x16_sse2,
+                                       &vpx_highbd_dc_predictor_16x16_c, 16, 8),
+                            make_tuple(&vpx_highbd_v_predictor_4x4_sse,
+                                       &vpx_highbd_v_predictor_4x4_c, 4, 8),
+                            make_tuple(&vpx_highbd_v_predictor_8x8_sse2,
+                                       &vpx_highbd_v_predictor_8x8_c, 8, 8),
+                            make_tuple(&vpx_highbd_v_predictor_16x16_sse2,
+                                       &vpx_highbd_v_predictor_16x16_c, 16, 8),
+                            make_tuple(&vpx_highbd_v_predictor_32x32_sse2,
+                                       &vpx_highbd_v_predictor_32x32_c, 32, 8),
+                            make_tuple(&vpx_highbd_tm_predictor_4x4_sse,
+                                       &vpx_highbd_tm_predictor_4x4_c, 4, 8),
+                            make_tuple(&vpx_highbd_tm_predictor_8x8_sse2,
+                                       &vpx_highbd_tm_predictor_8x8_c, 8, 8)));
+#endif  // !ARCH_X86_64
+
 #if ARCH_X86_64
 INSTANTIATE_TEST_CASE_P(SSE2_TO_C_10, VP9IntraPredTest,
                        ::testing::Values(
-                            make_tuple(&vp9_highbd_dc_predictor_32x32_sse2,
-                                       &vp9_highbd_dc_predictor_32x32_c, 32,
+                            make_tuple(&vpx_highbd_dc_predictor_32x32_sse2,
+                                       &vpx_highbd_dc_predictor_32x32_c, 32,
                                       10),
-                            make_tuple(&vp9_highbd_tm_predictor_16x16_sse2,
-                                       &vp9_highbd_tm_predictor_16x16_c, 16,
+                            make_tuple(&vpx_highbd_tm_predictor_16x16_sse2,
+                                       &vpx_highbd_tm_predictor_16x16_c, 16,
                                       10),
-                            make_tuple(&vp9_highbd_tm_predictor_32x32_sse2,
-                                       &vp9_highbd_tm_predictor_32x32_c, 32,
+                            make_tuple(&vpx_highbd_tm_predictor_32x32_sse2,
+                                       &vpx_highbd_tm_predictor_32x32_c, 32,
                                       10),
-                            make_tuple(&vp9_highbd_dc_predictor_4x4_sse,
-                                       &vp9_highbd_dc_predictor_4x4_c, 4, 10),
-                            make_tuple(&vp9_highbd_dc_predictor_8x8_sse2,
-                                       &vp9_highbd_dc_predictor_8x8_c, 8, 10),
-                            make_tuple(&vp9_highbd_dc_predictor_16x16_sse2,
-                                       &vp9_highbd_dc_predictor_16x16_c, 16,
+                            make_tuple(&vpx_highbd_dc_predictor_4x4_sse,
+                                       &vpx_highbd_dc_predictor_4x4_c, 4, 10),
+                            make_tuple(&vpx_highbd_dc_predictor_8x8_sse2,
+                                       &vpx_highbd_dc_predictor_8x8_c, 8, 10),
+                            make_tuple(&vpx_highbd_dc_predictor_16x16_sse2,
+                                       &vpx_highbd_dc_predictor_16x16_c, 16,
                                       10),
-                            make_tuple(&vp9_highbd_v_predictor_4x4_sse,
-                                       &vp9_highbd_v_predictor_4x4_c, 4, 10),
-                            make_tuple(&vp9_highbd_v_predictor_8x8_sse2,
-                                       &vp9_highbd_v_predictor_8x8_c, 8, 10),
-                            make_tuple(&vp9_highbd_v_predictor_16x16_sse2,
-                                       &vp9_highbd_v_predictor_16x16_c, 16,
+                            make_tuple(&vpx_highbd_v_predictor_4x4_sse,
+                                       &vpx_highbd_v_predictor_4x4_c, 4, 10),
+                            make_tuple(&vpx_highbd_v_predictor_8x8_sse2,
+                                       &vpx_highbd_v_predictor_8x8_c, 8, 10),
+                            make_tuple(&vpx_highbd_v_predictor_16x16_sse2,
+                                       &vpx_highbd_v_predictor_16x16_c, 16,
                                       10),
-                            make_tuple(&vp9_highbd_v_predictor_32x32_sse2,
-                                       &vp9_highbd_v_predictor_32x32_c, 32,
+                            make_tuple(&vpx_highbd_v_predictor_32x32_sse2,
+                                       &vpx_highbd_v_predictor_32x32_c, 32,
                                       10),
-                            make_tuple(&vp9_highbd_tm_predictor_4x4_sse,
-                                       &vp9_highbd_tm_predictor_4x4_c, 4, 10),
-                            make_tuple(&vp9_highbd_tm_predictor_8x8_sse2,
-                                       &vp9_highbd_tm_predictor_8x8_c, 8, 10)));
+                            make_tuple(&vpx_highbd_tm_predictor_4x4_sse,
+                                       &vpx_highbd_tm_predictor_4x4_c, 4, 10),
+                            make_tuple(&vpx_highbd_tm_predictor_8x8_sse2,
+                                       &vpx_highbd_tm_predictor_8x8_c, 8, 10)));
 #else
 INSTANTIATE_TEST_CASE_P(SSE2_TO_C_10, VP9IntraPredTest,
                        ::testing::Values(
-                            make_tuple(&vp9_highbd_dc_predictor_4x4_sse,
-                                       &vp9_highbd_dc_predictor_4x4_c, 4, 10),
-                            make_tuple(&vp9_highbd_dc_predictor_8x8_sse2,
-                                       &vp9_highbd_dc_predictor_8x8_c, 8, 10),
-                            make_tuple(&vp9_highbd_dc_predictor_16x16_sse2,
-                                       &vp9_highbd_dc_predictor_16x16_c, 16,
+                            make_tuple(&vpx_highbd_dc_predictor_4x4_sse,
+                                       &vpx_highbd_dc_predictor_4x4_c, 4, 10),
+                            make_tuple(&vpx_highbd_dc_predictor_8x8_sse2,
+                                       &vpx_highbd_dc_predictor_8x8_c, 8, 10),
+                            make_tuple(&vpx_highbd_dc_predictor_16x16_sse2,
+                                       &vpx_highbd_dc_predictor_16x16_c, 16,
                                       10),
-                            make_tuple(&vp9_highbd_v_predictor_4x4_sse,
-                                       &vp9_highbd_v_predictor_4x4_c, 4, 10),
-                            make_tuple(&vp9_highbd_v_predictor_8x8_sse2,
-                                       &vp9_highbd_v_predictor_8x8_c, 8, 10),
-                            make_tuple(&vp9_highbd_v_predictor_16x16_sse2,
-                                       &vp9_highbd_v_predictor_16x16_c, 16, 10),
-                            make_tuple(&vp9_highbd_v_predictor_32x32_sse2,
-                                       &vp9_highbd_v_predictor_32x32_c, 32, 10),
-                            make_tuple(&vp9_highbd_tm_predictor_4x4_sse,
-                                       &vp9_highbd_tm_predictor_4x4_c, 4, 10),
-                            make_tuple(&vp9_highbd_tm_predictor_8x8_sse2,
-                                       &vp9_highbd_tm_predictor_8x8_c, 8, 10)));
-#endif
+                            make_tuple(&vpx_highbd_v_predictor_4x4_sse,
+                                       &vpx_highbd_v_predictor_4x4_c, 4, 10),
+                            make_tuple(&vpx_highbd_v_predictor_8x8_sse2,
+                                       &vpx_highbd_v_predictor_8x8_c, 8, 10),
+                            make_tuple(&vpx_highbd_v_predictor_16x16_sse2,
+                                       &vpx_highbd_v_predictor_16x16_c, 16, 10),
+                            make_tuple(&vpx_highbd_v_predictor_32x32_sse2,
+                                       &vpx_highbd_v_predictor_32x32_c, 32, 10),
+                            make_tuple(&vpx_highbd_tm_predictor_4x4_sse,
+                                       &vpx_highbd_tm_predictor_4x4_c, 4, 10),
+                            make_tuple(&vpx_highbd_tm_predictor_8x8_sse2,
+                                       &vpx_highbd_tm_predictor_8x8_c, 8, 10)));
+#endif  // !ARCH_X86_64

 #if ARCH_X86_64
 INSTANTIATE_TEST_CASE_P(SSE2_TO_C_12, VP9IntraPredTest,
                        ::testing::Values(
-                            make_tuple(&vp9_highbd_dc_predictor_32x32_sse2,
-                                       &vp9_highbd_dc_predictor_32x32_c, 32,
+                            make_tuple(&vpx_highbd_dc_predictor_32x32_sse2,
+                                       &vpx_highbd_dc_predictor_32x32_c, 32,
                                       12),
-                            make_tuple(&vp9_highbd_tm_predictor_16x16_sse2,
-                                       &vp9_highbd_tm_predictor_16x16_c, 16,
+                            make_tuple(&vpx_highbd_tm_predictor_16x16_sse2,
+                                       &vpx_highbd_tm_predictor_16x16_c, 16,
                                       12),
-                            make_tuple(&vp9_highbd_tm_predictor_32x32_sse2,
-                                       &vp9_highbd_tm_predictor_32x32_c, 32,
+                            make_tuple(&vpx_highbd_tm_predictor_32x32_sse2,
+                                       &vpx_highbd_tm_predictor_32x32_c, 32,
                                       12),
-                            make_tuple(&vp9_highbd_dc_predictor_4x4_sse,
-                                       &vp9_highbd_dc_predictor_4x4_c, 4, 12),
-                            make_tuple(&vp9_highbd_dc_predictor_8x8_sse2,
-                                       &vp9_highbd_dc_predictor_8x8_c, 8, 12),
-                            make_tuple(&vp9_highbd_dc_predictor_16x16_sse2,
-                                       &vp9_highbd_dc_predictor_16x16_c, 16,
+                            make_tuple(&vpx_highbd_dc_predictor_4x4_sse,
+                                       &vpx_highbd_dc_predictor_4x4_c, 4, 12),
+                            make_tuple(&vpx_highbd_dc_predictor_8x8_sse2,
+                                       &vpx_highbd_dc_predictor_8x8_c, 8, 12),
+                            make_tuple(&vpx_highbd_dc_predictor_16x16_sse2,
+                                       &vpx_highbd_dc_predictor_16x16_c, 16,
                                       12),
-                            make_tuple(&vp9_highbd_v_predictor_4x4_sse,
-                                       &vp9_highbd_v_predictor_4x4_c, 4, 12),
-                            make_tuple(&vp9_highbd_v_predictor_8x8_sse2,
-                                       &vp9_highbd_v_predictor_8x8_c, 8, 12),
-                            make_tuple(&vp9_highbd_v_predictor_16x16_sse2,
-                                       &vp9_highbd_v_predictor_16x16_c, 16,
+                            make_tuple(&vpx_highbd_v_predictor_4x4_sse,
+                                       &vpx_highbd_v_predictor_4x4_c, 4, 12),
+                            make_tuple(&vpx_highbd_v_predictor_8x8_sse2,
+                                       &vpx_highbd_v_predictor_8x8_c, 8, 12),
+                            make_tuple(&vpx_highbd_v_predictor_16x16_sse2,
+                                       &vpx_highbd_v_predictor_16x16_c, 16,
                                       12),
-                            make_tuple(&vp9_highbd_v_predictor_32x32_sse2,
-                                       &vp9_highbd_v_predictor_32x32_c, 32,
+                            make_tuple(&vpx_highbd_v_predictor_32x32_sse2,
+                                       &vpx_highbd_v_predictor_32x32_c, 32,
                                       12),
-                            make_tuple(&vp9_highbd_tm_predictor_4x4_sse,
-                                       &vp9_highbd_tm_predictor_4x4_c, 4, 12),
-                            make_tuple(&vp9_highbd_tm_predictor_8x8_sse2,
-                                       &vp9_highbd_tm_predictor_8x8_c, 8, 12)));
+                            make_tuple(&vpx_highbd_tm_predictor_4x4_sse,
+                                       &vpx_highbd_tm_predictor_4x4_c, 4, 12),
+                            make_tuple(&vpx_highbd_tm_predictor_8x8_sse2,
+                                       &vpx_highbd_tm_predictor_8x8_c, 8, 12)));
 #else
 INSTANTIATE_TEST_CASE_P(SSE2_TO_C_12, VP9IntraPredTest,
                        ::testing::Values(
-                            make_tuple(&vp9_highbd_dc_predictor_4x4_sse,
-                                       &vp9_highbd_dc_predictor_4x4_c, 4, 12),
-                            make_tuple(&vp9_highbd_dc_predictor_8x8_sse2,
-                                       &vp9_highbd_dc_predictor_8x8_c, 8, 12),
-                            make_tuple(&vp9_highbd_dc_predictor_16x16_sse2,
-                                       &vp9_highbd_dc_predictor_16x16_c, 16,
+                            make_tuple(&vpx_highbd_dc_predictor_4x4_sse,
+                                       &vpx_highbd_dc_predictor_4x4_c, 4, 12),
+                            make_tuple(&vpx_highbd_dc_predictor_8x8_sse2,
+                                       &vpx_highbd_dc_predictor_8x8_c, 8, 12),
+                            make_tuple(&vpx_highbd_dc_predictor_16x16_sse2,
+                                       &vpx_highbd_dc_predictor_16x16_c, 16,
                                       12),
-                            make_tuple(&vp9_highbd_v_predictor_4x4_sse,
-                                       &vp9_highbd_v_predictor_4x4_c, 4, 12),
-                            make_tuple(&vp9_highbd_v_predictor_8x8_sse2,
-                                       &vp9_highbd_v_predictor_8x8_c, 8, 12),
-                            make_tuple(&vp9_highbd_v_predictor_16x16_sse2,
-                                       &vp9_highbd_v_predictor_16x16_c, 16, 12),
-                            make_tuple(&vp9_highbd_v_predictor_32x32_sse2,
-                                       &vp9_highbd_v_predictor_32x32_c, 32, 12),
-                            make_tuple(&vp9_highbd_tm_predictor_4x4_sse,
-                                       &vp9_highbd_tm_predictor_4x4_c, 4, 12),
-                            make_tuple(&vp9_highbd_tm_predictor_8x8_sse2,
-                                       &vp9_highbd_tm_predictor_8x8_c, 8, 12)));
-#endif
+                            make_tuple(&vpx_highbd_v_predictor_4x4_sse,
+                                       &vpx_highbd_v_predictor_4x4_c, 4, 12),
+                            make_tuple(&vpx_highbd_v_predictor_8x8_sse2,
+                                       &vpx_highbd_v_predictor_8x8_c, 8, 12),
+                            make_tuple(&vpx_highbd_v_predictor_16x16_sse2,
+                                       &vpx_highbd_v_predictor_16x16_c, 16, 12),
+                            make_tuple(&vpx_highbd_v_predictor_32x32_sse2,
+                                       &vpx_highbd_v_predictor_32x32_c, 32, 12),
+                            make_tuple(&vpx_highbd_tm_predictor_4x4_sse,
+                                       &vpx_highbd_tm_predictor_4x4_c, 4, 12),
+                            make_tuple(&vpx_highbd_tm_predictor_8x8_sse2,
+                                       &vpx_highbd_tm_predictor_8x8_c, 8, 12)));
+#endif  // !ARCH_X86_64
+#endif  // CONFIG_USE_X86INC
 #endif  // CONFIG_VP9_HIGHBITDEPTH
 #endif  // HAVE_SSE2
 }  // namespace
--- a/test/vp9_lossless_test.cc
+++ b/test/vp9_lossless_test.cc
@@ -7,8 +7,10 @@
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
-#include "./vpx_config.h"
+
 #include "third_party/googletest/src/include/gtest/gtest.h"
+
+#include "./vpx_config.h"
 #include "test/codec_factory.h"
 #include "test/encode_test_driver.h"
 #include "test/i420_video_source.h"
@@ -125,4 +127,8 @@ VP9_INSTANTIATE_TEST_CASE(LosslessTest,
                          ::testing::Values(::libvpx_test::kRealTime,
                                            ::libvpx_test::kOnePassGood,
                                            ::libvpx_test::kTwoPassGood));
+
+VP10_INSTANTIATE_TEST_CASE(LosslessTest,
+                           ::testing::Values(::libvpx_test::kOnePassGood,
+                                             ::libvpx_test::kTwoPassGood));
 }  // namespace
--- a/test/vp9_quantize_test.cc
+++ b/test/vp9_quantize_test.cc
@@ -14,12 +14,12 @@

 #include "third_party/googletest/src/include/gtest/gtest.h"

+#include "./vpx_config.h"
+#include "./vpx_dsp_rtcd.h"
 #include "test/acm_random.h"
 #include "test/clear_system_state.h"
 #include "test/register_state_check.h"
 #include "test/util.h"
-#include "./vpx_config.h"
-#include "./vp9_rtcd.h"
 #include "vp9/common/vp9_entropy.h"
 #include "vp9/common/vp9_scan.h"
 #include "vpx/vpx_codec.h"
@@ -331,21 +331,21 @@ using std::tr1::make_tuple;
 INSTANTIATE_TEST_CASE_P(
    SSE2, VP9QuantizeTest,
    ::testing::Values(
-        make_tuple(&vp9_highbd_quantize_b_sse2,
-                   &vp9_highbd_quantize_b_c, VPX_BITS_8),
-        make_tuple(&vp9_highbd_quantize_b_sse2,
-                   &vp9_highbd_quantize_b_c, VPX_BITS_10),
-        make_tuple(&vp9_highbd_quantize_b_sse2,
-                   &vp9_highbd_quantize_b_c, VPX_BITS_12)));
+        make_tuple(&vpx_highbd_quantize_b_sse2,
+                   &vpx_highbd_quantize_b_c, VPX_BITS_8),
+        make_tuple(&vpx_highbd_quantize_b_sse2,
+                   &vpx_highbd_quantize_b_c, VPX_BITS_10),
+        make_tuple(&vpx_highbd_quantize_b_sse2,
+                   &vpx_highbd_quantize_b_c, VPX_BITS_12)));
 INSTANTIATE_TEST_CASE_P(
    SSE2, VP9Quantize32Test,
    ::testing::Values(
-        make_tuple(&vp9_highbd_quantize_b_32x32_sse2,
-                   &vp9_highbd_quantize_b_32x32_c, VPX_BITS_8),
-        make_tuple(&vp9_highbd_quantize_b_32x32_sse2,
-                   &vp9_highbd_quantize_b_32x32_c, VPX_BITS_10),
-        make_tuple(&vp9_highbd_quantize_b_32x32_sse2,
-                   &vp9_highbd_quantize_b_32x32_c, VPX_BITS_12)));
+        make_tuple(&vpx_highbd_quantize_b_32x32_sse2,
+                   &vpx_highbd_quantize_b_32x32_c, VPX_BITS_8),
+        make_tuple(&vpx_highbd_quantize_b_32x32_sse2,
+                   &vpx_highbd_quantize_b_32x32_c, VPX_BITS_10),
+        make_tuple(&vpx_highbd_quantize_b_32x32_sse2,
+                   &vpx_highbd_quantize_b_32x32_c, VPX_BITS_12)));
 #endif  // HAVE_SSE2
 #endif  // CONFIG_VP9_HIGHBITDEPTH
 }  // namespace
--- a/test/vp9_subtract_test.cc
+++ b/test/vp9_subtract_test.cc
@@ -9,11 +9,13 @@
 */

 #include "third_party/googletest/src/include/gtest/gtest.h"
+
+#include "./vp9_rtcd.h"
+#include "./vpx_config.h"
+#include "./vpx_dsp_rtcd.h"
 #include "test/acm_random.h"
 #include "test/clear_system_state.h"
 #include "test/register_state_check.h"
-#include "./vpx_config.h"
-#include "./vp9_rtcd.h"
 #include "vp9/common/vp9_blockd.h"
 #include "vpx_mem/vpx_mem.h"

@@ -89,15 +91,19 @@ TEST_P(VP9SubtractBlockTest, SimpleSubtract) {
 }

 INSTANTIATE_TEST_CASE_P(C, VP9SubtractBlockTest,
-                        ::testing::Values(vp9_subtract_block_c));
+                        ::testing::Values(vpx_subtract_block_c));

 #if HAVE_SSE2 && CONFIG_USE_X86INC
 INSTANTIATE_TEST_CASE_P(SSE2, VP9SubtractBlockTest,
-                        ::testing::Values(vp9_subtract_block_sse2));
+                        ::testing::Values(vpx_subtract_block_sse2));
 #endif
 #if HAVE_NEON
 INSTANTIATE_TEST_CASE_P(NEON, VP9SubtractBlockTest,
-                        ::testing::Values(vp9_subtract_block_neon));
+                        ::testing::Values(vpx_subtract_block_neon));
+#endif
+#if HAVE_MSA
+INSTANTIATE_TEST_CASE_P(MSA, VP9SubtractBlockTest,
+                        ::testing::Values(vpx_subtract_block_msa));
 #endif

 }  // namespace vp9
--- a/test/vp9_thread_test.cc
+++ b/test/vp9_thread_test.cc
@@ -18,33 +18,33 @@
 #if CONFIG_WEBM_IO
 #include "test/webm_video_source.h"
 #endif
-#include "vp9/common/vp9_thread.h"
+#include "vpx_util/vpx_thread.h"

 namespace {

 using std::string;

-class VP9WorkerThreadTest : public ::testing::TestWithParam<bool> {
+class VPxWorkerThreadTest : public ::testing::TestWithParam<bool> {
 protected:
-  virtual ~VP9WorkerThreadTest() {}
+  virtual ~VPxWorkerThreadTest() {}
  virtual void SetUp() {
-    vp9_get_worker_interface()->init(&worker_);
+    vpx_get_worker_interface()->init(&worker_);
  }

  virtual void TearDown() {
-    vp9_get_worker_interface()->end(&worker_);
+    vpx_get_worker_interface()->end(&worker_);
  }

-  void Run(VP9Worker* worker) {
+  void Run(VPxWorker* worker) {
    const bool synchronous = GetParam();
    if (synchronous) {
-      vp9_get_worker_interface()->execute(worker);
+      vpx_get_worker_interface()->execute(worker);
    } else {
-      vp9_get_worker_interface()->launch(worker);
+      vpx_get_worker_interface()->launch(worker);
    }
  }

-  VP9Worker worker_;
+  VPxWorker worker_;
 };

 int ThreadHook(void* data, void* return_value) {
@@ -53,12 +53,12 @@ int ThreadHook(void* data, void* return_value) {
  return *reinterpret_cast<int*>(return_value);
 }

-TEST_P(VP9WorkerThreadTest, HookSuccess) {
+TEST_P(VPxWorkerThreadTest, HookSuccess) {
  // should be a no-op.
-  EXPECT_NE(vp9_get_worker_interface()->sync(&worker_), 0);
+  EXPECT_NE(vpx_get_worker_interface()->sync(&worker_), 0);

  for (int i = 0; i < 2; ++i) {
-    EXPECT_NE(vp9_get_worker_interface()->reset(&worker_), 0);
+    EXPECT_NE(vpx_get_worker_interface()->reset(&worker_), 0);

    int hook_data = 0;
    int return_value = 1;  // return successfully from the hook
@@ -67,17 +67,17 @@ TEST_P(VP9WorkerThreadTest, HookSuccess) {
    worker_.data2 = &return_value;

    Run(&worker_);
-    EXPECT_NE(vp9_get_worker_interface()->sync(&worker_), 0);
+    EXPECT_NE(vpx_get_worker_interface()->sync(&worker_), 0);
    EXPECT_FALSE(worker_.had_error);
    EXPECT_EQ(5, hook_data);

    // should be a no-op.
-    EXPECT_NE(vp9_get_worker_interface()->sync(&worker_), 0);
+    EXPECT_NE(vpx_get_worker_interface()->sync(&worker_), 0);
  }
 }

-TEST_P(VP9WorkerThreadTest, HookFailure) {
-  EXPECT_NE(vp9_get_worker_interface()->reset(&worker_), 0);
+TEST_P(VPxWorkerThreadTest, HookFailure) {
+  EXPECT_NE(vpx_get_worker_interface()->reset(&worker_), 0);

  int hook_data = 0;
  int return_value = 0;  // return failure from the hook
@@ -86,29 +86,29 @@ TEST_P(VP9WorkerThreadTest, HookFailure) {
  worker_.data2 = &return_value;

  Run(&worker_);
-  EXPECT_FALSE(vp9_get_worker_interface()->sync(&worker_));
+  EXPECT_FALSE(vpx_get_worker_interface()->sync(&worker_));
  EXPECT_EQ(1, worker_.had_error);

  // Ensure _reset() clears the error and _launch() can be called again.
  return_value = 1;
-  EXPECT_NE(vp9_get_worker_interface()->reset(&worker_), 0);
+  EXPECT_NE(vpx_get_worker_interface()->reset(&worker_), 0);
  EXPECT_FALSE(worker_.had_error);
-  vp9_get_worker_interface()->launch(&worker_);
-  EXPECT_NE(vp9_get_worker_interface()->sync(&worker_), 0);
+  vpx_get_worker_interface()->launch(&worker_);
+  EXPECT_NE(vpx_get_worker_interface()->sync(&worker_), 0);
  EXPECT_FALSE(worker_.had_error);
 }

-TEST_P(VP9WorkerThreadTest, EndWithoutSync) {
+TEST_P(VPxWorkerThreadTest, EndWithoutSync) {
  // Create a large number of threads to increase the chances of detecting a
  // race. Doing more work in the hook is no guarantee as any race would occur
  // post hook execution in the main thread loop driver.
  static const int kNumWorkers = 64;
-  VP9Worker workers[kNumWorkers];
+  VPxWorker workers[kNumWorkers];
  int hook_data[kNumWorkers];
  int return_value[kNumWorkers];

  for (int n = 0; n < kNumWorkers; ++n) {
-    vp9_get_worker_interface()->init(&workers[n]);
+    vpx_get_worker_interface()->init(&workers[n]);
    return_value[n] = 1;  // return successfully from the hook
    workers[n].hook = ThreadHook;
    workers[n].data1 = &hook_data[n];
@@ -117,7 +117,7 @@ TEST_P(VP9WorkerThreadTest, EndWithoutSync) {

  for (int i = 0; i < 2; ++i) {
    for (int n = 0; n < kNumWorkers; ++n) {
-      EXPECT_NE(vp9_get_worker_interface()->reset(&workers[n]), 0);
+      EXPECT_NE(vpx_get_worker_interface()->reset(&workers[n]), 0);
      hook_data[n] = 0;
    }

@@ -126,16 +126,16 @@ TEST_P(VP9WorkerThreadTest, EndWithoutSync) {
    }

    for (int n = kNumWorkers - 1; n >= 0; --n) {
-      vp9_get_worker_interface()->end(&workers[n]);
+      vpx_get_worker_interface()->end(&workers[n]);
    }
  }
 }

-TEST(VP9WorkerThreadTest, TestInterfaceAPI) {
-  EXPECT_EQ(0, vp9_set_worker_interface(NULL));
-  EXPECT_TRUE(vp9_get_worker_interface() != NULL);
+TEST(VPxWorkerThreadTest, TestInterfaceAPI) {
+  EXPECT_EQ(0, vpx_set_worker_interface(NULL));
+  EXPECT_TRUE(vpx_get_worker_interface() != NULL);
  for (int i = 0; i < 6; ++i) {
-    VP9WorkerInterface winterface = *vp9_get_worker_interface();
+    VPxWorkerInterface winterface = *vpx_get_worker_interface();
    switch (i) {
      default:
      case 0: winterface.init = NULL; break;
@@ -145,7 +145,7 @@ TEST(VP9WorkerThreadTest, TestInterfaceAPI) {
      case 4: winterface.execute = NULL; break;
      case 5: winterface.end = NULL; break;
    }
-    EXPECT_EQ(0, vp9_set_worker_interface(&winterface));
+    EXPECT_EQ(0, vpx_set_worker_interface(&winterface));
  }
 }

@@ -202,21 +202,21 @@ void DecodeFiles(const FileList files[]) {
 // hang.
 namespace impl {

-void Init(VP9Worker *const worker) { memset(worker, 0, sizeof(*worker)); }
-int Reset(VP9Worker *const /*worker*/) { return 1; }
-int Sync(VP9Worker *const worker) { return !worker->had_error; }
+void Init(VPxWorker *const worker) { memset(worker, 0, sizeof(*worker)); }
+int Reset(VPxWorker *const /*worker*/) { return 1; }
+int Sync(VPxWorker *const worker) { return !worker->had_error; }

-void Execute(VP9Worker *const worker) {
+void Execute(VPxWorker *const worker) {
  worker->had_error |= !worker->hook(worker->data1, worker->data2);
 }

-void Launch(VP9Worker *const worker) { Execute(worker); }
-void End(VP9Worker *const /*worker*/) {}
+void Launch(VPxWorker *const worker) { Execute(worker); }
+void End(VPxWorker *const /*worker*/) {}

 }  // namespace impl

-TEST(VP9WorkerThreadTest, TestSerialInterface) {
-  static const VP9WorkerInterface serial_interface = {
+TEST(VPxWorkerThreadTest, TestSerialInterface) {
+  static const VPxWorkerInterface serial_interface = {
    impl::Init, impl::Reset, impl::Sync, impl::Launch, impl::Execute, impl::End
  };
  // TODO(jzern): Avoid using a file that will use the row-based thread
@@ -225,13 +225,13 @@ TEST(VP9WorkerThreadTest, TestSerialInterface) {
  // progress in the row above before proceeding.
  static const char expected_md5[] = "b35a1b707b28e82be025d960aba039bc";
  static const char filename[] = "vp90-2-03-size-226x226.webm";
-  VP9WorkerInterface default_interface = *vp9_get_worker_interface();
+  VPxWorkerInterface default_interface = *vpx_get_worker_interface();

-  EXPECT_NE(vp9_set_worker_interface(&serial_interface), 0);
+  EXPECT_NE(vpx_set_worker_interface(&serial_interface), 0);
  EXPECT_EQ(expected_md5, DecodeFile(filename, 2));

  // Reset the interface.
-  EXPECT_NE(vp9_set_worker_interface(&default_interface), 0);
+  EXPECT_NE(vpx_set_worker_interface(&default_interface), 0);
  EXPECT_EQ(expected_md5, DecodeFile(filename, 2));
 }

@@ -309,6 +309,6 @@ TEST(VP9DecodeMultiThreadedTest, Decode3) {
 }
 #endif  // CONFIG_WEBM_IO

-INSTANTIATE_TEST_CASE_P(Synchronous, VP9WorkerThreadTest, ::testing::Bool());
+INSTANTIATE_TEST_CASE_P(Synchronous, VPxWorkerThreadTest, ::testing::Bool());

 }  // namespace
--- a/test/vpx_scale_test.cc
+++ b/test/vpx_scale_test.cc
@@ -10,11 +10,10 @@

 #include "third_party/googletest/src/include/gtest/gtest.h"

-#include "test/clear_system_state.h"
-#include "test/register_state_check.h"
-
 #include "./vpx_config.h"
 #include "./vpx_scale_rtcd.h"
+#include "test/clear_system_state.h"
+#include "test/register_state_check.h"
 #include "vpx_mem/vpx_mem.h"
 #include "vpx_scale/yv12config.h"

--- a/test/y4m_test.cc
+++ b/test/y4m_test.cc
@@ -9,12 +9,14 @@
 */

 #include <string>
+
+#include "third_party/googletest/src/include/gtest/gtest.h"
+
+#include "./vpx_config.h"
+#include "./y4menc.h"
 #include "test/md5_helper.h"
 #include "test/util.h"
 #include "test/y4m_video_source.h"
-#include "third_party/googletest/src/include/gtest/gtest.h"
-#include "./vpx_config.h"
-#include "./y4menc.h"

 namespace {

--- a/third_party/libwebm/Android.mk
+++ b/third_party/libwebm/Android.mk
@@ -1,11 +1,10 @@
-LOCAL_PATH := $(call my-dir)
-include $(CLEAR_VARS)
+LOCAL_PATH:= $(call my-dir)

-LOCAL_CPP_EXTENSION := .cpp
-LOCAL_SRC_FILES := mkvmuxer.cpp \
-                   mkvmuxerutil.cpp \
-                   mkvparser.cpp \
-                   mkvreader.cpp \
-                   mkvwriter.cpp
-LOCAL_MODULE := libwebm
+include $(CLEAR_VARS)
+LOCAL_MODULE:= libwebm
+LOCAL_SRC_FILES:= mkvparser.cpp \
+                  mkvreader.cpp \
+                  mkvmuxer.cpp \
+                  mkvmuxerutil.cpp \
+                  mkvwriter.cpp
 include $(BUILD_STATIC_LIBRARY)
--- a/third_party/libwebm/PATENTS.TXT
+++ b/third_party/libwebm/PATENTS.TXT
@@ -17,7 +17,7 @@ or agree to the institution of patent litigation or any other patent
 enforcement activity against any entity (including a cross-claim or
 counterclaim in a lawsuit) alleging that any of these implementations of WebM
 or any code incorporated within any of these implementations of WebM
-constitutes direct or contributory patent infringement, or inducement of
+constitute direct or contributory patent infringement, or inducement of
 patent infringement, then any patent rights granted to you under this License
 for these implementations of WebM shall terminate as of the date such
 litigation is filed.
--- a/third_party/libwebm/README.libvpx
+++ b/third_party/libwebm/README.libvpx
@@ -1,5 +1,5 @@
 URL: https://chromium.googlesource.com/webm/libwebm
-Version: 249629d46c6e9391f25a90cff6d19075f47474cb
+Version: 2dec09426ab62b794464cc9971bd135b4d313e65
 License: BSD
 License File: LICENSE.txt

--- a/third_party/libwebm/mkvmuxer.cpp
+++ b/third_party/libwebm/mkvmuxer.cpp
--- a/third_party/libwebm/mkvmuxer.hpp
+++ b/third_party/libwebm/mkvmuxer.hpp
@@ -23,6 +23,8 @@ namespace mkvmuxer {
 class MkvWriter;
 class Segment;

+const uint64 kMaxTrackNumber = 126;
+
 ///////////////////////////////////////////////////////////////
 // Interface used by the mkvmuxer to write out the Mkv data.
 class IMkvWriter {
@@ -57,6 +59,10 @@ class IMkvWriter {

 // Writes out the EBML header for a WebM file. This function must be called
 // before any other libwebm writing functions are called.
+bool WriteEbmlHeader(IMkvWriter* writer, uint64 doc_type_version);
+
+// Deprecated. Writes out EBML header with doc_type_version as
+// kDefaultDocTypeVersion. Exists for backward compatibility.
 bool WriteEbmlHeader(IMkvWriter* writer);

 // Copies in Chunk from source to destination between the given byte positions
@@ -70,12 +76,23 @@ class Frame {
  Frame();
  ~Frame();

+  // Sets this frame's contents based on |frame|. Returns true on success. On
+  // failure, this frame's existing contents may be lost.
+  bool CopyFrom(const Frame& frame);
+
  // Copies |frame| data into |frame_|. Returns true on success.
  bool Init(const uint8* frame, uint64 length);

  // Copies |additional| data into |additional_|. Returns true on success.
  bool AddAdditionalData(const uint8* additional, uint64 length, uint64 add_id);

+  // Returns true if the frame has valid parameters.
+  bool IsValid() const;
+
+  // Returns true if the frame can be written as a SimpleBlock based on current
+  // parameters.
+  bool CanBeSimpleBlock() const;
+
  uint64 add_id() const { return add_id_; }
  const uint8* additional() const { return additional_; }
  uint64 additional_length() const { return additional_length_; }
@@ -89,10 +106,15 @@ class Frame {
  uint64 track_number() const { return track_number_; }
  void set_timestamp(uint64 timestamp) { timestamp_ = timestamp; }
  uint64 timestamp() const { return timestamp_; }
-  void set_discard_padding(uint64 discard_padding) {
+  void set_discard_padding(int64 discard_padding) {
    discard_padding_ = discard_padding;
  }
-  uint64 discard_padding() const { return discard_padding_; }
+  int64 discard_padding() const { return discard_padding_; }
+  void set_reference_block_timestamp(int64 reference_block_timestamp);
+  int64 reference_block_timestamp() const { return reference_block_timestamp_; }
+  bool reference_block_timestamp_set() const {
+    return reference_block_timestamp_set_;
+  }

 private:
  // Id of the Additional data.
@@ -124,6 +146,14 @@ class Frame {

  // Discard padding for the frame.
  int64 discard_padding_;
+
+  // Reference block timestamp.
+  int64 reference_block_timestamp_;
+
+  // Flag indicating if |reference_block_timestamp_| has been set.
+  bool reference_block_timestamp_set_;
+
+  LIBWEBM_DISALLOW_COPY_AND_ASSIGN(Frame);
 };

 ///////////////////////////////////////////////////////////////
@@ -422,6 +452,16 @@ class VideoTrack : public Track {
  uint64 display_height() const { return display_height_; }
  void set_display_width(uint64 width) { display_width_ = width; }
  uint64 display_width() const { return display_width_; }
+
+  void set_crop_left(uint64 crop_left) { crop_left_ = crop_left; }
+  uint64 crop_left() const { return crop_left_; }
+  void set_crop_right(uint64 crop_right) { crop_right_ = crop_right; }
+  uint64 crop_right() const { return crop_right_; }
+  void set_crop_top(uint64 crop_top) { crop_top_ = crop_top; }
+  uint64 crop_top() const { return crop_top_; }
+  void set_crop_bottom(uint64 crop_bottom) { crop_bottom_ = crop_bottom; }
+  uint64 crop_bottom() const { return crop_bottom_; }
+
  void set_frame_rate(double frame_rate) { frame_rate_ = frame_rate; }
  double frame_rate() const { return frame_rate_; }
  void set_height(uint64 height) { height_ = height; }
@@ -438,6 +478,10 @@ class VideoTrack : public Track {
  // Video track element names.
  uint64 display_height_;
  uint64 display_width_;
+  uint64 crop_left_;
+  uint64 crop_right_;
+  uint64 crop_top_;
+  uint64 crop_bottom_;
  double frame_rate_;
  uint64 height_;
  uint64 stereo_mode_;
@@ -489,6 +533,7 @@ class Tracks {
  static const char kVorbisCodecId[];
  static const char kVp8CodecId[];
  static const char kVp9CodecId[];
+  static const char kVp10CodecId[];

  Tracks();
  ~Tracks();
@@ -692,6 +737,112 @@ class Chapters {
  LIBWEBM_DISALLOW_COPY_AND_ASSIGN(Chapters);
 };

+///////////////////////////////////////////////////////////////
+// Tag element
+//
+class Tag {
+ public:
+  bool add_simple_tag(const char* tag_name, const char* tag_string);
+
+ private:
+  // Tags calls Clear and the destructor of Tag
+  friend class Tags;
+
+  // For storage of simple tags
+  class SimpleTag {
+   public:
+    // Establish representation invariant for new SimpleTag object.
+    void Init();
+
+    // Reclaim resources, in anticipation of destruction.
+    void Clear();
+
+    // Copies |tag_name| to the |tag_name_| member.  Returns false on
+    // error.
+    bool set_tag_name(const char* tag_name);
+
+    // Copies |tag_string| to the |tag_string_| member.  Returns false
+    // on error.
+    bool set_tag_string(const char* tag_string);
+
+    // If |writer| is non-NULL, serialize the SimpleTag sub-element of
+    // the Tag into the stream.  Returns the SimpleTag element size on
+    // success, 0 if error.
+    uint64 Write(IMkvWriter* writer) const;
+
+   private:
+    char* tag_name_;
+    char* tag_string_;
+  };
+
+  Tag();
+  ~Tag();
+
+  // Copies this Tag object to a different one.  This is used when
+  // expanding a plain array of Tag objects (see Tags).
+  void ShallowCopy(Tag* dst) const;
+
+  // Reclaim resources used by this Tag object, pending its
+  // destruction.
+  void Clear();
+
+  // If there is no storage remaining on the |simple_tags_| array for a
+  // new SimpleTag object, creates a new, longer array and copies the
+  // existing SimpleTag objects to the new array.  Returns false if the
+  // array cannot be expanded.
+  bool ExpandSimpleTagsArray();
+
+  // If |writer| is non-NULL, serialize the Tag sub-element into the
+  // stream.  Returns the total size of the element on success, 0 if
+  // error.
+  uint64 Write(IMkvWriter* writer) const;
+
+  // The Tag element can contain multiple SimpleTag sub-elements
+  SimpleTag* simple_tags_;
+
+  // The physical length (total size) of the |simple_tags_| array.
+  int simple_tags_size_;
+
+  // The logical length (number of active elements) of the |simple_tags_|
+  // array.
+  int simple_tags_count_;
+
+  LIBWEBM_DISALLOW_COPY_AND_ASSIGN(Tag);
+};
+
+///////////////////////////////////////////////////////////////
+// Tags element
+//
+class Tags {
+ public:
+  Tags();
+  ~Tags();
+
+  Tag* AddTag();
+
+  // Returns the number of tags that have been added.
+  int Count() const;
+
+  // Output the Tags element to the writer. Returns true on success.
+  bool Write(IMkvWriter* writer) const;
+
+ private:
+  // Expands the tags_ array if there is not enough space to contain
+  // another tag object.  Returns true on success.
+  bool ExpandTagsArray();
+
+  // Total length of the tags_ array.
+  int tags_size_;
+
+  // Number of active tags on the tags_ array.
+  int tags_count_;
+
+  // Array for storage of tag objects.
+  Tag* tags_;
+
+  LIBWEBM_DISALLOW_COPY_AND_ASSIGN(Tags);
+};
+
 ///////////////////////////////////////////////////////////////
 // Cluster element
 //
@@ -699,32 +850,36 @@ class Chapters {
 //  |Init| must be called before any other method in this class.
 class Cluster {
 public:
-  Cluster(uint64 timecode, int64 cues_pos);
-  ~Cluster();
-
  // |timecode| is the absolute timecode of the cluster. |cues_pos| is the
  // position for the cluster within the segment that should be written in
-  // the cues element.
+  // the cues element. |timecode_scale| is the timecode scale of the segment.
+  Cluster(uint64 timecode, int64 cues_pos, uint64 timecode_scale);
+  ~Cluster();
+
  bool Init(IMkvWriter* ptr_writer);

+  // Adds a frame to be output in the file. The frame is written out through
+  // |writer_| if successful. Returns true on success.
+  bool AddFrame(const Frame* frame);
+
  // Adds a frame to be output in the file. The frame is written out through
  // |writer_| if successful. Returns true on success.
  // Inputs:
-  //   frame: Pointer to the data
+  //   data: Pointer to the data
  //   length: Length of the data
  //   track_number: Track to add the data to. Value returned by Add track
  //                 functions.  The range of allowed values is [1, 126].
  //   timecode:     Absolute (not relative to cluster) timestamp of the
  //                 frame, expressed in timecode units.
  //   is_key:       Flag telling whether or not this frame is a key frame.
-  bool AddFrame(const uint8* frame, uint64 length, uint64 track_number,
+  bool AddFrame(const uint8* data, uint64 length, uint64 track_number,
                uint64 timecode,  // timecode units (absolute)
                bool is_key);

  // Adds a frame to be output in the file. The frame is written out through
  // |writer_| if successful. Returns true on success.
  // Inputs:
-  //   frame: Pointer to the data
+  //   data: Pointer to the data
  //   length: Length of the data
  //   additional: Pointer to the additional data
  //   additional_length: Length of the additional data
@@ -734,7 +889,7 @@ class Cluster {
  //   abs_timecode: Absolute (not relative to cluster) timestamp of the
  //                 frame, expressed in timecode units.
  //   is_key:       Flag telling whether or not this frame is a key frame.
-  bool AddFrameWithAdditional(const uint8* frame, uint64 length,
+  bool AddFrameWithAdditional(const uint8* data, uint64 length,
                              const uint8* additional, uint64 additional_length,
                              uint64 add_id, uint64 track_number,
                              uint64 abs_timecode, bool is_key);
@@ -742,7 +897,7 @@ class Cluster {
  // Adds a frame to be output in the file. The frame is written out through
  // |writer_| if successful. Returns true on success.
  // Inputs:
-  //   frame: Pointer to the data.
+  //   data: Pointer to the data.
  //   length: Length of the data.
  //   discard_padding: DiscardPadding element value.
  //   track_number: Track to add the data to. Value returned by Add track
@@ -750,14 +905,14 @@ class Cluster {
  //   abs_timecode: Absolute (not relative to cluster) timestamp of the
  //                 frame, expressed in timecode units.
  //   is_key:       Flag telling whether or not this frame is a key frame.
-  bool AddFrameWithDiscardPadding(const uint8* frame, uint64 length,
+  bool AddFrameWithDiscardPadding(const uint8* data, uint64 length,
                                  int64 discard_padding, uint64 track_number,
                                  uint64 abs_timecode, bool is_key);

  // Writes a frame of metadata to the output medium; returns true on
  // success.
  // Inputs:
-  //   frame: Pointer to the data
+  //   data: Pointer to the data
  //   length: Length of the data
  //   track_number: Track to add the data to. Value returned by Add track
  //                 functions.  The range of allowed values is [1, 126].
@@ -768,7 +923,7 @@ class Cluster {
  // The metadata frame is written as a block group, with a duration
  // sub-element but no reference time sub-elements (indicating that
  // it is considered a keyframe, per Matroska semantics).
-  bool AddMetadata(const uint8* frame, uint64 length, uint64 track_number,
+  bool AddMetadata(const uint8* data, uint64 length, uint64 track_number,
                   uint64 timecode, uint64 duration);

  // Increments the size of the cluster's data in bytes.
@@ -781,75 +936,29 @@ class Cluster {
  // Returns the size in bytes for the entire Cluster element.
  uint64 Size() const;

+  // Given |abs_timecode|, calculates timecode relative to most recent timecode.
+  // Returns -1 on failure, or a relative timecode.
+  int64 GetRelativeTimecode(int64 abs_timecode) const;
+
  int64 size_position() const { return size_position_; }
  int32 blocks_added() const { return blocks_added_; }
  uint64 payload_size() const { return payload_size_; }
  int64 position_for_cues() const { return position_for_cues_; }
  uint64 timecode() const { return timecode_; }
+  uint64 timecode_scale() const { return timecode_scale_; }

 private:
-  //  Signature that matches either of WriteSimpleBlock or WriteMetadataBlock
-  //  in the muxer utilities package.
-  typedef uint64 (*WriteBlock)(IMkvWriter* writer, const uint8* data,
-                               uint64 length, uint64 track_number,
-                               int64 timecode, uint64 generic_arg);
-
-  //  Signature that matches WriteBlockWithAdditional
-  //  in the muxer utilities package.
-  typedef uint64 (*WriteBlockAdditional)(IMkvWriter* writer, const uint8* data,
-                                         uint64 length, const uint8* additional,
-                                         uint64 add_id,
-                                         uint64 additional_length,
-                                         uint64 track_number, int64 timecode,
-                                         uint64 is_key);
-
-  //  Signature that matches WriteBlockWithDiscardPadding
-  //  in the muxer utilities package.
-  typedef uint64 (*WriteBlockDiscardPadding)(IMkvWriter* writer,
-                                             const uint8* data, uint64 length,
-                                             int64 discard_padding,
-                                             uint64 track_number,
-                                             int64 timecode, uint64 is_key);
-
  // Utility method that confirms that blocks can still be added, and that the
-  // cluster header has been written. Used by |DoWriteBlock*|. Returns true
+  // cluster header has been written. Used by |DoWriteFrame*|. Returns true
  // when successful.
-  template <typename Type>
-  bool PreWriteBlock(Type* write_function);
+  bool PreWriteBlock();

-  // Utility method used by the |DoWriteBlock*| methods that handles the book
+  // Utility method used by the |DoWriteFrame*| methods that handles the book
  // keeping required after each block is written.
  void PostWriteBlock(uint64 element_size);

-  // To simplify things, we require that there be fewer than 127
-  // tracks -- this allows us to serialize the track number value for
-  // a stream using a single byte, per the Matroska encoding.
-  bool IsValidTrackNumber(uint64 track_number) const;
-
-  // Given |abs_timecode|, calculates timecode relative to most recent timecode.
-  // Returns -1 on failure, or a relative timecode.
-  int64 GetRelativeTimecode(int64 abs_timecode) const;
-
-  //  Used to implement AddFrame and AddMetadata.
-  bool DoWriteBlock(const uint8* frame, uint64 length, uint64 track_number,
-                    uint64 absolute_timecode, uint64 generic_arg,
-                    WriteBlock write_block);
-
-  // Used to implement AddFrameWithAdditional
-  bool DoWriteBlockWithAdditional(const uint8* frame, uint64 length,
-                                  const uint8* additional,
-                                  uint64 additional_length, uint64 add_id,
-                                  uint64 track_number, uint64 absolute_timecode,
-                                  uint64 generic_arg,
-                                  WriteBlockAdditional write_block);
-
-  // Used to implement AddFrameWithDiscardPadding
-  bool DoWriteBlockWithDiscardPadding(const uint8* frame, uint64 length,
-                                      int64 discard_padding,
-                                      uint64 track_number,
-                                      uint64 absolute_timecode,
-                                      uint64 generic_arg,
-                                      WriteBlockDiscardPadding write_block);
+  // Does some verification and calls WriteFrame.
+  bool DoWriteFrame(const Frame* const frame);

  // Outputs the Cluster header to |writer_|. Returns true on success.
  bool WriteClusterHeader();
@@ -875,6 +984,9 @@ class Cluster {
  // The absolute timecode of the cluster.
  const uint64 timecode_;

+  // The timecode scale of the Segment containing the cluster.
+  const uint64 timecode_scale_;
+
  // Pointer to the writer object. Not owned by this class.
  IMkvWriter* writer_;

@@ -996,6 +1108,7 @@ class Segment {
    kBeforeClusters = 0x1  // Position Cues before Clusters
  };

+  const static uint32 kDefaultDocTypeVersion = 2;
  const static uint64 kDefaultMaxClusterDuration = 30000000000ULL;

  Segment();
@@ -1023,6 +1136,11 @@ class Segment {
  // populate its fields via the Chapter member functions.
  Chapter* AddChapter();

+  // Adds an empty tag to the tags of this segment.  Returns
+  // non-NULL on success.  After adding the tag, the caller should
+  // populate its fields via the Tag member functions.
+  Tag* AddTag();
+
  // Adds a cue point to the Cues element. |timestamp| is the time in
  // nanoseconds of the cue's time. |track| is the Track of the Cue. This
  // function must be called after AddFrame to calculate the correct
@@ -1031,19 +1149,19 @@ class Segment {

  // Adds a frame to be output in the file. Returns true on success.
  // Inputs:
-  //   frame: Pointer to the data
+  //   data: Pointer to the data
  //   length: Length of the data
  //   track_number: Track to add the data to. Value returned by Add track
  //                 functions.
  //   timestamp:    Timestamp of the frame in nanoseconds from 0.
  //   is_key:       Flag telling whether or not this frame is a key frame.
-  bool AddFrame(const uint8* frame, uint64 length, uint64 track_number,
+  bool AddFrame(const uint8* data, uint64 length, uint64 track_number,
                uint64 timestamp_ns, bool is_key);

  // Writes a frame of metadata to the output medium; returns true on
  // success.
  // Inputs:
-  //   frame: Pointer to the data
+  //   data: Pointer to the data
  //   length: Length of the data
  //   track_number: Track to add the data to. Value returned by Add track
  //                 functions.
@@ -1054,13 +1172,13 @@ class Segment {
  // The metadata frame is written as a block group, with a duration
  // sub-element but no reference time sub-elements (indicating that
  // it is considered a keyframe, per Matroska semantics).
-  bool AddMetadata(const uint8* frame, uint64 length, uint64 track_number,
+  bool AddMetadata(const uint8* data, uint64 length, uint64 track_number,
                   uint64 timestamp_ns, uint64 duration_ns);

  // Writes a frame with additional data to the output medium; returns true on
  // success.
  // Inputs:
-  //   frame: Pointer to the data.
+  //   data: Pointer to the data.
  //   length: Length of the data.
  //   additional: Pointer to additional data.
  //   additional_length: Length of additional data.
@@ -1070,7 +1188,7 @@ class Segment {
  //   timestamp:    Absolute timestamp of the frame, expressed in nanosecond
  //                 units.
  //   is_key:       Flag telling whether or not this frame is a key frame.
-  bool AddFrameWithAdditional(const uint8* frame, uint64 length,
+  bool AddFrameWithAdditional(const uint8* data, uint64 length,
                              const uint8* additional, uint64 additional_length,
                              uint64 add_id, uint64 track_number,
                              uint64 timestamp, bool is_key);
@@ -1078,7 +1196,7 @@ class Segment {
  // Writes a frame with DiscardPadding to the output medium; returns true on
  // success.
  // Inputs:
-  //   frame: Pointer to the data.
+  //   data: Pointer to the data.
  //   length: Length of the data.
  //   discard_padding: DiscardPadding element value.
  //   track_number: Track to add the data to. Value returned by Add track
@@ -1086,7 +1204,7 @@ class Segment {
  //   timestamp:    Absolute timestamp of the frame, expressed in nanosecond
  //                 units.
  //   is_key:       Flag telling whether or not this frame is a key frame.
-  bool AddFrameWithDiscardPadding(const uint8* frame, uint64 length,
+  bool AddFrameWithDiscardPadding(const uint8* data, uint64 length,
                                  int64 discard_padding, uint64 track_number,
                                  uint64 timestamp, bool is_key);

@@ -1177,6 +1295,9 @@ class Segment {
  // Cues elements.
  bool CheckHeaderInfo();

+  // Sets |doc_type_version_| based on the current element requirements.
+  void UpdateDocTypeVersion();
+
  // Sets |name| according to how many chunks have been written. |ext| is the
  // file extension. |name| must be deleted by the calling app. Returns true
  // on success.
@@ -1233,7 +1354,7 @@ class Segment {
  // diff - indicates the difference in size of the Cues element that needs to
  //        accounted for.
  // index - index in the list of Cues which is currently being adjusted.
-  // cue_size - size of the Cues element.
+  // cue_size - sum of the sizes of all the CuePoint elements.
  void MoveCuesBeforeClustersHelper(uint64 diff, int index, uint64* cue_size);

  // Seeds the random number generator used to make UIDs.
@@ -1245,6 +1366,7 @@ class Segment {
  SegmentInfo segment_info_;
  Tracks tracks_;
  Chapters chapters_;
+  Tags tags_;

  // Number of chunks written.
  int chunk_count_;
@@ -1316,6 +1438,9 @@ class Segment {
  // Last timestamp in nanoseconds added to a cluster.
  uint64 last_timestamp_;

+  // Last timestamp in nanoseconds added to a cluster, kept per track number.
+  uint64 last_track_timestamp_[kMaxTrackNumber];
+
  // Maximum time in nanoseconds for a cluster duration. This variable is a
  // guideline and some clusters may have a longer duration. Default is 30
  // seconds.
@@ -1337,12 +1462,23 @@ class Segment {
  // Flag whether or not the muxer should output a Cues element.
  bool output_cues_;

+  // The size of the EBML header, used to validate the header if
+  // WriteEbmlHeader() is called more than once.
+  int32 ebml_header_size_;
+
  // The file position of the segment's payload.
  int64 payload_pos_;

  // The file position of the element's size.
  int64 size_position_;

+  // Current DocTypeVersion (|doc_type_version_|) and that written in
+  // WriteSegmentHeader().
+  // WriteEbmlHeader() will be called from Finalize() if |doc_type_version_|
+  // differs from |doc_type_version_written_|.
+  uint32 doc_type_version_;
+  uint32 doc_type_version_written_;
+
  // Pointer to the writer objects. Not owned by this class.
  IMkvWriter* writer_cluster_;
  IMkvWriter* writer_cues_;
--- a/third_party/libwebm/mkvmuxerutil.cpp
+++ b/third_party/libwebm/mkvmuxerutil.cpp
@@ -15,18 +15,19 @@
 #include <cassert>
 #include <cmath>
 #include <cstdio>
-#ifdef _MSC_VER
-#define _CRT_RAND_S
-#endif
 #include <cstdlib>
 #include <cstring>
 #include <ctime>
-
 #include <new>

 #include "mkvwriter.hpp"
 #include "webmids.hpp"

+#ifdef _MSC_VER
+// Disable MSVC warnings that suggest making code non-portable.
+#pragma warning(disable : 4996)
+#endif
+
 namespace mkvmuxer {

 namespace {
@@ -34,6 +35,144 @@ namespace {
 // Date elements are always 8 octets in size.
 const int kDateElementSize = 8;

+// Writes |frame| as a BlockGroup; returns the element size or 0 on failure.
+uint64 WriteBlock(IMkvWriter* writer, const Frame* const frame, int64 timecode,
+                  uint64 timecode_scale) {
+  uint64 block_additional_elem_size = 0;
+  uint64 block_addid_elem_size = 0;
+  uint64 block_more_payload_size = 0;
+  uint64 block_more_elem_size = 0;
+  uint64 block_additions_payload_size = 0;
+  uint64 block_additions_elem_size = 0;
+  if (frame->additional()) {
+    block_additional_elem_size = EbmlElementSize(
+        kMkvBlockAdditional, frame->additional(), frame->additional_length());
+    block_addid_elem_size = EbmlElementSize(kMkvBlockAddID, frame->add_id());
+
+    block_more_payload_size =
+        block_addid_elem_size + block_additional_elem_size;
+    block_more_elem_size =
+        EbmlMasterElementSize(kMkvBlockMore, block_more_payload_size) +
+        block_more_payload_size;
+    block_additions_payload_size = block_more_elem_size;
+    block_additions_elem_size =
+        EbmlMasterElementSize(kMkvBlockAdditions,
+                              block_additions_payload_size) +
+        block_additions_payload_size;
+  }
+
+  uint64 discard_padding_elem_size = 0;
+  if (frame->discard_padding() != 0) {
+    discard_padding_elem_size =
+        EbmlElementSize(kMkvDiscardPadding, frame->discard_padding());
+  }
+
+  const uint64 reference_block_timestamp =
+      frame->reference_block_timestamp() / timecode_scale;
+  uint64 reference_block_elem_size = 0;
+  if (!frame->is_key()) {
+    reference_block_elem_size =
+        EbmlElementSize(kMkvReferenceBlock, reference_block_timestamp);
+  }
+
+  const uint64 duration = frame->duration() / timecode_scale;
+  uint64 block_duration_elem_size = 0;
+  if (duration > 0)
+    block_duration_elem_size = EbmlElementSize(kMkvBlockDuration, duration);
+
+  const uint64 block_payload_size = 4 + frame->length();  // 4-byte header
+  const uint64 block_elem_size =
+      EbmlMasterElementSize(kMkvBlock, block_payload_size) + block_payload_size;
+
+  const uint64 block_group_payload_size =
+      block_elem_size + block_additions_elem_size + block_duration_elem_size +
+      discard_padding_elem_size + reference_block_elem_size;
+
+  if (!WriteEbmlMasterElement(writer, kMkvBlockGroup,
+                              block_group_payload_size)) {
+    return 0;
+  }
+
+  if (!WriteEbmlMasterElement(writer, kMkvBlock, block_payload_size))
+    return 0;
+
+  if (WriteUInt(writer, frame->track_number()))  // non-zero means failure
+    return 0;
+
+  if (SerializeInt(writer, timecode, 2))
+    return 0;
+  // For a Block, flags is always 0.
+  if (SerializeInt(writer, 0, 1))
+    return 0;
+
+  if (writer->Write(frame->frame(), static_cast<uint32>(frame->length())))
+    return 0;
+
+  if (frame->additional()) {
+    if (!WriteEbmlMasterElement(writer, kMkvBlockAdditions,
+                                block_additions_payload_size)) {
+      return 0;
+    }
+
+    if (!WriteEbmlMasterElement(writer, kMkvBlockMore, block_more_payload_size))
+      return 0;
+
+    if (!WriteEbmlElement(writer, kMkvBlockAddID, frame->add_id()))
+      return 0;
+
+    if (!WriteEbmlElement(writer, kMkvBlockAdditional, frame->additional(),
+                          frame->additional_length())) {
+      return 0;
+    }
+  }
+
+  if (frame->discard_padding() != 0 &&
+      !WriteEbmlElement(writer, kMkvDiscardPadding, frame->discard_padding())) {
+    return 0;
+  }
+
+  if (!frame->is_key() &&
+      !WriteEbmlElement(writer, kMkvReferenceBlock,
+                        reference_block_timestamp)) {
+    return 0;
+  }
+
+  if (duration > 0 && !WriteEbmlElement(writer, kMkvBlockDuration, duration)) {
+    return 0;
+  }
+  return EbmlMasterElementSize(kMkvBlockGroup, block_group_payload_size) +
+         block_group_payload_size;
+}
+
+// Writes |frame| as a SimpleBlock; returns the element size or 0 on failure.
+uint64 WriteSimpleBlock(IMkvWriter* writer, const Frame* const frame,
+                        int64 timecode) {
+  if (WriteID(writer, kMkvSimpleBlock))  // non-zero means failure
+    return 0;
+
+  const int32 size = static_cast<int32>(frame->length()) + 4;  // 4-byte header
+  if (WriteUInt(writer, size))
+    return 0;
+
+  if (WriteUInt(writer, static_cast<uint64>(frame->track_number())))
+    return 0;
+
+  if (SerializeInt(writer, timecode, 2))
+    return 0;
+
+  uint64 flags = 0;
+  if (frame->is_key())
+    flags |= 0x80;  // bit set only for key frames
+  if (SerializeInt(writer, flags, 1))
+    return 0;
+
+  if (writer->Write(frame->frame(), static_cast<uint32>(frame->length())))
+    return 0;
+
+  return GetUIntSize(kMkvSimpleBlock) + GetCodedUIntSize(size) + 4 +
+         frame->length();
+}
+
 }  // namespace

 int32 GetCodedUIntSize(uint64 value) {
@@ -72,6 +211,13 @@ int32 GetUIntSize(uint64 value) {
  return 8;
 }

+int32 GetIntSize(int64 value) {
+  // A negative value is replaced by its complement (~value), a non-negative
+  // one is kept; doubling then makes GetUIntSize() count the sign bit too.
+  const uint64 v = (value < 0) ? value ^ -1LL : value;
+  return GetUIntSize(2 * v);
+}
+
 uint64 EbmlMasterElementSize(uint64 type, uint64 value) {
  // Size of EBML ID
  int32 ebml_size = GetUIntSize(type);
@@ -83,7 +229,16 @@ uint64 EbmlMasterElementSize(uint64 type, uint64 value) {
 }

 uint64 EbmlElementSize(uint64 type, int64 value) {
-  return EbmlElementSize(type, static_cast<uint64>(value));
+  // Size of EBML ID
+  int32 ebml_size = GetUIntSize(type);
+
+  // Datasize (signed payload, sized by GetIntSize)
+  ebml_size += GetIntSize(value);
+
+  // Size of Datasize (a single byte is assumed)
+  ebml_size++;
+
+  return ebml_size;
 }

 uint64 EbmlElementSize(uint64 type, uint64 value) {
@@ -144,7 +299,7 @@ uint64 EbmlElementSize(uint64 type, const uint8* value, uint64 size) {
  return ebml_size;
 }

-uint64 EbmlDateElementSize(uint64 type, int64 value) {
+uint64 EbmlDateElementSize(uint64 type) {
  // Size of EBML ID
  uint64 ebml_size = GetUIntSize(type);

@@ -289,6 +444,23 @@ bool WriteEbmlElement(IMkvWriter* writer, uint64 type, uint64 value) {
  return true;
 }

+bool WriteEbmlElement(IMkvWriter* writer, uint64 type, int64 value) {
+  if (!writer)
+    return false;
+  // Emits ID, size, then signed payload; a non-zero helper result is failure.
+  if (WriteID(writer, type))
+    return false;
+
+  const uint64 size = GetIntSize(value);  // signed payload width in bytes
+  if (WriteUInt(writer, size))
+    return false;
+
+  if (SerializeInt(writer, value, static_cast<int32>(size)))
+    return false;
+
+  return true;
+}
+
 bool WriteEbmlElement(IMkvWriter* writer, uint64 type, float value) {
  if (!writer)
    return false;
@@ -355,289 +527,25 @@ bool WriteEbmlDateElement(IMkvWriter* writer, uint64 type, int64 value) {
  return true;
 }

-uint64 WriteSimpleBlock(IMkvWriter* writer, const uint8* data, uint64 length,
-                        uint64 track_number, int64 timecode, uint64 is_key) {
-  if (!writer)
-    return false;
+uint64 WriteFrame(IMkvWriter* writer, const Frame* const frame,
+                  Cluster* cluster) {
+  if (!writer || !frame || !frame->IsValid() || !cluster ||
+      !cluster->timecode_scale())
+    return 0;

-  if (!data || length < 1)
-    return false;
-
-  //  Here we only permit track number values to be no greater than
-  //  126, which the largest value we can store having a Matroska
-  //  integer representation of only 1 byte.
-
-  if (track_number < 1 || track_number > 126)
-    return false;
-
-  //  Technically the timestamp for a block can be less than the
-  //  timestamp for the cluster itself (remember that block timestamp
+  //  Technically the timecode for a block can be less than the
+  //  timecode for the cluster itself (remember that block timecode
  //  is a signed, 16-bit integer).  However, as a simplification we
-  //  only permit non-negative cluster-relative timestamps for blocks.
-
-  if (timecode < 0 || timecode > kMaxBlockTimecode)
-    return false;
-
-  if (WriteID(writer, kMkvSimpleBlock))
+  //  only permit non-negative cluster-relative timecodes for blocks.
+  const int64 relative_timecode = cluster->GetRelativeTimecode(
+      frame->timestamp() / cluster->timecode_scale());
+  if (relative_timecode < 0 || relative_timecode > kMaxBlockTimecode)
    return 0;

-  const int32 size = static_cast<int32>(length) + 4;
-  if (WriteUInt(writer, size))
-    return 0;
-
-  if (WriteUInt(writer, static_cast<uint64>(track_number)))
-    return 0;
-
-  if (SerializeInt(writer, timecode, 2))
-    return 0;
-
-  uint64 flags = 0;
-  if (is_key)
-    flags |= 0x80;
-
-  if (SerializeInt(writer, flags, 1))
-    return 0;
-
-  if (writer->Write(data, static_cast<uint32>(length)))
-    return 0;
-
-  const uint64 element_size =
-      GetUIntSize(kMkvSimpleBlock) + GetCodedUIntSize(size) + 4 + length;
-
-  return element_size;
-}
-
-// We must write the metadata (key)frame as a BlockGroup element,
-// because we need to specify a duration for the frame.  The
-// BlockGroup element comprises the frame itself and its duration,
-// and is laid out as follows:
-//
-//   BlockGroup tag
-//   BlockGroup size
-//     Block tag
-//     Block size
-//     (the frame is the block payload)
-//     Duration tag
-//     Duration size
-//     (duration payload)
-//
-uint64 WriteMetadataBlock(IMkvWriter* writer, const uint8* data, uint64 length,
-                          uint64 track_number, int64 timecode,
-                          uint64 duration) {
-  // We don't backtrack when writing to the stream, so we must
-  // pre-compute the BlockGroup size, by summing the sizes of each
-  // sub-element (the block and the duration).
-
-  // We use a single byte for the track number of the block, which
-  // means the block header is exactly 4 bytes.
-
-  // TODO(matthewjheaney): use EbmlMasterElementSize and WriteEbmlMasterElement
-
-  const uint64 block_payload_size = 4 + length;
-  const int32 block_size = GetCodedUIntSize(block_payload_size);
-  const uint64 block_elem_size = 1 + block_size + block_payload_size;
-
-  const int32 duration_payload_size = GetUIntSize(duration);
-  const int32 duration_size = GetCodedUIntSize(duration_payload_size);
-  const uint64 duration_elem_size = 1 + duration_size + duration_payload_size;
-
-  const uint64 blockg_payload_size = block_elem_size + duration_elem_size;
-  const int32 blockg_size = GetCodedUIntSize(blockg_payload_size);
-  const uint64 blockg_elem_size = 1 + blockg_size + blockg_payload_size;
-
-  if (WriteID(writer, kMkvBlockGroup))  // 1-byte ID size
-    return 0;
-
-  if (WriteUInt(writer, blockg_payload_size))
-    return 0;
-
-  //  Write Block element
-
-  if (WriteID(writer, kMkvBlock))  // 1-byte ID size
-    return 0;
-
-  if (WriteUInt(writer, block_payload_size))
-    return 0;
-
-  // Byte 1 of 4
-
-  if (WriteUInt(writer, track_number))
-    return 0;
-
-  // Bytes 2 & 3 of 4
-
-  if (SerializeInt(writer, timecode, 2))
-    return 0;
-
-  // Byte 4 of 4
-
-  const uint64 flags = 0;
-
-  if (SerializeInt(writer, flags, 1))
-    return 0;
-
-  // Now write the actual frame (of metadata)
-
-  if (writer->Write(data, static_cast<uint32>(length)))
-    return 0;
-
-  // Write Duration element
-
-  if (WriteID(writer, kMkvBlockDuration))  // 1-byte ID size
-    return 0;
-
-  if (WriteUInt(writer, duration_payload_size))
-    return 0;
-
-  if (SerializeInt(writer, duration, duration_payload_size))
-    return 0;
-
-  // Note that we don't write a reference time as part of the block
-  // group; no reference time(s) indicates that this block is a
-  // keyframe.  (Unlike the case for a SimpleBlock element, the header
-  // bits of the Block sub-element of a BlockGroup element do not
-  // indicate keyframe status.  The keyframe status is inferred from
-  // the absence of reference time sub-elements.)
-
-  return blockg_elem_size;
-}
-
-// Writes a WebM BlockGroup with BlockAdditional data. The structure is as
-// follows:
-// Indentation shows sub-levels
-// BlockGroup
-//  Block
-//    Data
-//  BlockAdditions
-//    BlockMore
-//      BlockAddID
-//        1 (Denotes Alpha)
-//      BlockAdditional
-//        Data
-uint64 WriteBlockWithAdditional(IMkvWriter* writer, const uint8* data,
-                                uint64 length, const uint8* additional,
-                                uint64 additional_length, uint64 add_id,
-                                uint64 track_number, int64 timecode,
-                                uint64 is_key) {
-  if (!data || !additional || length < 1 || additional_length < 1)
-    return 0;
-
-  const uint64 block_payload_size = 4 + length;
-  const uint64 block_elem_size =
-      EbmlMasterElementSize(kMkvBlock, block_payload_size) + block_payload_size;
-  const uint64 block_additional_elem_size =
-      EbmlElementSize(kMkvBlockAdditional, additional, additional_length);
-  const uint64 block_addid_elem_size = EbmlElementSize(kMkvBlockAddID, add_id);
-
-  const uint64 block_more_payload_size =
-      block_addid_elem_size + block_additional_elem_size;
-  const uint64 block_more_elem_size =
-      EbmlMasterElementSize(kMkvBlockMore, block_more_payload_size) +
-      block_more_payload_size;
-  const uint64 block_additions_payload_size = block_more_elem_size;
-  const uint64 block_additions_elem_size =
-      EbmlMasterElementSize(kMkvBlockAdditions, block_additions_payload_size) +
-      block_additions_payload_size;
-  const uint64 block_group_payload_size =
-      block_elem_size + block_additions_elem_size;
-  const uint64 block_group_elem_size =
-      EbmlMasterElementSize(kMkvBlockGroup, block_group_payload_size) +
-      block_group_payload_size;
-
-  if (!WriteEbmlMasterElement(writer, kMkvBlockGroup, block_group_payload_size))
-    return 0;
-
-  if (!WriteEbmlMasterElement(writer, kMkvBlock, block_payload_size))
-    return 0;
-
-  if (WriteUInt(writer, track_number))
-    return 0;
-
-  if (SerializeInt(writer, timecode, 2))
-    return 0;
-
-  uint64 flags = 0;
-  if (is_key)
-    flags |= 0x80;
-  if (SerializeInt(writer, flags, 1))
-    return 0;
-
-  if (writer->Write(data, static_cast<uint32>(length)))
-    return 0;
-
-  if (!WriteEbmlMasterElement(writer, kMkvBlockAdditions,
-                              block_additions_payload_size))
-    return 0;
-
-  if (!WriteEbmlMasterElement(writer, kMkvBlockMore, block_more_payload_size))
-    return 0;
-
-  if (!WriteEbmlElement(writer, kMkvBlockAddID, add_id))
-    return 0;
-
-  if (!WriteEbmlElement(writer, kMkvBlockAdditional, additional,
-                        additional_length))
-    return 0;
-
-  return block_group_elem_size;
-}
-
-// Writes a WebM BlockGroup with DiscardPadding. The structure is as follows:
-// Indentation shows sub-levels
-// BlockGroup
-//  Block
-//    Data
-//  DiscardPadding
-uint64 WriteBlockWithDiscardPadding(IMkvWriter* writer, const uint8* data,
-                                    uint64 length, int64 discard_padding,
-                                    uint64 track_number, int64 timecode,
-                                    uint64 is_key) {
-  if (!data || length < 1 || discard_padding <= 0)
-    return 0;
-
-  const uint64 block_payload_size = 4 + length;
-  const uint64 block_elem_size =
-      EbmlMasterElementSize(kMkvBlock, block_payload_size) + block_payload_size;
-  const uint64 discard_padding_elem_size =
-      EbmlElementSize(kMkvDiscardPadding, discard_padding);
-  const uint64 block_group_payload_size =
-      block_elem_size + discard_padding_elem_size;
-  const uint64 block_group_elem_size =
-      EbmlMasterElementSize(kMkvBlockGroup, block_group_payload_size) +
-      block_group_payload_size;
-
-  if (!WriteEbmlMasterElement(writer, kMkvBlockGroup, block_group_payload_size))
-    return 0;
-
-  if (!WriteEbmlMasterElement(writer, kMkvBlock, block_payload_size))
-    return 0;
-
-  if (WriteUInt(writer, track_number))
-    return 0;
-
-  if (SerializeInt(writer, timecode, 2))
-    return 0;
-
-  uint64 flags = 0;
-  if (is_key)
-    flags |= 0x80;
-  if (SerializeInt(writer, flags, 1))
-    return 0;
-
-  if (writer->Write(data, static_cast<uint32>(length)))
-    return 0;
-
-  if (WriteID(writer, kMkvDiscardPadding))
-    return 0;
-
-  const uint64 size = GetUIntSize(discard_padding);
-  if (WriteUInt(writer, size))
-    return false;
-
-  if (SerializeInt(writer, discard_padding, static_cast<int32>(size)))
-    return false;
-
-  return block_group_elem_size;
+  return frame->CanBeSimpleBlock() ?
+             WriteSimpleBlock(writer, frame, relative_timecode) :
+             WriteBlock(writer, frame, relative_timecode,
+                        cluster->timecode_scale());
 }

 uint64 WriteVoidElement(IMkvWriter* writer, uint64 size) {
@@ -698,10 +606,7 @@ mkvmuxer::uint64 mkvmuxer::MakeUID(unsigned int* seed) {
 // TODO(fgalligan): Move random number generation to platform specific code.
 #ifdef _MSC_VER
    (void)seed;
-    unsigned int random_value;
-    const errno_t e = rand_s(&random_value);
-    (void)e;
-    const int32 nn = random_value;
+    const int32 nn = rand();
 #elif __ANDROID__
    int32 temp_num = 1;
    int fd = open("/dev/urandom", O_RDONLY);
--- a/third_party/libwebm/mkvmuxerutil.hpp
+++ b/third_party/libwebm/mkvmuxerutil.hpp
@@ -9,6 +9,7 @@
 #ifndef MKVMUXERUTIL_HPP
 #define MKVMUXERUTIL_HPP

+#include "mkvmuxer.hpp"
 #include "mkvmuxertypes.hpp"

 namespace mkvmuxer {
@@ -23,6 +24,7 @@ int32 SerializeInt(IMkvWriter* writer, int64 value, int32 size);

 // Returns the size in bytes of the element.
 int32 GetUIntSize(uint64 value);
+int32 GetIntSize(int64 value);
 int32 GetCodedUIntSize(uint64 value);
 uint64 EbmlMasterElementSize(uint64 type, uint64 value);
 uint64 EbmlElementSize(uint64 type, int64 value);
@@ -30,7 +32,7 @@ uint64 EbmlElementSize(uint64 type, uint64 value);
 uint64 EbmlElementSize(uint64 type, float value);
 uint64 EbmlElementSize(uint64 type, const char* value);
 uint64 EbmlElementSize(uint64 type, const uint8* value, uint64 size);
-uint64 EbmlDateElementSize(uint64 type, int64 value);
+uint64 EbmlDateElementSize(uint64 type);

 // Creates an EBML coded number from |value| and writes it out. The size of
 // the coded number is determined by the value of |value|. |value| must not
@@ -51,73 +53,17 @@ int32 WriteID(IMkvWriter* writer, uint64 type);

 // Output an Mkv non-master element. Returns true if the element was written.
 bool WriteEbmlElement(IMkvWriter* writer, uint64 type, uint64 value);
+bool WriteEbmlElement(IMkvWriter* writer, uint64 type, int64 value);
 bool WriteEbmlElement(IMkvWriter* writer, uint64 type, float value);
 bool WriteEbmlElement(IMkvWriter* writer, uint64 type, const char* value);
 bool WriteEbmlElement(IMkvWriter* writer, uint64 type, const uint8* value,
                      uint64 size);
 bool WriteEbmlDateElement(IMkvWriter* writer, uint64 type, int64 value);

-// Output an Mkv Simple Block.
-// Inputs:
-//   data:         Pointer to the data.
-//   length:       Length of the data.
-//   track_number: Track to add the data to. Value returned by Add track
-//                  functions.  Only values in the range [1, 126] are
-//                  permitted.
-//   timecode:     Relative timecode of the Block.  Only values in the
-//                  range [0, 2^15) are permitted.
-//   is_key:       Non-zero value specifies that frame is a key frame.
-uint64 WriteSimpleBlock(IMkvWriter* writer, const uint8* data, uint64 length,
-                        uint64 track_number, int64 timecode, uint64 is_key);
-
-// Output a metadata keyframe, using a Block Group element.
-// Inputs:
-//   data:         Pointer to the (meta)data.
-//   length:       Length of the (meta)data.
-//   track_number: Track to add the data to. Value returned by Add track
-//                  functions.  Only values in the range [1, 126] are
-//                  permitted.
-//   timecode      Timecode of frame, relative to cluster timecode.  Only
-//                  values in the range [0, 2^15) are permitted.
-//   duration_timecode  Duration of frame, using timecode units.
-uint64 WriteMetadataBlock(IMkvWriter* writer, const uint8* data, uint64 length,
-                          uint64 track_number, int64 timecode,
-                          uint64 duration_timecode);
-
-// Output an Mkv Block with BlockAdditional data.
-// Inputs:
-//   data:         Pointer to the data.
-//   length:       Length of the data.
-//   additional:   Pointer to the additional data
-//   additional_length: Length of the additional data.
-//   add_id: Value of BlockAddID element.
-//   track_number: Track to add the data to. Value returned by Add track
-//                  functions.  Only values in the range [1, 126] are
-//                  permitted.
-//   timecode:     Relative timecode of the Block.  Only values in the
-//                  range [0, 2^15) are permitted.
-//   is_key:       Non-zero value specifies that frame is a key frame.
-uint64 WriteBlockWithAdditional(IMkvWriter* writer, const uint8* data,
-                                uint64 length, const uint8* additional,
-                                uint64 additional_length, uint64 add_id,
-                                uint64 track_number, int64 timecode,
-                                uint64 is_key);
-
-// Output an Mkv Block with a DiscardPadding element.
-// Inputs:
-//   data:            Pointer to the data.
-//   length:          Length of the data.
-//   discard_padding: DiscardPadding value.
-//   track_number:    Track to add the data to. Value returned by Add track
-//                    functions. Only values in the range [1, 126] are
-//                    permitted.
-//   timecode:        Relative timecode of the Block.  Only values in the
-//                    range [0, 2^15) are permitted.
-//   is_key:          Non-zero value specifies that frame is a key frame.
-uint64 WriteBlockWithDiscardPadding(IMkvWriter* writer, const uint8* data,
-                                    uint64 length, int64 discard_padding,
-                                    uint64 track_number, int64 timecode,
-                                    uint64 is_key);
+// Output an Mkv Frame. It decides the correct element to write (Block vs
+// SimpleBlock) based on the parameters of the Frame. Returns 0 on failure.
+uint64 WriteFrame(IMkvWriter* writer, const Frame* const frame,
+                  Cluster* cluster);

 // Output a void element. |size| must be the entire size in bytes that will be
 // void. The function will calculate the size of the void header and subtract
--- a/third_party/libwebm/mkvparser.cpp
+++ b/third_party/libwebm/mkvparser.cpp
--- a/third_party/libwebm/mkvparser.hpp
+++ b/third_party/libwebm/mkvparser.hpp
@@ -32,7 +32,8 @@ long long ReadUInt(IMkvReader*, long long, long&);
 long long UnserializeUInt(IMkvReader*, long long pos, long long size);

 long UnserializeFloat(IMkvReader*, long long pos, long long size, double&);
-long UnserializeInt(IMkvReader*, long long pos, long len, long long& result);
+long UnserializeInt(IMkvReader*, long long pos, long long size,
+                    long long& result);

 long UnserializeString(IMkvReader*, long long pos, long long size, char*& str);

@@ -398,6 +399,10 @@ class VideoTrack : public Track {

  long long GetWidth() const;
  long long GetHeight() const;
+  long long GetDisplayWidth() const;
+  long long GetDisplayHeight() const;
+  long long GetDisplayUnit() const;
+  long long GetStereoMode() const;
  double GetFrameRate() const;

  bool VetEntry(const BlockEntry*) const;
@@ -406,6 +411,11 @@ class VideoTrack : public Track {
 private:
  long long m_width;
  long long m_height;
+  long long m_display_width;
+  long long m_display_height;
+  long long m_display_unit;
+  long long m_stereo_mode;
+
  double m_rate;
 };

@@ -582,6 +592,85 @@ class Chapters {
  int m_editions_count;
 };

+class Tags {  // NOTE(review): appears to model the Matroska Tags element
+  Tags(const Tags&);  // private: Tags objects cannot be copied by callers
+  Tags& operator=(const Tags&);
+
+ public:
+  Segment* const m_pSegment;
+  const long long m_start;  // NOTE(review): presumably payload_start - confirm
+  const long long m_size;  // NOTE(review): presumably payload_size - confirm
+  const long long m_element_start;
+  const long long m_element_size;
+
+  Tags(Segment*, long long payload_start, long long payload_size,
+       long long element_start, long long element_size);
+
+  ~Tags();
+
+  long Parse();  // NOTE(review): return convention is not visible here
+
+  class Tag;
+  class SimpleTag;
+
+  class SimpleTag {
+    friend class Tag;  // Tag may use the private members declared below
+    SimpleTag();  // private, as are copy, destruction and assignment
+    SimpleTag(const SimpleTag&);
+    ~SimpleTag();
+    SimpleTag& operator=(const SimpleTag&);
+
+   public:
+    const char* GetTagName() const;
+    const char* GetTagString() const;
+
+   private:
+    void Init();
+    void ShallowCopy(SimpleTag&) const;
+    void Clear();
+    long Parse(IMkvReader*, long long pos, long long size);
+
+    char* m_tag_name;  // NOTE(review): presumably backs GetTagName() - confirm
+    char* m_tag_string;  // NOTE(review): presumably backs GetTagString()
+  };
+
+  class Tag {
+    friend class Tags;  // Tags may use the private members declared below
+    Tag();  // private, as are copy, destruction and assignment
+    Tag(const Tag&);
+    ~Tag();
+    Tag& operator=(const Tag&);
+
+   public:
+    int GetSimpleTagCount() const;
+    const SimpleTag* GetSimpleTag(int index) const;
+
+   private:
+    void Init();
+    void ShallowCopy(Tag&) const;
+    void Clear();
+    long Parse(IMkvReader*, long long pos, long long size);
+
+    long ParseSimpleTag(IMkvReader*, long long pos, long long size);
+    bool ExpandSimpleTagsArray();
+
+    SimpleTag* m_simple_tags;
+    int m_simple_tags_size;  // NOTE(review): looks like capacity - confirm
+    int m_simple_tags_count;  // NOTE(review): looks like used count - confirm
+  };
+
+  int GetTagCount() const;
+  const Tag* GetTag(int index) const;
+
+ private:
+  long ParseTag(long long pos, long long size);
+  bool ExpandTagsArray();
+
+  Tag* m_tags;
+  int m_tags_size;  // NOTE(review): looks like allocated capacity - confirm
+  int m_tags_count;  // NOTE(review): looks like entries in use - confirm
+};
+
 class SegmentInfo {
  SegmentInfo(const SegmentInfo&);
  SegmentInfo& operator=(const SegmentInfo&);
@@ -684,7 +773,7 @@ class CuePoint {
  long long m_element_start;
  long long m_element_size;

-  void Load(IMkvReader*);
+  bool Load(IMkvReader*);

  long long GetTimeCode() const;  // absolute but unscaled
  long long GetTime(const Segment*) const;  // absolute and scaled (ns units)
@@ -697,7 +786,7 @@ class CuePoint {
    // reference = clusters containing req'd referenced blocks
    //  reftime = timecode of the referenced block

-    void Parse(IMkvReader*, long long, long long);
+    bool Parse(IMkvReader*, long long, long long);
  };

  const TrackPosition* Find(const Track*) const;
@@ -730,14 +819,6 @@ class Cues {
      long long time_ns, const Track*, const CuePoint*&,
      const CuePoint::TrackPosition*&) const;

-#if 0
-    bool FindNext(  //upper_bound of time_ns
-        long long time_ns,
-        const Track*,
-        const CuePoint*&,
-        const CuePoint::TrackPosition*&) const;
-#endif
-
  const CuePoint* GetFirst() const;
  const CuePoint* GetLast() const;
  const CuePoint* GetNext(const CuePoint*) const;
@@ -751,7 +832,7 @@ class Cues {
  bool DoneParsing() const;

 private:
-  void Init() const;
+  bool Init() const;
  void PreloadCuePoint(long&, long long) const;

  mutable CuePoint** m_cue_points;
@@ -877,18 +958,12 @@ class Segment {
  long ParseNext(const Cluster* pCurr, const Cluster*& pNext, long long& pos,
                 long& size);

-#if 0
-    //This pair parses one cluster, but only changes the state of the
-    //segment object when the cluster is actually added to the index.
-    long ParseCluster(long long& cluster_pos, long long& new_pos) const;
-    bool AddCluster(long long cluster_pos, long long new_pos);
-#endif
-
  const SeekHead* GetSeekHead() const;
  const Tracks* GetTracks() const;
  const SegmentInfo* GetInfo() const;
  const Cues* GetCues() const;
  const Chapters* GetChapters() const;
+  const Tags* GetTags() const;

  long long GetDuration() const;

@@ -914,6 +989,7 @@ class Segment {
  Tracks* m_pTracks;
  Cues* m_pCues;
  Chapters* m_pChapters;
+  Tags* m_pTags;
  Cluster** m_clusters;
  long m_clusterCount;  // number of entries for which m_index >= 0
  long m_clusterPreloadCount;  // number of entries for which m_index < 0
--- a/third_party/libwebm/webmids.hpp
+++ b/third_party/libwebm/webmids.hpp
@@ -133,7 +133,13 @@ enum MkvId {
  kMkvChapterDisplay = 0x80,
  kMkvChapString = 0x85,
  kMkvChapLanguage = 0x437C,
-  kMkvChapCountry = 0x437E
+  kMkvChapCountry = 0x437E,
+  // Tags
+  kMkvTags = 0x1254C367,
+  kMkvTag = 0x7373,
+  kMkvSimpleTag = 0x67C8,
+  kMkvTagName = 0x45A3,
+  kMkvTagString = 0x4487
 };

 }  // end namespace mkvmuxer
--- a/third_party/libyuv/README.libvpx
+++ b/third_party/libyuv/README.libvpx
@@ -1,6 +1,6 @@
 Name: libyuv
 URL: http://code.google.com/p/libyuv/
-Version: 1305
+Version: 1456
 License: BSD
 License File: LICENSE

@@ -13,4 +13,3 @@ which down-samples the original input video (f.g. 1280x720) a number of times
 in order to encode multiple resolution bit streams.

 Local Modifications:
-cherry pick r1311 'disable nv12 avx2 for vs9/10 that dont support avx2 instructions.'
--- a/third_party/libyuv/include/libyuv/convert.h
+++ b/third_party/libyuv/include/libyuv/convert.h
@@ -71,6 +71,8 @@ int I400ToI420(const uint8* src_y, int src_stride_y,
               uint8* dst_v, int dst_stride_v,
               int width, int height);

+#define J400ToJ420 I400ToI420
+
 // Convert NV12 to I420.
 LIBYUV_API
 int NV12ToI420(const uint8* src_y, int src_stride_y,
--- a/third_party/libyuv/include/libyuv/convert_argb.h
+++ b/third_party/libyuv/include/libyuv/convert_argb.h
@@ -68,20 +68,20 @@ int I411ToARGB(const uint8* src_y, int src_stride_y,
               uint8* dst_argb, int dst_stride_argb,
               int width, int height);

-// Convert I400 (grey) to ARGB.
+// Convert I400 (grey) to ARGB.  Reverse of ARGBToI400.
 LIBYUV_API
 int I400ToARGB(const uint8* src_y, int src_stride_y,
               uint8* dst_argb, int dst_stride_argb,
               int width, int height);

-// Alias.
-#define YToARGB I400ToARGB_Reference
-
-// Convert I400 to ARGB. Reverse of ARGBToI400.
+// Convert J400 (jpeg grey) to ARGB.
 LIBYUV_API
-int I400ToARGB_Reference(const uint8* src_y, int src_stride_y,
-                         uint8* dst_argb, int dst_stride_argb,
-                         int width, int height);
+int J400ToARGB(const uint8* src_y, int src_stride_y,
+               uint8* dst_argb, int dst_stride_argb,
+               int width, int height);
+
+// Alias.
+#define YToARGB I400ToARGB

 // Convert NV12 to ARGB.
 LIBYUV_API
--- a/third_party/libyuv/include/libyuv/convert_from.h
+++ b/third_party/libyuv/include/libyuv/convert_from.h
@@ -137,6 +137,17 @@ int I420ToRGB565(const uint8* src_y, int src_stride_y,
                 uint8* dst_frame, int dst_stride_frame,
                 int width, int height);

+// Convert I420 To RGB565 with 4x4 dither matrix (16 bytes).
+// Values in dither matrix from 0 to 7 recommended.
+// The dither matrix is ordered so that the first byte is the upper left.
+
+LIBYUV_API
+int I420ToRGB565Dither(const uint8* src_y, int src_stride_y,
+                       const uint8* src_u, int src_stride_u,
+                       const uint8* src_v, int src_stride_v,
+                       uint8* dst_frame, int dst_stride_frame,
+                       const uint8* dither4x4, int width, int height);
+
 LIBYUV_API
 int I420ToARGB1555(const uint8* src_y, int src_stride_y,
                   const uint8* src_u, int src_stride_u,
--- a/third_party/libyuv/include/libyuv/convert_from_argb.h
+++ b/third_party/libyuv/include/libyuv/convert_from_argb.h
@@ -61,12 +61,15 @@ int ARGBToRGB565(const uint8* src_argb, int src_stride_argb,
                 uint8* dst_rgb565, int dst_stride_rgb565,
                 int width, int height);

-// Convert ARGB To RGB565 with 8x8 dither matrix (64 bytes).
-// Values in dither matrix from 0 to 255.  128 is best for no dither.
+// Convert ARGB To RGB565 with 4x4 dither matrix (16 bytes).
+// Values in dither matrix from 0 to 7 recommended.
+// The dither matrix is ordered so that the first byte is the upper left.
+// TODO(fbarchard): Consider pointer to 2d array for dither4x4.
+// const uint8(*dither)[4][4];
 LIBYUV_API
 int ARGBToRGB565Dither(const uint8* src_argb, int src_stride_argb,
                       uint8* dst_rgb565, int dst_stride_rgb565,
-                       const uint8* dither8x8, int width, int height);
+                       const uint8* dither4x4, int width, int height);

 // Convert ARGB To ARGB1555.
 LIBYUV_API
@@ -140,6 +143,12 @@ int ARGBToI400(const uint8* src_argb, int src_stride_argb,
               uint8* dst_y, int dst_stride_y,
               int width, int height);

+// Convert ARGB to G. (Reverse of J400ToARGB, which replicates G back to ARGB)
+LIBYUV_API
+int ARGBToG(const uint8* src_argb, int src_stride_argb,
+            uint8* dst_g, int dst_stride_g,
+            int width, int height);
+
 // Convert ARGB To NV12.
 LIBYUV_API
 int ARGBToNV12(const uint8* src_argb, int src_stride_argb,
--- a/third_party/libyuv/include/libyuv/planar_functions.h
+++ b/third_party/libyuv/include/libyuv/planar_functions.h
@@ -45,6 +45,7 @@ int I400ToI400(const uint8* src_y, int src_stride_y,
               uint8* dst_y, int dst_stride_y,
               int width, int height);

+#define J400ToJ400 I400ToI400

 // Copy I422 to I422.
 #define I422ToI422 I422Copy
@@ -84,6 +85,18 @@ int UYVYToI422(const uint8* src_uyvy, int src_stride_uyvy,
               uint8* dst_v, int dst_stride_v,
               int width, int height);

+LIBYUV_API
+int YUY2ToNV12(const uint8* src_yuy2, int src_stride_yuy2,
+               uint8* dst_y, int dst_stride_y,
+               uint8* dst_uv, int dst_stride_uv,
+               int width, int height);
+
+LIBYUV_API
+int UYVYToNV12(const uint8* src_uyvy, int src_stride_uyvy,
+               uint8* dst_y, int dst_stride_y,
+               uint8* dst_uv, int dst_stride_uv,
+               int width, int height);
+
 // Convert I420 to I400. (calls CopyPlane ignoring u/v).
 LIBYUV_API
 int I420ToI400(const uint8* src_y, int src_stride_y,
@@ -93,6 +106,7 @@ int I420ToI400(const uint8* src_y, int src_stride_y,
               int width, int height);

 // Alias
+#define J420ToJ400 I420ToI400
 #define I420ToI420Mirror I420Mirror

 // I420 mirror.
@@ -387,24 +401,24 @@ int ARGBInterpolate(const uint8* src_argb0, int src_stride_argb0,
                    uint8* dst_argb, int dst_stride_argb,
                    int width, int height, int interpolation);

-#if defined(__pnacl__) || defined(__CLR_VER) || defined(COVERAGE_ENABLED) || \
-    defined(TARGET_IPHONE_SIMULATOR)
+#if defined(__pnacl__) || defined(__CLR_VER) || \
+    (defined(__i386__) && !defined(__SSE2__))
 #define LIBYUV_DISABLE_X86
 #endif
+// The following are available on all x86 platforms:
+#if !defined(LIBYUV_DISABLE_X86) && \
+    (defined(_M_IX86) || defined(__x86_64__) || defined(__i386__))
+#define HAS_ARGBAFFINEROW_SSE2
+#endif

-// Row functions for copying a pixels from a source with a slope to a row
+// Row function for copying pixels from a source with a slope to a row
 // of destination. Useful for scaling, rotation, mirror, texture mapping.
 LIBYUV_API
 void ARGBAffineRow_C(const uint8* src_argb, int src_argb_stride,
                     uint8* dst_argb, const float* uv_dudv, int width);
-// The following are available on all x86 platforms:
-#if !defined(LIBYUV_DISABLE_X86) && \
-    (defined(_M_IX86) || defined(__x86_64__) || defined(__i386__))
 LIBYUV_API
 void ARGBAffineRow_SSE2(const uint8* src_argb, int src_argb_stride,
                        uint8* dst_argb, const float* uv_dudv, int width);
-#define HAS_ARGBAFFINEROW_SSE2
-#endif  // LIBYUV_DISABLE_X86

 // Shuffle ARGB channel order.  e.g. BGRA to ARGB.
 // shuffler is 16 bytes and must be aligned.
--- a/third_party/libyuv/include/libyuv/rotate_row.h
+++ b/third_party/libyuv/include/libyuv/rotate_row.h
@@ -0,0 +1,138 @@
+/*
+ *  Copyright 2013 The LibYuv Project Authors. All rights reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS. All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef INCLUDE_LIBYUV_ROTATE_ROW_H_  // NOLINT
+#define INCLUDE_LIBYUV_ROTATE_ROW_H_
+
+#include "libyuv/basic_types.h"
+
+#ifdef __cplusplus
+namespace libyuv {
+extern "C" {
+#endif
+
+#if defined(__pnacl__) || defined(__CLR_VER) || \
+    (defined(__i386__) && !defined(__SSE2__))
+#define LIBYUV_DISABLE_X86
+#endif
+
+// Visual C 2012 required for AVX2.
+#if defined(_M_IX86) && !defined(__clang__) && \
+    defined(_MSC_VER) && _MSC_VER >= 1700
+#define VISUALC_HAS_AVX2 1
+#endif  // VisualStudio >= 2012
+
+// TODO(fbarchard): switch to standard form of inline; fails on clangcl.
+#if !defined(LIBYUV_DISABLE_X86) && \
+    (defined(_M_IX86) || defined(__x86_64__) || defined(__i386__))
+#if defined(__APPLE__) && defined(__i386__)
+#define DECLARE_FUNCTION(name)                                                 \
+    ".text                                     \n"                             \
+    ".private_extern _" #name "                \n"                             \
+    ".align 4,0x90                             \n"                             \
+"_" #name ":                                   \n"
+#elif defined(__MINGW32__) || defined(__CYGWIN__) && defined(__i386__)
+#define DECLARE_FUNCTION(name)                                                 \
+    ".text                                     \n"                             \
+    ".align 4,0x90                             \n"                             \
+"_" #name ":                                   \n"
+#else
+#define DECLARE_FUNCTION(name)                                                 \
+    ".text                                     \n"                             \
+    ".align 4,0x90                             \n"                             \
+#name ":                                       \n"
+#endif
+#endif
+
+// The following are available for Visual C:
+#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && \
+    defined(_MSC_VER) && !defined(__clang__)
+#define HAS_TRANSPOSEWX8_SSSE3
+#define HAS_TRANSPOSEUVWX8_SSE2
+#endif
+
+// The following are available for GCC but not NaCl:
+#if !defined(LIBYUV_DISABLE_X86) && \
+    (defined(__i386__) || (defined(__x86_64__) && !defined(__native_client__)))
+#define HAS_TRANSPOSEWX8_SSSE3
+#endif
+
+// The following are available for 32 bit GCC:
+#if !defined(LIBYUV_DISABLE_X86) && defined(__i386__)  && !defined(__clang__)
+#define HAS_TRANSPOSEUVWX8_SSE2
+#endif
+
+// The following are available for 64 bit GCC but not NaCl:
+#if !defined(LIBYUV_DISABLE_X86) && !defined(__native_client__) && \
+    defined(__x86_64__)
+#define HAS_TRANSPOSEWX8_FAST_SSSE3
+#define HAS_TRANSPOSEUVWX8_SSE2
+#endif
+
+#if !defined(LIBYUV_DISABLE_NEON) && !defined(__native_client__) && \
+    (defined(__ARM_NEON__) || defined(LIBYUV_NEON) || defined(__aarch64__))
+#define HAS_TRANSPOSEWX8_NEON
+#define HAS_TRANSPOSEUVWX8_NEON
+#endif
+
+#if !defined(LIBYUV_DISABLE_MIPS) && !defined(__native_client__) && \
+    defined(__mips__) && \
+    defined(__mips_dsp) && (__mips_dsp_rev >= 2)
+#define HAS_TRANSPOSEWX8_MIPS_DSPR2
+#define HAS_TRANSPOSEUVWx8_MIPS_DSPR2
+#endif  // defined(__mips__)
+
+void TransposeWxH_C(const uint8* src, int src_stride,
+                    uint8* dst, int dst_stride, int width, int height);
+
+void TransposeWx8_C(const uint8* src, int src_stride,
+                    uint8* dst, int dst_stride, int width);
+void TransposeWx8_NEON(const uint8* src, int src_stride,
+                       uint8* dst, int dst_stride, int width);
+void TransposeWx8_SSSE3(const uint8* src, int src_stride,
+                        uint8* dst, int dst_stride, int width);
+void TransposeWx8_Fast_SSSE3(const uint8* src, int src_stride,
+                             uint8* dst, int dst_stride, int width);
+void TransposeWx8_MIPS_DSPR2(const uint8* src, int src_stride,
+                             uint8* dst, int dst_stride, int width);
+
+void TransposeWx8_Any_NEON(const uint8* src, int src_stride,
+                           uint8* dst, int dst_stride, int width);
+void TransposeWx8_Any_SSSE3(const uint8* src, int src_stride,
+                            uint8* dst, int dst_stride, int width);
+void TransposeWx8_Fast_Any_SSSE3(const uint8* src, int src_stride,
+                                 uint8* dst, int dst_stride, int width);
+void TransposeWx8_Any_MIPS_DSPR2(const uint8* src, int src_stride,
+                                 uint8* dst, int dst_stride, int width);
+
+void TransposeUVWxH_C(const uint8* src, int src_stride,
+                      uint8* dst_a, int dst_stride_a,
+                      uint8* dst_b, int dst_stride_b,
+                      int width, int height);
+
+void TransposeUVWx8_C(const uint8* src, int src_stride,
+                      uint8* dst_a, int dst_stride_a,
+                      uint8* dst_b, int dst_stride_b, int width);
+void TransposeUVWx8_SSE2(const uint8* src, int src_stride,
+                         uint8* dst_a, int dst_stride_a,
+                         uint8* dst_b, int dst_stride_b, int width);
+void TransposeUVWx8_NEON(const uint8* src, int src_stride,
+                         uint8* dst_a, int dst_stride_a,
+                         uint8* dst_b, int dst_stride_b, int width);
+void TransposeUVWx8_MIPS_DSPR2(const uint8* src, int src_stride,
+                               uint8* dst_a, int dst_stride_a,
+                               uint8* dst_b, int dst_stride_b, int width);
+
+#ifdef __cplusplus
+}  // extern "C"
+}  // namespace libyuv
+#endif
+
+#endif  // INCLUDE_LIBYUV_ROTATE_ROW_H_  NOLINT
--- a/third_party/libyuv/include/libyuv/row.h
+++ b/third_party/libyuv/include/libyuv/row.h
@@ -37,10 +37,8 @@ extern "C" {
  free(var##_mem);  \
  var = 0

-#if defined(__pnacl__) || defined(__CLR_VER) || defined(COVERAGE_ENABLED) || \
-    defined(TARGET_IPHONE_SIMULATOR) || \
-    (defined(__i386__) && !defined(__SSE2__)) || \
-    (defined(_MSC_VER) && defined(__clang__))
+#if defined(__pnacl__) || defined(__CLR_VER) || \
+    (defined(__i386__) && !defined(__SSE2__))
 #define LIBYUV_DISABLE_X86
 #endif
 // True if compiling for SSSE3 as a requirement.
@@ -48,6 +46,9 @@ extern "C" {
 #define LIBYUV_SSSE3_ONLY
 #endif

+#if defined(__native_client__)
+#define LIBYUV_DISABLE_NEON
+#endif
 // clang >= 3.5.0 required for Arm64.
 #if defined(__clang__) && defined(__aarch64__) && !defined(LIBYUV_DISABLE_NEON)
 #if (__clang_major__ < 3) || (__clang_major__ == 3 && (__clang_minor__ < 5))
@@ -63,11 +64,11 @@ extern "C" {
 #define HAS_ABGRTOYROW_SSSE3
 #define HAS_ARGB1555TOARGBROW_SSE2
 #define HAS_ARGB4444TOARGBROW_SSE2
+#define HAS_ARGBSETROW_X86
 #define HAS_ARGBSHUFFLEROW_SSE2
 #define HAS_ARGBSHUFFLEROW_SSSE3
 #define HAS_ARGBTOARGB1555ROW_SSE2
 #define HAS_ARGBTOARGB4444ROW_SSE2
-#define HAS_ARGBTOBAYERGGROW_SSE2
 #define HAS_ARGBTORAWROW_SSSE3
 #define HAS_ARGBTORGB24ROW_SSSE3
 #define HAS_ARGBTORGB565ROW_SSE2
@@ -95,7 +96,8 @@ extern "C" {
 #define HAS_I422TOUYVYROW_SSE2
 #define HAS_I422TOYUY2ROW_SSE2
 #define HAS_I444TOARGBROW_SSSE3
-// #define HAS_J422TOARGBROW_SSSE3
+#define HAS_J400TOARGBROW_SSE2
+#define HAS_J422TOARGBROW_SSSE3
 #define HAS_MERGEUVROW_SSE2
 #define HAS_MIRRORROW_SSE2
 #define HAS_MIRRORROW_SSSE3
@@ -112,15 +114,13 @@ extern "C" {
 #define HAS_RGB565TOARGBROW_SSE2
 #define HAS_RGBATOUVROW_SSSE3
 #define HAS_RGBATOYROW_SSSE3
-#define HAS_SETROW_X86
 #define HAS_SETROW_ERMS
-#define HAS_ARGBSETROW_X86
+#define HAS_SETROW_X86
 #define HAS_SPLITUVROW_SSE2
 #define HAS_UYVYTOARGBROW_SSSE3
 #define HAS_UYVYTOUV422ROW_SSE2
 #define HAS_UYVYTOUVROW_SSE2
 #define HAS_UYVYTOYROW_SSE2
-#define HAS_YTOARGBROW_SSE2
 #define HAS_YUY2TOARGBROW_SSSE3
 #define HAS_YUY2TOUV422ROW_SSE2
 #define HAS_YUY2TOUVROW_SSE2
@@ -157,8 +157,9 @@ extern "C" {
 #define HAS_SOBELYROW_SSE2
 #endif

-// The following are available on x64 Visual C:
-#if !defined(LIBYUV_DISABLE_X86) && defined (_M_X64)
+// The following are available on x64 Visual C and clangcl.
+#if !defined(LIBYUV_DISABLE_X86) && defined (_M_X64) && \
+    (!defined(__clang__) || defined(__SSSE3__))
 #define HAS_I422TOARGBROW_SSSE3
 #endif

@@ -177,27 +178,31 @@ extern "C" {
 #endif  // __clang__

 // Visual C 2012 required for AVX2.
-#if defined(_M_IX86) && defined(_MSC_VER) && _MSC_VER >= 1700
+#if defined(_M_IX86) && !defined(__clang__) && \
+    defined(_MSC_VER) && _MSC_VER >= 1700
 #define VISUALC_HAS_AVX2 1
 #endif  // VisualStudio >= 2012

 // The following are available require VS2012.  Port to GCC.
 #if !defined(LIBYUV_DISABLE_X86) && defined(VISUALC_HAS_AVX2)
-// TODO(fbarchard): fix AVX2 versions of YUV conversion.  bug=393
-#define HAS_I422TOABGRROW_AVX2
-#define HAS_I422TOARGBROW_AVX2
-#define HAS_I422TOBGRAROW_AVX2
-#define HAS_I422TORGBAROW_AVX2
-#define HAS_NV12TOARGBROW_AVX2
-#define HAS_NV21TOARGBROW_AVX2
-#define HAS_ARGBTORGB565ROW_AVX2
+#define HAS_ARGB1555TOARGBROW_AVX2
+#define HAS_ARGB4444TOARGBROW_AVX2
 #define HAS_ARGBTOARGB1555ROW_AVX2
 #define HAS_ARGBTOARGB4444ROW_AVX2
-#define HAS_NV12TORGB565ROW_AVX2
-#define HAS_NV21TORGB565ROW_AVX2
-#define HAS_I422TORGB565ROW_AVX2
+#define HAS_ARGBTORGB565DITHERROW_AVX2
+#define HAS_ARGBTORGB565DITHERROW_SSE2
+#define HAS_ARGBTORGB565ROW_AVX2
+#define HAS_I411TOARGBROW_AVX2
 #define HAS_I422TOARGB1555ROW_AVX2
 #define HAS_I422TOARGB4444ROW_AVX2
+#define HAS_I422TORGB565ROW_AVX2
+#define HAS_I444TOARGBROW_AVX2
+#define HAS_J400TOARGBROW_AVX2
+#define HAS_NV12TOARGBROW_AVX2
+#define HAS_NV12TORGB565ROW_AVX2
+#define HAS_NV21TOARGBROW_AVX2
+#define HAS_NV21TORGB565ROW_AVX2
+#define HAS_RGB565TOARGBROW_AVX2
 #endif

 // The following are available on all x86 platforms, but
@@ -214,24 +219,27 @@ extern "C" {
 #define HAS_ARGBTOYJROW_AVX2
 #define HAS_ARGBTOYROW_AVX2
 #define HAS_COPYROW_AVX
+#define HAS_I400TOARGBROW_AVX2
+#define HAS_I422TOABGRROW_AVX2
+#define HAS_I422TOARGBROW_AVX2
+#define HAS_I422TOBGRAROW_AVX2
+#define HAS_I422TORAWROW_AVX2
+#define HAS_I422TORGB24ROW_AVX2
+#define HAS_I422TORGBAROW_AVX2
 #define HAS_INTERPOLATEROW_AVX2
+#define HAS_J422TOARGBROW_AVX2
 #define HAS_MERGEUVROW_AVX2
 #define HAS_MIRRORROW_AVX2
 #define HAS_SPLITUVROW_AVX2
+#define HAS_UYVYTOARGBROW_AVX2
 #define HAS_UYVYTOUV422ROW_AVX2
 #define HAS_UYVYTOUVROW_AVX2
 #define HAS_UYVYTOYROW_AVX2
-#define HAS_YTOARGBROW_AVX2
+#define HAS_YUY2TOARGBROW_AVX2
 #define HAS_YUY2TOUV422ROW_AVX2
 #define HAS_YUY2TOUVROW_AVX2
 #define HAS_YUY2TOYROW_AVX2

-// The following require HAS_I422TOARGBROW_AVX2
-#if defined(HAS_I422TOARGBROW_AVX2)
-#define HAS_YUY2TOARGBROW_AVX2
-#define HAS_UYVYTOARGBROW_AVX2
-#endif
-
 // Effects:
 #define HAS_ARGBADDROW_AVX2
 #define HAS_ARGBATTENUATEROW_AVX2
@@ -240,22 +248,6 @@ extern "C" {
 #define HAS_ARGBUNATTENUATEROW_AVX2
 #endif

-
-// The following are Yasm x86 only:
-// TODO(fbarchard): Port AVX2 to inline.
-#if !defined(LIBYUV_DISABLE_X86) && defined(HAVE_YASM)
-    (defined(_M_IX86) || defined(_M_X64) || \
-    defined(__x86_64__) || defined(__i386__))
-#define HAS_MERGEUVROW_AVX2
-#define HAS_MERGEUVROW_MMX
-#define HAS_SPLITUVROW_AVX2
-#define HAS_SPLITUVROW_MMX
-#define HAS_UYVYTOYROW_AVX2
-#define HAS_UYVYTOYROW_MMX
-#define HAS_YUY2TOYROW_AVX2
-#define HAS_YUY2TOYROW_MMX
-#endif
-
 // The following are disabled when SSSE3 is available:
 #if !defined(LIBYUV_DISABLE_X86) && \
    (defined(_M_IX86) || defined(__x86_64__) || defined(__i386__)) && \
@@ -278,7 +270,6 @@ extern "C" {
 #define HAS_ARGB4444TOYROW_NEON
 #define HAS_ARGBTOARGB1555ROW_NEON
 #define HAS_ARGBTOARGB4444ROW_NEON
-#define HAS_ARGBTOBAYERGGROW_NEON
 #define HAS_ARGBTORAWROW_NEON
 #define HAS_ARGBTORGB24ROW_NEON
 #define HAS_ARGBTORGB565ROW_NEON
@@ -292,7 +283,7 @@ extern "C" {
 #define HAS_BGRATOUVROW_NEON
 #define HAS_BGRATOYROW_NEON
 #define HAS_COPYROW_NEON
-#define HAS_I400TOARGBROW_NEON
+#define HAS_J400TOARGBROW_NEON
 #define HAS_I411TOARGBROW_NEON
 #define HAS_I422TOABGRROW_NEON
 #define HAS_I422TOARGB1555ROW_NEON
@@ -331,11 +322,12 @@ extern "C" {
 #define HAS_UYVYTOUV422ROW_NEON
 #define HAS_UYVYTOUVROW_NEON
 #define HAS_UYVYTOYROW_NEON
-#define HAS_YTOARGBROW_NEON
+#define HAS_I400TOARGBROW_NEON
 #define HAS_YUY2TOARGBROW_NEON
 #define HAS_YUY2TOUV422ROW_NEON
 #define HAS_YUY2TOUVROW_NEON
 #define HAS_YUY2TOYROW_NEON
+#define HAS_ARGBTORGB565DITHERROW_NEON

 // Effects:
 #define HAS_ARGBADDROW_NEON
@@ -388,7 +380,6 @@ typedef __declspec(align(32)) int8 lvec8[32];
 typedef __declspec(align(32)) uint16 ulvec16[16];
 typedef __declspec(align(32)) uint32 ulvec32[8];
 typedef __declspec(align(32)) uint8 ulvec8[32];
-
 #elif defined(__GNUC__)
 // Caveat GCC 4.2 to 4.7 have a known issue using vectors with const.
 #define SIMD_ALIGNED(var) var __attribute__((aligned(16)))
@@ -869,6 +860,11 @@ void ARGB1555ToARGBRow_SSE2(const uint8* src_argb1555, uint8* dst_argb,
                            int pix);
 void ARGB4444ToARGBRow_SSE2(const uint8* src_argb4444, uint8* dst_argb,
                            int pix);
+void RGB565ToARGBRow_AVX2(const uint8* src_rgb565, uint8* dst_argb, int pix);
+void ARGB1555ToARGBRow_AVX2(const uint8* src_argb1555, uint8* dst_argb,
+                            int pix);
+void ARGB4444ToARGBRow_AVX2(const uint8* src_argb4444, uint8* dst_argb,
+                            int pix);

 void RGB24ToARGBRow_NEON(const uint8* src_rgb24, uint8* dst_argb, int pix);
 void RAWToARGBRow_NEON(const uint8* src_raw, uint8* dst_argb, int pix);
@@ -884,12 +880,20 @@ void ARGB1555ToARGBRow_C(const uint8* src_argb, uint8* dst_argb, int pix);
 void ARGB4444ToARGBRow_C(const uint8* src_argb, uint8* dst_argb, int pix);
 void RGB24ToARGBRow_Any_SSSE3(const uint8* src_rgb24, uint8* dst_argb, int pix);
 void RAWToARGBRow_Any_SSSE3(const uint8* src_raw, uint8* dst_argb, int pix);
+
 void RGB565ToARGBRow_Any_SSE2(const uint8* src_rgb565, uint8* dst_argb,
                              int pix);
 void ARGB1555ToARGBRow_Any_SSE2(const uint8* src_argb1555, uint8* dst_argb,
                                int pix);
 void ARGB4444ToARGBRow_Any_SSE2(const uint8* src_argb4444, uint8* dst_argb,
                                int pix);
+void RGB565ToARGBRow_Any_AVX2(const uint8* src_rgb565, uint8* dst_argb,
+                              int pix);
+void ARGB1555ToARGBRow_Any_AVX2(const uint8* src_argb1555, uint8* dst_argb,
+                                int pix);
+void ARGB4444ToARGBRow_Any_AVX2(const uint8* src_argb4444, uint8* dst_argb,
+                                int pix);
+
 void RGB24ToARGBRow_Any_NEON(const uint8* src_rgb24, uint8* dst_argb, int pix);
 void RAWToARGBRow_Any_NEON(const uint8* src_raw, uint8* dst_argb, int pix);
 void RGB565ToARGBRow_Any_NEON(const uint8* src_rgb565, uint8* dst_argb,
@@ -905,6 +909,13 @@ void ARGBToRGB565Row_SSE2(const uint8* src_argb, uint8* dst_rgb, int pix);
 void ARGBToARGB1555Row_SSE2(const uint8* src_argb, uint8* dst_rgb, int pix);
 void ARGBToARGB4444Row_SSE2(const uint8* src_argb, uint8* dst_rgb, int pix);

+void ARGBToRGB565DitherRow_C(const uint8* src_argb, uint8* dst_rgb,
+                             const uint32 dither4, int pix);
+void ARGBToRGB565DitherRow_SSE2(const uint8* src_argb, uint8* dst_rgb,
+                                const uint32 dither4, int pix);
+void ARGBToRGB565DitherRow_AVX2(const uint8* src_argb, uint8* dst_rgb,
+                                const uint32 dither4, int pix);
+
 void ARGBToRGB565Row_AVX2(const uint8* src_argb, uint8* dst_rgb, int pix);
 void ARGBToARGB1555Row_AVX2(const uint8* src_argb, uint8* dst_rgb, int pix);
 void ARGBToARGB4444Row_AVX2(const uint8* src_argb, uint8* dst_rgb, int pix);
@@ -914,6 +925,8 @@ void ARGBToRAWRow_NEON(const uint8* src_argb, uint8* dst_rgb, int pix);
 void ARGBToRGB565Row_NEON(const uint8* src_argb, uint8* dst_rgb, int pix);
 void ARGBToARGB1555Row_NEON(const uint8* src_argb, uint8* dst_rgb, int pix);
 void ARGBToARGB4444Row_NEON(const uint8* src_argb, uint8* dst_rgb, int pix);
+void ARGBToRGB565DitherRow_NEON(const uint8* src_argb, uint8* dst_rgb,
+                                const uint32 dither4, int width);

 void ARGBToRGBARow_C(const uint8* src_argb, uint8* dst_rgb, int pix);
 void ARGBToRGB24Row_C(const uint8* src_argb, uint8* dst_rgb, int pix);
@@ -922,14 +935,13 @@ void ARGBToRGB565Row_C(const uint8* src_argb, uint8* dst_rgb, int pix);
 void ARGBToARGB1555Row_C(const uint8* src_argb, uint8* dst_rgb, int pix);
 void ARGBToARGB4444Row_C(const uint8* src_argb, uint8* dst_rgb, int pix);

-void ARGBToRGB565DitherRow_C(const uint8* src_argb, uint8* dst_rgb,
-                             const uint8* dither8x8, int pix);
-
-void I400ToARGBRow_SSE2(const uint8* src_y, uint8* dst_argb, int pix);
-void I400ToARGBRow_NEON(const uint8* src_y, uint8* dst_argb, int pix);
-void I400ToARGBRow_C(const uint8* src_y, uint8* dst_argb, int pix);
-void I400ToARGBRow_Any_SSE2(const uint8* src_y, uint8* dst_argb, int pix);
-void I400ToARGBRow_Any_NEON(const uint8* src_y, uint8* dst_argb, int pix);
+void J400ToARGBRow_SSE2(const uint8* src_y, uint8* dst_argb, int pix);
+void J400ToARGBRow_AVX2(const uint8* src_y, uint8* dst_argb, int pix);
+void J400ToARGBRow_NEON(const uint8* src_y, uint8* dst_argb, int pix);
+void J400ToARGBRow_C(const uint8* src_y, uint8* dst_argb, int pix);
+void J400ToARGBRow_Any_SSE2(const uint8* src_y, uint8* dst_argb, int pix);
+void J400ToARGBRow_Any_AVX2(const uint8* src_y, uint8* dst_argb, int pix);
+void J400ToARGBRow_Any_NEON(const uint8* src_y, uint8* dst_argb, int pix);

 void I444ToARGBRow_C(const uint8* src_y,
                     const uint8* src_u,
@@ -1038,6 +1050,11 @@ void I444ToARGBRow_SSSE3(const uint8* src_y,
                         const uint8* src_v,
                         uint8* dst_argb,
                         int width);
+void I444ToARGBRow_AVX2(const uint8* src_y,
+                        const uint8* src_u,
+                        const uint8* src_v,
+                        uint8* dst_argb,
+                        int width);
 void I422ToARGBRow_SSSE3(const uint8* src_y,
                         const uint8* src_u,
                         const uint8* src_v,
@@ -1048,6 +1065,11 @@ void I411ToARGBRow_SSSE3(const uint8* src_y,
                         const uint8* src_v,
                         uint8* dst_argb,
                         int width);
+void I411ToARGBRow_AVX2(const uint8* src_y,
+                        const uint8* src_u,
+                        const uint8* src_v,
+                        uint8* dst_argb,
+                        int width);
 void NV12ToARGBRow_SSSE3(const uint8* src_y,
                         const uint8* src_uv,
                         uint8* dst_argb,
@@ -1097,6 +1119,11 @@ void J422ToARGBRow_SSSE3(const uint8* src_y,
                         const uint8* src_v,
                         uint8* dst_argb,
                         int width);
+void J422ToARGBRow_AVX2(const uint8* src_y,
+                        const uint8* src_u,
+                        const uint8* src_v,
+                        uint8* dst_argb,
+                        int width);
 void I422ToBGRARow_SSSE3(const uint8* src_y,
                         const uint8* src_u,
                         const uint8* src_v,
@@ -1147,11 +1174,21 @@ void I422ToRGB24Row_SSSE3(const uint8* src_y,
                          const uint8* src_v,
                          uint8* dst_rgb24,
                          int width);
+void I422ToRGB24Row_AVX2(const uint8* src_y,
+                         const uint8* src_u,
+                         const uint8* src_v,
+                         uint8* dst_rgb24,
+                         int width);
 void I422ToRAWRow_SSSE3(const uint8* src_y,
                        const uint8* src_u,
                        const uint8* src_v,
                        uint8* dst_raw,
                        int width);
+void I422ToRAWRow_AVX2(const uint8* src_y,
+                       const uint8* src_u,
+                       const uint8* src_v,
+                       uint8* dst_raw,
+                       int width);
 void I422ToARGBRow_Any_AVX2(const uint8* src_y,
                            const uint8* src_u,
                            const uint8* src_v,
@@ -1177,6 +1214,11 @@ void I444ToARGBRow_Any_SSSE3(const uint8* src_y,
                             const uint8* src_v,
                             uint8* dst_argb,
                             int width);
+void I444ToARGBRow_Any_AVX2(const uint8* src_y,
+                            const uint8* src_u,
+                            const uint8* src_v,
+                            uint8* dst_argb,
+                            int width);
 void I422ToARGBRow_Any_SSSE3(const uint8* src_y,
                             const uint8* src_u,
                             const uint8* src_v,
@@ -1187,6 +1229,11 @@ void I411ToARGBRow_Any_SSSE3(const uint8* src_y,
                             const uint8* src_v,
                             uint8* dst_argb,
                             int width);
+void I411ToARGBRow_Any_AVX2(const uint8* src_y,
+                            const uint8* src_u,
+                            const uint8* src_v,
+                            uint8* dst_argb,
+                            int width);
 void NV12ToARGBRow_Any_SSSE3(const uint8* src_y,
                             const uint8* src_uv,
                             uint8* dst_argb,
@@ -1231,6 +1278,16 @@ void YUY2ToARGBRow_Any_AVX2(const uint8* src_yuy2,
 void UYVYToARGBRow_Any_AVX2(const uint8* src_uyvy,
                            uint8* dst_argb,
                            int width);
+void J422ToARGBRow_Any_SSSE3(const uint8* src_y,
+                             const uint8* src_u,
+                             const uint8* src_v,
+                             uint8* dst_argb,
+                             int width);
+void J422ToARGBRow_Any_AVX2(const uint8* src_y,
+                            const uint8* src_u,
+                            const uint8* src_v,
+                            uint8* dst_argb,
+                            int width);
 void I422ToBGRARow_Any_SSSE3(const uint8* src_y,
                             const uint8* src_u,
                             const uint8* src_v,
@@ -1281,33 +1338,29 @@ void I422ToRGB24Row_Any_SSSE3(const uint8* src_y,
                              const uint8* src_v,
                              uint8* dst_argb,
                              int width);
+void I422ToRGB24Row_Any_AVX2(const uint8* src_y,
+                             const uint8* src_u,
+                             const uint8* src_v,
+                             uint8* dst_argb,
+                             int width);
 void I422ToRAWRow_Any_SSSE3(const uint8* src_y,
                            const uint8* src_u,
                            const uint8* src_v,
                            uint8* dst_argb,
                            int width);
+void I422ToRAWRow_Any_AVX2(const uint8* src_y,
+                           const uint8* src_u,
+                           const uint8* src_v,
+                           uint8* dst_argb,
+                           int width);

-void YToARGBRow_C(const uint8* src_y,
-                  uint8* dst_argb,
-                  int width);
-void YToARGBRow_SSE2(const uint8* src_y,
-                     uint8* dst_argb,
-                     int width);
-void YToARGBRow_AVX2(const uint8* src_y,
-                     uint8* dst_argb,
-                     int width);
-void YToARGBRow_NEON(const uint8* src_y,
-                     uint8* dst_argb,
-                     int width);
-void YToARGBRow_Any_SSE2(const uint8* src_y,
-                         uint8* dst_argb,
-                         int width);
-void YToARGBRow_Any_AVX2(const uint8* src_y,
-                         uint8* dst_argb,
-                         int width);
-void YToARGBRow_Any_NEON(const uint8* src_y,
-                         uint8* dst_argb,
-                         int width);
+void I400ToARGBRow_C(const uint8* src_y, uint8* dst_argb, int width);
+void I400ToARGBRow_SSE2(const uint8* src_y, uint8* dst_argb, int width);
+void I400ToARGBRow_AVX2(const uint8* src_y, uint8* dst_argb, int width);
+void I400ToARGBRow_NEON(const uint8* src_y, uint8* dst_argb, int width);
+void I400ToARGBRow_Any_SSE2(const uint8* src_y, uint8* dst_argb, int width);
+void I400ToARGBRow_Any_AVX2(const uint8* src_y, uint8* dst_argb, int width);
+void I400ToARGBRow_Any_NEON(const uint8* src_y, uint8* dst_argb, int width);

 // ARGB preattenuated alpha blend.
 void ARGBBlendRow_SSSE3(const uint8* src_argb, const uint8* src_argb1,
@@ -1375,6 +1428,11 @@ void ARGBToRGB565Row_Any_SSE2(const uint8* src_argb, uint8* dst_rgb, int pix);
 void ARGBToARGB1555Row_Any_SSE2(const uint8* src_argb, uint8* dst_rgb, int pix);
 void ARGBToARGB4444Row_Any_SSE2(const uint8* src_argb, uint8* dst_rgb, int pix);

+void ARGBToRGB565DitherRow_Any_SSE2(const uint8* src_argb, uint8* dst_rgb,
+                                    const uint32 dither4, int pix);
+void ARGBToRGB565DitherRow_Any_AVX2(const uint8* src_argb, uint8* dst_rgb,
+                                    const uint32 dither4, int pix);
+
 void ARGBToRGB565Row_Any_AVX2(const uint8* src_argb, uint8* dst_rgb, int pix);
 void ARGBToARGB1555Row_Any_AVX2(const uint8* src_argb, uint8* dst_rgb, int pix);
 void ARGBToARGB4444Row_Any_AVX2(const uint8* src_argb, uint8* dst_rgb, int pix);
@@ -1384,6 +1442,8 @@ void ARGBToRAWRow_Any_NEON(const uint8* src_argb, uint8* dst_rgb, int pix);
 void ARGBToRGB565Row_Any_NEON(const uint8* src_argb, uint8* dst_rgb, int pix);
 void ARGBToARGB1555Row_Any_NEON(const uint8* src_argb, uint8* dst_rgb, int pix);
 void ARGBToARGB4444Row_Any_NEON(const uint8* src_argb, uint8* dst_rgb, int pix);
+void ARGBToRGB565DitherRow_Any_NEON(const uint8* src_argb, uint8* dst_rgb,
+                                    const uint32 dither4, int width);

 void I444ToARGBRow_Any_NEON(const uint8* src_y,
                            const uint8* src_u,
@@ -1570,17 +1630,6 @@ void UYVYToUVRow_Any_NEON(const uint8* src_uyvy, int stride_uyvy,
 void UYVYToUV422Row_Any_NEON(const uint8* src_uyvy,
                             uint8* dst_u, uint8* dst_v, int pix);

-void ARGBToBayerGGRow_C(const uint8* src_argb, uint8* dst_bayer,
-                        uint32 /* selector */, int pix);
-void ARGBToBayerGGRow_SSE2(const uint8* src_argb, uint8* dst_bayer,
-                           uint32 /* selector */, int pix);
-void ARGBToBayerGGRow_NEON(const uint8* src_argb, uint8* dst_bayer,
-                           uint32 /* selector */, int pix);
-void ARGBToBayerGGRow_Any_SSE2(const uint8* src_argb, uint8* dst_bayer,
-                               uint32 /* selector */, int pix);
-void ARGBToBayerGGRow_Any_NEON(const uint8* src_argb, uint8* dst_bayer,
-                               uint32 /* selector */, int pix);
-
 void I422ToYUY2Row_C(const uint8* src_y,
                     const uint8* src_u,
                     const uint8* src_v,
@@ -1770,6 +1819,18 @@ void SobelXYRow_SSE2(const uint8* src_sobelx, const uint8* src_sobely,
                     uint8* dst_argb, int width);
 void SobelXYRow_NEON(const uint8* src_sobelx, const uint8* src_sobely,
                     uint8* dst_argb, int width);
+void SobelRow_Any_SSE2(const uint8* src_sobelx, const uint8* src_sobely,
+                       uint8* dst_argb, int width);
+void SobelRow_Any_NEON(const uint8* src_sobelx, const uint8* src_sobely,
+                       uint8* dst_argb, int width);
+void SobelToPlaneRow_Any_SSE2(const uint8* src_sobelx, const uint8* src_sobely,
+                              uint8* dst_y, int width);
+void SobelToPlaneRow_Any_NEON(const uint8* src_sobelx, const uint8* src_sobely,
+                              uint8* dst_y, int width);
+void SobelXYRow_Any_SSE2(const uint8* src_sobelx, const uint8* src_sobely,
+                         uint8* dst_argb, int width);
+void SobelXYRow_Any_NEON(const uint8* src_sobelx, const uint8* src_sobely,
+                         uint8* dst_argb, int width);

 void ARGBPolynomialRow_C(const uint8* src_argb,
                         uint8* dst_argb, const float* poly,
--- a/third_party/libyuv/include/libyuv/scale_row.h
+++ b/third_party/libyuv/include/libyuv/scale_row.h
@@ -12,45 +12,66 @@
 #define INCLUDE_LIBYUV_SCALE_ROW_H_

 #include "libyuv/basic_types.h"
+#include "libyuv/scale.h"

 #ifdef __cplusplus
 namespace libyuv {
 extern "C" {
 #endif

-#if defined(__pnacl__) || defined(__CLR_VER) || defined(COVERAGE_ENABLED) || \
-    defined(TARGET_IPHONE_SIMULATOR)
+#if defined(__pnacl__) || defined(__CLR_VER) || \
+    (defined(__i386__) && !defined(__SSE2__))
 #define LIBYUV_DISABLE_X86
 #endif

+// Visual C 2012 required for AVX2.
+#if defined(_M_IX86) && !defined(__clang__) && \
+    defined(_MSC_VER) && _MSC_VER >= 1700
+#define VISUALC_HAS_AVX2 1
+#endif  // VisualStudio >= 2012
+
 // The following are available on all x86 platforms:
 #if !defined(LIBYUV_DISABLE_X86) && \
    (defined(_M_IX86) || defined(__x86_64__) || defined(__i386__))
-#define HAS_SCALEROWDOWN2_SSE2
-#define HAS_SCALEROWDOWN4_SSE2
-#define HAS_SCALEROWDOWN34_SSSE3
-#define HAS_SCALEROWDOWN38_SSSE3
-#define HAS_SCALEADDROWS_SSE2
-#define HAS_SCALEFILTERCOLS_SSSE3
-#define HAS_SCALECOLSUP2_SSE2
+#define HAS_FIXEDDIV1_X86
+#define HAS_FIXEDDIV_X86
+#define HAS_SCALEARGBCOLS_SSE2
+#define HAS_SCALEARGBCOLSUP2_SSE2
+#define HAS_SCALEARGBFILTERCOLS_SSSE3
 #define HAS_SCALEARGBROWDOWN2_SSE2
 #define HAS_SCALEARGBROWDOWNEVEN_SSE2
-#define HAS_SCALEARGBCOLS_SSE2
-#define HAS_SCALEARGBFILTERCOLS_SSSE3
-#define HAS_SCALEARGBCOLSUP2_SSE2
-#define HAS_FIXEDDIV_X86
-#define HAS_FIXEDDIV1_X86
+#define HAS_SCALECOLSUP2_SSE2
+#define HAS_SCALEFILTERCOLS_SSSE3
+#define HAS_SCALEROWDOWN2_SSE2
+#define HAS_SCALEROWDOWN34_SSSE3
+#define HAS_SCALEROWDOWN38_SSSE3
+#define HAS_SCALEROWDOWN4_SSE2
+#endif
+
+// The following are available on Visual C 2012 or later (AVX2 support):
+#if !defined(LIBYUV_DISABLE_X86) && defined(VISUALC_HAS_AVX2)
+#define HAS_SCALEADDROW_AVX2
+#define HAS_SCALEROWDOWN2_AVX2
+#define HAS_SCALEROWDOWN4_AVX2
+#endif
+
+// The following are available on Visual C x86 (32-bit, non-clang) builds:
+#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && !defined(__clang__)
+#define HAS_SCALEADDROW_SSE2
 #endif

 // The following are available on Neon platforms:
 #if !defined(LIBYUV_DISABLE_NEON) && !defined(__native_client__) && \
    (defined(__ARM_NEON__) || defined(LIBYUV_NEON) || defined(__aarch64__))
+#define HAS_SCALEARGBCOLS_NEON
+#define HAS_SCALEARGBROWDOWN2_NEON
+#define HAS_SCALEARGBROWDOWNEVEN_NEON
+#define HAS_SCALEFILTERCOLS_NEON
 #define HAS_SCALEROWDOWN2_NEON
-#define HAS_SCALEROWDOWN4_NEON
 #define HAS_SCALEROWDOWN34_NEON
 #define HAS_SCALEROWDOWN38_NEON
-#define HAS_SCALEARGBROWDOWNEVEN_NEON
-#define HAS_SCALEARGBROWDOWN2_NEON
+#define HAS_SCALEROWDOWN4_NEON
+#define HAS_SCALEARGBFILTERCOLS_NEON
 #endif

 // The following are available on Mips platforms:
@@ -164,10 +185,8 @@ void ScaleRowDown38_2_Box_C(const uint8* src_ptr, ptrdiff_t src_stride,
                            uint8* dst_ptr, int dst_width);
 void ScaleRowDown38_2_Box_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
                               uint16* dst_ptr, int dst_width);
-void ScaleAddRows_C(const uint8* src_ptr, ptrdiff_t src_stride,
-                    uint16* dst_ptr, int src_width, int src_height);
-void ScaleAddRows_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
-                       uint32* dst_ptr, int src_width, int src_height);
+void ScaleAddRow_C(const uint8* src_ptr, uint16* dst_ptr, int src_width);
+void ScaleAddRow_16_C(const uint16* src_ptr, uint32* dst_ptr, int src_width);
 void ScaleARGBRowDown2_C(const uint8* src_argb,
                         ptrdiff_t src_stride,
                         uint8* dst_argb, int dst_width);
@@ -194,16 +213,28 @@ void ScaleARGBFilterCols_C(uint8* dst_argb, const uint8* src_argb,
 void ScaleARGBFilterCols64_C(uint8* dst_argb, const uint8* src_argb,
                             int dst_width, int x, int dx);

+// Specialized scalers for x86.
 void ScaleRowDown2_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
                        uint8* dst_ptr, int dst_width);
 void ScaleRowDown2Linear_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
                              uint8* dst_ptr, int dst_width);
 void ScaleRowDown2Box_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
                           uint8* dst_ptr, int dst_width);
+void ScaleRowDown2_AVX2(const uint8* src_ptr, ptrdiff_t src_stride,
+                        uint8* dst_ptr, int dst_width);
+void ScaleRowDown2Linear_AVX2(const uint8* src_ptr, ptrdiff_t src_stride,
+                              uint8* dst_ptr, int dst_width);
+void ScaleRowDown2Box_AVX2(const uint8* src_ptr, ptrdiff_t src_stride,
+                           uint8* dst_ptr, int dst_width);
 void ScaleRowDown4_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
                        uint8* dst_ptr, int dst_width);
 void ScaleRowDown4Box_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
                           uint8* dst_ptr, int dst_width);
+void ScaleRowDown4_AVX2(const uint8* src_ptr, ptrdiff_t src_stride,
+                        uint8* dst_ptr, int dst_width);
+void ScaleRowDown4Box_AVX2(const uint8* src_ptr, ptrdiff_t src_stride,
+                           uint8* dst_ptr, int dst_width);
+
 void ScaleRowDown34_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
                          uint8* dst_ptr, int dst_width);
 void ScaleRowDown34_1_Box_SSSE3(const uint8* src_ptr,
@@ -220,46 +251,124 @@ void ScaleRowDown38_3_Box_SSSE3(const uint8* src_ptr,
 void ScaleRowDown38_2_Box_SSSE3(const uint8* src_ptr,
                                ptrdiff_t src_stride,
                                uint8* dst_ptr, int dst_width);
-void ScaleAddRows_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
-                       uint16* dst_ptr, int src_width,
-                       int src_height);
+void ScaleRowDown2_Any_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
+                            uint8* dst_ptr, int dst_width);
+void ScaleRowDown2Linear_Any_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
+                                  uint8* dst_ptr, int dst_width);
+void ScaleRowDown2Box_Any_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
+                               uint8* dst_ptr, int dst_width);
+void ScaleRowDown2_Any_AVX2(const uint8* src_ptr, ptrdiff_t src_stride,
+                            uint8* dst_ptr, int dst_width);
+void ScaleRowDown2Linear_Any_AVX2(const uint8* src_ptr, ptrdiff_t src_stride,
+                                  uint8* dst_ptr, int dst_width);
+void ScaleRowDown2Box_Any_AVX2(const uint8* src_ptr, ptrdiff_t src_stride,
+                           uint8* dst_ptr, int dst_width);
+void ScaleRowDown4_Any_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
+                            uint8* dst_ptr, int dst_width);
+void ScaleRowDown4Box_Any_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
+                               uint8* dst_ptr, int dst_width);
+void ScaleRowDown4_Any_AVX2(const uint8* src_ptr, ptrdiff_t src_stride,
+                            uint8* dst_ptr, int dst_width);
+void ScaleRowDown4Box_Any_AVX2(const uint8* src_ptr, ptrdiff_t src_stride,
+                               uint8* dst_ptr, int dst_width);
+
+void ScaleRowDown34_Any_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
+                              uint8* dst_ptr, int dst_width);
+void ScaleRowDown34_1_Box_Any_SSSE3(const uint8* src_ptr,
+                                    ptrdiff_t src_stride,
+                                    uint8* dst_ptr, int dst_width);
+void ScaleRowDown34_0_Box_Any_SSSE3(const uint8* src_ptr,
+                                    ptrdiff_t src_stride,
+                                    uint8* dst_ptr, int dst_width);
+void ScaleRowDown38_Any_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
+                              uint8* dst_ptr, int dst_width);
+void ScaleRowDown38_3_Box_Any_SSSE3(const uint8* src_ptr,
+                                    ptrdiff_t src_stride,
+                                    uint8* dst_ptr, int dst_width);
+void ScaleRowDown38_2_Box_Any_SSSE3(const uint8* src_ptr,
+                                    ptrdiff_t src_stride,
+                                    uint8* dst_ptr, int dst_width);
+
+void ScaleAddRow_SSE2(const uint8* src_ptr, uint16* dst_ptr, int src_width);
+void ScaleAddRow_AVX2(const uint8* src_ptr, uint16* dst_ptr, int src_width);
+void ScaleAddRow_Any_SSE2(const uint8* src_ptr, uint16* dst_ptr, int src_width);
+void ScaleAddRow_Any_AVX2(const uint8* src_ptr, uint16* dst_ptr, int src_width);
+
 void ScaleFilterCols_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
                           int dst_width, int x, int dx);
 void ScaleColsUp2_SSE2(uint8* dst_ptr, const uint8* src_ptr,
                       int dst_width, int x, int dx);
-void ScaleARGBRowDown2_SSE2(const uint8* src_argb,
-                            ptrdiff_t src_stride,
-                            uint8* dst_argb, int dst_width);
-void ScaleARGBRowDown2Linear_SSE2(const uint8* src_argb,
-                                  ptrdiff_t src_stride,
-                                  uint8* dst_argb, int dst_width);
-void ScaleARGBRowDown2Box_SSE2(const uint8* src_argb,
-                               ptrdiff_t src_stride,
-                               uint8* dst_argb, int dst_width);
-void ScaleARGBRowDownEven_SSE2(const uint8* src_argb, ptrdiff_t src_stride,
-                               int src_stepx,
-                               uint8* dst_argb, int dst_width);
-void ScaleARGBRowDownEvenBox_SSE2(const uint8* src_argb,
-                                  ptrdiff_t src_stride,
-                                  int src_stepx,
-                                  uint8* dst_argb, int dst_width);
+
+
+// ARGB Column functions
 void ScaleARGBCols_SSE2(uint8* dst_argb, const uint8* src_argb,
                        int dst_width, int x, int dx);
 void ScaleARGBFilterCols_SSSE3(uint8* dst_argb, const uint8* src_argb,
                               int dst_width, int x, int dx);
 void ScaleARGBColsUp2_SSE2(uint8* dst_argb, const uint8* src_argb,
                           int dst_width, int x, int dx);
-// Row functions.
+void ScaleARGBFilterCols_NEON(uint8* dst_argb, const uint8* src_argb,
+                              int dst_width, int x, int dx);
+void ScaleARGBCols_NEON(uint8* dst_argb, const uint8* src_argb,
+                        int dst_width, int x, int dx);
+void ScaleARGBFilterCols_Any_NEON(uint8* dst_argb, const uint8* src_argb,
+                                  int dst_width, int x, int dx);
+void ScaleARGBCols_Any_NEON(uint8* dst_argb, const uint8* src_argb,
+                            int dst_width, int x, int dx);
+
+// ARGB Row functions
+void ScaleARGBRowDown2_SSE2(const uint8* src_argb, ptrdiff_t src_stride,
+                            uint8* dst_argb, int dst_width);
+void ScaleARGBRowDown2Linear_SSE2(const uint8* src_argb, ptrdiff_t src_stride,
+                                  uint8* dst_argb, int dst_width);
+void ScaleARGBRowDown2Box_SSE2(const uint8* src_argb, ptrdiff_t src_stride,
+                               uint8* dst_argb, int dst_width);
+void ScaleARGBRowDown2_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
+                            uint8* dst, int dst_width);
+void ScaleARGBRowDown2Linear_NEON(const uint8* src_argb, ptrdiff_t src_stride,
+                                  uint8* dst_argb, int dst_width);
+void ScaleARGBRowDown2Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
+                               uint8* dst, int dst_width);
+void ScaleARGBRowDown2_Any_SSE2(const uint8* src_argb, ptrdiff_t src_stride,
+                                uint8* dst_argb, int dst_width);
+void ScaleARGBRowDown2Linear_Any_SSE2(const uint8* src_argb,
+                                      ptrdiff_t src_stride,
+                                      uint8* dst_argb, int dst_width);
+void ScaleARGBRowDown2Box_Any_SSE2(const uint8* src_argb, ptrdiff_t src_stride,
+                                   uint8* dst_argb, int dst_width);
+void ScaleARGBRowDown2_Any_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
+                                uint8* dst, int dst_width);
+void ScaleARGBRowDown2Linear_Any_NEON(const uint8* src_argb,
+                                      ptrdiff_t src_stride,
+                                      uint8* dst_argb, int dst_width);
+void ScaleARGBRowDown2Box_Any_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
+                                   uint8* dst, int dst_width);
+
+void ScaleARGBRowDownEven_SSE2(const uint8* src_argb, ptrdiff_t src_stride,
+                               int src_stepx, uint8* dst_argb, int dst_width);
+void ScaleARGBRowDownEvenBox_SSE2(const uint8* src_argb, ptrdiff_t src_stride,
+                                  int src_stepx,
+                                  uint8* dst_argb, int dst_width);
 void ScaleARGBRowDownEven_NEON(const uint8* src_argb, ptrdiff_t src_stride,
                               int src_stepx,
                               uint8* dst_argb, int dst_width);
 void ScaleARGBRowDownEvenBox_NEON(const uint8* src_argb, ptrdiff_t src_stride,
                                  int src_stepx,
                                  uint8* dst_argb, int dst_width);
-void ScaleARGBRowDown2_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
-                            uint8* dst, int dst_width);
-void ScaleARGBRowDown2Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
-                               uint8* dst, int dst_width);
+void ScaleARGBRowDownEven_Any_SSE2(const uint8* src_argb, ptrdiff_t src_stride,
+                                   int src_stepx,
+                                   uint8* dst_argb, int dst_width);
+void ScaleARGBRowDownEvenBox_Any_SSE2(const uint8* src_argb,
+                                      ptrdiff_t src_stride,
+                                      int src_stepx,
+                                      uint8* dst_argb, int dst_width);
+void ScaleARGBRowDownEven_Any_NEON(const uint8* src_argb, ptrdiff_t src_stride,
+                                   int src_stepx,
+                                   uint8* dst_argb, int dst_width);
+void ScaleARGBRowDownEvenBox_Any_NEON(const uint8* src_argb,
+                                      ptrdiff_t src_stride,
+                                      int src_stepx,
+                                      uint8* dst_argb, int dst_width);

 // ScaleRowDown2Box also used by planar functions
 // NEON downscalers with interpolation.
@@ -267,7 +376,8 @@ void ScaleARGBRowDown2Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
 // Note - not static due to reuse in convert for 444 to 420.
 void ScaleRowDown2_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
                        uint8* dst, int dst_width);
-
+void ScaleRowDown2Linear_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
+                              uint8* dst, int dst_width);
 void ScaleRowDown2Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
                           uint8* dst, int dst_width);

@@ -302,6 +412,42 @@ void ScaleRowDown38_2_Box_NEON(const uint8* src_ptr,
                               ptrdiff_t src_stride,
                               uint8* dst_ptr, int dst_width);

+void ScaleRowDown2_Any_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
+                            uint8* dst, int dst_width);
+void ScaleRowDown2Linear_Any_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
+                                  uint8* dst, int dst_width);
+void ScaleRowDown2Box_Any_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
+                               uint8* dst, int dst_width);
+void ScaleRowDown4_Any_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
+                            uint8* dst_ptr, int dst_width);
+void ScaleRowDown4Box_Any_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
+                               uint8* dst_ptr, int dst_width);
+void ScaleRowDown34_Any_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
+                             uint8* dst_ptr, int dst_width);
+void ScaleRowDown34_0_Box_Any_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
+                                   uint8* dst_ptr, int dst_width);
+void ScaleRowDown34_1_Box_Any_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
+                                   uint8* dst_ptr, int dst_width);
+// 32 -> 12
+void ScaleRowDown38_Any_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
+                             uint8* dst_ptr, int dst_width);
+// 32x3 -> 12x1
+void ScaleRowDown38_3_Box_Any_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
+                               uint8* dst_ptr, int dst_width);
+// 32x2 -> 12x1
+void ScaleRowDown38_2_Box_Any_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
+                               uint8* dst_ptr, int dst_width);
+
+void ScaleAddRow_NEON(const uint8* src_ptr, uint16* dst_ptr, int src_width);
+void ScaleAddRow_Any_NEON(const uint8* src_ptr, uint16* dst_ptr, int src_width);
+
+void ScaleFilterCols_NEON(uint8* dst_ptr, const uint8* src_ptr,
+                          int dst_width, int x, int dx);
+
+void ScaleFilterCols_Any_NEON(uint8* dst_ptr, const uint8* src_ptr,
+                              int dst_width, int x, int dx);
+
+
 void ScaleRowDown2_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
                              uint8* dst, int dst_width);
 void ScaleRowDown2Box_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
--- a/third_party/libyuv/include/libyuv/version.h
+++ b/third_party/libyuv/include/libyuv/version.h
@@ -11,6 +11,6 @@
 #ifndef INCLUDE_LIBYUV_VERSION_H_  // NOLINT
 #define INCLUDE_LIBYUV_VERSION_H_

-#define LIBYUV_VERSION 1305
+#define LIBYUV_VERSION 1456

 #endif  // INCLUDE_LIBYUV_VERSION_H_  NOLINT
--- a/third_party/libyuv/source/compare.cc
+++ b/third_party/libyuv/source/compare.cc
@@ -37,7 +37,7 @@ uint32 HashDjb2_C(const uint8* src, int count, uint32 seed);
 #define HAS_HASHDJB2_SSE41
 uint32 HashDjb2_SSE41(const uint8* src, int count, uint32 seed);

-#if _MSC_VER >= 1700
+#ifdef VISUALC_HAS_AVX2
 #define HAS_HASHDJB2_AVX2
 uint32 HashDjb2_AVX2(const uint8* src, int count, uint32 seed);
 #endif
@@ -138,8 +138,8 @@ uint32 SumSquareError_NEON(const uint8* src_a, const uint8* src_b, int count);
 #define HAS_SUMSQUAREERROR_SSE2
 uint32 SumSquareError_SSE2(const uint8* src_a, const uint8* src_b, int count);
 #endif
-// Visual C 2012 required for AVX2.
-#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && _MSC_VER >= 1700
+
+#ifdef VISUALC_HAS_AVX2
 #define HAS_SUMSQUAREERROR_AVX2
 uint32 SumSquareError_AVX2(const uint8* src_a, const uint8* src_b, int count);
 #endif
--- a/third_party/libyuv/source/compare_posix.cc
+++ b/third_party/libyuv/source/compare_posix.cc
--- a/third_party/libyuv/source/compare_neon64.cc
+++ b/third_party/libyuv/source/compare_neon64.cc
@@ -32,7 +32,7 @@ uint32 SumSquareError_NEON(const uint8* src_a, const uint8* src_b, int count) {
    "ld1        {v0.16b}, [%0], #16            \n"
    MEMACCESS(1)
    "ld1        {v1.16b}, [%1], #16            \n"
-    "subs       %2, %2, #16                    \n"
+    "subs       %w2, %w2, #16                  \n"
    "usubl      v2.8h, v0.8b, v1.8b            \n"
    "usubl2     v3.8h, v0.16b, v1.16b          \n"
    "smlal      v16.4s, v2.4h, v2.4h           \n"
--- a/third_party/libyuv/source/compare_win.cc
+++ b/third_party/libyuv/source/compare_win.cc
@@ -16,9 +16,11 @@ namespace libyuv {
 extern "C" {
 #endif

-#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && defined(_MSC_VER)
+// This module is for Visual C x86.
+#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && \
+    defined(_MSC_VER) && !defined(__clang__)

-__declspec(naked) __declspec(align(16))
+__declspec(naked)
 uint32 SumSquareError_SSE2(const uint8* src_a, const uint8* src_b, int count) {
  __asm {
    mov        eax, [esp + 4]    // src_a
@@ -59,7 +61,7 @@ uint32 SumSquareError_SSE2(const uint8* src_a, const uint8* src_b, int count) {
 #if _MSC_VER >= 1700
 // C4752: found Intel(R) Advanced Vector Extensions; consider using /arch:AVX.
 #pragma warning(disable: 4752)
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
 uint32 SumSquareError_AVX2(const uint8* src_a, const uint8* src_b, int count) {
  __asm {
    mov        eax, [esp + 4]    // src_a
@@ -133,7 +135,7 @@ static uvec32 kHashMul3 = {
 #define pmulld(reg) _asm _emit 0x66 _asm _emit 0x0F _asm _emit 0x38 \
    _asm _emit 0x40 _asm _emit reg

-__declspec(naked) __declspec(align(16))
+__declspec(naked)
 uint32 HashDjb2_SSE41(const uint8* src, int count, uint32 seed) {
  __asm {
    mov        eax, [esp + 4]    // src
@@ -184,7 +186,7 @@ uint32 HashDjb2_SSE41(const uint8* src, int count, uint32 seed) {

 // Visual C 2012 required for AVX2.
 #if _MSC_VER >= 1700
-__declspec(naked) __declspec(align(16))
+__declspec(naked)
 uint32 HashDjb2_AVX2(const uint8* src, int count, uint32 seed) {
  __asm {
    mov        eax, [esp + 4]    // src
@@ -219,8 +221,7 @@ uint32 HashDjb2_AVX2(const uint8* src, int count, uint32 seed) {
  }
 }
 #endif  // _MSC_VER >= 1700
-
-#endif  // !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && defined(_MSC_VER)
+#endif  // !LIBYUV_DISABLE_X86 && _M_IX86 && _MSC_VER && !__clang__

 #ifdef __cplusplus
 }  // extern "C"
--- a/third_party/libyuv/source/convert.cc
+++ b/third_party/libyuv/source/convert.cc
@@ -817,22 +817,20 @@ int RGB24ToI420(const uint8* src_rgb24, int src_stride_rgb24,
    src_stride_rgb24 = -src_stride_rgb24;
  }

+// Neon version does direct RGB24 to YUV.
 #if defined(HAS_RGB24TOYROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
+    RGB24ToUVRow = RGB24ToUVRow_Any_NEON;
    RGB24ToYRow = RGB24ToYRow_Any_NEON;
    if (IS_ALIGNED(width, 8)) {
      RGB24ToYRow = RGB24ToYRow_NEON;
+      if (IS_ALIGNED(width, 16)) {
+        RGB24ToUVRow = RGB24ToUVRow_NEON;
+      }
    }
  }
-#endif
-#if defined(HAS_RGB24TOUVROW_NEON)
-  if (TestCpuFlag(kCpuHasNEON)) {
-    RGB24ToUVRow = RGB24ToUVRow_Any_NEON;
-    if (IS_ALIGNED(width, 16)) {
-      RGB24ToUVRow = RGB24ToUVRow_NEON;
-    }
-  }
-#endif
+// Other platforms do intermediate conversion from RGB24 to ARGB.
+#else
 #if defined(HAS_RGB24TOARGBROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    RGB24ToARGBRow = RGB24ToARGBRow_Any_SSSE3;
@@ -841,27 +839,29 @@ int RGB24ToI420(const uint8* src_rgb24, int src_stride_rgb24,
    }
  }
 #endif
-#if defined(HAS_ARGBTOUVROW_SSSE3)
+#if defined(HAS_ARGBTOYROW_SSSE3) && defined(HAS_ARGBTOUVROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    ARGBToUVRow = ARGBToUVRow_Any_SSSE3;
-    if (IS_ALIGNED(width, 16)) {
-      ARGBToUVRow = ARGBToUVRow_SSSE3;
-    }
-  }
-#endif
-#if defined(HAS_ARGBTOUVROW_SSSE3)
-  if (TestCpuFlag(kCpuHasSSSE3)) {
    ARGBToYRow = ARGBToYRow_Any_SSSE3;
    if (IS_ALIGNED(width, 16)) {
+      ARGBToUVRow = ARGBToUVRow_SSSE3;
      ARGBToYRow = ARGBToYRow_SSSE3;
    }
  }
-#endif  // HAS_ARGBTOUVROW_SSSE3
-
+#endif
+#if defined(HAS_ARGBTOYROW_AVX2) && defined(HAS_ARGBTOUVROW_AVX2)
+  if (TestCpuFlag(kCpuHasAVX2)) {
+    ARGBToUVRow = ARGBToUVRow_Any_AVX2;
+    ARGBToYRow = ARGBToYRow_Any_AVX2;
+    if (IS_ALIGNED(width, 32)) {
+      ARGBToUVRow = ARGBToUVRow_AVX2;
+      ARGBToYRow = ARGBToYRow_AVX2;
+    }
+  }
+#endif
  {
-#if !defined(HAS_RGB24TOYROW_NEON)
    // Allocate 2 rows of ARGB.
-    const int kRowSize = (width * 4 + 15) & ~15;
+    const int kRowSize = (width * 4 + 31) & ~31;
    align_buffer_64(row, kRowSize * 2);
 #endif

@@ -894,8 +894,8 @@ int RGB24ToI420(const uint8* src_rgb24, int src_stride_rgb24,
    }
 #if !defined(HAS_RGB24TOYROW_NEON)
    free_aligned_buffer_64(row);
-#endif
  }
+#endif
  return 0;
 }

@@ -931,22 +931,20 @@ int RAWToI420(const uint8* src_raw, int src_stride_raw,
    src_stride_raw = -src_stride_raw;
  }

+// Neon version does direct RAW to YUV.
 #if defined(HAS_RAWTOYROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
+    RAWToUVRow = RAWToUVRow_Any_NEON;
    RAWToYRow = RAWToYRow_Any_NEON;
    if (IS_ALIGNED(width, 8)) {
      RAWToYRow = RAWToYRow_NEON;
+      if (IS_ALIGNED(width, 16)) {
+        RAWToUVRow = RAWToUVRow_NEON;
+      }
    }
  }
-#endif
-#if defined(HAS_RAWTOUVROW_NEON)
-  if (TestCpuFlag(kCpuHasNEON)) {
-    RAWToUVRow = RAWToUVRow_Any_NEON;
-    if (IS_ALIGNED(width, 16)) {
-      RAWToUVRow = RAWToUVRow_NEON;
-    }
-  }
-#endif
+// Other platforms do intermediate conversion from RAW to ARGB.
+#else
 #if defined(HAS_RAWTOARGBROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    RAWToARGBRow = RAWToARGBRow_Any_SSSE3;
@@ -955,59 +953,63 @@ int RAWToI420(const uint8* src_raw, int src_stride_raw,
    }
  }
 #endif
-#if defined(HAS_ARGBTOUVROW_SSSE3)
+#if defined(HAS_ARGBTOYROW_SSSE3) && defined(HAS_ARGBTOUVROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    ARGBToUVRow = ARGBToUVRow_Any_SSSE3;
-    if (IS_ALIGNED(width, 16)) {
-      ARGBToUVRow = ARGBToUVRow_SSSE3;
-    }
-  }
-#endif
-#if defined(HAS_ARGBTOUVROW_SSSE3)
-  if (TestCpuFlag(kCpuHasSSSE3)) {
    ARGBToYRow = ARGBToYRow_Any_SSSE3;
    if (IS_ALIGNED(width, 16)) {
+      ARGBToUVRow = ARGBToUVRow_SSSE3;
      ARGBToYRow = ARGBToYRow_SSSE3;
    }
  }
-#endif  // HAS_ARGBTOUVROW_SSSE3
-
+#endif
+#if defined(HAS_ARGBTOYROW_AVX2) && defined(HAS_ARGBTOUVROW_AVX2)
+  if (TestCpuFlag(kCpuHasAVX2)) {
+    ARGBToUVRow = ARGBToUVRow_Any_AVX2;
+    ARGBToYRow = ARGBToYRow_Any_AVX2;
+    if (IS_ALIGNED(width, 32)) {
+      ARGBToUVRow = ARGBToUVRow_AVX2;
+      ARGBToYRow = ARGBToYRow_AVX2;
+    }
+  }
+#endif
  {
    // Allocate 2 rows of ARGB.
-    const int kRowSize = (width * 4 + 15) & ~15;
+    const int kRowSize = (width * 4 + 31) & ~31;
    align_buffer_64(row, kRowSize * 2);
+#endif

    for (y = 0; y < height - 1; y += 2) {
-  #if defined(HAS_RAWTOYROW_NEON)
+#if defined(HAS_RAWTOYROW_NEON)
      RAWToUVRow(src_raw, src_stride_raw, dst_u, dst_v, width);
      RAWToYRow(src_raw, dst_y, width);
      RAWToYRow(src_raw + src_stride_raw, dst_y + dst_stride_y, width);
-  #else
+#else
      RAWToARGBRow(src_raw, row, width);
      RAWToARGBRow(src_raw + src_stride_raw, row + kRowSize, width);
      ARGBToUVRow(row, kRowSize, dst_u, dst_v, width);
      ARGBToYRow(row, dst_y, width);
      ARGBToYRow(row + kRowSize, dst_y + dst_stride_y, width);
-  #endif
+#endif
      src_raw += src_stride_raw * 2;
      dst_y += dst_stride_y * 2;
      dst_u += dst_stride_u;
      dst_v += dst_stride_v;
    }
    if (height & 1) {
-  #if defined(HAS_RAWTOYROW_NEON)
+#if defined(HAS_RAWTOYROW_NEON)
      RAWToUVRow(src_raw, 0, dst_u, dst_v, width);
      RAWToYRow(src_raw, dst_y, width);
-  #else
+#else
      RAWToARGBRow(src_raw, row, width);
      ARGBToUVRow(row, 0, dst_u, dst_v, width);
      ARGBToYRow(row, dst_y, width);
-  #endif
+#endif
    }
-  #if !defined(HAS_RAWTOYROW_NEON)
+#if !defined(HAS_RAWTOYROW_NEON)
    free_aligned_buffer_64(row);
-  #endif
  }
+#endif
  return 0;
 }

@@ -1043,19 +1045,20 @@ int RGB565ToI420(const uint8* src_rgb565, int src_stride_rgb565,
    src_stride_rgb565 = -src_stride_rgb565;
  }

+// Neon version does direct RGB565 to YUV.
 #if defined(HAS_RGB565TOYROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
+    RGB565ToUVRow = RGB565ToUVRow_Any_NEON;
    RGB565ToYRow = RGB565ToYRow_Any_NEON;
    if (IS_ALIGNED(width, 8)) {
      RGB565ToYRow = RGB565ToYRow_NEON;
-    }
-    RGB565ToUVRow = RGB565ToUVRow_Any_NEON;
-    if (IS_ALIGNED(width, 16)) {
-      RGB565ToUVRow = RGB565ToUVRow_NEON;
+      if (IS_ALIGNED(width, 16)) {
+        RGB565ToUVRow = RGB565ToUVRow_NEON;
+      }
    }
  }
-#else  // HAS_RGB565TOYROW_NEON
-
+// Other platforms do intermediate conversion from RGB565 to ARGB.
+#else
 #if defined(HAS_RGB565TOARGBROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2)) {
    RGB565ToARGBRow = RGB565ToARGBRow_Any_SSE2;
@@ -1064,28 +1067,37 @@ int RGB565ToI420(const uint8* src_rgb565, int src_stride_rgb565,
    }
  }
 #endif
-#if defined(HAS_ARGBTOUVROW_SSSE3)
-  if (TestCpuFlag(kCpuHasSSSE3)) {
-    ARGBToUVRow = ARGBToUVRow_Any_SSSE3;
+#if defined(HAS_RGB565TOARGBROW_AVX2)
+  if (TestCpuFlag(kCpuHasAVX2)) {
+    RGB565ToARGBRow = RGB565ToARGBRow_Any_AVX2;
    if (IS_ALIGNED(width, 16)) {
-      ARGBToUVRow = ARGBToUVRow_SSSE3;
+      RGB565ToARGBRow = RGB565ToARGBRow_AVX2;
    }
  }
 #endif
-#if defined(HAS_ARGBTOUVROW_SSSE3)
+#if defined(HAS_ARGBTOYROW_SSSE3) && defined(HAS_ARGBTOUVROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
+    ARGBToUVRow = ARGBToUVRow_Any_SSSE3;
    ARGBToYRow = ARGBToYRow_Any_SSSE3;
    if (IS_ALIGNED(width, 16)) {
+      ARGBToUVRow = ARGBToUVRow_SSSE3;
      ARGBToYRow = ARGBToYRow_SSSE3;
    }
  }
-#endif  // HAS_ARGBTOUVROW_SSSE3
-#endif  // HAS_RGB565TOYROW_NEON
-
+#endif
+#if defined(HAS_ARGBTOYROW_AVX2) && defined(HAS_ARGBTOUVROW_AVX2)
+  if (TestCpuFlag(kCpuHasAVX2)) {
+    ARGBToUVRow = ARGBToUVRow_Any_AVX2;
+    ARGBToYRow = ARGBToYRow_Any_AVX2;
+    if (IS_ALIGNED(width, 32)) {
+      ARGBToUVRow = ARGBToUVRow_AVX2;
+      ARGBToYRow = ARGBToYRow_AVX2;
+    }
+  }
+#endif
  {
-#if !defined(HAS_RGB565TOYROW_NEON)
    // Allocate 2 rows of ARGB.
-    const int kRowSize = (width * 4 + 15) & ~15;
+    const int kRowSize = (width * 4 + 31) & ~31;
    align_buffer_64(row, kRowSize * 2);
 #endif

@@ -1118,8 +1130,8 @@ int RGB565ToI420(const uint8* src_rgb565, int src_stride_rgb565,
    }
 #if !defined(HAS_RGB565TOYROW_NEON)
    free_aligned_buffer_64(row);
-#endif
  }
+#endif
  return 0;
 }

@@ -1155,19 +1167,20 @@ int ARGB1555ToI420(const uint8* src_argb1555, int src_stride_argb1555,
    src_stride_argb1555 = -src_stride_argb1555;
  }

+// Neon version does direct ARGB1555 to YUV.
 #if defined(HAS_ARGB1555TOYROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
+    ARGB1555ToUVRow = ARGB1555ToUVRow_Any_NEON;
    ARGB1555ToYRow = ARGB1555ToYRow_Any_NEON;
    if (IS_ALIGNED(width, 8)) {
      ARGB1555ToYRow = ARGB1555ToYRow_NEON;
-    }
-    ARGB1555ToUVRow = ARGB1555ToUVRow_Any_NEON;
-    if (IS_ALIGNED(width, 16)) {
-      ARGB1555ToUVRow = ARGB1555ToUVRow_NEON;
+      if (IS_ALIGNED(width, 16)) {
+        ARGB1555ToUVRow = ARGB1555ToUVRow_NEON;
+      }
    }
  }
-#else  // HAS_ARGB1555TOYROW_NEON
-
+// Other platforms do intermediate conversion from ARGB1555 to ARGB.
+#else
 #if defined(HAS_ARGB1555TOARGBROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2)) {
    ARGB1555ToARGBRow = ARGB1555ToARGBRow_Any_SSE2;
@@ -1176,30 +1189,40 @@ int ARGB1555ToI420(const uint8* src_argb1555, int src_stride_argb1555,
    }
  }
 #endif
-#if defined(HAS_ARGBTOUVROW_SSSE3)
-  if (TestCpuFlag(kCpuHasSSSE3)) {
-    ARGBToUVRow = ARGBToUVRow_Any_SSSE3;
+#if defined(HAS_ARGB1555TOARGBROW_AVX2)
+  if (TestCpuFlag(kCpuHasAVX2)) {
+    ARGB1555ToARGBRow = ARGB1555ToARGBRow_Any_AVX2;
    if (IS_ALIGNED(width, 16)) {
-      ARGBToUVRow = ARGBToUVRow_SSSE3;
+      ARGB1555ToARGBRow = ARGB1555ToARGBRow_AVX2;
    }
  }
 #endif
-#if defined(HAS_ARGBTOUVROW_SSSE3)
+#if defined(HAS_ARGBTOYROW_SSSE3) && defined(HAS_ARGBTOUVROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
+    ARGBToUVRow = ARGBToUVRow_Any_SSSE3;
    ARGBToYRow = ARGBToYRow_Any_SSSE3;
    if (IS_ALIGNED(width, 16)) {
+      ARGBToUVRow = ARGBToUVRow_SSSE3;
      ARGBToYRow = ARGBToYRow_SSSE3;
    }
  }
-#endif  // HAS_ARGBTOUVROW_SSSE3
-#endif  // HAS_ARGB1555TOYROW_NEON
-
+#endif
+#if defined(HAS_ARGBTOYROW_AVX2) && defined(HAS_ARGBTOUVROW_AVX2)
+  if (TestCpuFlag(kCpuHasAVX2)) {
+    ARGBToUVRow = ARGBToUVRow_Any_AVX2;
+    ARGBToYRow = ARGBToYRow_Any_AVX2;
+    if (IS_ALIGNED(width, 32)) {
+      ARGBToUVRow = ARGBToUVRow_AVX2;
+      ARGBToYRow = ARGBToYRow_AVX2;
+    }
+  }
+#endif
  {
-#if !defined(HAS_ARGB1555TOYROW_NEON)
    // Allocate 2 rows of ARGB.
-    const int kRowSize = (width * 4 + 15) & ~15;
+    const int kRowSize = (width * 4 + 31) & ~31;
    align_buffer_64(row, kRowSize * 2);
 #endif
+
    for (y = 0; y < height - 1; y += 2) {
 #if defined(HAS_ARGB1555TOYROW_NEON)
      ARGB1555ToUVRow(src_argb1555, src_stride_argb1555, dst_u, dst_v, width);
@@ -1230,9 +1253,9 @@ int ARGB1555ToI420(const uint8* src_argb1555, int src_stride_argb1555,
 #endif
    }
 #if !defined(HAS_ARGB1555TOYROW_NEON)
-  free_aligned_buffer_64(row);
-#endif
+    free_aligned_buffer_64(row);
  }
+#endif
  return 0;
 }

@@ -1268,19 +1291,20 @@ int ARGB4444ToI420(const uint8* src_argb4444, int src_stride_argb4444,
    src_stride_argb4444 = -src_stride_argb4444;
  }

+// Neon version does direct ARGB4444 to YUV.
 #if defined(HAS_ARGB4444TOYROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
+    ARGB4444ToUVRow = ARGB4444ToUVRow_Any_NEON;
    ARGB4444ToYRow = ARGB4444ToYRow_Any_NEON;
    if (IS_ALIGNED(width, 8)) {
      ARGB4444ToYRow = ARGB4444ToYRow_NEON;
-    }
-    ARGB4444ToUVRow = ARGB4444ToUVRow_Any_NEON;
-    if (IS_ALIGNED(width, 16)) {
-      ARGB4444ToUVRow = ARGB4444ToUVRow_NEON;
+      if (IS_ALIGNED(width, 16)) {
+        ARGB4444ToUVRow = ARGB4444ToUVRow_NEON;
+      }
    }
  }
-#else  // HAS_ARGB4444TOYROW_NEON
-
+// Other platforms do intermediate conversion from ARGB4444 to ARGB.
+#else
 #if defined(HAS_ARGB4444TOARGBROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2)) {
    ARGB4444ToARGBRow = ARGB4444ToARGBRow_Any_SSE2;
@@ -1289,28 +1313,37 @@ int ARGB4444ToI420(const uint8* src_argb4444, int src_stride_argb4444,
    }
  }
 #endif
-#if defined(HAS_ARGBTOUVROW_SSSE3)
-  if (TestCpuFlag(kCpuHasSSSE3)) {
-    ARGBToUVRow = ARGBToUVRow_Any_SSSE3;
+#if defined(HAS_ARGB4444TOARGBROW_AVX2)
+  if (TestCpuFlag(kCpuHasAVX2)) {
+    ARGB4444ToARGBRow = ARGB4444ToARGBRow_Any_AVX2;
    if (IS_ALIGNED(width, 16)) {
-      ARGBToUVRow = ARGBToUVRow_SSSE3;
+      ARGB4444ToARGBRow = ARGB4444ToARGBRow_AVX2;
    }
  }
 #endif
-#if defined(HAS_ARGBTOUVROW_SSSE3)
+#if defined(HAS_ARGBTOYROW_SSSE3) && defined(HAS_ARGBTOUVROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
+    ARGBToUVRow = ARGBToUVRow_Any_SSSE3;
    ARGBToYRow = ARGBToYRow_Any_SSSE3;
    if (IS_ALIGNED(width, 16)) {
+      ARGBToUVRow = ARGBToUVRow_SSSE3;
      ARGBToYRow = ARGBToYRow_SSSE3;
    }
  }
-#endif  // HAS_ARGBTOUVROW_SSSE3
-#endif  // HAS_ARGB4444TOYROW_NEON
-
+#endif
+#if defined(HAS_ARGBTOYROW_AVX2) && defined(HAS_ARGBTOUVROW_AVX2)
+  if (TestCpuFlag(kCpuHasAVX2)) {
+    ARGBToUVRow = ARGBToUVRow_Any_AVX2;
+    ARGBToYRow = ARGBToYRow_Any_AVX2;
+    if (IS_ALIGNED(width, 32)) {
+      ARGBToUVRow = ARGBToUVRow_AVX2;
+      ARGBToYRow = ARGBToYRow_AVX2;
+    }
+  }
+#endif
  {
-#if !defined(HAS_ARGB4444TOYROW_NEON)
    // Allocate 2 rows of ARGB.
-    const int kRowSize = (width * 4 + 15) & ~15;
+    const int kRowSize = (width * 4 + 31) & ~31;
    align_buffer_64(row, kRowSize * 2);
 #endif

@@ -1345,8 +1378,8 @@ int ARGB4444ToI420(const uint8* src_argb4444, int src_stride_argb4444,
    }
 #if !defined(HAS_ARGB4444TOYROW_NEON)
    free_aligned_buffer_64(row);
-#endif
  }
+#endif
  return 0;
 }

--- a/third_party/libyuv/source/convert_argb.cc
+++ b/third_party/libyuv/source/convert_argb.cc
@@ -85,6 +85,14 @@ int I444ToARGB(const uint8* src_y, int src_stride_y,
    }
  }
 #endif
+#if defined(HAS_I444TOARGBROW_AVX2)
+  if (TestCpuFlag(kCpuHasAVX2)) {
+    I444ToARGBRow = I444ToARGBRow_Any_AVX2;
+    if (IS_ALIGNED(width, 16)) {
+      I444ToARGBRow = I444ToARGBRow_AVX2;
+    }
+  }
+#endif
 #if defined(HAS_I444TOARGBROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    I444ToARGBRow = I444ToARGBRow_Any_NEON;
@@ -222,6 +230,14 @@ int I411ToARGB(const uint8* src_y, int src_stride_y,
    }
  }
 #endif
+#if defined(HAS_I411TOARGBROW_AVX2)
+  if (TestCpuFlag(kCpuHasAVX2)) {
+    I411ToARGBRow = I411ToARGBRow_Any_AVX2;
+    if (IS_ALIGNED(width, 16)) {
+      I411ToARGBRow = I411ToARGBRow_AVX2;
+    }
+  }
+#endif
 #if defined(HAS_I411TOARGBROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    I411ToARGBRow = I411ToARGBRow_Any_NEON;
@@ -243,13 +259,13 @@ int I411ToARGB(const uint8* src_y, int src_stride_y,

 // Convert I400 to ARGB.
 LIBYUV_API
-int I400ToARGB_Reference(const uint8* src_y, int src_stride_y,
-                         uint8* dst_argb, int dst_stride_argb,
-                         int width, int height) {
+int I400ToARGB(const uint8* src_y, int src_stride_y,
+               uint8* dst_argb, int dst_stride_argb,
+               int width, int height) {
  int y;
-  void (*YToARGBRow)(const uint8* y_buf,
+  void (*I400ToARGBRow)(const uint8* y_buf,
                     uint8* rgb_buf,
-                     int width) = YToARGBRow_C;
+                     int width) = I400ToARGBRow_C;
  if (!src_y || !dst_argb ||
      width <= 0 || height == 0) {
    return -1;
@@ -267,47 +283,47 @@ int I400ToARGB_Reference(const uint8* src_y, int src_stride_y,
    height = 1;
    src_stride_y = dst_stride_argb = 0;
  }
-#if defined(HAS_YTOARGBROW_SSE2)
+#if defined(HAS_I400TOARGBROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2)) {
-    YToARGBRow = YToARGBRow_Any_SSE2;
+    I400ToARGBRow = I400ToARGBRow_Any_SSE2;
    if (IS_ALIGNED(width, 8)) {
-      YToARGBRow = YToARGBRow_SSE2;
+      I400ToARGBRow = I400ToARGBRow_SSE2;
    }
  }
 #endif
-#if defined(HAS_YTOARGBROW_AVX2)
+#if defined(HAS_I400TOARGBROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
-    YToARGBRow = YToARGBRow_Any_AVX2;
+    I400ToARGBRow = I400ToARGBRow_Any_AVX2;
    if (IS_ALIGNED(width, 16)) {
-      YToARGBRow = YToARGBRow_AVX2;
+      I400ToARGBRow = I400ToARGBRow_AVX2;
    }
  }
 #endif
-#if defined(HAS_YTOARGBROW_NEON)
+#if defined(HAS_I400TOARGBROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
-    YToARGBRow = YToARGBRow_Any_NEON;
+    I400ToARGBRow = I400ToARGBRow_Any_NEON;
    if (IS_ALIGNED(width, 8)) {
-      YToARGBRow = YToARGBRow_NEON;
+      I400ToARGBRow = I400ToARGBRow_NEON;
    }
  }
 #endif

  for (y = 0; y < height; ++y) {
-    YToARGBRow(src_y, dst_argb, width);
+    I400ToARGBRow(src_y, dst_argb, width);
    dst_argb += dst_stride_argb;
    src_y += src_stride_y;
  }
  return 0;
 }

-// Convert I400 to ARGB.
+// Convert J400 to ARGB.
 LIBYUV_API
-int I400ToARGB(const uint8* src_y, int src_stride_y,
+int J400ToARGB(const uint8* src_y, int src_stride_y,
               uint8* dst_argb, int dst_stride_argb,
               int width, int height) {
  int y;
-  void (*I400ToARGBRow)(const uint8* src_y, uint8* dst_argb, int pix) =
-      I400ToARGBRow_C;
+  void (*J400ToARGBRow)(const uint8* src_y, uint8* dst_argb, int pix) =
+      J400ToARGBRow_C;
  if (!src_y || !dst_argb ||
      width <= 0 || height == 0) {
    return -1;
@@ -325,24 +341,32 @@ int I400ToARGB(const uint8* src_y, int src_stride_y,
    height = 1;
    src_stride_y = dst_stride_argb = 0;
  }
-#if defined(HAS_I400TOARGBROW_SSE2)
+#if defined(HAS_J400TOARGBROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2)) {
-    I400ToARGBRow = I400ToARGBRow_Any_SSE2;
+    J400ToARGBRow = J400ToARGBRow_Any_SSE2;
    if (IS_ALIGNED(width, 8)) {
-      I400ToARGBRow = I400ToARGBRow_SSE2;
+      J400ToARGBRow = J400ToARGBRow_SSE2;
    }
  }
 #endif
-#if defined(HAS_I400TOARGBROW_NEON)
+#if defined(HAS_J400TOARGBROW_AVX2)
+  if (TestCpuFlag(kCpuHasAVX2)) {
+    J400ToARGBRow = J400ToARGBRow_Any_AVX2;
+    if (IS_ALIGNED(width, 16)) {
+      J400ToARGBRow = J400ToARGBRow_AVX2;
+    }
+  }
+#endif
+#if defined(HAS_J400TOARGBROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
-    I400ToARGBRow = I400ToARGBRow_Any_NEON;
+    J400ToARGBRow = J400ToARGBRow_Any_NEON;
    if (IS_ALIGNED(width, 8)) {
-      I400ToARGBRow = I400ToARGBRow_NEON;
+      J400ToARGBRow = J400ToARGBRow_NEON;
    }
  }
 #endif
  for (y = 0; y < height; ++y) {
-    I400ToARGBRow(src_y, dst_argb, width);
+    J400ToARGBRow(src_y, dst_argb, width);
    src_y += src_stride_y;
    dst_argb += dst_stride_argb;
  }
@@ -552,6 +576,14 @@ int RGB565ToARGB(const uint8* src_rgb565, int src_stride_rgb565,
    }
  }
 #endif
+#if defined(HAS_RGB565TOARGBROW_AVX2)
+  if (TestCpuFlag(kCpuHasAVX2)) {
+    RGB565ToARGBRow = RGB565ToARGBRow_Any_AVX2;
+    if (IS_ALIGNED(width, 16)) {
+      RGB565ToARGBRow = RGB565ToARGBRow_AVX2;
+    }
+  }
+#endif
 #if defined(HAS_RGB565TOARGBROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    RGB565ToARGBRow = RGB565ToARGBRow_Any_NEON;
@@ -602,6 +634,14 @@ int ARGB1555ToARGB(const uint8* src_argb1555, int src_stride_argb1555,
    }
  }
 #endif
+#if defined(HAS_ARGB1555TOARGBROW_AVX2)
+  if (TestCpuFlag(kCpuHasAVX2)) {
+    ARGB1555ToARGBRow = ARGB1555ToARGBRow_Any_AVX2;
+    if (IS_ALIGNED(width, 16)) {
+      ARGB1555ToARGBRow = ARGB1555ToARGBRow_AVX2;
+    }
+  }
+#endif
 #if defined(HAS_ARGB1555TOARGBROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    ARGB1555ToARGBRow = ARGB1555ToARGBRow_Any_NEON;
@@ -652,6 +692,14 @@ int ARGB4444ToARGB(const uint8* src_argb4444, int src_stride_argb4444,
    }
  }
 #endif
+#if defined(HAS_ARGB4444TOARGBROW_AVX2)
+  if (TestCpuFlag(kCpuHasAVX2)) {
+    ARGB4444ToARGBRow = ARGB4444ToARGBRow_Any_AVX2;
+    if (IS_ALIGNED(width, 16)) {
+      ARGB4444ToARGBRow = ARGB4444ToARGBRow_AVX2;
+    }
+  }
+#endif
 #if defined(HAS_ARGB4444TOARGBROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    ARGB4444ToARGBRow = ARGB4444ToARGBRow_Any_NEON;
--- a/third_party/libyuv/source/convert_from.cc
+++ b/third_party/libyuv/source/convert_from.cc
@@ -739,6 +739,14 @@ int I420ToRGB24(const uint8* src_y, int src_stride_y,
    }
  }
 #endif
+#if defined(HAS_I422TORGB24ROW_AVX2)
+  if (TestCpuFlag(kCpuHasAVX2)) {
+    I422ToRGB24Row = I422ToRGB24Row_Any_AVX2;
+    if (IS_ALIGNED(width, 16)) {
+      I422ToRGB24Row = I422ToRGB24Row_AVX2;
+    }
+  }
+#endif
 #if defined(HAS_I422TORGB24ROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    I422ToRGB24Row = I422ToRGB24Row_Any_NEON;
@@ -791,6 +799,14 @@ int I420ToRAW(const uint8* src_y, int src_stride_y,
    }
  }
 #endif
+#if defined(HAS_I422TORAWROW_AVX2)
+  if (TestCpuFlag(kCpuHasAVX2)) {
+    I422ToRAWRow = I422ToRAWRow_Any_AVX2;
+    if (IS_ALIGNED(width, 16)) {
+      I422ToRAWRow = I422ToRAWRow_AVX2;
+    }
+  }
+#endif
 #if defined(HAS_I422TORAWROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    I422ToRAWRow = I422ToRAWRow_Any_NEON;
@@ -993,6 +1009,117 @@ int I420ToRGB565(const uint8* src_y, int src_stride_y,
  return 0;
 }

+// Ordered 4x4 dither for 888 to 565.  Values from 0 to 7.
+static const uint8 kDither565_4x4[16] = {
+  0, 4, 1, 5,
+  6, 2, 7, 3,
+  1, 5, 0, 4,
+  7, 3, 6, 2,
+};
+
+// Convert I420 to RGB565 with 4x4 dither. Returns 0, or -1 on bad arguments.
+LIBYUV_API
+int I420ToRGB565Dither(const uint8* src_y, int src_stride_y,
+                       const uint8* src_u, int src_stride_u,
+                       const uint8* src_v, int src_stride_v,
+                       uint8* dst_rgb565, int dst_stride_rgb565,
+                       const uint8* dither4x4, int width, int height) {
+  int y;
+  void (*I422ToARGBRow)(const uint8* y_buf,
+                        const uint8* u_buf,
+                        const uint8* v_buf,
+                        uint8* rgb_buf,
+                        int width) = I422ToARGBRow_C;
+  void (*ARGBToRGB565DitherRow)(const uint8* src_argb, uint8* dst_rgb,
+      const uint32 dither4, int pix) = ARGBToRGB565DitherRow_C;
+  if (!src_y || !src_u || !src_v || !dst_rgb565 ||
+      width <= 0 || height == 0) {
+    return -1;
+  }
+  // Negative height means invert the image: write rows bottom-up.
+  if (height < 0) {
+    height = -height;
+    dst_rgb565 = dst_rgb565 + (height - 1) * dst_stride_rgb565;
+    dst_stride_rgb565 = -dst_stride_rgb565;
+  }
+  if (!dither4x4) {  // NULL selects the built-in table.
+    dither4x4 = kDither565_4x4;
+  }
+#if defined(HAS_I422TOARGBROW_SSSE3)
+  if (TestCpuFlag(kCpuHasSSSE3)) {
+    I422ToARGBRow = I422ToARGBRow_Any_SSSE3;
+    if (IS_ALIGNED(width, 8)) {
+      I422ToARGBRow = I422ToARGBRow_SSSE3;
+    }
+  }
+#endif
+#if defined(HAS_I422TOARGBROW_AVX2)
+  if (TestCpuFlag(kCpuHasAVX2)) {
+    I422ToARGBRow = I422ToARGBRow_Any_AVX2;
+    if (IS_ALIGNED(width, 16)) {
+      I422ToARGBRow = I422ToARGBRow_AVX2;
+    }
+  }
+#endif
+#if defined(HAS_I422TOARGBROW_NEON)
+  if (TestCpuFlag(kCpuHasNEON)) {
+    I422ToARGBRow = I422ToARGBRow_Any_NEON;
+    if (IS_ALIGNED(width, 8)) {
+      I422ToARGBRow = I422ToARGBRow_NEON;
+    }
+  }
+#endif
+#if defined(HAS_I422TOARGBROW_MIPS_DSPR2)
+  if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(width, 4) &&
+      IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) &&
+      IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) &&
+      IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2)) {
+    I422ToARGBRow = I422ToARGBRow_MIPS_DSPR2;
+  }
+#endif
+#if defined(HAS_ARGBTORGB565DITHERROW_SSE2)
+  if (TestCpuFlag(kCpuHasSSE2)) {
+    ARGBToRGB565DitherRow = ARGBToRGB565DitherRow_Any_SSE2;
+    if (IS_ALIGNED(width, 4)) {
+      ARGBToRGB565DitherRow = ARGBToRGB565DitherRow_SSE2;
+    }
+  }
+#endif
+#if defined(HAS_ARGBTORGB565DITHERROW_AVX2)
+  if (TestCpuFlag(kCpuHasAVX2)) {
+    ARGBToRGB565DitherRow = ARGBToRGB565DitherRow_Any_AVX2;
+    if (IS_ALIGNED(width, 8)) {
+      ARGBToRGB565DitherRow = ARGBToRGB565DitherRow_AVX2;
+    }
+  }
+#endif
+#if defined(HAS_ARGBTORGB565DITHERROW_NEON)
+  if (TestCpuFlag(kCpuHasNEON)) {
+    ARGBToRGB565DitherRow = ARGBToRGB565DitherRow_Any_NEON;
+    if (IS_ALIGNED(width, 8)) {
+      ARGBToRGB565DitherRow = ARGBToRGB565DitherRow_NEON;
+    }
+  }
+#endif
+  {
+    // Allocate a row of argb (4 bytes per pixel) as the intermediate.
+    align_buffer_64(row_argb, width * 4);
+    for (y = 0; y < height; ++y) {
+      I422ToARGBRow(src_y, src_u, src_v, row_argb, width);
+      ARGBToRGB565DitherRow(row_argb, dst_rgb565,
+          *(const uint32*)(dither4x4 + ((y & 3) << 2)), width);
+      dst_rgb565 += dst_stride_rgb565;
+      src_y += src_stride_y;
+      if (y & 1) {  // U/V advance once per two Y rows.
+        src_u += src_stride_u;
+        src_v += src_stride_v;
+      }
+    }
+    free_aligned_buffer_64(row_argb);
+  }
+  return 0;
+}
+
 // Convert I420 to specified format
 LIBYUV_API
 int ConvertFromI420(const uint8* y, int y_stride,
--- a/third_party/libyuv/source/convert_from_argb.cc
+++ b/third_party/libyuv/source/convert_from_argb.cc
@@ -72,7 +72,14 @@ int ARGBToI444(const uint8* src_argb, int src_stride_argb,
      ARGBToYRow = ARGBToYRow_SSSE3;
    }
  }
-
+#endif
+#if defined(HAS_ARGBTOYROW_AVX2)
+  if (TestCpuFlag(kCpuHasAVX2)) {
+    ARGBToYRow = ARGBToYRow_Any_AVX2;
+    if (IS_ALIGNED(width, 32)) {
+      ARGBToYRow = ARGBToYRow_AVX2;
+    }
+  }
 #endif
 #if defined(HAS_ARGBTOYROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
@@ -139,7 +146,6 @@ int ARGBToI422(const uint8* src_argb, int src_stride_argb,
    }
  }
 #endif
-
 #if defined(HAS_ARGBTOYROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    ARGBToYRow = ARGBToYRow_Any_SSSE3;
@@ -148,6 +154,14 @@ int ARGBToI422(const uint8* src_argb, int src_stride_argb,
    }
  }
 #endif
+#if defined(HAS_ARGBTOYROW_AVX2)
+  if (TestCpuFlag(kCpuHasAVX2)) {
+    ARGBToYRow = ARGBToYRow_Any_AVX2;
+    if (IS_ALIGNED(width, 32)) {
+      ARGBToYRow = ARGBToYRow_AVX2;
+    }
+  }
+#endif
 #if defined(HAS_ARGBTOYROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    ARGBToYRow = ARGBToYRow_Any_NEON;
@@ -275,6 +289,16 @@ int ARGBToNV12(const uint8* src_argb, int src_stride_argb,
    }
  }
 #endif
+#if defined(HAS_ARGBTOYROW_AVX2) && defined(HAS_ARGBTOUVROW_AVX2)
+  if (TestCpuFlag(kCpuHasAVX2)) {
+    ARGBToUVRow = ARGBToUVRow_Any_AVX2;
+    ARGBToYRow = ARGBToYRow_Any_AVX2;
+    if (IS_ALIGNED(width, 32)) {
+      ARGBToUVRow = ARGBToUVRow_AVX2;
+      ARGBToYRow = ARGBToYRow_AVX2;
+    }
+  }
+#endif
 #if defined(HAS_ARGBTOYROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    ARGBToYRow = ARGBToYRow_Any_NEON;
@@ -317,8 +341,8 @@ int ARGBToNV12(const uint8* src_argb, int src_stride_argb,
 #endif
  {
    // Allocate a rows of uv.
-    align_buffer_64(row_u, ((halfwidth + 15) & ~15) * 2);
-    uint8* row_v = row_u + ((halfwidth + 15) & ~15);
+    align_buffer_64(row_u, ((halfwidth + 31) & ~31) * 2);
+    uint8* row_v = row_u + ((halfwidth + 31) & ~31);

    for (y = 0; y < height - 1; y += 2) {
      ARGBToUVRow(src_argb, src_stride_argb, row_u, row_v, width);
@@ -374,6 +398,16 @@ int ARGBToNV21(const uint8* src_argb, int src_stride_argb,
    }
  }
 #endif
+#if defined(HAS_ARGBTOYROW_AVX2) && defined(HAS_ARGBTOUVROW_AVX2)
+  if (TestCpuFlag(kCpuHasAVX2)) {
+    ARGBToUVRow = ARGBToUVRow_Any_AVX2;
+    ARGBToYRow = ARGBToYRow_Any_AVX2;
+    if (IS_ALIGNED(width, 32)) {
+      ARGBToUVRow = ARGBToUVRow_AVX2;
+      ARGBToYRow = ARGBToYRow_AVX2;
+    }
+  }
+#endif
 #if defined(HAS_ARGBTOYROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    ARGBToYRow = ARGBToYRow_Any_NEON;
@@ -416,8 +450,8 @@ int ARGBToNV21(const uint8* src_argb, int src_stride_argb,
 #endif
  {
    // Allocate a rows of uv.
-    align_buffer_64(row_u, ((halfwidth + 15) & ~15) * 2);
-    uint8* row_v = row_u + ((halfwidth + 15) & ~15);
+    align_buffer_64(row_u, ((halfwidth + 31) & ~31) * 2);
+    uint8* row_v = row_u + ((halfwidth + 31) & ~31);

    for (y = 0; y < height - 1; y += 2) {
      ARGBToUVRow(src_argb, src_stride_argb, row_u, row_v, width);
@@ -492,6 +526,14 @@ int ARGBToYUY2(const uint8* src_argb, int src_stride_argb,
    }
  }
 #endif
+#if defined(HAS_ARGBTOYROW_AVX2)
+  if (TestCpuFlag(kCpuHasAVX2)) {
+    ARGBToYRow = ARGBToYRow_Any_AVX2;
+    if (IS_ALIGNED(width, 32)) {
+      ARGBToYRow = ARGBToYRow_AVX2;
+    }
+  }
+#endif
 #if defined(HAS_ARGBTOYROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    ARGBToYRow = ARGBToYRow_Any_NEON;
@@ -591,6 +633,14 @@ int ARGBToUYVY(const uint8* src_argb, int src_stride_argb,
    }
  }
 #endif
+#if defined(HAS_ARGBTOYROW_AVX2)
+  if (TestCpuFlag(kCpuHasAVX2)) {
+    ARGBToYRow = ARGBToYRow_Any_AVX2;
+    if (IS_ALIGNED(width, 32)) {
+      ARGBToYRow = ARGBToYRow_AVX2;
+    }
+  }
+#endif
 #if defined(HAS_ARGBTOYROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    ARGBToYRow = ARGBToYRow_Any_NEON;
@@ -804,25 +854,22 @@ int ARGBToRAW(const uint8* src_argb, int src_stride_argb,
  return 0;
 }

-static const uint8 kDither8x8[64] = {
-  0, 128, 32, 160,  8, 136, 40, 168,
-  192, 64, 224, 96, 200, 72, 232, 104,
-  48, 176, 16, 144, 56, 184, 24, 152,
-  240, 112, 208, 80, 248, 120, 216, 88,
-  12, 140, 44, 172,  4, 132, 36, 164,
-  204, 76, 236, 108, 196, 68, 228, 100,
-  60, 188, 28, 156, 52, 180, 20, 148,
-  252, 124, 220, 92, 244, 116, 212, 84,
+// Ordered 4x4 dither for 888 to 565.  Values from 0 to 7.
+static const uint8 kDither565_4x4[16] = {
+  0, 4, 1, 5,
+  6, 2, 7, 3,
+  1, 5, 0, 4,
+  7, 3, 6, 2,
 };

-// Convert ARGB To RGB565 with 8x8 dither matrix (64 bytes).
+// Convert ARGB To RGB565 with 4x4 dither matrix (16 bytes).
 LIBYUV_API
 int ARGBToRGB565Dither(const uint8* src_argb, int src_stride_argb,
                       uint8* dst_rgb565, int dst_stride_rgb565,
-                       const uint8* dither8x8, int width, int height) {
+                       const uint8* dither4x4, int width, int height) {
  int y;
  void (*ARGBToRGB565DitherRow)(const uint8* src_argb, uint8* dst_rgb,
-      const uint8* dither8x8, int pix) = ARGBToRGB565DitherRow_C;
+      const uint32 dither4, int pix) = ARGBToRGB565DitherRow_C;
  if (!src_argb || !dst_rgb565 || width <= 0 || height == 0) {
    return -1;
  }
@@ -831,13 +878,36 @@ int ARGBToRGB565Dither(const uint8* src_argb, int src_stride_argb,
    src_argb = src_argb + (height - 1) * src_stride_argb;
    src_stride_argb = -src_stride_argb;
  }
-  if (!dither8x8) {
-    dither8x8 = kDither8x8;
-
+  if (!dither4x4) {
+    dither4x4 = kDither565_4x4;
  }
+#if defined(HAS_ARGBTORGB565DITHERROW_SSE2)
+  if (TestCpuFlag(kCpuHasSSE2)) {
+    ARGBToRGB565DitherRow = ARGBToRGB565DitherRow_Any_SSE2;
+    if (IS_ALIGNED(width, 4)) {
+      ARGBToRGB565DitherRow = ARGBToRGB565DitherRow_SSE2;
+    }
+  }
+#endif
+#if defined(HAS_ARGBTORGB565DITHERROW_AVX2)
+  if (TestCpuFlag(kCpuHasAVX2)) {
+    ARGBToRGB565DitherRow = ARGBToRGB565DitherRow_Any_AVX2;
+    if (IS_ALIGNED(width, 8)) {
+      ARGBToRGB565DitherRow = ARGBToRGB565DitherRow_AVX2;
+    }
+  }
+#endif
+#if defined(HAS_ARGBTORGB565DITHERROW_NEON)
+  if (TestCpuFlag(kCpuHasNEON)) {
+    ARGBToRGB565DitherRow = ARGBToRGB565DitherRow_Any_NEON;
+    if (IS_ALIGNED(width, 8)) {
+      ARGBToRGB565DitherRow = ARGBToRGB565DitherRow_NEON;
+    }
+  }
+#endif
  for (y = 0; y < height; ++y) {
    ARGBToRGB565DitherRow(src_argb, dst_rgb565,
-                          dither8x8 + ((y & 7) << 3), width);
+                          *(uint32*)(dither4x4 + ((y & 3) << 2)), width);
    src_argb += src_stride_argb;
    dst_rgb565 += dst_stride_rgb565;
  }
@@ -845,6 +915,7 @@ int ARGBToRGB565Dither(const uint8* src_argb, int src_stride_argb,
 }

 // Convert ARGB To RGB565.
+// TODO(fbarchard): Consider using dither function low level with zeros.
 LIBYUV_API
 int ARGBToRGB565(const uint8* src_argb, int src_stride_argb,
                 uint8* dst_rgb565, int dst_stride_rgb565,
@@ -1021,7 +1092,7 @@ int ARGBToJ420(const uint8* src_argb, int src_stride_argb,
               int width, int height) {
  int y;
  void (*ARGBToUVJRow)(const uint8* src_argb0, int src_stride_argb,
-                      uint8* dst_u, uint8* dst_v, int width) = ARGBToUVJRow_C;
+                       uint8* dst_u, uint8* dst_v, int width) = ARGBToUVJRow_C;
  void (*ARGBToYJRow)(const uint8* src_argb, uint8* dst_yj, int pix) =
      ARGBToYJRow_C;
  if (!src_argb ||
@@ -1045,7 +1116,7 @@ int ARGBToJ420(const uint8* src_argb, int src_stride_argb,
    }
  }
 #endif
-#if defined(HAS_ARGBTOYJROW_AVX2) && defined(HAS_ARGBTOUVJROW_AVX2)
+#if defined(HAS_ARGBTOYJROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    ARGBToYJRow = ARGBToYJRow_Any_AVX2;
    if (IS_ALIGNED(width, 32)) {
@@ -1140,6 +1211,14 @@ int ARGBToJ422(const uint8* src_argb, int src_stride_argb,
    }
  }
 #endif
+#if defined(HAS_ARGBTOYJROW_AVX2)
+  if (TestCpuFlag(kCpuHasAVX2)) {
+    ARGBToYJRow = ARGBToYJRow_Any_AVX2;
+    if (IS_ALIGNED(width, 32)) {
+      ARGBToYJRow = ARGBToYJRow_AVX2;
+    }
+  }
+#endif
 #if defined(HAS_ARGBTOYJROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    ARGBToYJRow = ARGBToYJRow_Any_NEON;
--- a/third_party/libyuv/source/cpu_id.cc
+++ b/third_party/libyuv/source/cpu_id.cc
@@ -10,13 +10,12 @@

 #include "libyuv/cpu_id.h"

-#if defined(_MSC_VER) && !defined(__clang__)
+#if (defined(_MSC_VER) && !defined(__clang__)) && !defined(__clang__)
 #include <intrin.h>  // For __cpuidex()
 #endif
 #if !defined(__pnacl__) && !defined(__CLR_VER) && \
-    !defined(__native_client__)  && \
-    defined(_MSC_VER) && (_MSC_FULL_VER >= 160040219) && \
-    (defined(_M_IX86) || defined(_M_X64))
+    !defined(__native_client__) && (defined(_M_IX86) || defined(_M_X64)) && \
+    defined(_MSC_VER) && !defined(__clang__) && (_MSC_FULL_VER >= 160040219)
 #include <immintrin.h>  // For _xgetbv()
 #endif

@@ -37,23 +36,23 @@ extern "C" {

 // For functions that use the stack and have runtime checks for overflow,
 // use SAFEBUFFERS to avoid additional check.
-#if defined(_MSC_VER) && (_MSC_FULL_VER >= 160040219)
+#if (defined(_MSC_VER) && !defined(__clang__)) && (_MSC_FULL_VER >= 160040219)
 #define SAFEBUFFERS __declspec(safebuffers)
 #else
 #define SAFEBUFFERS
 #endif

-// Low level cpuid for X86. Returns zeros on other CPUs.
-#if !defined(__pnacl__) && !defined(__CLR_VER) && \
-    (defined(_M_IX86) || defined(_M_X64) || \
-    defined(__i386__) || defined(__x86_64__))
+// Low level cpuid for X86.
+#if (defined(_M_IX86) || defined(_M_X64) || \
+    defined(__i386__) || defined(__x86_64__)) && \
+    !defined(__pnacl__) && !defined(__CLR_VER)
 LIBYUV_API
 void CpuId(uint32 info_eax, uint32 info_ecx, uint32* cpu_info) {
-#if defined(_MSC_VER) && !defined(__clang__)
+#if (defined(_MSC_VER) && !defined(__clang__)) && !defined(__clang__)
+// Visual C version uses intrinsic or inline x86 assembly.
 #if (_MSC_FULL_VER >= 160040219)
  __cpuidex((int*)(cpu_info), info_eax, info_ecx);
-#endif
-#if defined(_M_IX86)
+#elif defined(_M_IX86)
  __asm {
    mov        eax, info_eax
    mov        ecx, info_ecx
@@ -71,7 +70,8 @@ void CpuId(uint32 info_eax, uint32 info_ecx, uint32* cpu_info) {
    cpu_info[3] = cpu_info[2] = cpu_info[1] = cpu_info[0] = 0;
  }
 #endif
-#else  // defined(_MSC_VER)
+// GCC version uses inline x86 assembly.
+#else  // (defined(_MSC_VER) && !defined(__clang__)) && !defined(__clang__)
  uint32 info_ebx, info_edx;
  asm volatile (  // NOLINT
 #if defined( __i386__) && defined(__PIC__)
@@ -89,37 +89,38 @@ void CpuId(uint32 info_eax, uint32 info_ecx, uint32* cpu_info) {
  cpu_info[1] = info_ebx;
  cpu_info[2] = info_ecx;
  cpu_info[3] = info_edx;
-#endif  // defined(_MSC_VER)
+#endif  // (defined(_MSC_VER) && !defined(__clang__)) && !defined(__clang__)
 }
-
-#if !defined(__native_client__)
-#define HAS_XGETBV
-// X86 CPUs have xgetbv to detect OS saves high parts of ymm registers.
-int TestOsSaveYmm() {
-  uint32 xcr0 = 0u;
-#if defined(_MSC_VER) && (_MSC_FULL_VER >= 160040219)
-  xcr0 = (uint32)(_xgetbv(0));  // VS2010 SP1 required.
-#endif
-#if defined(_M_IX86) && defined(_MSC_VER)
-  __asm {
-    xor        ecx, ecx    // xcr 0
-    _asm _emit 0x0f _asm _emit 0x01 _asm _emit 0xd0  // For VS2010 and earlier.
-    mov        xcr0, eax
-  }
-#endif
-#if defined(__i386__) || defined(__x86_64__)
-  asm(".byte 0x0f, 0x01, 0xd0" : "=a" (xcr0) : "c" (0) : "%edx");
-#endif  // defined(_MSC_VER)
-  return((xcr0 & 6) == 6);  // Is ymm saved?
-}
-#endif  // !defined(__native_client__)
-#else
+#else  // (defined(_M_IX86) || defined(_M_X64) ...
 LIBYUV_API
 void CpuId(uint32 eax, uint32 ecx, uint32* cpu_info) {
  cpu_info[0] = cpu_info[1] = cpu_info[2] = cpu_info[3] = 0;
 }
 #endif

+// TODO(fbarchard): Enable xgetbv when validator supports it.
+#if (defined(_M_IX86) || defined(_M_X64) || \
+    defined(__i386__) || defined(__x86_64__)) && \
+    !defined(__pnacl__) && !defined(__CLR_VER) && !defined(__native_client__)
+#define HAS_XGETBV
+// Returns nonzero if xgetbv reports that the OS saves the ymm register state.
+int TestOsSaveYmm() {
+  uint32 xcr0 = 0u;  // Stays 0 (result is false) if no branch below compiles.
+#if (defined(_MSC_VER) && !defined(__clang__)) && (_MSC_FULL_VER >= 160040219)
+  xcr0 = (uint32)(_xgetbv(0));  // VS2010 SP1 required.
+#elif defined(_M_IX86) && defined(_MSC_VER) && !defined(__clang__)
+  __asm {
+    xor        ecx, ecx    // xcr 0
+    _asm _emit 0x0f _asm _emit 0x01 _asm _emit 0xd0  // For VS2010 and earlier.
+    mov        xcr0, eax
+  }
+#elif defined(__i386__) || defined(__x86_64__)  // Same opcode bytes, ecx = 0.
+  asm(".byte 0x0f, 0x01, 0xd0" : "=a" (xcr0) : "c" (0) : "%edx");
+#endif  // defined(__i386__) || defined(__x86_64__)
+  return((xcr0 & 6) == 6);  // Is ymm saved? Needs xcr0 bits 1 and 2 both set.
+}
+#endif  // defined(_M_IX86) || defined(_M_X64) ..
+
 // based on libvpx arm_cpudetect.c
 // For Arm, but public to allow testing on any CPU
 LIBYUV_API SAFEBUFFERS
--- a/Show More
+++ b/Show More