Allow backward prob update in external mode info coding flow

This commit enables vpxenc to correctly count the coded motion-vector-related information needed for backward probability update. This allows the coding flow that uses external mode info to apply backward probability update. On a short test clip, a bit-rate saving of over 10% is observed with no change in distortion.

Change-Id: Ie27e97114ab91c3d95ba7b5554d617d226db5e20
Enable motion vector based prediction mode decision
2014-07-21 12:11:31 -07:00 · 2014-07-17 16:35:11 -07:00 · 2014-07-17 11:52:21 -07:00 · 2014-07-16 21:37:15 -07:00 · 2014-07-16 21:35:29 -07:00 · 2014-07-15 22:40:33 -07:00
170 changed files with 4081 additions and 8886 deletions
--- a/4
+++ b/4
@@ -55,7 +55,6 @@ COMPILING THE APPLICATIONS/LIBRARIES:
    armv6-linux-rvct
    armv6-linux-gcc
    armv6-none-rvct
-    arm64-darwin-gcc
    armv7-android-gcc
    armv7-darwin-gcc
    armv7-linux-rvct
@@ -63,7 +62,6 @@ COMPILING THE APPLICATIONS/LIBRARIES:
    armv7-none-rvct
    armv7-win32-vs11
    armv7-win32-vs12
-    armv7s-darwin-gcc
    mips32-linux-gcc
    ppc32-darwin8-gcc
    ppc32-darwin9-gcc
@@ -81,7 +79,6 @@ COMPILING THE APPLICATIONS/LIBRARIES:
    x86-darwin11-gcc
    x86-darwin12-gcc
    x86-darwin13-gcc
-    x86-iphonesimulator-gcc
    x86-linux-gcc
    x86-linux-icc
    x86-os2-gcc
@@ -98,7 +95,6 @@ COMPILING THE APPLICATIONS/LIBRARIES:
    x86_64-darwin11-gcc
    x86_64-darwin12-gcc
    x86_64-darwin13-gcc
-    x86_64-iphonesimulator-gcc
    x86_64-linux-gcc
    x86_64-linux-icc
    x86_64-solaris-gcc
--- a/build/make/Makefile
+++ b/build/make/Makefile
@@ -330,10 +330,7 @@ endef
 ifneq ($(target),)
 include $(SRC_PATH_BARE)/$(target:-$(TOOLCHAIN)=).mk
 endif
-
-skip_deps := $(filter %clean,$(MAKECMDGOALS))
-skip_deps += $(findstring testdata,$(MAKECMDGOALS))
-ifeq ($(strip $(skip_deps)),)
+ifeq ($(filter %clean,$(MAKECMDGOALS)),)
  # Older versions of make don't like -include directives with no arguments
  ifneq ($(filter %.d,$(OBJS-yes:.o=.d)),)
    -include $(filter %.d,$(OBJS-yes:.o=.d))
--- a/build/make/configure.sh
+++ b/build/make/configure.sh
@@ -799,10 +799,10 @@ process_common_toolchain() {
    arm*)
        # on arm, isa versions are supersets
        case ${tgt_isa} in
-        arm64|armv8)
+        armv8)
            soft_enable neon
            ;;
-        armv7|armv7s)
+        armv7)
            soft_enable neon
            soft_enable neon_asm
            soft_enable media
@@ -831,7 +831,7 @@ process_common_toolchain() {
            arch_int=${arch_int%%te}
            check_add_asflags --defsym ARCHITECTURE=${arch_int}
            tune_cflags="-mtune="
-            if [ ${tgt_isa} = "armv7" ] || [ ${tgt_isa} = "armv7s" ]; then
+            if [ ${tgt_isa} = "armv7" ]; then
                if [ -z "${float_abi}" ]; then
                    check_cpp <<EOF && float_abi=hard || float_abi=softfp
 #ifndef __ARM_PCS_VFP
@@ -1048,6 +1048,14 @@ EOF
        esac
    ;;
    x86*)
+        bits=32
+        enabled x86_64 && bits=64
+        check_cpp <<EOF && bits=x32
+#ifndef __ILP32__
+#error "not x32"
+#endif
+EOF
+
        case  ${tgt_os} in
            win*)
                enabled gcc && add_cflags -fno-common
@@ -1086,6 +1094,8 @@ EOF
                esac
            ;;
            gcc*)
+                add_cflags -m${bits}
+                add_ldflags -m${bits}
                link_with_cc=gcc
                tune_cflags="-march="
                setup_gnu_toolchain
@@ -1110,20 +1120,6 @@ EOF
            ;;
        esac

-        bits=32
-        enabled x86_64 && bits=64
-        check_cpp <<EOF && bits=x32
-#ifndef __ILP32__
-#error "not x32"
-#endif
-EOF
-        case ${tgt_cc} in
-            gcc*)
-                add_cflags -m${bits}
-                add_ldflags -m${bits}
-            ;;
-        esac
-
        soft_enable runtime_cpu_detect
        # We can't use 'check_cflags' until the compiler is configured and CC is
        # populated.
@@ -1226,12 +1222,10 @@ EOF
        fi
    fi

-    tgt_os_no_version=$(echo "${tgt_os}" | tr -d "[0-9]")
-    # Default use_x86inc to yes when we are 64 bit, non-pic, or on any
-    # non-Darwin target.
-    if [ "${tgt_isa}" = "x86_64" ] || [ "${pic}" != "yes" ] || \
-            [ "${tgt_os_no_version}" != "darwin" ]; then
-        soft_enable use_x86inc
+    # default use_x86inc to yes if pic is no or 64bit or we are not on darwin
+    if [ ${tgt_isa} = x86_64 -o ! "$pic" = "yes" -o \
+         "${tgt_os#darwin}" = "${tgt_os}"  ]; then
+      soft_enable use_x86inc
    fi

    # Position Independent Code (PIC) support, for building relocatable
--- a/build/make/gen_msvs_proj.sh
+++ b/build/make/gen_msvs_proj.sh
@@ -137,9 +137,7 @@ for opt in "$@"; do
        ;;
        --lib) proj_kind="lib"
        ;;
-        --src-path-bare=*)
-            src_path_bare=$(fix_path "$optval")
-            src_path_bare=${src_path_bare%/}
+        --src-path-bare=*) src_path_bare=$(fix_path "$optval")
        ;;
        --static-crt) use_static_runtime=true
        ;;
@@ -153,9 +151,9 @@ for opt in "$@"; do
            esac
        ;;
        -I*)
+            opt="${opt%/}"
            opt=${opt##-I}
            opt=$(fix_path "$opt")
-            opt="${opt%/}"
            incs="${incs}${incs:+;}&quot;${opt}&quot;"
            yasmincs="${yasmincs} -I&quot;${opt}&quot;"
        ;;
@@ -416,7 +414,7 @@ generate_vcproj() {
                    vpx)
                        tag Tool \
                            Name="VCPreBuildEventTool" \
-                            CommandLine="call obj_int_extract.bat &quot;$src_path_bare&quot; $plat_no_ws\\\$(ConfigurationName)" \
+                            CommandLine="call obj_int_extract.bat $src_path_bare $plat_no_ws\\\$(ConfigurationName)" \

                        tag Tool \
                            Name="VCCLCompilerTool" \
--- a/build/make/gen_msvs_vcxproj.sh
+++ b/build/make/gen_msvs_vcxproj.sh
@@ -157,9 +157,7 @@ for opt in "$@"; do
        ;;
        --lib) proj_kind="lib"
        ;;
-        --src-path-bare=*)
-            src_path_bare=$(fix_path "$optval")
-            src_path_bare=${src_path_bare%/}
+        --src-path-bare=*) src_path_bare=$(fix_path "$optval")
        ;;
        --static-crt) use_static_runtime=true
        ;;
@@ -175,9 +173,9 @@ for opt in "$@"; do
            esac
        ;;
        -I*)
+            opt="${opt%/}"
            opt=${opt##-I}
            opt=$(fix_path "$opt")
-            opt="${opt%/}"
            incs="${incs}${incs:+;}&quot;${opt}&quot;"
            yasmincs="${yasmincs} -I&quot;${opt}&quot;"
        ;;
--- a/build/make/iosbuild.sh
+++ b/build/make/iosbuild.sh
@@ -1,248 +0,0 @@
-#!/bin/sh
-##
-##  Copyright (c) 2014 The WebM project authors. All Rights Reserved.
-##
-##  Use of this source code is governed by a BSD-style license
-##  that can be found in the LICENSE file in the root of the source
-##  tree. An additional intellectual property rights grant can be found
-##  in the file PATENTS.  All contributing project authors may
-##  be found in the AUTHORS file in the root of the source tree.
-##
-##
-## This script generates 'VPX.framework'. An iOS app can encode and decode VPx
-## video by including 'VPX.framework'.
-##
-## Run iosbuild.sh to create 'VPX.framework' in the current directory.
-##
-set -e
-devnull='> /dev/null 2>&1'
-
-BUILD_ROOT="_iosbuild"
-DIST_DIR="_dist"
-FRAMEWORK_DIR="VPX.framework"
-HEADER_DIR="${FRAMEWORK_DIR}/Headers/vpx"
-MAKE_JOBS=1
-LIBVPX_SOURCE_DIR=$(dirname "$0" | sed -e s,/build/make,,)
-LIPO=$(xcrun -sdk iphoneos${SDK} -find lipo)
-ORIG_PWD="$(pwd)"
-TARGETS="arm64-darwin-gcc
-         armv6-darwin-gcc
-         armv7-darwin-gcc
-         armv7s-darwin-gcc
-         x86-iphonesimulator-gcc
-         x86_64-iphonesimulator-gcc"
-
-# Configures for the target specified by $1, and invokes make with the dist
-# target using $DIST_DIR as the distribution output directory.
-build_target() {
-  local target="$1"
-  local old_pwd="$(pwd)"
-
-  vlog "***Building target: ${target}***"
-
-  mkdir "${target}"
-  cd "${target}"
-  eval "../../${LIBVPX_SOURCE_DIR}/configure" --target="${target}" \
-      --disable-docs ${devnull}
-  export DIST_DIR
-  eval make -j ${MAKE_JOBS} dist ${devnull}
-  cd "${old_pwd}"
-
-  vlog "***Done building target: ${target}***"
-}
-
-# Returns the preprocessor symbol for the target specified by $1.
-target_to_preproc_symbol() {
-  target="$1"
-  case "${target}" in
-    arm64-*)
-      echo "__aarch64__"
-      ;;
-    armv6-*)
-      echo "__ARM_ARCH_6__"
-      ;;
-    armv7-*)
-      echo "__ARM_ARCH_7__"
-      ;;
-    armv7s-*)
-      echo "__ARM_ARCH_7S__"
-      ;;
-    x86-*)
-      echo "__i386__"
-      ;;
-    x86_64-*)
-      echo "__x86_64__"
-      ;;
-    *)
-      echo "#error ${target} unknown/unsupported"
-      return 1
-      ;;
-  esac
-}
-
-# Create a vpx_config.h shim that, based on preprocessor settings for the
-# current target CPU, includes the real vpx_config.h for the current target.
-# $1 is the list of targets.
-create_vpx_framework_config_shim() {
-  local targets="$1"
-  local config_file="${HEADER_DIR}/vpx_config.h"
-  local preproc_symbol=""
-  local target=""
-  local include_guard="VPX_FRAMEWORK_HEADERS_VPX_VPX_CONFIG_H_"
-
-  local file_header="/*
- *  Copyright (c) $(date +%Y) The WebM project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-/* GENERATED FILE: DO NOT EDIT! */
-
-#ifndef ${include_guard}
-#define ${include_guard}
-
-#if defined"
-
-  printf "%s" "${file_header}" > "${config_file}"
-  for target in ${targets}; do
-    preproc_symbol=$(target_to_preproc_symbol "${target}")
-    printf " ${preproc_symbol}\n" >> "${config_file}"
-    printf "#include \"VPX/vpx/${target}/vpx_config.h\"\n" >> "${config_file}"
-    printf "#elif defined" >> "${config_file}"
-    mkdir "${HEADER_DIR}/${target}"
-    cp -p "${BUILD_ROOT}/${target}/vpx_config.h" "${HEADER_DIR}/${target}"
-  done
-
-  # Consume the last line of output from the loop: We don't want it.
-  sed -i '' -e '$d' "${config_file}"
-
-  printf "#endif\n\n" >> "${config_file}"
-  printf "#endif  // ${include_guard}" >> "${config_file}"
-}
-
-# Configures and builds each target specified by $1, and then builds
-# VPX.framework.
-build_framework() {
-  local lib_list=""
-  local targets="$1"
-  local target=""
-  local target_dist_dir=""
-
-  # Clean up from previous build(s).
-  rm -rf "${BUILD_ROOT}" "${FRAMEWORK_DIR}"
-
-  # Create output dirs.
-  mkdir -p "${BUILD_ROOT}"
-  mkdir -p "${HEADER_DIR}"
-
-  cd "${BUILD_ROOT}"
-
-  for target in ${targets}; do
-    build_target "${target}"
-    target_dist_dir="${BUILD_ROOT}/${target}/${DIST_DIR}"
-    lib_list="${lib_list} ${target_dist_dir}/lib/libvpx.a"
-  done
-
-  cd "${ORIG_PWD}"
-
-  # The basic libvpx API includes are all the same; just grab the most recent
-  # set.
-  cp -p "${target_dist_dir}"/include/vpx/* "${HEADER_DIR}"
-
-  # Build the fat library.
-  ${LIPO} -create ${lib_list} -output ${FRAMEWORK_DIR}/VPX
-
-  # Create the vpx_config.h shim that allows usage of vpx_config.h from
-  # within VPX.framework.
-  create_vpx_framework_config_shim "${targets}"
-
-  # Copy in vpx_version.h.
-  cp -p "${BUILD_ROOT}/${target}/vpx_version.h" "${HEADER_DIR}"
-
-  vlog "Created fat library ${FRAMEWORK_DIR}/VPX containing:"
-  for lib in ${lib_list}; do
-    vlog "  $(echo ${lib} | awk -F / '{print $2, $NF}')"
-  done
-
-  # TODO(tomfinegan): Verify that expected targets are included within
-  # VPX.framework/VPX via lipo -info.
-}
-
-# Trap function. Cleans up the subtree used to build all targets contained in
-# $TARGETS.
-cleanup() {
-  cd "${ORIG_PWD}"
-
-  if [ "${PRESERVE_BUILD_OUTPUT}" != "yes" ]; then
-    rm -rf "${BUILD_ROOT}"
-  fi
-}
-
-iosbuild_usage() {
-cat << EOF
-  Usage: ${0##*/} [arguments]
-    --help: Display this message and exit.
-    --jobs: Number of make jobs.
-    --preserve-build-output: Do not delete the build directory.
-    --show-build-output: Show output from each library build.
-    --verbose: Output information about the environment and each stage of the
-               build.
-EOF
-}
-
-vlog() {
-  if [ "${VERBOSE}" = "yes" ]; then
-    echo "$@"
-  fi
-}
-
-trap cleanup EXIT
-
-# Parse the command line.
-while [ -n "$1" ]; do
-  case "$1" in
-    --help)
-      iosbuild_usage
-      exit
-      ;;
-    --jobs)
-      MAKE_JOBS="$2"
-      shift
-      ;;
-    --preserve-build-output)
-      PRESERVE_BUILD_OUTPUT=yes
-      ;;
-    --show-build-output)
-      devnull=
-      ;;
-    --verbose)
-      VERBOSE=yes
-      ;;
-    *)
-      iosbuild_usage
-      exit 1
-      ;;
-  esac
-  shift
-done
-
-if [ "${VERBOSE}" = "yes" ]; then
-cat << EOF
-  BUILD_ROOT=${BUILD_ROOT}
-  DIST_DIR=${DIST_DIR}
-  FRAMEWORK_DIR=${FRAMEWORK_DIR}
-  HEADER_DIR=${HEADER_DIR}
-  MAKE_JOBS=${MAKE_JOBS}
-  PRESERVE_BUILD_OUTPUT=${PRESERVE_BUILD_OUTPUT}
-  LIBVPX_SOURCE_DIR=${LIBVPX_SOURCE_DIR}
-  LIPO=${LIPO}
-  ORIG_PWD=${ORIG_PWD}
-  TARGETS="${TARGETS}"
-EOF
-fi
-
-build_framework "${TARGETS}"
--- a/5
+++ b/5
@@ -96,7 +96,6 @@ all_platforms="${all_platforms} armv6-darwin-gcc"
 all_platforms="${all_platforms} armv6-linux-rvct"
 all_platforms="${all_platforms} armv6-linux-gcc"
 all_platforms="${all_platforms} armv6-none-rvct"
-all_platforms="${all_platforms} arm64-darwin-gcc"
 all_platforms="${all_platforms} armv7-android-gcc"   #neon Cortex-A8
 all_platforms="${all_platforms} armv7-darwin-gcc"    #neon Cortex-A8
 all_platforms="${all_platforms} armv7-linux-rvct"    #neon Cortex-A8
@@ -104,7 +103,6 @@ all_platforms="${all_platforms} armv7-linux-gcc"     #neon Cortex-A8
 all_platforms="${all_platforms} armv7-none-rvct"     #neon Cortex-A8
 all_platforms="${all_platforms} armv7-win32-vs11"
 all_platforms="${all_platforms} armv7-win32-vs12"
-all_platforms="${all_platforms} armv7s-darwin-gcc"
 all_platforms="${all_platforms} mips32-linux-gcc"
 all_platforms="${all_platforms} ppc32-darwin8-gcc"
 all_platforms="${all_platforms} ppc32-darwin9-gcc"
@@ -273,8 +271,7 @@ EXPERIMENT_LIST="
    alpha
    multiple_arf
    spatial_svc
-    denoising
-    fp_mb_stats
+    transcode
 "
 CONFIG_LIST="
    external_build
--- a/examples/vp9_spatial_svc_encoder.c
+++ b/examples/vp9_spatial_svc_encoder.c
@@ -28,6 +28,16 @@
 #include "vpx/vpx_encoder.h"
 #include "./vpxstats.h"

+static const struct arg_enum_list encoding_mode_enum[] = {
+  {"i", INTER_LAYER_PREDICTION_I},
+  {"alt-ip", ALT_INTER_LAYER_PREDICTION_IP},
+  {"ip", INTER_LAYER_PREDICTION_IP},
+  {"gf", USE_GOLDEN_FRAME},
+  {NULL, 0}
+};
+
+static const arg_def_t encoding_mode_arg = ARG_DEF_ENUM(
+    "m", "encoding-mode", 1, "Encoding mode algorithm", encoding_mode_enum);
 static const arg_def_t skip_frames_arg =
    ARG_DEF("s", "skip-frames", 1, "input frames to skip");
 static const arg_def_t frames_arg =
@@ -48,6 +58,9 @@ static const arg_def_t quantizers_arg =
    ARG_DEF("q", "quantizers", 1, "quantizers for non key frames, also will "
            "be applied to key frames if -qn is not specified (lowest to "
            "highest layer)");
+static const arg_def_t quantizers_keyframe_arg =
+    ARG_DEF("qn", "quantizers-keyframe", 1, "quantizers for key frames (lowest "
+        "to highest layer)");
 static const arg_def_t passes_arg =
    ARG_DEF("p", "passes", 1, "Number of passes (1/2)");
 static const arg_def_t pass_arg =
@@ -64,13 +77,16 @@ static const arg_def_t max_bitrate_arg =
    ARG_DEF(NULL, "max-bitrate", 1, "Maximum bitrate");

 static const arg_def_t *svc_args[] = {
-  &frames_arg,        &width_arg,         &height_arg,
+  &encoding_mode_arg, &frames_arg,        &width_arg,       &height_arg,
  &timebase_arg,      &bitrate_arg,       &skip_frames_arg, &layers_arg,
-  &kf_dist_arg,       &scale_factors_arg, &quantizers_arg,  &passes_arg,
-  &pass_arg,          &fpf_name_arg,      &min_q_arg,       &max_q_arg,
-  &min_bitrate_arg,   &max_bitrate_arg,   NULL
+  &kf_dist_arg,       &scale_factors_arg, &quantizers_arg,
+  &quantizers_keyframe_arg,               &passes_arg,      &pass_arg,
+  &fpf_name_arg,      &min_q_arg,         &max_q_arg,       &min_bitrate_arg,
+  &max_bitrate_arg,   NULL
 };

+static const SVC_ENCODING_MODE default_encoding_mode =
+    INTER_LAYER_PREDICTION_IP;
 static const uint32_t default_frames_to_skip = 0;
 static const uint32_t default_frames_to_code = 60 * 60;
 static const uint32_t default_width = 1920;
@@ -119,6 +135,7 @@ static void parse_command_line(int argc, const char **argv_,
  // initialize SvcContext with parameters that will be passed to vpx_svc_init
  svc_ctx->log_level = SVC_LOG_DEBUG;
  svc_ctx->spatial_layers = default_spatial_layers;
+  svc_ctx->encoding_mode = default_encoding_mode;

  // start with default encoder configuration
  res = vpx_codec_enc_config_default(vpx_codec_vp9_cx(), enc_cfg, 0);
@@ -144,7 +161,9 @@ static void parse_command_line(int argc, const char **argv_,
  for (argi = argj = argv; (*argj = *argi); argi += arg.argv_step) {
    arg.argv_step = 1;

-    if (arg_match(&arg, &frames_arg, argi)) {
+    if (arg_match(&arg, &encoding_mode_arg, argi)) {
+      svc_ctx->encoding_mode = arg_parse_enum_or_int(&arg);
+    } else if (arg_match(&arg, &frames_arg, argi)) {
      app_input->frames_to_code = arg_parse_uint(&arg);
    } else if (arg_match(&arg, &width_arg, argi)) {
      enc_cfg->g_w = arg_parse_uint(&arg);
@@ -164,7 +183,9 @@ static void parse_command_line(int argc, const char **argv_,
    } else if (arg_match(&arg, &scale_factors_arg, argi)) {
      vpx_svc_set_scale_factors(svc_ctx, arg.val);
    } else if (arg_match(&arg, &quantizers_arg, argi)) {
-      vpx_svc_set_quantizers(svc_ctx, arg.val);
+      vpx_svc_set_quantizers(svc_ctx, arg.val, 0);
+    } else if (arg_match(&arg, &quantizers_keyframe_arg, argi)) {
+      vpx_svc_set_quantizers(svc_ctx, arg.val, 1);
    } else if (arg_match(&arg, &passes_arg, argi)) {
      passes = arg_parse_uint(&arg);
      if (passes < 1 || passes > 2) {
@@ -249,12 +270,12 @@ static void parse_command_line(int argc, const char **argv_,

  printf(
      "Codec %s\nframes: %d, skip: %d\n"
-      "layers: %d\n"
+      "mode: %d, layers: %d\n"
      "width %d, height: %d,\n"
      "num: %d, den: %d, bitrate: %d,\n"
      "gop size: %d\n",
      vpx_codec_iface_name(vpx_codec_vp9_cx()), app_input->frames_to_code,
-      app_input->frames_to_skip,
+      app_input->frames_to_skip, svc_ctx->encoding_mode,
      svc_ctx->spatial_layers, enc_cfg->g_w, enc_cfg->g_h,
      enc_cfg->g_timebase.num, enc_cfg->g_timebase.den,
      enc_cfg->rc_target_bitrate, enc_cfg->kf_max_dist);
@@ -275,7 +296,6 @@ int main(int argc, const char **argv) {
  int frame_duration = 1; /* 1 timebase tick per frame */
  FILE *infile = NULL;
  int end_of_stream = 0;
-  int frame_size;

  memset(&svc_ctx, 0, sizeof(svc_ctx));
  svc_ctx.log_print = 1;
@@ -331,10 +351,11 @@ int main(int argc, const char **argv) {
      die_codec(&codec, "Failed to encode frame");
    }
    if (!(app_input.passes == 2 && app_input.pass == 1)) {
-      while ((frame_size = vpx_svc_get_frame_size(&svc_ctx)) > 0) {
+      if (vpx_svc_get_frame_size(&svc_ctx) > 0) {
        vpx_video_writer_write_frame(writer,
                                     vpx_svc_get_buffer(&svc_ctx),
-                                     frame_size, pts);
+                                     vpx_svc_get_frame_size(&svc_ctx),
+                                     pts);
      }
    }
    if (vpx_svc_get_rc_stats_buffer_size(&svc_ctx) > 0) {
--- a/libs.mk
+++ b/libs.mk
@@ -170,7 +170,7 @@ CODEC_SRCS-$(BUILD_LIBVPX) += vpx_ports/vpx_once.h
 CODEC_SRCS-$(BUILD_LIBVPX) += $(BUILD_PFX)vpx_config.c
 INSTALL-SRCS-no += $(BUILD_PFX)vpx_config.c
 ifeq ($(ARCH_X86)$(ARCH_X86_64),yes)
-INSTALL-SRCS-$(CONFIG_CODEC_SRCS) += third_party/x86inc/x86inc.asm
+CODEC_SRCS-$(BUILD_LIBVPX) += third_party/x86inc/x86inc.asm
 endif
 CODEC_EXPORTS-$(BUILD_LIBVPX) += vpx/exports_com
 CODEC_EXPORTS-$(CONFIG_ENCODERS) += vpx/exports_enc
--- a/test/codec_factory.h
+++ b/test/codec_factory.h
@@ -35,10 +35,6 @@ class CodecFactory {
  virtual Decoder* CreateDecoder(vpx_codec_dec_cfg_t cfg,
                                 unsigned long deadline) const = 0;

-  virtual Decoder* CreateDecoder(vpx_codec_dec_cfg_t cfg,
-                                 const vpx_codec_flags_t flags,
-                                 unsigned long deadline) const = 0;  // NOLINT
-
  virtual Encoder* CreateEncoder(vpx_codec_enc_cfg_t cfg,
                                 unsigned long deadline,
                                 const unsigned long init_flags,
@@ -76,10 +72,6 @@ class VP8Decoder : public Decoder {
  VP8Decoder(vpx_codec_dec_cfg_t cfg, unsigned long deadline)
      : Decoder(cfg, deadline) {}

-  VP8Decoder(vpx_codec_dec_cfg_t cfg, const vpx_codec_flags_t flag,
-             unsigned long deadline)  // NOLINT
-      : Decoder(cfg, flag, deadline) {}
-
 protected:
  virtual vpx_codec_iface_t* CodecInterface() const {
 #if CONFIG_VP8_DECODER
@@ -112,14 +104,8 @@ class VP8CodecFactory : public CodecFactory {

  virtual Decoder* CreateDecoder(vpx_codec_dec_cfg_t cfg,
                                 unsigned long deadline) const {
-    return CreateDecoder(cfg, 0, deadline);
-  }
-
-  virtual Decoder* CreateDecoder(vpx_codec_dec_cfg_t cfg,
-                                 const vpx_codec_flags_t flags,
-                                 unsigned long deadline) const {  // NOLINT
 #if CONFIG_VP8_DECODER
-    return new VP8Decoder(cfg, flags, deadline);
+    return new VP8Decoder(cfg, deadline);
 #else
    return NULL;
 #endif
@@ -168,10 +154,6 @@ class VP9Decoder : public Decoder {
  VP9Decoder(vpx_codec_dec_cfg_t cfg, unsigned long deadline)
      : Decoder(cfg, deadline) {}

-  VP9Decoder(vpx_codec_dec_cfg_t cfg, const vpx_codec_flags_t flag,
-             unsigned long deadline)  // NOLINT
-      : Decoder(cfg, flag, deadline) {}
-
 protected:
  virtual vpx_codec_iface_t* CodecInterface() const {
 #if CONFIG_VP9_DECODER
@@ -204,14 +186,8 @@ class VP9CodecFactory : public CodecFactory {

  virtual Decoder* CreateDecoder(vpx_codec_dec_cfg_t cfg,
                                 unsigned long deadline) const {
-    return CreateDecoder(cfg, 0, deadline);
-  }
-
-  virtual Decoder* CreateDecoder(vpx_codec_dec_cfg_t cfg,
-                                 const vpx_codec_flags_t flags,
-                                 unsigned long deadline) const {  // NOLINT
 #if CONFIG_VP9_DECODER
-    return new VP9Decoder(cfg, flags, deadline);
+    return new VP9Decoder(cfg, deadline);
 #else
    return NULL;
 #endif
--- a/test/convolve_test.cc
+++ b/test/convolve_test.cc
@@ -645,26 +645,6 @@ INSTANTIATE_TEST_CASE_P(SSSE3, ConvolveTest, ::testing::Values(
 #endif

 #if HAVE_AVX2
-// TODO(jzern): these prototypes can be removed after the avx2 versions are
-// reenabled in vp9_rtcd_defs.pl.
-extern "C" {
-void vp9_convolve8_vert_avx2(const uint8_t *src, ptrdiff_t src_stride,
-                             uint8_t *dst, ptrdiff_t dst_stride,
-                             const int16_t *filter_x, int x_step_q4,
-                             const int16_t *filter_y, int y_step_q4,
-                             int w, int h);
-void vp9_convolve8_horiz_avx2(const uint8_t *src, ptrdiff_t src_stride,
-                              uint8_t *dst, ptrdiff_t dst_stride,
-                              const int16_t *filter_x, int x_step_q4,
-                              const int16_t *filter_y, int y_step_q4,
-                              int w, int h);
-void vp9_convolve8_avx2(const uint8_t *src, ptrdiff_t src_stride,
-                        uint8_t *dst, ptrdiff_t dst_stride,
-                        const int16_t *filter_x, int x_step_q4,
-                        const int16_t *filter_y, int y_step_q4,
-                        int w, int h);
-}
-
 const ConvolveFunctions convolve8_avx2(
    vp9_convolve8_horiz_avx2, vp9_convolve8_avg_horiz_ssse3,
    vp9_convolve8_vert_avx2, vp9_convolve8_avg_vert_ssse3,
@@ -675,10 +655,8 @@ INSTANTIATE_TEST_CASE_P(AVX2, ConvolveTest, ::testing::Values(
    make_tuple(8, 4, &convolve8_avx2),
    make_tuple(4, 8, &convolve8_avx2),
    make_tuple(8, 8, &convolve8_avx2),
-    make_tuple(8, 16, &convolve8_avx2)));
-
-INSTANTIATE_TEST_CASE_P(DISABLED_AVX2, ConvolveTest, ::testing::Values(
    make_tuple(16, 8, &convolve8_avx2),
+    make_tuple(8, 16, &convolve8_avx2),
    make_tuple(16, 16, &convolve8_avx2),
    make_tuple(32, 16, &convolve8_avx2),
    make_tuple(16, 32, &convolve8_avx2),
--- a/test/cpu_speed_test.cc
+++ b/test/cpu_speed_test.cc
@@ -14,49 +14,30 @@
 #include "test/encode_test_driver.h"
 #include "test/i420_video_source.h"
 #include "test/util.h"
-#include "test/y4m_video_source.h"

 namespace {

-const int kMaxPSNR = 100;
-
 class CpuSpeedTest : public ::libvpx_test::EncoderTest,
    public ::libvpx_test::CodecTestWith2Params<
        libvpx_test::TestMode, int> {
 protected:
-  CpuSpeedTest()
-      : EncoderTest(GET_PARAM(0)),
-        encoding_mode_(GET_PARAM(1)),
-        set_cpu_used_(GET_PARAM(2)),
-        min_psnr_(kMaxPSNR) {}
+  CpuSpeedTest() : EncoderTest(GET_PARAM(0)) {}
  virtual ~CpuSpeedTest() {}

  virtual void SetUp() {
    InitializeConfig();
-    SetMode(encoding_mode_);
-    if (encoding_mode_ != ::libvpx_test::kRealTime) {
-      cfg_.g_lag_in_frames = 25;
-      cfg_.rc_end_usage = VPX_VBR;
-    } else {
-      cfg_.g_lag_in_frames = 0;
-      cfg_.rc_end_usage = VPX_CBR;
-    }
-  }
-
-  virtual void BeginPassHook(unsigned int /*pass*/) {
-    min_psnr_ = kMaxPSNR;
+    SetMode(GET_PARAM(1));
+    set_cpu_used_ = GET_PARAM(2);
  }

  virtual void PreEncodeFrameHook(::libvpx_test::VideoSource *video,
                                  ::libvpx_test::Encoder *encoder) {
    if (video->frame() == 1) {
      encoder->Control(VP8E_SET_CPUUSED, set_cpu_used_);
-      if (encoding_mode_ != ::libvpx_test::kRealTime) {
-        encoder->Control(VP8E_SET_ENABLEAUTOALTREF, 1);
-        encoder->Control(VP8E_SET_ARNR_MAXFRAMES, 7);
-        encoder->Control(VP8E_SET_ARNR_STRENGTH, 5);
-        encoder->Control(VP8E_SET_ARNR_TYPE, 3);
-      }
+      encoder->Control(VP8E_SET_ENABLEAUTOALTREF, 1);
+      encoder->Control(VP8E_SET_ARNR_MAXFRAMES, 7);
+      encoder->Control(VP8E_SET_ARNR_STRENGTH, 5);
+      encoder->Control(VP8E_SET_ARNR_TYPE, 3);
    }
  }

@@ -64,15 +45,7 @@ class CpuSpeedTest : public ::libvpx_test::EncoderTest,
    if (pkt->data.frame.flags & VPX_FRAME_IS_KEY) {
    }
  }
-
-  virtual void PSNRPktHook(const vpx_codec_cx_pkt_t *pkt) {
-    if (pkt->data.psnr.psnr[0] < min_psnr_)
-      min_psnr_ = pkt->data.psnr.psnr[0];
-  }
-
-  ::libvpx_test::TestMode encoding_mode_;
  int set_cpu_used_;
-  double min_psnr_;
 };

 TEST_P(CpuSpeedTest, TestQ0) {
@@ -80,6 +53,7 @@ TEST_P(CpuSpeedTest, TestQ0) {
  // without a mismatch when passing in a very low max q.  This pushes
  // the encoder to producing lots of big partitions which will likely
  // extend into the border and test the border condition.
+  cfg_.g_lag_in_frames = 25;
  cfg_.rc_2pass_vbr_minsection_pct = 5;
  cfg_.rc_2pass_vbr_minsection_pct = 2000;
  cfg_.rc_target_bitrate = 400;
@@ -89,32 +63,16 @@ TEST_P(CpuSpeedTest, TestQ0) {
  ::libvpx_test::I420VideoSource video("hantro_odd.yuv", 208, 144, 30, 1, 0,
                                       20);

-  init_flags_ = VPX_CODEC_USE_PSNR;
-
  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
-  EXPECT_GE(min_psnr_, kMaxPSNR);
 }

-TEST_P(CpuSpeedTest, TestScreencastQ0) {
-  ::libvpx_test::Y4mVideoSource video("screendata.y4m", 0, 25);
-  cfg_.g_timebase = video.timebase();
-  cfg_.rc_2pass_vbr_minsection_pct = 5;
-  cfg_.rc_2pass_vbr_minsection_pct = 2000;
-  cfg_.rc_target_bitrate = 400;
-  cfg_.rc_max_quantizer = 0;
-  cfg_.rc_min_quantizer = 0;
-
-  init_flags_ = VPX_CODEC_USE_PSNR;
-
-  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
-  EXPECT_GE(min_psnr_, kMaxPSNR);
-}

 TEST_P(CpuSpeedTest, TestEncodeHighBitrate) {
  // Validate that this non multiple of 64 wide clip encodes and decodes
  // without a mismatch when passing in a very low max q.  This pushes
  // the encoder to producing lots of big partitions which will likely
  // extend into the border and test the border condition.
+  cfg_.g_lag_in_frames = 25;
  cfg_.rc_2pass_vbr_minsection_pct = 5;
  cfg_.rc_2pass_vbr_minsection_pct = 2000;
  cfg_.rc_target_bitrate = 12000;
@@ -131,6 +89,7 @@ TEST_P(CpuSpeedTest, TestLowBitrate) {
  // when passing in a very high min q.  This pushes the encoder to producing
  // lots of small partitions which might will test the other condition.

+  cfg_.g_lag_in_frames = 25;
  cfg_.rc_2pass_vbr_minsection_pct = 5;
  cfg_.rc_2pass_vbr_minsection_pct = 2000;
  cfg_.rc_target_bitrate = 200;
@@ -149,7 +108,6 @@ using std::tr1::make_tuple;

 VP9_INSTANTIATE_TEST_CASE(
    CpuSpeedTest,
-    ::testing::Values(::libvpx_test::kTwoPassGood, ::libvpx_test::kOnePassGood,
-                      ::libvpx_test::kRealTime),
+    ::testing::Values(::libvpx_test::kTwoPassGood, ::libvpx_test::kOnePassGood),
    ::testing::Range(0, 8));
 }  // namespace
--- a/test/datarate_test.cc
+++ b/test/datarate_test.cc
@@ -576,7 +576,7 @@ TEST_P(DatarateTestVP9Large, BasicRateTargeting3TemporalLayersFrameDropping) {
    // Expect some frame drops in this test: for this 200 frames test,
    // expect at least 10% and not more than 60% drops.
    ASSERT_GE(num_drops_, 20);
-    ASSERT_LE(num_drops_, 130);
+    ASSERT_LE(num_drops_, 120);
  }
 }

--- a/test/dct16x16_test.cc
+++ b/test/dct16x16_test.cc
@@ -606,29 +606,4 @@ INSTANTIATE_TEST_CASE_P(
    ::testing::Values(
        make_tuple(&vp9_fdct16x16_c, &vp9_idct16x16_256_add_ssse3, 0)));
 #endif
-
-#if HAVE_AVX2
-// TODO(jzern): these prototypes can be removed after the avx2 versions are
-// reenabled in vp9_rtcd_defs.pl.
-extern "C" {
-void vp9_fdct16x16_avx2(const int16_t *input, int16_t *output, int stride);
-void vp9_fht16x16_avx2(const int16_t *input, int16_t *output, int stride,
-                       int tx_type);
-}
-INSTANTIATE_TEST_CASE_P(
-    DISABLED_AVX2, Trans16x16DCT,
-    ::testing::Values(
-        make_tuple(&vp9_fdct16x16_avx2,
-                   &vp9_idct16x16_256_add_c, 0)));
-INSTANTIATE_TEST_CASE_P(
-    AVX2, Trans16x16HT,
-    ::testing::Values(
-        make_tuple(&vp9_fht16x16_avx2, &vp9_iht16x16_256_add_c, 3)));
-INSTANTIATE_TEST_CASE_P(
-    DISABLED_AVX2, Trans16x16HT,
-    ::testing::Values(
-        make_tuple(&vp9_fht16x16_avx2, &vp9_iht16x16_256_add_c, 0),
-        make_tuple(&vp9_fht16x16_avx2, &vp9_iht16x16_256_add_c, 1),
-        make_tuple(&vp9_fht16x16_avx2, &vp9_iht16x16_256_add_c, 2)));
-#endif
 }  // namespace
--- a/test/decode_test_driver.cc
+++ b/test/decode_test_driver.cc
@@ -15,73 +15,27 @@

 namespace libvpx_test {

-const char kVP8Name[] = "WebM Project VP8";
-
-vpx_codec_err_t Decoder::PeekStream(const uint8_t *cxdata, size_t size,
-                                    vpx_codec_stream_info_t *stream_info) {
-  return vpx_codec_peek_stream_info(CodecInterface(),
-                                    cxdata, static_cast<unsigned int>(size),
-                                    stream_info);
-}
-
 vpx_codec_err_t Decoder::DecodeFrame(const uint8_t *cxdata, size_t size) {
-  return DecodeFrame(cxdata, size, NULL);
-}
-
-vpx_codec_err_t Decoder::DecodeFrame(const uint8_t *cxdata, size_t size,
-                                     void *user_priv) {
  vpx_codec_err_t res_dec;
  InitOnce();
  REGISTER_STATE_CHECK(
      res_dec = vpx_codec_decode(&decoder_,
                                 cxdata, static_cast<unsigned int>(size),
-                                 user_priv, 0));
+                                 NULL, 0));
  return res_dec;
 }

 void DecoderTest::RunLoop(CompressedVideoSource *video) {
-  Decoder* const decoder = codec_->CreateDecoder(cfg_, flags_, 0);
+  vpx_codec_dec_cfg_t dec_cfg = {0};
+  Decoder* const decoder = codec_->CreateDecoder(dec_cfg, 0);
  ASSERT_TRUE(decoder != NULL);
-  const char *codec_name = decoder->GetDecoderName();
-  const bool is_vp8 = strncmp(kVP8Name, codec_name, sizeof(kVP8Name) - 1) == 0;
-  bool end_of_file = false;

  // Decode frames.
-  for (video->Begin(); !::testing::Test::HasFailure() && !end_of_file;
-       video->Next()) {
+  for (video->Begin(); video->cxdata(); video->Next()) {
    PreDecodeFrameHook(*video, decoder);
-
-    vpx_codec_stream_info_t stream_info;
-    stream_info.sz = sizeof(stream_info);
-
-    if (video->cxdata() != NULL) {
-      const vpx_codec_err_t res_peek = decoder->PeekStream(video->cxdata(),
-                                                           video->frame_size(),
-                                                           &stream_info);
-      if (is_vp8) {
-        /* Vp8's implementation of PeekStream returns an error if the frame you
-         * pass it is not a keyframe, so we only expect VPX_CODEC_OK on the
-         * first frame, which must be a keyframe. */
-        if (video->frame_number() == 0)
-          ASSERT_EQ(VPX_CODEC_OK, res_peek) << "Peek return failed: "
-              << vpx_codec_err_to_string(res_peek);
-      } else {
-        /* The Vp9 implementation of PeekStream returns an error only if the
-         * data passed to it isn't a valid Vp9 chunk. */
-        ASSERT_EQ(VPX_CODEC_OK, res_peek) << "Peek return failed: "
-            << vpx_codec_err_to_string(res_peek);
-      }
-
-      vpx_codec_err_t res_dec = decoder->DecodeFrame(video->cxdata(),
-                                                     video->frame_size());
-      if (!HandleDecodeResult(res_dec, *video, decoder))
-        break;
-    } else {
-      // Signal end of the file to the decoder.
-      const vpx_codec_err_t res_dec = decoder->DecodeFrame(NULL, 0);
-      ASSERT_EQ(VPX_CODEC_OK, res_dec) << decoder->DecodeError();
-      end_of_file = true;
-    }
+    vpx_codec_err_t res_dec = decoder->DecodeFrame(video->cxdata(),
+                                                   video->frame_size());
+    ASSERT_EQ(VPX_CODEC_OK, res_dec) << decoder->DecodeError();

    DxDataIterator dec_iter = decoder->GetDxData();
    const vpx_image_t *img = NULL;
@@ -93,12 +47,4 @@ void DecoderTest::RunLoop(CompressedVideoSource *video) {

  delete decoder;
 }
-
-void DecoderTest::set_cfg(const vpx_codec_dec_cfg_t &dec_cfg) {
-  memcpy(&cfg_, &dec_cfg, sizeof(cfg_));
-}
-
-void DecoderTest::set_flags(const vpx_codec_flags_t flags) {
-  flags_ = flags;
-}
 }  // namespace libvpx_test
--- a/test/decode_test_driver.h
+++ b/test/decode_test_driver.h
@@ -41,13 +41,7 @@ class DxDataIterator {
 class Decoder {
 public:
  Decoder(vpx_codec_dec_cfg_t cfg, unsigned long deadline)
-      : cfg_(cfg), flags_(0), deadline_(deadline), init_done_(false) {
-    memset(&decoder_, 0, sizeof(decoder_));
-  }
-
-  Decoder(vpx_codec_dec_cfg_t cfg, const vpx_codec_flags_t flag,
-          unsigned long deadline)  // NOLINT
-      : cfg_(cfg), flags_(flag), deadline_(deadline), init_done_(false) {
+      : cfg_(cfg), deadline_(deadline), init_done_(false) {
    memset(&decoder_, 0, sizeof(decoder_));
  }

@@ -55,14 +49,8 @@ class Decoder {
    vpx_codec_destroy(&decoder_);
  }

-  vpx_codec_err_t PeekStream(const uint8_t *cxdata, size_t size,
-                             vpx_codec_stream_info_t *stream_info);
-
  vpx_codec_err_t DecodeFrame(const uint8_t *cxdata, size_t size);

-  vpx_codec_err_t DecodeFrame(const uint8_t *cxdata, size_t size,
-                              void *user_priv);
-
  DxDataIterator GetDxData() {
    return DxDataIterator(&decoder_);
  }
@@ -97,10 +85,6 @@ class Decoder {
        &decoder_, cb_get, cb_release, user_priv);
  }

-  const char* GetDecoderName() {
-    return vpx_codec_iface_name(CodecInterface());
-  }
-
 protected:
  virtual vpx_codec_iface_t* CodecInterface() const = 0;

@@ -108,7 +92,7 @@ class Decoder {
    if (!init_done_) {
      const vpx_codec_err_t res = vpx_codec_dec_init(&decoder_,
                                                     CodecInterface(),
-                                                     &cfg_, flags_);
+                                                     &cfg_, 0);
      ASSERT_EQ(VPX_CODEC_OK, res) << DecodeError();
      init_done_ = true;
    }
@@ -116,7 +100,6 @@ class Decoder {

  vpx_codec_ctx_t     decoder_;
  vpx_codec_dec_cfg_t cfg_;
-  vpx_codec_flags_t   flags_;
  unsigned int        deadline_;
  bool                init_done_;
 };
@@ -127,35 +110,20 @@ class DecoderTest {
  // Main decoding loop
  virtual void RunLoop(CompressedVideoSource *video);

-  virtual void set_cfg(const vpx_codec_dec_cfg_t &dec_cfg);
-  virtual void set_flags(const vpx_codec_flags_t flags);
-
  // Hook to be called before decompressing every frame.
  virtual void PreDecodeFrameHook(const CompressedVideoSource& video,
                                  Decoder *decoder) {}

-  // Hook to be called to handle decode result. Return true to continue.
-  virtual bool HandleDecodeResult(const vpx_codec_err_t res_dec,
-                                  const CompressedVideoSource& /* video */,
-                                  Decoder *decoder) {
-    EXPECT_EQ(VPX_CODEC_OK, res_dec) << decoder->DecodeError();
-    return VPX_CODEC_OK == res_dec;
-  }
-
  // Hook to be called on every decompressed frame.
  virtual void DecompressedFrameHook(const vpx_image_t& img,
                                     const unsigned int frame_number) {}

 protected:
-  explicit DecoderTest(const CodecFactory *codec) : codec_(codec), flags_(0) {
-    memset(&cfg_, 0, sizeof(cfg_));
-  }
+  explicit DecoderTest(const CodecFactory *codec) : codec_(codec) {}

  virtual ~DecoderTest() {}

  const CodecFactory *codec_;
-  vpx_codec_dec_cfg_t cfg_;
-  vpx_codec_flags_t   flags_;
 };

 }  // namespace libvpx_test
--- a/test/decode_to_md5.sh
+++ b/test/decode_to_md5.sh
@@ -34,10 +34,7 @@ decode_to_md5() {
  local expected_md5="$3"
  local output_file="${VPX_TEST_OUTPUT_DIR}/decode_to_md5_${codec}"

-  if [ ! -x "${decoder}" ]; then
-    elog "${decoder} does not exist or is not executable."
-    return 1
-  fi
+  [ -x "${decoder}" ] || return 1

  eval "${decoder}" "${input_file}" "${output_file}" ${devnull}

--- a/test/decode_with_drops.sh
+++ b/test/decode_with_drops.sh
@@ -34,10 +34,7 @@ decode_with_drops() {
  local output_file="${VPX_TEST_OUTPUT_DIR}/decode_with_drops_${codec}"
  local drop_mode="$3"

-  if [ ! -x "${decoder}" ]; then
-    elog "${decoder} does not exist or is not executable."
-    return 1
-  fi
+  [ -x "${decoder}" ] || return 1

  eval "${decoder}" "${input_file}" "${output_file}" "${drop_mode}" ${devnull}

--- a/test/fdct4x4_test.cc
+++ b/test/fdct4x4_test.cc
@@ -376,19 +376,4 @@ INSTANTIATE_TEST_CASE_P(
        make_tuple(&vp9_fht4x4_sse2, &vp9_iht4x4_16_add_sse2, 3)));
 #endif

-#if HAVE_AVX2
-INSTANTIATE_TEST_CASE_P(
-    AVX2, Trans4x4DCT,
-    ::testing::Values(
-        make_tuple(&vp9_fdct4x4_avx2,
-                   &vp9_idct4x4_16_add_c, 0)));
-INSTANTIATE_TEST_CASE_P(
-    AVX2, Trans4x4HT,
-    ::testing::Values(
-        make_tuple(&vp9_fht4x4_avx2, &vp9_iht4x4_16_add_c, 0),
-        make_tuple(&vp9_fht4x4_avx2, &vp9_iht4x4_16_add_c, 1),
-        make_tuple(&vp9_fht4x4_avx2, &vp9_iht4x4_16_add_c, 2),
-        make_tuple(&vp9_fht4x4_avx2, &vp9_iht4x4_16_add_c, 3)));
-#endif
-
 }  // namespace
--- a/test/fdct8x8_test.cc
+++ b/test/fdct8x8_test.cc
@@ -367,18 +367,4 @@ INSTANTIATE_TEST_CASE_P(
    ::testing::Values(
        make_tuple(&vp9_fdct8x8_ssse3, &vp9_idct8x8_64_add_ssse3, 0)));
 #endif
-
-#if HAVE_AVX2
-INSTANTIATE_TEST_CASE_P(
-    AVX2, FwdTrans8x8DCT,
-    ::testing::Values(
-        make_tuple(&vp9_fdct8x8_avx2, &vp9_idct8x8_64_add_c, 0)));
-INSTANTIATE_TEST_CASE_P(
-    AVX2, FwdTrans8x8HT,
-    ::testing::Values(
-        make_tuple(&vp9_fht8x8_avx2, &vp9_iht8x8_64_add_c, 0),
-        make_tuple(&vp9_fht8x8_avx2, &vp9_iht8x8_64_add_c, 1),
-        make_tuple(&vp9_fht8x8_avx2, &vp9_iht8x8_64_add_c, 2),
-        make_tuple(&vp9_fht8x8_avx2, &vp9_iht8x8_64_add_c, 3)));
-#endif
 }  // namespace
--- a/test/invalid_file_test.cc
+++ b/test/invalid_file_test.cc
@@ -1,112 +0,0 @@
-/*
- *  Copyright (c) 2014 The WebM project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-#include <cstdio>
-#include <cstdlib>
-#include <string>
-#include <vector>
-#include "third_party/googletest/src/include/gtest/gtest.h"
-#include "./vpx_config.h"
-#include "test/codec_factory.h"
-#include "test/decode_test_driver.h"
-#include "test/ivf_video_source.h"
-#include "test/util.h"
-#if CONFIG_WEBM_IO
-#include "test/webm_video_source.h"
-#endif
-#include "vpx_mem/vpx_mem.h"
-
-namespace {
-
-class InvalidFileTest
-    : public ::libvpx_test::DecoderTest,
-      public ::libvpx_test::CodecTestWithParam<const char*> {
- protected:
-  InvalidFileTest() : DecoderTest(GET_PARAM(0)), res_file_(NULL) {}
-
-  virtual ~InvalidFileTest() {
-    if (res_file_ != NULL)
-      fclose(res_file_);
-  }
-
-  void OpenResFile(const std::string &res_file_name_) {
-    res_file_ = libvpx_test::OpenTestDataFile(res_file_name_);
-    ASSERT_TRUE(res_file_ != NULL) << "Result file open failed. Filename: "
-        << res_file_name_;
-  }
-
-  virtual bool HandleDecodeResult(
-      const vpx_codec_err_t res_dec,
-      const libvpx_test::CompressedVideoSource &video,
-      libvpx_test::Decoder *decoder) {
-    EXPECT_TRUE(res_file_ != NULL);
-    int expected_res_dec;
-
-    // Read integer result.
-    const int res = fscanf(res_file_, "%d", &expected_res_dec);
-    EXPECT_NE(res, EOF) << "Read result data failed";
-
-    // Check results match.
-    EXPECT_EQ(expected_res_dec, res_dec)
-        << "Results don't match: frame number = " << video.frame_number();
-
-    return !HasFailure();
-  }
-
- private:
-  FILE *res_file_;
-};
-
-TEST_P(InvalidFileTest, ReturnCode) {
-  const std::string filename = GET_PARAM(1);
-  libvpx_test::CompressedVideoSource *video = NULL;
-
-  // Open compressed video file.
-  if (filename.substr(filename.length() - 3, 3) == "ivf") {
-    video = new libvpx_test::IVFVideoSource(filename);
-  } else if (filename.substr(filename.length() - 4, 4) == "webm") {
-#if CONFIG_WEBM_IO
-    video = new libvpx_test::WebMVideoSource(filename);
-#else
-    fprintf(stderr, "WebM IO is disabled, skipping test vector %s\n",
-            filename.c_str());
-    return;
-#endif
-  }
-  video->Init();
-
-  // Construct result file name. The file holds a list of expected integer
-  // results, one for each decoded frame.  Any result that doesn't match
-  // the files list will cause a test failure.
-  const std::string res_filename = filename + ".res";
-  OpenResFile(res_filename);
-
-  // Decode frame, and check the md5 matching.
-  ASSERT_NO_FATAL_FAILURE(RunLoop(video));
-  delete video;
-}
-
-const char *const kVP9InvalidFileTests[] = {
-  "invalid-vp90-01.webm",
-  "invalid-vp90-02.webm",
-  "invalid-vp90-2-00-quantizer-00.webm.ivf.s5861_r01-05_b6-.ivf",
-  "invalid-vp90-03-v3.webm",
-  "invalid-vp90-2-00-quantizer-11.webm.ivf.s52984_r01-05_b6-.ivf",
-  "invalid-vp90-2-00-quantizer-11.webm.ivf.s52984_r01-05_b6-z.ivf",
-};
-
-#define NELEMENTS(x) static_cast<int>(sizeof(x) / sizeof(x[0]))
-
-VP9_INSTANTIATE_TEST_CASE(InvalidFileTest,
-                          ::testing::ValuesIn(kVP9InvalidFileTests,
-                                              kVP9InvalidFileTests +
-                                              NELEMENTS(kVP9InvalidFileTests)));
-
-}  // namespace
--- a/test/md5_helper.h
+++ b/test/md5_helper.h
@@ -28,11 +28,10 @@ class MD5 {
      // plane, we never want to round down and thus skip a pixel so if
      // we are shifting by 1 (chroma_shift) we add 1 before doing the shift.
      // This works only for chroma_shift of 0 and 1.
-      const int bytes_per_sample = (img->fmt & VPX_IMG_FMT_HIGH) ? 2 : 1;
      const int h = plane ? (img->d_h + img->y_chroma_shift) >>
                    img->y_chroma_shift : img->d_h;
-      const int w = (plane ? (img->d_w + img->x_chroma_shift) >>
-                     img->x_chroma_shift : img->d_w) * bytes_per_sample;
+      const int w = plane ? (img->d_w + img->x_chroma_shift) >>
+                    img->x_chroma_shift : img->d_w;

      for (int y = 0; y < h; ++y) {
        MD5Update(&md5_, buf, w);
--- a/test/postproc.sh
+++ b/test/postproc.sh
@@ -32,10 +32,7 @@ postproc() {
  local codec="$2"
  local output_file="${VPX_TEST_OUTPUT_DIR}/postproc_${codec}.raw"

-  if [ ! -x "${decoder}" ]; then
-    elog "${decoder} does not exist or is not executable."
-    return 1
-  fi
+  [ -x "${decoder}" ] || return 1

  eval "${decoder}" "${input_file}" "${output_file}" ${devnull}

--- a/test/resize_util.sh
+++ b/test/resize_util.sh
@@ -33,10 +33,7 @@ resize_util() {

  # resize_util is available only when CONFIG_SHARED is disabled.
  if [ -z "$(vpx_config_option_enabled CONFIG_SHARED)" ]; then
-    if [ ! -x "${resizer}" ]; then
-      elog "${resizer} does not exist or is not executable."
-      return 1
-    fi
+    [ -x "${resizer}" ] || return 1

    eval "${resizer}" "${YUV_RAW_INPUT}" \
        "${YUV_RAW_INPUT_WIDTH}x${YUV_RAW_INPUT_HEIGHT}" \
--- a/test/sad_test.cc
+++ b/test/sad_test.cc
@@ -627,24 +627,4 @@ INSTANTIATE_TEST_CASE_P(SSE3, SADTest, ::testing::Values(
 #endif  // CONFIG_USE_X86INC
 #endif  // HAVE_SSSE3

-#if HAVE_AVX2
-#if CONFIG_VP9_ENCODER
-// TODO(jzern): these prototypes can be removed after the avx2 versions are
-// reenabled in vp9_rtcd_defs.pl.
-extern "C" {
-void vp9_sad32x32x4d_avx2(const uint8_t *src_ptr, int src_stride,
-                          const uint8_t *const ref_ptr[], int ref_stride,
-                          unsigned int *sad_array);
-void vp9_sad64x64x4d_avx2(const uint8_t *src_ptr, int src_stride,
-                          const uint8_t *const ref_ptr[], int ref_stride,
-                          unsigned int *sad_array);
-}
-const sad_n_by_n_by_4_fn_t sad_64x64x4d_avx2 = vp9_sad64x64x4d_avx2;
-const sad_n_by_n_by_4_fn_t sad_32x32x4d_avx2 = vp9_sad32x32x4d_avx2;
-INSTANTIATE_TEST_CASE_P(DISABLED_AVX2, SADx4Test, ::testing::Values(
-                        make_tuple(32, 32, sad_32x32x4d_avx2),
-                        make_tuple(64, 64, sad_64x64x4d_avx2)));
-#endif  // CONFIG_VP9_ENCODER
-#endif  // HAVE_AVX2
-
 }  // namespace
--- a/test/simple_decoder.sh
+++ b/test/simple_decoder.sh
@@ -32,10 +32,7 @@ simple_decoder() {
  local codec="$2"
  local output_file="${VPX_TEST_OUTPUT_DIR}/simple_decoder_${codec}.raw"

-  if [ ! -x "${decoder}" ]; then
-    elog "${decoder} does not exist or is not executable."
-    return 1
-  fi
+  [ -x "${decoder}" ] || return 1

  eval "${decoder}" "${input_file}" "${output_file}" ${devnull}

--- a/test/simple_encoder.sh
+++ b/test/simple_encoder.sh
@@ -29,10 +29,7 @@ simple_encoder() {
  local codec="$1"
  local output_file="${VPX_TEST_OUTPUT_DIR}/simple_encoder_${codec}.ivf"

-  if [ ! -x "${encoder}" ]; then
-    elog "${encoder} does not exist or is not executable."
-    return 1
-  fi
+  [ -x "${encoder}" ] || return 1

  eval "${encoder}" "${codec}" "${YUV_RAW_INPUT_WIDTH}" \
      "${YUV_RAW_INPUT_HEIGHT}" "${YUV_RAW_INPUT}" "${output_file}" 9999 \
--- a/test/svc_test.cc
+++ b/test/svc_test.cc
@@ -31,6 +31,7 @@ class SvcTest : public ::testing::Test {
  SvcTest()
      : codec_iface_(0),
        test_file_name_("hantro_collage_w352h288.yuv"),
+        stats_file_name_("hantro_collage_w352h288.stat"),
        codec_initialized_(false),
        decoder_(0) {
    memset(&svc_, 0, sizeof(svc_));
@@ -41,6 +42,7 @@ class SvcTest : public ::testing::Test {
  virtual ~SvcTest() {}

  virtual void SetUp() {
+    svc_.encoding_mode = INTER_LAYER_PREDICTION_IP;
    svc_.log_level = SVC_LOG_DEBUG;
    svc_.log_print = 0;

@@ -72,6 +74,7 @@ class SvcTest : public ::testing::Test {
  struct vpx_codec_enc_cfg codec_enc_;
  vpx_codec_iface_t *codec_iface_;
  std::string test_file_name_;
+  std::string stats_file_name_;
  bool codec_initialized_;
  Decoder *decoder_;
 };
@@ -130,13 +133,22 @@ TEST_F(SvcTest, SetLayersOption) {
  EXPECT_EQ(3, svc_.spatial_layers);
 }

+TEST_F(SvcTest, SetEncodingMode) {
+  vpx_codec_err_t res = vpx_svc_set_options(&svc_, "encoding-mode=alt-ip");
+  EXPECT_EQ(VPX_CODEC_OK, res);
+  res = vpx_svc_init(&svc_, &codec_, vpx_codec_vp9_cx(), &codec_enc_);
+  EXPECT_EQ(VPX_CODEC_OK, res);
+  codec_initialized_ = true;
+  EXPECT_EQ(ALT_INTER_LAYER_PREDICTION_IP, svc_.encoding_mode);
+}
+
 TEST_F(SvcTest, SetMultipleOptions) {
-  vpx_codec_err_t res =
-      vpx_svc_set_options(&svc_, "layers=2 scale-factors=1/3,2/3");
+  vpx_codec_err_t res = vpx_svc_set_options(&svc_, "layers=2 encoding-mode=ip");
  res = vpx_svc_init(&svc_, &codec_, vpx_codec_vp9_cx(), &codec_enc_);
  EXPECT_EQ(VPX_CODEC_OK, res);
  codec_initialized_ = true;
  EXPECT_EQ(2, svc_.spatial_layers);
+  EXPECT_EQ(INTER_LAYER_PREDICTION_IP, svc_.encoding_mode);
 }

 TEST_F(SvcTest, SetScaleFactorsOption) {
@@ -167,20 +179,48 @@ TEST_F(SvcTest, SetQuantizersOption) {
  codec_initialized_ = true;
 }

-TEST_F(SvcTest, SetQuantizers) {
-  vpx_codec_err_t res = vpx_svc_set_quantizers(NULL, "40,30");
-  EXPECT_EQ(VPX_CODEC_INVALID_PARAM, res);
-
-  res = vpx_svc_set_quantizers(&svc_, NULL);
-  EXPECT_EQ(VPX_CODEC_INVALID_PARAM, res);
-
+TEST_F(SvcTest, SetKeyFrameQuantizersOption) {
  svc_.spatial_layers = 2;
-  res = vpx_svc_set_quantizers(&svc_, "40");
+  vpx_codec_err_t res = vpx_svc_set_options(&svc_,
+                                       "quantizers-keyframe=not-quantizers");
  EXPECT_EQ(VPX_CODEC_OK, res);
  res = vpx_svc_init(&svc_, &codec_, vpx_codec_vp9_cx(), &codec_enc_);
  EXPECT_EQ(VPX_CODEC_INVALID_PARAM, res);

-  res = vpx_svc_set_quantizers(&svc_, "40,30");
+  vpx_svc_set_options(&svc_, "quantizers-keyframe=40,45");
+  res = vpx_svc_init(&svc_, &codec_, vpx_codec_vp9_cx(), &codec_enc_);
+  EXPECT_EQ(VPX_CODEC_OK, res);
+  codec_initialized_ = true;
+}
+
+TEST_F(SvcTest, SetQuantizers) {
+  vpx_codec_err_t res = vpx_svc_set_quantizers(NULL, "40,30", 0);
+  EXPECT_EQ(VPX_CODEC_INVALID_PARAM, res);
+
+  res = vpx_svc_set_quantizers(&svc_, NULL, 0);
+  EXPECT_EQ(VPX_CODEC_INVALID_PARAM, res);
+
+  svc_.spatial_layers = 2;
+  res = vpx_svc_set_quantizers(&svc_, "40", 0);
+  EXPECT_EQ(VPX_CODEC_OK, res);
+  res = vpx_svc_init(&svc_, &codec_, vpx_codec_vp9_cx(), &codec_enc_);
+  EXPECT_EQ(VPX_CODEC_INVALID_PARAM, res);
+
+  res = vpx_svc_set_quantizers(&svc_, "40,30", 0);
+  EXPECT_EQ(VPX_CODEC_OK, res);
+  res = vpx_svc_init(&svc_, &codec_, vpx_codec_vp9_cx(), &codec_enc_);
+  EXPECT_EQ(VPX_CODEC_OK, res);
+  codec_initialized_ = true;
+}
+
+TEST_F(SvcTest, SetKeyFrameQuantizers) {
+  vpx_codec_err_t res = vpx_svc_set_quantizers(NULL, "40,31", 1);
+  EXPECT_EQ(VPX_CODEC_INVALID_PARAM, res);
+
+  res = vpx_svc_set_quantizers(&svc_, NULL, 1);
+  EXPECT_EQ(VPX_CODEC_INVALID_PARAM, res);
+
+  res = vpx_svc_set_quantizers(&svc_, "40,30", 1);
  EXPECT_EQ(VPX_CODEC_OK, res);
  res = vpx_svc_init(&svc_, &codec_, vpx_codec_vp9_cx(), &codec_enc_);
  EXPECT_EQ(VPX_CODEC_OK, res);
@@ -211,7 +251,7 @@ TEST_F(SvcTest, SetScaleFactors) {
 TEST_F(SvcTest, FirstFrameHasLayers) {
  svc_.spatial_layers = 2;
  vpx_svc_set_scale_factors(&svc_, "4/16,16/16");
-  vpx_svc_set_quantizers(&svc_, "40,30");
+  vpx_svc_set_quantizers(&svc_, "40,30", 0);

  vpx_codec_err_t res =
      vpx_svc_init(&svc_, &codec_, vpx_codec_vp9_cx(), &codec_enc_);
@@ -227,17 +267,9 @@ TEST_F(SvcTest, FirstFrameHasLayers) {
                       video.duration(), VPX_DL_GOOD_QUALITY);
  EXPECT_EQ(VPX_CODEC_OK, res);

-  if (vpx_svc_get_frame_size(&svc_) == 0) {
-    // Flush encoder
-    res = vpx_svc_encode(&svc_, &codec_, NULL, 0,
-                         video.duration(), VPX_DL_GOOD_QUALITY);
-    EXPECT_EQ(VPX_CODEC_OK, res);
-  }
-
-  int frame_size = vpx_svc_get_frame_size(&svc_);
-  EXPECT_GT(frame_size, 0);
  const vpx_codec_err_t res_dec = decoder_->DecodeFrame(
-      static_cast<const uint8_t *>(vpx_svc_get_buffer(&svc_)), frame_size);
+      static_cast<const uint8_t *>(vpx_svc_get_buffer(&svc_)),
+      vpx_svc_get_frame_size(&svc_));

  // this test fails with a decoder error
  ASSERT_EQ(VPX_CODEC_OK, res_dec) << decoder_->DecodeError();
@@ -246,10 +278,7 @@ TEST_F(SvcTest, FirstFrameHasLayers) {
 TEST_F(SvcTest, EncodeThreeFrames) {
  svc_.spatial_layers = 2;
  vpx_svc_set_scale_factors(&svc_, "4/16,16/16");
-  vpx_svc_set_quantizers(&svc_, "40,30");
-  int decoded_frames = 0;
-  vpx_codec_err_t res_dec;
-  int frame_size;
+  vpx_svc_set_quantizers(&svc_, "40,30", 0);

  vpx_codec_err_t res =
      vpx_svc_init(&svc_, &codec_, vpx_codec_vp9_cx(), &codec_enc_);
@@ -264,14 +293,13 @@ TEST_F(SvcTest, EncodeThreeFrames) {
  // This frame is a keyframe.
  res = vpx_svc_encode(&svc_, &codec_, video.img(), video.pts(),
                       video.duration(), VPX_DL_GOOD_QUALITY);
+  ASSERT_EQ(VPX_CODEC_OK, res);
+  EXPECT_EQ(1, vpx_svc_is_keyframe(&svc_));

-  if ((frame_size = vpx_svc_get_frame_size(&svc_)) > 0) {
-    EXPECT_EQ((decoded_frames == 0), vpx_svc_is_keyframe(&svc_));
-    res_dec = decoder_->DecodeFrame(
-        static_cast<const uint8_t *>(vpx_svc_get_buffer(&svc_)), frame_size);
-    ASSERT_EQ(VPX_CODEC_OK, res_dec) << decoder_->DecodeError();
-    ++decoded_frames;
-  }
+  vpx_codec_err_t res_dec = decoder_->DecodeFrame(
+      static_cast<const uint8_t *>(vpx_svc_get_buffer(&svc_)),
+      vpx_svc_get_frame_size(&svc_));
+  ASSERT_EQ(VPX_CODEC_OK, res_dec) << decoder_->DecodeError();

  // FRAME 1
  video.Next();
@@ -279,14 +307,12 @@ TEST_F(SvcTest, EncodeThreeFrames) {
  res = vpx_svc_encode(&svc_, &codec_, video.img(), video.pts(),
                       video.duration(), VPX_DL_GOOD_QUALITY);
  ASSERT_EQ(VPX_CODEC_OK, res);
+  EXPECT_EQ(0, vpx_svc_is_keyframe(&svc_));

-  if ((frame_size = vpx_svc_get_frame_size(&svc_)) > 0) {
-    EXPECT_EQ((decoded_frames == 0), vpx_svc_is_keyframe(&svc_));
-    res_dec = decoder_->DecodeFrame(
-        static_cast<const uint8_t *>(vpx_svc_get_buffer(&svc_)), frame_size);
-    ASSERT_EQ(VPX_CODEC_OK, res_dec) << decoder_->DecodeError();
-    ++decoded_frames;
-  }
+  res_dec = decoder_->DecodeFrame(
+      static_cast<const uint8_t *>(vpx_svc_get_buffer(&svc_)),
+      vpx_svc_get_frame_size(&svc_));
+  ASSERT_EQ(VPX_CODEC_OK, res_dec) << decoder_->DecodeError();

  // FRAME 2
  video.Next();
@@ -294,35 +320,18 @@ TEST_F(SvcTest, EncodeThreeFrames) {
  res = vpx_svc_encode(&svc_, &codec_, video.img(), video.pts(),
                       video.duration(), VPX_DL_GOOD_QUALITY);
  ASSERT_EQ(VPX_CODEC_OK, res);
+  EXPECT_EQ(0, vpx_svc_is_keyframe(&svc_));

-  if ((frame_size = vpx_svc_get_frame_size(&svc_)) > 0) {
-    EXPECT_EQ((decoded_frames == 0), vpx_svc_is_keyframe(&svc_));
-    res_dec = decoder_->DecodeFrame(
-        static_cast<const uint8_t *>(vpx_svc_get_buffer(&svc_)), frame_size);
-    ASSERT_EQ(VPX_CODEC_OK, res_dec) << decoder_->DecodeError();
-    ++decoded_frames;
-  }
-
-  // Flush encoder
-  res = vpx_svc_encode(&svc_, &codec_, NULL, 0,
-                       video.duration(), VPX_DL_GOOD_QUALITY);
-  EXPECT_EQ(VPX_CODEC_OK, res);
-
-  while ((frame_size = vpx_svc_get_frame_size(&svc_)) > 0) {
-    EXPECT_EQ((decoded_frames == 0), vpx_svc_is_keyframe(&svc_));
-    res_dec = decoder_->DecodeFrame(
-        static_cast<const uint8_t *>(vpx_svc_get_buffer(&svc_)), frame_size);
-    ASSERT_EQ(VPX_CODEC_OK, res_dec) << decoder_->DecodeError();
-    ++decoded_frames;
-  }
-
-  EXPECT_EQ(decoded_frames, 3);
+  res_dec = decoder_->DecodeFrame(
+      static_cast<const uint8_t *>(vpx_svc_get_buffer(&svc_)),
+      vpx_svc_get_frame_size(&svc_));
+  ASSERT_EQ(VPX_CODEC_OK, res_dec) << decoder_->DecodeError();
 }

 TEST_F(SvcTest, GetLayerResolution) {
  svc_.spatial_layers = 2;
  vpx_svc_set_scale_factors(&svc_, "4/16,8/16");
-  vpx_svc_set_quantizers(&svc_, "40,30");
+  vpx_svc_set_quantizers(&svc_, "40,30", 0);

  vpx_codec_err_t res =
      vpx_svc_init(&svc_, &codec_, vpx_codec_vp9_cx(), &codec_enc_);
@@ -355,13 +364,11 @@ TEST_F(SvcTest, GetLayerResolution) {
  EXPECT_EQ(kHeight * 8 / 16, layer_height);
 }

-TEST_F(SvcTest, TwoPassEncode) {
-  // First pass encode
-  std::string stats_buf;
+TEST_F(SvcTest, FirstPassEncode) {
  svc_.spatial_layers = 2;
  codec_enc_.g_pass = VPX_RC_FIRST_PASS;
  vpx_svc_set_scale_factors(&svc_, "4/16,16/16");
-  vpx_svc_set_quantizers(&svc_, "40,30");
+  vpx_svc_set_quantizers(&svc_, "40,30", 0);

  vpx_codec_err_t res =
      vpx_svc_init(&svc_, &codec_, vpx_codec_vp9_cx(), &codec_enc_);
@@ -376,61 +383,62 @@ TEST_F(SvcTest, TwoPassEncode) {
  res = vpx_svc_encode(&svc_, &codec_, video.img(), video.pts(),
                       video.duration(), VPX_DL_GOOD_QUALITY);
  ASSERT_EQ(VPX_CODEC_OK, res);
-  size_t stats_size = vpx_svc_get_rc_stats_buffer_size(&svc_);
-  EXPECT_GT(stats_size, 0U);
-  const char *stats_data = vpx_svc_get_rc_stats_buffer(&svc_);
-  ASSERT_TRUE(stats_data != NULL);
-  stats_buf.append(stats_data, stats_size);
+  EXPECT_GT(vpx_svc_get_rc_stats_buffer_size(&svc_), 0U);

  // FRAME 1
  video.Next();
  res = vpx_svc_encode(&svc_, &codec_, video.img(), video.pts(),
                       video.duration(), VPX_DL_GOOD_QUALITY);
-  stats_size = vpx_svc_get_rc_stats_buffer_size(&svc_);
-  EXPECT_GT(stats_size, 0U);
-  stats_data = vpx_svc_get_rc_stats_buffer(&svc_);
-  ASSERT_TRUE(stats_data != NULL);
-  stats_buf.append(stats_data, stats_size);
+  ASSERT_EQ(VPX_CODEC_OK, res);
+  EXPECT_GT(vpx_svc_get_rc_stats_buffer_size(&svc_), 0U);

  // Flush encoder and test EOS packet
  res = vpx_svc_encode(&svc_, &codec_, NULL, video.pts(),
                       video.duration(), VPX_DL_GOOD_QUALITY);
-  stats_size = vpx_svc_get_rc_stats_buffer_size(&svc_);
-  EXPECT_GT(stats_size, 0U);
-  stats_data = vpx_svc_get_rc_stats_buffer(&svc_);
-  ASSERT_TRUE(stats_data != NULL);
-  stats_buf.append(stats_data, stats_size);
+  ASSERT_EQ(VPX_CODEC_OK, res);
+  EXPECT_GT(vpx_svc_get_rc_stats_buffer_size(&svc_), 0U);
+}

-  // Tear down encoder
-  vpx_svc_release(&svc_);
-  vpx_codec_destroy(&codec_);
-
-  // Second pass encode
-  int decoded_frames = 0;
-  vpx_codec_err_t res_dec;
-  int frame_size;
+TEST_F(SvcTest, SecondPassEncode) {
+  svc_.spatial_layers = 2;
  codec_enc_.g_pass = VPX_RC_LAST_PASS;
-  codec_enc_.rc_twopass_stats_in.buf = &stats_buf[0];
-  codec_enc_.rc_twopass_stats_in.sz = stats_buf.size();

-  res = vpx_svc_init(&svc_, &codec_, vpx_codec_vp9_cx(), &codec_enc_);
+  FILE *const stats_file = libvpx_test::OpenTestDataFile(stats_file_name_);
+  ASSERT_TRUE(stats_file != NULL) << "Stats file open failed. Filename: "
+      << stats_file_name_;  // report the name; the FILE* is NULL on failure
+
+  struct vpx_fixed_buf stats_buf;
+  fseek(stats_file, 0, SEEK_END);
+  stats_buf.sz = static_cast<size_t>(ftell(stats_file));
+  fseek(stats_file, 0, SEEK_SET);
+
+  stats_buf.buf = malloc(stats_buf.sz);
+  ASSERT_TRUE(stats_buf.buf != NULL);
+  const size_t bytes_read = fread(stats_buf.buf, 1, stats_buf.sz, stats_file);
+  ASSERT_EQ(bytes_read, stats_buf.sz);
+  fclose(stats_file);
+  codec_enc_.rc_twopass_stats_in = stats_buf;
+
+  vpx_codec_err_t res =
+      vpx_svc_init(&svc_, &codec_, vpx_codec_vp9_cx(), &codec_enc_);
  ASSERT_EQ(VPX_CODEC_OK, res);
  codec_initialized_ = true;

+  libvpx_test::I420VideoSource video(test_file_name_, kWidth, kHeight,
+                                     codec_enc_.g_timebase.den,
+                                     codec_enc_.g_timebase.num, 0, 30);
  // FRAME 0
  video.Begin();
  // This frame is a keyframe.
  res = vpx_svc_encode(&svc_, &codec_, video.img(), video.pts(),
                       video.duration(), VPX_DL_GOOD_QUALITY);
  ASSERT_EQ(VPX_CODEC_OK, res);
+  EXPECT_EQ(1, vpx_svc_is_keyframe(&svc_));

-  if ((frame_size = vpx_svc_get_frame_size(&svc_)) > 0) {
-    EXPECT_EQ((decoded_frames == 0), vpx_svc_is_keyframe(&svc_));
-    res_dec = decoder_->DecodeFrame(
-        static_cast<const uint8_t *>(vpx_svc_get_buffer(&svc_)), frame_size);
-    ASSERT_EQ(VPX_CODEC_OK, res_dec) << decoder_->DecodeError();
-    ++decoded_frames;
-  }
+  vpx_codec_err_t res_dec = decoder_->DecodeFrame(
+      static_cast<const uint8_t *>(vpx_svc_get_buffer(&svc_)),
+      vpx_svc_get_frame_size(&svc_));
+  ASSERT_EQ(VPX_CODEC_OK, res_dec) << decoder_->DecodeError();

  // FRAME 1
  video.Next();
@@ -438,14 +446,12 @@ TEST_F(SvcTest, TwoPassEncode) {
  res = vpx_svc_encode(&svc_, &codec_, video.img(), video.pts(),
                       video.duration(), VPX_DL_GOOD_QUALITY);
  ASSERT_EQ(VPX_CODEC_OK, res);
+  EXPECT_EQ(0, vpx_svc_is_keyframe(&svc_));

-  if ((frame_size = vpx_svc_get_frame_size(&svc_)) > 0) {
-    EXPECT_EQ((decoded_frames == 0), vpx_svc_is_keyframe(&svc_));
-    res_dec = decoder_->DecodeFrame(
-        static_cast<const uint8_t *>(vpx_svc_get_buffer(&svc_)), frame_size);
-    ASSERT_EQ(VPX_CODEC_OK, res_dec) << decoder_->DecodeError();
-    ++decoded_frames;
-  }
+  res_dec = decoder_->DecodeFrame(
+      static_cast<const uint8_t *>(vpx_svc_get_buffer(&svc_)),
+      vpx_svc_get_frame_size(&svc_));
+  ASSERT_EQ(VPX_CODEC_OK, res_dec) << decoder_->DecodeError();

  // FRAME 2
  video.Next();
@@ -453,29 +459,14 @@ TEST_F(SvcTest, TwoPassEncode) {
  res = vpx_svc_encode(&svc_, &codec_, video.img(), video.pts(),
                       video.duration(), VPX_DL_GOOD_QUALITY);
  ASSERT_EQ(VPX_CODEC_OK, res);
+  EXPECT_EQ(0, vpx_svc_is_keyframe(&svc_));

-  if ((frame_size = vpx_svc_get_frame_size(&svc_)) > 0) {
-    EXPECT_EQ((decoded_frames == 0), vpx_svc_is_keyframe(&svc_));
-    res_dec = decoder_->DecodeFrame(
-        static_cast<const uint8_t *>(vpx_svc_get_buffer(&svc_)), frame_size);
-    ASSERT_EQ(VPX_CODEC_OK, res_dec) << decoder_->DecodeError();
-    ++decoded_frames;
-  }
+  res_dec = decoder_->DecodeFrame(
+      static_cast<const uint8_t *>(vpx_svc_get_buffer(&svc_)),
+      vpx_svc_get_frame_size(&svc_));
+  ASSERT_EQ(VPX_CODEC_OK, res_dec) << decoder_->DecodeError();

-  // Flush encoder
-  res = vpx_svc_encode(&svc_, &codec_, NULL, 0,
-                       video.duration(), VPX_DL_GOOD_QUALITY);
-  EXPECT_EQ(VPX_CODEC_OK, res);
-
-  while ((frame_size = vpx_svc_get_frame_size(&svc_)) > 0) {
-    EXPECT_EQ((decoded_frames == 0), vpx_svc_is_keyframe(&svc_));
-    res_dec = decoder_->DecodeFrame(
-        static_cast<const uint8_t *>(vpx_svc_get_buffer(&svc_)), frame_size);
-    ASSERT_EQ(VPX_CODEC_OK, res_dec) << decoder_->DecodeError();
-    ++decoded_frames;
-  }
-
-  EXPECT_EQ(decoded_frames, 3);
+  free(stats_buf.buf);
 }

 }  // namespace
--- a/test/test-data.sha1
+++ b/test/test-data.sha1
@@ -1,20 +1,6 @@
 d5dfb0151c9051f8c85999255645d7a23916d3c0  hantro_collage_w352h288.yuv
+998cec53307c94aa5835aaf8d5731f6a3c7c2e5a  hantro_collage_w352h288.stat
 b87815bf86020c592ccc7a846ba2e28ec8043902  hantro_odd.yuv
-fe346136b9b8c1e6f6084cc106485706915795e4  invalid-vp90-01.webm
-25751f5d3b05ff03f0719ad42cd625348eb8961e  invalid-vp90-01.webm.res
-d78e2fceba5ac942246503ec8366f879c4775ca5  invalid-vp90-02.webm
-2dadee5306245fa5eeb0f99652d0e17afbcba96d  invalid-vp90-02.webm.res
-df1a1453feb3c00d7d89746c7003b4163523bff3  invalid-vp90-03-v3.webm
-4935c62becc68c13642a03db1e6d3e2331c1c612  invalid-vp90-03-v3.webm.res
-a432f96ff0a787268e2f94a8092ab161a18d1b06  park_joy_90p_10_420.y4m
-0b194cc312c3a2e84d156a221b0a5eb615dfddc5  park_joy_90p_10_422.y4m
-ff0e0a21dc2adc95b8c1b37902713700655ced17  park_joy_90p_10_444.y4m
-614c32ae1eca391e867c70d19974f0d62664dd99  park_joy_90p_12_420.y4m
-c92825f1ea25c5c37855083a69faac6ac4641a9e  park_joy_90p_12_422.y4m
-b592189b885b6cc85db55cc98512a197d73d3b34  park_joy_90p_12_444.y4m
-4e0eb61e76f0684188d9bc9f3ce61f6b6b77bb2c  park_joy_90p_8_420.y4m
-7a193ff7dfeb96ba5f82b2afd7afa9e1fe83d947  park_joy_90p_8_422.y4m
-bdb7856e6bc93599bdda05c2e773a9f22b6c6d03  park_joy_90p_8_444.y4m
 b1f1c3ec79114b9a0651af24ce634afb44a9a419  rush_hour_444.y4m
 5184c46ddca8b1fadd16742e8500115bc8f749da  vp80-00-comprehensive-001.ivf
 65bf1bbbced81b97bd030f376d1b7f61a224793f  vp80-00-comprehensive-002.ivf
@@ -544,6 +530,8 @@ b6524e4084d15b5d0caaa3d3d1368db30cbee69c  vp90-2-03-deltaq.webm
 7f6d8879336239a43dbb6c9f13178cb11cf7ed09  vp90-2-05-resize.ivf.md5
 bf61ddc1f716eba58d4c9837d4e91031d9ce4ffe  vp90-2-06-bilinear.webm
 f6235f937552e11d8eb331ec55da6b3aa596b9ac  vp90-2-06-bilinear.webm.md5
+495256cfd123fe777b2c0406862ed8468a1f4677  vp91-2-04-yv444.webm
+65e3a7ffef61ab340d9140f335ecc49125970c2c  vp91-2-04-yv444.webm.md5
 0c83a1e414fde3bccd6dc451bbaee68e59974c76  vp90-2-07-frame_parallel.webm
 e5c2c9fb383e5bf3b563480adaeba5b7e3475ecd  vp90-2-07-frame_parallel.webm.md5
 086c7edcffd699ae7d99d710fd7e53b18910ca5b  vp90-2-08-tile_1x2_frame_parallel.webm
@@ -589,8 +577,6 @@ d48c5db1b0f8e60521a7c749696b8067886033a3  vp90-2-09-aq2.webm
 54638c38009198c38c8f3b25c182b709b6c1fd2e  vp90-2-09-lf_deltas.webm.md5
 510d95f3beb3b51c572611fdaeeece12277dac30  vp90-2-10-show-existing-frame.webm
 14d631096f4bfa2d71f7f739aec1448fb3c33bad  vp90-2-10-show-existing-frame.webm.md5
-d2feea7728e8d2c615981d0f47427a4a5a45d881  vp90-2-10-show-existing-frame2.webm
-5f7c7811baa3e4f03be1dd78c33971b727846821  vp90-2-10-show-existing-frame2.webm.md5
 b4318e75f73a6a08992c7326de2fb589c2a794c7  vp90-2-11-size-351x287.webm
 b3c48382cf7d0454e83a02497c229d27720f9e20  vp90-2-11-size-351x287.webm.md5
 8e0096475ea2535bac71d3e2fc09e0c451c444df  vp90-2-11-size-351x288.webm
@@ -653,20 +639,4 @@ e615575ded499ea1d992f3b38e3baa434509cdcd  vp90-2-15-segkey.webm
 e3ab35d4316c5e81325c50f5236ceca4bc0d35df  vp90-2-15-segkey.webm.md5
 9b7ca2cac09d34c4a5d296c1900f93b1e2f69d0d  vp90-2-15-segkey_adpq.webm
 8f46ba5f785d0c2170591a153e0d0d146a7c8090  vp90-2-15-segkey_adpq.webm.md5
-0321d507ce62dedc8a51b4e9011f7a19aed9c3dc  vp91-2-04-yuv444.webm
-367e423dd41fdb49aa028574a2cfec5c2f325c5c  vp91-2-04-yuv444.webm.md5
-76024eb753cdac6a5e5703aaea189d35c3c30ac7  invalid-vp90-2-00-quantizer-00.webm.ivf.s5861_r01-05_b6-.ivf
-d3964f9dad9f60363c81b688324d95b4ec7c8038  invalid-vp90-2-00-quantizer-00.webm.ivf.s5861_r01-05_b6-.ivf.res
-83f50908c8dc0ef8760595447a2ff7727489542e  invalid-vp90-2-00-quantizer-11.webm.ivf.s52984_r01-05_b6-.ivf
-456d1493e52d32a5c30edf44a27debc1fa6b253a  invalid-vp90-2-00-quantizer-11.webm.ivf.s52984_r01-05_b6-.ivf.res
-c123d1f9f02fb4143abb5e271916e3a3080de8f6  invalid-vp90-2-00-quantizer-11.webm.ivf.s52984_r01-05_b6-z.ivf
-456d1493e52d32a5c30edf44a27debc1fa6b253a  invalid-vp90-2-00-quantizer-11.webm.ivf.s52984_r01-05_b6-z.ivf.res
-f97088c7359fc8d3d5aa5eafe57bc7308b3ee124  vp90-2-20-big_superframe-01.webm
-47d7d409785afa33b123376de0c907336e6c7bd7  vp90-2-20-big_superframe-01.webm.md5
-65ade6d2786209582c50d34cfe22b3cdb033abaf  vp90-2-20-big_superframe-02.webm
-7c0ed8d04c4d06c5411dd2e5de2411d37f092db5  vp90-2-20-big_superframe-02.webm.md5
-667ec8718c982aef6be07eb94f083c2efb9d2d16  vp90-2-07-frame_parallel-1.webm
-bfc82bf848e9c05020d61e3ffc1e62f25df81d19  vp90-2-07-frame_parallel-1.webm.md5
-efd5a51d175cfdacd169ed23477729dc558030dc  invalid-vp90-2-07-frame_parallel-1.webm
-9f912712ec418be69adb910e2ca886a63c4cec08  invalid-vp90-2-07-frame_parallel-2.webm
-445f5a53ca9555341852997ccdd480a51540bd14  invalid-vp90-2-07-frame_parallel-3.webm
+
--- a/test/test.mk
+++ b/test/test.mk
@@ -15,7 +15,7 @@ LIBVPX_TEST_SRCS-yes += video_source.h
 ##
 ## Black box tests only use the public API.
 ##
-LIBVPX_TEST_SRCS-yes                   += ../md5_utils.h ../md5_utils.c
+LIBVPX_TEST_SRCS-$(CONFIG_DECODERS)    += ../md5_utils.h ../md5_utils.c
 LIBVPX_TEST_SRCS-$(CONFIG_DECODERS)    += ivf_video_source.h
 LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS)    += ../y4minput.h ../y4minput.c
 LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS)    += aq_segment_test.cc
@@ -30,8 +30,6 @@ LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += cq_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += keyframe_test.cc

 LIBVPX_TEST_SRCS-$(CONFIG_VP9_DECODER) += external_frame_buffer_test.cc
-LIBVPX_TEST_SRCS-$(CONFIG_VP9_DECODER) += user_priv_test.cc
-LIBVPX_TEST_SRCS-$(CONFIG_VP9_DECODER) += vp9_frame_parallel_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += active_map_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += borders_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += cpu_speed_test.cc
@@ -43,9 +41,6 @@ LIBVPX_TEST_SRCS-yes                   += decode_test_driver.h
 LIBVPX_TEST_SRCS-yes                   += encode_test_driver.cc
 LIBVPX_TEST_SRCS-yes                   += encode_test_driver.h

-## Y4m parsing.
-LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS)    += y4m_test.cc ../y4menc.c ../y4menc.h
-
 ## WebM Parsing
 ifeq ($(CONFIG_WEBM_IO), yes)
 LIBWEBM_PARSER_SRCS                    += ../third_party/libwebm/mkvparser.cpp
@@ -59,7 +54,6 @@ LIBVPX_TEST_SRCS-$(CONFIG_DECODERS)    += ../webmdec.h
 LIBVPX_TEST_SRCS-$(CONFIG_DECODERS)    += webm_video_source.h
 endif

-LIBVPX_TEST_SRCS-$(CONFIG_DECODERS)    += invalid_file_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_DECODERS)    += test_vector_test.cc

 # Currently we only support decoder perf tests for vp9. Also they read from WebM
@@ -137,20 +131,9 @@ endif # CONFIG_SHARED
 ## TEST DATA
 ##
 LIBVPX_TEST_DATA-$(CONFIG_ENCODERS) += hantro_collage_w352h288.yuv
+LIBVPX_TEST_DATA-$(CONFIG_ENCODERS) += hantro_collage_w352h288.stat
 LIBVPX_TEST_DATA-$(CONFIG_ENCODERS) += hantro_odd.yuv
-
-LIBVPX_TEST_DATA-$(CONFIG_ENCODERS) += park_joy_90p_10_420.y4m
-LIBVPX_TEST_DATA-$(CONFIG_ENCODERS) += park_joy_90p_10_422.y4m
-LIBVPX_TEST_DATA-$(CONFIG_ENCODERS) += park_joy_90p_10_444.y4m
-LIBVPX_TEST_DATA-$(CONFIG_ENCODERS) += park_joy_90p_12_420.y4m
-LIBVPX_TEST_DATA-$(CONFIG_ENCODERS) += park_joy_90p_12_422.y4m
-LIBVPX_TEST_DATA-$(CONFIG_ENCODERS) += park_joy_90p_12_444.y4m
-LIBVPX_TEST_DATA-$(CONFIG_ENCODERS) += park_joy_90p_8_420.y4m
-LIBVPX_TEST_DATA-$(CONFIG_ENCODERS) += park_joy_90p_8_422.y4m
-LIBVPX_TEST_DATA-$(CONFIG_ENCODERS) += park_joy_90p_8_444.y4m
-
 LIBVPX_TEST_DATA-$(CONFIG_VP9_ENCODER) += rush_hour_444.y4m
-LIBVPX_TEST_DATA-$(CONFIG_VP9_ENCODER) += screendata.y4m

 LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-001.ivf
 LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-001.ivf.md5
@@ -684,8 +667,6 @@ LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-06-bilinear.webm
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-06-bilinear.webm.md5
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-07-frame_parallel.webm
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-07-frame_parallel.webm.md5
-LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-07-frame_parallel-1.webm
-LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-07-frame_parallel-1.webm.md5
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-08-tile-4x1.webm
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-08-tile-4x1.webm.md5
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-08-tile-4x4.webm
@@ -710,8 +691,6 @@ LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-09-subpixel-00.ivf
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-09-subpixel-00.ivf.md5
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-10-show-existing-frame.webm
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-10-show-existing-frame.webm.md5
-LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-10-show-existing-frame2.webm
-LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-10-show-existing-frame2.webm.md5
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-11-size-351x287.webm
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-11-size-351x287.webm.md5
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-11-size-351x288.webm
@@ -726,6 +705,8 @@ LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-12-droppable_3.ivf
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-12-droppable_3.ivf.md5
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-13-largescaling.webm
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-13-largescaling.webm.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp91-2-04-yv444.webm
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp91-2-04-yv444.webm.md5
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-1-2.webm
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-1-2.webm.md5
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-1-4.webm
@@ -774,29 +755,6 @@ LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-15-segkey.webm
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-15-segkey.webm.md5
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-15-segkey_adpq.webm
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-15-segkey_adpq.webm.md5
-LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp91-2-04-yuv444.webm
-LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp91-2-04-yuv444.webm.md5
-LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-20-big_superframe-01.webm
-LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-20-big_superframe-01.webm.md5
-LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-20-big_superframe-02.webm
-LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-20-big_superframe-02.webm.md5
-
-# Invalid files for testing libvpx error checking.
-LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-01.webm
-LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-01.webm.res
-LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-02.webm
-LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-02.webm.res
-LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-03-v3.webm
-LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-03-v3.webm.res
-LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-2-00-quantizer-00.webm.ivf.s5861_r01-05_b6-.ivf
-LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-2-00-quantizer-00.webm.ivf.s5861_r01-05_b6-.ivf.res
-LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-2-00-quantizer-11.webm.ivf.s52984_r01-05_b6-.ivf
-LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-2-00-quantizer-11.webm.ivf.s52984_r01-05_b6-.ivf.res
-LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-2-00-quantizer-11.webm.ivf.s52984_r01-05_b6-z.ivf
-LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-2-00-quantizer-11.webm.ivf.s52984_r01-05_b6-z.ivf.res
-LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-2-07-frame_parallel-1.webm
-LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-2-07-frame_parallel-2.webm
-LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-2-07-frame_parallel-3.webm

 ifeq ($(CONFIG_DECODE_PERF_TESTS),yes)
 # BBB VP9 streams
--- a/test/test_vector_test.cc
+++ b/test/test_vector_test.cc
@@ -12,7 +12,6 @@
 #include <cstdlib>
 #include <string>
 #include "third_party/googletest/src/include/gtest/gtest.h"
-#include "../tools_common.h"
 #include "./vpx_config.h"
 #include "test/codec_factory.h"
 #include "test/decode_test_driver.h"
@@ -27,24 +26,10 @@

 namespace {

-enum DecodeMode {
-  kSerialMode,
-  kFrameParallMode
-};
-
-const int kDecodeMode = 0;
-const int kThreads = 1;
-const int kFileName = 2;
-
-typedef std::tr1::tuple<int, int, const char *> DecodeParam;
-
 class TestVectorTest : public ::libvpx_test::DecoderTest,
-    public ::libvpx_test::CodecTestWithParam<DecodeParam> {
+    public ::libvpx_test::CodecTestWithParam<const char*> {
 protected:
-  TestVectorTest()
-      : DecoderTest(GET_PARAM(0)),
-        md5_file_(NULL) {
-  }
+  TestVectorTest() : DecoderTest(GET_PARAM(0)), md5_file_(NULL) {}

  virtual ~TestVectorTest() {
    if (md5_file_)
@@ -86,25 +71,8 @@ class TestVectorTest : public ::libvpx_test::DecoderTest,
 // checksums match the correct md5 data, then the test is passed. Otherwise,
 // the test failed.
 TEST_P(TestVectorTest, MD5Match) {
-  const DecodeParam input = GET_PARAM(1);
-  const std::string filename = std::tr1::get<kFileName>(input);
-  const int threads = std::tr1::get<kThreads>(input);
-  const int mode = std::tr1::get<kDecodeMode>(input);
+  const std::string filename = GET_PARAM(1);
  libvpx_test::CompressedVideoSource *video = NULL;
-  vpx_codec_flags_t flags = 0;
-  vpx_codec_dec_cfg_t cfg = {0};
-  char str[256];
-
-  if (mode == kFrameParallMode) {
-    flags |= VPX_CODEC_USE_FRAME_THREADING;
-  }
-
-  cfg.threads = threads;
-
-  snprintf(str, sizeof(str) / sizeof(str[0]) - 1,
-           "file: %s  mode: %s threads: %d",
-           filename.c_str(), mode == 0 ? "Serial" : "Parallel", threads);
-  SCOPED_TRACE(str);

  // Open compressed video file.
  if (filename.substr(filename.length() - 3, 3) == "ivf") {
@@ -124,53 +92,18 @@ TEST_P(TestVectorTest, MD5Match) {
  const std::string md5_filename = filename + ".md5";
  OpenMD5File(md5_filename);

-  // Set decode config and flags.
-  set_cfg(cfg);
-  set_flags(flags);
-
  // Decode frame, and check the md5 matching.
  ASSERT_NO_FATAL_FAILURE(RunLoop(video));
  delete video;
 }

-// Test VP8 decode in serial mode with single thread.
-// NOTE: VP8 only support serial mode.
-INSTANTIATE_TEST_CASE_P(
-    VP8, TestVectorTest,
-    ::testing::Combine(
-        ::testing::Values(
-            static_cast<const libvpx_test::CodecFactory *>(&libvpx_test::kVP8)),
-        ::testing::Combine(
-            ::testing::Values(0),  // Serial Mode.
-            ::testing::Values(1),  // Single thread.
-            ::testing::ValuesIn(libvpx_test::kVP8TestVectors,
-                                libvpx_test::kVP8TestVectors +
-                                    libvpx_test::kNumVP8TestVectors))));
+VP8_INSTANTIATE_TEST_CASE(TestVectorTest,
+                          ::testing::ValuesIn(libvpx_test::kVP8TestVectors,
+                                              libvpx_test::kVP8TestVectors +
+                                              libvpx_test::kNumVP8TestVectors));
+VP9_INSTANTIATE_TEST_CASE(TestVectorTest,
+                          ::testing::ValuesIn(libvpx_test::kVP9TestVectors,
+                                              libvpx_test::kVP9TestVectors +
+                                              libvpx_test::kNumVP9TestVectors));

-// Test VP9 decode in serial mode with single thread.
-INSTANTIATE_TEST_CASE_P(
-    VP9, TestVectorTest,
-    ::testing::Combine(
-        ::testing::Values(
-            static_cast<const libvpx_test::CodecFactory *>(&libvpx_test::kVP9)),
-        ::testing::Combine(
-            ::testing::Values(0),  // Serial Mode.
-            ::testing::Values(1),  // Single thread.
-            ::testing::ValuesIn(libvpx_test::kVP9TestVectors,
-                                libvpx_test::kVP9TestVectors +
-                                    libvpx_test::kNumVP9TestVectors))));
-
-
-// Test VP9 decode in frame parallel mode with different number of threads.
-INSTANTIATE_TEST_CASE_P(
-    VP9MultiThreadedFrameParallel, TestVectorTest,
-    ::testing::Combine(
-        ::testing::Values(
-            static_cast<const libvpx_test::CodecFactory *>(&libvpx_test::kVP9)),
-        ::testing::Combine(
-            ::testing::Values(1),        // Frame Parallel mode.
-            ::testing::Range(2, 9),      // With 2 ~ 8 threads.
-            ::testing::ValuesIn(libvpx_test::kVP9TestVectors,
-                                libvpx_test::kVP9TestVectors +
-                                    libvpx_test::kNumVP9TestVectors))));
 }  // namespace
--- a/test/test_vectors.cc
+++ b/test/test_vectors.cc
@@ -154,19 +154,17 @@ const char *const kVP9TestVectors[] = {
  "vp90-2-03-size-226x210.webm", "vp90-2-03-size-226x224.webm",
  "vp90-2-03-size-226x226.webm", "vp90-2-03-deltaq.webm",
  "vp90-2-05-resize.ivf", "vp90-2-06-bilinear.webm",
-  "vp90-2-07-frame_parallel.webm", "vp90-2-07-frame_parallel-1.webm",
-  "vp90-2-08-tile_1x2_frame_parallel.webm",
+  "vp90-2-07-frame_parallel.webm", "vp90-2-08-tile_1x2_frame_parallel.webm",
  "vp90-2-08-tile_1x2.webm", "vp90-2-08-tile_1x4_frame_parallel.webm",
  "vp90-2-08-tile_1x4.webm", "vp90-2-08-tile_1x8_frame_parallel.webm",
  "vp90-2-08-tile_1x8.webm", "vp90-2-08-tile-4x4.webm",
  "vp90-2-08-tile-4x1.webm", "vp90-2-09-subpixel-00.ivf",
  "vp90-2-02-size-lf-1920x1080.webm", "vp90-2-09-aq2.webm",
  "vp90-2-09-lf_deltas.webm", "vp90-2-10-show-existing-frame.webm",
-  "vp90-2-10-show-existing-frame2.webm",
  "vp90-2-11-size-351x287.webm", "vp90-2-11-size-351x288.webm",
  "vp90-2-11-size-352x287.webm", "vp90-2-12-droppable_1.ivf",
  "vp90-2-12-droppable_2.ivf", "vp90-2-12-droppable_3.ivf",
-  "vp90-2-13-largescaling.webm",
+  "vp90-2-13-largescaling.webm", "vp91-2-04-yv444.webm",
  "vp90-2-14-resize-fp-tiles-1-16.webm",
  "vp90-2-14-resize-fp-tiles-1-2-4-8-16.webm",
  "vp90-2-14-resize-fp-tiles-1-2.webm", "vp90-2-14-resize-fp-tiles-1-4.webm",
@@ -180,9 +178,7 @@ const char *const kVP9TestVectors[] = {
  "vp90-2-14-resize-fp-tiles-4-2.webm", "vp90-2-14-resize-fp-tiles-4-8.webm",
  "vp90-2-14-resize-fp-tiles-8-16.webm", "vp90-2-14-resize-fp-tiles-8-1.webm",
  "vp90-2-14-resize-fp-tiles-8-2.webm", "vp90-2-14-resize-fp-tiles-8-4.webm",
-  "vp90-2-15-segkey.webm", "vp90-2-15-segkey_adpq.webm",
-  "vp91-2-04-yuv444.webm",
-  "vp90-2-20-big_superframe-01.webm", "vp90-2-20-big_superframe-02.webm",
+  "vp90-2-15-segkey.webm", "vp90-2-15-segkey_adpq.webm"
 };
 const int kNumVP9TestVectors = NELEMENTS(kVP9TestVectors);
 #endif  // CONFIG_VP9_DECODER
--- a/test/tools_common.sh
+++ b/test/tools_common.sh
@@ -17,10 +17,6 @@ VPX_TEST_TOOLS_COMMON_SH=included
 set -e
 devnull='> /dev/null 2>&1'

-elog() {
-  echo "$@" 1>&2
-}
-
 vlog() {
  if [ "${VPX_TEST_VERBOSE_OUTPUT}" = "yes" ]; then
    echo "$@"
@@ -460,19 +456,10 @@ vlog "$(basename "${0%.*}") test configuration:
  LIBVPX_BIN_PATH=${LIBVPX_BIN_PATH}
  LIBVPX_CONFIG_PATH=${LIBVPX_CONFIG_PATH}
  LIBVPX_TEST_DATA_PATH=${LIBVPX_TEST_DATA_PATH}
-  VP8_IVF_FILE=${VP8_IVF_FILE}
-  VP9_IVF_FILE=${VP9_IVF_FILE}
-  VP9_WEBM_FILE=${VP9_WEBM_FILE}
-  VPX_TEST_EXE_SUFFIX=${VPX_TEST_EXE_SUFFIX}
-  VPX_TEST_FILTER=${VPX_TEST_FILTER}
  VPX_TEST_OUTPUT_DIR=${VPX_TEST_OUTPUT_DIR}
-  VPX_TEST_RAND=${VPX_TEST_RAND}
-  VPX_TEST_RUN_DISABLED_TESTS=${VPX_TEST_RUN_DISABLED_TESTS}
-  VPX_TEST_SHOW_PROGRAM_OUTPUT=${VPX_TEST_SHOW_PROGRAM_OUTPUT}
-  VPX_TEST_TEMP_ROOT=${VPX_TEST_TEMP_ROOT}
  VPX_TEST_VERBOSE_OUTPUT=${VPX_TEST_VERBOSE_OUTPUT}
-  YUV_RAW_INPUT=${YUV_RAW_INPUT}
-  YUV_RAW_INPUT_WIDTH=${YUV_RAW_INPUT_WIDTH}
-  YUV_RAW_INPUT_HEIGHT=${YUV_RAW_INPUT_HEIGHT}"
+  VPX_TEST_FILTER=${VPX_TEST_FILTER}
+  VPX_TEST_RUN_DISABLED_TESTS=${VPX_TEST_RUN_DISABLED_TESTS}
+  VPX_TEST_SHOW_PROGRAM_OUTPUT=${VPX_TEST_SHOW_PROGRAM_OUTPUT}"

 fi  # End $VPX_TEST_TOOLS_COMMON_SH pseudo include guard.
--- a/test/twopass_encoder.sh
+++ b/test/twopass_encoder.sh
@@ -29,10 +29,7 @@ twopass_encoder() {
  local codec="$1"
  local output_file="${VPX_TEST_OUTPUT_DIR}/twopass_encoder_${codec}.ivf"

-  if [ ! -x "${encoder}" ]; then
-    elog "${encoder} does not exist or is not executable."
-    return 1
-  fi
+  [ -x "${encoder}" ] || return 1

  eval "${encoder}" "${codec}" "${YUV_RAW_INPUT_WIDTH}" \
      "${YUV_RAW_INPUT_HEIGHT}" "${YUV_RAW_INPUT}" "${output_file}" \
--- a/test/user_priv_test.cc
+++ b/test/user_priv_test.cc
@@ -1,100 +0,0 @@
-/*
- *  Copyright (c) 2013 The WebM project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-#include <cstdio>
-#include <cstdlib>
-#include <string>
-#include "third_party/googletest/src/include/gtest/gtest.h"
-#include "./vpx_config.h"
-#include "test/acm_random.h"
-#include "test/codec_factory.h"
-#include "test/decode_test_driver.h"
-#include "test/ivf_video_source.h"
-#include "test/md5_helper.h"
-#include "test/util.h"
-#if CONFIG_WEBM_IO
-#include "test/webm_video_source.h"
-#endif
-#include "vpx_mem/vpx_mem.h"
-#include "vpx/vp8.h"
-
-namespace {
-
-using std::string;
-using libvpx_test::ACMRandom;
-
-#if CONFIG_WEBM_IO
-
-void CheckUserPrivateData(void *user_priv, int *target) {
-  // actual pointer value should be the same as expected.
-  EXPECT_EQ(reinterpret_cast<void *>(target), user_priv) <<
-      "user_priv pointer value does not match.";
-}
-
-// Decodes |filename|. Passes in user_priv data when calling DecodeFrame and
-// compares the user_priv from return img with the original user_priv to see if
-// they match. Both the pointer values and the values inside the addresses
-// should match.
-string DecodeFile(const string &filename) {
-  ACMRandom rnd(ACMRandom::DeterministicSeed());
-  libvpx_test::WebMVideoSource video(filename);
-  video.Init();
-
-  vpx_codec_dec_cfg_t cfg = {0};
-  libvpx_test::VP9Decoder decoder(cfg, 0);
-
-  libvpx_test::MD5 md5;
-  int frame_num = 0;
-  for (video.Begin(); !::testing::Test::HasFailure() && video.cxdata();
-       video.Next()) {
-    void *user_priv = reinterpret_cast<void *>(&frame_num);
-    const vpx_codec_err_t res =
-        decoder.DecodeFrame(video.cxdata(), video.frame_size(),
-                            (frame_num == 0) ? NULL : user_priv);
-    if (res != VPX_CODEC_OK) {
-      EXPECT_EQ(VPX_CODEC_OK, res) << decoder.DecodeError();
-      break;
-    }
-    libvpx_test::DxDataIterator dec_iter = decoder.GetDxData();
-    const vpx_image_t *img = NULL;
-
-    // Get decompressed data.
-    while ((img = dec_iter.Next())) {
-      if (frame_num == 0) {
-        CheckUserPrivateData(img->user_priv, NULL);
-      } else {
-        CheckUserPrivateData(img->user_priv, &frame_num);
-
-        // Also test ctrl_get_reference api.
-        struct vp9_ref_frame ref;
-        // Randomly fetch a reference frame.
-        ref.idx = rnd.Rand8() % 3;
-        decoder.Control(VP9_GET_REFERENCE, &ref);
-
-        CheckUserPrivateData(ref.img.user_priv, NULL);
-      }
-      md5.Add(img);
-    }
-
-    frame_num++;
-  }
-  return string(md5.Get());
-}
-
-TEST(UserPrivTest, VideoDecode) {
-  // no tiles or frame parallel; this exercises the decoding to test the
-  // user_priv.
-  EXPECT_STREQ("b35a1b707b28e82be025d960aba039bc",
-               DecodeFile("vp90-2-03-size-226x226.webm").c_str());
-}
-
-#endif  // CONFIG_WEBM_IO
-
-}  // namespace
--- a/test/variance_test.cc
+++ b/test/variance_test.cc
@@ -702,57 +702,6 @@ INSTANTIATE_TEST_CASE_P(
                      make_tuple(6, 6, subpel_avg_variance64x64_ssse3)));
 #endif
 #endif
-
-#if HAVE_AVX2
-// TODO(jzern): these prototypes can be removed after the avx2 versions are
-// reenabled in vp9_rtcd_defs.pl.
-extern "C" {
-unsigned int vp9_sub_pixel_variance32x32_avx2(
-    const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset,
-    const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp9_sub_pixel_variance64x64_avx2(
-    const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset,
-    const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp9_sub_pixel_avg_variance32x32_avx2(
-    const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset,
-    const uint8_t *ref_ptr, int ref_stride, unsigned int *sse,
-    const uint8_t *second_pred);
-unsigned int vp9_sub_pixel_avg_variance64x64_avx2(
-    const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset,
-    const uint8_t *ref_ptr, int ref_stride, unsigned int *sse,
-    const uint8_t *second_pred);
-}
-const vp9_variance_fn_t variance16x16_avx2 = vp9_variance16x16_avx2;
-const vp9_variance_fn_t variance32x16_avx2 = vp9_variance32x16_avx2;
-const vp9_variance_fn_t variance32x32_avx2 = vp9_variance32x32_avx2;
-const vp9_variance_fn_t variance64x32_avx2 = vp9_variance64x32_avx2;
-const vp9_variance_fn_t variance64x64_avx2 = vp9_variance64x64_avx2;
-INSTANTIATE_TEST_CASE_P(
-    AVX2, VP9VarianceTest,
-    ::testing::Values(make_tuple(4, 4, variance16x16_avx2),
-                      make_tuple(5, 4, variance32x16_avx2),
-                      make_tuple(5, 5, variance32x32_avx2),
-                      make_tuple(6, 5, variance64x32_avx2),
-                      make_tuple(6, 6, variance64x64_avx2)));
-
-const vp9_subpixvariance_fn_t subpel_variance32x32_avx2 =
-    vp9_sub_pixel_variance32x32_avx2;
-const vp9_subpixvariance_fn_t subpel_variance64x64_avx2 =
-    vp9_sub_pixel_variance64x64_avx2;
-INSTANTIATE_TEST_CASE_P(
-    DISABLED_AVX2, VP9SubpelVarianceTest,
-    ::testing::Values(make_tuple(5, 5, subpel_variance32x32_avx2),
-                      make_tuple(6, 6, subpel_variance64x64_avx2)));
-
-const vp9_subp_avg_variance_fn_t subpel_avg_variance32x32_avx2 =
-    vp9_sub_pixel_avg_variance32x32_avx2;
-const vp9_subp_avg_variance_fn_t subpel_avg_variance64x64_avx2 =
-    vp9_sub_pixel_avg_variance64x64_avx2;
-INSTANTIATE_TEST_CASE_P(
-    DISABLED_AVX2, VP9SubpelAvgVarianceTest,
-    ::testing::Values(make_tuple(5, 5, subpel_avg_variance32x32_avx2),
-                      make_tuple(6, 6, subpel_avg_variance64x64_avx2)));
-#endif  // HAVE_AVX2
 #endif  // CONFIG_VP9_ENCODER

 }  // namespace vp9
--- a/test/video_source.h
+++ b/test/video_source.h
@@ -50,15 +50,6 @@ static FILE *OpenTestDataFile(const std::string& file_name) {
  return fopen(path_to_source.c_str(), "rb");
 }

-static FILE *OpenTestOutFile(const std::string& file_name) {
-  const std::string path_to_source = GetDataPath() + "/" + file_name;
-  return fopen(path_to_source.c_str(), "wb");
-}
-
-static FILE *OpenTempOutFile() {
-  return tmpfile();
-}
-
 // Abstract base class for test video sources, which provide a stream of
 // vpx_image_t images with associated timestamps and duration.
 class VideoSource {
--- a/test/vp8cx_set_ref.sh
+++ b/test/vp8cx_set_ref.sh
@@ -34,10 +34,7 @@ vpx_set_ref() {
  local output_file="${VPX_TEST_OUTPUT_DIR}/vp8cx_set_ref_${codec}.ivf"
  local ref_frame_num=90

-  if [ ! -x "${encoder}" ]; then
-    elog "${encoder} does not exist or is not executable."
-    return 1
-  fi
+  [ -x "${encoder}" ] || return 1

  eval "${encoder}" "${YUV_RAW_INPUT_WIDTH}" "${YUV_RAW_INPUT_HEIGHT}" \
      "${YUV_RAW_INPUT}" "${output_file}" "${ref_frame_num}" \
--- a/test/vp9_frame_parallel_test.cc
+++ b/test/vp9_frame_parallel_test.cc
@@ -1,208 +0,0 @@
-/*
- *  Copyright (c) 2014 The WebM project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-#include <cstdio>
-#include <cstdlib>
-#include <string>
-#include "third_party/googletest/src/include/gtest/gtest.h"
-#include "./vpx_config.h"
-#include "test/codec_factory.h"
-#include "test/decode_test_driver.h"
-#include "test/ivf_video_source.h"
-#include "test/md5_helper.h"
-#include "test/util.h"
-#if CONFIG_WEBM_IO
-#include "test/webm_video_source.h"
-#endif
-#include "vpx_mem/vpx_mem.h"
-
-namespace {
-
-using std::string;
-
-#if CONFIG_WEBM_IO
-
-struct FileList {
-  const char *name;
-  // md5 sum for decoded frames which does not include skipped frames.
-  const char *expected_md5;
-  const int pause_frame_num;
-};
-
-// Decodes |filename| with |num_threads|. Pause at the specified frame_num,
-// seek to next key frame and then continue decoding until the end. Return
-// the md5 of the decoded frames which does not include skipped frames.
-string DecodeFile(const string &filename, int num_threads, int pause_num) {
-  libvpx_test::WebMVideoSource video(filename);
-  video.Init();
-  int in_frames = 0;
-  int out_frames = 0;
-
-  vpx_codec_dec_cfg_t cfg = {0};
-  cfg.threads = num_threads;
-  vpx_codec_flags_t flags = 0;
-  flags |= VPX_CODEC_USE_FRAME_THREADING;
-  libvpx_test::VP9Decoder decoder(cfg, flags, 0);
-
-  libvpx_test::MD5 md5;
-  video.Begin();
-
-  do {
-    ++in_frames;
-    const vpx_codec_err_t res =
-        decoder.DecodeFrame(video.cxdata(), video.frame_size());
-    if (res != VPX_CODEC_OK) {
-      EXPECT_EQ(VPX_CODEC_OK, res) << decoder.DecodeError();
-      break;
-    }
-
-    // Pause at specified frame number.
-    if (in_frames == pause_num) {
-      // Flush the decoder and then seek to next key frame.
-      decoder.DecodeFrame(NULL, 0);
-      video.SeekToNextKeyFrame();
-    } else {
-      video.Next();
-    }
-
-    // Flush the decoder at the end of the video.
-    if (!video.cxdata())
-      decoder.DecodeFrame(NULL, 0);
-
-    libvpx_test::DxDataIterator dec_iter = decoder.GetDxData();
-    const vpx_image_t *img;
-
-    // Get decompressed data
-    while ((img = dec_iter.Next())) {
-      ++out_frames;
-      md5.Add(img);
-    }
-  } while (video.cxdata() != NULL);
-
-  EXPECT_EQ(in_frames, out_frames) <<
-      "Input frame count does not match output frame count";
-
-  return string(md5.Get());
-}
-
-void DecodeFiles(const FileList files[]) {
-  for (const FileList *iter = files; iter->name != NULL; ++iter) {
-    SCOPED_TRACE(iter->name);
-    for (int t = 2; t <= 8; ++t) {
-      EXPECT_EQ(iter->expected_md5,
-                DecodeFile(iter->name, t, iter->pause_frame_num))
-          << "threads = " << t;
-    }
-  }
-}
-
-TEST(VP9MultiThreadedFrameParallel, PauseSeekResume) {
-  // vp90-2-07-frame_parallel-1.webm is a 40 frame video file with
-  // one key frame for every ten frames.
-  static const FileList files[] = {
-    { "vp90-2-07-frame_parallel-1.webm",
-      "6ea7c3875d67252e7caf2bc6e75b36b1", 6},
-    { "vp90-2-07-frame_parallel-1.webm",
-      "4bb634160c7356a8d7d4299b6dc83a45", 12},
-    { "vp90-2-07-frame_parallel-1.webm",
-      "89772591e6ef461f9fa754f916c78ed8", 26},
-    { NULL, NULL, 0},
-  };
-  DecodeFiles(files);
-}
-
-struct InvalidFileList {
-  const char *name;
-  // md5 sum for decoded frames which does not include corrupted frames.
-  const char *expected_md5;
-  // Expected number of decoded frames which does not include corrupted frames.
-  const int expected_frame_count;
-};
-
-// Decodes |filename| with |num_threads|. Return the md5 of the decoded
-// frames which does not include corrupted frames.
-string DecodeInvalidFile(const string &filename, int num_threads,
-                         int expected_frame_count) {
-  libvpx_test::WebMVideoSource video(filename);
-  video.Init();
-
-  vpx_codec_dec_cfg_t cfg = vpx_codec_dec_cfg_t();
-  cfg.threads = num_threads;
-  const vpx_codec_flags_t flags = VPX_CODEC_USE_FRAME_THREADING;
-  libvpx_test::VP9Decoder decoder(cfg, flags, 0);
-
-  libvpx_test::MD5 md5;
-  video.Begin();
-
-  int out_frames = 0;
-  do {
-    const vpx_codec_err_t res =
-        decoder.DecodeFrame(video.cxdata(), video.frame_size());
-    // TODO(hkuang): frame parallel mode should return an error on corruption.
-    if (res != VPX_CODEC_OK) {
-      EXPECT_EQ(VPX_CODEC_OK, res) << decoder.DecodeError();
-      break;
-    }
-
-    video.Next();
-
-    // Flush the decoder at the end of the video.
-    if (!video.cxdata())
-      decoder.DecodeFrame(NULL, 0);
-
-    libvpx_test::DxDataIterator dec_iter = decoder.GetDxData();
-    const vpx_image_t *img;
-
-    // Get decompressed data
-    while ((img = dec_iter.Next())) {
-      ++out_frames;
-      md5.Add(img);
-    }
-  } while (video.cxdata() != NULL);
-
-  EXPECT_EQ(expected_frame_count, out_frames) <<
-      "Input frame count does not match expected output frame count";
-
-  return string(md5.Get());
-}
-
-void DecodeInvalidFiles(const InvalidFileList files[]) {
-  for (const InvalidFileList *iter = files; iter->name != NULL; ++iter) {
-    SCOPED_TRACE(iter->name);
-    for (int t = 2; t <= 8; ++t) {
-      EXPECT_EQ(iter->expected_md5,
-                DecodeInvalidFile(iter->name, t, iter->expected_frame_count))
-          << "threads = " << t;
-    }
-  }
-}
-
-TEST(VP9MultiThreadedFrameParallel, InvalidFileTest) {
-  static const InvalidFileList files[] = {
-    // invalid-vp90-2-07-frame_parallel-1.webm is a 40 frame video file with
-    // one key frame for every ten frames. The 11th frame has corrupted data.
-    { "invalid-vp90-2-07-frame_parallel-1.webm",
-      "0549d0f45f60deaef8eb708e6c0eb6cb", 30},
-    // invalid-vp90-2-07-frame_parallel-2.webm is a 40 frame video file with
-    // one key frame for every ten frames. The 1st and 31st frames have
-    // corrupted data.
-    { "invalid-vp90-2-07-frame_parallel-2.webm",
-      "6a1f3cf6f9e7a364212fadb9580d525e", 20},
-    // invalid-vp90-2-07-frame_parallel-3.webm is a 40 frame video file with
-    // one key frame for every ten frames. The 13th frame has corrupted data.
-    { "invalid-vp90-2-07-frame_parallel-3.webm",
-      "a567c8259d27ad32b1b7f58db5ac89dd", 32},
-    { NULL, NULL, 0},
-  };
-  DecodeInvalidFiles(files);
-}
-
-#endif  // CONFIG_WEBM_IO
-}  // namespace
--- a/test/vp9_lossless_test.cc
+++ b/test/vp9_lossless_test.cc
@@ -36,17 +36,6 @@ class LosslessTestLarge : public ::libvpx_test::EncoderTest,
    SetMode(encoding_mode_);
  }

-  virtual void PreEncodeFrameHook(::libvpx_test::VideoSource *video,
-                                  ::libvpx_test::Encoder *encoder) {
-    if (video->frame() == 1) {
-      // Only call Control if quantizer > 0 to verify that using quantizer
-      // alone will activate lossless
-      if (cfg_.rc_max_quantizer > 0 || cfg_.rc_min_quantizer > 0) {
-        encoder->Control(VP9E_SET_LOSSLESS, 1);
-      }
-    }
-  }
-
  virtual void BeginPassHook(unsigned int /*pass*/) {
    psnr_ = kMaxPsnr;
    nframes_ = 0;
@@ -102,24 +91,5 @@ TEST_P(LosslessTestLarge, TestLossLessEncoding444) {
  EXPECT_GE(psnr_lossless, kMaxPsnr);
 }

-TEST_P(LosslessTestLarge, TestLossLessEncodingCtrl) {
-  const vpx_rational timebase = { 33333333, 1000000000 };
-  cfg_.g_timebase = timebase;
-  cfg_.rc_target_bitrate = 2000;
-  cfg_.g_lag_in_frames = 25;
-  // Intentionally set Q > 0, to make sure control can be used to activate
-  // lossless
-  cfg_.rc_min_quantizer = 10;
-  cfg_.rc_max_quantizer = 20;
-
-  init_flags_ = VPX_CODEC_USE_PSNR;
-
-  libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
-                                     timebase.den, timebase.num, 0, 10);
-  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
-  const double psnr_lossless = GetMinPsnr();
-  EXPECT_GE(psnr_lossless, kMaxPsnr);
-}
-
 VP9_INSTANTIATE_TEST_CASE(LosslessTestLarge, ALL_TEST_MODES);
 }  // namespace
--- a/test/vp9_spatial_svc_encoder.sh
+++ b/test/vp9_spatial_svc_encoder.sh
@@ -34,10 +34,7 @@ vp9_spatial_svc_encoder() {

  shift

-  if [ ! -x "${encoder}" ]; then
-    elog "${encoder} does not exist or is not executable."
-    return 1
-  fi
+  [ -x "${encoder}" ] || return 1

  eval "${encoder}" -w "${YUV_RAW_INPUT_WIDTH}" -h "${YUV_RAW_INPUT_HEIGHT}" \
      -k "${max_kf}" -f "${frames_to_encode}" "$@" "${YUV_RAW_INPUT}" \
--- a/test/vp9_thread_test.cc
+++ b/test/vp9_thread_test.cc
@@ -18,7 +18,7 @@
 #if CONFIG_WEBM_IO
 #include "test/webm_video_source.h"
 #endif
-#include "vp9/common/vp9_thread.h"
+#include "vp9/decoder/vp9_thread.h"

 namespace {

@@ -28,11 +28,11 @@ class VP9WorkerThreadTest : public ::testing::TestWithParam<bool> {
 protected:
  virtual ~VP9WorkerThreadTest() {}
  virtual void SetUp() {
-    vp9_get_worker_interface()->init(&worker_);
+    vp9_worker_init(&worker_);
  }

  virtual void TearDown() {
-    vp9_get_worker_interface()->end(&worker_);
+    vp9_worker_end(&worker_);
  }

  VP9Worker worker_;
@@ -45,11 +45,10 @@ int ThreadHook(void* data, void* return_value) {
 }

 TEST_P(VP9WorkerThreadTest, HookSuccess) {
-  // should be a no-op.
-  EXPECT_NE(vp9_get_worker_interface()->sync(&worker_), 0);
+  EXPECT_NE(vp9_worker_sync(&worker_), 0);  // should be a no-op.

  for (int i = 0; i < 2; ++i) {
-    EXPECT_NE(vp9_get_worker_interface()->reset(&worker_), 0);
+    EXPECT_NE(vp9_worker_reset(&worker_), 0);

    int hook_data = 0;
    int return_value = 1;  // return successfully from the hook
@@ -59,21 +58,20 @@ TEST_P(VP9WorkerThreadTest, HookSuccess) {

    const bool synchronous = GetParam();
    if (synchronous) {
-      vp9_get_worker_interface()->execute(&worker_);
+      vp9_worker_execute(&worker_);
    } else {
-      vp9_get_worker_interface()->launch(&worker_);
+      vp9_worker_launch(&worker_);
    }
-    EXPECT_NE(vp9_get_worker_interface()->sync(&worker_), 0);
+    EXPECT_NE(vp9_worker_sync(&worker_), 0);
    EXPECT_FALSE(worker_.had_error);
    EXPECT_EQ(5, hook_data);

-    // should be a no-op.
-    EXPECT_NE(vp9_get_worker_interface()->sync(&worker_), 0);
+    EXPECT_NE(vp9_worker_sync(&worker_), 0);  // should be a no-op.
  }
 }

 TEST_P(VP9WorkerThreadTest, HookFailure) {
-  EXPECT_NE(vp9_get_worker_interface()->reset(&worker_), 0);
+  EXPECT_NE(vp9_worker_reset(&worker_), 0);

  int hook_data = 0;
  int return_value = 0;  // return failure from the hook
@@ -83,49 +81,26 @@ TEST_P(VP9WorkerThreadTest, HookFailure) {

  const bool synchronous = GetParam();
  if (synchronous) {
-    vp9_get_worker_interface()->execute(&worker_);
+    vp9_worker_execute(&worker_);
  } else {
-    vp9_get_worker_interface()->launch(&worker_);
+    vp9_worker_launch(&worker_);
  }
-  EXPECT_FALSE(vp9_get_worker_interface()->sync(&worker_));
+  EXPECT_FALSE(vp9_worker_sync(&worker_));
  EXPECT_EQ(1, worker_.had_error);

  // Ensure _reset() clears the error and _launch() can be called again.
  return_value = 1;
-  EXPECT_NE(vp9_get_worker_interface()->reset(&worker_), 0);
+  EXPECT_NE(vp9_worker_reset(&worker_), 0);
  EXPECT_FALSE(worker_.had_error);
-  vp9_get_worker_interface()->launch(&worker_);
-  EXPECT_NE(vp9_get_worker_interface()->sync(&worker_), 0);
+  vp9_worker_launch(&worker_);
+  EXPECT_NE(vp9_worker_sync(&worker_), 0);
  EXPECT_FALSE(worker_.had_error);
 }

-TEST(VP9WorkerThreadTest, TestInterfaceAPI) {
-  EXPECT_EQ(0, vp9_set_worker_interface(NULL));
-  EXPECT_TRUE(vp9_get_worker_interface() != NULL);
-  for (int i = 0; i < 6; ++i) {
-    VP9WorkerInterface winterface = *vp9_get_worker_interface();
-    switch (i) {
-      default:
-      case 0: winterface.init = NULL; break;
-      case 1: winterface.reset = NULL; break;
-      case 2: winterface.sync = NULL; break;
-      case 3: winterface.launch = NULL; break;
-      case 4: winterface.execute = NULL; break;
-      case 5: winterface.end = NULL; break;
-    }
-    EXPECT_EQ(0, vp9_set_worker_interface(&winterface));
-  }
-}
-
 // -----------------------------------------------------------------------------
 // Multi-threaded decode tests

 #if CONFIG_WEBM_IO
-struct FileList {
-  const char *name;
-  const char *expected_md5;
-};
-
 // Decodes |filename| with |num_threads|. Returns the md5 of the decoded frames.
 string DecodeFile(const string& filename, int num_threads) {
  libvpx_test::WebMVideoSource video(filename);
@@ -155,77 +130,39 @@ string DecodeFile(const string& filename, int num_threads) {
  return string(md5.Get());
 }

-void DecodeFiles(const FileList files[]) {
-  for (const FileList *iter = files; iter->name != NULL; ++iter) {
-    SCOPED_TRACE(iter->name);
-    for (int t = 2; t <= 8; ++t) {
-      EXPECT_EQ(iter->expected_md5, DecodeFile(iter->name, t))
-          << "threads = " << t;
-    }
-  }
-}
-
-// Trivial serialized thread worker interface implementation.
-// Note any worker that requires synchronization between other workers will
-// hang.
-namespace impl {
-
-void Init(VP9Worker *const worker) { memset(worker, 0, sizeof(*worker)); }
-int Reset(VP9Worker *const /*worker*/) { return 1; }
-int Sync(VP9Worker *const worker) { return !worker->had_error; }
-
-void Execute(VP9Worker *const worker) {
-  worker->had_error |= !worker->hook(worker->data1, worker->data2);
-}
-
-void Launch(VP9Worker *const worker) { Execute(worker); }
-void End(VP9Worker *const /*worker*/) {}
-
-}  // namespace impl
-
-TEST(VP9WorkerThreadTest, TestSerialInterface) {
-  static const VP9WorkerInterface serial_interface = {
-    impl::Init, impl::Reset, impl::Sync, impl::Launch, impl::Execute, impl::End
-  };
-  // TODO(jzern): Avoid using a file that will use the row-based thread
-  // loopfilter, with the simple serialized implementation it will hang. This is
-  // due to its expectation that rows will be run in parallel as they wait on
-  // progress in the row above before proceeding.
-  static const char expected_md5[] = "b35a1b707b28e82be025d960aba039bc";
-  static const char filename[] = "vp90-2-03-size-226x226.webm";
-  VP9WorkerInterface default_interface = *vp9_get_worker_interface();
-
-  EXPECT_NE(vp9_set_worker_interface(&serial_interface), 0);
-  EXPECT_EQ(expected_md5, DecodeFile(filename, 2));
-
-  // Reset the interface.
-  EXPECT_NE(vp9_set_worker_interface(&default_interface), 0);
-  EXPECT_EQ(expected_md5, DecodeFile(filename, 2));
-}
-
-TEST(VP9DecodeMultiThreadedTest, Decode) {
+TEST(VP9DecodeMTTest, MTDecode) {
  // no tiles or frame parallel; this exercises loop filter threading.
-  EXPECT_EQ("b35a1b707b28e82be025d960aba039bc",
-            DecodeFile("vp90-2-03-size-226x226.webm", 2));
+  EXPECT_STREQ("b35a1b707b28e82be025d960aba039bc",
+               DecodeFile("vp90-2-03-size-226x226.webm", 2).c_str());
 }

-TEST(VP9DecodeMultiThreadedTest, Decode2) {
-  static const FileList files[] = {
+TEST(VP9DecodeMTTest, MTDecode2) {
+  static const struct {
+    const char *name;
+    const char *expected_md5;
+  } files[] = {
    { "vp90-2-08-tile_1x2_frame_parallel.webm",
      "68ede6abd66bae0a2edf2eb9232241b6" },
    { "vp90-2-08-tile_1x4_frame_parallel.webm",
      "368ebc6ebf3a5e478d85b2c3149b2848" },
    { "vp90-2-08-tile_1x8_frame_parallel.webm",
      "17e439da2388aff3a0f69cb22579c6c1" },
-    { NULL, NULL }
  };

-  DecodeFiles(files);
+  for (int i = 0; i < static_cast<int>(sizeof(files) / sizeof(files[0])); ++i) {
+    for (int t = 2; t <= 8; ++t) {
+      EXPECT_STREQ(files[i].expected_md5, DecodeFile(files[i].name, t).c_str())
+          << "threads = " << t;
+    }
+  }
 }

 // Test tile quantity changes within one file.
-TEST(VP9DecodeMultiThreadedTest, Decode3) {
-  static const FileList files[] = {
+TEST(VP9DecodeMTTest, MTDecode3) {
+  static const struct {
+    const char *name;
+    const char *expected_md5;
+  } files[] = {
    { "vp90-2-14-resize-fp-tiles-1-16.webm",
      "0cd5e632c326297e975f38949c31ea94" },
    { "vp90-2-14-resize-fp-tiles-1-2-4-8-16.webm",
@@ -270,10 +207,14 @@ TEST(VP9DecodeMultiThreadedTest, Decode3) {
      "ae96f21f21b6370cc0125621b441fc52" },
    { "vp90-2-14-resize-fp-tiles-8-4.webm",
      "3eb4f24f10640d42218f7fd7b9fd30d4" },
-    { NULL, NULL }
  };

-  DecodeFiles(files);
+  for (int i = 0; i < static_cast<int>(sizeof(files) / sizeof(files[0])); ++i) {
+    for (int t = 2; t <= 8; ++t) {
+      EXPECT_STREQ(files[i].expected_md5, DecodeFile(files[i].name, t).c_str())
+          << "threads = " << t;
+    }
+  }
 }
 #endif  // CONFIG_WEBM_IO

--- a/test/vpx_temporal_svc_encoder.sh
+++ b/test/vpx_temporal_svc_encoder.sh
@@ -39,10 +39,7 @@ vpx_tsvc_encoder() {

  shift 2

-  if [ ! -x "${encoder}" ]; then
-    elog "${encoder} does not exist or is not executable."
-    return 1
-  fi
+  [ -x "${encoder}" ] || return 1

  eval "${encoder}" "${YUV_RAW_INPUT}" "${output_file}" "${codec}" \
      "${YUV_RAW_INPUT_WIDTH}" "${YUV_RAW_INPUT_HEIGHT}" \
--- a/test/webm_video_source.h
+++ b/test/webm_video_source.h
@@ -69,18 +69,6 @@ class WebMVideoSource : public CompressedVideoSource {
    }
  }

-  void SeekToNextKeyFrame() {
-    ASSERT_TRUE(vpx_ctx_->file != NULL);
-    do {
-      const int status = webm_read_frame(webm_ctx_, &buf_, &buf_sz_, &buf_sz_);
-      ASSERT_GE(status, 0) << "webm_read_frame failed";
-      ++frame_;
-      if (status == 1) {
-        end_of_file_ = true;
-      }
-    } while (!webm_ctx_->is_key_frame && !end_of_file_);
-  }
-
  virtual const uint8_t *cxdata() const {
    return end_of_file_ ? NULL : buf_;
  }
--- a/test/y4m_test.cc
+++ b/test/y4m_test.cc
@@ -1,193 +0,0 @@
-/*
- *  Copyright (c) 2012 The WebM project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-#include <string>
-#include "test/md5_helper.h"
-#include "test/util.h"
-#include "test/y4m_video_source.h"
-#include "third_party/googletest/src/include/gtest/gtest.h"
-#include "./vpx_config.h"
-#include "./y4menc.h"
-
-namespace {
-
-using std::string;
-using std::tr1::make_tuple;
-
-static const unsigned int kWidth  = 160;
-static const unsigned int kHeight = 90;
-static const unsigned int kFrames = 10;
-
-typedef std::tr1::tuple<const char *, const unsigned int,
-        const vpx_img_fmt, const char *> test_entry_type;
-
-static const test_entry_type kY4mTestVectors[] = {
-  make_tuple("park_joy_90p_8_420.y4m", 8, VPX_IMG_FMT_I420,
-             "e5406275b9fc6bb3436c31d4a05c1cab"),
-  make_tuple("park_joy_90p_8_422.y4m", 8, VPX_IMG_FMT_I422,
-             "284a47a47133b12884ec3a14e959a0b6"),
-  make_tuple("park_joy_90p_8_444.y4m", 8, VPX_IMG_FMT_I444,
-             "90517ff33843d85de712fd4fe60dbed0"),
-  make_tuple("park_joy_90p_10_420.y4m", 10, VPX_IMG_FMT_I42016,
-             "63f21f9f717d8b8631bd2288ee87137b"),
-  make_tuple("park_joy_90p_10_422.y4m", 10, VPX_IMG_FMT_I42216,
-             "48ab51fb540aed07f7ff5af130c9b605"),
-  make_tuple("park_joy_90p_10_444.y4m", 10, VPX_IMG_FMT_I44416,
-             "067bfd75aa85ff9bae91fa3e0edd1e3e"),
-  make_tuple("park_joy_90p_12_420.y4m", 12, VPX_IMG_FMT_I42016,
-             "9e6d8f6508c6e55625f6b697bc461cef"),
-  make_tuple("park_joy_90p_12_422.y4m", 12, VPX_IMG_FMT_I42216,
-             "b239c6b301c0b835485be349ca83a7e3"),
-  make_tuple("park_joy_90p_12_444.y4m", 12, VPX_IMG_FMT_I44416,
-             "5a6481a550821dab6d0192f5c63845e9")
-};
-
-static void write_image_file(const vpx_image_t *img, FILE *file) {
-  int plane, y;
-  for (plane = 0; plane < 3; ++plane) {
-    const unsigned char *buf = img->planes[plane];
-    const int stride = img->stride[plane];
-    const int bytes_per_sample = (img->fmt & VPX_IMG_FMT_HIGH) ? 2 : 1;
-    const int h = (plane ? (img->d_h + img->y_chroma_shift) >>
-                   img->y_chroma_shift : img->d_h);
-    const int w = (plane ? (img->d_w + img->x_chroma_shift) >>
-                   img->x_chroma_shift : img->d_w);
-    for (y = 0; y < h; ++y) {
-      fwrite(buf, bytes_per_sample, w, file);
-      buf += stride;
-    }
-  }
-}
-
-class Y4mVideoSourceTest
-    : public ::testing::TestWithParam<test_entry_type>,
-      public ::libvpx_test::Y4mVideoSource {
- protected:
-  Y4mVideoSourceTest() : Y4mVideoSource("", 0, 0) {}
-
-  virtual ~Y4mVideoSourceTest() {
-    CloseSource();
-  }
-
-  virtual void Init(const std::string &file_name, int limit) {
-    file_name_ = file_name;
-    start_ = 0;
-    limit_ = limit;
-    frame_ = 0;
-    Begin();
-  }
-
-  // Checks y4m header information
-  void HeaderChecks(unsigned int bit_depth, vpx_img_fmt_t fmt) {
-    ASSERT_TRUE(input_file_ != NULL);
-    ASSERT_EQ(y4m_.pic_w, (int)kWidth);
-    ASSERT_EQ(y4m_.pic_h, (int)kHeight);
-    ASSERT_EQ(img()->d_w, kWidth);
-    ASSERT_EQ(img()->d_h, kHeight);
-    ASSERT_EQ(y4m_.bit_depth, bit_depth);
-    ASSERT_EQ(y4m_.vpx_fmt, fmt);
-    if (fmt == VPX_IMG_FMT_I420 || fmt == VPX_IMG_FMT_I42016) {
-      ASSERT_EQ(y4m_.bps, (int)y4m_.bit_depth * 3 / 2);
-      ASSERT_EQ(img()->x_chroma_shift, 1U);
-      ASSERT_EQ(img()->y_chroma_shift, 1U);
-    }
-    if (fmt == VPX_IMG_FMT_I422 || fmt == VPX_IMG_FMT_I42216) {
-      ASSERT_EQ(y4m_.bps, (int)y4m_.bit_depth * 2);
-      ASSERT_EQ(img()->x_chroma_shift, 1U);
-      ASSERT_EQ(img()->y_chroma_shift, 0U);
-    }
-    if (fmt == VPX_IMG_FMT_I444 || fmt == VPX_IMG_FMT_I44416) {
-      ASSERT_EQ(y4m_.bps, (int)y4m_.bit_depth * 3);
-      ASSERT_EQ(img()->x_chroma_shift, 0U);
-      ASSERT_EQ(img()->y_chroma_shift, 0U);
-    }
-  }
-
-  // Checks MD5 of the raw frame data
-  void Md5Check(const string &expected_md5) {
-    ASSERT_TRUE(input_file_ != NULL);
-    libvpx_test::MD5 md5;
-    for (unsigned int i = start_; i < limit_; i++) {
-      md5.Add(img());
-      Next();
-    }
-    ASSERT_EQ(string(md5.Get()), expected_md5);
-  }
-};
-
-TEST_P(Y4mVideoSourceTest, SourceTest) {
-  const char *filename = GET_PARAM(0);
-  const unsigned int bit_depth = GET_PARAM(1);
-  const vpx_img_fmt format = GET_PARAM(2);
-  const char *md5raw = GET_PARAM(3);
-
-  Init(filename, kFrames);
-  HeaderChecks(bit_depth, format);
-  Md5Check(md5raw);
-}
-
-INSTANTIATE_TEST_CASE_P(C, Y4mVideoSourceTest,
-                        ::testing::ValuesIn(kY4mTestVectors));
-
-class Y4mVideoWriteTest
-    : public Y4mVideoSourceTest {
- protected:
-  Y4mVideoWriteTest() : Y4mVideoSourceTest() {}
-
-  virtual void ReplaceInputFp(FILE *input_file) {
-    CloseSource();
-    frame_ = 0;
-    input_file_ = input_file;
-    rewind(input_file_);
-    ReadSourceToStart();
-  }
-
-  // Writes out a y4m file and then reads it back
-  void WriteY4mAndReadBack() {
-    ASSERT_TRUE(input_file_ != NULL);
-    char buf[Y4M_BUFFER_SIZE] = {0};
-    const struct VpxRational framerate = {y4m_.fps_n, y4m_.fps_d};
-    FILE *out_file = libvpx_test::OpenTempOutFile();
-    ASSERT_TRUE(out_file != NULL);
-    y4m_write_file_header(buf, sizeof(buf),
-                          kWidth, kHeight,
-                          &framerate, y4m_.vpx_fmt,
-                          y4m_.bit_depth);
-    fputs(buf, out_file);
-    for (unsigned int i = start_; i < limit_; i++) {
-      y4m_write_frame_header(buf, sizeof(buf));
-      fputs(buf, out_file);
-      write_image_file(img(), out_file);
-      Next();
-    }
-    ReplaceInputFp(out_file);
-  }
-
-  virtual void Init(const std::string &file_name, int limit) {
-    Y4mVideoSourceTest::Init(file_name, limit);
-    WriteY4mAndReadBack();
-  }
-};
-
-TEST_P(Y4mVideoWriteTest, WriteTest) {
-  const char *filename = GET_PARAM(0);
-  const unsigned int bit_depth = GET_PARAM(1);
-  const vpx_img_fmt format = GET_PARAM(2);
-  const char *md5raw = GET_PARAM(3);
-
-  Init(filename, kFrames);
-  HeaderChecks(bit_depth, format);
-  Md5Check(md5raw);
-}
-
-INSTANTIATE_TEST_CASE_P(C, Y4mVideoWriteTest,
-                        ::testing::ValuesIn(kY4mTestVectors));
-
-}  // namespace
--- a/test/y4m_video_source.h
+++ b/test/y4m_video_source.h
@@ -38,28 +38,22 @@ class Y4mVideoSource : public VideoSource {
    CloseSource();
  }

-  virtual void OpenSource() {
+  virtual void Begin() {
    CloseSource();
    input_file_ = OpenTestDataFile(file_name_);
    ASSERT_TRUE(input_file_ != NULL) << "Input file open failed. Filename: "
-                                     << file_name_;
-  }
+        << file_name_;

-  virtual void ReadSourceToStart() {
-    ASSERT_TRUE(input_file_ != NULL);
-    ASSERT_FALSE(y4m_input_open(&y4m_, input_file_, NULL, 0, 0));
+    y4m_input_open(&y4m_, input_file_, NULL, 0, 0);
    framerate_numerator_ = y4m_.fps_n;
    framerate_denominator_ = y4m_.fps_d;
+
    frame_ = 0;
    for (unsigned int i = 0; i < start_; i++) {
-      Next();
+        Next();
    }
-    FillFrame();
-  }

-  virtual void Begin() {
-    OpenSource();
-    ReadSourceToStart();
+    FillFrame();
  }

  virtual void Next() {
--- a/third_party/libmkv/EbmlIDs.h
+++ b/third_party/libmkv/EbmlIDs.h
@@ -1,231 +0,0 @@
-/*
- *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-#ifndef MKV_DEFS_HPP
-#define MKV_DEFS_HPP 1
-
-/* Commenting out values not available in webm, but available in matroska */
-
-enum mkv {
-  EBML = 0x1A45DFA3,
-  EBMLVersion = 0x4286,
-  EBMLReadVersion = 0x42F7,
-  EBMLMaxIDLength = 0x42F2,
-  EBMLMaxSizeLength = 0x42F3,
-  DocType = 0x4282,
-  DocTypeVersion = 0x4287,
-  DocTypeReadVersion = 0x4285,
-/* CRC_32 = 0xBF, */
-  Void = 0xEC,
-  SignatureSlot = 0x1B538667,
-  SignatureAlgo = 0x7E8A,
-  SignatureHash = 0x7E9A,
-  SignaturePublicKey = 0x7EA5,
-  Signature = 0x7EB5,
-  SignatureElements = 0x7E5B,
-  SignatureElementList = 0x7E7B,
-  SignedElement = 0x6532,
-  /* segment */
-  Segment = 0x18538067,
-  /* Meta Seek Information */
-  SeekHead = 0x114D9B74,
-  Seek = 0x4DBB,
-  SeekID = 0x53AB,
-  SeekPosition = 0x53AC,
-  /* Segment Information */
-  Info = 0x1549A966,
-/* SegmentUID = 0x73A4, */
-/* SegmentFilename = 0x7384, */
-/* PrevUID = 0x3CB923, */
-/* PrevFilename = 0x3C83AB, */
-/* NextUID = 0x3EB923, */
-/* NextFilename = 0x3E83BB, */
-/* SegmentFamily = 0x4444, */
-/* ChapterTranslate = 0x6924, */
-/* ChapterTranslateEditionUID = 0x69FC, */
-/* ChapterTranslateCodec = 0x69BF, */
-/* ChapterTranslateID = 0x69A5, */
-  TimecodeScale = 0x2AD7B1,
-  Segment_Duration = 0x4489,
-  DateUTC = 0x4461,
-/* Title = 0x7BA9, */
-  MuxingApp = 0x4D80,
-  WritingApp = 0x5741,
-  /* Cluster */
-  Cluster = 0x1F43B675,
-  Timecode = 0xE7,
-/* SilentTracks = 0x5854, */
-/* SilentTrackNumber = 0x58D7, */
-/* Position = 0xA7, */
-  PrevSize = 0xAB,
-  BlockGroup = 0xA0,
-  Block = 0xA1,
-/* BlockVirtual = 0xA2, */
-  BlockAdditions = 0x75A1,
-  BlockMore = 0xA6,
-  BlockAddID = 0xEE,
-  BlockAdditional = 0xA5,
-  BlockDuration = 0x9B,
-/* ReferencePriority = 0xFA, */
-  ReferenceBlock = 0xFB,
-/* ReferenceVirtual = 0xFD, */
-/* CodecState = 0xA4, */
-/* Slices = 0x8E, */
-/* TimeSlice = 0xE8, */
-  LaceNumber = 0xCC,
-/* FrameNumber = 0xCD, */
-/* BlockAdditionID = 0xCB, */
-/* MkvDelay = 0xCE, */
-/* Cluster_Duration = 0xCF, */
-  SimpleBlock = 0xA3,
-/* EncryptedBlock = 0xAF, */
-  /* Track */
-  Tracks = 0x1654AE6B,
-  TrackEntry = 0xAE,
-  TrackNumber = 0xD7,
-  TrackUID = 0x73C5,
-  TrackType = 0x83,
-  FlagEnabled = 0xB9,
-  FlagDefault = 0x88,
-  FlagForced = 0x55AA,
-  FlagLacing = 0x9C,
-/* MinCache = 0x6DE7, */
-/* MaxCache = 0x6DF8, */
-  DefaultDuration = 0x23E383,
-/* TrackTimecodeScale = 0x23314F, */
-/* TrackOffset = 0x537F, */
-  MaxBlockAdditionID = 0x55EE,
-  Name = 0x536E,
-  Language = 0x22B59C,
-  CodecID = 0x86,
-  CodecPrivate = 0x63A2,
-  CodecName = 0x258688,
-/* AttachmentLink = 0x7446, */
-/* CodecSettings = 0x3A9697, */
-/* CodecInfoURL = 0x3B4040, */
-/* CodecDownloadURL = 0x26B240, */
-/* CodecDecodeAll = 0xAA, */
-/* TrackOverlay = 0x6FAB, */
-/* TrackTranslate = 0x6624, */
-/* TrackTranslateEditionUID = 0x66FC, */
-/* TrackTranslateCodec = 0x66BF, */
-/* TrackTranslateTrackID = 0x66A5, */
-  /* video */
-  Video = 0xE0,
-  FlagInterlaced = 0x9A,
-  StereoMode = 0x53B8,
-  AlphaMode = 0x53C0,
-  PixelWidth = 0xB0,
-  PixelHeight = 0xBA,
-  PixelCropBottom = 0x54AA,
-  PixelCropTop = 0x54BB,
-  PixelCropLeft = 0x54CC,
-  PixelCropRight = 0x54DD,
-  DisplayWidth = 0x54B0,
-  DisplayHeight = 0x54BA,
-  DisplayUnit = 0x54B2,
-  AspectRatioType = 0x54B3,
-/* ColourSpace = 0x2EB524, */
-/* GammaValue = 0x2FB523, */
-  FrameRate = 0x2383E3,
-  /* end video */
-  /* audio */
-  Audio = 0xE1,
-  SamplingFrequency = 0xB5,
-  OutputSamplingFrequency = 0x78B5,
-  Channels = 0x9F,
-/* ChannelPositions = 0x7D7B, */
-  BitDepth = 0x6264,
-  /* end audio */
-  /* content encoding */
-/* ContentEncodings = 0x6d80, */
-/* ContentEncoding = 0x6240, */
-/* ContentEncodingOrder = 0x5031, */
-/* ContentEncodingScope = 0x5032, */
-/* ContentEncodingType = 0x5033, */
-/* ContentCompression = 0x5034, */
-/* ContentCompAlgo = 0x4254, */
-/* ContentCompSettings = 0x4255, */
-/* ContentEncryption = 0x5035, */
-/* ContentEncAlgo = 0x47e1, */
-/* ContentEncKeyID = 0x47e2, */
-/* ContentSignature = 0x47e3, */
-/* ContentSigKeyID = 0x47e4, */
-/* ContentSigAlgo = 0x47e5, */
-/* ContentSigHashAlgo = 0x47e6, */
-  /* end content encoding */
-  /* Cueing Data */
-  Cues = 0x1C53BB6B,
-  CuePoint = 0xBB,
-  CueTime = 0xB3,
-  CueTrackPositions = 0xB7,
-  CueTrack = 0xF7,
-  CueClusterPosition = 0xF1,
-  CueBlockNumber = 0x5378
-/* CueCodecState = 0xEA, */
-/* CueReference = 0xDB, */
-/* CueRefTime = 0x96, */
-/* CueRefCluster = 0x97, */
-/* CueRefNumber = 0x535F, */
-/* CueRefCodecState = 0xEB, */
-  /* Attachment */
-/* Attachments = 0x1941A469, */
-/* AttachedFile = 0x61A7, */
-/* FileDescription = 0x467E, */
-/* FileName = 0x466E, */
-/* FileMimeType = 0x4660, */
-/* FileData = 0x465C, */
-/* FileUID = 0x46AE, */
-/* FileReferral = 0x4675, */
-  /* Chapters */
-/* Chapters = 0x1043A770, */
-/* EditionEntry = 0x45B9, */
-/* EditionUID = 0x45BC, */
-/* EditionFlagHidden = 0x45BD, */
-/* EditionFlagDefault = 0x45DB, */
-/* EditionFlagOrdered = 0x45DD, */
-/* ChapterAtom = 0xB6, */
-/* ChapterUID = 0x73C4, */
-/* ChapterTimeStart = 0x91, */
-/* ChapterTimeEnd = 0x92, */
-/* ChapterFlagHidden = 0x98, */
-/* ChapterFlagEnabled = 0x4598, */
-/* ChapterSegmentUID = 0x6E67, */
-/* ChapterSegmentEditionUID = 0x6EBC, */
-/* ChapterPhysicalEquiv = 0x63C3, */
-/* ChapterTrack = 0x8F, */
-/* ChapterTrackNumber = 0x89, */
-/* ChapterDisplay = 0x80, */
-/* ChapString = 0x85, */
-/* ChapLanguage = 0x437C, */
-/* ChapCountry = 0x437E, */
-/* ChapProcess = 0x6944, */
-/* ChapProcessCodecID = 0x6955, */
-/* ChapProcessPrivate = 0x450D, */
-/* ChapProcessCommand = 0x6911, */
-/* ChapProcessTime = 0x6922, */
-/* ChapProcessData = 0x6933, */
-  /* Tagging */
-/* Tags = 0x1254C367, */
-/* Tag = 0x7373, */
-/* Targets = 0x63C0, */
-/* TargetTypeValue = 0x68CA, */
-/* TargetType = 0x63CA, */
-/* Tagging_TrackUID = 0x63C5, */
-/* Tagging_EditionUID = 0x63C9, */
-/* Tagging_ChapterUID = 0x63C4, */
-/* AttachmentUID = 0x63C6, */
-/* SimpleTag = 0x67C8, */
-/* TagName = 0x45A3, */
-/* TagLanguage = 0x447A, */
-/* TagDefault = 0x4484, */
-/* TagString = 0x4487, */
-/* TagBinary = 0x4485, */
-};
-#endif
--- a/third_party/libmkv/EbmlWriter.c
+++ b/third_party/libmkv/EbmlWriter.c
@@ -1,157 +0,0 @@
-/*
- *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-#include "EbmlWriter.h"
-#include <stdlib.h>
-#include <wchar.h>
-#include <string.h>
-#include <limits.h>
-#if defined(_MSC_VER)
-#define LITERALU64(n) n
-#else
-#define LITERALU64(n) n##LLU
-#endif
-
-void Ebml_WriteLen(EbmlGlobal *glob, int64_t val) {
-  /* TODO check and make sure we are not > than 0x0100000000000000LLU */
-  unsigned char size = 8; /* size in bytes to output */
-
-  /* mask to compare for byte size */
-  int64_t minVal = 0xff;
-
-  for (size = 1; size < 8; size ++) {
-    if (val < minVal)
-      break;
-
-    minVal = (minVal << 7);
-  }
-
-  val |= (((uint64_t)0x80) << ((size - 1) * 7));
-
-  Ebml_Serialize(glob, (void *) &val, sizeof(val), size);
-}
-
-void Ebml_WriteString(EbmlGlobal *glob, const char *str) {
-  const size_t size_ = strlen(str);
-  const uint64_t  size = size_;
-  Ebml_WriteLen(glob, size);
-  /* TODO: it's not clear from the spec whether the nul terminator
-   * should be serialized too.  For now we omit the null terminator.
-   */
-  Ebml_Write(glob, str, (unsigned long)size);
-}
-
-void Ebml_WriteUTF8(EbmlGlobal *glob, const wchar_t *wstr) {
-  const size_t strlen = wcslen(wstr);
-
-  /* TODO: it's not clear from the spec whether the nul terminator
-   * should be serialized too.  For now we include it.
-   */
-  const uint64_t  size = strlen;
-
-  Ebml_WriteLen(glob, size);
-  Ebml_Write(glob, wstr, (unsigned long)size);
-}
-
-void Ebml_WriteID(EbmlGlobal *glob, unsigned long class_id) {
-  int len;
-
-  if (class_id >= 0x01000000)
-    len = 4;
-  else if (class_id >= 0x00010000)
-    len = 3;
-  else if (class_id >= 0x00000100)
-    len = 2;
-  else
-    len = 1;
-
-  Ebml_Serialize(glob, (void *)&class_id, sizeof(class_id), len);
-}
-
-void Ebml_SerializeUnsigned64(EbmlGlobal *glob, unsigned long class_id, uint64_t ui) {
-  unsigned char sizeSerialized = 8 | 0x80;
-  Ebml_WriteID(glob, class_id);
-  Ebml_Serialize(glob, &sizeSerialized, sizeof(sizeSerialized), 1);
-  Ebml_Serialize(glob, &ui, sizeof(ui), 8);
-}
-
-void Ebml_SerializeUnsigned(EbmlGlobal *glob, unsigned long class_id, unsigned long ui) {
-  unsigned char size = 8; /* size in bytes to output */
-  unsigned char sizeSerialized = 0;
-  unsigned long minVal;
-
-  Ebml_WriteID(glob, class_id);
-  minVal = 0x7fLU; /* mask to compare for byte size */
-
-  for (size = 1; size < 4; size ++) {
-    if (ui < minVal) {
-      break;
-    }
-
-    minVal <<= 7;
-  }
-
-  sizeSerialized = 0x80 | size;
-  Ebml_Serialize(glob, &sizeSerialized, sizeof(sizeSerialized), 1);
-  Ebml_Serialize(glob, &ui, sizeof(ui), size);
-}
-/* TODO: perhaps this is a poor name for this id serializer helper function */
-void Ebml_SerializeBinary(EbmlGlobal *glob, unsigned long class_id, unsigned long bin) {
-  int size;
-  for (size = 4; size > 1; size--) {
-    if (bin & (unsigned int)0x000000ff << ((size - 1) * 8))
-      break;
-  }
-  Ebml_WriteID(glob, class_id);
-  Ebml_WriteLen(glob, size);
-  Ebml_WriteID(glob, bin);
-}
-
-void Ebml_SerializeFloat(EbmlGlobal *glob, unsigned long class_id, double d) {
-  unsigned char len = 0x88;
-
-  Ebml_WriteID(glob, class_id);
-  Ebml_Serialize(glob, &len, sizeof(len), 1);
-  Ebml_Serialize(glob,  &d, sizeof(d), 8);
-}
-
-void Ebml_WriteSigned16(EbmlGlobal *glob, short val) {
-  signed long out = ((val & 0x003FFFFF) | 0x00200000) << 8;
-  Ebml_Serialize(glob, &out, sizeof(out), 3);
-}
-
-void Ebml_SerializeString(EbmlGlobal *glob, unsigned long class_id, const char *s) {
-  Ebml_WriteID(glob, class_id);
-  Ebml_WriteString(glob, s);
-}
-
-void Ebml_SerializeUTF8(EbmlGlobal *glob, unsigned long class_id, wchar_t *s) {
-  Ebml_WriteID(glob,  class_id);
-  Ebml_WriteUTF8(glob,  s);
-}
-
-void Ebml_SerializeData(EbmlGlobal *glob, unsigned long class_id, unsigned char *data, unsigned long data_length) {
-  Ebml_WriteID(glob, class_id);
-  Ebml_WriteLen(glob, data_length);
-  Ebml_Write(glob,  data, data_length);
-}
-
-void Ebml_WriteVoid(EbmlGlobal *glob, unsigned long vSize) {
-  unsigned char tmp = 0;
-  unsigned long i = 0;
-
-  Ebml_WriteID(glob, 0xEC);
-  Ebml_WriteLen(glob, vSize);
-
-  for (i = 0; i < vSize; i++) {
-    Ebml_Write(glob, &tmp, 1);
-  }
-}
-
-/* TODO Serialize Date */
--- a/third_party/libmkv/EbmlWriter.h
+++ b/third_party/libmkv/EbmlWriter.h
@@ -1,42 +0,0 @@
-/*
- *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-#ifndef EBMLWRITER_HPP
-#define EBMLWRITER_HPP
-#include <stddef.h>
-#include "vpx/vpx_integer.h"
-
-/* note: you must define write and serialize functions as well as your own
- * EBML_GLOBAL
- *
- * These functions MUST be implemented
- */
-
-typedef struct EbmlGlobal EbmlGlobal;
-void  Ebml_Serialize(EbmlGlobal *glob, const void *, int, unsigned long);
-void  Ebml_Write(EbmlGlobal *glob, const void *, unsigned long);
-
-/*****/
-
-void Ebml_WriteLen(EbmlGlobal *glob, int64_t val);
-void Ebml_WriteString(EbmlGlobal *glob, const char *str);
-void Ebml_WriteUTF8(EbmlGlobal *glob, const wchar_t *wstr);
-void Ebml_WriteID(EbmlGlobal *glob, unsigned long class_id);
-void Ebml_SerializeUnsigned64(EbmlGlobal *glob, unsigned long class_id, uint64_t ui);
-void Ebml_SerializeUnsigned(EbmlGlobal *glob, unsigned long class_id, unsigned long ui);
-void Ebml_SerializeBinary(EbmlGlobal *glob, unsigned long class_id, unsigned long ui);
-void Ebml_SerializeFloat(EbmlGlobal *glob, unsigned long class_id, double d);
-/* TODO make this more generic to signed */
-void Ebml_WriteSigned16(EbmlGlobal *glob, short val);
-void Ebml_SerializeString(EbmlGlobal *glob, unsigned long class_id, const char *s);
-void Ebml_SerializeUTF8(EbmlGlobal *glob, unsigned long class_id, wchar_t *s);
-void Ebml_SerializeData(EbmlGlobal *glob, unsigned long class_id, unsigned char *data, unsigned long data_length);
-void Ebml_WriteVoid(EbmlGlobal *glob, unsigned long vSize);
-/* TODO need date function */
-#endif
--- a/tools_common.h
+++ b/tools_common.h
@@ -90,7 +90,6 @@ struct VpxInputContext {
  uint32_t width;
  uint32_t height;
  vpx_img_fmt_t fmt;
-  vpx_bit_depth_t bit_depth;
  int only_i420;
  uint32_t fourcc;
  struct VpxRational framerate;
--- a/vp8/common/postproc.c
+++ b/vp8/common/postproc.c
@@ -393,12 +393,12 @@ void vp8_de_noise(VP8_COMMON                 *cm,
                  int                         low_var_thresh,
                  int                         flag)
 {
-    int mbr;
    double level = 6.0e-05 * q * q * q - .0067 * q * q + .306 * q + .0065;
    int ppl = (int)(level + .5);
-    int mb_rows = cm->mb_rows;
-    int mb_cols = cm->mb_cols;
+    int mb_rows = source->y_height >> 4;  /* MB rows span the height */
+    int mb_cols = source->y_width >> 4;   /* MB columns span the width */
    unsigned char *limits = cm->pp_limits_buffer;;
+    int mbr, mbc;
    (void) post;
    (void) low_var_thresh;
    (void) flag;
--- a/vp8/common/rtcd_defs.pl
+++ b/vp8/common/rtcd_defs.pl
@@ -463,7 +463,9 @@ $vp8_short_walsh4x4_neon_asm=vp8_short_walsh4x4_neon;
 # Quantizer
 #
 add_proto qw/void vp8_regular_quantize_b/, "struct block *, struct blockd *";
-specialize qw/vp8_regular_quantize_b sse2 sse4_1/;
+specialize qw/vp8_regular_quantize_b sse2/;
+# TODO(johann) Update sse4 implementation and re-enable
+#$vp8_regular_quantize_b_sse4_1=vp8_regular_quantize_b_sse4;

 add_proto qw/void vp8_fast_quantize_b/, "struct block *, struct blockd *";
 specialize qw/vp8_fast_quantize_b sse2 ssse3 media neon_asm/;
@@ -552,9 +554,6 @@ $vp8_yv12_copy_partial_frame_neon_asm=vp8_yv12_copy_partial_frame_neon;
 if (vpx_config("CONFIG_TEMPORAL_DENOISING") eq "yes") {
    add_proto qw/int vp8_denoiser_filter/, "unsigned char *mc_running_avg_y, int mc_avg_y_stride, unsigned char *running_avg_y, int avg_y_stride, unsigned char *sig, int sig_stride, unsigned int motion_magnitude, int increase_denoising";
    specialize qw/vp8_denoiser_filter sse2 neon/;
-    add_proto qw/int vp8_denoiser_filter_uv/, "unsigned char *mc_running_avg, int mc_avg_stride, unsigned char *running_avg, int avg_stride, unsigned char *sig, int sig_stride, unsigned int motion_magnitude, int increase_denoising";
-    specialize qw/vp8_denoiser_filter_uv sse2/;
-
 }

 # End of encoder only functions
--- a/vp8/common/x86/postproc_mmx.asm
+++ b/vp8/common/x86/postproc_mmx.asm
@@ -246,6 +246,7 @@ sym(vp8_mbpost_proc_down_mmx):
 ;                            unsigned char whiteclamp[16],
 ;                            unsigned char bothclamp[16],
 ;                            unsigned int Width, unsigned int Height, int Pitch)
+extern sym(rand)
 global sym(vp8_plane_add_noise_mmx) PRIVATE
 sym(vp8_plane_add_noise_mmx):
    push        rbp
@@ -257,7 +258,7 @@ sym(vp8_plane_add_noise_mmx):
    ; end prolog

 .addnoise_loop:
-    call sym(LIBVPX_RAND) WRT_PLT
+    call sym(rand) WRT_PLT
    mov     rcx, arg(1) ;noise
    and     rax, 0xff
    add     rcx, rax
--- a/vp8/common/x86/postproc_sse2.asm
+++ b/vp8/common/x86/postproc_sse2.asm
@@ -660,6 +660,7 @@ sym(vp8_mbpost_proc_across_ip_xmm):
 ;                            unsigned char whiteclamp[16],
 ;                            unsigned char bothclamp[16],
 ;                            unsigned int Width, unsigned int Height, int Pitch)
+extern sym(rand)
 global sym(vp8_plane_add_noise_wmt) PRIVATE
 sym(vp8_plane_add_noise_wmt):
    push        rbp
@@ -671,7 +672,7 @@ sym(vp8_plane_add_noise_wmt):
    ; end prolog

 .addnoise_loop:
-    call sym(LIBVPX_RAND) WRT_PLT
+    call sym(rand) WRT_PLT
    mov     rcx, arg(1) ;noise
    and     rax, 0xff
    add     rcx, rax
--- a/vp8/common/x86/postproc_x86.c
+++ b/vp8/common/x86/postproc_x86.c
@@ -0,0 +1,24 @@
+/*
+ *  Copyright (c) 2012 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+/* Android NDK inlines rand(), but the postproc asm needs a real rand symbol. */
+#if defined(__ANDROID__)
+#define rand __rand /* stdlib.h's rand is seen as __rand while included */
+#include <stdlib.h>
+#undef rand
+
+extern int rand(void)
+{
+  return __rand();
+}
+#else
+/* ISO C forbids an empty translation unit. */
+int vp8_unused;
+#endif
--- a/vp8/encoder/bitstream.h
+++ b/vp8/encoder/bitstream.h
@@ -18,18 +18,18 @@ extern "C" {

 #if HAVE_EDSP
 void vp8cx_pack_tokens_armv5(vp8_writer *w, const TOKENEXTRA *p, int xcount,
-                             vp8_token *,
+                             const vp8_token *,
                             const vp8_extra_bit_struct *,
                             const vp8_tree_index *);
 void vp8cx_pack_tokens_into_partitions_armv5(VP8_COMP *,
                                             unsigned char * cx_data,
                                             const unsigned char *cx_data_end,
                                             int num_parts,
-                                             vp8_token *,
+                                             const vp8_token *,
                                             const vp8_extra_bit_struct *,
                                             const vp8_tree_index *);
 void vp8cx_pack_mb_row_tokens_armv5(VP8_COMP *cpi, vp8_writer *w,
-                                    vp8_token *,
+                                    const vp8_token *,
                                    const vp8_extra_bit_struct *,
                                    const vp8_tree_index *);
 # define pack_tokens(a,b,c)                  \
--- a/vp8/encoder/denoising.c
+++ b/vp8/encoder/denoising.c
@@ -191,154 +191,10 @@ int vp8_denoiser_filter_c(unsigned char *mc_running_avg_y, int mc_avg_y_stride,
    return FILTER_BLOCK;
 }

-int vp8_denoiser_filter_uv_c(unsigned char *mc_running_avg_uv,
-                             int mc_avg_uv_stride,
-                             unsigned char *running_avg_uv,
-                             int avg_uv_stride,
-                             unsigned char *sig,
-                             int sig_stride,
-                             unsigned int motion_magnitude,
-                             int increase_denoising) {
-    unsigned char *running_avg_uv_start = running_avg_uv;
-    unsigned char *sig_start = sig;
-    int sum_diff_thresh;
-    int r, c;
-    int sum_diff = 0;
-    int sum_block = 0;
-    int adj_val[3] = {3, 4, 6};
-    int shift_inc1 = 0;
-    int shift_inc2 = 1;
-    /* If motion_magnitude is small, making the denoiser more aggressive by
-     * increasing the adjustment for each level. Add another increment for
-     * blocks that are labeled for increase denoising. */
-    if (motion_magnitude <= MOTION_MAGNITUDE_THRESHOLD_UV) {
-      if (increase_denoising) {
-        shift_inc1 = 1;
-        shift_inc2 = 2;
-      }
-      adj_val[0] += shift_inc2;
-      adj_val[1] += shift_inc2;
-      adj_val[2] += shift_inc2;
-    }
-
-    // Avoid denoising color signal if its close to average level.
-    for (r = 0; r < 8; ++r) {
-      for (c = 0; c < 8; ++c) {
-        sum_block += sig[c];
-      }
-      sig += sig_stride;
-    }
-    if (abs(sum_block - (128 * 8 * 8)) < SUM_DIFF_FROM_AVG_THRESH_UV) {
-      return COPY_BLOCK;
-    }
-
-    sig -= sig_stride * 8;
-    for (r = 0; r < 8; ++r) {
-      for (c = 0; c < 8; ++c) {
-        int diff = 0;
-        int adjustment = 0;
-        int absdiff = 0;
-
-        diff = mc_running_avg_uv[c] - sig[c];
-        absdiff = abs(diff);
-
-        // When |diff| <= |3 + shift_inc1|, use pixel value from
-        // last denoised raw.
-        if (absdiff <= 3 + shift_inc1) {
-          running_avg_uv[c] = mc_running_avg_uv[c];
-          sum_diff += diff;
-        } else {
-          if (absdiff >= 4 && absdiff <= 7)
-            adjustment = adj_val[0];
-          else if (absdiff >= 8 && absdiff <= 15)
-            adjustment = adj_val[1];
-          else
-            adjustment = adj_val[2];
-          if (diff > 0) {
-            if ((sig[c] + adjustment) > 255)
-              running_avg_uv[c] = 255;
-            else
-              running_avg_uv[c] = sig[c] + adjustment;
-            sum_diff += adjustment;
-          } else {
-            if ((sig[c] - adjustment) < 0)
-              running_avg_uv[c] = 0;
-            else
-              running_avg_uv[c] = sig[c] - adjustment;
-            sum_diff -= adjustment;
-          }
-        }
-      }
-      /* Update pointers for next iteration. */
-      sig += sig_stride;
-      mc_running_avg_uv += mc_avg_uv_stride;
-      running_avg_uv += avg_uv_stride;
-    }
-
-    sum_diff_thresh= SUM_DIFF_THRESHOLD_UV;
-    if (increase_denoising) sum_diff_thresh = SUM_DIFF_THRESHOLD_HIGH_UV;
-    if (abs(sum_diff) > sum_diff_thresh) {
-      // Before returning to copy the block (i.e., apply no denoising), check
-      // if we can still apply some (weaker) temporal filtering to this block,
-      // that would otherwise not be denoised at all. Simplest is to apply
-      // an additional adjustment to running_avg_y to bring it closer to sig.
-      // The adjustment is capped by a maximum delta, and chosen such that
-      // in most cases the resulting sum_diff will be within the
-      // accceptable range given by sum_diff_thresh.
-
-      // The delta is set by the excess of absolute pixel diff over threshold.
-      int delta = ((abs(sum_diff) - sum_diff_thresh) >> 8) + 1;
-      // Only apply the adjustment for max delta up to 3.
-      if (delta < 4) {
-        sig -= sig_stride * 8;
-        mc_running_avg_uv -= mc_avg_uv_stride * 8;
-        running_avg_uv -= avg_uv_stride * 8;
-        for (r = 0; r < 8; ++r) {
-          for (c = 0; c < 8; ++c) {
-            int diff = mc_running_avg_uv[c] - sig[c];
-            int adjustment = abs(diff);
-            if (adjustment > delta)
-              adjustment = delta;
-            if (diff > 0) {
-              // Bring denoised signal down.
-              if (running_avg_uv[c] - adjustment < 0)
-                running_avg_uv[c] = 0;
-              else
-                running_avg_uv[c] = running_avg_uv[c] - adjustment;
-              sum_diff -= adjustment;
-            } else if (diff < 0) {
-              // Bring denoised signal up.
-              if (running_avg_uv[c] + adjustment > 255)
-                running_avg_uv[c] = 255;
-              else
-                running_avg_uv[c] = running_avg_uv[c] + adjustment;
-              sum_diff += adjustment;
-            }
-          }
-          // TODO(marpan): Check here if abs(sum_diff) has gone below the
-          // threshold sum_diff_thresh, and if so, we can exit the row loop.
-          sig += sig_stride;
-          mc_running_avg_uv += mc_avg_uv_stride;
-          running_avg_uv += avg_uv_stride;
-        }
-        if (abs(sum_diff) > sum_diff_thresh)
-          return COPY_BLOCK;
-      } else {
-        return COPY_BLOCK;
-      }
-    }
-
-    vp8_copy_mem8x8(running_avg_uv_start, avg_uv_stride, sig_start,
-                    sig_stride);
-    return FILTER_BLOCK;
-}
-
-int vp8_denoiser_allocate(VP8_DENOISER *denoiser, int width, int height,
-                          int num_mb_rows, int num_mb_cols)
+int vp8_denoiser_allocate(VP8_DENOISER *denoiser, int width, int height)
 {
    int i;
    assert(denoiser);
-    denoiser->num_mb_cols = num_mb_cols;

    for (i = 0; i < MAX_REF_FRAMES; i++)
    {
@@ -366,10 +222,6 @@ int vp8_denoiser_allocate(VP8_DENOISER *denoiser, int width, int height,

    vpx_memset(denoiser->yv12_mc_running_avg.buffer_alloc, 0,
               denoiser->yv12_mc_running_avg.frame_size);
-
-    denoiser->denoise_state = vpx_calloc((num_mb_rows * num_mb_cols), 1);
-    vpx_memset(denoiser->denoise_state, 0, (num_mb_rows * num_mb_cols));
-
    return 0;
 }

@@ -383,7 +235,6 @@ void vp8_denoiser_free(VP8_DENOISER *denoiser)
        vp8_yv12_de_alloc_frame_buffer(&denoiser->yv12_running_avg[i]);
    }
    vp8_yv12_de_alloc_frame_buffer(&denoiser->yv12_mc_running_avg);
-    vpx_free(denoiser->denoise_state);
 }


@@ -392,28 +243,17 @@ void vp8_denoiser_denoise_mb(VP8_DENOISER *denoiser,
                             unsigned int best_sse,
                             unsigned int zero_mv_sse,
                             int recon_yoffset,
-                             int recon_uvoffset,
-                             loop_filter_info_n *lfi_n,
-                             int mb_row,
-                             int mb_col,
-                             int block_index)
+                             int recon_uvoffset)
 {
    int mv_row;
    int mv_col;
    unsigned int motion_magnitude2;
    unsigned int sse_thresh;
    int sse_diff_thresh = 0;
-    // Denoise the UV channel.
-    int apply_color_denoise = 0;
-    // Spatial loop filter: only applied selectively based on
-    // temporal filter state of block relative to top/left neighbors.
-    int apply_spatial_loop_filter = 1;
    MV_REFERENCE_FRAME frame = x->best_reference_frame;
    MV_REFERENCE_FRAME zero_frame = x->best_zeromv_reference_frame;

    enum vp8_denoiser_decision decision = FILTER_BLOCK;
-    enum vp8_denoiser_decision decision_u = FILTER_BLOCK;
-    enum vp8_denoiser_decision decision_v = FILTER_BLOCK;

    if (zero_frame)
    {
@@ -423,11 +263,7 @@ void vp8_denoiser_denoise_mb(VP8_DENOISER *denoiser,
        MB_MODE_INFO saved_mbmi;
        MACROBLOCKD *filter_xd = &x->e_mbd;
        MB_MODE_INFO *mbmi = &filter_xd->mode_info_context->mbmi;
-        int sse_diff = 0;
-        // Bias on zero motion vector sse.
-        int zero_bias = 95;
-        zero_mv_sse = (unsigned int)((int64_t)zero_mv_sse * zero_bias / 100);
-        sse_diff = zero_mv_sse - best_sse;
+        int sse_diff = zero_mv_sse - best_sse;

        saved_mbmi = *mbmi;

@@ -523,37 +359,9 @@ void vp8_denoiser_denoise_mb(VP8_DENOISER *denoiser,

        /* Filter. */
        decision = vp8_denoiser_filter(mc_running_avg_y, mc_avg_y_stride,
-                                       running_avg_y, avg_y_stride,
-                                       x->thismb, 16, motion_magnitude2,
-                                       x->increase_denoising);
-        denoiser->denoise_state[block_index] = motion_magnitude2 > 0 ?
-            kFilterNonZeroMV : kFilterZeroMV;
-        // Only denoise UV for zero motion, and if y channel was denoised.
-        if (apply_color_denoise &&
-            motion_magnitude2 == 0 &&
-            decision == FILTER_BLOCK) {
-          unsigned char *mc_running_avg_u =
-              denoiser->yv12_mc_running_avg.u_buffer + recon_uvoffset;
-          unsigned char *running_avg_u =
-              denoiser->yv12_running_avg[INTRA_FRAME].u_buffer + recon_uvoffset;
-          unsigned char *mc_running_avg_v =
-              denoiser->yv12_mc_running_avg.v_buffer + recon_uvoffset;
-          unsigned char *running_avg_v =
-              denoiser->yv12_running_avg[INTRA_FRAME].v_buffer + recon_uvoffset;
-          int mc_avg_uv_stride = denoiser->yv12_mc_running_avg.uv_stride;
-          int avg_uv_stride = denoiser->yv12_running_avg[INTRA_FRAME].uv_stride;
-          int signal_stride = x->block[16].src_stride;
-          decision_u =
-              vp8_denoiser_filter_uv(mc_running_avg_u, mc_avg_uv_stride,
-                                      running_avg_u, avg_uv_stride,
-                                      x->block[16].src + *x->block[16].base_src,
-                                      signal_stride, motion_magnitude2, 0);
-          decision_v =
-              vp8_denoiser_filter_uv(mc_running_avg_v, mc_avg_uv_stride,
-                                      running_avg_v, avg_uv_stride,
-                                      x->block[20].src + *x->block[20].base_src,
-                                      signal_stride, motion_magnitude2, 0);
-        }
+                                         running_avg_y, avg_y_stride,
+                                         x->thismb, 16, motion_magnitude2,
+                                         x->increase_denoising);
    }
    if (decision == COPY_BLOCK)
    {
@@ -564,73 +372,5 @@ void vp8_denoiser_denoise_mb(VP8_DENOISER *denoiser,
                x->thismb, 16,
                denoiser->yv12_running_avg[INTRA_FRAME].y_buffer + recon_yoffset,
                denoiser->yv12_running_avg[INTRA_FRAME].y_stride);
-        denoiser->denoise_state[block_index] = kNoFilter;
-    }
-    if (apply_color_denoise) {
-      if (decision_u == COPY_BLOCK) {
-        vp8_copy_mem8x8(
-            x->block[16].src + *x->block[16].base_src, x->block[16].src_stride,
-            denoiser->yv12_running_avg[INTRA_FRAME].u_buffer + recon_uvoffset,
-            denoiser->yv12_running_avg[INTRA_FRAME].uv_stride);
-      }
-      if (decision_v == COPY_BLOCK) {
-        vp8_copy_mem8x8(
-            x->block[20].src + *x->block[20].base_src, x->block[16].src_stride,
-            denoiser->yv12_running_avg[INTRA_FRAME].v_buffer + recon_uvoffset,
-            denoiser->yv12_running_avg[INTRA_FRAME].uv_stride);
-      }
-    }
-    // Option to selectively deblock the denoised signal, for y channel only.
-    if (apply_spatial_loop_filter) {
-      loop_filter_info lfi;
-      int apply_filter_col = 0;
-      int apply_filter_row = 0;
-      int apply_filter = 0;
-      int y_stride = denoiser->yv12_running_avg[INTRA_FRAME].y_stride;
-      int uv_stride =denoiser->yv12_running_avg[INTRA_FRAME].uv_stride;
-
-      // Fix filter level to some nominal value for now.
-      int filter_level = 32;
-
-      int hev_index = lfi_n->hev_thr_lut[INTER_FRAME][filter_level];
-      lfi.mblim = lfi_n->mblim[filter_level];
-      lfi.blim = lfi_n->blim[filter_level];
-      lfi.lim = lfi_n->lim[filter_level];
-      lfi.hev_thr = lfi_n->hev_thr[hev_index];
-
-      // Apply filter if there is a difference in the denoiser filter state
-      // between the current and left/top block, or if non-zero motion vector
-      // is used for the motion-compensated filtering.
-      if (mb_col > 0) {
-        apply_filter_col = !((denoiser->denoise_state[block_index] ==
-            denoiser->denoise_state[block_index - 1]) &&
-            denoiser->denoise_state[block_index] != kFilterNonZeroMV);
-        if (apply_filter_col) {
-          // Filter left vertical edge.
-          apply_filter = 1;
-          vp8_loop_filter_mbv(
-              denoiser->yv12_running_avg[INTRA_FRAME].y_buffer + recon_yoffset,
-              NULL, NULL, y_stride, uv_stride, &lfi);
-        }
-      }
-      if (mb_row > 0) {
-        apply_filter_row = !((denoiser->denoise_state[block_index] ==
-            denoiser->denoise_state[block_index - denoiser->num_mb_cols]) &&
-            denoiser->denoise_state[block_index] != kFilterNonZeroMV);
-        if (apply_filter_row) {
-          // Filter top horizontal edge.
-          apply_filter = 1;
-          vp8_loop_filter_mbh(
-              denoiser->yv12_running_avg[INTRA_FRAME].y_buffer + recon_yoffset,
-              NULL, NULL, y_stride, uv_stride, &lfi);
-        }
-      }
-      if (apply_filter) {
-        // Update the signal block |x|. Pixel changes are only to top and/or
-        // left boundary pixels: can we avoid full block copy here.
-        vp8_copy_mem16x16(
-            denoiser->yv12_running_avg[INTRA_FRAME].y_buffer + recon_yoffset,
-            y_stride, x->thismb, 16);
-      }
    }
 }
--- a/vp8/encoder/denoising.h
+++ b/vp8/encoder/denoising.h
@@ -12,7 +12,6 @@
 #define VP8_ENCODER_DENOISING_H_

 #include "block.h"
-#include "vp8/common/loopfilter.h"

 #ifdef __cplusplus
 extern "C" {
@@ -22,33 +21,19 @@ extern "C" {
 #define SUM_DIFF_THRESHOLD_HIGH (16 * 16 * 3)
 #define MOTION_MAGNITUDE_THRESHOLD (8*3)

-#define SUM_DIFF_THRESHOLD_UV (96)   // (8 * 8 * 1.5)
-#define SUM_DIFF_THRESHOLD_HIGH_UV (8 * 8 * 2)
-#define SUM_DIFF_FROM_AVG_THRESH_UV (8 * 8 * 4)
-#define MOTION_MAGNITUDE_THRESHOLD_UV (8*3)
-
 enum vp8_denoiser_decision
 {
  COPY_BLOCK,
  FILTER_BLOCK
 };

-enum vp8_denoiser_filter_state {
-  kNoFilter,
-  kFilterZeroMV,
-  kFilterNonZeroMV
-};
-
 typedef struct vp8_denoiser
 {
    YV12_BUFFER_CONFIG yv12_running_avg[MAX_REF_FRAMES];
    YV12_BUFFER_CONFIG yv12_mc_running_avg;
-    unsigned char* denoise_state;
-    int num_mb_cols;
 } VP8_DENOISER;

-int vp8_denoiser_allocate(VP8_DENOISER *denoiser, int width, int height,
-                          int num_mb_rows, int num_mb_cols);
+int vp8_denoiser_allocate(VP8_DENOISER *denoiser, int width, int height);

 void vp8_denoiser_free(VP8_DENOISER *denoiser);

@@ -57,11 +42,7 @@ void vp8_denoiser_denoise_mb(VP8_DENOISER *denoiser,
                             unsigned int best_sse,
                             unsigned int zero_mv_sse,
                             int recon_yoffset,
-                             int recon_uvoffset,
-                             loop_filter_info_n *lfi_n,
-                             int mb_row,
-                             int mb_col,
-                             int block_index);
+                             int recon_uvoffset);

 #ifdef __cplusplus
 }  // extern "C"
--- a/vp8/encoder/encodeframe.c
+++ b/vp8/encoder/encodeframe.c
@@ -1246,7 +1246,7 @@ int vp8cx_encode_inter_macroblock
            x->zbin_mode_boost_enabled = 0;
        }
        vp8_rd_pick_inter_mode(cpi, x, recon_yoffset, recon_uvoffset, &rate,
-                               &distortion, &intra_error, mb_row, mb_col);
+                               &distortion, &intra_error);

        /* switch back to the regular quantizer for the encode */
        if (cpi->sf.improved_quant)
--- a/vp8/encoder/onyx_if.c
+++ b/vp8/encoder/onyx_if.c
@@ -98,9 +98,6 @@ extern double vp8_calc_ssimg
 #ifdef OUTPUT_YUV_SRC
 FILE *yuv_file;
 #endif
-#ifdef OUTPUT_YUV_DENOISED
-FILE *yuv_denoised_file;
-#endif

 #if 0
 FILE *framepsnr;
@@ -1751,8 +1748,7 @@ void vp8_change_config(VP8_COMP *cpi, VP8_CONFIG *oxcf)
      {
        int width = (cpi->oxcf.Width + 15) & ~15;
        int height = (cpi->oxcf.Height + 15) & ~15;
-        vp8_denoiser_allocate(&cpi->denoiser, width, height,
-                              cpi->common.mb_rows, cpi->common.mb_cols);
+        vp8_denoiser_allocate(&cpi->denoiser, width, height);
      }
    }
 #endif
@@ -1965,9 +1961,6 @@ struct VP8_COMP* vp8_create_compressor(VP8_CONFIG *oxcf)
 #ifdef OUTPUT_YUV_SRC
    yuv_file = fopen("bd.yuv", "ab");
 #endif
-#ifdef OUTPUT_YUV_DENOISED
-    yuv_denoised_file = fopen("denoised.yuv", "ab");
-#endif

 #if 0
    framepsnr = fopen("framepsnr.stt", "a");
@@ -2417,9 +2410,6 @@ void vp8_remove_compressor(VP8_COMP **ptr)
 #ifdef OUTPUT_YUV_SRC
    fclose(yuv_file);
 #endif
-#ifdef OUTPUT_YUV_DENOISED
-    fclose(yuv_denoised_file);
-#endif

 #if 0

@@ -2620,7 +2610,7 @@ int vp8_update_entropy(VP8_COMP *cpi, int update)
 }


-#if defined(OUTPUT_YUV_SRC) || defined(OUTPUT_YUV_DENOISED)
+#ifdef OUTPUT_YUV_SRC /* same test as the other yuv_file guards */
 void vp8_write_yuv_frame(FILE *yuv_file, YV12_BUFFER_CONFIG *s)
 {
    unsigned char *src = s->y_buffer;
@@ -4440,11 +4430,6 @@ static void encode_frame_to_data_rate

    update_reference_frames(cpi);

-#ifdef OUTPUT_YUV_DENOISED
-    vp8_write_yuv_frame(yuv_denoised_file,
-                        &cpi->denoiser.yv12_running_avg[INTRA_FRAME]);
-#endif
-
 #if !(CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING)
    if (cpi->oxcf.error_resilient_mode)
    {
--- a/vp8/encoder/pickinter.c
+++ b/vp8/encoder/pickinter.c
@@ -1168,7 +1168,6 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
 #if CONFIG_TEMPORAL_DENOISING
    if (cpi->oxcf.noise_sensitivity)
    {
-        int block_index = mb_row * cpi->common.mb_cols + mb_col;
        if (x->best_sse_inter_mode == DC_PRED)
        {
            /* No best MV found. */
@@ -1180,9 +1179,7 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
        }
        x->increase_denoising = 0;
        vp8_denoiser_denoise_mb(&cpi->denoiser, x, best_sse, zero_mv_sse,
-                                recon_yoffset, recon_uvoffset,
-                                &cpi->common.lf_info, mb_row, mb_col,
-                                block_index);
+                                recon_yoffset, recon_uvoffset);


        /* Reevaluate ZEROMV after denoising. */
--- a/vp8/encoder/rdopt.c
+++ b/vp8/encoder/rdopt.c
@@ -1935,8 +1935,7 @@ static void update_best_mode(BEST_MODE* best_mode, int this_rd,

 void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
                            int recon_uvoffset, int *returnrate,
-                            int *returndistortion, int *returnintra,
-                            int mb_row, int mb_col)
+                            int *returndistortion, int *returnintra)
 {
    BLOCK *b = &x->block[0];
    BLOCKD *d = &x->e_mbd.block[0];
@@ -2511,7 +2510,6 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
 #if CONFIG_TEMPORAL_DENOISING
    if (cpi->oxcf.noise_sensitivity)
    {
-        int block_index = mb_row * cpi->common.mb_cols + mb_col;
        if (x->best_sse_inter_mode == DC_PRED)
        {
            /* No best MV found. */
@@ -2522,9 +2520,7 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
            best_sse = best_rd_sse;
        }
        vp8_denoiser_denoise_mb(&cpi->denoiser, x, best_sse, zero_mv_sse,
-                                recon_yoffset, recon_uvoffset,
-                                &cpi->common.lf_info, mb_row, mb_col,
-                                block_index);
+                                recon_yoffset, recon_uvoffset);


        /* Reevaluate ZEROMV after denoising. */
--- a/vp8/encoder/rdopt.h
+++ b/vp8/encoder/rdopt.h
@@ -70,10 +70,7 @@ static void insertsortsad(int arr[],int idx[], int len)
 }

 extern void vp8_initialize_rd_consts(VP8_COMP *cpi, MACROBLOCK *x, int Qvalue);
-extern void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x,
-                                   int recon_yoffset, int recon_uvoffset,
-                                   int *returnrate, int *returndistortion,
-                                   int *returnintra, int mb_row, int mb_col);
+extern void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int recon_uvoffset, int *returnrate, int *returndistortion, int *returnintra);
 extern void vp8_rd_pick_intra_mode(MACROBLOCK *x, int *rate);


--- a/vp8/encoder/x86/denoising_sse2.c
+++ b/vp8/encoder/x86/denoising_sse2.c
@@ -17,23 +17,10 @@
 #include <emmintrin.h>
 #include "vpx_ports/emmintrin_compat.h"

-/* Compute the sum of all pixel differences of this MB. */
-static INLINE unsigned int abs_sum_diff_16x1(__m128i acc_diff) {
-  const __m128i k_1 = _mm_set1_epi16(1);
-  const __m128i acc_diff_lo = _mm_srai_epi16(
-      _mm_unpacklo_epi8(acc_diff, acc_diff), 8);
-  const __m128i acc_diff_hi = _mm_srai_epi16(
-      _mm_unpackhi_epi8(acc_diff, acc_diff), 8);
-  const __m128i acc_diff_16 = _mm_add_epi16(acc_diff_lo, acc_diff_hi);
-  const __m128i hg_fe_dc_ba = _mm_madd_epi16(acc_diff_16, k_1);
-  const __m128i hgfe_dcba = _mm_add_epi32(hg_fe_dc_ba,
-                                          _mm_srli_si128(hg_fe_dc_ba, 8));
-  const __m128i hgfedcba = _mm_add_epi32(hgfe_dcba,
-                                         _mm_srli_si128(hgfe_dcba, 4));
-  unsigned int sum_diff = _mm_cvtsi128_si32(hgfedcba);
-
-  return abs(sum_diff);
-}
+union sum_union {
+    __m128i v;
+    signed char e[16];
+};

 int vp8_denoiser_filter_sse2(unsigned char *mc_running_avg_y,
                             int mc_avg_y_stride,
@@ -44,7 +31,7 @@ int vp8_denoiser_filter_sse2(unsigned char *mc_running_avg_y,
 {
    unsigned char *running_avg_y_start = running_avg_y;
    unsigned char *sig_start = sig;
-    unsigned int sum_diff_thresh;
+    int sum_diff_thresh;
    int r;
    int shift_inc  = (increase_denoising &&
        motion_magnitude <= MOTION_MAGNITUDE_THRESHOLD) ? 1 : 0;
@@ -116,10 +103,16 @@ int vp8_denoiser_filter_sse2(unsigned char *mc_running_avg_y,

    {
        /* Compute the sum of all pixel differences of this MB. */
-        unsigned int abs_sum_diff = abs_sum_diff_16x1(acc_diff);
+        union sum_union s;
+        int sum_diff = 0;
+        s.v = acc_diff;
+        sum_diff = s.e[0] + s.e[1] + s.e[2] + s.e[3] + s.e[4] + s.e[5]
+                 + s.e[6] + s.e[7] + s.e[8] + s.e[9] + s.e[10] + s.e[11]
+                 + s.e[12] + s.e[13] + s.e[14] + s.e[15];
+
        sum_diff_thresh = SUM_DIFF_THRESHOLD;
        if (increase_denoising) sum_diff_thresh = SUM_DIFF_THRESHOLD_HIGH;
-        if (abs_sum_diff > sum_diff_thresh) {
+        if (abs(sum_diff) > sum_diff_thresh) {
          // Before returning to copy the block (i.e., apply no denoising),
          // checK if we can still apply some (weaker) temporal filtering to
          // this block, that would otherwise not be denoised at all. Simplest
@@ -130,7 +123,7 @@ int vp8_denoiser_filter_sse2(unsigned char *mc_running_avg_y,

          // The delta is set by the excess of absolute pixel diff over the
          // threshold.
-          int delta = ((abs_sum_diff - sum_diff_thresh) >> 8) + 1;
+          int delta = ((abs(sum_diff) - sum_diff_thresh) >> 8) + 1;
          // Only apply the adjustment for max delta up to 3.
          if (delta < 4) {
            const __m128i k_delta = _mm_set1_epi8(delta);
@@ -169,9 +162,16 @@ int vp8_denoiser_filter_sse2(unsigned char *mc_running_avg_y,
             mc_running_avg_y += mc_avg_y_stride;
             running_avg_y += avg_y_stride;
            }
-            abs_sum_diff = abs_sum_diff_16x1(acc_diff);
-            if (abs_sum_diff > sum_diff_thresh) {
-              return COPY_BLOCK;
+            {
+              // Update the sum of all pixel differences of this MB.
+              union sum_union s;
+              s.v = acc_diff;
+              sum_diff = s.e[0] + s.e[1] + s.e[2] + s.e[3] + s.e[4] + s.e[5]
+                       + s.e[6] + s.e[7] + s.e[8] + s.e[9] + s.e[10] + s.e[11]
+                       + s.e[12] + s.e[13] + s.e[14] + s.e[15];
+              if (abs(sum_diff) > sum_diff_thresh) {
+                return COPY_BLOCK;
+              }
            }
          } else {
            return COPY_BLOCK;
@@ -182,198 +182,3 @@ int vp8_denoiser_filter_sse2(unsigned char *mc_running_avg_y,
    vp8_copy_mem16x16(running_avg_y_start, avg_y_stride, sig_start, sig_stride);
    return FILTER_BLOCK;
 }
-
-int vp8_denoiser_filter_uv_sse2(unsigned char *mc_running_avg,
-                             int mc_avg_stride,
-                             unsigned char *running_avg, int avg_stride,
-                             unsigned char *sig, int sig_stride,
-                             unsigned int motion_magnitude,
-                             int increase_denoising) {
-    unsigned char *running_avg_start = running_avg;
-    unsigned char *sig_start = sig;
-    unsigned int sum_diff_thresh;
-    int r;
-    int shift_inc  = (increase_denoising &&
-        motion_magnitude <= MOTION_MAGNITUDE_THRESHOLD_UV) ? 1 : 0;
-    __m128i acc_diff = _mm_setzero_si128();
-    const __m128i k_0 = _mm_setzero_si128();
-    const __m128i k_4 = _mm_set1_epi8(4 + shift_inc);
-    const __m128i k_8 = _mm_set1_epi8(8);
-    const __m128i k_16 = _mm_set1_epi8(16);
-    /* Modify each level's adjustment according to motion_magnitude. */
-    const __m128i l3 = _mm_set1_epi8(
-                       (motion_magnitude <= MOTION_MAGNITUDE_THRESHOLD_UV) ?
-                        7 + shift_inc : 6);
-    /* Difference between level 3 and level 2 is 2. */
-    const __m128i l32 = _mm_set1_epi8(2);
-    /* Difference between level 2 and level 1 is 1. */
-    const __m128i l21 = _mm_set1_epi8(1);
-
-    {
-      const __m128i k_1 = _mm_set1_epi16(1);
-      __m128i vec_sum_block = _mm_setzero_si128();
-
-      // Avoid denoising color signal if its close to average level.
-      for (r = 0; r < 8; ++r) {
-        const __m128i v_sig = _mm_loadl_epi64((__m128i *)(&sig[0]));
-        const __m128i v_sig_unpack = _mm_unpacklo_epi8(v_sig, k_0);
-        vec_sum_block = _mm_add_epi16(vec_sum_block, v_sig_unpack);
-        sig += sig_stride;
-      }
-      sig -= sig_stride * 8;
-      {
-        const __m128i hg_fe_dc_ba = _mm_madd_epi16(vec_sum_block, k_1);
-        const __m128i hgfe_dcba = _mm_add_epi32(hg_fe_dc_ba,
-                                                _mm_srli_si128(hg_fe_dc_ba, 8));
-        const __m128i hgfedcba = _mm_add_epi32(hgfe_dcba,
-                                               _mm_srli_si128(hgfe_dcba, 4));
-        const int sum_block = _mm_cvtsi128_si32(hgfedcba);
-        if (abs(sum_block - (128 * 8 * 8)) < SUM_DIFF_FROM_AVG_THRESH_UV) {
-          return COPY_BLOCK;
-        }
-      }
-    }
-
-    for (r = 0; r < 4; ++r) {
-        /* Calculate differences */
-        const __m128i v_sig_low = _mm_castpd_si128(
-            _mm_load_sd((double *)(&sig[0])));
-        const __m128i v_sig = _mm_castpd_si128(
-            _mm_loadh_pd(_mm_castsi128_pd(v_sig_low),
-                         (double *)(&sig[sig_stride])));
-        const __m128i v_mc_running_avg_low = _mm_castpd_si128(
-            _mm_load_sd((double *)(&mc_running_avg[0])));
-        const __m128i v_mc_running_avg = _mm_castpd_si128(
-            _mm_loadh_pd(_mm_castsi128_pd(v_mc_running_avg_low),
-                         (double *)(&mc_running_avg[mc_avg_stride])));
-        const __m128i pdiff = _mm_subs_epu8(v_mc_running_avg, v_sig);
-        const __m128i ndiff = _mm_subs_epu8(v_sig, v_mc_running_avg);
-        /* Obtain the sign. FF if diff is negative. */
-        const __m128i diff_sign = _mm_cmpeq_epi8(pdiff, k_0);
-        /* Clamp absolute difference to 16 to be used to get mask. Doing this
-         * allows us to use _mm_cmpgt_epi8, which operates on signed byte. */
-        const __m128i clamped_absdiff = _mm_min_epu8(
-                                        _mm_or_si128(pdiff, ndiff), k_16);
-        /* Get masks for l2 l1 and l0 adjustments */
-        const __m128i mask2 = _mm_cmpgt_epi8(k_16, clamped_absdiff);
-        const __m128i mask1 = _mm_cmpgt_epi8(k_8, clamped_absdiff);
-        const __m128i mask0 = _mm_cmpgt_epi8(k_4, clamped_absdiff);
-        /* Get adjustments for l2, l1, and l0 */
-        __m128i adj2 = _mm_and_si128(mask2, l32);
-        const __m128i adj1 = _mm_and_si128(mask1, l21);
-        const __m128i adj0 = _mm_and_si128(mask0, clamped_absdiff);
-        __m128i adj,  padj, nadj;
-        __m128i v_running_avg;
-
-        /* Combine the adjustments and get absolute adjustments. */
-        adj2 = _mm_add_epi8(adj2, adj1);
-        adj = _mm_sub_epi8(l3, adj2);
-        adj = _mm_andnot_si128(mask0, adj);
-        adj = _mm_or_si128(adj, adj0);
-
-        /* Restore the sign and get positive and negative adjustments. */
-        padj = _mm_andnot_si128(diff_sign, adj);
-        nadj = _mm_and_si128(diff_sign, adj);
-
-        /* Calculate filtered value. */
-        v_running_avg = _mm_adds_epu8(v_sig, padj);
-        v_running_avg = _mm_subs_epu8(v_running_avg, nadj);
-
-        _mm_storel_pd((double *)&running_avg[0],
-                      _mm_castsi128_pd(v_running_avg));
-        _mm_storeh_pd((double *)&running_avg[avg_stride],
-                      _mm_castsi128_pd(v_running_avg));
-
-        /* Adjustments <=7, and each element in acc_diff can fit in signed
-         * char.
-         */
-        acc_diff = _mm_adds_epi8(acc_diff, padj);
-        acc_diff = _mm_subs_epi8(acc_diff, nadj);
-
-        /* Update pointers for next iteration. */
-        sig += sig_stride * 2;
-        mc_running_avg += mc_avg_stride * 2;
-        running_avg += avg_stride * 2;
-    }
-
-    {
-        unsigned int abs_sum_diff = abs_sum_diff_16x1(acc_diff);
-        sum_diff_thresh = SUM_DIFF_THRESHOLD_UV;
-        if (increase_denoising) sum_diff_thresh = SUM_DIFF_THRESHOLD_HIGH_UV;
-        if (abs_sum_diff > sum_diff_thresh) {
-          // Before returning to copy the block (i.e., apply no denoising),
-          // checK if we can still apply some (weaker) temporal filtering to
-          // this block, that would otherwise not be denoised at all. Simplest
-          // is to apply an additional adjustment to running_avg_y to bring it
-          // closer to sig. The adjustment is capped by a maximum delta, and
-          // chosen such that in most cases the resulting sum_diff will be
-          // within the accceptable range given by sum_diff_thresh.
-
-          // The delta is set by the excess of absolute pixel diff over the
-          // threshold.
-          int delta = ((abs_sum_diff - sum_diff_thresh) >> 8) + 1;
-          // Only apply the adjustment for max delta up to 3.
-          if (delta < 4) {
-            const __m128i k_delta = _mm_set1_epi8(delta);
-            sig -= sig_stride * 8;
-            mc_running_avg -= mc_avg_stride * 8;
-            running_avg -= avg_stride * 8;
-            for (r = 0; r < 4; ++r) {
-              // Calculate differences.
-              const __m128i v_sig_low = _mm_castpd_si128(
-                  _mm_load_sd((double *)(&sig[0])));
-              const __m128i v_sig = _mm_castpd_si128(
-                  _mm_loadh_pd(_mm_castsi128_pd(v_sig_low),
-                               (double *)(&sig[sig_stride])));
-              const __m128i v_mc_running_avg_low = _mm_castpd_si128(
-                  _mm_load_sd((double *)(&mc_running_avg[0])));
-              const __m128i v_mc_running_avg = _mm_castpd_si128(
-                  _mm_loadh_pd(_mm_castsi128_pd(v_mc_running_avg_low),
-                               (double *)(&mc_running_avg[mc_avg_stride])));
-              const __m128i pdiff = _mm_subs_epu8(v_mc_running_avg, v_sig);
-              const __m128i ndiff = _mm_subs_epu8(v_sig, v_mc_running_avg);
-              // Obtain the sign. FF if diff is negative.
-              const __m128i diff_sign = _mm_cmpeq_epi8(pdiff, k_0);
-              // Clamp absolute difference to delta to get the adjustment.
-              const __m128i adj =
-                  _mm_min_epu8(_mm_or_si128(pdiff, ndiff), k_delta);
-              // Restore the sign and get positive and negative adjustments.
-              __m128i padj, nadj;
-              const __m128i v_running_avg_low = _mm_castpd_si128(
-                  _mm_load_sd((double *)(&running_avg[0])));
-              __m128i v_running_avg = _mm_castpd_si128(
-                  _mm_loadh_pd(_mm_castsi128_pd(v_running_avg_low),
-                               (double *)(&running_avg[avg_stride])));
-              padj = _mm_andnot_si128(diff_sign, adj);
-              nadj = _mm_and_si128(diff_sign, adj);
-              // Calculate filtered value.
-              v_running_avg = _mm_subs_epu8(v_running_avg, padj);
-              v_running_avg = _mm_adds_epu8(v_running_avg, nadj);
-
-              _mm_storel_pd((double *)&running_avg[0],
-                            _mm_castsi128_pd(v_running_avg));
-              _mm_storeh_pd((double *)&running_avg[avg_stride],
-                            _mm_castsi128_pd(v_running_avg));
-
-             // Accumulate the adjustments.
-             acc_diff = _mm_subs_epi8(acc_diff, padj);
-             acc_diff = _mm_adds_epi8(acc_diff, nadj);
-
-             // Update pointers for next iteration.
-             sig += sig_stride * 2;
-             mc_running_avg += mc_avg_stride * 2;
-             running_avg += avg_stride * 2;
-            }
-            abs_sum_diff = abs_sum_diff_16x1(acc_diff);
-            if (abs_sum_diff > sum_diff_thresh) {
-              return COPY_BLOCK;
-            }
-          } else {
-            return COPY_BLOCK;
-          }
-        }
-    }
-
-    vp8_copy_mem8x8(running_avg_start, avg_stride, sig_start, sig_stride);
-    return FILTER_BLOCK;
-}
--- a/vp8/encoder/x86/quantize_sse2.c
+++ b/vp8/encoder/x86/quantize_sse2.c
@@ -26,10 +26,11 @@
        int cmp = (x[z] < boost) | (y[z] == 0); \
        zbin_boost_ptr++; \
        if (cmp) \
-            break; \
+            goto select_eob_end_##i; \
        qcoeff_ptr[z] = y[z]; \
        eob = i; \
        zbin_boost_ptr = b->zrun_zbin_boost; \
+        select_eob_end_##i:; \
    } while (0)

 void vp8_regular_quantize_b_sse2(BLOCK *b, BLOCKD *d)
--- a/vp8/encoder/x86/quantize_sse4.asm
+++ b/vp8/encoder/x86/quantize_sse4.asm
@@ -0,0 +1,256 @@
+;
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+;
+;  Use of this source code is governed by a BSD-style license and patent
+;  grant that can be found in the LICENSE file in the root of the source
+;  tree. All contributing project authors may be found in the AUTHORS
+;  file in the root of the source tree.
+;
+
+
+%include "vpx_ports/x86_abi_support.asm"
+%include "vp8_asm_enc_offsets.asm"
+
+
+; void vp8_regular_quantize_b_sse4(BLOCK *b, BLOCKD *d)  -- b is arg 0, d is arg 1
+; Quantizes the 16 words at b->coeff, visiting them in zig-zag order. Stores
+; the quantized values in d->qcoeff, their products with d->dequant in
+; d->dqcoeff, and the end-of-block count as one byte through d->eob.
+global sym(vp8_regular_quantize_b_sse4) PRIVATE
+sym(vp8_regular_quantize_b_sse4):
+
+%if ABI_IS_32BIT
+    push        rbp
+    mov         rbp, rsp
+    GET_GOT     rbx
+    push        rdi
+    push        rsi
+    ; 32-bit builds stage the quantized words in a 32-byte stack buffer.
+    ALIGN_STACK 16, rax
+    %define qcoeff      0 ; 32
+    %define stack_size 32
+    sub         rsp, stack_size
+%else
+  %if LIBVPX_YASM_WIN64
+    SAVE_XMM 8, u
+    push        rdi
+    push        rsi
+  %endif
+%endif
+    ; end prolog
+
+%if ABI_IS_32BIT
+    mov         rdi, arg(0)                 ; BLOCK *b
+    mov         rsi, arg(1)                 ; BLOCKD *d
+%else
+  %if LIBVPX_YASM_WIN64
+    mov         rdi, rcx                    ; BLOCK *b
+    mov         rsi, rdx                    ; BLOCKD *d
+  %else
+    ;mov         rdi, rdi                    ; BLOCK *b
+    ;mov         rsi, rsi                    ; BLOCKD *d
+  %endif
+%endif
+    ; rdi = b, rsi = d. rdi is reused as scratch once all b fields are loaded.
+    mov         rax, [rdi + vp8_block_coeff]  ; b->coeff
+    mov         rcx, [rdi + vp8_block_zbin]  ; b->zbin
+    mov         rdx, [rdi + vp8_block_round]  ; b->round
+    movd        xmm7, [rdi + vp8_block_zbin_extra]  ; b->zbin_extra
+
+    ; z: the 16 input coefficients (two registers of 8 words each)
+    movdqa      xmm0, [rax]
+    movdqa      xmm1, [rax + 16]
+
+    ; broadcast zbin_extra (zbin_oq_value) to all 8 word lanes of xmm7
+    pshuflw     xmm7, xmm7, 0
+    punpcklwd   xmm7, xmm7
+
+    movdqa      xmm2, xmm0
+    movdqa      xmm3, xmm1
+
+    ; sz = z >> 15: 0xFFFF in lanes where z is negative, 0 otherwise
+    psraw       xmm0, 15
+    psraw       xmm1, 15
+
+    ; (z ^ sz)
+    pxor        xmm2, xmm0
+    pxor        xmm3, xmm1
+
+    ; x = abs(z) = (z ^ sz) - sz
+    psubw       xmm2, xmm0
+    psubw       xmm3, xmm1
+
+    ; zbin
+    movdqa      xmm4, [rcx]
+    movdqa      xmm5, [rcx + 16]
+
+    ; *zbin_ptr + zbin_oq_value
+    paddw       xmm4, xmm7
+    paddw       xmm5, xmm7
+    ; copy x; xmm7 no longer needs to hold zbin_oq_value after the adds above
+    movdqa      xmm6, xmm2
+    movdqa      xmm7, xmm3
+
+    ; x - (*zbin_ptr + zbin_oq_value); ZIGZAG_LOOP compares this to the boost
+    psubw       xmm6, xmm4
+    psubw       xmm7, xmm5
+
+    ; round
+    movdqa      xmm4, [rdx]
+    movdqa      xmm5, [rdx + 16]
+
+    mov         rax, [rdi + vp8_block_quant_shift]  ; b->quant_shift
+    mov         rcx, [rdi + vp8_block_quant]  ; b->quant
+    mov         rdx, [rdi + vp8_block_zrun_zbin_boost]  ; b->zrun_zbin_boost
+
+    ; x + round
+    paddw       xmm2, xmm4
+    paddw       xmm3, xmm5
+
+    ; quant
+    movdqa      xmm4, [rcx]
+    movdqa      xmm5, [rcx + 16]
+
+    ; y = ((x + round) * quant_ptr) >> 16 (pmulhw keeps the high word)
+    pmulhw      xmm4, xmm2
+    pmulhw      xmm5, xmm3
+
+    ; y += (x + round)
+    paddw       xmm2, xmm4
+    paddw       xmm3, xmm5
+
+    pxor        xmm4, xmm4  ; zero the qcoeff accumulator
+%if ABI_IS_32BIT
+    movdqa      [rsp + qcoeff], xmm4
+    movdqa      [rsp + qcoeff + 16], xmm4
+%else
+    pxor        xmm8, xmm8
+%endif
+
+    ; quant_shift: 16 bytes; ZIGZAG_LOOP reads byte rc as the shift count
+    movdqa      xmm5, [rax]
+
+    ; keep the start of zrun_zbin_boost in rax so rdx can be rewound to it
+    mov         rax, rdx
+; ZIGZAG_LOOP rc, word lane, y register, (x - zbin) register, qcoeff register
+%macro ZIGZAG_LOOP 5
+    ; x - zbin for this coefficient
+    pextrw      ecx, %4, %2
+
+    ; if (x - zbin >= *zbin_boost_ptr)
+    sub         cx, WORD PTR[rdx]           ; subtract the current boost
+    lea         rdx, [rdx + 2]              ; zbin_boost_ptr++ (lea keeps flags)
+    jl          .rq_zigzag_loop_%1          ; below threshold: qcoeff stays 0
+
+    pextrw      edi, %3, %2                 ; y
+
+    ; downshift by quant_shift[rc]
+    pextrb      ecx, xmm5, %1               ; quant_shift[rc]
+    sar         edi, cl                     ; also sets Z bit
+    je          .rq_zigzag_loop_%1          ; !y
+%if ABI_IS_32BIT
+    mov         WORD PTR[rsp + qcoeff + %1 *2], di
+%else
+    pinsrw      %5, edi, %2                 ; qcoeff[rc]
+%endif
+    mov         rdx, rax                    ; reset to b->zrun_zbin_boost
+.rq_zigzag_loop_%1:
+%endmacro
+; in vp8_default_zig_zag1d order: see vp8/common/entropy.c
+ZIGZAG_LOOP  0, 0, xmm2, xmm6, xmm4
+ZIGZAG_LOOP  1, 1, xmm2, xmm6, xmm4
+ZIGZAG_LOOP  4, 4, xmm2, xmm6, xmm4
+ZIGZAG_LOOP  8, 0, xmm3, xmm7, xmm8
+ZIGZAG_LOOP  5, 5, xmm2, xmm6, xmm4
+ZIGZAG_LOOP  2, 2, xmm2, xmm6, xmm4
+ZIGZAG_LOOP  3, 3, xmm2, xmm6, xmm4
+ZIGZAG_LOOP  6, 6, xmm2, xmm6, xmm4
+ZIGZAG_LOOP  9, 1, xmm3, xmm7, xmm8
+ZIGZAG_LOOP 12, 4, xmm3, xmm7, xmm8
+ZIGZAG_LOOP 13, 5, xmm3, xmm7, xmm8
+ZIGZAG_LOOP 10, 2, xmm3, xmm7, xmm8
+ZIGZAG_LOOP  7, 7, xmm2, xmm6, xmm4
+ZIGZAG_LOOP 11, 3, xmm3, xmm7, xmm8
+ZIGZAG_LOOP 14, 6, xmm3, xmm7, xmm8
+ZIGZAG_LOOP 15, 7, xmm3, xmm7, xmm8
+
+    mov         rcx, [rsi + vp8_blockd_dequant]  ; d->dequant
+    mov         rdi, [rsi + vp8_blockd_dqcoeff]  ; d->dqcoeff
+
+%if ABI_IS_32BIT
+    movdqa      xmm4, [rsp + qcoeff]
+    movdqa      xmm5, [rsp + qcoeff + 16]
+%else
+    %define     xmm5 xmm8
+%endif
+    ; xmm4/xmm5 name the unsigned qcoeff halves (xmm5 aliases xmm8 on 64-bit)
+    ; y ^ sz
+    pxor        xmm4, xmm0
+    pxor        xmm5, xmm1
+    ; qcoeff = (y ^ sz) - sz: restore the sign of z
+    psubw       xmm4, xmm0
+    psubw       xmm5, xmm1
+
+    ; dequant (overwrites sz, which is not read again)
+    movdqa      xmm0, [rcx]
+    movdqa      xmm1, [rcx + 16]
+
+    mov         rcx, [rsi + vp8_blockd_qcoeff]  ; d->qcoeff
+    ; dqcoeff = qcoeff * dequant (low 16 bits of each product)
+    pmullw      xmm0, xmm4
+    pmullw      xmm1, xmm5
+
+    ; store qcoeff
+    movdqa      [rcx], xmm4
+    movdqa      [rcx + 16], xmm5
+
+    ; store dqcoeff
+    movdqa      [rdi], xmm0
+    movdqa      [rdi + 16], xmm1
+
+    mov         rcx, [rsi + vp8_blockd_eob]  ; d->eob (pointer to the eob byte)
+
+    ; select the last value (in zig_zag order) for EOB
+    pxor        xmm6, xmm6
+    pcmpeqw     xmm4, xmm6  ; 0xFFFF where qcoeff == 0
+    pcmpeqw     xmm5, xmm6  ; 0xFFFF where qcoeff == 0
+
+    packsswb    xmm4, xmm5  ; one byte per coefficient
+    pshufb      xmm4, [GLOBAL(zig_zag1d)]  ; reorder bytes into zig-zag order
+    pmovmskb    edx, xmm4  ; bit i set: scan position i is zero
+    xor         rdi, rdi
+    mov         eax, -1
+    xor         dx, ax  ; invert low 16 bits: bit i set = position i nonzero
+    bsr         eax, edx  ; highest nonzero scan position (unused if mask is 0)
+    sub         edi, edx  ; edi = -mask
+    sar         edi, 31  ; all ones if any bit was set, else 0
+    add         eax, 1
+    and         eax, edi  ; eob = last nonzero position + 1, or 0 if none
+
+    mov         BYTE PTR [rcx], al          ; store eob
+
+    ; begin epilog
+%if ABI_IS_32BIT
+    add         rsp, stack_size
+    pop         rsp  ; presumably the rsp saved by ALIGN_STACK -- TODO confirm
+
+    pop         rsi
+    pop         rdi
+    RESTORE_GOT
+    pop         rbp
+%else
+  %undef xmm5
+  %if LIBVPX_YASM_WIN64
+    pop         rsi
+    pop         rdi
+    RESTORE_XMM
+  %endif
+%endif
+
+    ret
+
+SECTION_RODATA
+align 16
+; pshufb control for the EOB search; mirrors vp8_default_zig_zag1d (vp8/common/entropy.c)
+zig_zag1d:
+    db 0, 1, 4, 8, 5, 2, 3, 6, 9, 12, 13, 10, 7, 11, 14, 15
--- a/vp8/encoder/x86/quantize_sse4.c
+++ b/vp8/encoder/x86/quantize_sse4.c
@@ -1,128 +0,0 @@
-/*
- *  Copyright (c) 2012 The WebM project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-
-#include <smmintrin.h> /* SSE4.1 */
-
-#include "./vp8_rtcd.h"
-#include "vp8/encoder/block.h"
-#include "vp8/common/entropy.h" /* vp8_default_inv_zig_zag */
-
-#define SELECT_EOB(i, z, x, y, q) \
-    do { \
-        short boost = *zbin_boost_ptr; \
-        short x_z = _mm_extract_epi16(x, z); \
-        short y_z = _mm_extract_epi16(y, z); \
-        int cmp = (x_z < boost) | (y_z == 0); \
-        zbin_boost_ptr++; \
-        if (cmp) \
-            break; \
-        q = _mm_insert_epi16(q, y_z, z); \
-        eob = i; \
-        zbin_boost_ptr = b->zrun_zbin_boost; \
-    } while (0)
-
-void vp8_regular_quantize_b_sse4_1(BLOCK *b, BLOCKD *d) {
-    char eob = 0;
-    short *zbin_boost_ptr  = b->zrun_zbin_boost;
-
-    __m128i sz0, x0, sz1, x1, y0, y1, x_minus_zbin0, x_minus_zbin1,
-            dqcoeff0, dqcoeff1;
-    __m128i quant_shift0 = _mm_load_si128((__m128i *)(b->quant_shift));
-    __m128i quant_shift1 = _mm_load_si128((__m128i *)(b->quant_shift + 8));
-    __m128i z0 = _mm_load_si128((__m128i *)(b->coeff));
-    __m128i z1 = _mm_load_si128((__m128i *)(b->coeff+8));
-    __m128i zbin_extra = _mm_cvtsi32_si128(b->zbin_extra);
-    __m128i zbin0 = _mm_load_si128((__m128i *)(b->zbin));
-    __m128i zbin1 = _mm_load_si128((__m128i *)(b->zbin + 8));
-    __m128i round0 = _mm_load_si128((__m128i *)(b->round));
-    __m128i round1 = _mm_load_si128((__m128i *)(b->round + 8));
-    __m128i quant0 = _mm_load_si128((__m128i *)(b->quant));
-    __m128i quant1 = _mm_load_si128((__m128i *)(b->quant + 8));
-    __m128i dequant0 = _mm_load_si128((__m128i *)(d->dequant));
-    __m128i dequant1 = _mm_load_si128((__m128i *)(d->dequant + 8));
-    __m128i qcoeff0 = _mm_setzero_si128();
-    __m128i qcoeff1 = _mm_setzero_si128();
-
-    /* Duplicate to all lanes. */
-    zbin_extra = _mm_shufflelo_epi16(zbin_extra, 0);
-    zbin_extra = _mm_unpacklo_epi16(zbin_extra, zbin_extra);
-
-    /* Sign of z: z >> 15 */
-    sz0 = _mm_srai_epi16(z0, 15);
-    sz1 = _mm_srai_epi16(z1, 15);
-
-    /* x = abs(z): (z ^ sz) - sz */
-    x0 = _mm_xor_si128(z0, sz0);
-    x1 = _mm_xor_si128(z1, sz1);
-    x0 = _mm_sub_epi16(x0, sz0);
-    x1 = _mm_sub_epi16(x1, sz1);
-
-    /* zbin[] + zbin_extra */
-    zbin0 = _mm_add_epi16(zbin0, zbin_extra);
-    zbin1 = _mm_add_epi16(zbin1, zbin_extra);
-
-    /* In C x is compared to zbin where zbin = zbin[] + boost + extra. Rebalance
-     * the equation because boost is the only value which can change:
-     * x - (zbin[] + extra) >= boost */
-    x_minus_zbin0 = _mm_sub_epi16(x0, zbin0);
-    x_minus_zbin1 = _mm_sub_epi16(x1, zbin1);
-
-    /* All the remaining calculations are valid whether they are done now with
-     * simd or later inside the loop one at a time. */
-    x0 = _mm_add_epi16(x0, round0);
-    x1 = _mm_add_epi16(x1, round1);
-
-    y0 = _mm_mulhi_epi16(x0, quant0);
-    y1 = _mm_mulhi_epi16(x1, quant1);
-
-    y0 = _mm_add_epi16(y0, x0);
-    y1 = _mm_add_epi16(y1, x1);
-
-    /* Instead of shifting each value independently we convert the scaling
-     * factor with 1 << (16 - shift) so we can use multiply/return high half. */
-    y0 = _mm_mulhi_epi16(y0, quant_shift0);
-    y1 = _mm_mulhi_epi16(y1, quant_shift1);
-
-    /* Return the sign: (y ^ sz) - sz */
-    y0 = _mm_xor_si128(y0, sz0);
-    y1 = _mm_xor_si128(y1, sz1);
-    y0 = _mm_sub_epi16(y0, sz0);
-    y1 = _mm_sub_epi16(y1, sz1);
-
-    /* The loop gets unrolled anyway. Avoid the vp8_default_zig_zag1d lookup. */
-    SELECT_EOB(1, 0, x_minus_zbin0, y0, qcoeff0);
-    SELECT_EOB(2, 1, x_minus_zbin0, y0, qcoeff0);
-    SELECT_EOB(3, 4, x_minus_zbin0, y0, qcoeff0);
-    SELECT_EOB(4, 0, x_minus_zbin1, y1, qcoeff1);
-    SELECT_EOB(5, 5, x_minus_zbin0, y0, qcoeff0);
-    SELECT_EOB(6, 2, x_minus_zbin0, y0, qcoeff0);
-    SELECT_EOB(7, 3, x_minus_zbin0, y0, qcoeff0);
-    SELECT_EOB(8, 6, x_minus_zbin0, y0, qcoeff0);
-    SELECT_EOB(9, 1, x_minus_zbin1, y1, qcoeff1);
-    SELECT_EOB(10, 4, x_minus_zbin1, y1, qcoeff1);
-    SELECT_EOB(11, 5, x_minus_zbin1, y1, qcoeff1);
-    SELECT_EOB(12, 2, x_minus_zbin1, y1, qcoeff1);
-    SELECT_EOB(13, 7, x_minus_zbin0, y0, qcoeff0);
-    SELECT_EOB(14, 3, x_minus_zbin1, y1, qcoeff1);
-    SELECT_EOB(15, 6, x_minus_zbin1, y1, qcoeff1);
-    SELECT_EOB(16, 7, x_minus_zbin1, y1, qcoeff1);
-
-    _mm_store_si128((__m128i *)(d->qcoeff), qcoeff0);
-    _mm_store_si128((__m128i *)(d->qcoeff + 8), qcoeff1);
-
-    dqcoeff0 = _mm_mullo_epi16(qcoeff0, dequant0);
-    dqcoeff1 = _mm_mullo_epi16(qcoeff1, dequant1);
-
-    _mm_store_si128((__m128i *)(d->dqcoeff), dqcoeff0);
-    _mm_store_si128((__m128i *)(d->dqcoeff + 8), dqcoeff1);
-
-    *d->eob = eob;
-}
--- a/vp8/vp8_common.mk
+++ b/vp8/vp8_common.mk
@@ -107,6 +107,7 @@ VP8_COMMON_SRCS-$(HAVE_SSSE3) += common/x86/variance_impl_ssse3.asm
 VP8_COMMON_SRCS-$(HAVE_SSE4_1) += common/x86/sad_sse4.asm

 ifeq ($(CONFIG_POSTPROC),yes)
+VP8_COMMON_SRCS-$(ARCH_X86)$(ARCH_X86_64) += common/x86/postproc_x86.c
 VP8_COMMON_SRCS-$(HAVE_MMX) += common/x86/postproc_mmx.asm
 VP8_COMMON_SRCS-$(HAVE_SSE2) += common/x86/mfqe_sse2.asm
 VP8_COMMON_SRCS-$(HAVE_SSE2) += common/x86/postproc_sse2.asm
--- a/vp8/vp8_dx_iface.c
+++ b/vp8/vp8_dx_iface.c
@@ -60,7 +60,6 @@ struct vpx_codec_alg_priv
    vpx_decrypt_cb          decrypt_cb;
    void                    *decrypt_state;
    vpx_image_t             img;
-    int                     flushed;
    int                     img_setup;
    struct frame_buffers    yv12_frame_buffers;
    void                    *user_priv;
@@ -90,7 +89,6 @@ static void vp8_init_ctx(vpx_codec_ctx_t *ctx)
    ctx->priv->alg_priv->decrypt_cb = NULL;
    ctx->priv->alg_priv->decrypt_state = NULL;
    ctx->priv->init_flags = ctx->init_flags;
-    ctx->priv->alg_priv->flushed = 0;

    if (ctx->config.dec)
    {
@@ -329,13 +327,6 @@ static vpx_codec_err_t vp8_decode(vpx_codec_alg_priv_t  *ctx,
    unsigned int resolution_change = 0;
    unsigned int w, h;

-    if (data == NULL && data_sz == 0) {
-      ctx->flushed = 1;
-      return VPX_CODEC_OK;
-    }
-
-    /* Reset flushed when receiving a valid frame */
-    ctx->flushed = 0;

    /* Update the input fragment data */
    if(update_fragments(ctx, data, data_sz, &res) <= 0)
--- a/vp8/vp8cx.mk
+++ b/vp8/vp8cx.mk
@@ -89,7 +89,6 @@ VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/dct_sse2.asm
 VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/fwalsh_sse2.asm
 VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/quantize_sse2.c
 VP8_CX_SRCS-$(HAVE_SSSE3) += encoder/x86/quantize_ssse3.c
-VP8_CX_SRCS-$(HAVE_SSE4_1) += encoder/x86/quantize_sse4.c

 ifeq ($(CONFIG_TEMPORAL_DENOISING),yes)
 VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/denoising_sse2.c
@@ -98,6 +97,7 @@ endif
 VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/subtract_sse2.asm
 VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/temporal_filter_apply_sse2.asm
 VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp8_enc_stubs_sse2.c
+VP8_CX_SRCS-$(HAVE_SSE4_1) += encoder/x86/quantize_sse4.asm
 VP8_CX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += encoder/x86/quantize_mmx.asm
 VP8_CX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += encoder/x86/encodeopt.asm
 VP8_CX_SRCS-$(ARCH_X86_64) += encoder/x86/ssim_opt_x86_64.asm
--- a/vp9/common/arm/neon/vp9_convolve_neon.c
+++ b/vp9/common/arm/neon/vp9_convolve_neon.c
@@ -25,14 +25,12 @@ void vp9_convolve8_neon(const uint8_t *src, ptrdiff_t src_stride,
  // Account for the vertical phase needing 3 lines prior and 4 lines post
  int intermediate_height = h + 7;

-  if (x_step_q4 != 16 || y_step_q4 != 16) {
-    vp9_convolve8_c(src, src_stride,
-                    dst, dst_stride,
-                    filter_x, x_step_q4,
-                    filter_y, y_step_q4,
-                    w, h);
-    return;
-  }
+  if (x_step_q4 != 16 || y_step_q4 != 16)
+    return vp9_convolve8_c(src, src_stride,
+                           dst, dst_stride,
+                           filter_x, x_step_q4,
+                           filter_y, y_step_q4,
+                           w, h);

  /* Filter starting 3 lines back. The neon implementation will ignore the
   * given height and filter a multiple of 4 lines. Since this goes in to
@@ -59,14 +57,12 @@ void vp9_convolve8_avg_neon(const uint8_t *src, ptrdiff_t src_stride,
  DECLARE_ALIGNED_ARRAY(8, uint8_t, temp, 64 * 72);
  int intermediate_height = h + 7;

-  if (x_step_q4 != 16 || y_step_q4 != 16) {
-    vp9_convolve8_avg_c(src, src_stride,
-                        dst, dst_stride,
-                        filter_x, x_step_q4,
-                        filter_y, y_step_q4,
-                        w, h);
-    return;
-  }
+  if (x_step_q4 != 16 || y_step_q4 != 16)
+    return vp9_convolve8_avg_c(src, src_stride,
+                               dst, dst_stride,
+                               filter_x, x_step_q4,
+                               filter_y, y_step_q4,
+                               w, h);

  /* This implementation has the same issues as above. In addition, we only want
   * to average the values after both passes.
--- a/vp9/common/arm/neon/vp9_loopfilter_16_neon.c
+++ b/vp9/common/arm/neon/vp9_loopfilter_16_neon.c
@@ -9,7 +9,6 @@
 */

 #include "./vp9_rtcd.h"
-#include "vpx/vpx_integer.h"

 void vp9_lpf_horizontal_8_dual_neon(uint8_t *s, int p /* pitch */,
                                    const uint8_t *blimit0,
--- a/vp9/common/vp9_alloccommon.c
+++ b/vp9/common/vp9_alloccommon.c
@@ -12,37 +12,11 @@
 #include "vpx_mem/vpx_mem.h"

 #include "vp9/common/vp9_blockd.h"
-#include "vp9/common/vp9_common.h"
 #include "vp9/common/vp9_entropymode.h"
 #include "vp9/common/vp9_entropymv.h"
 #include "vp9/common/vp9_onyxc_int.h"
 #include "vp9/common/vp9_systemdependent.h"

-// TODO(hkuang): Don't need to lock the whole pool after implementing atomic
-// frame reference count.
-void lock_buffer_pool(BufferPool *const pool) {
-#if CONFIG_MULTITHREAD
-  pthread_mutex_lock(&pool->pool_mutex);
-#else
-  (void)pool;
-#endif
-}
-
-void unlock_buffer_pool(BufferPool *const pool) {
-#if CONFIG_MULTITHREAD
-  pthread_mutex_unlock(&pool->pool_mutex);
-#else
-  (void)pool;
-#endif
-}
-
-static INLINE void alloc_mi_array(VP9_COMMON *cm, int mi_size, int idx) {
-  CHECK_MEM_ERROR(cm, cm->mip_array[idx],
-                  vpx_calloc(mi_size, sizeof(*cm->mip_array[0])));
-  CHECK_MEM_ERROR(cm, cm->mi_grid_base_array[idx],
-                  vpx_calloc(mi_size, sizeof(*cm->mi_grid_base_array[0])));
-}
-
 static void clear_mi_border(const VP9_COMMON *cm, MODE_INFO *mi) {
  int i;

@@ -75,47 +49,40 @@ static void setup_mi(VP9_COMMON *cm) {
  vpx_memset(cm->mi_grid_base, 0, cm->mi_stride * (cm->mi_rows + 1) *
                                      sizeof(*cm->mi_grid_base));

-  // Only clear mi border in non frame-parallel decode. In frame-parallel
-  // decode, prev_mip is managed by previous decoding thread. While in
-  // non frame-parallel decode, prev_mip and mip are both managed by
-  // current decoding thread.
-  if (!cm->frame_parallel_decode)
-    clear_mi_border(cm, cm->prev_mip);
+  clear_mi_border(cm, cm->prev_mip);
 }

 static int alloc_mi(VP9_COMMON *cm, int mi_size) {
  int i;

-  for (i = 0; i < NUM_PING_PONG_BUFFERS; ++i) {
-    // Delay reallocation as another thread is accessing prev_mi.
-    if (cm->frame_parallel_decode && i == cm->prev_mi_idx) {
-      cm->update_prev_mi = 1;
-      continue;
-    }
-    alloc_mi_array(cm, mi_size, i);
+  for (i = 0; i < 2; ++i) {
+    cm->mip_array[i] =
+        (MODE_INFO *)vpx_calloc(mi_size, sizeof(*cm->mip));
+    if (cm->mip_array[i] == NULL)
+      return 1;
+
+    cm->mi_grid_base_array[i] =
+        (MODE_INFO **)vpx_calloc(mi_size, sizeof(*cm->mi_grid_base));
+    if (cm->mi_grid_base_array[i] == NULL)
+      return 1;
  }

+  // Init the index.
+  cm->mi_idx = 0;
+  cm->prev_mi_idx = 1;
+
  cm->mip = cm->mip_array[cm->mi_idx];
+  cm->prev_mip = cm->mip_array[cm->prev_mi_idx];
  cm->mi_grid_base = cm->mi_grid_base_array[cm->mi_idx];
-
-  if (!cm->frame_parallel_decode) {
-    cm->mi_idx = 0;
-    cm->prev_mi_idx = 1;
-    // In frame-parallel decode, prev_mip comes from another thread,
-    // so current decoding thread should not touch it.
-    cm->prev_mip = cm->mip_array[cm->prev_mi_idx];
-    cm->prev_mi_grid_base = cm->mi_grid_base_array[cm->prev_mi_idx];
-  }
+  cm->prev_mi_grid_base = cm->mi_grid_base_array[cm->prev_mi_idx];

  return 0;
 }

-static void free_mi(VP9_COMMON *cm, int decode_done) {
+static void free_mi(VP9_COMMON *cm) {
  int i;

-  for (i = 0; i < NUM_PING_PONG_BUFFERS; ++i) {
-    if (cm->frame_parallel_decode && i == cm->prev_mi_idx && !decode_done)
-      continue;
+  for (i = 0; i < 2; ++i) {
    vpx_free(cm->mip_array[i]);
    cm->mip_array[i] = NULL;
    vpx_free(cm->mi_grid_base_array[i]);
@@ -123,71 +90,30 @@ static void free_mi(VP9_COMMON *cm, int decode_done) {
  }

  cm->mip = NULL;
+  cm->prev_mip = NULL;
  cm->mi_grid_base = NULL;
-
-  if (!cm->frame_parallel_decode) {
-    cm->prev_mip = NULL;
-    cm->prev_mi_grid_base = NULL;
-  }
-}
-
-static int alloc_seg_map(VP9_COMMON *cm, int seg_map_size) {
-  int i;
-
-  for (i = 0; i < NUM_PING_PONG_BUFFERS; ++i) {
-    cm->seg_map_array[i] = (uint8_t *)vpx_calloc(seg_map_size, 1);
-    if (cm->seg_map_array[i] == NULL)
-      return 1;
-  }
-
-  // Init the index.
-  cm->seg_map_idx = 0;
-  cm->prev_seg_map_idx = 1;
-
-  cm->current_frame_seg_map = cm->seg_map_array[cm->seg_map_idx];
-
-  if (!cm->frame_parallel_decode) {
-    cm->last_frame_seg_map = cm->seg_map_array[cm->prev_seg_map_idx];
-  }
-
-  return 0;
-}
-
-static void free_seg_map(VP9_COMMON *cm) {
-  int i;
-
-  for (i = 0; i < NUM_PING_PONG_BUFFERS; ++i) {
-    vpx_free(cm->seg_map_array[i]);
-    cm->seg_map_array[i] = NULL;
-  }
-
-  cm->current_frame_seg_map = NULL;
-
-  if (!cm->frame_parallel_decode) {
-    cm->last_frame_seg_map = NULL;
-  }
+  cm->prev_mi_grid_base = NULL;
 }

 void vp9_free_frame_buffers(VP9_COMMON *cm) {
  int i;
-  BufferPool *const pool = cm->buffer_pool;

  for (i = 0; i < FRAME_BUFFERS; ++i) {
-    vp9_free_frame_buffer(&pool->frame_bufs[i].buf);
+    vp9_free_frame_buffer(&cm->frame_bufs[i].buf);

-    if (pool->frame_bufs[i].ref_count > 0 &&
-        pool->frame_bufs[i].raw_frame_buffer.data != NULL) {
-      pool->release_fb_cb(pool->cb_priv, &pool->frame_bufs[i].raw_frame_buffer);
-      pool->frame_bufs[i].ref_count = 0;
+    if (cm->frame_bufs[i].ref_count > 0 &&
+        cm->frame_bufs[i].raw_frame_buffer.data != NULL) {
+      cm->release_fb_cb(cm->cb_priv, &cm->frame_bufs[i].raw_frame_buffer);
+      cm->frame_bufs[i].ref_count = 0;
    }
  }

  vp9_free_frame_buffer(&cm->post_proc_buffer);
-}

-void vp9_free_context_buffers(VP9_COMMON *cm) {
-  free_mi(cm, 1);
-  free_seg_map(cm);
+  free_mi(cm);
+
+  vpx_free(cm->last_frame_seg_map);
+  cm->last_frame_seg_map = NULL;

  vpx_free(cm->above_context);
  cm->above_context = NULL;
@@ -199,27 +125,25 @@ void vp9_free_context_buffers(VP9_COMMON *cm) {
 int vp9_resize_frame_buffers(VP9_COMMON *cm, int width, int height) {
  const int aligned_width = ALIGN_POWER_OF_TWO(width, MI_SIZE_LOG2);
  const int aligned_height = ALIGN_POWER_OF_TWO(height, MI_SIZE_LOG2);
-#if CONFIG_INTERNAL_STATS || CONFIG_VP9_POSTPROC
  const int ss_x = cm->subsampling_x;
  const int ss_y = cm->subsampling_y;

-  // TODO(agrange): this should be conditionally allocated.
  if (vp9_realloc_frame_buffer(&cm->post_proc_buffer, width, height, ss_x, ss_y,
                               VP9_DEC_BORDER_IN_PIXELS, NULL, NULL, NULL) < 0)
    goto fail;
-#endif

  set_mb_mi(cm, aligned_width, aligned_height);

-  free_mi(cm, 0);
+  free_mi(cm);
  if (alloc_mi(cm, cm->mi_stride * (cm->mi_rows + MI_BLOCK_SIZE)))
    goto fail;

  setup_mi(cm);

  // Create the segmentation map structure and set to 0.
-  free_seg_map(cm);
-  if (alloc_seg_map(cm, cm->mi_rows * cm->mi_cols))
+  vpx_free(cm->last_frame_seg_map);
+  cm->last_frame_seg_map = (uint8_t *)vpx_calloc(cm->mi_rows * cm->mi_cols, 1);
+  if (!cm->last_frame_seg_map)
    goto fail;

  vpx_free(cm->above_context);
@@ -241,58 +165,36 @@ int vp9_resize_frame_buffers(VP9_COMMON *cm, int width, int height) {

 fail:
  vp9_free_frame_buffers(cm);
-  vp9_free_context_buffers(cm);
  return 1;
 }

-static void init_frame_bufs(VP9_COMMON *cm) {
-  int i;
-  RefCntBuffer *const frame_bufs = cm->buffer_pool->frame_bufs;
-
-  cm->new_fb_idx = FRAME_BUFFERS - 1;
-  frame_bufs[cm->new_fb_idx].ref_count = 1;
-
-  for (i = 0; i < REF_FRAMES; ++i) {
-    cm->ref_frame_map[i] = i;
-    frame_bufs[i].ref_count = 1;
-  }
-}
-
 int vp9_alloc_frame_buffers(VP9_COMMON *cm, int width, int height) {
-  int i;
+  const int aligned_width = ALIGN_POWER_OF_TWO(width, MI_SIZE_LOG2);
+  const int aligned_height = ALIGN_POWER_OF_TWO(height, MI_SIZE_LOG2);
  const int ss_x = cm->subsampling_x;
  const int ss_y = cm->subsampling_y;
-  RefCntBuffer *const frame_bufs = cm->buffer_pool->frame_bufs;
+  int i;

  vp9_free_frame_buffers(cm);

-  for (i = 0; i < FRAME_BUFFERS; ++i) {
-    frame_bufs[i].ref_count = 0;
-    if (vp9_alloc_frame_buffer(&frame_bufs[i].buf, width, height,
+  for (i = 0; i < FRAME_BUFFERS; i++) {
+    cm->frame_bufs[i].ref_count = 0;
+    if (vp9_alloc_frame_buffer(&cm->frame_bufs[i].buf, width, height,
                               ss_x, ss_y, VP9_ENC_BORDER_IN_PIXELS) < 0)
      goto fail;
  }

-  init_frame_bufs(cm);
+  cm->new_fb_idx = FRAME_BUFFERS - 1;
+  cm->frame_bufs[cm->new_fb_idx].ref_count = 1;
+
+  for (i = 0; i < REF_FRAMES; i++) {
+    cm->ref_frame_map[i] = i;
+    cm->frame_bufs[i].ref_count = 1;
+  }

-#if CONFIG_INTERNAL_STATS || CONFIG_VP9_POSTPROC
  if (vp9_alloc_frame_buffer(&cm->post_proc_buffer, width, height, ss_x, ss_y,
                             VP9_ENC_BORDER_IN_PIXELS) < 0)
    goto fail;
-#endif
-
-  return 0;
-
- fail:
-  vp9_free_frame_buffers(cm);
-  return 1;
-}
-
-int vp9_alloc_context_buffers(VP9_COMMON *cm, int width, int height) {
-  const int aligned_width = ALIGN_POWER_OF_TWO(width, MI_SIZE_LOG2);
-  const int aligned_height = ALIGN_POWER_OF_TWO(height, MI_SIZE_LOG2);
-
-  vp9_free_context_buffers(cm);

  set_mb_mi(cm, aligned_width, aligned_height);

@@ -322,13 +224,13 @@ int vp9_alloc_context_buffers(VP9_COMMON *cm, int width, int height) {
  return 0;

 fail:
-  vp9_free_context_buffers(cm);
+  vp9_free_frame_buffers(cm);
  return 1;
 }

 void vp9_remove_common(VP9_COMMON *cm) {
  vp9_free_frame_buffers(cm);
-  vp9_free_context_buffers(cm);
+  vp9_free_internal_frame_buffers(&cm->int_frame_buffers);
 }

 void vp9_update_frame_size(VP9_COMMON *cm) {
@@ -339,27 +241,13 @@ void vp9_update_frame_size(VP9_COMMON *cm) {
  setup_mi(cm);

  // Initialize the previous frame segment map to 0.
-  if (cm->current_frame_seg_map)
-    vpx_memset(cm->current_frame_seg_map, 0, cm->mi_rows * cm->mi_cols);
+  if (cm->last_frame_seg_map)
+    vpx_memset(cm->last_frame_seg_map, 0, cm->mi_rows * cm->mi_cols);
 }

 void vp9_swap_mi_and_prev_mi(VP9_COMMON *cm) {
  // Swap indices.
  const int tmp = cm->mi_idx;
-
-  // Only used in frame parallel decode: Update the prev_mi buffer if
-  // needed. The worker that was accessing it must already finish decoding.
-  // So it can be resized safely now.
-  if (cm->update_prev_mi) {
-    const int mi_size = cm->mi_stride * (cm->mi_rows + MI_BLOCK_SIZE);
-    vpx_free(cm->mip_array[cm->prev_mi_idx]);
-    vpx_free(cm->mi_grid_base_array[cm->prev_mi_idx]);
-    cm->mip_array[cm->prev_mi_idx] = NULL;
-    cm->mi_grid_base_array[cm->prev_mi_idx] = NULL;
-    alloc_mi_array(cm, mi_size, cm->prev_mi_idx);
-    cm->update_prev_mi = 0;
-  }
-
  cm->mi_idx = cm->prev_mi_idx;
  cm->prev_mi_idx = tmp;

@@ -375,13 +263,3 @@ void vp9_swap_mi_and_prev_mi(VP9_COMMON *cm) {
  cm->mi_grid_visible = cm->mi_grid_base + cm->mi_stride + 1;
  cm->prev_mi_grid_visible = cm->prev_mi_grid_base + cm->mi_stride + 1;
 }
-
-void vp9_swap_current_and_last_seg_map(VP9_COMMON *cm) {
-  // Swap indices.
-  const int tmp = cm->seg_map_idx;
-  cm->seg_map_idx = cm->prev_seg_map_idx;
-  cm->prev_seg_map_idx = tmp;
-
-  cm->current_frame_seg_map = cm->seg_map_array[cm->seg_map_idx];
-  cm->last_frame_seg_map = cm->seg_map_array[cm->prev_seg_map_idx];
-}
--- a/vp9/common/vp9_alloccommon.h
+++ b/vp9/common/vp9_alloccommon.h
@@ -23,19 +23,13 @@ void vp9_remove_common(struct VP9Common *cm);
 int vp9_resize_frame_buffers(struct VP9Common *cm, int width, int height);

 int vp9_alloc_frame_buffers(struct VP9Common *cm, int width, int height);
-int vp9_alloc_state_buffers(struct VP9Common *cm, int width, int height);
-int vp9_alloc_context_buffers(struct VP9Common *cm, int width, int height);

 void vp9_free_frame_buffers(struct VP9Common *cm);
-void vp9_free_state_buffers(struct VP9Common *cm);
-void vp9_free_context_buffers(struct VP9Common *cm);

 void vp9_update_frame_size(struct VP9Common *cm);

 void vp9_swap_mi_and_prev_mi(struct VP9Common *cm);

-void vp9_swap_current_and_last_seg_map(struct VP9Common *cm);
-
 #ifdef __cplusplus
 }  // extern "C"
 #endif
--- a/vp9/common/vp9_blockd.c
+++ b/vp9/common/vp9_blockd.c
@@ -44,7 +44,7 @@ void vp9_foreach_transformed_block_in_plane(
  // block and transform sizes, in number of 4x4 blocks log 2 ("*_b")
  // 4x4=0, 8x8=2, 16x16=4, 32x32=6, 64x64=8
  // transform size varies per plane, look it up in a common way.
-  const TX_SIZE tx_size = plane ? get_uv_tx_size(mbmi, pd)
+  const TX_SIZE tx_size = plane ? get_uv_tx_size(mbmi)
                                : mbmi->tx_size;
  const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd);
  const int num_4x4_w = num_4x4_blocks_wide_lookup[plane_bsize];
--- a/vp9/common/vp9_blockd.h
+++ b/vp9/common/vp9_blockd.h
@@ -270,20 +270,18 @@ static INLINE TX_TYPE get_tx_type_4x4(PLANE_TYPE plane_type,

 void vp9_setup_block_planes(MACROBLOCKD *xd, int ss_x, int ss_y);

-static INLINE TX_SIZE get_uv_tx_size_impl(TX_SIZE y_tx_size, BLOCK_SIZE bsize,
-                                          int xss, int yss) {
+static INLINE TX_SIZE get_uv_tx_size_impl(TX_SIZE y_tx_size, BLOCK_SIZE bsize) {
  if (bsize < BLOCK_8X8) {
    return TX_4X4;
  } else {
-    const BLOCK_SIZE plane_bsize = ss_size_lookup[bsize][xss][yss];
+    // TODO(dkovalev): Assuming YUV420 (ss_x == 1, ss_y == 1)
+    const BLOCK_SIZE plane_bsize = ss_size_lookup[bsize][1][1];
    return MIN(y_tx_size, max_txsize_lookup[plane_bsize]);
  }
 }

-static INLINE TX_SIZE get_uv_tx_size(const MB_MODE_INFO *mbmi,
-                                     const struct macroblockd_plane *pd) {
-  return get_uv_tx_size_impl(mbmi->tx_size, mbmi->sb_type, pd->subsampling_x,
-                             pd->subsampling_y);
+static INLINE TX_SIZE get_uv_tx_size(const MB_MODE_INFO *mbmi) {
+  return get_uv_tx_size_impl(mbmi->tx_size, mbmi->sb_type);
 }

 static INLINE BLOCK_SIZE get_plane_block_size(BLOCK_SIZE bsize,
--- a/vp9/common/vp9_convolve.c
+++ b/vp9/common/vp9_convolve.c
@@ -117,25 +117,17 @@ static void convolve(const uint8_t *src, ptrdiff_t src_stride,
                     const InterpKernel *const y_filters,
                     int y0_q4, int y_step_q4,
                     int w, int h) {
-  // Note: Fixed size intermediate buffer, temp, places limits on parameters.
-  // 2d filtering proceeds in 2 steps:
-  //   (1) Interpolate horizontally into an intermediate buffer, temp.
-  //   (2) Interpolate temp vertically to derive the sub-pixel result.
-  // Deriving the maximum number of rows in the temp buffer (135):
-  // --Smallest scaling factor is x1/2 ==> y_step_q4 = 32 (Normative).
-  // --Largest block size is 64x64 pixels.
-  // --64 rows in the downscaled frame span a distance of (64 - 1) * 32 in the
-  //   original frame (in 1/16th pixel units).
-  // --Must round-up because block may be located at sub-pixel position.
-  // --Require an additional SUBPEL_TAPS rows for the 8-tap filter tails.
-  // --((64 - 1) * 32 + 15) >> 4 + 8 = 135.
-  uint8_t temp[135 * 64];
+  // Fixed size intermediate buffer places limits on parameters.
+  // Maximum intermediate_height is 324, for y_step_q4 == 80,
+  // h == 64, taps == 8.
+  // y_step_q4 of 80 allows for 1/10 scale for 5 layer svc
+  uint8_t temp[64 * 324];
  int intermediate_height = (((h - 1) * y_step_q4 + 15) >> 4) + SUBPEL_TAPS;

  assert(w <= 64);
  assert(h <= 64);
-  assert(y_step_q4 <= 32);
-  assert(x_step_q4 <= 32);
+  assert(y_step_q4 <= 80);
+  assert(x_step_q4 <= 80);

  if (intermediate_height < h)
    intermediate_height = h;
--- a/vp9/common/vp9_entropymode.c
+++ b/vp9/common/vp9_entropymode.c
@@ -439,13 +439,9 @@ void vp9_setup_past_independence(VP9_COMMON *cm) {
  int i;
  vp9_clearall_segfeatures(&cm->seg);
  cm->seg.abs_delta = SEGMENT_DELTADATA;
-
-  if (cm->last_frame_seg_map && !cm->frame_parallel_decode)
+  if (cm->last_frame_seg_map)
    vpx_memset(cm->last_frame_seg_map, 0, (cm->mi_rows * cm->mi_cols));

-  if (cm->current_frame_seg_map)
-    vpx_memset(cm->current_frame_seg_map, 0, (cm->mi_rows * cm->mi_cols));
-
  // Reset the mode ref deltas for loop filter
  vp9_zero(lf->last_ref_deltas);
  vp9_zero(lf->last_mode_deltas);
@@ -468,7 +464,7 @@ void vp9_setup_past_independence(VP9_COMMON *cm) {
    cm->frame_contexts[cm->frame_context_idx] = cm->fc;
  }

-  if (frame_is_intra_only(cm) && !cm->frame_parallel_decode)
+  if (frame_is_intra_only(cm))
    vpx_memset(cm->prev_mip, 0, cm->mi_stride * (cm->mi_rows + 1) *
                                    sizeof(*cm->prev_mip));

--- a/vp9/common/vp9_frame_buffers.c
+++ b/vp9/common/vp9_frame_buffers.c
@@ -76,7 +76,6 @@ int vp9_get_frame_buffer(void *cb_priv, size_t min_size,
 int vp9_release_frame_buffer(void *cb_priv, vpx_codec_frame_buffer_t *fb) {
  InternalFrameBuffer *const int_fb = (InternalFrameBuffer *)fb->priv;
  (void)cb_priv;
-  if (int_fb)
-    int_fb->in_use = 0;
+  int_fb->in_use = 0;
  return 0;
 }
--- a/vp9/common/vp9_loopfilter.c
+++ b/vp9/common/vp9_loopfilter.c
@@ -502,7 +502,7 @@ static void build_masks(const loop_filter_info_n *const lfi_n,
  const MB_MODE_INFO *mbmi = &mi->mbmi;
  const BLOCK_SIZE block_size = mbmi->sb_type;
  const TX_SIZE tx_size_y = mbmi->tx_size;
-  const TX_SIZE tx_size_uv = get_uv_tx_size_impl(tx_size_y, block_size, 1, 1);
+  const TX_SIZE tx_size_uv = get_uv_tx_size(mbmi);
  const int filter_level = get_filter_level(lfi_n, mbmi);
  uint64_t *const left_y = &lfm->left_y[tx_size_y];
  uint64_t *const above_y = &lfm->above_y[tx_size_y];
@@ -939,7 +939,7 @@ static void filter_block_plane_non420(VP9_COMMON *cm,
          !(r & (num_8x8_blocks_high_lookup[sb_type] - 1)) : 1;
      const int skip_this_r = skip_this && !block_edge_above;
      const TX_SIZE tx_size = (plane->plane_type == PLANE_TYPE_UV)
-                            ? get_uv_tx_size(&mi[0].mbmi, plane)
+                            ? get_uv_tx_size(&mi[0].mbmi)
                            : mi[0].mbmi.tx_size;
      const int skip_border_4x4_c = ss_x && mi_col + c == cm->mi_cols - 1;
      const int skip_border_4x4_r = ss_y && mi_row + r == cm->mi_rows - 1;
--- a/vp9/common/vp9_mvref_common.c
+++ b/vp9/common/vp9_mvref_common.c
@@ -11,18 +11,195 @@

 #include "vp9/common/vp9_mvref_common.h"

+#define MVREF_NEIGHBOURS 8
+
+typedef struct position {
+  int row;
+  int col;
+} POSITION;
+
+typedef enum {
+  BOTH_ZERO = 0,
+  ZERO_PLUS_PREDICTED = 1,
+  BOTH_PREDICTED = 2,
+  NEW_PLUS_NON_INTRA = 3,
+  BOTH_NEW = 4,
+  INTRA_PLUS_NON_INTRA = 5,
+  BOTH_INTRA = 6,
+  INVALID_CASE = 9
+} motion_vector_context;
+
+// This is used to figure out a context for the ref blocks. The code flattens
+// an array that would have 3 possible counts (0, 1 & 2) for 3 choices by
+// adding 9 for each intra block, 3 for each zero mv and 1 for each new
+// motion vector. This single number is then converted into a context
+// with a single lookup ( counter_to_context ).
+static const int mode_2_counter[MB_MODE_COUNT] = {
+  9,  // DC_PRED
+  9,  // V_PRED
+  9,  // H_PRED
+  9,  // D45_PRED
+  9,  // D135_PRED
+  9,  // D117_PRED
+  9,  // D153_PRED
+  9,  // D207_PRED
+  9,  // D63_PRED
+  9,  // TM_PRED
+  0,  // NEARESTMV
+  0,  // NEARMV
+  3,  // ZEROMV
+  1,  // NEWMV
+};
+
+// There are 3^3 different combinations of 3 counts that can be either 0,1 or
+// 2. However the actual count can never be greater than 2 so the highest
+// counter we need is 18. Unreachable counters map to INVALID_CASE (9).
+static const int counter_to_context[19] = {
+  BOTH_PREDICTED,  // 0
+  NEW_PLUS_NON_INTRA,  // 1
+  BOTH_NEW,  // 2
+  ZERO_PLUS_PREDICTED,  // 3
+  NEW_PLUS_NON_INTRA,  // 4
+  INVALID_CASE,  // 5
+  BOTH_ZERO,  // 6
+  INVALID_CASE,  // 7
+  INVALID_CASE,  // 8
+  INTRA_PLUS_NON_INTRA,  // 9
+  INTRA_PLUS_NON_INTRA,  // 10
+  INVALID_CASE,  // 11
+  INTRA_PLUS_NON_INTRA,  // 12
+  INVALID_CASE,  // 13
+  INVALID_CASE,  // 14
+  INVALID_CASE,  // 15
+  INVALID_CASE,  // 16
+  INVALID_CASE,  // 17
+  BOTH_INTRA  // 18
+};
+
+static const POSITION mv_ref_blocks[BLOCK_SIZES][MVREF_NEIGHBOURS] = {
+  // 4X4
+  {{-1, 0}, {0, -1}, {-1, -1}, {-2, 0}, {0, -2}, {-2, -1}, {-1, -2}, {-2, -2}},
+  // 4X8
+  {{-1, 0}, {0, -1}, {-1, -1}, {-2, 0}, {0, -2}, {-2, -1}, {-1, -2}, {-2, -2}},
+  // 8X4
+  {{-1, 0}, {0, -1}, {-1, -1}, {-2, 0}, {0, -2}, {-2, -1}, {-1, -2}, {-2, -2}},
+  // 8X8
+  {{-1, 0}, {0, -1}, {-1, -1}, {-2, 0}, {0, -2}, {-2, -1}, {-1, -2}, {-2, -2}},
+  // 8X16
+  {{0, -1}, {-1, 0}, {1, -1}, {-1, -1}, {0, -2}, {-2, 0}, {-2, -1}, {-1, -2}},
+  // 16X8
+  {{-1, 0}, {0, -1}, {-1, 1}, {-1, -1}, {-2, 0}, {0, -2}, {-1, -2}, {-2, -1}},
+  // 16X16
+  {{-1, 0}, {0, -1}, {-1, 1}, {1, -1}, {-1, -1}, {-3, 0}, {0, -3}, {-3, -3}},
+  // 16X32
+  {{0, -1}, {-1, 0}, {2, -1}, {-1, -1}, {-1, 1}, {0, -3}, {-3, 0}, {-3, -3}},
+  // 32X16
+  {{-1, 0}, {0, -1}, {-1, 2}, {-1, -1}, {1, -1}, {-3, 0}, {0, -3}, {-3, -3}},
+  // 32X32
+  {{-1, 1}, {1, -1}, {-1, 2}, {2, -1}, {-1, -1}, {-3, 0}, {0, -3}, {-3, -3}},
+  // 32X64
+  {{0, -1}, {-1, 0}, {4, -1}, {-1, 2}, {-1, -1}, {0, -3}, {-3, 0}, {2, -1}},
+  // 64X32
+  {{-1, 0}, {0, -1}, {-1, 4}, {2, -1}, {-1, -1}, {-3, 0}, {0, -3}, {-1, 2}},
+  // 64X64
+  {{-1, 3}, {3, -1}, {-1, 4}, {4, -1}, {-1, -1}, {-1, 0}, {0, -1}, {-1, 6}}
+};
+
+static const int idx_n_column_to_subblock[4][2] = {
+  {1, 2},
+  {1, 3},
+  {3, 2},
+  {3, 3}
+};
+
+// clamp_mv_ref
+#define MV_BORDER (16 << 3)  // Allow 16 pels in 1/8th pel units
+
+static void clamp_mv_ref(MV *mv, const MACROBLOCKD *xd) {
+  clamp_mv(mv, xd->mb_to_left_edge - MV_BORDER,
+               xd->mb_to_right_edge + MV_BORDER,
+               xd->mb_to_top_edge - MV_BORDER,
+               xd->mb_to_bottom_edge + MV_BORDER);
+}
+
+// This function returns either the appropriate sub block's mv or the whole
+// block's mv, depending on whether sb_type < BLOCK_8X8 and block_idx >= 0.
+static INLINE int_mv get_sub_block_mv(const MODE_INFO *candidate, int which_mv,
+                                      int search_col, int block_idx) {
+  return block_idx >= 0 && candidate->mbmi.sb_type < BLOCK_8X8
+          ? candidate->bmi[idx_n_column_to_subblock[block_idx][search_col == 0]]
+              .as_mv[which_mv]
+          : candidate->mbmi.mv[which_mv];
+}
+
+
+// Performs mv sign inversion if indicated by the reference frame combination.
+static INLINE int_mv scale_mv(const MB_MODE_INFO *mbmi, int ref,
+                              const MV_REFERENCE_FRAME this_ref_frame,
+                              const int *ref_sign_bias) {
+  int_mv mv = mbmi->mv[ref];
+  if (ref_sign_bias[mbmi->ref_frame[ref]] != ref_sign_bias[this_ref_frame]) {
+    mv.as_mv.row *= -1;
+    mv.as_mv.col *= -1;
+  }
+  return mv;
+}
+
+// This macro is used to add a motion vector to the mv_ref list if it isn't
+// already in the list.  If it's the second motion vector it will also
+// skip all additional processing and jump to the Done label.
+#define ADD_MV_REF_LIST(mv) \
+  do { \
+    if (refmv_count) { \
+      if ((mv).as_int != mv_ref_list[0].as_int) { \
+        mv_ref_list[refmv_count] = (mv); \
+        goto Done; \
+      } \
+    } else { \
+      mv_ref_list[refmv_count++] = (mv); \
+    } \
+  } while (0)
+
+// If either reference frame is different, not INTRA, and they
+// are different from each other, scale and add the mv to our list.
+#define IF_DIFF_REF_FRAME_ADD_MV(mbmi) \
+  do { \
+    if (is_inter_block(mbmi)) { \
+      if ((mbmi)->ref_frame[0] != ref_frame) \
+        ADD_MV_REF_LIST(scale_mv((mbmi), 0, ref_frame, ref_sign_bias)); \
+      if (has_second_ref(mbmi) && \
+          (mbmi)->ref_frame[1] != ref_frame && \
+          (mbmi)->mv[1].as_int != (mbmi)->mv[0].as_int) \
+        ADD_MV_REF_LIST(scale_mv((mbmi), 1, ref_frame, ref_sign_bias)); \
+    } \
+  } while (0)
+
+
+// Checks that the given mi_row, mi_col and search point
+// are inside the borders of the tile.
+static INLINE int is_inside(const TileInfo *const tile,
+                            int mi_col, int mi_row, int mi_rows,
+                            const POSITION *mi_pos) {
+  return !(mi_row + mi_pos->row < 0 ||
+           mi_col + mi_pos->col < tile->mi_col_start ||
+           mi_row + mi_pos->row >= mi_rows ||
+           mi_col + mi_pos->col >= tile->mi_col_end);
+}
+
 // This function searches the neighbourhood of a given MB/SB
 // to try and find candidate reference vectors.
 static void find_mv_refs_idx(const VP9_COMMON *cm, const MACROBLOCKD *xd,
                             const TileInfo *const tile,
                             MODE_INFO *mi, MV_REFERENCE_FRAME ref_frame,
                             int_mv *mv_ref_list,
-                             int block, int mi_row, int mi_col,
-                             find_mv_refs_sync sync, void *const data) {
+                             int block, int mi_row, int mi_col) {
  const int *ref_sign_bias = cm->ref_frame_sign_bias;
  int i, refmv_count = 0;
-  MODE_INFO *prev_mi = NULL;
-  MB_MODE_INFO *prev_mbmi = NULL;
+  const MODE_INFO *prev_mi = cm->coding_use_prev_mi && cm->prev_mi
+        ? cm->prev_mi_grid_visible[mi_row * xd->mi_stride + mi_col]
+        : NULL;
+  const MB_MODE_INFO *const prev_mbmi = prev_mi ? &prev_mi->mbmi : NULL;
+

  const POSITION *const mv_ref_search = mv_ref_blocks[mi->mbmi.sb_type];

@@ -69,14 +246,6 @@ static void find_mv_refs_idx(const VP9_COMMON *cm, const MACROBLOCKD *xd,
    }
  }

-  // Synchronize here for frame parallel decode if sync function is provided.
-  if (sync != NULL) {
-    sync(data, mi_row);
-  }
-  prev_mi = cm->coding_use_prev_mi && cm->prev_mi ?
-            cm->prev_mi_grid_visible[mi_row * xd->mi_stride + mi_col] : NULL;
-  prev_mbmi = prev_mi ? &prev_mi->mbmi : NULL;
-
  // Check the last frame's mode and mv info.
  if (prev_mbmi) {
    if (prev_mbmi->ref_frame[0] == ref_frame)
@@ -115,13 +284,12 @@ static void find_mv_refs_idx(const VP9_COMMON *cm, const MACROBLOCKD *xd,
 }

 void vp9_find_mv_refs(const VP9_COMMON *cm, const MACROBLOCKD *xd,
-                      const TileInfo *const tile,
-                      MODE_INFO *mi, MV_REFERENCE_FRAME ref_frame,
-                      int_mv *mv_ref_list,
-                      int mi_row, int mi_col,
-                      find_mv_refs_sync sync, void *const data) {
+                                    const TileInfo *const tile,
+                                    MODE_INFO *mi, MV_REFERENCE_FRAME ref_frame,
+                                    int_mv *mv_ref_list,
+                                    int mi_row, int mi_col) {
  find_mv_refs_idx(cm, xd, tile, mi, ref_frame, mv_ref_list, -1,
-                   mi_row, mi_col, sync, data);
+                   mi_row, mi_col);
 }

 static void lower_mv_precision(MV *mv, int allow_hp) {
@@ -159,7 +327,7 @@ void vp9_append_sub8x8_mvs_for_idx(VP9_COMMON *cm, MACROBLOCKD *xd,
  assert(MAX_MV_REF_CANDIDATES == 2);

  find_mv_refs_idx(cm, xd, tile, mi, mi->mbmi.ref_frame[ref], mv_list, block,
-                   mi_row, mi_col, NULL, NULL);
+                   mi_row, mi_col);

  near->as_int = 0;
  switch (block) {
--- a/vp9/common/vp9_mvref_common.h
+++ b/vp9/common/vp9_mvref_common.h
@@ -21,181 +21,6 @@ extern "C" {
 #define RIGHT_BOTTOM_MARGIN ((VP9_ENC_BORDER_IN_PIXELS -\
                                VP9_INTERP_EXTEND) << 3)

-#define MVREF_NEIGHBOURS 8
-
-typedef struct position {
-  int row;
-  int col;
-} POSITION;
-
-typedef enum {
-  BOTH_ZERO = 0,
-  ZERO_PLUS_PREDICTED = 1,
-  BOTH_PREDICTED = 2,
-  NEW_PLUS_NON_INTRA = 3,
-  BOTH_NEW = 4,
-  INTRA_PLUS_NON_INTRA = 5,
-  BOTH_INTRA = 6,
-  INVALID_CASE = 9
-} motion_vector_context;
-
-// This is used to figure out a context for the ref blocks. The code flattens
-// an array that would have 3 possible counts (0, 1 & 2) for 3 choices by
-// adding 9 for each intra block, 3 for each zero mv and 1 for each new
-// motion vector. This single number is then converted into a context
-// with a single lookup ( counter_to_context ).
-static const int mode_2_counter[MB_MODE_COUNT] = {
-  9,  // DC_PRED
-  9,  // V_PRED
-  9,  // H_PRED
-  9,  // D45_PRED
-  9,  // D135_PRED
-  9,  // D117_PRED
-  9,  // D153_PRED
-  9,  // D207_PRED
-  9,  // D63_PRED
-  9,  // TM_PRED
-  0,  // NEARESTMV
-  0,  // NEARMV
-  3,  // ZEROMV
-  1,  // NEWMV
-};
-
-// There are 3^3 different combinations of 3 counts that can be either 0,1 or
-// 2. However the actual count can never be greater than 2 so the highest
-// counter we need is 18. 9 is an invalid counter that's never used.
-static const int counter_to_context[19] = {
-  BOTH_PREDICTED,  // 0
-  NEW_PLUS_NON_INTRA,  // 1
-  BOTH_NEW,  // 2
-  ZERO_PLUS_PREDICTED,  // 3
-  NEW_PLUS_NON_INTRA,  // 4
-  INVALID_CASE,  // 5
-  BOTH_ZERO,  // 6
-  INVALID_CASE,  // 7
-  INVALID_CASE,  // 8
-  INTRA_PLUS_NON_INTRA,  // 9
-  INTRA_PLUS_NON_INTRA,  // 10
-  INVALID_CASE,  // 11
-  INTRA_PLUS_NON_INTRA,  // 12
-  INVALID_CASE,  // 13
-  INVALID_CASE,  // 14
-  INVALID_CASE,  // 15
-  INVALID_CASE,  // 16
-  INVALID_CASE,  // 17
-  BOTH_INTRA  // 18
-};
-
-static const POSITION mv_ref_blocks[BLOCK_SIZES][MVREF_NEIGHBOURS] = {
-  // 4X4
-  {{-1, 0}, {0, -1}, {-1, -1}, {-2, 0}, {0, -2}, {-2, -1}, {-1, -2}, {-2, -2}},
-  // 4X8
-  {{-1, 0}, {0, -1}, {-1, -1}, {-2, 0}, {0, -2}, {-2, -1}, {-1, -2}, {-2, -2}},
-  // 8X4
-  {{-1, 0}, {0, -1}, {-1, -1}, {-2, 0}, {0, -2}, {-2, -1}, {-1, -2}, {-2, -2}},
-  // 8X8
-  {{-1, 0}, {0, -1}, {-1, -1}, {-2, 0}, {0, -2}, {-2, -1}, {-1, -2}, {-2, -2}},
-  // 8X16
-  {{0, -1}, {-1, 0}, {1, -1}, {-1, -1}, {0, -2}, {-2, 0}, {-2, -1}, {-1, -2}},
-  // 16X8
-  {{-1, 0}, {0, -1}, {-1, 1}, {-1, -1}, {-2, 0}, {0, -2}, {-1, -2}, {-2, -1}},
-  // 16X16
-  {{-1, 0}, {0, -1}, {-1, 1}, {1, -1}, {-1, -1}, {-3, 0}, {0, -3}, {-3, -3}},
-  // 16X32
-  {{0, -1}, {-1, 0}, {2, -1}, {-1, -1}, {-1, 1}, {0, -3}, {-3, 0}, {-3, -3}},
-  // 32X16
-  {{-1, 0}, {0, -1}, {-1, 2}, {-1, -1}, {1, -1}, {-3, 0}, {0, -3}, {-3, -3}},
-  // 32X32
-  {{-1, 1}, {1, -1}, {-1, 2}, {2, -1}, {-1, -1}, {-3, 0}, {0, -3}, {-3, -3}},
-  // 32X64
-  {{0, -1}, {-1, 0}, {4, -1}, {-1, 2}, {-1, -1}, {0, -3}, {-3, 0}, {2, -1}},
-  // 64X32
-  {{-1, 0}, {0, -1}, {-1, 4}, {2, -1}, {-1, -1}, {-3, 0}, {0, -3}, {-1, 2}},
-  // 64X64
-  {{-1, 3}, {3, -1}, {-1, 4}, {4, -1}, {-1, -1}, {-1, 0}, {0, -1}, {-1, 6}}
-};
-
-static const int idx_n_column_to_subblock[4][2] = {
-  {1, 2},
-  {1, 3},
-  {3, 2},
-  {3, 3}
-};
-
-// clamp_mv_ref
-#define MV_BORDER (16 << 3)  // Allow 16 pels in 1/8th pel units
-
-static void clamp_mv_ref(MV *mv, const MACROBLOCKD *xd) {
-  clamp_mv(mv, xd->mb_to_left_edge - MV_BORDER,
-               xd->mb_to_right_edge + MV_BORDER,
-               xd->mb_to_top_edge - MV_BORDER,
-               xd->mb_to_bottom_edge + MV_BORDER);
-}
-
-// This function returns either the appropriate sub block or block's mv
-// on whether the block_size < 8x8 and we have check_sub_blocks set.
-static INLINE int_mv get_sub_block_mv(const MODE_INFO *candidate, int which_mv,
-                                      int search_col, int block_idx) {
-  return block_idx >= 0 && candidate->mbmi.sb_type < BLOCK_8X8
-          ? candidate->bmi[idx_n_column_to_subblock[block_idx][search_col == 0]]
-              .as_mv[which_mv]
-          : candidate->mbmi.mv[which_mv];
-}
-
-
-// Performs mv sign inversion if indicated by the reference frame combination.
-static INLINE int_mv scale_mv(const MB_MODE_INFO *mbmi, int ref,
-                              const MV_REFERENCE_FRAME this_ref_frame,
-                              const int *ref_sign_bias) {
-  int_mv mv = mbmi->mv[ref];
-  if (ref_sign_bias[mbmi->ref_frame[ref]] != ref_sign_bias[this_ref_frame]) {
-    mv.as_mv.row *= -1;
-    mv.as_mv.col *= -1;
-  }
-  return mv;
-}
-
-// This macro is used to add a motion vector mv_ref list if it isn't
-// already in the list.  If it's the second motion vector it will also
-// skip all additional processing and jump to done!
-#define ADD_MV_REF_LIST(mv) \
-  do { \
-    if (refmv_count) { \
-      if ((mv).as_int != mv_ref_list[0].as_int) { \
-        mv_ref_list[refmv_count] = (mv); \
-        goto Done; \
-      } \
-    } else { \
-      mv_ref_list[refmv_count++] = (mv); \
-    } \
-  } while (0)
-
-// If either reference frame is different, not INTRA, and they
-// are different from each other scale and add the mv to our list.
-#define IF_DIFF_REF_FRAME_ADD_MV(mbmi) \
-  do { \
-    if (is_inter_block(mbmi)) { \
-      if ((mbmi)->ref_frame[0] != ref_frame) \
-        ADD_MV_REF_LIST(scale_mv((mbmi), 0, ref_frame, ref_sign_bias)); \
-      if (has_second_ref(mbmi) && \
-          (mbmi)->ref_frame[1] != ref_frame && \
-          (mbmi)->mv[1].as_int != (mbmi)->mv[0].as_int) \
-        ADD_MV_REF_LIST(scale_mv((mbmi), 1, ref_frame, ref_sign_bias)); \
-    } \
-  } while (0)
-
-
-// Checks that the given mi_row, mi_col and search point
-// are inside the borders of the tile.
-static INLINE int is_inside(const TileInfo *const tile,
-                            int mi_col, int mi_row, int mi_rows,
-                            const POSITION *mi_pos) {
-  return !(mi_row + mi_pos->row < 0 ||
-           mi_col + mi_pos->col < tile->mi_col_start ||
-           mi_row + mi_pos->row >= mi_rows ||
-           mi_col + mi_pos->col >= tile->mi_col_end);
-}
-
 // TODO(jingning): this mv clamping function should be block size dependent.
 static INLINE void clamp_mv2(MV *mv, const MACROBLOCKD *xd) {
  clamp_mv(mv, xd->mb_to_left_edge - LEFT_TOP_MARGIN,
@@ -204,12 +29,10 @@ static INLINE void clamp_mv2(MV *mv, const MACROBLOCKD *xd) {
               xd->mb_to_bottom_edge + RIGHT_BOTTOM_MARGIN);
 }

-typedef void (*find_mv_refs_sync)(void *const data, int mi_row);
 void vp9_find_mv_refs(const VP9_COMMON *cm, const MACROBLOCKD *xd,
                      const TileInfo *const tile,
                      MODE_INFO *mi, MV_REFERENCE_FRAME ref_frame,
-                      int_mv *mv_ref_list, int mi_row, int mi_col,
-                      find_mv_refs_sync sync, void *const data);
+                      int_mv *mv_ref_list, int mi_row, int mi_col);

 // check a list of motion vectors by sad score using a number rows of pixels
 // above and a number cols of pixels in the left to select the one with best
--- a/vp9/common/vp9_onyxc_int.h
+++ b/vp9/common/vp9_onyxc_int.h
@@ -20,7 +20,6 @@
 #include "vp9/common/vp9_entropymode.h"
 #include "vp9/common/vp9_frame_buffers.h"
 #include "vp9/common/vp9_quant_common.h"
-#include "vp9/common/vp9_thread.h"
 #include "vp9/common/vp9_tile_common.h"

 #if CONFIG_VP9_POSTPROC
@@ -36,19 +35,14 @@ extern "C" {
 #define REF_FRAMES_LOG2 3
 #define REF_FRAMES (1 << REF_FRAMES_LOG2)

-// 4 scratch frames for the new frames to support a maximum of 4 cores decoding
-// in parallel, 3 for scaled references on the encoder.
-// TODO(hkuang): Add ondemand frame buffers instead of hardcoding the number
-// of framebuffers.
+// 1 scratch frame for the new frame, 3 for scaled references on the encoder
 // TODO(jkoleszar): These 3 extra references could probably come from the
 // normal reference pool.
-#define FRAME_BUFFERS (REF_FRAMES + 7)
+#define FRAME_BUFFERS (REF_FRAMES + 4)

 #define FRAME_CONTEXTS_LOG2 2
 #define FRAME_CONTEXTS (1 << FRAME_CONTEXTS_LOG2)

-#define NUM_PING_PONG_BUFFERS 2
-
 extern const struct {
  PARTITION_CONTEXT above;
  PARTITION_CONTEXT left;
@@ -67,40 +61,8 @@ typedef struct {
  int ref_count;
  vpx_codec_frame_buffer_t raw_frame_buffer;
  YV12_BUFFER_CONFIG buf;
-
-  // The Following variables will only be used in frame parallel decode.
-
-  // frame_worker_owner indicates which FrameWorker owns this buffer. NULL means
-  // that no FrameWorker owns, or is decoding, this buffer.
-  VP9Worker *frame_worker_owner;
-
-  // row and col indicate which position frame has been decoded to in real
-  // pixel unit. They are reset to -1 when decoding begins and set to INT_MAX
-  // when the frame is fully decoded.
-  int row;
-  int col;
 } RefCntBuffer;

-typedef struct {
-  // Protect BufferPool from being accessed by several FrameWorkers at
-  // the same time during frame parallel decode.
-  // TODO(hkuang): Try to use atomic variable instead of locking the whole pool.
-#if CONFIG_MULTITHREAD
-  pthread_mutex_t pool_mutex;
-#endif
-
-  // Private data associated with the frame buffer callbacks.
-  void *cb_priv;
-
-  vpx_get_frame_buffer_cb_fn_t get_fb_cb;
-  vpx_release_frame_buffer_cb_fn_t release_fb_cb;
-
-  RefCntBuffer frame_bufs[FRAME_BUFFERS];
-
-  // Frame buffers allocated internally by the codec.
-  InternalFrameBufferList int_frame_buffers;
-} BufferPool;
-
 typedef struct VP9Common {
  struct vpx_internal_error_info  error;

@@ -127,11 +89,9 @@ typedef struct VP9Common {

  YV12_BUFFER_CONFIG *frame_to_show;

-  int ref_frame_map[REF_FRAMES]; /* maps fb_idx to reference slot */
+  RefCntBuffer frame_bufs[FRAME_BUFFERS];

-  // Prepare ref_frame_map for the next frame.
-  // Only used in frame parallel decode.
-  int next_ref_frame_map[REF_FRAMES];
+  int ref_frame_map[REF_FRAMES]; /* maps fb_idx to reference slot */

  // TODO(jkoleszar): could expand active_ref_idx to 4, with 0 as intra, and
  // roll new_fb_idx into it.
@@ -184,8 +144,8 @@ typedef struct VP9Common {

  int mi_idx;
  int prev_mi_idx;
-  MODE_INFO *mip_array[NUM_PING_PONG_BUFFERS];
-  MODE_INFO **mi_grid_base_array[NUM_PING_PONG_BUFFERS];
+  MODE_INFO *mip_array[2];
+  MODE_INFO **mi_grid_base_array[2];

  MODE_INFO *mip; /* Base of allocated array */
  MODE_INFO *mi;  /* Corresponds to upper left visible macroblock */
@@ -197,16 +157,8 @@ typedef struct VP9Common {
  MODE_INFO **prev_mi_grid_base;
  MODE_INFO **prev_mi_grid_visible;

-  // Used in frame parallel decode for delay resizing prev_mi.
-  int update_prev_mi;
-
  // Persistent mb segment id map used in prediction.
-  int seg_map_idx;
-  int prev_seg_map_idx;
-
-  uint8_t *seg_map_array[NUM_PING_PONG_BUFFERS];
-  uint8_t *last_frame_seg_map;
-  uint8_t *current_frame_seg_map;
+  unsigned char *last_frame_seg_map;

  INTERP_FILTER interp_filter;

@@ -219,10 +171,6 @@ typedef struct VP9Common {
  struct loopfilter lf;
  struct segmentation seg;

-  // TODO(hkuang): Remove this as it is the same as frame_parallel_decode
-  // in pbi.
-  int frame_parallel_decode;  // frame-based threading.
-
  // Context probabilities for reference frame prediction
  int allow_comp_inter_inter;
  MV_REFERENCE_FRAME comp_fixed_ref;
@@ -254,34 +202,34 @@ typedef struct VP9Common {

  int log2_tile_cols, log2_tile_rows;

-  // External BufferPool passed from outside.
-  BufferPool *buffer_pool;
+  // Private data associated with the frame buffer callbacks.
+  void *cb_priv;
+  vpx_get_frame_buffer_cb_fn_t get_fb_cb;
+  vpx_release_frame_buffer_cb_fn_t release_fb_cb;
+
+  // Handles memory for the codec.
+  InternalFrameBufferList int_frame_buffers;

  PARTITION_CONTEXT *above_seg_context;
  ENTROPY_CONTEXT *above_context;
+
+#if CONFIG_TRANSCODE
+  FILE *mi_array_pf;
+#endif
 } VP9_COMMON;

-// TODO(hkuang): Don't need to lock the whole pool after implementing atomic
-// frame reference count.
-void lock_buffer_pool(BufferPool *const pool);
-void unlock_buffer_pool(BufferPool *const pool);
-
 static INLINE YV12_BUFFER_CONFIG *get_frame_new_buffer(VP9_COMMON *cm) {
-  return &cm->buffer_pool->frame_bufs[cm->new_fb_idx].buf;
+  return &cm->frame_bufs[cm->new_fb_idx].buf;
 }

 static INLINE int get_free_fb(VP9_COMMON *cm) {
-  RefCntBuffer *const frame_bufs = cm->buffer_pool->frame_bufs;
  int i;
-
-  lock_buffer_pool(cm->buffer_pool);
-  for (i = 0; i < FRAME_BUFFERS; ++i)
-    if (frame_bufs[i].ref_count == 0)
+  for (i = 0; i < FRAME_BUFFERS; i++)
+    if (cm->frame_bufs[i].ref_count == 0)
      break;

  assert(i < FRAME_BUFFERS);
-  frame_bufs[i].ref_count = 1;
-  unlock_buffer_pool(cm->buffer_pool);
+  cm->frame_bufs[i].ref_count = 1;
  return i;
 }

@@ -366,6 +314,7 @@ static INLINE void update_partition_context(MACROBLOCKD *xd,
  PARTITION_CONTEXT *const above_ctx = xd->above_seg_context + mi_col;
  PARTITION_CONTEXT *const left_ctx = xd->left_seg_context + (mi_row & MI_MASK);

+  // num_4x4_blocks_wide_lookup[bsize] / 2
  const int bs = num_8x8_blocks_wide_lookup[bsize];

  // update the partition context at the end notes. set partition bits
--- a/vp9/common/vp9_quant_common.c
+++ b/vp9/common/vp9_quant_common.c
@@ -12,6 +12,7 @@
 #include "vp9/common/vp9_quant_common.h"
 #include "vp9/common/vp9_seg_common.h"

+#if 1
 static const int16_t dc_qlookup[QINDEX_RANGE] = {
  4,       8,    8,    9,   10,   11,   12,   12,
  13,     14,   15,   16,   17,   18,   19,   19,
@@ -82,6 +83,44 @@ static const int16_t ac_qlookup[QINDEX_RANGE] = {
  1597, 1628, 1660, 1692, 1725, 1759, 1793, 1828,
 };

+void vp9_init_quant_tables(void) { }
+#else
+static int16_t dc_qlookup[QINDEX_RANGE];
+static int16_t ac_qlookup[QINDEX_RANGE];
+
+#define ACDC_MIN 8
+
+// TODO(dkovalev) move to common and reuse
+static double poly3(double a, double b, double c, double d, double x) {
+  return a*x*x*x + b*x*x + c*x + d;
+}
+
+void vp9_init_quant_tables(void) {
+  int i, val = 4;
+
+  // A "real" q of 1.0 forces lossless mode.
+  // In practice non lossless Q's between 1.0 and 2.0 (represented here by
+  // integer values from 5-7) give poor rd results (lower psnr and often
+  // larger size than the lossless encode). To block out those "not very useful"
+  // values we increment the ac and dc q lookup values by 4 after position 0.
+  ac_qlookup[0] = val;
+  dc_qlookup[0] = val;
+  val += 4;
+
+  for (i = 1; i < QINDEX_RANGE; i++) {
+    const int ac_val = val;
+
+    val = (int)(val * 1.01975);
+    if (val == ac_val)
+      ++val;
+
+    ac_qlookup[i] = (int16_t)ac_val;
+    dc_qlookup[i] = (int16_t)MAX(ACDC_MIN, poly3(0.000000305, -0.00065, 0.9,
+                                                 0.5, ac_val));
+  }
+}
+#endif
+
 int16_t vp9_dc_quant(int qindex, int delta) {
  return dc_qlookup[clamp(qindex + delta, 0, MAXQ)];
 }
--- a/vp9/common/vp9_quant_common.h
+++ b/vp9/common/vp9_quant_common.h
@@ -22,6 +22,8 @@ extern "C" {
 #define QINDEX_RANGE (MAXQ - MINQ + 1)
 #define QINDEX_BITS 8

+void vp9_init_quant_tables(void);
+
 int16_t vp9_dc_quant(int qindex, int delta);
 int16_t vp9_ac_quant(int qindex, int delta);

--- a/vp9/common/vp9_reconinter.c
+++ b/vp9/common/vp9_reconinter.c
@@ -20,7 +20,50 @@
 #include "vp9/common/vp9_reconinter.h"
 #include "vp9/common/vp9_reconintra.h"

-void inter_predictor(const uint8_t *src, int src_stride,
+static void build_mc_border(const uint8_t *src, int src_stride,
+                            uint8_t *dst, int dst_stride,
+                            int x, int y, int b_w, int b_h, int w, int h) {
+  // Get a pointer to the start of the real data for this row.
+  const uint8_t *ref_row = src - x - y * src_stride;
+
+  if (y >= h)
+    ref_row += (h - 1) * src_stride;
+  else if (y > 0)
+    ref_row += y * src_stride;
+
+  do {
+    int right = 0, copy;
+    int left = x < 0 ? -x : 0;
+
+    if (left > b_w)
+      left = b_w;
+
+    if (x + b_w > w)
+      right = x + b_w - w;
+
+    if (right > b_w)
+      right = b_w;
+
+    copy = b_w - left - right;
+
+    if (left)
+      memset(dst, ref_row[0], left);
+
+    if (copy)
+      memcpy(dst + left, ref_row + x + left, copy);
+
+    if (right)
+      memset(dst + left + copy, ref_row[w - 1], right);
+
+    dst += dst_stride;
+    ++y;
+
+    if (y > 0 && y < h)
+      ref_row += src_stride;
+  } while (--b_h);
+}
+
+static void inter_predictor(const uint8_t *src, int src_stride,
                            uint8_t *dst, int dst_stride,
                            const int subpel_x,
                            const int subpel_y,
@@ -70,18 +113,6 @@ static MV mi_mv_pred_q4(const MODE_INFO *mi, int idx) {
  return res;
 }

-static INLINE int round_mv_comp_q2(int value) {
-  return (value < 0 ? value - 1 : value + 1) / 2;
-}
-
-static MV mi_mv_pred_q2(const MODE_INFO *mi, int idx, int block0, int block1) {
-  MV res = { round_mv_comp_q2(mi->bmi[block0].as_mv[idx].as_mv.row +
-                              mi->bmi[block1].as_mv[idx].as_mv.row),
-             round_mv_comp_q2(mi->bmi[block0].as_mv[idx].as_mv.col +
-                              mi->bmi[block1].as_mv[idx].as_mv.col) };
-  return res;
-}
-
 // TODO(jkoleszar): yet another mv clamping function :-(
 MV clamp_mv_to_umv_border_sb(const MACROBLOCKD *xd, const MV *src_mv,
                             int bw, int bh, int ss_x, int ss_y) {
@@ -108,30 +139,7 @@ MV clamp_mv_to_umv_border_sb(const MACROBLOCKD *xd, const MV *src_mv,
  return clamped_mv;
 }

-MV average_split_mvs(const struct macroblockd_plane *pd, int plane,
-                            const MODE_INFO *mi, int ref, int block) {
-  const int ss_idx = ((pd->subsampling_x > 0) << 1) | (pd->subsampling_y > 0);
-  MV res = {0, 0};
-  switch (ss_idx) {
-    case 0:
-      res = mi->bmi[block].as_mv[ref].as_mv;
-      break;
-    case 1:
-      res = mi_mv_pred_q2(mi, ref, block, block + 2);
-      break;
-    case 2:
-      res = mi_mv_pred_q2(mi, ref, block, block + 1);
-      break;
-    case 3:
-      res = mi_mv_pred_q4(mi, ref);
-      break;
-    default:
-      assert(ss_idx <= 3 || ss_idx >= 0);
-  }
-  return res;
-}
-
-void build_inter_predictors(MACROBLOCKD *xd, int plane, int block,
+static void build_inter_predictors(MACROBLOCKD *xd, int plane, int block,
                                   int bw, int bh,
                                   int x, int y, int w, int h,
                                   int mi_x, int mi_y) {
@@ -146,8 +154,14 @@ void build_inter_predictors(MACROBLOCKD *xd, int plane, int block,
    struct buf_2d *const pre_buf = &pd->pre[ref];
    struct buf_2d *const dst_buf = &pd->dst;
    uint8_t *const dst = dst_buf->buf + dst_buf->stride * y + x;
+
+    // TODO(jkoleszar): All chroma MVs in SPLITMV mode are taken as the
+    // same MV (the average of the 4 luma MVs) but we could do something
+    // smarter for non-4:2:0. Just punt for now, pending the changes to get
+    // rid of SPLITMV mode entirely.
    const MV mv = mi->mbmi.sb_type < BLOCK_8X8
-               ? average_split_mvs(pd, plane, mi, ref, block)
+               ? (plane == 0 ? mi->bmi[block].as_mv[ref].as_mv
+                             : mi_mv_pred_q4(mi, ref))
               : mi->mbmi.mv[ref].as_mv;

    // TODO(jkoleszar): This clamping is done in the incorrect place for the
@@ -227,6 +241,174 @@ void vp9_build_inter_predictors_sb(MACROBLOCKD *xd, int mi_row, int mi_col,
                                    MAX_MB_PLANE - 1);
 }

+// TODO(jingning): This function serves as a placeholder for decoder prediction
+// using on demand border extension. It should be moved to /decoder/ directory.
+static void dec_build_inter_predictors(MACROBLOCKD *xd, int plane, int block,
+                                       int bw, int bh,
+                                       int x, int y, int w, int h,
+                                       int mi_x, int mi_y) {
+  struct macroblockd_plane *const pd = &xd->plane[plane];
+  const MODE_INFO *mi = xd->mi[0];
+  const int is_compound = has_second_ref(&mi->mbmi);
+  const InterpKernel *kernel = vp9_get_interp_kernel(mi->mbmi.interp_filter);
+  int ref;
+
+  for (ref = 0; ref < 1 + is_compound; ++ref) {
+    const struct scale_factors *const sf = &xd->block_refs[ref]->sf;
+    struct buf_2d *const pre_buf = &pd->pre[ref];
+    struct buf_2d *const dst_buf = &pd->dst;
+    uint8_t *const dst = dst_buf->buf + dst_buf->stride * y + x;
+
+    // TODO(jkoleszar): All chroma MVs in SPLITMV mode are taken as the
+    // same MV (the average of the 4 luma MVs) but we could do something
+    // smarter for non-4:2:0. Just punt for now, pending the changes to get
+    // rid of SPLITMV mode entirely.
+    const MV mv = mi->mbmi.sb_type < BLOCK_8X8
+               ? (plane == 0 ? mi->bmi[block].as_mv[ref].as_mv
+                             : mi_mv_pred_q4(mi, ref))
+               : mi->mbmi.mv[ref].as_mv;
+
+    // TODO(jkoleszar): This clamping is done in the incorrect place for the
+    // scaling case. It needs to be done on the scaled MV, not the pre-scaling
+    // MV. Note however that it performs the subsampling aware scaling so
+    // that the result is always q4.
+    // mv_precision precision is MV_PRECISION_Q4.
+    const MV mv_q4 = clamp_mv_to_umv_border_sb(xd, &mv, bw, bh,
+                                               pd->subsampling_x,
+                                               pd->subsampling_y);
+
+    MV32 scaled_mv;
+    int xs, ys, x0, y0, x0_16, y0_16, frame_width, frame_height, buf_stride,
+        subpel_x, subpel_y;
+    uint8_t *ref_frame, *buf_ptr;
+    const YV12_BUFFER_CONFIG *ref_buf = xd->block_refs[ref]->buf;
+
+    // Get reference frame pointer, width and height.
+    if (plane == 0) {
+      frame_width = ref_buf->y_crop_width;
+      frame_height = ref_buf->y_crop_height;
+      ref_frame = ref_buf->y_buffer;
+    } else {
+      frame_width = ref_buf->uv_crop_width;
+      frame_height = ref_buf->uv_crop_height;
+      ref_frame = plane == 1 ? ref_buf->u_buffer : ref_buf->v_buffer;
+    }
+
+    if (vp9_is_scaled(sf)) {
+      // Co-ordinate of containing block to pixel precision.
+      int x_start = (-xd->mb_to_left_edge >> (3 + pd->subsampling_x));
+      int y_start = (-xd->mb_to_top_edge >> (3 + pd->subsampling_y));
+
+      // Co-ordinate of the block to 1/16th pixel precision.
+      x0_16 = (x_start + x) << SUBPEL_BITS;
+      y0_16 = (y_start + y) << SUBPEL_BITS;
+
+      // Co-ordinate of current block in reference frame
+      // to 1/16th pixel precision.
+      x0_16 = sf->scale_value_x(x0_16, sf);
+      y0_16 = sf->scale_value_y(y0_16, sf);
+
+      // Map the top left corner of the block into the reference frame.
+      x0 = sf->scale_value_x(x_start + x, sf);
+      y0 = sf->scale_value_y(y_start + y, sf);
+
+      // Scale the MV and incorporate the sub-pixel offset of the block
+      // in the reference frame.
+      scaled_mv = vp9_scale_mv(&mv_q4, mi_x + x, mi_y + y, sf);
+      xs = sf->x_step_q4;
+      ys = sf->y_step_q4;
+    } else {
+      // Co-ordinate of containing block to pixel precision.
+      x0 = (-xd->mb_to_left_edge >> (3 + pd->subsampling_x)) + x;
+      y0 = (-xd->mb_to_top_edge >> (3 + pd->subsampling_y)) + y;
+
+      // Co-ordinate of the block to 1/16th pixel precision.
+      x0_16 = x0 << SUBPEL_BITS;
+      y0_16 = y0 << SUBPEL_BITS;
+
+      scaled_mv.row = mv_q4.row;
+      scaled_mv.col = mv_q4.col;
+      xs = ys = 16;
+    }
+    subpel_x = scaled_mv.col & SUBPEL_MASK;
+    subpel_y = scaled_mv.row & SUBPEL_MASK;
+
+    // Top left corner of the best matching block in the reference frame.
+    x0 += scaled_mv.col >> SUBPEL_BITS;
+    y0 += scaled_mv.row >> SUBPEL_BITS;
+    x0_16 += scaled_mv.col;
+    y0_16 += scaled_mv.row;
+
+    // Get reference block pointer.
+    buf_ptr = ref_frame + y0 * pre_buf->stride + x0;
+    buf_stride = pre_buf->stride;
+
+    // Do border extension if there is motion or the
+    // width/height is not a multiple of 8 pixels.
+    if (scaled_mv.col || scaled_mv.row ||
+        (frame_width & 0x7) || (frame_height & 0x7)) {
+      // Get reference block bottom right coordinate.
+      int x1 = ((x0_16 + (w - 1) * xs) >> SUBPEL_BITS) + 1;
+      int y1 = ((y0_16 + (h - 1) * ys) >> SUBPEL_BITS) + 1;
+      int x_pad = 0, y_pad = 0;
+
+      if (subpel_x || (sf->x_step_q4 & SUBPEL_MASK)) {
+        x0 -= VP9_INTERP_EXTEND - 1;
+        x1 += VP9_INTERP_EXTEND;
+        x_pad = 1;
+      }
+
+      if (subpel_y || (sf->y_step_q4 & SUBPEL_MASK)) {
+        y0 -= VP9_INTERP_EXTEND - 1;
+        y1 += VP9_INTERP_EXTEND;
+        y_pad = 1;
+      }
+
+      // Extend unless every corner coordinate lies in [0, dim - 1].
+      if (x0 < 0 || x0 > frame_width - 1 || x1 < 0 || x1 > frame_width - 1 ||
+          y0 < 0 || y0 > frame_height - 1 || y1 < 0 || y1 > frame_height - 1) {
+        uint8_t *buf_ptr1 = ref_frame + y0 * pre_buf->stride + x0;
+        // Extend the border.
+        build_mc_border(buf_ptr1, pre_buf->stride, xd->mc_buf, x1 - x0 + 1,
+                        x0, y0, x1 - x0 + 1, y1 - y0 + 1, frame_width,
+                        frame_height);
+        buf_stride = x1 - x0 + 1;
+        buf_ptr = xd->mc_buf + y_pad * 3 * buf_stride + x_pad * 3;
+      }
+    }
+
+    inter_predictor(buf_ptr, buf_stride, dst, dst_buf->stride, subpel_x,
+                    subpel_y, sf, w, h, ref, kernel, xs, ys);
+  }
+}
+
+void vp9_dec_build_inter_predictors_sb(MACROBLOCKD *xd, int mi_row, int mi_col,
+                                       BLOCK_SIZE bsize) {
+  int plane;
+  const int mi_x = mi_col * MI_SIZE;
+  const int mi_y = mi_row * MI_SIZE;
+  for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
+    const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize,
+                                                        &xd->plane[plane]);
+    const int num_4x4_w = num_4x4_blocks_wide_lookup[plane_bsize];
+    const int num_4x4_h = num_4x4_blocks_high_lookup[plane_bsize];
+    const int bw = 4 * num_4x4_w;
+    const int bh = 4 * num_4x4_h;
+
+    if (xd->mi[0]->mbmi.sb_type < BLOCK_8X8) {
+      int i = 0, x, y;
+      assert(bsize == BLOCK_8X8);
+      for (y = 0; y < num_4x4_h; ++y)
+        for (x = 0; x < num_4x4_w; ++x)
+          dec_build_inter_predictors(xd, plane, i++, bw, bh,
+                                     4 * x, 4 * y, 4, 4, mi_x, mi_y);
+    } else {
+      dec_build_inter_predictors(xd, plane, 0, bw, bh,
+                                 0, 0, bw, bh, mi_x, mi_y);
+    }
+  }
+}
+
 void vp9_setup_dst_planes(struct macroblockd_plane planes[MAX_MB_PLANE],
                          const YV12_BUFFER_CONFIG *src,
                          int mi_row, int mi_col) {
--- a/vp9/common/vp9_reconinter.h
+++ b/vp9/common/vp9_reconinter.h
@@ -18,26 +18,6 @@
 extern "C" {
 #endif

-void inter_predictor(const uint8_t *src, int src_stride,
-                            uint8_t *dst, int dst_stride,
-                            const int subpel_x,
-                            const int subpel_y,
-                            const struct scale_factors *sf,
-                            int w, int h, int ref,
-                            const InterpKernel *kernel,
-                            int xs, int ys);
-
-MV average_split_mvs(const struct macroblockd_plane *pd, int plane,
-                            const MODE_INFO *mi, int ref, int block);
-
-MV clamp_mv_to_umv_border_sb(const MACROBLOCKD *xd, const MV *src_mv,
-                             int bw, int bh, int ss_x, int ss_y);
-
-void build_inter_predictors(MACROBLOCKD *xd, int plane, int block,
-                                   int bw, int bh,
-                                   int x, int y, int w, int h,
-                                   int mi_x, int mi_y);
-
 void vp9_build_inter_predictors_sby(MACROBLOCKD *xd, int mi_row, int mi_col,
                                    BLOCK_SIZE bsize);

@@ -47,6 +27,9 @@ void vp9_build_inter_predictors_sbuv(MACROBLOCKD *xd, int mi_row, int mi_col,
 void vp9_build_inter_predictors_sb(MACROBLOCKD *xd, int mi_row, int mi_col,
                                   BLOCK_SIZE bsize);

+void vp9_dec_build_inter_predictors_sb(MACROBLOCKD *xd, int mi_row, int mi_col,
+                                       BLOCK_SIZE bsize);
+
 void vp9_build_inter_predictor(const uint8_t *src, int src_stride,
                               uint8_t *dst, int dst_stride,
                               const MV *mv_q3,
--- a/vp9/common/vp9_rtcd_defs.pl
+++ b/vp9/common/vp9_rtcd_defs.pl
@@ -305,15 +305,15 @@ specialize qw/vp9_convolve_avg neon_asm dspr2/, "$sse2_x86inc";
 $vp9_convolve_avg_neon_asm=vp9_convolve_avg_neon;

 add_proto qw/void vp9_convolve8/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
-specialize qw/vp9_convolve8 sse2 ssse3 neon_asm dspr2/;
+specialize qw/vp9_convolve8 sse2 ssse3 avx2 neon_asm dspr2/;
 $vp9_convolve8_neon_asm=vp9_convolve8_neon;

 add_proto qw/void vp9_convolve8_horiz/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
-specialize qw/vp9_convolve8_horiz sse2 ssse3 neon_asm dspr2/;
+specialize qw/vp9_convolve8_horiz sse2 ssse3 avx2 neon_asm dspr2/;
 $vp9_convolve8_horiz_neon_asm=vp9_convolve8_horiz_neon;

 add_proto qw/void vp9_convolve8_vert/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
-specialize qw/vp9_convolve8_vert sse2 ssse3 neon_asm dspr2/;
+specialize qw/vp9_convolve8_vert sse2 ssse3 avx2 neon_asm dspr2/;
 $vp9_convolve8_vert_neon_asm=vp9_convolve8_vert_neon;

 add_proto qw/void vp9_convolve8_avg/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
@@ -402,25 +402,25 @@ if (vpx_config("CONFIG_VP9_ENCODER") eq "yes") {

 # variance
 add_proto qw/unsigned int vp9_variance32x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
-specialize qw/vp9_variance32x16 avx2/, "$sse2_x86inc";
+specialize qw/vp9_variance32x16/, "$sse2_x86inc", "$avx2_x86inc";

 add_proto qw/unsigned int vp9_variance16x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
 specialize qw/vp9_variance16x32/, "$sse2_x86inc";

 add_proto qw/unsigned int vp9_variance64x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
-specialize qw/vp9_variance64x32 avx2/, "$sse2_x86inc";
+specialize qw/vp9_variance64x32/, "$sse2_x86inc", "$avx2_x86inc";

 add_proto qw/unsigned int vp9_variance32x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
 specialize qw/vp9_variance32x64/, "$sse2_x86inc";

 add_proto qw/unsigned int vp9_variance32x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
-specialize qw/vp9_variance32x32 avx2/, "$sse2_x86inc";
+specialize qw/vp9_variance32x32/, "$sse2_x86inc", "$avx2_x86inc";

 add_proto qw/unsigned int vp9_variance64x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
-specialize qw/vp9_variance64x64 avx2/, "$sse2_x86inc";
+specialize qw/vp9_variance64x64/, "$sse2_x86inc", "$avx2_x86inc";

 add_proto qw/unsigned int vp9_variance16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
-specialize qw/vp9_variance16x16 mmx avx2/, "$sse2_x86inc";
+specialize qw/vp9_variance16x16 mmx/, "$sse2_x86inc", "$avx2_x86inc";

 add_proto qw/unsigned int vp9_variance16x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
 specialize qw/vp9_variance16x8 mmx/, "$sse2_x86inc";
@@ -447,10 +447,10 @@ add_proto qw/unsigned int vp9_variance4x4/, "const uint8_t *src_ptr, int source_
 specialize qw/vp9_variance4x4 mmx/, "$sse2_x86inc";

 add_proto qw/unsigned int vp9_sub_pixel_variance64x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
-specialize qw/vp9_sub_pixel_variance64x64/, "$sse2_x86inc", "$ssse3_x86inc";
+specialize qw/vp9_sub_pixel_variance64x64 avx2/, "$sse2_x86inc", "$ssse3_x86inc";

 add_proto qw/unsigned int vp9_sub_pixel_avg_variance64x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred";
-specialize qw/vp9_sub_pixel_avg_variance64x64/, "$sse2_x86inc", "$ssse3_x86inc";
+specialize qw/vp9_sub_pixel_avg_variance64x64 avx2/, "$sse2_x86inc", "$ssse3_x86inc";

 add_proto qw/unsigned int vp9_sub_pixel_variance32x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
 specialize qw/vp9_sub_pixel_variance32x64/, "$sse2_x86inc", "$ssse3_x86inc";
@@ -477,10 +477,10 @@ add_proto qw/unsigned int vp9_sub_pixel_avg_variance16x32/, "const uint8_t *src_
 specialize qw/vp9_sub_pixel_avg_variance16x32/, "$sse2_x86inc", "$ssse3_x86inc";

 add_proto qw/unsigned int vp9_sub_pixel_variance32x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
-specialize qw/vp9_sub_pixel_variance32x32/, "$sse2_x86inc", "$ssse3_x86inc";
+specialize qw/vp9_sub_pixel_variance32x32 avx2/, "$sse2_x86inc", "$ssse3_x86inc";

 add_proto qw/unsigned int vp9_sub_pixel_avg_variance32x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred";
-specialize qw/vp9_sub_pixel_avg_variance32x32/, "$sse2_x86inc", "$ssse3_x86inc";
+specialize qw/vp9_sub_pixel_avg_variance32x32 avx2/, "$sse2_x86inc", "$ssse3_x86inc";

 add_proto qw/unsigned int vp9_sub_pixel_variance16x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
 specialize qw/vp9_sub_pixel_variance16x16/, "$sse2_x86inc", "$ssse3_x86inc";
@@ -653,7 +653,7 @@ add_proto qw/void vp9_sad4x4x8/, "const uint8_t *src_ptr, int  src_stride, const
 specialize qw/vp9_sad4x4x8 sse4/;

 add_proto qw/void vp9_sad64x64x4d/, "const uint8_t *src_ptr, int  src_stride, const uint8_t* const ref_ptr[], int  ref_stride, unsigned int *sad_array";
-specialize qw/vp9_sad64x64x4d sse2/;
+specialize qw/vp9_sad64x64x4d sse2 avx2/;

 add_proto qw/void vp9_sad32x64x4d/, "const uint8_t *src_ptr, int  src_stride, const uint8_t* const ref_ptr[], int  ref_stride, unsigned int *sad_array";
 specialize qw/vp9_sad32x64x4d sse2/;
@@ -668,7 +668,7 @@ add_proto qw/void vp9_sad16x32x4d/, "const uint8_t *src_ptr, int  src_stride, co
 specialize qw/vp9_sad16x32x4d sse2/;

 add_proto qw/void vp9_sad32x32x4d/, "const uint8_t *src_ptr, int  src_stride, const uint8_t* const ref_ptr[], int  ref_stride, unsigned int *sad_array";
-specialize qw/vp9_sad32x32x4d sse2/;
+specialize qw/vp9_sad32x32x4d sse2 avx2/;

 add_proto qw/void vp9_sad16x16x4d/, "const uint8_t *src_ptr, int  src_stride, const uint8_t* const ref_ptr[], int  ref_stride, unsigned int *sad_array";
 specialize qw/vp9_sad16x16x4d sse2/;
@@ -693,7 +693,7 @@ add_proto qw/void vp9_sad4x4x4d/, "const uint8_t *src_ptr, int  src_stride, cons
 specialize qw/vp9_sad4x4x4d sse/;

 add_proto qw/unsigned int vp9_mse16x16/, "const uint8_t *src_ptr, int  source_stride, const uint8_t *ref_ptr, int  recon_stride, unsigned int *sse";
-specialize qw/vp9_mse16x16 mmx avx2/, "$sse2_x86inc";
+specialize qw/vp9_mse16x16 mmx/, "$sse2_x86inc", "$avx2_x86inc";

 add_proto qw/unsigned int vp9_mse8x16/, "const uint8_t *src_ptr, int  source_stride, const uint8_t *ref_ptr, int  recon_stride, unsigned int *sse";
 specialize qw/vp9_mse8x16/;
@@ -714,9 +714,6 @@ specialize qw/vp9_block_error avx2/, "$sse2_x86inc";
 add_proto qw/void vp9_subtract_block/, "int rows, int cols, int16_t *diff_ptr, ptrdiff_t diff_stride, const uint8_t *src_ptr, ptrdiff_t src_stride, const uint8_t *pred_ptr, ptrdiff_t pred_stride";
 specialize qw/vp9_subtract_block/, "$sse2_x86inc";

-add_proto qw/void vp9_quantize_fp/, "const int16_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, int16_t *qcoeff_ptr, int16_t *dqcoeff_ptr, const int16_t *dequant_ptr, int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
-specialize qw/vp9_quantize_fp/, "$ssse3_x86_64";
-
 add_proto qw/void vp9_quantize_b/, "const int16_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, int16_t *qcoeff_ptr, int16_t *dqcoeff_ptr, const int16_t *dequant_ptr, int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
 specialize qw/vp9_quantize_b/, "$ssse3_x86_64";

@@ -742,31 +739,19 @@ add_proto qw/void vp9_fht8x8/, "const int16_t *input, int16_t *output, int strid
 specialize qw/vp9_fht8x8 sse2 avx2/;

 add_proto qw/void vp9_fht16x16/, "const int16_t *input, int16_t *output, int stride, int tx_type";
-specialize qw/vp9_fht16x16 sse2/;
+specialize qw/vp9_fht16x16 sse2 avx2/;

 add_proto qw/void vp9_fwht4x4/, "const int16_t *input, int16_t *output, int stride";
 specialize qw/vp9_fwht4x4/, "$mmx_x86inc";

-add_proto qw/void vp9_fdct4x4_1/, "const int16_t *input, int16_t *output, int stride";
-specialize qw/vp9_fdct4x4_1 sse2/;
-
 add_proto qw/void vp9_fdct4x4/, "const int16_t *input, int16_t *output, int stride";
 specialize qw/vp9_fdct4x4 sse2 avx2/;

-add_proto qw/void vp9_fdct8x8_1/, "const int16_t *input, int16_t *output, int stride";
-specialize qw/vp9_fdct8x8_1 sse2/;
-
 add_proto qw/void vp9_fdct8x8/, "const int16_t *input, int16_t *output, int stride";
 specialize qw/vp9_fdct8x8 sse2 avx2/, "$ssse3_x86_64";

-add_proto qw/void vp9_fdct16x16_1/, "const int16_t *input, int16_t *output, int stride";
-specialize qw/vp9_fdct16x16_1 sse2/;
-
 add_proto qw/void vp9_fdct16x16/, "const int16_t *input, int16_t *output, int stride";
-specialize qw/vp9_fdct16x16 sse2/;
-
-add_proto qw/void vp9_fdct32x32_1/, "const int16_t *input, int16_t *output, int stride";
-specialize qw/vp9_fdct32x32_1 sse2/;
+specialize qw/vp9_fdct16x16 sse2 avx2/;

 add_proto qw/void vp9_fdct32x32/, "const int16_t *input, int16_t *output, int stride";
 specialize qw/vp9_fdct32x32 sse2 avx2/;
--- a/vp9/common/vp9_scale.c
+++ b/vp9/common/vp9_scale.c
@@ -33,6 +33,14 @@ static int get_fixed_point_scale_factor(int other_size, int this_size) {
  return (other_size << REF_SCALE_SHIFT) / this_size;
 }

+static int check_scale_factors(int other_w, int other_h,
+                               int this_w, int this_h) {
+  return 2 * this_w >= other_w &&
+         2 * this_h >= other_h &&
+         this_w <= 16 * other_w &&
+         this_h <= 16 * other_h;
+}
+
 MV32 vp9_scale_mv(const MV *mv, int x, int y, const struct scale_factors *sf) {
  const int x_off_q4 = scaled_x(x << SUBPEL_BITS, sf) & SUBPEL_MASK;
  const int y_off_q4 = scaled_y(y << SUBPEL_BITS, sf) & SUBPEL_MASK;
@@ -46,7 +54,7 @@ MV32 vp9_scale_mv(const MV *mv, int x, int y, const struct scale_factors *sf) {
 void vp9_setup_scale_factors_for_frame(struct scale_factors *sf,
                                       int other_w, int other_h,
                                       int this_w, int this_h) {
-  if (!valid_ref_frame_size(other_w, other_h, this_w, this_h)) {
+  if (!check_scale_factors(other_w, other_h, this_w, this_h)) {
    sf->x_scale_fp = REF_INVALID_SCALE;
    sf->y_scale_fp = REF_INVALID_SCALE;
    return;
--- a/vp9/common/vp9_scale.h
+++ b/vp9/common/vp9_scale.h
@@ -46,16 +46,8 @@ static INLINE int vp9_is_valid_scale(const struct scale_factors *sf) {
 }

 static INLINE int vp9_is_scaled(const struct scale_factors *sf) {
-  return vp9_is_valid_scale(sf) &&
-         (sf->x_scale_fp != REF_NO_SCALE || sf->y_scale_fp != REF_NO_SCALE);
-}
-
-static INLINE int valid_ref_frame_size(int ref_width, int ref_height,
-                                      int this_width, int this_height) {
-  return 2 * this_width >= ref_width &&
-         2 * this_height >= ref_height &&
-         this_width <= 16 * ref_width &&
-         this_height <= 16 * ref_height;
+  return sf->x_scale_fp != REF_NO_SCALE ||
+         sf->y_scale_fp != REF_NO_SCALE;
 }

 #ifdef __cplusplus
--- a/vp9/common/vp9_thread.c
+++ b/vp9/common/vp9_thread.c
@@ -1,183 +0,0 @@
-// Copyright 2013 Google Inc. All Rights Reserved.
-//
-// Use of this source code is governed by a BSD-style license
-// that can be found in the COPYING file in the root of the source
-// tree. An additional intellectual property rights grant can be found
-// in the file PATENTS. All contributing project authors may
-// be found in the AUTHORS file in the root of the source tree.
-// -----------------------------------------------------------------------------
-//
-// Multi-threaded worker
-//
-// Original source:
-//  http://git.chromium.org/webm/libwebp.git
-//  100644 blob 08ad4e1fecba302bf1247645e84a7d2779956bc3  src/utils/thread.c
-
-#include <assert.h>
-#include <string.h>   // for memset()
-#include "./vp9_thread.h"
-#include "vpx_mem/vpx_mem.h"
-
-#if CONFIG_MULTITHREAD
-
-struct VP9WorkerImpl {
-  pthread_mutex_t mutex_;
-  pthread_cond_t  condition_;
-  pthread_t       thread_;
-};
-
-//------------------------------------------------------------------------------
-
-static void execute(VP9Worker *const worker);  // Forward declaration.
-
-static THREADFN thread_loop(void *ptr) {
-  VP9Worker *const worker = (VP9Worker*)ptr;
-  int done = 0;
-  while (!done) {
-    pthread_mutex_lock(&worker->impl_->mutex_);
-    while (worker->status_ == OK) {   // wait in idling mode
-      pthread_cond_wait(&worker->impl_->condition_, &worker->impl_->mutex_);
-    }
-    if (worker->status_ == WORK) {
-      execute(worker);
-      worker->status_ = OK;
-    } else if (worker->status_ == NOT_OK) {   // finish the worker
-      done = 1;
-    }
-    // signal to the main thread that we're done (for sync())
-    pthread_cond_signal(&worker->impl_->condition_);
-    pthread_mutex_unlock(&worker->impl_->mutex_);
-  }
-  return THREAD_RETURN(NULL);    // Thread is finished
-}
-
-// main thread state control
-static void change_state(VP9Worker *const worker,
-                         VP9WorkerStatus new_status) {
-  // No-op when attempting to change state on a thread that didn't come up.
-  // Checking status_ without acquiring the lock first would result in a data
-  // race.
-  if (worker->impl_ == NULL) return;
-
-  pthread_mutex_lock(&worker->impl_->mutex_);
-  if (worker->status_ >= OK) {
-    // wait for the worker to finish
-    while (worker->status_ != OK) {
-      pthread_cond_wait(&worker->impl_->condition_, &worker->impl_->mutex_);
-    }
-    // assign new status and release the working thread if needed
-    if (new_status != OK) {
-      worker->status_ = new_status;
-      pthread_cond_signal(&worker->impl_->condition_);
-    }
-  }
-  pthread_mutex_unlock(&worker->impl_->mutex_);
-}
-
-#endif  // CONFIG_MULTITHREAD
-
-//------------------------------------------------------------------------------
-
-static void init(VP9Worker *const worker) {
-  memset(worker, 0, sizeof(*worker));
-  worker->status_ = NOT_OK;
-}
-
-static int sync(VP9Worker *const worker) {
-#if CONFIG_MULTITHREAD
-  change_state(worker, OK);
-#endif
-  assert(worker->status_ <= OK);
-  return !worker->had_error;
-}
-
-static int reset(VP9Worker *const worker) {
-  int ok = 1;
-  worker->had_error = 0;
-  if (worker->status_ < OK) {
-#if CONFIG_MULTITHREAD
-    worker->impl_ = (VP9WorkerImpl*)vpx_calloc(1, sizeof(*worker->impl_));
-    if (worker->impl_ == NULL) {
-      return 0;
-    }
-    if (pthread_mutex_init(&worker->impl_->mutex_, NULL)) {
-      goto Error;
-    }
-    if (pthread_cond_init(&worker->impl_->condition_, NULL)) {
-      pthread_mutex_destroy(&worker->impl_->mutex_);
-      goto Error;
-    }
-    pthread_mutex_lock(&worker->impl_->mutex_);
-    ok = !pthread_create(&worker->impl_->thread_, NULL, thread_loop, worker);
-    if (ok) worker->status_ = OK;
-    pthread_mutex_unlock(&worker->impl_->mutex_);
-    if (!ok) {
-      pthread_mutex_destroy(&worker->impl_->mutex_);
-      pthread_cond_destroy(&worker->impl_->condition_);
- Error:
-      vpx_free(worker->impl_);
-      worker->impl_ = NULL;
-      return 0;
-    }
-#else
-    worker->status_ = OK;
-#endif
-  } else if (worker->status_ > OK) {
-    ok = sync(worker);
-  }
-  assert(!ok || (worker->status_ == OK));
-  return ok;
-}
-
-static void execute(VP9Worker *const worker) {
-  if (worker->hook != NULL) {
-    worker->had_error |= !worker->hook(worker->data1, worker->data2);
-  }
-}
-
-static void launch(VP9Worker *const worker) {
-#if CONFIG_MULTITHREAD
-  change_state(worker, WORK);
-#else
-  execute(worker);
-#endif
-}
-
-static void end(VP9Worker *const worker) {
-  if (worker->status_ >= OK) {
-#if CONFIG_MULTITHREAD
-    change_state(worker, NOT_OK);
-    pthread_join(worker->impl_->thread_, NULL);
-    pthread_mutex_destroy(&worker->impl_->mutex_);
-    pthread_cond_destroy(&worker->impl_->condition_);
-#else
-    worker->status_ = NOT_OK;
-#endif
-  }
-  vpx_free(worker->impl_);
-  worker->impl_ = NULL;
-  assert(worker->status_ == NOT_OK);
-}
-
-//------------------------------------------------------------------------------
-
-static VP9WorkerInterface g_worker_interface = {
-  init, reset, sync, launch, execute, end
-};
-
-int vp9_set_worker_interface(const VP9WorkerInterface* const winterface) {
-  if (winterface == NULL ||
-      winterface->init == NULL || winterface->reset == NULL ||
-      winterface->sync == NULL || winterface->launch == NULL ||
-      winterface->execute == NULL || winterface->end == NULL) {
-    return 0;
-  }
-  g_worker_interface = *winterface;
-  return 1;
-}
-
-const VP9WorkerInterface *vp9_get_worker_interface(void) {
-  return &g_worker_interface;
-}
-
-//------------------------------------------------------------------------------
--- a/vp9/common/x86/vp9_postproc_mmx.asm
+++ b/vp9/common/x86/vp9_postproc_mmx.asm
@@ -464,6 +464,7 @@ sym(vp9_mbpost_proc_down_mmx):
 ;                            unsigned char whiteclamp[16],
 ;                            unsigned char bothclamp[16],
 ;                            unsigned int width, unsigned int height, int pitch)
+extern sym(rand)
 global sym(vp9_plane_add_noise_mmx) PRIVATE
 sym(vp9_plane_add_noise_mmx):
    push        rbp
@@ -475,7 +476,7 @@ sym(vp9_plane_add_noise_mmx):
    ; end prolog

 .addnoise_loop:
-    call sym(LIBVPX_RAND) WRT_PLT
+    call sym(rand) WRT_PLT
    mov     rcx, arg(1) ;noise
    and     rax, 0xff
    add     rcx, rax
--- a/vp9/common/x86/vp9_postproc_sse2.asm
+++ b/vp9/common/x86/vp9_postproc_sse2.asm
@@ -629,6 +629,7 @@ sym(vp9_mbpost_proc_across_ip_xmm):
 ;                            unsigned char whiteclamp[16],
 ;                            unsigned char bothclamp[16],
 ;                            unsigned int width, unsigned int height, int pitch)
+extern sym(rand)
 global sym(vp9_plane_add_noise_wmt) PRIVATE
 sym(vp9_plane_add_noise_wmt):
    push        rbp
@@ -640,7 +641,7 @@ sym(vp9_plane_add_noise_wmt):
    ; end prolog

 .addnoise_loop:
-    call sym(LIBVPX_RAND) WRT_PLT
+    call sym(rand) WRT_PLT
    mov     rcx, arg(1) ;noise
    and     rax, 0xff
    add     rcx, rax
--- a/vp9/decoder/vp9_decodeframe.c
+++ b/vp9/decoder/vp9_decodeframe.c
@@ -28,7 +28,6 @@
 #include "vp9/common/vp9_reconintra.h"
 #include "vp9/common/vp9_reconinter.h"
 #include "vp9/common/vp9_seg_common.h"
-#include "vp9/common/vp9_thread.h"
 #include "vp9/common/vp9_tile_common.h"

 #include "vp9/decoder/vp9_decodeframe.h"
@@ -39,6 +38,7 @@
 #include "vp9/decoder/vp9_dthread.h"
 #include "vp9/decoder/vp9_read_bit_buffer.h"
 #include "vp9/decoder/vp9_reader.h"
+#include "vp9/decoder/vp9_thread.h"

 #define MAX_VP9_HEADER_SIZE 80

@@ -312,6 +312,13 @@ static MB_MODE_INFO *set_offsets(VP9_COMMON *const cm, MACROBLOCKD *const xd,
    for (x = !y; x < x_mis; ++x)
      xd->mi[y * cm->mi_stride + x] = xd->mi[0];

+#if CONFIG_TRANSCODE && WRITE_MI_ARRAY
+  for (y = 0; y < y_mis; ++y)
+    for (x = !y; x < x_mis; ++x)  // x = !y skips (0, 0), the copy source.
+      vpx_memcpy(&cm->mi[offset + y * cm->mi_stride + x],
+                 &cm->mi[offset], sizeof(MODE_INFO));
+#endif  // CONFIG_TRANSCODE && WRITE_MI_ARRAY
+
  set_skip_context(xd, mi_row, mi_col);

  // Distance of Mb to the various image edges. These are specified to 8th pel
@@ -327,24 +334,21 @@ static void set_ref(VP9_COMMON *const cm, MACROBLOCKD *const xd,
  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
  RefBuffer *ref_buffer = &cm->frame_refs[mbmi->ref_frame[idx] - LAST_FRAME];
  xd->block_refs[idx] = ref_buffer;
-
  if (!vp9_is_valid_scale(&ref_buffer->sf))
    vpx_internal_error(&cm->error, VPX_CODEC_UNSUP_BITSTREAM,
                       "Invalid scale factors");
  vp9_setup_pre_planes(xd, idx, ref_buffer->buf, mi_row, mi_col,
                       &ref_buffer->sf);
-  if (!cm->frame_parallel_decode)
-    xd->corrupted |= ref_buffer->buf->corrupted;
+  xd->corrupted |= ref_buffer->buf->corrupted;
 }

-static void decode_block(VP9Decoder *const pbi, MACROBLOCKD *const xd,
+static void decode_block(VP9_COMMON *const cm, MACROBLOCKD *const xd,
                         const TileInfo *const tile,
                         int mi_row, int mi_col,
                         vp9_reader *r, BLOCK_SIZE bsize) {
-  VP9_COMMON *const cm = &pbi->common;
  const int less8x8 = bsize < BLOCK_8X8;
  MB_MODE_INFO *mbmi = set_offsets(cm, xd, tile, bsize, mi_row, mi_col);
-  vp9_read_mode_info(pbi, xd, tile, mi_row, mi_col, r);
+  vp9_read_mode_info(cm, xd, tile, mi_row, mi_col, r);

  if (less8x8)
    bsize = BLOCK_8X8;
@@ -368,7 +372,7 @@ static void decode_block(VP9Decoder *const pbi, MACROBLOCKD *const xd,
      set_ref(cm, xd, 1, mi_row, mi_col);

    // Prediction
-    vp9_dec_build_inter_predictors_sb(pbi, xd, mi_row, mi_col, bsize);
+    vp9_dec_build_inter_predictors_sb(xd, mi_row, mi_col, bsize);

    // Reconstruction
    if (!mbmi->skip) {
@@ -407,46 +411,69 @@ static PARTITION_TYPE read_partition(VP9_COMMON *cm, MACROBLOCKD *xd, int hbs,
  return p;
 }

-static void decode_partition(VP9Decoder *const pbi, MACROBLOCKD *const xd,
+static void decode_partition(VP9_COMMON *const cm, MACROBLOCKD *const xd,
                             const TileInfo *const tile,
                             int mi_row, int mi_col,
                             vp9_reader* r, BLOCK_SIZE bsize) {
-  VP9_COMMON *const cm = &pbi->common;
  const int hbs = num_8x8_blocks_wide_lookup[bsize] / 2;
  PARTITION_TYPE partition;
-  BLOCK_SIZE subsize, uv_subsize;
+  BLOCK_SIZE subsize;
+
+#if CONFIG_TRANSCODE && READ_MI_ARRAY
+  // For testing purposes only: reads one 64x64 block's worth of mode_info
+  // entries back from the external file and dumps the block at (0, 8).
+  if (bsize == BLOCK_64X64 && cm->mi_array_pf != NULL) {
+    MODE_INFO mi_array[MI_BLOCK_SIZE * MI_BLOCK_SIZE];
+    const size_t num_mi = sizeof(mi_array) / sizeof(mi_array[0]);
+    // One bulk read consumes the same bytes as per-entry reads; its return
+    // value says whether every entry was actually filled in.
+    const size_t num_read =
+        fread(mi_array, sizeof(mi_array[0]), num_mi, cm->mi_array_pf);
+
+    if (num_read != num_mi) {
+      fprintf(stderr, "Short mode_info read at (%d, %d)\n", mi_row, mi_col);
+    } else if (mi_row == 0 && mi_col == 8) {
+      int i, j;
+      for (j = 0; j < MI_BLOCK_SIZE; ++j) {
+        for (i = 0; i < MI_BLOCK_SIZE; ++i) {
+          const MODE_INFO *const mi = &mi_array[j * MI_BLOCK_SIZE + i];
+          fprintf(stderr, "pos (%d, %d), bsize %d, mode %d\n",
+                  mi_row + j, mi_col + i, mi->mbmi.sb_type,
+                  mi->bmi[0].as_mode);
+        }
+      }
+      fprintf(stderr, "\n");
+    }
+  }
+#endif

  if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols)
    return;

  partition = read_partition(cm, xd, hbs, mi_row, mi_col, bsize, r);
  subsize = get_subsize(bsize, partition);
-  uv_subsize = ss_size_lookup[subsize][cm->subsampling_x][cm->subsampling_y];
-  if (subsize >= BLOCK_8X8 && uv_subsize == BLOCK_INVALID)
-    vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME,
-                       "Invalid block size.");
  if (subsize < BLOCK_8X8) {
-    decode_block(pbi, xd, tile, mi_row, mi_col, r, subsize);
+    decode_block(cm, xd, tile, mi_row, mi_col, r, subsize);
  } else {
    switch (partition) {
      case PARTITION_NONE:
-        decode_block(pbi, xd, tile, mi_row, mi_col, r, subsize);
+        decode_block(cm, xd, tile, mi_row, mi_col, r, subsize);
        break;
      case PARTITION_HORZ:
-        decode_block(pbi, xd, tile, mi_row, mi_col, r, subsize);
+        decode_block(cm, xd, tile, mi_row, mi_col, r, subsize);
        if (mi_row + hbs < cm->mi_rows)
-          decode_block(pbi, xd, tile, mi_row + hbs, mi_col, r, subsize);
+          decode_block(cm, xd, tile, mi_row + hbs, mi_col, r, subsize);
        break;
      case PARTITION_VERT:
-        decode_block(pbi, xd, tile, mi_row, mi_col, r, subsize);
+        decode_block(cm, xd, tile, mi_row, mi_col, r, subsize);
        if (mi_col + hbs < cm->mi_cols)
-          decode_block(pbi, xd, tile, mi_row, mi_col + hbs, r, subsize);
+          decode_block(cm, xd, tile, mi_row, mi_col + hbs, r, subsize);
        break;
      case PARTITION_SPLIT:
-        decode_partition(pbi, xd, tile, mi_row,       mi_col,       r, subsize);
-        decode_partition(pbi, xd, tile, mi_row,       mi_col + hbs, r, subsize);
-        decode_partition(pbi, xd, tile, mi_row + hbs, mi_col,       r, subsize);
-        decode_partition(pbi, xd, tile, mi_row + hbs, mi_col + hbs, r, subsize);
+        decode_partition(cm, xd, tile, mi_row,       mi_col,       r, subsize);
+        decode_partition(cm, xd, tile, mi_row,       mi_col + hbs, r, subsize);
+        decode_partition(cm, xd, tile, mi_row + hbs, mi_col,       r, subsize);
+        decode_partition(cm, xd, tile, mi_row + hbs, mi_col + hbs, r, subsize);
        break;
      default:
        assert(0 && "Invalid partition type");
@@ -457,6 +484,22 @@ static void decode_partition(VP9Decoder *const pbi, MACROBLOCKD *const xd,
  if (bsize >= BLOCK_8X8 &&
      (bsize == BLOCK_8X8 || partition != PARTITION_SPLIT))
    update_partition_context(xd, mi_row, mi_col, subsize, bsize);
+
+#if CONFIG_TRANSCODE && WRITE_MI_ARRAY
+  // Write this 64x64 block's mode_info entries, one row per fwrite call.
+  if (bsize == BLOCK_64X64) {
+    FILE *const pf = cm->mi_array_pf;
+    const int offset = mi_row * cm->mi_stride + mi_col;
+    int j;
+    assert(pf != NULL);
+    for (j = 0; pf != NULL && j < MI_BLOCK_SIZE; ++j) {
+      const MODE_INFO *const row = &cm->mi[offset + j * cm->mi_stride];
+      if (fwrite(row, sizeof(MODE_INFO), MI_BLOCK_SIZE, pf) != MI_BLOCK_SIZE)
+        vpx_internal_error(&cm->error, VPX_CODEC_ERROR,
+                           "Failed to write mode_info array");
+    }
+  }
+#endif
 }

 static void setup_token_decoder(const uint8_t *data,
@@ -625,7 +668,6 @@ static void setup_display_size(VP9_COMMON *cm, struct vp9_read_bit_buffer *rb) {
 }

 static void apply_frame_size(VP9_COMMON *cm, int width, int height) {
-  BufferPool *const pool = cm->buffer_pool;
  if (cm->width != width || cm->height != height) {
    // Change in frame size.
    // TODO(agrange) Don't test width/height, check overall size.
@@ -642,17 +684,14 @@ static void apply_frame_size(VP9_COMMON *cm, int width, int height) {
    vp9_update_frame_size(cm);
  }

-  lock_buffer_pool(pool);
  if (vp9_realloc_frame_buffer(
          get_frame_new_buffer(cm), cm->width, cm->height,
          cm->subsampling_x, cm->subsampling_y, VP9_DEC_BORDER_IN_PIXELS,
-          &pool->frame_bufs[cm->new_fb_idx].raw_frame_buffer, pool->get_fb_cb,
-          pool->cb_priv)) {
-    unlock_buffer_pool(pool);
+          &cm->frame_bufs[cm->new_fb_idx].raw_frame_buffer, cm->get_fb_cb,
+          cm->cb_priv)) {
    vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
                       "Failed to allocate frame buffer");
  }
-  unlock_buffer_pool(pool);
 }

 static void setup_frame_size(VP9_COMMON *cm, struct vp9_read_bit_buffer *rb) {
@@ -679,17 +718,9 @@ static void setup_frame_size_with_refs(VP9_COMMON *cm,
  if (!found)
    read_frame_size(rb, &width, &height);

-  // Check that each of the frames that this frame references has valid
-  // dimensions.
-  for (i = 0; i < REFS_PER_FRAME; ++i) {
-    RefBuffer *const ref_frame = &cm->frame_refs[i];
-    const int ref_width = ref_frame->buf->y_width;
-    const int ref_height = ref_frame->buf->y_height;
-
-    if (!valid_ref_frame_size(ref_width, ref_height, width, height))
-      vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME,
-                         "Referenced frame has invalid size");
-  }
+  if (width <= 0 || height <= 0)
+    vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME,
+                       "Referenced frame with invalid size");

  apply_frame_size(cm, width, height);
  setup_display_size(cm, rb);
@@ -705,10 +736,6 @@ static void setup_tile_info(VP9_COMMON *cm, struct vp9_read_bit_buffer *rb) {
  while (max_ones-- && vp9_rb_read_bit(rb))
    cm->log2_tile_cols++;

-  if (cm->log2_tile_cols > 6)
-    vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME,
-                       "Invalid number of tile columns");
-
  // rows
  cm->log2_tile_rows = vp9_rb_read_bit(rb);
  if (cm->log2_tile_rows)
@@ -779,20 +806,19 @@ static const uint8_t *decode_tiles(VP9Decoder *pbi,
                                   const uint8_t *data,
                                   const uint8_t *data_end) {
  VP9_COMMON *const cm = &pbi->common;
-  const VP9WorkerInterface *const winterface = vp9_get_worker_interface();
  const int aligned_cols = mi_cols_aligned_to_sb(cm->mi_cols);
  const int tile_cols = 1 << cm->log2_tile_cols;
  const int tile_rows = 1 << cm->log2_tile_rows;
  TileBuffer tile_buffers[4][1 << 6];
  int tile_row, tile_col;
-  int mi_row = 0, mi_col = 0;
+  int mi_row, mi_col;
  TileData *tile_data = NULL;

  if (cm->lf.filter_level && pbi->lf_worker.data1 == NULL) {
    CHECK_MEM_ERROR(cm, pbi->lf_worker.data1,
                    vpx_memalign(32, sizeof(LFWorkerData)));
    pbi->lf_worker.hook = (VP9WorkerHook)vp9_loop_filter_worker;
-    if (pbi->max_threads > 1 && !winterface->reset(&pbi->lf_worker)) {
+    if (pbi->max_threads > 1 && !vp9_worker_reset(&pbi->lf_worker)) {
      vpx_internal_error(&cm->error, VPX_CODEC_ERROR,
                         "Loop filter thread creation failed");
    }
@@ -805,6 +831,7 @@ static const uint8_t *decode_tiles(VP9Decoder *pbi,
    vp9_copy(lf_data->planes, pbi->mb.plane);
    lf_data->stop = 0;
    lf_data->y_only = 0;
+    vp9_loop_filter_frame_init(cm, cm->lf.filter_level);
  }

  assert(tile_rows <= 4);
@@ -862,7 +889,7 @@ static const uint8_t *decode_tiles(VP9Decoder *pbi,
        vp9_zero(tile_data->xd.left_seg_context);
        for (mi_col = tile.mi_col_start; mi_col < tile.mi_col_end;
             mi_col += MI_BLOCK_SIZE) {
-          decode_partition(pbi, &tile_data->xd, &tile, mi_row, mi_col,
+          decode_partition(tile_data->cm, &tile_data->xd, &tile, mi_row, mi_col,
                           &tile_data->bit_reader, BLOCK_64X64);
        }
      }
@@ -877,38 +904,30 @@ static const uint8_t *decode_tiles(VP9Decoder *pbi,
        // decoding has completed: finish up the loop filter in this thread.
        if (mi_row + MI_BLOCK_SIZE >= cm->mi_rows) continue;

-        winterface->sync(&pbi->lf_worker);
+        vp9_worker_sync(&pbi->lf_worker);
        lf_data->start = lf_start;
        lf_data->stop = mi_row;
        if (pbi->max_threads > 1) {
-          winterface->launch(&pbi->lf_worker);
+          vp9_worker_launch(&pbi->lf_worker);
        } else {
-          winterface->execute(&pbi->lf_worker);
+          vp9_worker_execute(&pbi->lf_worker);
        }
      }
-      // After loopfiltering, the last 7 row pixels in each superblock row may
-      // still be changed by the longest loopfilter of the next superblock
-      // row.
-      if (pbi->frame_parallel_decode)
-        vp9_frameworker_broadcast(pbi->cur_buf,
-                                  mi_row << MI_BLOCK_SIZE_LOG2);
    }
  }

  // Loopfilter remaining rows in the frame.
  if (cm->lf.filter_level) {
    LFWorkerData *const lf_data = (LFWorkerData*)pbi->lf_worker.data1;
-    winterface->sync(&pbi->lf_worker);
+    vp9_worker_sync(&pbi->lf_worker);
    lf_data->start = lf_data->stop;
    lf_data->stop = cm->mi_rows;
-    winterface->execute(&pbi->lf_worker);
+    vp9_worker_execute(&pbi->lf_worker);
  }

  // Get last tile data.
  tile_data = pbi->tile_data + tile_cols * tile_rows - 1;

-  if (pbi->frame_parallel_decode)
-    vp9_frameworker_broadcast(pbi->cur_buf, INT_MAX);
  return vp9_reader_find_end(&tile_data->bit_reader);
 }

@@ -923,7 +942,7 @@ static int tile_worker_hook(void *arg1, void *arg2) {
    vp9_zero(tile_data->xd.left_seg_context);
    for (mi_col = tile->mi_col_start; mi_col < tile->mi_col_end;
         mi_col += MI_BLOCK_SIZE) {
-      decode_partition(tile_data->pbi, &tile_data->xd, tile,
+      decode_partition(tile_data->cm, &tile_data->xd, tile,
                       mi_row, mi_col, &tile_data->bit_reader, BLOCK_64X64);
    }
  }
@@ -947,7 +966,6 @@ static const uint8_t *decode_tiles_mt(VP9Decoder *pbi,
                                      const uint8_t *data,
                                      const uint8_t *data_end) {
  VP9_COMMON *const cm = &pbi->common;
-  const VP9WorkerInterface *const winterface = vp9_get_worker_interface();
  const uint8_t *bit_reader_end = NULL;
  const int aligned_mi_cols = mi_cols_aligned_to_sb(cm->mi_cols);
  const int tile_cols = 1 << cm->log2_tile_cols;
@@ -974,11 +992,11 @@ static const uint8_t *decode_tiles_mt(VP9Decoder *pbi,
      VP9Worker *const worker = &pbi->tile_workers[i];
      ++pbi->num_tile_workers;

-      winterface->init(worker);
+      vp9_worker_init(worker);
      CHECK_MEM_ERROR(cm, worker->data1,
                      vpx_memalign(32, sizeof(TileWorkerData)));
      CHECK_MEM_ERROR(cm, worker->data2, vpx_malloc(sizeof(TileInfo)));
-      if (i < num_threads - 1 && !winterface->reset(worker)) {
+      if (i < num_threads - 1 && !vp9_worker_reset(worker)) {
        vpx_internal_error(&cm->error, VPX_CODEC_ERROR,
                           "Tile decoder thread creation failed");
      }
@@ -1029,10 +1047,10 @@ static const uint8_t *decode_tiles_mt(VP9Decoder *pbi,
      TileInfo *const tile = (TileInfo*)worker->data2;
      TileBuffer *const buf = &tile_buffers[0][n];

-      tile_data->pbi = pbi;
+      tile_data->cm = cm;
      tile_data->xd = pbi->mb;
      tile_data->xd.corrupted = 0;
-      vp9_tile_init(tile, &pbi->common, 0, buf->col);
+      vp9_tile_init(tile, tile_data->cm, 0, buf->col);
      setup_token_decoder(buf->data, data_end, buf->size, &cm->error,
                          &tile_data->bit_reader, pbi->decrypt_cb,
                          pbi->decrypt_state);
@@ -1041,9 +1059,9 @@ static const uint8_t *decode_tiles_mt(VP9Decoder *pbi,

      worker->had_error = 0;
      if (i == num_workers - 1 || n == tile_cols - 1) {
-        winterface->execute(worker);
+        vp9_worker_execute(worker);
      } else {
-        winterface->launch(worker);
+        vp9_worker_launch(worker);
      }

      if (buf->col == tile_cols - 1) {
@@ -1055,7 +1073,7 @@ static const uint8_t *decode_tiles_mt(VP9Decoder *pbi,

    for (; i > 0; --i) {
      VP9Worker *const worker = &pbi->tile_workers[i - 1];
-      pbi->mb.corrupted |= !winterface->sync(worker);
+      pbi->mb.corrupted |= !vp9_worker_sync(worker);
    }
    if (final_worker > -1) {
      TileWorkerData *const tile_data =
@@ -1091,10 +1109,8 @@ static BITSTREAM_PROFILE read_profile(struct vp9_read_bit_buffer *rb) {
 static size_t read_uncompressed_header(VP9Decoder *pbi,
                                       struct vp9_read_bit_buffer *rb) {
  VP9_COMMON *const cm = &pbi->common;
-  RefCntBuffer *const frame_bufs = cm->buffer_pool->frame_bufs;
-  BufferPool *const pool = pbi->common.buffer_pool;
-  int i, mask, ref_index = 0;
  size_t sz;
+  int i;

  cm->last_frame_type = cm->frame_type;

@@ -1111,22 +1127,16 @@ static size_t read_uncompressed_header(VP9Decoder *pbi,
  if (cm->show_existing_frame) {
    // Show an existing frame directly.
    const int frame_to_show = cm->ref_frame_map[vp9_rb_read_literal(rb, 3)];
-    lock_buffer_pool(pool);
-    if (frame_to_show < 0 || frame_bufs[frame_to_show].ref_count < 1)
+    // Guard against a negative map entry before indexing frame_bufs.
+    if (frame_to_show < 0 || cm->frame_bufs[frame_to_show].ref_count < 1)
      vpx_internal_error(&cm->error, VPX_CODEC_UNSUP_BITSTREAM,
                         "Buffer %d does not contain a decoded frame",
                         frame_to_show);

-    ref_cnt_fb(frame_bufs, &cm->new_fb_idx, frame_to_show);
-    unlock_buffer_pool(pool);
+    ref_cnt_fb(cm->frame_bufs, &cm->new_fb_idx, frame_to_show);
    pbi->refresh_frame_flags = 0;
    cm->lf.filter_level = 0;
    cm->show_frame = 1;
-
-    if (pbi->frame_parallel_decode) {
-      for (i = 0; i < REF_FRAMES; ++i)
-        cm->next_ref_frame_map[i] = cm->ref_frame_map[i];
-    }
    return 0;
  }

@@ -1166,10 +1176,6 @@ static size_t read_uncompressed_header(VP9Decoder *pbi,
    }

    setup_frame_size(cm, rb);
-    if (pbi->need_resync) {
-      vpx_memset(&cm->ref_frame_map, -1, sizeof(cm->ref_frame_map));
-      pbi->need_resync = 0;
-    }
  } else {
    cm->intra_only = cm->show_frame ? 0 : vp9_rb_read_bit(rb);

@@ -1181,20 +1187,17 @@ static size_t read_uncompressed_header(VP9Decoder *pbi,

      pbi->refresh_frame_flags = vp9_rb_read_literal(rb, REF_FRAMES);
      setup_frame_size(cm, rb);
-      if (pbi->need_resync) {
-        vpx_memset(&cm->ref_frame_map, -1, sizeof(cm->ref_frame_map));
-        pbi->need_resync = 0;
-      }
    } else {
      pbi->refresh_frame_flags = vp9_rb_read_literal(rb, REF_FRAMES);
+
      for (i = 0; i < REFS_PER_FRAME; ++i) {
        const int ref = vp9_rb_read_literal(rb, REF_FRAMES_LOG2);
        const int idx = cm->ref_frame_map[ref];
-        RefBuffer *const ref_frame = &cm->frame_refs[i];
-        ref_frame->idx = idx;
-        ref_frame->buf = &frame_bufs[idx].buf;
+        cm->frame_refs[i].idx = idx;
+        cm->frame_refs[i].buf = &cm->frame_bufs[idx].buf;
        cm->ref_frame_sign_bias[LAST_FRAME + i] = vp9_rb_read_bit(rb);
      }
+
      setup_frame_size_with_refs(cm, rb);

      cm->allow_high_precision_mv = vp9_rb_read_bit(rb);
@@ -1212,12 +1215,6 @@ static size_t read_uncompressed_header(VP9Decoder *pbi,
    }
  }

-  if (pbi->need_resync) {
-    vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME,
-                       "Keyframe / intra-only frame required to reset decoder"
-                       " state");
-  }
-
  if (!cm->error_resilient_mode) {
    cm->coding_use_prev_mi = 1;
    cm->refresh_frame_context = vp9_rb_read_bit(rb);
@@ -1232,30 +1229,6 @@ static size_t read_uncompressed_header(VP9Decoder *pbi,
  // below, forcing the use of context 0 for those frame types.
  cm->frame_context_idx = vp9_rb_read_literal(rb, FRAME_CONTEXTS_LOG2);

-  // Generate next_ref_frame_map.
-  lock_buffer_pool(pool);
-  for (mask = pbi->refresh_frame_flags; mask; mask >>= 1) {
-    if (mask & 1) {
-      cm->next_ref_frame_map[ref_index] = cm->new_fb_idx;
-      ++frame_bufs[cm->new_fb_idx].ref_count;
-    } else {
-      cm->next_ref_frame_map[ref_index] = cm->ref_frame_map[ref_index];
-    }
-    // Current thread holds the reference frame.
-    if (cm->ref_frame_map[ref_index] >= 0)
-      ++frame_bufs[cm->ref_frame_map[ref_index]].ref_count;
-    ++ref_index;
-  }
-
-  for (; ref_index < REF_FRAMES; ++ref_index) {
-    cm->next_ref_frame_map[ref_index] = cm->ref_frame_map[ref_index];
-    // Current thread holds the reference frame.
-    if (cm->ref_frame_map[ref_index] >= 0)
-      ++frame_bufs[cm->ref_frame_map[ref_index]].ref_count;
-  }
-  unlock_buffer_pool(pool);
-  pbi->hold_ref_buf = 1;
-
  if (frame_is_intra_only(cm) || cm->error_resilient_mode)
    vp9_setup_past_independence(cm);

@@ -1400,9 +1373,7 @@ void vp9_decode_frame(VP9Decoder *pbi,
                      const uint8_t **p_data_end) {
  VP9_COMMON *const cm = &pbi->common;
  MACROBLOCKD *const xd = &pbi->mb;
-  struct vp9_read_bit_buffer rb = { NULL, NULL, 0, NULL, 0};
-  int context_updated = 0;
-
+  struct vp9_read_bit_buffer rb = { 0 };
  uint8_t clear_data[MAX_VP9_HEADER_SIZE];
  const size_t first_partition_size = read_uncompressed_header(pbi,
      init_read_bit_buffer(pbi, &rb, data, data_end, clear_data));
@@ -1439,28 +1410,6 @@ void vp9_decode_frame(VP9Decoder *pbi,
  xd->corrupted = 0;
  new_fb->corrupted = read_compressed_header(pbi, data, first_partition_size);

-  if (cm->lf.filter_level) {
-    vp9_loop_filter_frame_init(cm, cm->lf.filter_level);
-  }
-
-  // If encoded in frame parallel mode, frame context is ready after decoding
-  // the frame header.
-  if (pbi->frame_parallel_decode && cm->frame_parallel_decoding_mode) {
-    VP9Worker *const worker = pbi->frame_worker_owner;
-    FrameWorkerData *const frame_worker_data = worker->data1;
-    if (cm->refresh_frame_context) {
-      context_updated = 1;
-      cm->frame_contexts[cm->frame_context_idx] = cm->fc;
-    }
-    vp9_frameworker_lock_stats(worker);
-    pbi->cur_buf->row = -1;
-    pbi->cur_buf->col = -1;
-    frame_worker_data->frame_context_ready = 1;
-    // Signal the main thread that context is ready.
-    vp9_frameworker_signal_stats(worker);
-    vp9_frameworker_unlock_stats(worker);
-  }
-
  // TODO(jzern): remove frame_parallel_decoding_mode restriction for
  // single-frame tile decoding.
  if (pbi->max_threads > 1 && tile_rows == 1 && tile_cols > 1 &&
@@ -1473,7 +1422,9 @@ void vp9_decode_frame(VP9Decoder *pbi,
    *p_data_end = decode_tiles(pbi, data + first_partition_size, data_end);
  }

-  if (!xd->corrupted) {
+  new_fb->corrupted |= xd->corrupted;
+
+  if (!new_fb->corrupted) {
    if (!cm->error_resilient_mode && !cm->frame_parallel_decoding_mode) {
      vp9_adapt_coef_probs(cm);

@@ -1484,235 +1435,8 @@ void vp9_decode_frame(VP9Decoder *pbi,
    } else {
      debug_check_frame_counts(cm);
    }
-  } else {
-    vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME,
-                       "Decode failed. Frame data is corrupted.");
  }

-  // Non frame parallel update frame context here.
-  if (cm->refresh_frame_context && !context_updated)
+  if (cm->refresh_frame_context)
    cm->frame_contexts[cm->frame_context_idx] = cm->fc;
 }
-
-static void build_mc_border(const uint8_t *src, int src_stride,
-                            uint8_t *dst, int dst_stride,
-                            int x, int y, int b_w, int b_h, int w, int h) {
-  // Get a pointer to the start of the real data for this row.
-  const uint8_t *ref_row = src - x - y * src_stride;
-
-  if (y >= h)
-    ref_row += (h - 1) * src_stride;
-  else if (y > 0)
-    ref_row += y * src_stride;
-
-  do {
-    int right = 0, copy;
-    int left = x < 0 ? -x : 0;
-
-    if (left > b_w)
-      left = b_w;
-
-    if (x + b_w > w)
-      right = x + b_w - w;
-
-    if (right > b_w)
-      right = b_w;
-
-    copy = b_w - left - right;
-
-    if (left)
-      memset(dst, ref_row[0], left);
-
-    if (copy)
-      memcpy(dst + left, ref_row + x + left, copy);
-
-    if (right)
-      memset(dst + left + copy, ref_row[w - 1], right);
-
-    dst += dst_stride;
-    ++y;
-
-    if (y > 0 && y < h)
-      ref_row += src_stride;
-  } while (--b_h);
-}
-
-void dec_build_inter_predictors(VP9Decoder *const pbi, MACROBLOCKD *xd,
-                                int plane, int block, int bw, int bh, int x,
-                                int y, int w, int h, int mi_x, int mi_y) {
-  struct macroblockd_plane *const pd = &xd->plane[plane];
-  const MODE_INFO *mi = xd->mi[0];
-  const int is_compound = has_second_ref(&mi->mbmi);
-  const InterpKernel *kernel = vp9_get_interp_kernel(mi->mbmi.interp_filter);
-  int ref;
-
-  for (ref = 0; ref < 1 + is_compound; ++ref) {
-    const struct scale_factors *const sf = &xd->block_refs[ref]->sf;
-    struct buf_2d *const pre_buf = &pd->pre[ref];
-    struct buf_2d *const dst_buf = &pd->dst;
-    uint8_t *const dst = dst_buf->buf + dst_buf->stride * y + x;
-    const MV mv = mi->mbmi.sb_type < BLOCK_8X8
-               ? average_split_mvs(pd, plane, mi, ref, block)
-               : mi->mbmi.mv[ref].as_mv;
-
-
-    // TODO(jkoleszar): This clamping is done in the incorrect place for the
-    // scaling case. It needs to be done on the scaled MV, not the pre-scaling
-    // MV. Note however that it performs the subsampling aware scaling so
-    // that the result is always q4.
-    // mv_precision precision is MV_PRECISION_Q4.
-    const MV mv_q4 = clamp_mv_to_umv_border_sb(xd, &mv, bw, bh,
-                                               pd->subsampling_x,
-                                               pd->subsampling_y);
-
-    MV32 scaled_mv;
-    int xs, ys, x0, y0, x0_16, y0_16, y1, frame_width, frame_height,
-        buf_stride, subpel_x, subpel_y;
-    uint8_t *ref_frame, *buf_ptr;
-    const int idx = xd->block_refs[ref]->idx;
-    BufferPool *const pool = pbi->common.buffer_pool;
-    RefCntBuffer *const ref_frame_buf = &pool->frame_bufs[idx];
-
-    // Get reference frame pointer, width and height.
-    if (plane == 0) {
-      frame_width = ref_frame_buf->buf.y_crop_width;
-      frame_height = ref_frame_buf->buf.y_crop_height;
-      ref_frame = ref_frame_buf->buf.y_buffer;
-    } else {
-      frame_width = ref_frame_buf->buf.uv_crop_width;
-      frame_height = ref_frame_buf->buf.uv_crop_height;
-      ref_frame = plane == 1 ? ref_frame_buf->buf.u_buffer
-                           : ref_frame_buf->buf.v_buffer;
-    }
-
-    if (vp9_is_scaled(sf)) {
-      // Co-ordinate of containing block to pixel precision.
-      int x_start = (-xd->mb_to_left_edge >> (3 + pd->subsampling_x));
-      int y_start = (-xd->mb_to_top_edge >> (3 + pd->subsampling_y));
-
-      // Co-ordinate of the block to 1/16th pixel precision.
-      x0_16 = (x_start + x) << SUBPEL_BITS;
-      y0_16 = (y_start + y) << SUBPEL_BITS;
-
-      // Co-ordinate of current block in reference frame
-      // to 1/16th pixel precision.
-      x0_16 = sf->scale_value_x(x0_16, sf);
-      y0_16 = sf->scale_value_y(y0_16, sf);
-
-      // Map the top left corner of the block into the reference frame.
-      x0 = sf->scale_value_x(x_start + x, sf);
-      y0 = sf->scale_value_y(y_start + y, sf);
-
-      // Scale the MV and incorporate the sub-pixel offset of the block
-      // in the reference frame.
-      scaled_mv = vp9_scale_mv(&mv_q4, mi_x + x, mi_y + y, sf);
-      xs = sf->x_step_q4;
-      ys = sf->y_step_q4;
-    } else {
-      // Co-ordinate of containing block to pixel precision.
-      x0 = (-xd->mb_to_left_edge >> (3 + pd->subsampling_x)) + x;
-      y0 = (-xd->mb_to_top_edge >> (3 + pd->subsampling_y)) + y;
-
-      // Co-ordinate of the block to 1/16th pixel precision.
-      x0_16 = x0 << SUBPEL_BITS;
-      y0_16 = y0 << SUBPEL_BITS;
-
-      scaled_mv.row = mv_q4.row;
-      scaled_mv.col = mv_q4.col;
-      xs = ys = 16;
-    }
-    subpel_x = scaled_mv.col & SUBPEL_MASK;
-    subpel_y = scaled_mv.row & SUBPEL_MASK;
-
-    // Calculate the top left corner of the best matching block in the
-    // reference frame.
-    x0 += scaled_mv.col >> SUBPEL_BITS;
-    y0 += scaled_mv.row >> SUBPEL_BITS;
-    x0_16 += scaled_mv.col;
-    y0_16 += scaled_mv.row;
-
-    // Get reference block pointer.
-    buf_ptr = ref_frame + y0 * pre_buf->stride + x0;
-    buf_stride = pre_buf->stride;
-
-    // Get reference block bottom right vertical coordinate.
-    y1 = ((y0_16 + (h - 1) * ys) >> SUBPEL_BITS) + 1;
-
-    // Do border extension if there is motion or the
-    // width/height is not a multiple of 8 pixels.
-    if (scaled_mv.col || scaled_mv.row ||
-        (frame_width & 0x7) || (frame_height & 0x7)) {
-      int x_pad = 0, y_pad = 0;
-
-      // Get reference block bottom right horizontal coordinate.
-      int x1 = ((x0_16 + (w - 1) * xs) >> SUBPEL_BITS) + 1;
-
-      if (subpel_x || (sf->x_step_q4 & SUBPEL_MASK)) {
-        x0 -= VP9_INTERP_EXTEND - 1;
-        x1 += VP9_INTERP_EXTEND;
-        x_pad = 1;
-      }
-
-      if (subpel_y || (sf->y_step_q4 & SUBPEL_MASK)) {
-        y0 -= VP9_INTERP_EXTEND - 1;
-        y1 += VP9_INTERP_EXTEND;
-        y_pad = 1;
-      }
-
-      // Wait until reference block is ready. Pad 7 more pixels as last 7
-      // pixels of each superblock row can be changed by next superblock row.
-       if (pbi->frame_parallel_decode)
-         vp9_frameworker_wait(pbi->frame_worker_owner, ref_frame_buf,
-                              (y1 + 7) << (plane == 0 ? 0 : 1));
-
-      // Skip border extension if block is inside the frame.
-      if (x0 < 0 || x0 > frame_width - 1 || x1 < 0 || x1 > frame_width ||
-          y0 < 0 || y0 > frame_height - 1 || y1 < 0 || y1 > frame_height - 1) {
-        uint8_t *buf_ptr1 = ref_frame + y0 * pre_buf->stride + x0;
-        // Extend the border.
-        build_mc_border(buf_ptr1, pre_buf->stride, xd->mc_buf, x1 - x0 + 1,
-                        x0, y0, x1 - x0 + 1, y1 - y0 + 1, frame_width,
-                        frame_height);
-        buf_stride = x1 - x0 + 1;
-        buf_ptr = xd->mc_buf + y_pad * 3 * buf_stride + x_pad * 3;
-      }
-    } else {
-      // Wait until reference block is ready. Pad 7 more pixels as last 7
-      // pixels of each superblock row can be changed by next superblock row.
-       if (pbi->frame_parallel_decode)
-         vp9_frameworker_wait(pbi->frame_worker_owner, ref_frame_buf,
-                              (y1 + 7) << (plane == 0 ? 0 : 1));
-    }
-
-    inter_predictor(buf_ptr, buf_stride, dst, dst_buf->stride, subpel_x,
-                    subpel_y, sf, w, h, ref, kernel, xs, ys);
-  }
-}
-
-void vp9_dec_build_inter_predictors_sb(VP9Decoder *const pbi, MACROBLOCKD *xd,
-                                       int mi_row, int mi_col,
-                                       BLOCK_SIZE bsize) {
-  int plane;
-  const int mi_x = mi_col * MI_SIZE;
-  const int mi_y = mi_row * MI_SIZE;
-  for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
-    const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize,
-                                                        &xd->plane[plane]);
-    const int num_4x4_w = num_4x4_blocks_wide_lookup[plane_bsize];
-    const int num_4x4_h = num_4x4_blocks_high_lookup[plane_bsize];
-    const int bw = 4 * num_4x4_w;
-    const int bh = 4 * num_4x4_h;
-
-    if (xd->mi[0]->mbmi.sb_type < BLOCK_8X8) {
-      int i = 0, x, y;
-      assert(bsize == BLOCK_8X8);
-      for (y = 0; y < num_4x4_h; ++y)
-        for (x = 0; x < num_4x4_w; ++x)
-          dec_build_inter_predictors(pbi, xd, plane, i++, bw, bh,
-                                     4 * x, 4 * y, 4, 4, mi_x, mi_y);
-    } else {
-      dec_build_inter_predictors(pbi, xd, plane, 0, bw, bh,
-                                 0, 0, bw, bh, mi_x, mi_y);
-    }
-  }
-}
--- a/vp9/decoder/vp9_decodeframe.h
+++ b/vp9/decoder/vp9_decodeframe.h
@@ -25,9 +25,6 @@ void vp9_decode_frame(struct VP9Decoder *pbi,
                      const uint8_t *data, const uint8_t *data_end,
                      const uint8_t **p_data_end);

-void vp9_dec_build_inter_predictors_sb(struct VP9Decoder *const pbi,
-                                       MACROBLOCKD *xd, int mi_row, int mi_col,
-                                       BLOCK_SIZE bsize);
 #ifdef __cplusplus
 }  // extern "C"
 #endif
--- a/vp9/decoder/vp9_decodemv.c
+++ b/vp9/decoder/vp9_decodemv.c
@@ -96,7 +96,7 @@ static void set_segment_id(VP9_COMMON *cm, BLOCK_SIZE bsize,

  for (y = 0; y < ymis; y++)
    for (x = 0; x < xmis; x++)
-      cm->current_frame_seg_map[mi_offset + y * cm->mi_cols + x] = segment_id;
+      cm->last_frame_seg_map[mi_offset + y * cm->mi_cols + x] = segment_id;
 }

 static int read_intra_segment_id(VP9_COMMON *const cm, MACROBLOCKD *const xd,
@@ -129,10 +129,8 @@ static int read_inter_segment_id(VP9_COMMON *const cm, MACROBLOCKD *const xd,

  predicted_segment_id = vp9_get_segment_id(cm, cm->last_frame_seg_map,
                                            bsize, mi_row, mi_col);
-  if (!seg->update_map) {
-    set_segment_id(cm, bsize, mi_row, mi_col, predicted_segment_id);
+  if (!seg->update_map)
    return predicted_segment_id;
-  }

  if (seg->temporal_update) {
    const vp9_prob pred_prob = vp9_get_pred_prob_seg_id(seg, xd);
@@ -420,18 +418,11 @@ static int read_is_inter_block(VP9_COMMON *const cm, MACROBLOCKD *const xd,
  }
 }

-static void fpm_sync(void *const data, int mi_row) {
-  VP9Decoder *const pbi = (VP9Decoder *)data;
-  vp9_frameworker_wait(pbi->frame_worker_owner, pbi->prev_buf,
-                       mi_row << MI_BLOCK_SIZE_LOG2);
-}
-
-static void read_inter_block_mode_info(VP9Decoder *const pbi,
+static void read_inter_block_mode_info(VP9_COMMON *const cm,
                                       MACROBLOCKD *const xd,
                                       const TileInfo *const tile,
                                       MODE_INFO *const mi,
                                       int mi_row, int mi_col, vp9_reader *r) {
-  VP9_COMMON *const cm = &pbi->common;
  MB_MODE_INFO *const mbmi = &mi->mbmi;
  const BLOCK_SIZE bsize = mbmi->sb_type;
  const int allow_hp = cm->allow_high_precision_mv;
@@ -445,7 +436,7 @@ static void read_inter_block_mode_info(VP9Decoder *const pbi,
  for (ref = 0; ref < 1 + is_compound; ++ref) {
    const MV_REFERENCE_FRAME frame = mbmi->ref_frame[ref];
    vp9_find_mv_refs(cm, xd, tile, mi, frame, mbmi->ref_mvs[frame],
-                     mi_row, mi_col, fpm_sync, (void *)pbi);
+                     mi_row, mi_col);
  }

  inter_mode_ctx = mbmi->mode_context[mbmi->ref_frame[0]];
@@ -506,6 +497,10 @@ static void read_inter_block_mode_info(VP9Decoder *const pbi,
          mi->bmi[j + 2] = mi->bmi[j];
        if (num_4x4_w == 2)
          mi->bmi[j + 1] = mi->bmi[j];
+
+#if CONFIG_TRANSCODE
+        mi->bmi[j].as_mode = b_mode;
+#endif
      }
    }

@@ -519,13 +514,10 @@ static void read_inter_block_mode_info(VP9Decoder *const pbi,
  }
 }

-// TODO(hkuang): Pass cm instead of pbi. This requires change in
-// vp9_frameworker_wait.
-static void read_inter_frame_mode_info(VP9Decoder *const pbi,
+static void read_inter_frame_mode_info(VP9_COMMON *const cm,
                                       MACROBLOCKD *const xd,
                                       const TileInfo *const tile,
                                       int mi_row, int mi_col, vp9_reader *r) {
-  VP9_COMMON *const cm = &pbi->common;
  MODE_INFO *const mi = xd->mi[0];
  MB_MODE_INFO *const mbmi = &mi->mbmi;
  int inter_block;
@@ -539,17 +531,16 @@ static void read_inter_frame_mode_info(VP9Decoder *const pbi,
                               !mbmi->skip || !inter_block, r);

  if (inter_block)
-    read_inter_block_mode_info(pbi, xd, tile, mi, mi_row, mi_col, r);
+    read_inter_block_mode_info(cm, xd, tile, mi, mi_row, mi_col, r);
  else
    read_intra_block_mode_info(cm, mi, r);
 }

-void vp9_read_mode_info(VP9Decoder *const pbi, MACROBLOCKD *xd,
+void vp9_read_mode_info(VP9_COMMON *cm, MACROBLOCKD *xd,
                        const TileInfo *const tile,
                        int mi_row, int mi_col, vp9_reader *r) {
-  VP9_COMMON *const cm = &pbi->common;
  if (frame_is_intra_only(cm))
    read_intra_frame_mode_info(cm, xd, mi_row, mi_col, r);
  else
-    read_inter_frame_mode_info(pbi, xd, tile, mi_row, mi_col, r);
+    read_inter_frame_mode_info(cm, xd, tile, mi_row, mi_col, r);
 }
--- a/vp9/decoder/vp9_decodemv.h
+++ b/vp9/decoder/vp9_decodemv.h
@@ -11,7 +11,6 @@
 #ifndef VP9_DECODER_VP9_DECODEMV_H_
 #define VP9_DECODER_VP9_DECODEMV_H_

-#include "vp9/decoder/vp9_decoder.h"
 #include "vp9/decoder/vp9_reader.h"

 #ifdef __cplusplus
@@ -20,7 +19,7 @@ extern "C" {

 struct TileInfo;

-void vp9_read_mode_info(VP9Decoder *const pbi, MACROBLOCKD *xd,
+void vp9_read_mode_info(VP9_COMMON *cm, MACROBLOCKD *xd,
                        const struct TileInfo *const tile,
                        int mi_row, int mi_col, vp9_reader *r);

--- a/vp9/decoder/vp9_decoder.c
+++ b/vp9/decoder/vp9_decoder.c
@@ -26,23 +26,25 @@
 #endif
 #include "vp9/common/vp9_quant_common.h"
 #include "vp9/common/vp9_systemdependent.h"
-#include "vp9/common/vp9_thread.h"

 #include "vp9/decoder/vp9_decodeframe.h"
 #include "vp9/decoder/vp9_decoder.h"
 #include "vp9/decoder/vp9_detokenize.h"
 #include "vp9/decoder/vp9_dthread.h"

+#include <stdio.h>
+
 static void initialize_dec() {
  static int init_done = 0;

  if (!init_done) {
    vp9_init_neighbors();
+    vp9_init_quant_tables();
    init_done = 1;
  }
 }

-VP9Decoder *vp9_decoder_create(BufferPool *const pool) {
+VP9Decoder *vp9_decoder_create() {
  VP9Decoder *const pbi = vpx_memalign(32, sizeof(*pbi));
  VP9_COMMON *const cm = pbi ? &pbi->common : NULL;

@@ -58,18 +60,15 @@ VP9Decoder *vp9_decoder_create(BufferPool *const pool) {
  }

  cm->error.setjmp = 1;
-  pbi->need_resync = 1;
  initialize_dec();

  vp9_rtcd();

  // Initialize the references to not point to any frame buffers.
  vpx_memset(&cm->ref_frame_map, -1, sizeof(cm->ref_frame_map));
-  vpx_memset(&cm->next_ref_frame_map, -1, sizeof(cm->next_ref_frame_map));

  cm->current_video_frame = 0;
  pbi->ready_for_new_data = 1;
-  pbi->common.buffer_pool = pool;

  // vp9_init_dequantizer() is first called here. Add check in
  // frame_init_dequantizer() to avoid unnecessary calling of
@@ -80,8 +79,11 @@ VP9Decoder *vp9_decoder_create(BufferPool *const pool) {

  cm->error.setjmp = 0;

-  vp9_get_worker_interface()->init(&pbi->lf_worker);
+  vp9_worker_init(&pbi->lf_worker);

+#if CONFIG_TRANSCODE && WRITE_MI_ARRAY
+  cm->mi_array_pf = fopen("mode_info_array_2.bin", "rb");
+#endif
  return pbi;
 }

@@ -89,13 +91,17 @@ void vp9_decoder_remove(VP9Decoder *pbi) {
  VP9_COMMON *const cm = &pbi->common;
  int i;

+#if CONFIG_TRANSCODE && WRITE_MI_ARRAY
+  fclose(cm->mi_array_pf);
+#endif
+
  vp9_remove_common(cm);
-  vp9_get_worker_interface()->end(&pbi->lf_worker);
+  vp9_worker_end(&pbi->lf_worker);
  vpx_free(pbi->lf_worker.data1);
  vpx_free(pbi->tile_data);
  for (i = 0; i < pbi->num_tile_workers; ++i) {
    VP9Worker *const worker = &pbi->tile_workers[i];
-    vp9_get_worker_interface()->end(worker);
+    vp9_worker_end(worker);
    vpx_free(worker->data1);
    vpx_free(worker->data2);
  }
@@ -128,7 +134,7 @@ vpx_codec_err_t vp9_copy_reference_dec(VP9Decoder *pbi,
   */
  if (ref_frame_flag == VP9_LAST_FLAG) {
    const YV12_BUFFER_CONFIG *const cfg =
-        &cm->buffer_pool->frame_bufs[cm->ref_frame_map[0]].buf;
+        &cm->frame_bufs[cm->ref_frame_map[0]].buf;
    if (!equal_dimensions(cfg, sd))
      vpx_internal_error(&cm->error, VPX_CODEC_ERROR,
                         "Incorrect buffer dimensions");
@@ -147,7 +153,6 @@ vpx_codec_err_t vp9_set_reference_dec(VP9_COMMON *cm,
                                      VP9_REFFRAME ref_frame_flag,
                                      YV12_BUFFER_CONFIG *sd) {
  RefBuffer *ref_buf = NULL;
-  RefCntBuffer *const frame_bufs = cm->buffer_pool->frame_bufs;

  // TODO(jkoleszar): The decoder doesn't have any real knowledge of what the
  // encoder is using the frame buffers for. This is just a stub to keep the
@@ -175,11 +180,11 @@ vpx_codec_err_t vp9_set_reference_dec(VP9_COMMON *cm,
    const int free_fb = get_free_fb(cm);
    // Decrease ref_count since it will be increased again in
    // ref_cnt_fb() below.
-    --frame_bufs[free_fb].ref_count;
+    cm->frame_bufs[free_fb].ref_count--;

    // Manage the reference counters and copy image.
-    ref_cnt_fb(frame_bufs, ref_fb_ptr, free_fb);
-    ref_buf->buf = &frame_bufs[*ref_fb_ptr].buf;
+    ref_cnt_fb(cm->frame_bufs, ref_fb_ptr, free_fb);
+    ref_buf->buf = &cm->frame_bufs[*ref_fb_ptr].buf;
    vp8_yv12_copy_frame(sd, ref_buf->buf);
  }

@@ -189,12 +194,11 @@ vpx_codec_err_t vp9_set_reference_dec(VP9_COMMON *cm,

 int vp9_get_reference_dec(VP9Decoder *pbi, int index, YV12_BUFFER_CONFIG **fb) {
  VP9_COMMON *cm = &pbi->common;
-  RefCntBuffer *const frame_bufs = cm->buffer_pool->frame_bufs;

  if (index < 0 || index >= REF_FRAMES)
    return -1;

-  *fb = &frame_bufs[cm->ref_frame_map[index]].buf;
+  *fb = &cm->frame_bufs[cm->ref_frame_map[index]].buf;
  return 0;
 }

@@ -202,38 +206,21 @@ int vp9_get_reference_dec(VP9Decoder *pbi, int index, YV12_BUFFER_CONFIG **fb) {
 static void swap_frame_buffers(VP9Decoder *pbi) {
  int ref_index = 0, mask;
  VP9_COMMON *const cm = &pbi->common;
-  BufferPool *const pool = cm->buffer_pool;
-  RefCntBuffer *const frame_bufs = cm->buffer_pool->frame_bufs;

-  lock_buffer_pool(pool);
  for (mask = pbi->refresh_frame_flags; mask; mask >>= 1) {
-    const int old_idx = cm->ref_frame_map[ref_index];
-    // Current thread releases the holding of reference frame.
-    decrease_ref_count(old_idx, frame_bufs, pool);
-
-    // Release the reference frame in reference map.
-    if ((mask & 1) && old_idx >= 0) {
-      decrease_ref_count(old_idx, frame_bufs, pool);
+    if (mask & 1) {
+      const int old_idx = cm->ref_frame_map[ref_index];
+      ref_cnt_fb(cm->frame_bufs, &cm->ref_frame_map[ref_index],
+                 cm->new_fb_idx);
+      if (old_idx >= 0 && cm->frame_bufs[old_idx].ref_count == 0)
+        cm->release_fb_cb(cm->cb_priv,
+                          &cm->frame_bufs[old_idx].raw_frame_buffer);
    }
-    cm->ref_frame_map[ref_index] = cm->next_ref_frame_map[ref_index];
    ++ref_index;
  }

-  // Current thread releases the holding of reference frame.
-  for (; ref_index < REF_FRAMES && !cm->show_existing_frame; ++ref_index) {
-    const int old_idx = cm->ref_frame_map[ref_index];
-    decrease_ref_count(old_idx, frame_bufs, pool);
-    cm->ref_frame_map[ref_index] = cm->next_ref_frame_map[ref_index];
-  }
-  unlock_buffer_pool(pool);
-  pbi->hold_ref_buf = 0;
  cm->frame_to_show = get_frame_new_buffer(cm);
-
-  if (!pbi->frame_parallel_decode || !cm->show_frame) {
-    lock_buffer_pool(pool);
-    --frame_bufs[cm->new_fb_idx].ref_count;
-    unlock_buffer_pool(pool);
-  }
+  cm->frame_bufs[cm->new_fb_idx].ref_count--;

  // Invalidate these references until the next frame starts.
  for (ref_index = 0; ref_index < 3; ref_index++)
@@ -243,10 +230,9 @@ static void swap_frame_buffers(VP9Decoder *pbi) {
 int vp9_receive_compressed_data(VP9Decoder *pbi,
                                size_t size, const uint8_t **psource) {
  VP9_COMMON *const cm = &pbi->common;
-  BufferPool *const pool = cm->buffer_pool;
-  RefCntBuffer *const frame_bufs = cm->buffer_pool->frame_bufs;
  const uint8_t *source = *psource;
  int retcode = 0;
+
  cm->error.error_code = VPX_CODEC_OK;

  if (size == 0) {
@@ -262,63 +248,27 @@ int vp9_receive_compressed_data(VP9Decoder *pbi,
      cm->frame_refs[0].buf->corrupted = 1;
  }

-  pbi->ready_for_new_data = 0;
-
  // Check if the previous frame was a frame without any references to it.
-  // Release frame buffer if not decoding in frame parallel mode.
-  if (!pbi->frame_parallel_decode && cm->new_fb_idx >= 0
-      && frame_bufs[cm->new_fb_idx].ref_count == 0)
-    pool->release_fb_cb(pool->cb_priv,
-                        &frame_bufs[cm->new_fb_idx].raw_frame_buffer);
+  if (cm->new_fb_idx >= 0 && cm->frame_bufs[cm->new_fb_idx].ref_count == 0)
+    cm->release_fb_cb(cm->cb_priv,
+                      &cm->frame_bufs[cm->new_fb_idx].raw_frame_buffer);
  cm->new_fb_idx = get_free_fb(cm);

-  pbi->hold_ref_buf = 0;
-  if (pbi->frame_parallel_decode) {
-    VP9Worker *const worker = pbi->frame_worker_owner;
-    vp9_frameworker_lock_stats(worker);
-    frame_bufs[cm->new_fb_idx].frame_worker_owner = worker;
-    // Reset decoding progress.
-    pbi->cur_buf = &frame_bufs[cm->new_fb_idx];
-    pbi->cur_buf->row = -1;
-    pbi->cur_buf->col = -1;
-    vp9_frameworker_unlock_stats(worker);
-  } else {
-    pbi->cur_buf = &frame_bufs[cm->new_fb_idx];
-  }
-
  if (setjmp(cm->error.jmp)) {
    cm->error.setjmp = 0;
-    pbi->ready_for_new_data = 1;

-    lock_buffer_pool(pool);
-    // Release all the reference buffers if worker thread is holding them.
-    if (pbi->hold_ref_buf == 1) {
-      int ref_index = 0, mask;
-      VP9_COMMON *const cm = &pbi->common;
-      BufferPool *const pool = cm->buffer_pool;
-      RefCntBuffer *const frame_bufs = cm->buffer_pool->frame_bufs;
-      for (mask = pbi->refresh_frame_flags; mask; mask >>= 1) {
-        const int old_idx = cm->ref_frame_map[ref_index];
-        // Current thread releases the holding of reference frame.
-        decrease_ref_count(old_idx, frame_bufs, pool);
+    // We do not know if the missing frame(s) was supposed to update
+    // any of the reference buffers, but we act conservative and
+    // mark only the last buffer as corrupted.
+    //
+    // TODO(jkoleszar): Error concealment is undefined and non-normative
+    // at this point, but if it becomes so, [0] may not always be the correct
+    // thing to do here.
+    if (cm->frame_refs[0].idx != INT_MAX)
+      cm->frame_refs[0].buf->corrupted = 1;

-        // Release the reference frame in reference map.
-        if ((mask & 1) && old_idx >= 0) {
-          decrease_ref_count(old_idx, frame_bufs, pool);
-        }
-        ++ref_index;
-      }
-
-      // Current thread releases the holding of reference frame.
-      for (; ref_index < REF_FRAMES && !cm->show_existing_frame; ++ref_index) {
-        const int old_idx = cm->ref_frame_map[ref_index];
-        decrease_ref_count(old_idx, frame_bufs, pool);
-      }
-      pbi->hold_ref_buf = 0;
-    }
-    // Release current frame.
-    decrease_ref_count(cm->new_fb_idx, frame_bufs, pool);
-    unlock_buffer_pool(pool);
+    if (cm->frame_bufs[cm->new_fb_idx].ref_count > 0)
+      cm->frame_bufs[cm->new_fb_idx].ref_count--;

    return -1;
  }
@@ -331,39 +281,20 @@ int vp9_receive_compressed_data(VP9Decoder *pbi,

  vp9_clear_system_state();

+  cm->last_width = cm->width;
+  cm->last_height = cm->height;
+
  if (!cm->show_existing_frame)
    cm->last_show_frame = cm->show_frame;
+  if (cm->show_frame) {
+    if (!cm->show_existing_frame)
+      vp9_swap_mi_and_prev_mi(cm);

-  // Update progress in frame parallel decode.
-  if (pbi->frame_parallel_decode) {
-    // Need to lock the mutex here as another thread may
-    // be accessing this buffer.
-    VP9Worker *const worker = pbi->frame_worker_owner;
-    FrameWorkerData *const frame_worker_data = worker->data1;
-    vp9_frameworker_lock_stats(worker);
-
-    if (cm->show_frame) {
-      if (!cm->show_existing_frame)
-        vp9_swap_mi_and_prev_mi(cm);
-      cm->current_video_frame++;
-    }
-    vp9_swap_current_and_last_seg_map(cm);
-    frame_worker_data->frame_decoded = 1;
-    frame_worker_data->frame_context_ready = 1;
-    vp9_frameworker_signal_stats(worker);
-    vp9_frameworker_unlock_stats(worker);
-  } else {
-    cm->last_width = cm->width;
-    cm->last_height = cm->height;
-    if (cm->show_frame) {
-      if (!cm->show_existing_frame)
-        vp9_swap_mi_and_prev_mi(cm);
-      cm->current_video_frame++;
-    }
-
-    vp9_swap_current_and_last_seg_map(cm);
+    cm->current_video_frame++;
  }

+  pbi->ready_for_new_data = 0;
+
  cm->error.setjmp = 0;
  return retcode;
 }
@@ -378,12 +309,12 @@ int vp9_get_raw_frame(VP9Decoder *pbi, YV12_BUFFER_CONFIG *sd,
  if (pbi->ready_for_new_data == 1)
    return ret;

-  pbi->ready_for_new_data = 1;
-
  /* no raw frame to show!!! */
  if (pbi->common.show_frame == 0)
    return ret;

+  pbi->ready_for_new_data = 1;
+
 #if CONFIG_VP9_POSTPROC
  ret = vp9_post_proc_frame(&pbi->common, sd, flags);
 #else
--- a/vp9/decoder/vp9_decoder.h
+++ b/vp9/decoder/vp9_decoder.h
@@ -18,15 +18,20 @@

 #include "vp9/common/vp9_onyxc_int.h"
 #include "vp9/common/vp9_ppflags.h"
-#include "vp9/common/vp9_thread.h"

 #include "vp9/decoder/vp9_decoder.h"
 #include "vp9/decoder/vp9_dthread.h"
+#include "vp9/decoder/vp9_thread.h"

 #ifdef __cplusplus
 extern "C" {
 #endif

+#if CONFIG_TRANSCODE
+#define WRITE_MI_ARRAY 0
+#define READ_MI_ARRAY  0
+#endif
+
 // TODO(hkuang): combine this with TileWorkerData.
 typedef struct TileData {
  VP9_COMMON *cm;
@@ -43,14 +48,6 @@ typedef struct VP9Decoder {

  int refresh_frame_flags;

-  int frame_parallel_decode;  // frame-based threading.
-
-  // TODO(hkuang): Combine this with cur_buf in macroblockd as they are
-  // the same.
-  RefCntBuffer *cur_buf;   //  Current decoding frame buffer.
-  RefCntBuffer *prev_buf;  //  Previous decoding frame buffer.
-
-  VP9Worker *frame_worker_owner;   // frame_worker that owns this pbi.
  VP9Worker lf_worker;
  VP9Worker *tile_workers;
  int num_tile_workers;
@@ -65,8 +62,6 @@ typedef struct VP9Decoder {

  int max_threads;
  int inv_tile_order;
-  int need_resync;  // wait for key/intra-only frame.
-  int hold_ref_buf;  // hold the reference buffer.
 } VP9Decoder;

 int vp9_receive_compressed_data(struct VP9Decoder *pbi,
@@ -86,25 +81,10 @@ vpx_codec_err_t vp9_set_reference_dec(VP9_COMMON *cm,
 int vp9_get_reference_dec(struct VP9Decoder *pbi,
                          int index, YV12_BUFFER_CONFIG **fb);

-struct VP9Decoder *vp9_decoder_create(BufferPool *const pool);
+struct VP9Decoder *vp9_decoder_create();

 void vp9_decoder_remove(struct VP9Decoder *pbi);

-static INLINE void decrease_ref_count(int idx, RefCntBuffer *const frame_bufs,
-                                      BufferPool *const pool) {
-  if (idx >= 0) {
-    --frame_bufs[idx].ref_count;
-    // A worker may only get a free framebuffer index when calling get_free_fb.
-    // But the private buffer is not set up until finish decoding header.
-    // So any error happens during decoding header, the frame_bufs will not
-    // have valid priv buffer.
-    if (frame_bufs[idx].ref_count == 0 &&
-        frame_bufs[idx].raw_frame_buffer.priv) {
-      pool->release_fb_cb(pool->cb_priv, &frame_bufs[idx].raw_frame_buffer);
-    }
-  }
-}
-
 #ifdef __cplusplus
 }  // extern "C"
 #endif
--- a/vp9/decoder/vp9_dthread.c
+++ b/vp9/decoder/vp9_dthread.c
@@ -17,8 +17,6 @@
 #include "vp9/decoder/vp9_dthread.h"
 #include "vp9/decoder/vp9_decoder.h"

-// #define DEBUG_THREAD
-
 #if CONFIG_MULTITHREAD
 static INLINE void mutex_lock(pthread_mutex_t *const mutex) {
  const int kMaxTryLocks = 4000;
@@ -126,7 +124,7 @@ static void loop_filter_rows_mt(const YV12_BUFFER_CONFIG *const frame_buffer,
 static int loop_filter_row_worker(void *arg1, void *arg2) {
  TileWorkerData *const tile_data = (TileWorkerData*)arg1;
  LFWorkerData *const lf_data = &tile_data->lfdata;
-  (void) arg2;
+
  loop_filter_rows_mt(lf_data->frame_buffer, lf_data->cm, lf_data->planes,
                      lf_data->start, lf_data->stop, lf_data->y_only,
                      lf_data->lf_sync, lf_data->num_lf_workers);
@@ -140,7 +138,6 @@ void vp9_loop_filter_frame_mt(YV12_BUFFER_CONFIG *frame,
                              int frame_filter_level,
                              int y_only) {
  VP9LfSync *const lf_sync = &pbi->lf_row_sync;
-  const VP9WorkerInterface *const winterface = vp9_get_worker_interface();
  // Number of superblock rows and cols
  const int sb_rows = mi_cols_aligned_to_sb(cm->mi_rows) >> MI_BLOCK_SIZE_LOG2;
  const int tile_cols = 1 << cm->log2_tile_cols;
@@ -200,15 +197,15 @@ void vp9_loop_filter_frame_mt(YV12_BUFFER_CONFIG *frame,

    // Start loopfiltering
    if (i == num_workers - 1) {
-      winterface->execute(worker);
+      vp9_worker_execute(worker);
    } else {
-      winterface->launch(worker);
+      vp9_worker_launch(worker);
    }
  }

  // Wait till all rows are finished
  for (i = 0; i < num_workers; ++i) {
-    winterface->sync(&pbi->tile_workers[i]);
+    vp9_worker_sync(&pbi->tile_workers[i]);
  }
 }

@@ -281,178 +278,3 @@ void vp9_loop_filter_dealloc(VP9LfSync *lf_sync, int rows) {
    vp9_zero(*lf_sync);
  }
 }
-
-// TODO(hkuang): Clean up all the #ifdef in this file.
-void vp9_frameworker_lock_stats(VP9Worker *const worker) {
-#if CONFIG_MULTITHREAD
-  FrameWorkerData *const worker_data = worker->data1;
-  pthread_mutex_lock(&worker_data->stats_mutex);
-#else
-  (void)worker;
-#endif
-}
-
-void vp9_frameworker_unlock_stats(VP9Worker *const worker) {
-#if CONFIG_MULTITHREAD
-  FrameWorkerData *const worker_data = worker->data1;
-  pthread_mutex_unlock(&worker_data->stats_mutex);
-#else
-  (void)worker;
-#endif
-}
-
-void vp9_frameworker_signal_stats(VP9Worker *const worker) {
-#if CONFIG_MULTITHREAD
-  FrameWorkerData *const worker_data = worker->data1;
-  // TODO(hkuang): Investigate using broadcast or signal.
-  pthread_cond_signal(&worker_data->stats_cond);
-#else
-  (void)worker;
-#endif
-}
-
-// TODO(hkuang): Remove worker parameter as it is only used in debug code.
-void vp9_frameworker_wait(VP9Worker *const worker, RefCntBuffer *const ref_buf,
-                          int row) {
-#if CONFIG_MULTITHREAD
-  if (!ref_buf)
-    return;
-
-  // Enabling the following line of code will get harmless tsan error but
-  // will get best performance.
-  // if (ref_buf->row >= row && ref_buf->buf.corrupted != 1) return;
-
-  {
-    // Find the worker thread that owns the reference frame. If the reference
-    // frame has been fully decoded, it may not have owner.
-    VP9Worker *const ref_worker = ref_buf->frame_worker_owner;
-    FrameWorkerData *const ref_worker_data =
-        (FrameWorkerData *)ref_worker->data1;
-    const VP9Decoder *const pbi = ref_worker_data->pbi;
-
-#ifdef DEBUG_THREAD
-    {
-      FrameWorkerData *const worker_data = (FrameWorkerData *)worker->data1;
-      printf("%d %p worker is waiting for %d %p worker (%d)  ref %d \r\n",
-             worker_data->worker_id, worker, ref_worker_data->worker_id,
-             ref_buf->frame_worker_owner, row, ref_buf->row);
-    }
-#endif
-
-    vp9_frameworker_lock_stats(ref_worker);
-    while (ref_buf->row < row && pbi->cur_buf == ref_buf &&
-           ref_buf->buf.corrupted != 1) {
-      pthread_cond_wait(&ref_worker_data->stats_cond,
-                        &ref_worker_data->stats_mutex);
-    }
-
-    if (ref_buf->buf.corrupted == 1) {
-      FrameWorkerData *const worker_data = (FrameWorkerData *)worker->data1;
-      vp9_frameworker_unlock_stats(ref_worker);
-      vpx_internal_error(&worker_data->pbi->common.error,
-                         VPX_CODEC_CORRUPT_FRAME,
-                         "Worker %p failed to decode frame", worker);
-    }
-    vp9_frameworker_unlock_stats(ref_worker);
-  }
-#else
-  (void)ref_buf;
-  (void)row;
-  (void)ref_buf;
-#endif  // CONFIG_MULTITHREAD
-}
-
-void vp9_frameworker_broadcast(RefCntBuffer *const buf, int row) {
-#if CONFIG_MULTITHREAD
-  VP9Worker *worker = buf->frame_worker_owner;
-
-#ifdef DEBUG_THREAD
-  {
-    FrameWorkerData *const worker_data = (FrameWorkerData *)worker->data1;
-    printf("%d %p worker decode to (%d) \r\n", worker_data->worker_id,
-           buf->frame_worker_owner, row);
-  }
-#endif
-
-  vp9_frameworker_lock_stats(worker);
-  buf->row = row;
-  vp9_frameworker_signal_stats(worker);
-  vp9_frameworker_unlock_stats(worker);
-#else
-  (void)buf;
-  (void)row;
-#endif  // CONFIG_MULTITHREAD
-}
-
-void vp9_frameworker_copy_context(VP9Worker *const dst_worker,
-                                  VP9Worker *const src_worker) {
-#if CONFIG_MULTITHREAD
-  FrameWorkerData *const src_worker_data = (FrameWorkerData *)src_worker->data1;
-  FrameWorkerData *const dst_worker_data = (FrameWorkerData *)dst_worker->data1;
-  VP9_COMMON *const src_cm = &src_worker_data->pbi->common;
-  VP9_COMMON *const dst_cm = &dst_worker_data->pbi->common;
-  int i;
-
-  // Wait until source frame's context is ready.
-  vp9_frameworker_lock_stats(src_worker);
-  while (!src_worker_data->frame_context_ready) {
-    pthread_cond_wait(&src_worker_data->stats_cond,
-        &src_worker_data->stats_mutex);
-  }
-
-  // src worker may have already finished decoding a frame and swapped the mi.
-  // TODO(hkuang): Remove following code after implenment no ModeInfo decoding.
-  if (src_worker_data->frame_decoded) {
-    dst_cm->prev_mip = src_cm->prev_mip;
-    dst_cm->prev_mi = src_cm->prev_mi;
-    dst_cm->prev_mi_grid_base = src_cm->prev_mi_grid_base;
-    dst_cm->prev_mi_grid_visible = src_cm->prev_mi_grid_visible;
-    dst_cm->last_frame_seg_map = src_cm->last_frame_seg_map;
-  } else {
-    dst_cm->prev_mip = src_cm->mip;
-    dst_cm->prev_mi = src_cm->mi;
-    dst_cm->prev_mi_grid_base = src_cm->mi_grid_base;
-    dst_cm->prev_mi_grid_visible = src_cm->mi_grid_visible;
-    dst_cm->last_frame_seg_map = src_cm->current_frame_seg_map;
-  }
-  dst_worker_data->pbi->need_resync = src_worker_data->pbi->need_resync;
-  vp9_frameworker_unlock_stats(src_worker);
-
-  dst_worker_data->pbi->prev_buf =
-      src_worker_data->pbi->common.show_existing_frame ?
-          NULL : src_worker_data->pbi->cur_buf;
-
-  dst_cm->last_width = !src_cm->show_existing_frame ?
-                       src_cm->width : src_cm->last_width;
-  dst_cm->last_height = !src_cm->show_existing_frame ?
-                        src_cm->height : src_cm->last_height;
-  dst_cm->display_width = src_cm->display_width;
-  dst_cm->display_height = src_cm->display_height;
-  dst_cm->subsampling_x = src_cm->subsampling_x;
-  dst_cm->subsampling_y = src_cm->subsampling_y;
-  dst_cm->last_show_frame = !src_cm->show_existing_frame ?
-                            src_cm->show_frame : src_cm->last_show_frame;
-  dst_cm->last_frame_type = src_cm->last_frame_type;
-  dst_cm->frame_type = src_cm->frame_type;
-  dst_cm->y_dc_delta_q = src_cm->y_dc_delta_q;
-  dst_cm->uv_dc_delta_q = src_cm->uv_dc_delta_q;
-  dst_cm->uv_ac_delta_q = src_cm->uv_ac_delta_q;
-  dst_cm->base_qindex = src_cm->base_qindex;
-
-  for (i = 0; i < REF_FRAMES; ++i)
-    dst_cm->ref_frame_map[i] = src_cm->next_ref_frame_map[i];
-
-  memcpy(dst_cm->lf_info.lfthr, src_cm->lf_info.lfthr,
-         (MAX_LOOP_FILTER + 1) * sizeof(loop_filter_thresh));
-  dst_cm->lf.last_sharpness_level = src_cm->lf.sharpness_level;
-  dst_cm->lf.filter_level = src_cm->lf.filter_level;
-  memcpy(dst_cm->lf.ref_deltas, src_cm->lf.ref_deltas, MAX_REF_LF_DELTAS);
-  memcpy(dst_cm->lf.mode_deltas, src_cm->lf.mode_deltas, MAX_MODE_LF_DELTAS);
-  dst_cm->seg = src_cm->seg;
-  memcpy(dst_cm->frame_contexts, src_cm->frame_contexts,
-         FRAME_CONTEXTS * sizeof(dst_cm->frame_contexts[0]));
-#else
-  (void) dst_worker;
-  (void) src_worker;
-#endif  // CONFIG_MULTITHREAD
-}
--- a/Show More
+++ b/Show More
Author	SHA1	Message	Date
Jingning Han	9d9b70a36a	Allow backward prob update in external mode info coding flow This commit enables vpxenc to properly count the coded motion vector related information for backward update. This allows the coding flow using external mode info to use backward probability update. In the short test clip, over 10% bit-rate saving is observed at no distortion change. Change-Id: Ie27e97114ab91c3d95ba7b5554d617d226db5e20	2014-07-21 12:11:31 -07:00
Jingning Han	1e168d12d9	Enable motion vector based prediction mode decision This commit enables vpxenc to compare the motion vector provided by external file to the predicted motion vectors and select the prediction mode with minimum rate cost if motion vector is matched. It doesn't change reconstruction distortion, but provide rate savings. Change-Id: Ia682b775d2bafcaabb5a113bd90a98e1931c9c5a	2014-07-17 16:35:11 -07:00
Jingning Han	6ee6e714b4	Make key frame coding mode configurable in the command line Add --kf-extc configuration. If it is 1, the key frame is coded using mode info from external file; otherwise, use vpxenc internal mode selection process to decide coding modes. It is by default 0. Change-Id: I916f811f9eaa2d0f6cc2a2035ca381a1b0ddd974	2014-07-17 11:52:21 -07:00
Jingning Han	674cc787d3	Update the effective motion vector of sub8x8 blocks This commit enables the vpxenc to update the effective motion vectors stored in the mode_info struct for sub8x8 block coding. It resolves the reference motion vector enc/dec mismatch issue. Change-Id: I93a88fed6f15fad06a41ca21e297d7281cb75c57	2014-07-16 21:37:15 -07:00
Jingning Han	c765cd1a78	Properly handle the effective motion vector in inferred mv modes This commit allows vpxenc to properly set the effective motion vector values in the mode_info struct for inferred motion vector modes. It resolves an enc/dec mismatch issue due to the mode info struct loaded from external file has conflict effective motion vector and inferred prediction mode. Change-Id: I1f47aeaf2b92fcd4dd3d4f3644b88466495be070	2014-07-16 21:35:29 -07:00
Jingning Han	9e3965ae90	Make external sub8x8 block mode info conformable coding decisions This commit converts the sub8x8 block mode info from external file into proper format that conforms the bit-stream definitions. It resolves an enc/dec mismatch issue in sub8x8 block coding used in the inter frames. Change-Id: Ie5717b19d0d06e0f525f9b7c7311abdd40f7885f	2014-07-15 22:40:33 -07:00
Jingning Han	f297504f2d	Re-work configure interface for encoding based on external mi This commit refines the configuration interface for encoding process based on external mode info. It allows the vpxenc to read the external file name from command line, and to produce warning message when necessary. Change-Id: I109d02ea9e6e418d00378d512ed9ab9bb0770dbd	2014-07-14 15:30:03 -07:00
Jingning Han	b4b897605a	Allow more coding flexibility in key frame coding This commit relaxes the encoding mode constraints on key frame coding. It improves the key frame coding performance in speed 5 and up. Change-Id: I114315c2b467174bb1f135f4ab2c1f328c8c65be	2014-07-11 09:55:38 -07:00
Jingning Han	51959786d2	Merge "Use normal encoding route for key frame coding" into sandbox/Jingning/transcode	2014-07-10 10:55:24 -07:00
Jingning Han	502baedb48	Enable motion vector precision regulation conversion This commit allows the vpxenc to check if the motion vectors read from external file comply with the frame header. If the frame is using lower precision, the codec will convert the non-conformable motion vectors into corresponding level. This fixes another outstanding enc/dec mismatch issue due to the mode_info values provided by external file not complying with the bit-stream definitions. Change-Id: Ie5409f5d3201e9159f6a49c7608db3541f8a190c	2014-07-09 16:58:44 -07:00
Jingning Han	2568ff0081	Enforce tx_size conversion to handle invalid mode_info values This commit forces a transform size check to handle the case where the provided transform size is larger than the block size. In such cases, it will convert the transform size to be the maximally allowed value according to the block size. Change-Id: I6ae26d5008fd60955427e2b7d5dcd3daa6eeb531	2014-07-09 10:31:41 -07:00
Jingning Han	4f2aeceabe	Use normal encoding route for key frame coding This commit makes the key frame coding to use the normal vpxenc coding route. The encoding process based on mode_info read from external file now starts from the first inter frame. Change-Id: Iee5ae2c3aa35d4b89d0cb4e890b9b0f29fe89d62	2014-07-08 12:06:31 -07:00
Jim Bankoski	06eed502bd	adjust the context we got from file Change-Id: Ifeed2fa6b8dbc735f3746548e4535d522e732990	2014-07-07 16:03:32 -07:00
Jingning Han	5e9f681dec	Merge "Force the use of selectable transform size" into sandbox/Jingning/transcode	2014-07-01 10:51:15 -07:00
Jingning Han	80bd67f09d	Merge "Disable decoder read/write access to the mode_info array" into sandbox/Jingning/transcode	2014-07-01 10:32:07 -07:00
Jingning Han	d019119777	Force the use of selectable transform size Change-Id: I87034c5933a9cfc6f82b925bcae11a2e6509c472	2014-06-30 17:17:31 -07:00
Jingning Han	6af2a29764	Disable decoder read/write access to the mode_info array The decoder read/write access to the mode_info array was for the purpose of creating conformable coding mode decisions and hence validating the encoding process based on external mode_info array. This commit makes a flag to disable all such potential access. Change-Id: I21ece4b595c1c24cdf5581a3147fe76bf33a5570	2014-06-30 14:49:15 -07:00
Jingning Han	a3d2b5213e	Merge "Enable vpxenc to process and convert external mode_info" into sandbox/Jingning/transcode	2014-06-30 11:15:53 -07:00
Jingning Han	0d075d907c	Merge "Add optional mode_info printout function for debug purpose" into sandbox/Jingning/transcode	2014-06-27 16:34:42 -07:00
Jingning Han	1bf27df775	Enable vpxenc to process and convert external mode_info This commit enables the encoder to convert the mode information read from external file into effective VP9 coding decisions. Further optimization for compression performance can be applied therein. Change-Id: Ic3abb8e223ed4b5aa54e5ed099feb450c1ad9363	2014-06-27 16:10:26 -07:00
Jingning Han	d7e8490d04	Add optional mode_info printout function for debug purpose This commit adds an optional function to print out the mode_info loaded from external file for debug purpose. It can be turned on by setting PRINT_MODE_INFO_LOAD 1. Change-Id: I8612801cbf2eb38213105afb7434da2584b3ff2c	2014-06-26 12:11:44 -07:00
Jingning Han	68556c2f1d	Merge "Silence quantization index check warnings" into sandbox/Jingning/transcode	2014-06-19 14:37:23 -07:00
Jingning Han	19c1c1f429	Merge "Make encoding process support non-switchable filter" into sandbox/Jingning/transcode	2014-06-19 14:37:05 -07:00
Jingning Han	1153454cd0	Merge "Enable encoding and bit-stream writing based on mode_info array" into sandbox/Jingning/transcode	2014-06-19 14:36:37 -07:00
Jingning Han	8f17deb617	Merge "Dump mode_info array from vp9 decoder to external file" into sandbox/Jingning/transcode	2014-06-19 14:36:20 -07:00
Jingning Han	f5bb406799	Merge "Add transcode flag in the experimental list" into sandbox/Jingning/transcode	2014-06-19 14:36:09 -07:00
Jingning Han	ff073a70ee	Silence quantization index check warnings Allow the encoder to use fixed quantization step size. Note that this effectively breaks the internal rate control scheme and can cause substantial compression performance loss. Change-Id: I1caacb1ab06629107f8975e5f707de16d6d5b36a	2014-06-19 09:52:09 -07:00
Jingning Han	44877260a5	Make encoding process support non-switchable filter This commit allows the encoder to handle cases where the encoder is forced to use an arbitrary prediction filter type. Change-Id: I984e554ef8b05d88d3c1714c0b621f5cf09f5dd6	2014-06-17 15:03:49 -07:00
Jingning Han	06510d1ff9	Enable encoding and bit-stream writing based on mode_info array This commit enables vpxenc to encode and write out bit-stream from coding information provided by external mode_info array file. It currently assumes single reference frame and 8-tap switchable prediction filters at frame header level. Tested using the mode_info array dumped at VP9 decoder into the external file, where the bit-stream was generated by VP9 encoder at speed -6. The coding statistics remain the same. Note that the compression performance will be affected quite a lot in the two pass coding setting, where at this point the rate control scheme cannot be updated properly without statistics gathered during rate distortion optimization search. Change-Id: Ide979d08d3ce6167c1f2e513c34fd8440f3e2aaf	2014-06-17 14:57:43 -07:00
Jingning Han	b95807f2bb	Dump mode_info array from vp9 decoder to external file This commit allows the vp9 decoder to dump the decoded mode_info array, per 64x64, into external file, which serves as conformable test vector for transcoding encoder. The mode_info of 8x8 block inside a 64x64 block is aligned in raster order. Change-Id: I0447d62922c674a674c0d4b31184625cf722f872	2014-06-11 15:26:42 -07:00
Jingning Han	de810ac620	Add transcode flag in the experimental list Change-Id: I756b5899d3b5101643b4e084a1647a15b427d9e9	2014-06-11 11:45:46 -07:00
Jingning Han	0b3ffed9be	Add transcode flag in the experimental list Change-Id: I756b5899d3b5101643b4e084a1647a15b427d9e9	2014-06-11 11:18:02 -07:00