Compare commits


32 Commits

Author SHA1 Message Date
Jingning Han
9d9b70a36a Allow backward prob update in external mode info coding flow
This commit enables vpxenc to properly count the coded motion
vector related information for backward update. This allows the
coding flow using external mode info to use backward probability
updates. On a short test clip, an over 10% bit-rate saving is
observed with no change in distortion.

Change-Id: Ie27e97114ab91c3d95ba7b5554d617d226db5e20
2014-07-21 12:11:31 -07:00
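
As a minimal sketch of that backward-update pattern: symbols are counted while the frame is coded, then the probabilities are adapted afterward. The counter type, helper names, and blending rule below are illustrative assumptions, not the actual libvpx structures or adaptation formula.

```c
#include <stdint.h>

/* Hypothetical per-frame counts for one binary symbol. */
typedef struct {
  unsigned count[2];  /* how often 0 / 1 was coded in this frame */
} symbol_counts;

/* Tally a coded motion-vector symbol so it can drive backward adaptation. */
static void count_symbol(symbol_counts *c, int bit) {
  ++c->count[bit];
}

/* After the frame, fold the observed distribution into the prior
 * probability. Encoder and decoder run the same deterministic rule on the
 * same counts, so the update costs no extra bits in the stream. */
static uint8_t adapt_prob(uint8_t prior, const symbol_counts *c) {
  const unsigned total = c->count[0] + c->count[1];
  unsigned p;
  if (total == 0) return prior;
  /* Probability of coding a 0, in the 1..255 range of the bool coder. */
  p = (255 * c->count[0] + total / 2) / total;
  if (p < 1) p = 1;
  if (p > 255) p = 255;
  return (uint8_t)((3 * prior + p) >> 2);  /* simple weighted blend */
}
```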
Jingning Han
1e168d12d9 Enable motion vector based prediction mode decision
This commit enables vpxenc to compare the motion vector provided
by the external file to the predicted motion vectors and to select
the prediction mode with the minimum rate cost when the motion
vector matches. It doesn't change the reconstruction distortion,
but provides rate savings.

Change-Id: Ia682b775d2bafcaabb5a113bd90a98e1931c9c5a
2014-07-17 16:35:11 -07:00
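
The selection rule described above can be sketched as follows. The mode names mirror VP9's inter modes, while `pick_inter_mode` and the rate-cost inputs are illustrative placeholders, not the libvpx API.

```c
typedef struct { int row, col; } MV;

enum { NEARESTMV, NEARMV, ZEROMV, NEWMV, INTER_MODE_COUNT };

/* Pick the cheapest mode whose predicted motion vector equals the MV
 * supplied by the external file. Reconstruction distortion is unchanged
 * because the effective vector is identical either way; only the rate
 * spent signalling it differs. */
static int pick_inter_mode(const MV *ext_mv,
                           const MV pred_mv[INTER_MODE_COUNT],
                           const int rate_cost[INTER_MODE_COUNT]) {
  int best_mode = NEWMV;  /* fallback: code the vector explicitly */
  int best_cost = rate_cost[NEWMV];
  int mode;
  for (mode = NEARESTMV; mode <= ZEROMV; ++mode) {
    if (pred_mv[mode].row == ext_mv->row &&
        pred_mv[mode].col == ext_mv->col &&
        rate_cost[mode] < best_cost) {
      best_cost = rate_cost[mode];
      best_mode = mode;
    }
  }
  return best_mode;
}
```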
Jingning Han
6ee6e714b4 Make key frame coding mode configurable in the command line
Add the --kf-extc option. If it is set to 1, key frames are coded
using mode info from the external file; otherwise, vpxenc's internal
mode selection process decides the coding modes. The default is 0.

Change-Id: I916f811f9eaa2d0f6cc2a2035ca381a1b0ddd974
2014-07-17 11:52:21 -07:00
Jingning Han
674cc787d3 Update the effective motion vector of sub8x8 blocks
This commit enables vpxenc to update the effective motion vectors
stored in the mode_info struct for sub8x8 block coding. It resolves
the reference motion vector enc/dec mismatch issue.

Change-Id: I93a88fed6f15fad06a41ca21e297d7281cb75c57
2014-07-16 21:37:15 -07:00
Jingning Han
c765cd1a78 Properly handle the effective motion vector in inferred mv modes
This commit allows vpxenc to properly set the effective motion
vector values in the mode_info struct for inferred motion vector
modes. It resolves an enc/dec mismatch issue caused by mode info
loaded from the external file carrying an effective motion vector
that conflicts with the inferred prediction mode.

Change-Id: I1f47aeaf2b92fcd4dd3d4f3644b88466495be070
2014-07-16 21:35:29 -07:00
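
A sketch of the idea behind the fix, with illustrative names and types: for inferred-MV modes the bitstream carries no explicit vector, so the effective MV stored in mode_info must equal whatever the decoder will infer, or encoder and decoder end up using different vectors.

```c
typedef struct { int row, col; } MV;

enum { NEARESTMV, NEARMV, ZEROMV, NEWMV };

/* Overwrite the stored effective MV with the value the decoder will infer
 * for the given mode; a conflicting pair is exactly the mismatch the
 * message describes. Helper name and types are illustrative. */
static void set_effective_mv(int mode, MV *stored_mv,
                             const MV *nearest_mv, const MV *near_mv) {
  switch (mode) {
    case NEARESTMV: *stored_mv = *nearest_mv; break;
    case NEARMV:    *stored_mv = *near_mv;    break;
    case ZEROMV:    stored_mv->row = 0; stored_mv->col = 0; break;
    default:        break;  /* NEWMV: keep the explicitly coded vector */
  }
}
```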
Jingning Han
9e3965ae90 Make external sub8x8 block mode info conformable coding decisions
This commit converts the sub8x8 block mode info from the external
file into a format that conforms to the bit-stream definitions. It
resolves an enc/dec mismatch issue in sub8x8 block coding in inter
frames.

Change-Id: Ie5717b19d0d06e0f525f9b7c7311abdd40f7885f
2014-07-15 22:40:33 -07:00
Jingning Han
f297504f2d Re-work configure interface for encoding based on external mi
This commit refines the configuration interface for the encoding
process based on external mode info. It allows vpxenc to read the
external file name from the command line and to produce a warning
message when necessary.

Change-Id: I109d02ea9e6e418d00378d512ed9ab9bb0770dbd
2014-07-14 15:30:03 -07:00
Jingning Han
b4b897605a Allow more coding flexibility in key frame coding
This commit relaxes the encoding mode constraints on key frame
coding. It improves key frame coding performance at speed 5 and
up.

Change-Id: I114315c2b467174bb1f135f4ab2c1f328c8c65be
2014-07-11 09:55:38 -07:00
Jingning Han
51959786d2 Merge "Use normal encoding route for key frame coding" into sandbox/Jingning/transcode 2014-07-10 10:55:24 -07:00
Jingning Han
502baedb48 Enable motion vector precision regulation conversion
This commit allows vpxenc to check whether the motion vectors read
from the external file comply with the frame header. If the frame
uses lower precision, the codec converts the non-conformable motion
vectors to the corresponding precision level.

This fixes another outstanding enc/dec mismatch issue caused by
mode_info values provided by the external file that do not comply
with the bit-stream definitions.

Change-Id: Ie5409f5d3201e9159f6a49c7608db3541f8a190c
2014-07-09 16:58:44 -07:00
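
The conversion amounts to rounding 1/8-pel motion vector components to even values when the frame header disallows high-precision vectors. A sketch of that rounding rule follows; VP9 carries an equivalent helper, but treat this as an illustration:

```c
typedef struct { int row, col; } MV;  /* components in 1/8-pel units */

/* Round each odd component toward zero to an even value when the frame
 * header disallows high-precision (1/8-pel) motion vectors, so the coded
 * vector conforms to the header. */
static void lower_mv_precision(MV *mv, int allow_high_precision) {
  if (!allow_high_precision) {
    if (mv->row & 1) mv->row += mv->row > 0 ? -1 : 1;
    if (mv->col & 1) mv->col += mv->col > 0 ? -1 : 1;
  }
}
```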
Jingning Han
2568ff0081 Enforce tx_size conversion to handle invalid mode_info values
This commit forces a transform size check to handle the case where
the provided transform size is larger than the block size. In such
cases, it converts the transform size to the maximum value allowed
by the block size.

Change-Id: I6ae26d5008fd60955427e2b7d5dcd3daa6eeb531
2014-07-09 10:31:41 -07:00
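
A sketch of the clamp, assuming the caller looks up the per-block-size maximum; `clamp_tx_size` is an illustrative name, not the libvpx function:

```c
typedef enum { TX_4X4, TX_8X8, TX_16X16, TX_32X32 } TX_SIZE;

/* Clamp an externally supplied transform size to the largest size the
 * block can legally carry; max_tx_for_bsize would come from a
 * per-block-size lookup table. */
static TX_SIZE clamp_tx_size(TX_SIZE external_tx, TX_SIZE max_tx_for_bsize) {
  return external_tx > max_tx_for_bsize ? max_tx_for_bsize : external_tx;
}
```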
Jingning Han
4f2aeceabe Use normal encoding route for key frame coding
This commit makes key frame coding use the normal vpxenc coding
route. The encoding process based on mode_info read from the
external file now starts from the first inter frame.

Change-Id: Iee5ae2c3aa35d4b89d0cb4e890b9b0f29fe89d62
2014-07-08 12:06:31 -07:00
Jim Bankoski
06eed502bd adjust the context we got from file
Change-Id: Ifeed2fa6b8dbc735f3746548e4535d522e732990
2014-07-07 16:03:32 -07:00
Jingning Han
5e9f681dec Merge "Force the use of selectable transform size" into sandbox/Jingning/transcode 2014-07-01 10:51:15 -07:00
Jingning Han
80bd67f09d Merge "Disable decoder read/write access to the mode_info array" into sandbox/Jingning/transcode 2014-07-01 10:32:07 -07:00
Jingning Han
d019119777 Force the use of selectable transform size
Change-Id: I87034c5933a9cfc6f82b925bcae11a2e6509c472
2014-06-30 17:17:31 -07:00
Jingning Han
6af2a29764 Disable decoder read/write access to the mode_info array
The decoder's read/write access to the mode_info array was there to
create conformable coding mode decisions and hence to validate the
encoding process based on the external mode_info array. This commit
adds a flag to disable all such potential access.

Change-Id: I21ece4b595c1c24cdf5581a3147fe76bf33a5570
2014-06-30 14:49:15 -07:00
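
A hypothetical sketch of such a compile-time guard; the flag name and helper are assumptions, not the actual change:

```c
/* With the flag at 0 the decoder-side store is a no-op, so the mode_info
 * array is only ever written by the encoder path that loads it from the
 * external file. */
#define ALLOW_DECODER_MI_ACCESS 0

typedef struct { int mode; } mode_info;  /* simplified stand-in */

static void decoder_store_mi(mode_info *dst, const mode_info *src) {
#if ALLOW_DECODER_MI_ACCESS
  *dst = *src;
#else
  (void)dst;
  (void)src;  /* access disabled */
#endif
}
```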
Jingning Han
a3d2b5213e Merge "Enable vpxenc to process and convert external mode_info" into sandbox/Jingning/transcode 2014-06-30 11:15:53 -07:00
Jingning Han
0d075d907c Merge "Add optional mode_info printout function for debug purpose" into sandbox/Jingning/transcode 2014-06-27 16:34:42 -07:00
Jingning Han
1bf27df775 Enable vpxenc to process and convert external mode_info
This commit enables the encoder to convert the mode information
read from the external file into effective VP9 coding decisions.
Further optimization for compression performance can be applied
there.

Change-Id: Ic3abb8e223ed4b5aa54e5ed099feb450c1ad9363
2014-06-27 16:10:26 -07:00
Jingning Han
d7e8490d04 Add optional mode_info printout function for debug purpose
This commit adds an optional function that prints out the mode_info
loaded from the external file for debugging purposes. It can be
turned on by setting PRINT_MODE_INFO_LOAD to 1.

Change-Id: I8612801cbf2eb38213105afb7434da2584b3ff2c
2014-06-26 12:11:44 -07:00
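
A sketch of what such a guarded printout can look like, using a simplified stand-in for the real mode_info struct:

```c
#include <stdio.h>

#define PRINT_MODE_INFO_LOAD 1  /* set to 1 to enable the printout */

/* Simplified stand-in for the real mode_info struct. */
typedef struct { int mode, tx_size, mv_row, mv_col; } mode_info;

static void print_mode_info(const mode_info *mi, int rows, int cols) {
#if PRINT_MODE_INFO_LOAD
  int r, c;
  for (r = 0; r < rows; ++r)
    for (c = 0; c < cols; ++c) {
      const mode_info *m = &mi[r * cols + c];
      printf("(%d,%d) mode=%d tx=%d mv=(%d,%d)\n", r, c, m->mode,
             m->tx_size, m->mv_row, m->mv_col);
    }
#else
  (void)mi;
  (void)rows;
  (void)cols;
#endif
}
```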
Jingning Han
68556c2f1d Merge "Silence quantization index check warnings" into sandbox/Jingning/transcode 2014-06-19 14:37:23 -07:00
Jingning Han
19c1c1f429 Merge "Make encoding process support non-switchable filter" into sandbox/Jingning/transcode 2014-06-19 14:37:05 -07:00
Jingning Han
1153454cd0 Merge "Enable encoding and bit-stream writing based on mode_info array" into sandbox/Jingning/transcode 2014-06-19 14:36:37 -07:00
Jingning Han
8f17deb617 Merge "Dump mode_info array from vp9 decoder to external file" into sandbox/Jingning/transcode 2014-06-19 14:36:20 -07:00
Jingning Han
f5bb406799 Merge "Add transcode flag in the experimental list" into sandbox/Jingning/transcode 2014-06-19 14:36:09 -07:00
Jingning Han
ff073a70ee Silence quantization index check warnings
Allow the encoder to use a fixed quantization step size. Note that
this effectively breaks the internal rate control scheme and can
cause a substantial compression performance loss.

Change-Id: I1caacb1ab06629107f8975e5f707de16d6d5b36a
2014-06-19 09:52:09 -07:00
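
For illustration, one way to pin the quantizer through the public libvpx encoder API is to collapse the rate-control quantizer range to a single value, which also shows why rate control stops working:

```c
#include "vpx/vpx_encoder.h"

/* With min == max the rate-control loop has no quantizer range left to
 * work with, so the stream is effectively fixed-Q. */
static void use_fixed_q(vpx_codec_enc_cfg_t *cfg, unsigned int q) {
  cfg->rc_min_quantizer = q;
  cfg->rc_max_quantizer = q;
}
```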
Jingning Han
44877260a5 Make encoding process support non-switchable filter
This commit allows the encoder to handle cases where it is forced
to use an arbitrary prediction filter type.

Change-Id: I984e554ef8b05d88d3c1714c0b621f5cf09f5dd6
2014-06-17 15:03:49 -07:00
Jingning Han
06510d1ff9 Enable encoding and bit-stream writing based on mode_info array
This commit enables vpxenc to encode and write out a bit-stream
from the coding information provided by an external mode_info array
file. It currently assumes a single reference frame and 8-tap
switchable prediction filters at the frame header level.

Tested using the mode_info array dumped by the VP9 decoder into the
external file, where the bit-stream was generated by the VP9 encoder
at speed -6. The coding statistics remain the same.

Note that compression performance will be affected quite a lot in
the two-pass coding setting, where at this point the rate control
scheme cannot be updated properly without the statistics gathered
during the rate-distortion optimization search.

Change-Id: Ide979d08d3ce6167c1f2e513c34fd8440f3e2aaf
2014-06-17 14:57:43 -07:00
Jingning Han
b95807f2bb Dump mode_info array from vp9 decoder to external file
This commit allows the vp9 decoder to dump the decoded mode_info
array, per 64x64 block, into an external file, which serves as a
conformable test vector for the transcoding encoder. The mode_info
of the 8x8 blocks inside a 64x64 block is laid out in raster order.

Change-Id: I0447d62922c674a674c0d4b31184625cf722f872
2014-06-11 15:26:42 -07:00
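
A sketch of that dump layout, assuming a simplified record format: 64x64 superblocks are visited in frame order, and the 8x8 mode_info units inside each superblock are written in raster order.

```c
#include <stdio.h>

/* Simplified stand-in for the real mode_info struct. */
typedef struct { int mode, tx_size, ref_frame, mv_row, mv_col; } mode_info;

/* Walk the frame in 64x64 superblocks (8x8 units of 8x8-pel mode_info
 * entries) and dump each superblock's entries in raster order. Returns 0
 * on success, -1 on a short write. */
static int dump_mode_info(FILE *f, const mode_info *mi, int mi_rows,
                          int mi_cols) {
  int sb_r, sb_c, r, c;
  for (sb_r = 0; sb_r < mi_rows; sb_r += 8)
    for (sb_c = 0; sb_c < mi_cols; sb_c += 8)
      for (r = sb_r; r < sb_r + 8 && r < mi_rows; ++r)
        for (c = sb_c; c < sb_c + 8 && c < mi_cols; ++c)
          if (fwrite(&mi[r * mi_cols + c], sizeof(*mi), 1, f) != 1)
            return -1;
  return 0;
}
```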
Jingning Han
de810ac620 Add transcode flag in the experimental list
Change-Id: I756b5899d3b5101643b4e084a1647a15b427d9e9
2014-06-11 11:45:46 -07:00
Jingning Han
0b3ffed9be Add transcode flag in the experimental list
Change-Id: I756b5899d3b5101643b4e084a1647a15b427d9e9
2014-06-11 11:18:02 -07:00
138 changed files with 2919 additions and 12470 deletions

README

@@ -62,7 +62,6 @@ COMPILING THE APPLICATIONS/LIBRARIES:
armv7-none-rvct
armv7-win32-vs11
armv7-win32-vs12
armv7s-darwin-gcc
mips32-linux-gcc
ppc32-darwin8-gcc
ppc32-darwin9-gcc
@@ -80,7 +79,6 @@ COMPILING THE APPLICATIONS/LIBRARIES:
x86-darwin11-gcc
x86-darwin12-gcc
x86-darwin13-gcc
x86-iphonesimulator-gcc
x86-linux-gcc
x86-linux-icc
x86-os2-gcc
@@ -97,7 +95,6 @@ COMPILING THE APPLICATIONS/LIBRARIES:
x86_64-darwin11-gcc
x86_64-darwin12-gcc
x86_64-darwin13-gcc
x86_64-iphonesimulator-gcc
x86_64-linux-gcc
x86_64-linux-icc
x86_64-solaris-gcc


@@ -330,10 +330,7 @@ endef
ifneq ($(target),)
include $(SRC_PATH_BARE)/$(target:-$(TOOLCHAIN)=).mk
endif
skip_deps := $(filter %clean,$(MAKECMDGOALS))
skip_deps += $(findstring testdata,$(MAKECMDGOALS))
ifeq ($(strip $(skip_deps)),)
ifeq ($(filter %clean,$(MAKECMDGOALS)),)
# Older versions of make don't like -include directives with no arguments
ifneq ($(filter %.d,$(OBJS-yes:.o=.d)),)
-include $(filter %.d,$(OBJS-yes:.o=.d))


@@ -802,7 +802,7 @@ process_common_toolchain() {
armv8)
soft_enable neon
;;
armv7|armv7s)
armv7)
soft_enable neon
soft_enable neon_asm
soft_enable media
@@ -831,7 +831,7 @@ process_common_toolchain() {
arch_int=${arch_int%%te}
check_add_asflags --defsym ARCHITECTURE=${arch_int}
tune_cflags="-mtune="
if [ ${tgt_isa} = "armv7" ] || [ ${tgt_isa} = "armv7s" ]; then
if [ ${tgt_isa} = "armv7" ]; then
if [ -z "${float_abi}" ]; then
check_cpp <<EOF && float_abi=hard || float_abi=softfp
#ifndef __ARM_PCS_VFP


@@ -137,9 +137,7 @@ for opt in "$@"; do
;;
--lib) proj_kind="lib"
;;
--src-path-bare=*)
src_path_bare=$(fix_path "$optval")
src_path_bare=${src_path_bare%/}
--src-path-bare=*) src_path_bare=$(fix_path "$optval")
;;
--static-crt) use_static_runtime=true
;;
@@ -153,9 +151,9 @@ for opt in "$@"; do
esac
;;
-I*)
opt="${opt%/}"
opt=${opt##-I}
opt=$(fix_path "$opt")
opt="${opt%/}"
incs="${incs}${incs:+;}&quot;${opt}&quot;"
yasmincs="${yasmincs} -I&quot;${opt}&quot;"
;;
@@ -416,7 +414,7 @@ generate_vcproj() {
vpx)
tag Tool \
Name="VCPreBuildEventTool" \
CommandLine="call obj_int_extract.bat &quot;$src_path_bare&quot; $plat_no_ws\\\$(ConfigurationName)" \
CommandLine="call obj_int_extract.bat $src_path_bare $plat_no_ws\\\$(ConfigurationName)" \
tag Tool \
Name="VCCLCompilerTool" \


@@ -157,9 +157,7 @@ for opt in "$@"; do
;;
--lib) proj_kind="lib"
;;
--src-path-bare=*)
src_path_bare=$(fix_path "$optval")
src_path_bare=${src_path_bare%/}
--src-path-bare=*) src_path_bare=$(fix_path "$optval")
;;
--static-crt) use_static_runtime=true
;;
@@ -175,9 +173,9 @@ for opt in "$@"; do
esac
;;
-I*)
opt="${opt%/}"
opt=${opt##-I}
opt=$(fix_path "$opt")
opt="${opt%/}"
incs="${incs}${incs:+;}&quot;${opt}&quot;"
yasmincs="${yasmincs} -I&quot;${opt}&quot;"
;;


@@ -1,244 +0,0 @@
#!/bin/sh
##
## Copyright (c) 2014 The WebM project authors. All Rights Reserved.
##
## Use of this source code is governed by a BSD-style license
## that can be found in the LICENSE file in the root of the source
## tree. An additional intellectual property rights grant can be found
## in the file PATENTS. All contributing project authors may
## be found in the AUTHORS file in the root of the source tree.
##
##
## This script generates 'VPX.framework'. An iOS app can encode and decode VPx
## video by including 'VPX.framework'.
##
## Run iosbuild.sh to create 'VPX.framework' in the current directory.
##
set -e
devnull='> /dev/null 2>&1'
BUILD_ROOT="_iosbuild"
DIST_DIR="_dist"
FRAMEWORK_DIR="VPX.framework"
HEADER_DIR="${FRAMEWORK_DIR}/Headers/vpx"
MAKE_JOBS=1
LIBVPX_SOURCE_DIR=$(dirname "$0" | sed -e s,/build/make,,)
LIPO=$(xcrun -sdk iphoneos${SDK} -find lipo)
ORIG_PWD="$(pwd)"
TARGETS="armv6-darwin-gcc
armv7-darwin-gcc
armv7s-darwin-gcc
x86-iphonesimulator-gcc
x86_64-iphonesimulator-gcc"
# Configures for the target specified by $1, and invokes make with the dist
# target using $DIST_DIR as the distribution output directory.
build_target() {
local target="$1"
local old_pwd="$(pwd)"
vlog "***Building target: ${target}***"
mkdir "${target}"
cd "${target}"
eval "../../${LIBVPX_SOURCE_DIR}/configure" --target="${target}" \
--disable-docs ${devnull}
export DIST_DIR
eval make -j ${MAKE_JOBS} dist ${devnull}
cd "${old_pwd}"
vlog "***Done building target: ${target}***"
}
# Returns the preprocessor symbol for the target specified by $1.
target_to_preproc_symbol() {
target="$1"
case "${target}" in
armv6-*)
echo "__ARM_ARCH_6__"
;;
armv7-*)
echo "__ARM_ARCH_7__"
;;
armv7s-*)
echo "__ARM_ARCH_7S__"
;;
x86-*)
echo "__i386__"
;;
x86_64-*)
echo "__x86_64__"
;;
*)
echo "#error ${target} unknown/unsupported"
return 1
;;
esac
}
# Create a vpx_config.h shim that, based on preprocessor settings for the
# current target CPU, includes the real vpx_config.h for the current target.
# $1 is the list of targets.
create_vpx_framework_config_shim() {
local targets="$1"
local config_file="${HEADER_DIR}/vpx_config.h"
local preproc_symbol=""
local target=""
local include_guard="VPX_FRAMEWORK_HEADERS_VPX_VPX_CONFIG_H_"
local file_header="/*
* Copyright (c) $(date +%Y) The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
/* GENERATED FILE: DO NOT EDIT! */
#ifndef ${include_guard}
#define ${include_guard}
#if defined"
printf "%s" "${file_header}" > "${config_file}"
for target in ${targets}; do
preproc_symbol=$(target_to_preproc_symbol "${target}")
printf " ${preproc_symbol}\n" >> "${config_file}"
printf "#include \"VPX/vpx/${target}/vpx_config.h\"\n" >> "${config_file}"
printf "#elif defined" >> "${config_file}"
mkdir "${HEADER_DIR}/${target}"
cp -p "${BUILD_ROOT}/${target}/vpx_config.h" "${HEADER_DIR}/${target}"
done
# Consume the last line of output from the loop: We don't want it.
sed -i '' -e '$d' "${config_file}"
printf "#endif\n\n" >> "${config_file}"
printf "#endif // ${include_guard}" >> "${config_file}"
}
# Configures and builds each target specified by $1, and then builds
# VPX.framework.
build_framework() {
local lib_list=""
local targets="$1"
local target=""
local target_dist_dir=""
# Clean up from previous build(s).
rm -rf "${BUILD_ROOT}" "${FRAMEWORK_DIR}"
# Create output dirs.
mkdir -p "${BUILD_ROOT}"
mkdir -p "${HEADER_DIR}"
cd "${BUILD_ROOT}"
for target in ${targets}; do
build_target "${target}"
target_dist_dir="${BUILD_ROOT}/${target}/${DIST_DIR}"
lib_list="${lib_list} ${target_dist_dir}/lib/libvpx.a"
done
cd "${ORIG_PWD}"
# The basic libvpx API includes are all the same; just grab the most recent
# set.
cp -p "${target_dist_dir}"/include/vpx/* "${HEADER_DIR}"
# Build the fat library.
${LIPO} -create ${lib_list} -output ${FRAMEWORK_DIR}/VPX
# Create the vpx_config.h shim that allows usage of vpx_config.h from
# within VPX.framework.
create_vpx_framework_config_shim "${targets}"
# Copy in vpx_version.h.
cp -p "${BUILD_ROOT}/${target}/vpx_version.h" "${HEADER_DIR}"
vlog "Created fat library ${FRAMEWORK_DIR}/VPX containing:"
for lib in ${lib_list}; do
vlog " $(echo ${lib} | awk -F / '{print $2, $NF}')"
done
# TODO(tomfinegan): Verify that expected targets are included within
# VPX.framework/VPX via lipo -info.
}
# Trap function. Cleans up the subtree used to build all targets contained in
# $TARGETS.
cleanup() {
cd "${ORIG_PWD}"
if [ "${PRESERVE_BUILD_OUTPUT}" != "yes" ]; then
rm -rf "${BUILD_ROOT}"
fi
}
iosbuild_usage() {
cat << EOF
Usage: ${0##*/} [arguments]
--help: Display this message and exit.
--jobs: Number of make jobs.
--preserve-build-output: Do not delete the build directory.
--show-build-output: Show output from each library build.
--verbose: Output information about the environment and each stage of the
build.
EOF
}
vlog() {
if [ "${VERBOSE}" = "yes" ]; then
echo "$@"
fi
}
trap cleanup EXIT
# Parse the command line.
while [ -n "$1" ]; do
case "$1" in
--help)
iosbuild_usage
exit
;;
--jobs)
MAKE_JOBS="$2"
shift
;;
--preserve-build-output)
PRESERVE_BUILD_OUTPUT=yes
;;
--show-build-output)
devnull=
;;
--verbose)
VERBOSE=yes
;;
*)
iosbuild_usage
exit 1
;;
esac
shift
done
if [ "${VERBOSE}" = "yes" ]; then
cat << EOF
BUILD_ROOT=${BUILD_ROOT}
DIST_DIR=${DIST_DIR}
FRAMEWORK_DIR=${FRAMEWORK_DIR}
HEADER_DIR=${HEADER_DIR}
MAKE_JOBS=${MAKE_JOBS}
PRESERVE_BUILD_OUTPUT=${PRESERVE_BUILD_OUTPUT}
LIBVPX_SOURCE_DIR=${LIBVPX_SOURCE_DIR}
LIPO=${LIPO}
ORIG_PWD=${ORIG_PWD}
TARGETS="${TARGETS}"
EOF
fi
build_framework "${TARGETS}"

configure

@@ -103,7 +103,6 @@ all_platforms="${all_platforms} armv7-linux-gcc" #neon Cortex-A8
all_platforms="${all_platforms} armv7-none-rvct" #neon Cortex-A8
all_platforms="${all_platforms} armv7-win32-vs11"
all_platforms="${all_platforms} armv7-win32-vs12"
all_platforms="${all_platforms} armv7s-darwin-gcc"
all_platforms="${all_platforms} mips32-linux-gcc"
all_platforms="${all_platforms} ppc32-darwin8-gcc"
all_platforms="${all_platforms} ppc32-darwin9-gcc"
@@ -272,14 +271,7 @@ EXPERIMENT_LIST="
alpha
multiple_arf
spatial_svc
denoising
masked_interinter
interintra
masked_interintra
filterintra
ext_tx
supertx
copy_coding
transcode
"
CONFIG_LIST="
external_build


@@ -296,7 +296,6 @@ int main(int argc, const char **argv) {
int frame_duration = 1; /* 1 timebase tick per frame */
FILE *infile = NULL;
int end_of_stream = 0;
int frame_size;
memset(&svc_ctx, 0, sizeof(svc_ctx));
svc_ctx.log_print = 1;
@@ -352,10 +351,11 @@ int main(int argc, const char **argv) {
die_codec(&codec, "Failed to encode frame");
}
if (!(app_input.passes == 2 && app_input.pass == 1)) {
while ((frame_size = vpx_svc_get_frame_size(&svc_ctx)) > 0) {
if (vpx_svc_get_frame_size(&svc_ctx) > 0) {
vpx_video_writer_write_frame(writer,
vpx_svc_get_buffer(&svc_ctx),
frame_size, pts);
vpx_svc_get_frame_size(&svc_ctx),
pts);
}
}
if (vpx_svc_get_rc_stats_buffer_size(&svc_ctx) > 0) {


@@ -645,26 +645,6 @@ INSTANTIATE_TEST_CASE_P(SSSE3, ConvolveTest, ::testing::Values(
#endif
#if HAVE_AVX2
// TODO(jzern): these prototypes can be removed after the avx2 versions are
// reenabled in vp9_rtcd_defs.pl.
extern "C" {
void vp9_convolve8_vert_avx2(const uint8_t *src, ptrdiff_t src_stride,
uint8_t *dst, ptrdiff_t dst_stride,
const int16_t *filter_x, int x_step_q4,
const int16_t *filter_y, int y_step_q4,
int w, int h);
void vp9_convolve8_horiz_avx2(const uint8_t *src, ptrdiff_t src_stride,
uint8_t *dst, ptrdiff_t dst_stride,
const int16_t *filter_x, int x_step_q4,
const int16_t *filter_y, int y_step_q4,
int w, int h);
void vp9_convolve8_avx2(const uint8_t *src, ptrdiff_t src_stride,
uint8_t *dst, ptrdiff_t dst_stride,
const int16_t *filter_x, int x_step_q4,
const int16_t *filter_y, int y_step_q4,
int w, int h);
}
const ConvolveFunctions convolve8_avx2(
vp9_convolve8_horiz_avx2, vp9_convolve8_avg_horiz_ssse3,
vp9_convolve8_vert_avx2, vp9_convolve8_avg_vert_ssse3,
@@ -675,10 +655,8 @@ INSTANTIATE_TEST_CASE_P(AVX2, ConvolveTest, ::testing::Values(
make_tuple(8, 4, &convolve8_avx2),
make_tuple(4, 8, &convolve8_avx2),
make_tuple(8, 8, &convolve8_avx2),
make_tuple(8, 16, &convolve8_avx2)));
INSTANTIATE_TEST_CASE_P(DISABLED_AVX2, ConvolveTest, ::testing::Values(
make_tuple(16, 8, &convolve8_avx2),
make_tuple(8, 16, &convolve8_avx2),
make_tuple(16, 16, &convolve8_avx2),
make_tuple(32, 16, &convolve8_avx2),
make_tuple(16, 32, &convolve8_avx2),


@@ -606,29 +606,4 @@ INSTANTIATE_TEST_CASE_P(
::testing::Values(
make_tuple(&vp9_fdct16x16_c, &vp9_idct16x16_256_add_ssse3, 0)));
#endif
#if HAVE_AVX2
// TODO(jzern): these prototypes can be removed after the avx2 versions are
// reenabled in vp9_rtcd_defs.pl.
extern "C" {
void vp9_fdct16x16_avx2(const int16_t *input, int16_t *output, int stride);
void vp9_fht16x16_avx2(const int16_t *input, int16_t *output, int stride,
int tx_type);
}
INSTANTIATE_TEST_CASE_P(
DISABLED_AVX2, Trans16x16DCT,
::testing::Values(
make_tuple(&vp9_fdct16x16_avx2,
&vp9_idct16x16_256_add_c, 0)));
INSTANTIATE_TEST_CASE_P(
AVX2, Trans16x16HT,
::testing::Values(
make_tuple(&vp9_fht16x16_avx2, &vp9_iht16x16_256_add_c, 3)));
INSTANTIATE_TEST_CASE_P(
DISABLED_AVX2, Trans16x16HT,
::testing::Values(
make_tuple(&vp9_fht16x16_avx2, &vp9_iht16x16_256_add_c, 0),
make_tuple(&vp9_fht16x16_avx2, &vp9_iht16x16_256_add_c, 1),
make_tuple(&vp9_fht16x16_avx2, &vp9_iht16x16_256_add_c, 2)));
#endif
} // namespace


@@ -15,27 +15,13 @@
namespace libvpx_test {
const char kVP8Name[] = "WebM Project VP8";
vpx_codec_err_t Decoder::PeekStream(const uint8_t *cxdata, size_t size,
vpx_codec_stream_info_t *stream_info) {
return vpx_codec_peek_stream_info(CodecInterface(),
cxdata, static_cast<unsigned int>(size),
stream_info);
}
vpx_codec_err_t Decoder::DecodeFrame(const uint8_t *cxdata, size_t size) {
return DecodeFrame(cxdata, size, NULL);
}
vpx_codec_err_t Decoder::DecodeFrame(const uint8_t *cxdata, size_t size,
void *user_priv) {
vpx_codec_err_t res_dec;
InitOnce();
REGISTER_STATE_CHECK(
res_dec = vpx_codec_decode(&decoder_,
cxdata, static_cast<unsigned int>(size),
user_priv, 0));
NULL, 0));
return res_dec;
}
@@ -43,37 +29,13 @@ void DecoderTest::RunLoop(CompressedVideoSource *video) {
vpx_codec_dec_cfg_t dec_cfg = {0};
Decoder* const decoder = codec_->CreateDecoder(dec_cfg, 0);
ASSERT_TRUE(decoder != NULL);
const char *codec_name = decoder->GetDecoderName();
const bool is_vp8 = strncmp(kVP8Name, codec_name, sizeof(kVP8Name) - 1) == 0;
// Decode frames.
for (video->Begin(); !::testing::Test::HasFailure() && video->cxdata();
video->Next()) {
for (video->Begin(); video->cxdata(); video->Next()) {
PreDecodeFrameHook(*video, decoder);
vpx_codec_stream_info_t stream_info;
stream_info.sz = sizeof(stream_info);
const vpx_codec_err_t res_peek = decoder->PeekStream(video->cxdata(),
video->frame_size(),
&stream_info);
if (is_vp8) {
/* Vp8's implementation of PeekStream returns an error if the frame you
* pass it is not a keyframe, so we only expect VPX_CODEC_OK on the first
* frame, which must be a keyframe. */
if (video->frame_number() == 0)
ASSERT_EQ(VPX_CODEC_OK, res_peek) << "Peek return failed: "
<< vpx_codec_err_to_string(res_peek);
} else {
/* The Vp9 implementation of PeekStream returns an error only if the
* data passed to it isn't a valid Vp9 chunk. */
ASSERT_EQ(VPX_CODEC_OK, res_peek) << "Peek return failed: "
<< vpx_codec_err_to_string(res_peek);
}
vpx_codec_err_t res_dec = decoder->DecodeFrame(video->cxdata(),
video->frame_size());
if (!HandleDecodeResult(res_dec, *video, decoder))
break;
ASSERT_EQ(VPX_CODEC_OK, res_dec) << decoder->DecodeError();
DxDataIterator dec_iter = decoder->GetDxData();
const vpx_image_t *img = NULL;


@@ -49,14 +49,8 @@ class Decoder {
vpx_codec_destroy(&decoder_);
}
vpx_codec_err_t PeekStream(const uint8_t *cxdata, size_t size,
vpx_codec_stream_info_t *stream_info);
vpx_codec_err_t DecodeFrame(const uint8_t *cxdata, size_t size);
vpx_codec_err_t DecodeFrame(const uint8_t *cxdata, size_t size,
void *user_priv);
DxDataIterator GetDxData() {
return DxDataIterator(&decoder_);
}
@@ -91,10 +85,6 @@ class Decoder {
&decoder_, cb_get, cb_release, user_priv);
}
const char* GetDecoderName() {
return vpx_codec_iface_name(CodecInterface());
}
protected:
virtual vpx_codec_iface_t* CodecInterface() const = 0;
@@ -124,14 +114,6 @@ class DecoderTest {
virtual void PreDecodeFrameHook(const CompressedVideoSource& video,
Decoder *decoder) {}
// Hook to be called to handle decode result. Return true to continue.
virtual bool HandleDecodeResult(const vpx_codec_err_t res_dec,
const CompressedVideoSource& /* video */,
Decoder *decoder) {
EXPECT_EQ(VPX_CODEC_OK, res_dec) << decoder->DecodeError();
return VPX_CODEC_OK == res_dec;
}
// Hook to be called on every decompressed frame.
virtual void DecompressedFrameHook(const vpx_image_t& img,
const unsigned int frame_number) {}


@@ -34,10 +34,7 @@ decode_to_md5() {
local expected_md5="$3"
local output_file="${VPX_TEST_OUTPUT_DIR}/decode_to_md5_${codec}"
if [ ! -x "${decoder}" ]; then
elog "${decoder} does not exist or is not executable."
return 1
fi
[ -x "${decoder}" ] || return 1
eval "${decoder}" "${input_file}" "${output_file}" ${devnull}


@@ -34,10 +34,7 @@ decode_with_drops() {
local output_file="${VPX_TEST_OUTPUT_DIR}/decode_with_drops_${codec}"
local drop_mode="$3"
if [ ! -x "${decoder}" ]; then
elog "${decoder} does not exist or is not executable."
return 1
fi
[ -x "${decoder}" ] || return 1
eval "${decoder}" "${input_file}" "${output_file}" "${drop_mode}" ${devnull}


@@ -376,19 +376,4 @@ INSTANTIATE_TEST_CASE_P(
make_tuple(&vp9_fht4x4_sse2, &vp9_iht4x4_16_add_sse2, 3)));
#endif
#if HAVE_AVX2
INSTANTIATE_TEST_CASE_P(
AVX2, Trans4x4DCT,
::testing::Values(
make_tuple(&vp9_fdct4x4_avx2,
&vp9_idct4x4_16_add_c, 0)));
INSTANTIATE_TEST_CASE_P(
AVX2, Trans4x4HT,
::testing::Values(
make_tuple(&vp9_fht4x4_avx2, &vp9_iht4x4_16_add_c, 0),
make_tuple(&vp9_fht4x4_avx2, &vp9_iht4x4_16_add_c, 1),
make_tuple(&vp9_fht4x4_avx2, &vp9_iht4x4_16_add_c, 2),
make_tuple(&vp9_fht4x4_avx2, &vp9_iht4x4_16_add_c, 3)));
#endif
} // namespace


@@ -367,18 +367,4 @@ INSTANTIATE_TEST_CASE_P(
::testing::Values(
make_tuple(&vp9_fdct8x8_ssse3, &vp9_idct8x8_64_add_ssse3, 0)));
#endif
#if HAVE_AVX2
INSTANTIATE_TEST_CASE_P(
AVX2, FwdTrans8x8DCT,
::testing::Values(
make_tuple(&vp9_fdct8x8_avx2, &vp9_idct8x8_64_add_c, 0)));
INSTANTIATE_TEST_CASE_P(
AVX2, FwdTrans8x8HT,
::testing::Values(
make_tuple(&vp9_fht8x8_avx2, &vp9_iht8x8_64_add_c, 0),
make_tuple(&vp9_fht8x8_avx2, &vp9_iht8x8_64_add_c, 1),
make_tuple(&vp9_fht8x8_avx2, &vp9_iht8x8_64_add_c, 2),
make_tuple(&vp9_fht8x8_avx2, &vp9_iht8x8_64_add_c, 3)));
#endif
} // namespace


@@ -1,109 +0,0 @@
/*
* Copyright (c) 2014 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include <cstdio>
#include <cstdlib>
#include <string>
#include <vector>
#include "third_party/googletest/src/include/gtest/gtest.h"
#include "./vpx_config.h"
#include "test/codec_factory.h"
#include "test/decode_test_driver.h"
#include "test/ivf_video_source.h"
#include "test/util.h"
#if CONFIG_WEBM_IO
#include "test/webm_video_source.h"
#endif
#include "vpx_mem/vpx_mem.h"
namespace {
class InvalidFileTest
: public ::libvpx_test::DecoderTest,
public ::libvpx_test::CodecTestWithParam<const char*> {
protected:
InvalidFileTest() : DecoderTest(GET_PARAM(0)), res_file_(NULL) {}
virtual ~InvalidFileTest() {
if (res_file_ != NULL)
fclose(res_file_);
}
void OpenResFile(const std::string &res_file_name_) {
res_file_ = libvpx_test::OpenTestDataFile(res_file_name_);
ASSERT_TRUE(res_file_ != NULL) << "Result file open failed. Filename: "
<< res_file_name_;
}
virtual bool HandleDecodeResult(
const vpx_codec_err_t res_dec,
const libvpx_test::CompressedVideoSource &video,
libvpx_test::Decoder *decoder) {
EXPECT_TRUE(res_file_ != NULL);
int expected_res_dec;
// Read integer result.
const int res = fscanf(res_file_, "%d", &expected_res_dec);
EXPECT_NE(res, EOF) << "Read result data failed";
// Check results match.
EXPECT_EQ(expected_res_dec, res_dec)
<< "Results don't match: frame number = " << video.frame_number();
return !HasFailure();
}
private:
FILE *res_file_;
};
TEST_P(InvalidFileTest, ReturnCode) {
const std::string filename = GET_PARAM(1);
libvpx_test::CompressedVideoSource *video = NULL;
// Open compressed video file.
if (filename.substr(filename.length() - 3, 3) == "ivf") {
video = new libvpx_test::IVFVideoSource(filename);
} else if (filename.substr(filename.length() - 4, 4) == "webm") {
#if CONFIG_WEBM_IO
video = new libvpx_test::WebMVideoSource(filename);
#else
fprintf(stderr, "WebM IO is disabled, skipping test vector %s\n",
filename.c_str());
return;
#endif
}
video->Init();
// Construct result file name. The file holds a list of expected integer
// results, one for each decoded frame. Any result that doesn't match
// the files list will cause a test failure.
const std::string res_filename = filename + ".res";
OpenResFile(res_filename);
// Decode frame, and check the md5 matching.
ASSERT_NO_FATAL_FAILURE(RunLoop(video));
delete video;
}
const char *const kVP9InvalidFileTests[] = {
"invalid-vp90-01.webm",
"invalid-vp90-02.webm",
"invalid-vp90-2-00-quantizer-00.webm.ivf.s5861_r01-05_b6-.ivf",
};
#define NELEMENTS(x) static_cast<int>(sizeof(x) / sizeof(x[0]))
VP9_INSTANTIATE_TEST_CASE(InvalidFileTest,
::testing::ValuesIn(kVP9InvalidFileTests,
kVP9InvalidFileTests +
NELEMENTS(kVP9InvalidFileTests)));
} // namespace


@@ -32,10 +32,7 @@ postproc() {
local codec="$2"
local output_file="${VPX_TEST_OUTPUT_DIR}/postproc_${codec}.raw"
if [ ! -x "${decoder}" ]; then
elog "${decoder} does not exist or is not executable."
return 1
fi
[ -x "${decoder}" ] || return 1
eval "${decoder}" "${input_file}" "${output_file}" ${devnull}


@@ -33,10 +33,7 @@ resize_util() {
# resize_util is available only when CONFIG_SHARED is disabled.
if [ -z "$(vpx_config_option_enabled CONFIG_SHARED)" ]; then
if [ ! -x "${resizer}" ]; then
elog "${resizer} does not exist or is not executable."
return 1
fi
[ -x "${resizer}" ] || return 1
eval "${resizer}" "${YUV_RAW_INPUT}" \
"${YUV_RAW_INPUT_WIDTH}x${YUV_RAW_INPUT_HEIGHT}" \


@@ -627,24 +627,4 @@ INSTANTIATE_TEST_CASE_P(SSE3, SADTest, ::testing::Values(
#endif // CONFIG_USE_X86INC
#endif // HAVE_SSSE3
#if HAVE_AVX2
#if CONFIG_VP9_ENCODER
// TODO(jzern): these prototypes can be removed after the avx2 versions are
// reenabled in vp9_rtcd_defs.pl.
extern "C" {
void vp9_sad32x32x4d_avx2(const uint8_t *src_ptr, int src_stride,
const uint8_t *const ref_ptr[], int ref_stride,
unsigned int *sad_array);
void vp9_sad64x64x4d_avx2(const uint8_t *src_ptr, int src_stride,
const uint8_t *const ref_ptr[], int ref_stride,
unsigned int *sad_array);
}
const sad_n_by_n_by_4_fn_t sad_64x64x4d_avx2 = vp9_sad64x64x4d_avx2;
const sad_n_by_n_by_4_fn_t sad_32x32x4d_avx2 = vp9_sad32x32x4d_avx2;
INSTANTIATE_TEST_CASE_P(DISABLED_AVX2, SADx4Test, ::testing::Values(
make_tuple(32, 32, sad_32x32x4d_avx2),
make_tuple(64, 64, sad_64x64x4d_avx2)));
#endif // CONFIG_VP9_ENCODER
#endif // HAVE_AVX2
} // namespace


@@ -32,10 +32,7 @@ simple_decoder() {
local codec="$2"
local output_file="${VPX_TEST_OUTPUT_DIR}/simple_decoder_${codec}.raw"
if [ ! -x "${decoder}" ]; then
elog "${decoder} does not exist or is not executable."
return 1
fi
[ -x "${decoder}" ] || return 1
eval "${decoder}" "${input_file}" "${output_file}" ${devnull}


@@ -29,10 +29,7 @@ simple_encoder() {
local codec="$1"
local output_file="${VPX_TEST_OUTPUT_DIR}/simple_encoder_${codec}.ivf"
if [ ! -x "${encoder}" ]; then
elog "${encoder} does not exist or is not executable."
return 1
fi
[ -x "${encoder}" ] || return 1
eval "${encoder}" "${codec}" "${YUV_RAW_INPUT_WIDTH}" \
"${YUV_RAW_INPUT_HEIGHT}" "${YUV_RAW_INPUT}" "${output_file}" 9999 \


@@ -31,6 +31,7 @@ class SvcTest : public ::testing::Test {
SvcTest()
: codec_iface_(0),
test_file_name_("hantro_collage_w352h288.yuv"),
stats_file_name_("hantro_collage_w352h288.stat"),
codec_initialized_(false),
decoder_(0) {
memset(&svc_, 0, sizeof(svc_));
@@ -73,6 +74,7 @@ class SvcTest : public ::testing::Test {
struct vpx_codec_enc_cfg codec_enc_;
vpx_codec_iface_t *codec_iface_;
std::string test_file_name_;
std::string stats_file_name_;
bool codec_initialized_;
Decoder *decoder_;
};
@@ -265,17 +267,9 @@ TEST_F(SvcTest, FirstFrameHasLayers) {
video.duration(), VPX_DL_GOOD_QUALITY);
EXPECT_EQ(VPX_CODEC_OK, res);
if (vpx_svc_get_frame_size(&svc_) == 0) {
// Flush encoder
res = vpx_svc_encode(&svc_, &codec_, NULL, 0,
video.duration(), VPX_DL_GOOD_QUALITY);
EXPECT_EQ(VPX_CODEC_OK, res);
}
int frame_size = vpx_svc_get_frame_size(&svc_);
EXPECT_GT(frame_size, 0);
const vpx_codec_err_t res_dec = decoder_->DecodeFrame(
static_cast<const uint8_t *>(vpx_svc_get_buffer(&svc_)), frame_size);
static_cast<const uint8_t *>(vpx_svc_get_buffer(&svc_)),
vpx_svc_get_frame_size(&svc_));
// this test fails with a decoder error
ASSERT_EQ(VPX_CODEC_OK, res_dec) << decoder_->DecodeError();
@@ -285,9 +279,6 @@ TEST_F(SvcTest, EncodeThreeFrames) {
svc_.spatial_layers = 2;
vpx_svc_set_scale_factors(&svc_, "4/16,16/16");
vpx_svc_set_quantizers(&svc_, "40,30", 0);
int decoded_frames = 0;
vpx_codec_err_t res_dec;
int frame_size;
vpx_codec_err_t res =
vpx_svc_init(&svc_, &codec_, vpx_codec_vp9_cx(), &codec_enc_);
@@ -302,14 +293,13 @@ TEST_F(SvcTest, EncodeThreeFrames) {
// This frame is a keyframe.
res = vpx_svc_encode(&svc_, &codec_, video.img(), video.pts(),
video.duration(), VPX_DL_GOOD_QUALITY);
ASSERT_EQ(VPX_CODEC_OK, res);
EXPECT_EQ(1, vpx_svc_is_keyframe(&svc_));
if ((frame_size = vpx_svc_get_frame_size(&svc_)) > 0) {
EXPECT_EQ((decoded_frames == 0), vpx_svc_is_keyframe(&svc_));
res_dec = decoder_->DecodeFrame(
static_cast<const uint8_t *>(vpx_svc_get_buffer(&svc_)), frame_size);
ASSERT_EQ(VPX_CODEC_OK, res_dec) << decoder_->DecodeError();
++decoded_frames;
}
vpx_codec_err_t res_dec = decoder_->DecodeFrame(
static_cast<const uint8_t *>(vpx_svc_get_buffer(&svc_)),
vpx_svc_get_frame_size(&svc_));
ASSERT_EQ(VPX_CODEC_OK, res_dec) << decoder_->DecodeError();
// FRAME 1
video.Next();
@@ -317,14 +307,12 @@ TEST_F(SvcTest, EncodeThreeFrames) {
res = vpx_svc_encode(&svc_, &codec_, video.img(), video.pts(),
video.duration(), VPX_DL_GOOD_QUALITY);
ASSERT_EQ(VPX_CODEC_OK, res);
EXPECT_EQ(0, vpx_svc_is_keyframe(&svc_));
if ((frame_size = vpx_svc_get_frame_size(&svc_)) > 0) {
EXPECT_EQ((decoded_frames == 0), vpx_svc_is_keyframe(&svc_));
res_dec = decoder_->DecodeFrame(
static_cast<const uint8_t *>(vpx_svc_get_buffer(&svc_)), frame_size);
ASSERT_EQ(VPX_CODEC_OK, res_dec) << decoder_->DecodeError();
++decoded_frames;
}
res_dec = decoder_->DecodeFrame(
static_cast<const uint8_t *>(vpx_svc_get_buffer(&svc_)),
vpx_svc_get_frame_size(&svc_));
ASSERT_EQ(VPX_CODEC_OK, res_dec) << decoder_->DecodeError();
// FRAME 2
video.Next();
@@ -332,29 +320,12 @@ TEST_F(SvcTest, EncodeThreeFrames) {
res = vpx_svc_encode(&svc_, &codec_, video.img(), video.pts(),
video.duration(), VPX_DL_GOOD_QUALITY);
ASSERT_EQ(VPX_CODEC_OK, res);
EXPECT_EQ(0, vpx_svc_is_keyframe(&svc_));
if ((frame_size = vpx_svc_get_frame_size(&svc_)) > 0) {
EXPECT_EQ((decoded_frames == 0), vpx_svc_is_keyframe(&svc_));
res_dec = decoder_->DecodeFrame(
static_cast<const uint8_t *>(vpx_svc_get_buffer(&svc_)), frame_size);
ASSERT_EQ(VPX_CODEC_OK, res_dec) << decoder_->DecodeError();
++decoded_frames;
}
// Flush encoder
res = vpx_svc_encode(&svc_, &codec_, NULL, 0,
video.duration(), VPX_DL_GOOD_QUALITY);
EXPECT_EQ(VPX_CODEC_OK, res);
while ((frame_size = vpx_svc_get_frame_size(&svc_)) > 0) {
EXPECT_EQ((decoded_frames == 0), vpx_svc_is_keyframe(&svc_));
res_dec = decoder_->DecodeFrame(
static_cast<const uint8_t *>(vpx_svc_get_buffer(&svc_)), frame_size);
ASSERT_EQ(VPX_CODEC_OK, res_dec) << decoder_->DecodeError();
++decoded_frames;
}
EXPECT_EQ(decoded_frames, 3);
res_dec = decoder_->DecodeFrame(
static_cast<const uint8_t *>(vpx_svc_get_buffer(&svc_)),
vpx_svc_get_frame_size(&svc_));
ASSERT_EQ(VPX_CODEC_OK, res_dec) << decoder_->DecodeError();
}
TEST_F(SvcTest, GetLayerResolution) {
@@ -393,9 +364,7 @@ TEST_F(SvcTest, GetLayerResolution) {
EXPECT_EQ(kHeight * 8 / 16, layer_height);
}
TEST_F(SvcTest, TwoPassEncode) {
// First pass encode
std::string stats_buf;
TEST_F(SvcTest, FirstPassEncode) {
svc_.spatial_layers = 2;
codec_enc_.g_pass = VPX_RC_FIRST_PASS;
vpx_svc_set_scale_factors(&svc_, "4/16,16/16");
@@ -414,61 +383,62 @@ TEST_F(SvcTest, TwoPassEncode) {
res = vpx_svc_encode(&svc_, &codec_, video.img(), video.pts(),
video.duration(), VPX_DL_GOOD_QUALITY);
ASSERT_EQ(VPX_CODEC_OK, res);
size_t stats_size = vpx_svc_get_rc_stats_buffer_size(&svc_);
EXPECT_GT(stats_size, 0U);
const char *stats_data = vpx_svc_get_rc_stats_buffer(&svc_);
ASSERT_TRUE(stats_data != NULL);
stats_buf.append(stats_data, stats_size);
EXPECT_GT(vpx_svc_get_rc_stats_buffer_size(&svc_), 0U);
// FRAME 1
video.Next();
res = vpx_svc_encode(&svc_, &codec_, video.img(), video.pts(),
video.duration(), VPX_DL_GOOD_QUALITY);
stats_size = vpx_svc_get_rc_stats_buffer_size(&svc_);
EXPECT_GT(stats_size, 0U);
stats_data = vpx_svc_get_rc_stats_buffer(&svc_);
ASSERT_TRUE(stats_data != NULL);
stats_buf.append(stats_data, stats_size);
ASSERT_EQ(VPX_CODEC_OK, res);
EXPECT_GT(vpx_svc_get_rc_stats_buffer_size(&svc_), 0U);
// Flush encoder and test EOS packet
res = vpx_svc_encode(&svc_, &codec_, NULL, video.pts(),
video.duration(), VPX_DL_GOOD_QUALITY);
stats_size = vpx_svc_get_rc_stats_buffer_size(&svc_);
EXPECT_GT(stats_size, 0U);
stats_data = vpx_svc_get_rc_stats_buffer(&svc_);
ASSERT_TRUE(stats_data != NULL);
stats_buf.append(stats_data, stats_size);
ASSERT_EQ(VPX_CODEC_OK, res);
EXPECT_GT(vpx_svc_get_rc_stats_buffer_size(&svc_), 0U);
}
// Tear down encoder
vpx_svc_release(&svc_);
vpx_codec_destroy(&codec_);
// Second pass encode
int decoded_frames = 0;
vpx_codec_err_t res_dec;
int frame_size;
TEST_F(SvcTest, SecondPassEncode) {
svc_.spatial_layers = 2;
codec_enc_.g_pass = VPX_RC_LAST_PASS;
codec_enc_.rc_twopass_stats_in.buf = &stats_buf[0];
codec_enc_.rc_twopass_stats_in.sz = stats_buf.size();
res = vpx_svc_init(&svc_, &codec_, vpx_codec_vp9_cx(), &codec_enc_);
FILE *const stats_file = libvpx_test::OpenTestDataFile(stats_file_name_);
ASSERT_TRUE(stats_file != NULL) << "Stats file open failed. Filename: "
<< stats_file;
struct vpx_fixed_buf stats_buf;
fseek(stats_file, 0, SEEK_END);
stats_buf.sz = static_cast<size_t>(ftell(stats_file));
fseek(stats_file, 0, SEEK_SET);
stats_buf.buf = malloc(stats_buf.sz);
ASSERT_TRUE(stats_buf.buf != NULL);
const size_t bytes_read = fread(stats_buf.buf, 1, stats_buf.sz, stats_file);
ASSERT_EQ(bytes_read, stats_buf.sz);
fclose(stats_file);
codec_enc_.rc_twopass_stats_in = stats_buf;
vpx_codec_err_t res =
vpx_svc_init(&svc_, &codec_, vpx_codec_vp9_cx(), &codec_enc_);
ASSERT_EQ(VPX_CODEC_OK, res);
codec_initialized_ = true;
libvpx_test::I420VideoSource video(test_file_name_, kWidth, kHeight,
codec_enc_.g_timebase.den,
codec_enc_.g_timebase.num, 0, 30);
// FRAME 0
video.Begin();
// This frame is a keyframe.
res = vpx_svc_encode(&svc_, &codec_, video.img(), video.pts(),
video.duration(), VPX_DL_GOOD_QUALITY);
ASSERT_EQ(VPX_CODEC_OK, res);
EXPECT_EQ(1, vpx_svc_is_keyframe(&svc_));
if ((frame_size = vpx_svc_get_frame_size(&svc_)) > 0) {
EXPECT_EQ((decoded_frames == 0), vpx_svc_is_keyframe(&svc_));
res_dec = decoder_->DecodeFrame(
static_cast<const uint8_t *>(vpx_svc_get_buffer(&svc_)), frame_size);
ASSERT_EQ(VPX_CODEC_OK, res_dec) << decoder_->DecodeError();
++decoded_frames;
}
vpx_codec_err_t res_dec = decoder_->DecodeFrame(
static_cast<const uint8_t *>(vpx_svc_get_buffer(&svc_)),
vpx_svc_get_frame_size(&svc_));
ASSERT_EQ(VPX_CODEC_OK, res_dec) << decoder_->DecodeError();
// FRAME 1
video.Next();
@@ -476,14 +446,12 @@ TEST_F(SvcTest, TwoPassEncode) {
res = vpx_svc_encode(&svc_, &codec_, video.img(), video.pts(),
video.duration(), VPX_DL_GOOD_QUALITY);
ASSERT_EQ(VPX_CODEC_OK, res);
EXPECT_EQ(0, vpx_svc_is_keyframe(&svc_));
if ((frame_size = vpx_svc_get_frame_size(&svc_)) > 0) {
EXPECT_EQ((decoded_frames == 0), vpx_svc_is_keyframe(&svc_));
res_dec = decoder_->DecodeFrame(
static_cast<const uint8_t *>(vpx_svc_get_buffer(&svc_)), frame_size);
ASSERT_EQ(VPX_CODEC_OK, res_dec) << decoder_->DecodeError();
++decoded_frames;
}
res_dec = decoder_->DecodeFrame(
static_cast<const uint8_t *>(vpx_svc_get_buffer(&svc_)),
vpx_svc_get_frame_size(&svc_));
ASSERT_EQ(VPX_CODEC_OK, res_dec) << decoder_->DecodeError();
// FRAME 2
video.Next();
@@ -491,29 +459,14 @@ TEST_F(SvcTest, TwoPassEncode) {
res = vpx_svc_encode(&svc_, &codec_, video.img(), video.pts(),
video.duration(), VPX_DL_GOOD_QUALITY);
ASSERT_EQ(VPX_CODEC_OK, res);
EXPECT_EQ(0, vpx_svc_is_keyframe(&svc_));
if ((frame_size = vpx_svc_get_frame_size(&svc_)) > 0) {
EXPECT_EQ((decoded_frames == 0), vpx_svc_is_keyframe(&svc_));
res_dec = decoder_->DecodeFrame(
static_cast<const uint8_t *>(vpx_svc_get_buffer(&svc_)), frame_size);
ASSERT_EQ(VPX_CODEC_OK, res_dec) << decoder_->DecodeError();
++decoded_frames;
}
res_dec = decoder_->DecodeFrame(
static_cast<const uint8_t *>(vpx_svc_get_buffer(&svc_)),
vpx_svc_get_frame_size(&svc_));
ASSERT_EQ(VPX_CODEC_OK, res_dec) << decoder_->DecodeError();
// Flush encoder
res = vpx_svc_encode(&svc_, &codec_, NULL, 0,
video.duration(), VPX_DL_GOOD_QUALITY);
EXPECT_EQ(VPX_CODEC_OK, res);
while ((frame_size = vpx_svc_get_frame_size(&svc_)) > 0) {
EXPECT_EQ((decoded_frames == 0), vpx_svc_is_keyframe(&svc_));
res_dec = decoder_->DecodeFrame(
static_cast<const uint8_t *>(vpx_svc_get_buffer(&svc_)), frame_size);
ASSERT_EQ(VPX_CODEC_OK, res_dec) << decoder_->DecodeError();
++decoded_frames;
}
EXPECT_EQ(decoded_frames, 3);
free(stats_buf.buf);
}
} // namespace


@@ -1,9 +1,6 @@
d5dfb0151c9051f8c85999255645d7a23916d3c0 hantro_collage_w352h288.yuv
998cec53307c94aa5835aaf8d5731f6a3c7c2e5a hantro_collage_w352h288.stat
b87815bf86020c592ccc7a846ba2e28ec8043902 hantro_odd.yuv
fe346136b9b8c1e6f6084cc106485706915795e4 invalid-vp90-01.webm
25751f5d3b05ff03f0719ad42cd625348eb8961e invalid-vp90-01.webm.res
d78e2fceba5ac942246503ec8366f879c4775ca5 invalid-vp90-02.webm
2dadee5306245fa5eeb0f99652d0e17afbcba96d invalid-vp90-02.webm.res
b1f1c3ec79114b9a0651af24ce634afb44a9a419 rush_hour_444.y4m
5184c46ddca8b1fadd16742e8500115bc8f749da vp80-00-comprehensive-001.ivf
65bf1bbbced81b97bd030f376d1b7f61a224793f vp80-00-comprehensive-002.ivf
@@ -580,8 +577,6 @@ d48c5db1b0f8e60521a7c749696b8067886033a3 vp90-2-09-aq2.webm
54638c38009198c38c8f3b25c182b709b6c1fd2e vp90-2-09-lf_deltas.webm.md5
510d95f3beb3b51c572611fdaeeece12277dac30 vp90-2-10-show-existing-frame.webm
14d631096f4bfa2d71f7f739aec1448fb3c33bad vp90-2-10-show-existing-frame.webm.md5
d2feea7728e8d2c615981d0f47427a4a5a45d881 vp90-2-10-show-existing-frame2.webm
5f7c7811baa3e4f03be1dd78c33971b727846821 vp90-2-10-show-existing-frame2.webm.md5
b4318e75f73a6a08992c7326de2fb589c2a794c7 vp90-2-11-size-351x287.webm
b3c48382cf7d0454e83a02497c229d27720f9e20 vp90-2-11-size-351x287.webm.md5
8e0096475ea2535bac71d3e2fc09e0c451c444df vp90-2-11-size-351x288.webm
@@ -644,5 +639,4 @@ e615575ded499ea1d992f3b38e3baa434509cdcd vp90-2-15-segkey.webm
e3ab35d4316c5e81325c50f5236ceca4bc0d35df vp90-2-15-segkey.webm.md5
9b7ca2cac09d34c4a5d296c1900f93b1e2f69d0d vp90-2-15-segkey_adpq.webm
8f46ba5f785d0c2170591a153e0d0d146a7c8090 vp90-2-15-segkey_adpq.webm.md5
76024eb753cdac6a5e5703aaea189d35c3c30ac7 invalid-vp90-2-00-quantizer-00.webm.ivf.s5861_r01-05_b6-.ivf
d3964f9dad9f60363c81b688324d95b4ec7c8038 invalid-vp90-2-00-quantizer-00.webm.ivf.s5861_r01-05_b6-.ivf.res


@@ -30,7 +30,6 @@ LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += cq_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += keyframe_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP9_DECODER) += external_frame_buffer_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP9_DECODER) += user_priv_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += active_map_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += borders_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += cpu_speed_test.cc
@@ -55,7 +54,6 @@ LIBVPX_TEST_SRCS-$(CONFIG_DECODERS) += ../webmdec.h
LIBVPX_TEST_SRCS-$(CONFIG_DECODERS) += webm_video_source.h
endif
LIBVPX_TEST_SRCS-$(CONFIG_DECODERS) += invalid_file_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_DECODERS) += test_vector_test.cc
# Currently we only support decoder perf tests for vp9. Also they read from WebM
@@ -133,6 +131,7 @@ endif # CONFIG_SHARED
## TEST DATA
##
LIBVPX_TEST_DATA-$(CONFIG_ENCODERS) += hantro_collage_w352h288.yuv
LIBVPX_TEST_DATA-$(CONFIG_ENCODERS) += hantro_collage_w352h288.stat
LIBVPX_TEST_DATA-$(CONFIG_ENCODERS) += hantro_odd.yuv
LIBVPX_TEST_DATA-$(CONFIG_VP9_ENCODER) += rush_hour_444.y4m
@@ -692,8 +691,6 @@ LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-09-subpixel-00.ivf
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-09-subpixel-00.ivf.md5
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-10-show-existing-frame.webm
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-10-show-existing-frame.webm.md5
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-10-show-existing-frame2.webm
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-10-show-existing-frame2.webm.md5
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-11-size-351x287.webm
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-11-size-351x287.webm.md5
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-11-size-351x288.webm
@@ -759,14 +756,6 @@ LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-15-segkey.webm.md5
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-15-segkey_adpq.webm
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-15-segkey_adpq.webm.md5
# Invalid files for testing libvpx error checking.
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-01.webm
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-01.webm.res
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-02.webm
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-02.webm.res
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-2-00-quantizer-00.webm.ivf.s5861_r01-05_b6-.ivf
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-2-00-quantizer-00.webm.ivf.s5861_r01-05_b6-.ivf.res
ifeq ($(CONFIG_DECODE_PERF_TESTS),yes)
# BBB VP9 streams
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += \


@@ -161,7 +161,6 @@ const char *const kVP9TestVectors[] = {
"vp90-2-08-tile-4x1.webm", "vp90-2-09-subpixel-00.ivf",
"vp90-2-02-size-lf-1920x1080.webm", "vp90-2-09-aq2.webm",
"vp90-2-09-lf_deltas.webm", "vp90-2-10-show-existing-frame.webm",
"vp90-2-10-show-existing-frame2.webm",
"vp90-2-11-size-351x287.webm", "vp90-2-11-size-351x288.webm",
"vp90-2-11-size-352x287.webm", "vp90-2-12-droppable_1.ivf",
"vp90-2-12-droppable_2.ivf", "vp90-2-12-droppable_3.ivf",
@@ -179,7 +178,7 @@ const char *const kVP9TestVectors[] = {
"vp90-2-14-resize-fp-tiles-4-2.webm", "vp90-2-14-resize-fp-tiles-4-8.webm",
"vp90-2-14-resize-fp-tiles-8-16.webm", "vp90-2-14-resize-fp-tiles-8-1.webm",
"vp90-2-14-resize-fp-tiles-8-2.webm", "vp90-2-14-resize-fp-tiles-8-4.webm",
"vp90-2-15-segkey.webm", "vp90-2-15-segkey_adpq.webm",
"vp90-2-15-segkey.webm", "vp90-2-15-segkey_adpq.webm"
};
const int kNumVP9TestVectors = NELEMENTS(kVP9TestVectors);
#endif // CONFIG_VP9_DECODER


@@ -17,10 +17,6 @@ VPX_TEST_TOOLS_COMMON_SH=included
set -e
devnull='> /dev/null 2>&1'
elog() {
echo "$@" 1>&2
}
vlog() {
if [ "${VPX_TEST_VERBOSE_OUTPUT}" = "yes" ]; then
echo "$@"
@@ -460,19 +456,10 @@ vlog "$(basename "${0%.*}") test configuration:
LIBVPX_BIN_PATH=${LIBVPX_BIN_PATH}
LIBVPX_CONFIG_PATH=${LIBVPX_CONFIG_PATH}
LIBVPX_TEST_DATA_PATH=${LIBVPX_TEST_DATA_PATH}
VP8_IVF_FILE=${VP8_IVF_FILE}
VP9_IVF_FILE=${VP9_IVF_FILE}
VP9_WEBM_FILE=${VP9_WEBM_FILE}
VPX_TEST_EXE_SUFFIX=${VPX_TEST_EXE_SUFFIX}
VPX_TEST_FILTER=${VPX_TEST_FILTER}
VPX_TEST_OUTPUT_DIR=${VPX_TEST_OUTPUT_DIR}
VPX_TEST_RAND=${VPX_TEST_RAND}
VPX_TEST_RUN_DISABLED_TESTS=${VPX_TEST_RUN_DISABLED_TESTS}
VPX_TEST_SHOW_PROGRAM_OUTPUT=${VPX_TEST_SHOW_PROGRAM_OUTPUT}
VPX_TEST_TEMP_ROOT=${VPX_TEST_TEMP_ROOT}
VPX_TEST_VERBOSE_OUTPUT=${VPX_TEST_VERBOSE_OUTPUT}
YUV_RAW_INPUT=${YUV_RAW_INPUT}
YUV_RAW_INPUT_WIDTH=${YUV_RAW_INPUT_WIDTH}
YUV_RAW_INPUT_HEIGHT=${YUV_RAW_INPUT_HEIGHT}"
VPX_TEST_FILTER=${VPX_TEST_FILTER}
VPX_TEST_RUN_DISABLED_TESTS=${VPX_TEST_RUN_DISABLED_TESTS}
VPX_TEST_SHOW_PROGRAM_OUTPUT=${VPX_TEST_SHOW_PROGRAM_OUTPUT}"
fi # End $VPX_TEST_TOOLS_COMMON_SH pseudo include guard.


@@ -29,10 +29,7 @@ twopass_encoder() {
local codec="$1"
local output_file="${VPX_TEST_OUTPUT_DIR}/twopass_encoder_${codec}.ivf"
if [ ! -x "${encoder}" ]; then
elog "${encoder} does not exist or is not executable."
return 1
fi
[ -x "${encoder}" ] || return 1
eval "${encoder}" "${codec}" "${YUV_RAW_INPUT_WIDTH}" \
"${YUV_RAW_INPUT_HEIGHT}" "${YUV_RAW_INPUT}" "${output_file}" \


@@ -1,100 +0,0 @@
/*
* Copyright (c) 2013 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include <cstdio>
#include <cstdlib>
#include <string>
#include "third_party/googletest/src/include/gtest/gtest.h"
#include "./vpx_config.h"
#include "test/acm_random.h"
#include "test/codec_factory.h"
#include "test/decode_test_driver.h"
#include "test/ivf_video_source.h"
#include "test/md5_helper.h"
#include "test/util.h"
#if CONFIG_WEBM_IO
#include "test/webm_video_source.h"
#endif
#include "vpx_mem/vpx_mem.h"
#include "vpx/vp8.h"
namespace {
using std::string;
using libvpx_test::ACMRandom;
#if CONFIG_WEBM_IO
void CheckUserPrivateData(void *user_priv, int *target) {
// actual pointer value should be the same as expected.
EXPECT_EQ(reinterpret_cast<void *>(target), user_priv) <<
"user_priv pointer value does not match.";
}
// Decodes |filename|. Passes in user_priv data when calling DecodeFrame and
// compares the user_priv from return img with the original user_priv to see if
// they match. Both the pointer values and the values inside the addresses
// should match.
string DecodeFile(const string &filename) {
ACMRandom rnd(ACMRandom::DeterministicSeed());
libvpx_test::WebMVideoSource video(filename);
video.Init();
vpx_codec_dec_cfg_t cfg = {0};
libvpx_test::VP9Decoder decoder(cfg, 0);
libvpx_test::MD5 md5;
int frame_num = 0;
for (video.Begin(); !::testing::Test::HasFailure() && video.cxdata();
video.Next()) {
void *user_priv = reinterpret_cast<void *>(&frame_num);
const vpx_codec_err_t res =
decoder.DecodeFrame(video.cxdata(), video.frame_size(),
(frame_num == 0) ? NULL : user_priv);
if (res != VPX_CODEC_OK) {
EXPECT_EQ(VPX_CODEC_OK, res) << decoder.DecodeError();
break;
}
libvpx_test::DxDataIterator dec_iter = decoder.GetDxData();
const vpx_image_t *img = NULL;
// Get decompressed data.
while ((img = dec_iter.Next())) {
if (frame_num == 0) {
CheckUserPrivateData(img->user_priv, NULL);
} else {
CheckUserPrivateData(img->user_priv, &frame_num);
// Also test ctrl_get_reference api.
struct vp9_ref_frame ref;
// Randomly fetch a reference frame.
ref.idx = rnd.Rand8() % 3;
decoder.Control(VP9_GET_REFERENCE, &ref);
CheckUserPrivateData(ref.img.user_priv, &frame_num);
}
md5.Add(img);
}
frame_num++;
}
return string(md5.Get());
}
TEST(UserPrivTest, VideoDecode) {
// no tiles or frame parallel; this exercises the decoding to test the
// user_priv.
EXPECT_STREQ("b35a1b707b28e82be025d960aba039bc",
DecodeFile("vp90-2-03-size-226x226.webm").c_str());
}
#endif // CONFIG_WEBM_IO
} // namespace


@@ -702,57 +702,6 @@ INSTANTIATE_TEST_CASE_P(
make_tuple(6, 6, subpel_avg_variance64x64_ssse3)));
#endif
#endif
#if HAVE_AVX2
// TODO(jzern): these prototypes can be removed after the avx2 versions are
// reenabled in vp9_rtcd_defs.pl.
extern "C" {
unsigned int vp9_sub_pixel_variance32x32_avx2(
const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset,
const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
unsigned int vp9_sub_pixel_variance64x64_avx2(
const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset,
const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
unsigned int vp9_sub_pixel_avg_variance32x32_avx2(
const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset,
const uint8_t *ref_ptr, int ref_stride, unsigned int *sse,
const uint8_t *second_pred);
unsigned int vp9_sub_pixel_avg_variance64x64_avx2(
const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset,
const uint8_t *ref_ptr, int ref_stride, unsigned int *sse,
const uint8_t *second_pred);
}
const vp9_variance_fn_t variance16x16_avx2 = vp9_variance16x16_avx2;
const vp9_variance_fn_t variance32x16_avx2 = vp9_variance32x16_avx2;
const vp9_variance_fn_t variance32x32_avx2 = vp9_variance32x32_avx2;
const vp9_variance_fn_t variance64x32_avx2 = vp9_variance64x32_avx2;
const vp9_variance_fn_t variance64x64_avx2 = vp9_variance64x64_avx2;
INSTANTIATE_TEST_CASE_P(
AVX2, VP9VarianceTest,
::testing::Values(make_tuple(4, 4, variance16x16_avx2),
make_tuple(5, 4, variance32x16_avx2),
make_tuple(5, 5, variance32x32_avx2),
make_tuple(6, 5, variance64x32_avx2),
make_tuple(6, 6, variance64x64_avx2)));
const vp9_subpixvariance_fn_t subpel_variance32x32_avx2 =
vp9_sub_pixel_variance32x32_avx2;
const vp9_subpixvariance_fn_t subpel_variance64x64_avx2 =
vp9_sub_pixel_variance64x64_avx2;
INSTANTIATE_TEST_CASE_P(
DISABLED_AVX2, VP9SubpelVarianceTest,
::testing::Values(make_tuple(5, 5, subpel_variance32x32_avx2),
make_tuple(6, 6, subpel_variance64x64_avx2)));
const vp9_subp_avg_variance_fn_t subpel_avg_variance32x32_avx2 =
vp9_sub_pixel_avg_variance32x32_avx2;
const vp9_subp_avg_variance_fn_t subpel_avg_variance64x64_avx2 =
vp9_sub_pixel_avg_variance64x64_avx2;
INSTANTIATE_TEST_CASE_P(
DISABLED_AVX2, VP9SubpelAvgVarianceTest,
::testing::Values(make_tuple(5, 5, subpel_avg_variance32x32_avx2),
make_tuple(6, 6, subpel_avg_variance64x64_avx2)));
#endif // HAVE_AVX2
#endif // CONFIG_VP9_ENCODER
} // namespace vp9


@@ -34,10 +34,7 @@ vpx_set_ref() {
local output_file="${VPX_TEST_OUTPUT_DIR}/vp8cx_set_ref_${codec}.ivf"
local ref_frame_num=90
if [ ! -x "${encoder}" ]; then
elog "${encoder} does not exist or is not executable."
return 1
fi
[ -x "${encoder}" ] || return 1
eval "${encoder}" "${YUV_RAW_INPUT_WIDTH}" "${YUV_RAW_INPUT_HEIGHT}" \
"${YUV_RAW_INPUT}" "${output_file}" "${ref_frame_num}" \


@@ -34,10 +34,7 @@ vp9_spatial_svc_encoder() {
shift
if [ ! -x "${encoder}" ]; then
elog "${encoder} does not exist or is not executable."
return 1
fi
[ -x "${encoder}" ] || return 1
eval "${encoder}" -w "${YUV_RAW_INPUT_WIDTH}" -h "${YUV_RAW_INPUT_HEIGHT}" \
-k "${max_kf}" -f "${frames_to_encode}" "$@" "${YUV_RAW_INPUT}" \


@@ -39,10 +39,7 @@ vpx_tsvc_encoder() {
shift 2
if [ ! -x "${encoder}" ]; then
elog "${encoder} does not exist or is not executable."
return 1
fi
[ -x "${encoder}" ] || return 1
eval "${encoder}" "${YUV_RAW_INPUT}" "${output_file}" "${codec}" \
"${YUV_RAW_INPUT_WIDTH}" "${YUV_RAW_INPUT_HEIGHT}" \


@@ -1,231 +0,0 @@
/*
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MKV_DEFS_HPP
#define MKV_DEFS_HPP 1
/* Commenting out values not available in webm, but available in matroska */
enum mkv {
EBML = 0x1A45DFA3,
EBMLVersion = 0x4286,
EBMLReadVersion = 0x42F7,
EBMLMaxIDLength = 0x42F2,
EBMLMaxSizeLength = 0x42F3,
DocType = 0x4282,
DocTypeVersion = 0x4287,
DocTypeReadVersion = 0x4285,
/* CRC_32 = 0xBF, */
Void = 0xEC,
SignatureSlot = 0x1B538667,
SignatureAlgo = 0x7E8A,
SignatureHash = 0x7E9A,
SignaturePublicKey = 0x7EA5,
Signature = 0x7EB5,
SignatureElements = 0x7E5B,
SignatureElementList = 0x7E7B,
SignedElement = 0x6532,
/* segment */
Segment = 0x18538067,
/* Meta Seek Information */
SeekHead = 0x114D9B74,
Seek = 0x4DBB,
SeekID = 0x53AB,
SeekPosition = 0x53AC,
/* Segment Information */
Info = 0x1549A966,
/* SegmentUID = 0x73A4, */
/* SegmentFilename = 0x7384, */
/* PrevUID = 0x3CB923, */
/* PrevFilename = 0x3C83AB, */
/* NextUID = 0x3EB923, */
/* NextFilename = 0x3E83BB, */
/* SegmentFamily = 0x4444, */
/* ChapterTranslate = 0x6924, */
/* ChapterTranslateEditionUID = 0x69FC, */
/* ChapterTranslateCodec = 0x69BF, */
/* ChapterTranslateID = 0x69A5, */
TimecodeScale = 0x2AD7B1,
Segment_Duration = 0x4489,
DateUTC = 0x4461,
/* Title = 0x7BA9, */
MuxingApp = 0x4D80,
WritingApp = 0x5741,
/* Cluster */
Cluster = 0x1F43B675,
Timecode = 0xE7,
/* SilentTracks = 0x5854, */
/* SilentTrackNumber = 0x58D7, */
/* Position = 0xA7, */
PrevSize = 0xAB,
BlockGroup = 0xA0,
Block = 0xA1,
/* BlockVirtual = 0xA2, */
BlockAdditions = 0x75A1,
BlockMore = 0xA6,
BlockAddID = 0xEE,
BlockAdditional = 0xA5,
BlockDuration = 0x9B,
/* ReferencePriority = 0xFA, */
ReferenceBlock = 0xFB,
/* ReferenceVirtual = 0xFD, */
/* CodecState = 0xA4, */
/* Slices = 0x8E, */
/* TimeSlice = 0xE8, */
LaceNumber = 0xCC,
/* FrameNumber = 0xCD, */
/* BlockAdditionID = 0xCB, */
/* MkvDelay = 0xCE, */
/* Cluster_Duration = 0xCF, */
SimpleBlock = 0xA3,
/* EncryptedBlock = 0xAF, */
/* Track */
Tracks = 0x1654AE6B,
TrackEntry = 0xAE,
TrackNumber = 0xD7,
TrackUID = 0x73C5,
TrackType = 0x83,
FlagEnabled = 0xB9,
FlagDefault = 0x88,
FlagForced = 0x55AA,
FlagLacing = 0x9C,
/* MinCache = 0x6DE7, */
/* MaxCache = 0x6DF8, */
DefaultDuration = 0x23E383,
/* TrackTimecodeScale = 0x23314F, */
/* TrackOffset = 0x537F, */
MaxBlockAdditionID = 0x55EE,
Name = 0x536E,
Language = 0x22B59C,
CodecID = 0x86,
CodecPrivate = 0x63A2,
CodecName = 0x258688,
/* AttachmentLink = 0x7446, */
/* CodecSettings = 0x3A9697, */
/* CodecInfoURL = 0x3B4040, */
/* CodecDownloadURL = 0x26B240, */
/* CodecDecodeAll = 0xAA, */
/* TrackOverlay = 0x6FAB, */
/* TrackTranslate = 0x6624, */
/* TrackTranslateEditionUID = 0x66FC, */
/* TrackTranslateCodec = 0x66BF, */
/* TrackTranslateTrackID = 0x66A5, */
/* video */
Video = 0xE0,
FlagInterlaced = 0x9A,
StereoMode = 0x53B8,
AlphaMode = 0x53C0,
PixelWidth = 0xB0,
PixelHeight = 0xBA,
PixelCropBottom = 0x54AA,
PixelCropTop = 0x54BB,
PixelCropLeft = 0x54CC,
PixelCropRight = 0x54DD,
DisplayWidth = 0x54B0,
DisplayHeight = 0x54BA,
DisplayUnit = 0x54B2,
AspectRatioType = 0x54B3,
/* ColourSpace = 0x2EB524, */
/* GammaValue = 0x2FB523, */
FrameRate = 0x2383E3,
/* end video */
/* audio */
Audio = 0xE1,
SamplingFrequency = 0xB5,
OutputSamplingFrequency = 0x78B5,
Channels = 0x9F,
/* ChannelPositions = 0x7D7B, */
BitDepth = 0x6264,
/* end audio */
/* content encoding */
/* ContentEncodings = 0x6d80, */
/* ContentEncoding = 0x6240, */
/* ContentEncodingOrder = 0x5031, */
/* ContentEncodingScope = 0x5032, */
/* ContentEncodingType = 0x5033, */
/* ContentCompression = 0x5034, */
/* ContentCompAlgo = 0x4254, */
/* ContentCompSettings = 0x4255, */
/* ContentEncryption = 0x5035, */
/* ContentEncAlgo = 0x47e1, */
/* ContentEncKeyID = 0x47e2, */
/* ContentSignature = 0x47e3, */
/* ContentSigKeyID = 0x47e4, */
/* ContentSigAlgo = 0x47e5, */
/* ContentSigHashAlgo = 0x47e6, */
/* end content encoding */
/* Cueing Data */
Cues = 0x1C53BB6B,
CuePoint = 0xBB,
CueTime = 0xB3,
CueTrackPositions = 0xB7,
CueTrack = 0xF7,
CueClusterPosition = 0xF1,
CueBlockNumber = 0x5378
/* CueCodecState = 0xEA, */
/* CueReference = 0xDB, */
/* CueRefTime = 0x96, */
/* CueRefCluster = 0x97, */
/* CueRefNumber = 0x535F, */
/* CueRefCodecState = 0xEB, */
/* Attachment */
/* Attachments = 0x1941A469, */
/* AttachedFile = 0x61A7, */
/* FileDescription = 0x467E, */
/* FileName = 0x466E, */
/* FileMimeType = 0x4660, */
/* FileData = 0x465C, */
/* FileUID = 0x46AE, */
/* FileReferral = 0x4675, */
/* Chapters */
/* Chapters = 0x1043A770, */
/* EditionEntry = 0x45B9, */
/* EditionUID = 0x45BC, */
/* EditionFlagHidden = 0x45BD, */
/* EditionFlagDefault = 0x45DB, */
/* EditionFlagOrdered = 0x45DD, */
/* ChapterAtom = 0xB6, */
/* ChapterUID = 0x73C4, */
/* ChapterTimeStart = 0x91, */
/* ChapterTimeEnd = 0x92, */
/* ChapterFlagHidden = 0x98, */
/* ChapterFlagEnabled = 0x4598, */
/* ChapterSegmentUID = 0x6E67, */
/* ChapterSegmentEditionUID = 0x6EBC, */
/* ChapterPhysicalEquiv = 0x63C3, */
/* ChapterTrack = 0x8F, */
/* ChapterTrackNumber = 0x89, */
/* ChapterDisplay = 0x80, */
/* ChapString = 0x85, */
/* ChapLanguage = 0x437C, */
/* ChapCountry = 0x437E, */
/* ChapProcess = 0x6944, */
/* ChapProcessCodecID = 0x6955, */
/* ChapProcessPrivate = 0x450D, */
/* ChapProcessCommand = 0x6911, */
/* ChapProcessTime = 0x6922, */
/* ChapProcessData = 0x6933, */
/* Tagging */
/* Tags = 0x1254C367, */
/* Tag = 0x7373, */
/* Targets = 0x63C0, */
/* TargetTypeValue = 0x68CA, */
/* TargetType = 0x63CA, */
/* Tagging_TrackUID = 0x63C5, */
/* Tagging_EditionUID = 0x63C9, */
/* Tagging_ChapterUID = 0x63C4, */
/* AttachmentUID = 0x63C6, */
/* SimpleTag = 0x67C8, */
/* TagName = 0x45A3, */
/* TagLanguage = 0x447A, */
/* TagDefault = 0x4484, */
/* TagString = 0x4487, */
/* TagBinary = 0x4485, */
};
#endif


@@ -1,157 +0,0 @@
/*
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "EbmlWriter.h"
#include <stdlib.h>
#include <wchar.h>
#include <string.h>
#include <limits.h>
#if defined(_MSC_VER)
#define LITERALU64(n) n
#else
#define LITERALU64(n) n##LLU
#endif
void Ebml_WriteLen(EbmlGlobal *glob, int64_t val) {
/* TODO check and make sure we are not > than 0x0100000000000000LLU */
unsigned char size = 8; /* size in bytes to output */
/* mask to compare for byte size */
int64_t minVal = 0xff;
for (size = 1; size < 8; size ++) {
if (val < minVal)
break;
minVal = (minVal << 7);
}
val |= (((uint64_t)0x80) << ((size - 1) * 7));
Ebml_Serialize(glob, (void *) &val, sizeof(val), size);
}
void Ebml_WriteString(EbmlGlobal *glob, const char *str) {
const size_t size_ = strlen(str);
const uint64_t size = size_;
Ebml_WriteLen(glob, size);
/* TODO: it's not clear from the spec whether the nul terminator
* should be serialized too. For now we omit the null terminator.
*/
Ebml_Write(glob, str, (unsigned long)size);
}
void Ebml_WriteUTF8(EbmlGlobal *glob, const wchar_t *wstr) {
const size_t strlen = wcslen(wstr);
/* TODO: it's not clear from the spec whether the nul terminator
* should be serialized too. For now we include it.
*/
const uint64_t size = strlen;
Ebml_WriteLen(glob, size);
Ebml_Write(glob, wstr, (unsigned long)size);
}
void Ebml_WriteID(EbmlGlobal *glob, unsigned long class_id) {
int len;
if (class_id >= 0x01000000)
len = 4;
else if (class_id >= 0x00010000)
len = 3;
else if (class_id >= 0x00000100)
len = 2;
else
len = 1;
Ebml_Serialize(glob, (void *)&class_id, sizeof(class_id), len);
}
void Ebml_SerializeUnsigned64(EbmlGlobal *glob, unsigned long class_id, uint64_t ui) {
unsigned char sizeSerialized = 8 | 0x80;
Ebml_WriteID(glob, class_id);
Ebml_Serialize(glob, &sizeSerialized, sizeof(sizeSerialized), 1);
Ebml_Serialize(glob, &ui, sizeof(ui), 8);
}
void Ebml_SerializeUnsigned(EbmlGlobal *glob, unsigned long class_id, unsigned long ui) {
unsigned char size = 8; /* size in bytes to output */
unsigned char sizeSerialized = 0;
unsigned long minVal;
Ebml_WriteID(glob, class_id);
minVal = 0x7fLU; /* mask to compare for byte size */
for (size = 1; size < 4; size ++) {
if (ui < minVal) {
break;
}
minVal <<= 7;
}
sizeSerialized = 0x80 | size;
Ebml_Serialize(glob, &sizeSerialized, sizeof(sizeSerialized), 1);
Ebml_Serialize(glob, &ui, sizeof(ui), size);
}
/* TODO: perhaps this is a poor name for this id serializer helper function */
void Ebml_SerializeBinary(EbmlGlobal *glob, unsigned long class_id, unsigned long bin) {
int size;
for (size = 4; size > 1; size--) {
if (bin & (unsigned int)0x000000ff << ((size - 1) * 8))
break;
}
Ebml_WriteID(glob, class_id);
Ebml_WriteLen(glob, size);
Ebml_WriteID(glob, bin);
}
void Ebml_SerializeFloat(EbmlGlobal *glob, unsigned long class_id, double d) {
unsigned char len = 0x88;
Ebml_WriteID(glob, class_id);
Ebml_Serialize(glob, &len, sizeof(len), 1);
Ebml_Serialize(glob, &d, sizeof(d), 8);
}
void Ebml_WriteSigned16(EbmlGlobal *glob, short val) {
signed long out = ((val & 0x003FFFFF) | 0x00200000) << 8;
Ebml_Serialize(glob, &out, sizeof(out), 3);
}
void Ebml_SerializeString(EbmlGlobal *glob, unsigned long class_id, const char *s) {
Ebml_WriteID(glob, class_id);
Ebml_WriteString(glob, s);
}
void Ebml_SerializeUTF8(EbmlGlobal *glob, unsigned long class_id, wchar_t *s) {
Ebml_WriteID(glob, class_id);
Ebml_WriteUTF8(glob, s);
}
void Ebml_SerializeData(EbmlGlobal *glob, unsigned long class_id, unsigned char *data, unsigned long data_length) {
Ebml_WriteID(glob, class_id);
Ebml_WriteLen(glob, data_length);
Ebml_Write(glob, data, data_length);
}
void Ebml_WriteVoid(EbmlGlobal *glob, unsigned long vSize) {
unsigned char tmp = 0;
unsigned long i = 0;
Ebml_WriteID(glob, 0xEC);
Ebml_WriteLen(glob, vSize);
for (i = 0; i < vSize; i++) {
Ebml_Write(glob, &tmp, 1);
}
}
/* TODO Serialize Date */


@@ -1,42 +0,0 @@
/*
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef EBMLWRITER_HPP
#define EBMLWRITER_HPP
#include <stddef.h>
#include "vpx/vpx_integer.h"
/* note: you must define write and serialize functions as well as your own
* EBML_GLOBAL
*
* These functions MUST be implemented
*/
typedef struct EbmlGlobal EbmlGlobal;
void Ebml_Serialize(EbmlGlobal *glob, const void *, int, unsigned long);
void Ebml_Write(EbmlGlobal *glob, const void *, unsigned long);
/*****/
void Ebml_WriteLen(EbmlGlobal *glob, int64_t val);
void Ebml_WriteString(EbmlGlobal *glob, const char *str);
void Ebml_WriteUTF8(EbmlGlobal *glob, const wchar_t *wstr);
void Ebml_WriteID(EbmlGlobal *glob, unsigned long class_id);
void Ebml_SerializeUnsigned64(EbmlGlobal *glob, unsigned long class_id, uint64_t ui);
void Ebml_SerializeUnsigned(EbmlGlobal *glob, unsigned long class_id, unsigned long ui);
void Ebml_SerializeBinary(EbmlGlobal *glob, unsigned long class_id, unsigned long ui);
void Ebml_SerializeFloat(EbmlGlobal *glob, unsigned long class_id, double d);
/* TODO make this more generic to signed */
void Ebml_WriteSigned16(EbmlGlobal *glob, short val);
void Ebml_SerializeString(EbmlGlobal *glob, unsigned long class_id, const char *s);
void Ebml_SerializeUTF8(EbmlGlobal *glob, unsigned long class_id, wchar_t *s);
void Ebml_SerializeData(EbmlGlobal *glob, unsigned long class_id, unsigned char *data, unsigned long data_length);
void Ebml_WriteVoid(EbmlGlobal *glob, unsigned long vSize);
/* TODO need date function */
#endif
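
For orientation, a minimal sketch of how the interface declared above composes: the header only requires the caller to supply EbmlGlobal plus the Ebml_Write/Ebml_Serialize primitives. The FILE-backed store and the write_doctype helper below are illustrative assumptions, not code from the tree, and the byte reversal assumes a little-endian host.

#include <stdio.h>
#include "EbmlWriter.h"

/* Illustrative backing store; EbmlWriter.h leaves EbmlGlobal opaque. */
struct EbmlGlobal {
  FILE *stream;
};

void Ebml_Write(EbmlGlobal *glob, const void *buf, unsigned long len) {
  fwrite(buf, 1, len, glob->stream);
}

/* Emit the low 'len' bytes of a native integer in big-endian order, as
 * the ID/size/integer serializers expect (little-endian host assumed). */
void Ebml_Serialize(EbmlGlobal *glob, const void *buf, int size,
                    unsigned long len) {
  const unsigned char *p = (const unsigned char *)buf + (len - 1);
  (void)size;
  while (len--)
    fwrite(p--, 1, 1, glob->stream);
}

/* Usage sketch: write two children of the EBML header (master-element
 * framing omitted); 0x4282 is DocType and 0x4287 is DocTypeVersion in
 * the enum above. */
void write_doctype(EbmlGlobal *glob) {
  Ebml_SerializeString(glob, 0x4282, "webm");
  Ebml_SerializeUnsigned(glob, 0x4287, 2);
}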


@@ -463,7 +463,9 @@ $vp8_short_walsh4x4_neon_asm=vp8_short_walsh4x4_neon;
# Quantizer
#
add_proto qw/void vp8_regular_quantize_b/, "struct block *, struct blockd *";
specialize qw/vp8_regular_quantize_b sse2 sse4_1/;
specialize qw/vp8_regular_quantize_b sse2/;
# TODO(johann) Update sse4 implementation and re-enable
#$vp8_regular_quantize_b_sse4_1=vp8_regular_quantize_b_sse4;
add_proto qw/void vp8_fast_quantize_b/, "struct block *, struct blockd *";
specialize qw/vp8_fast_quantize_b sse2 ssse3 media neon_asm/;


@@ -246,6 +246,7 @@ sym(vp8_mbpost_proc_down_mmx):
; unsigned char whiteclamp[16],
; unsigned char bothclamp[16],
; unsigned int Width, unsigned int Height, int Pitch)
extern sym(rand)
global sym(vp8_plane_add_noise_mmx) PRIVATE
sym(vp8_plane_add_noise_mmx):
push rbp
@@ -257,7 +258,7 @@ sym(vp8_plane_add_noise_mmx):
; end prolog
.addnoise_loop:
call sym(LIBVPX_RAND) WRT_PLT
call sym(rand) WRT_PLT
mov rcx, arg(1) ;noise
and rax, 0xff
add rcx, rax


@@ -660,6 +660,7 @@ sym(vp8_mbpost_proc_across_ip_xmm):
; unsigned char whiteclamp[16],
; unsigned char bothclamp[16],
; unsigned int Width, unsigned int Height, int Pitch)
extern sym(rand)
global sym(vp8_plane_add_noise_wmt) PRIVATE
sym(vp8_plane_add_noise_wmt):
push rbp
@@ -671,7 +672,7 @@ sym(vp8_plane_add_noise_wmt):
; end prolog
.addnoise_loop:
call sym(LIBVPX_RAND) WRT_PLT
call sym(rand) WRT_PLT
mov rcx, arg(1) ;noise
and rax, 0xff
add rcx, rax


@@ -0,0 +1,24 @@
/*
* Copyright (c) 2012 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
/* On Android NDK, rand is inlined function, but postproc needs rand symbol */
#if defined(__ANDROID__)
#define rand __rand
#include <stdlib.h>
#undef rand
extern int rand(void)
{
return __rand();
}
#else
/* ISO C forbids an empty translation unit. */
int vp8_unused;
#endif


@@ -191,12 +191,10 @@ int vp8_denoiser_filter_c(unsigned char *mc_running_avg_y, int mc_avg_y_stride,
return FILTER_BLOCK;
}
int vp8_denoiser_allocate(VP8_DENOISER *denoiser, int width, int height,
int num_mb_rows, int num_mb_cols)
int vp8_denoiser_allocate(VP8_DENOISER *denoiser, int width, int height)
{
int i;
assert(denoiser);
denoiser->num_mb_cols = num_mb_cols;
for (i = 0; i < MAX_REF_FRAMES; i++)
{
@@ -224,10 +222,6 @@ int vp8_denoiser_allocate(VP8_DENOISER *denoiser, int width, int height,
vpx_memset(denoiser->yv12_mc_running_avg.buffer_alloc, 0,
denoiser->yv12_mc_running_avg.frame_size);
denoiser->denoise_state = vpx_calloc((num_mb_rows * num_mb_cols), 1);
vpx_memset(denoiser->denoise_state, 0, (num_mb_rows * num_mb_cols));
return 0;
}
@@ -249,20 +243,13 @@ void vp8_denoiser_denoise_mb(VP8_DENOISER *denoiser,
unsigned int best_sse,
unsigned int zero_mv_sse,
int recon_yoffset,
int recon_uvoffset,
loop_filter_info_n *lfi_n,
int mb_row,
int mb_col,
int block_index)
int recon_uvoffset)
{
int mv_row;
int mv_col;
unsigned int motion_magnitude2;
unsigned int sse_thresh;
int sse_diff_thresh = 0;
// Spatial loop filter: only applied selectively based on
// temporal filter state of block relative to top/left neighbors.
int apply_spatial_loop_filter = 1;
MV_REFERENCE_FRAME frame = x->best_reference_frame;
MV_REFERENCE_FRAME zero_frame = x->best_zeromv_reference_frame;
@@ -276,11 +263,7 @@ void vp8_denoiser_denoise_mb(VP8_DENOISER *denoiser,
MB_MODE_INFO saved_mbmi;
MACROBLOCKD *filter_xd = &x->e_mbd;
MB_MODE_INFO *mbmi = &filter_xd->mode_info_context->mbmi;
int sse_diff = 0;
// Bias on zero motion vector sse.
int zero_bias = 95;
zero_mv_sse = (unsigned int)((int64_t)zero_mv_sse * zero_bias / 100);
sse_diff = zero_mv_sse - best_sse;
int sse_diff = zero_mv_sse - best_sse;
saved_mbmi = *mbmi;
@@ -379,8 +362,6 @@ void vp8_denoiser_denoise_mb(VP8_DENOISER *denoiser,
running_avg_y, avg_y_stride,
x->thismb, 16, motion_magnitude2,
x->increase_denoising);
denoiser->denoise_state[block_index] = motion_magnitude2 > 0 ?
kFilterNonZeroMV : kFilterZeroMV;
}
if (decision == COPY_BLOCK)
{
@@ -391,59 +372,5 @@ void vp8_denoiser_denoise_mb(VP8_DENOISER *denoiser,
x->thismb, 16,
denoiser->yv12_running_avg[INTRA_FRAME].y_buffer + recon_yoffset,
denoiser->yv12_running_avg[INTRA_FRAME].y_stride);
denoiser->denoise_state[block_index] = kNoFilter;
}
// Option to selectively deblock the denoised signal.
if (apply_spatial_loop_filter) {
loop_filter_info lfi;
int apply_filter_col = 0;
int apply_filter_row = 0;
int apply_filter = 0;
int y_stride = denoiser->yv12_running_avg[INTRA_FRAME].y_stride;
int uv_stride = denoiser->yv12_running_avg[INTRA_FRAME].uv_stride;
// Fix filter level to some nominal value for now.
int filter_level = 32;
int hev_index = lfi_n->hev_thr_lut[INTER_FRAME][filter_level];
lfi.mblim = lfi_n->mblim[filter_level];
lfi.blim = lfi_n->blim[filter_level];
lfi.lim = lfi_n->lim[filter_level];
lfi.hev_thr = lfi_n->hev_thr[hev_index];
// Apply filter if there is a difference in the denoiser filter state
// between the current and left/top block, or if non-zero motion vector
// is used for the motion-compensated filtering.
if (mb_col > 0) {
apply_filter_col = !((denoiser->denoise_state[block_index] ==
denoiser->denoise_state[block_index - 1]) &&
denoiser->denoise_state[block_index] != kFilterNonZeroMV);
if (apply_filter_col) {
// Filter left vertical edge.
apply_filter = 1;
vp8_loop_filter_mbv(
denoiser->yv12_running_avg[INTRA_FRAME].y_buffer + recon_yoffset,
NULL, NULL, y_stride, uv_stride, &lfi);
}
}
if (mb_row > 0) {
apply_filter_row = !((denoiser->denoise_state[block_index] ==
denoiser->denoise_state[block_index - denoiser->num_mb_cols]) &&
denoiser->denoise_state[block_index] != kFilterNonZeroMV);
if (apply_filter_row) {
// Filter top horizontal edge.
apply_filter = 1;
vp8_loop_filter_mbh(
denoiser->yv12_running_avg[INTRA_FRAME].y_buffer + recon_yoffset,
NULL, NULL, y_stride, uv_stride, &lfi);
}
}
if (apply_filter) {
// Update the signal block |x|. Pixel changes are only to top and/or
// left boundary pixels: can we avoid full block copy here.
vp8_copy_mem16x16(
denoiser->yv12_running_avg[INTRA_FRAME].y_buffer + recon_yoffset,
y_stride, x->thismb, 16);
}
}
}


@@ -12,7 +12,6 @@
#define VP8_ENCODER_DENOISING_H_
#include "block.h"
#include "vp8/common/loopfilter.h"
#ifdef __cplusplus
extern "C" {
@@ -28,22 +27,13 @@ enum vp8_denoiser_decision
FILTER_BLOCK
};
enum vp8_denoiser_filter_state {
kNoFilter,
kFilterZeroMV,
kFilterNonZeroMV
};
typedef struct vp8_denoiser
{
YV12_BUFFER_CONFIG yv12_running_avg[MAX_REF_FRAMES];
YV12_BUFFER_CONFIG yv12_mc_running_avg;
unsigned char* denoise_state;
int num_mb_cols;
} VP8_DENOISER;
int vp8_denoiser_allocate(VP8_DENOISER *denoiser, int width, int height,
int num_mb_rows, int num_mb_cols);
int vp8_denoiser_allocate(VP8_DENOISER *denoiser, int width, int height);
void vp8_denoiser_free(VP8_DENOISER *denoiser);
@@ -52,11 +42,7 @@ void vp8_denoiser_denoise_mb(VP8_DENOISER *denoiser,
unsigned int best_sse,
unsigned int zero_mv_sse,
int recon_yoffset,
int recon_uvoffset,
loop_filter_info_n *lfi_n,
int mb_row,
int mb_col,
int block_index);
int recon_uvoffset);
#ifdef __cplusplus
} // extern "C"


@@ -1246,7 +1246,7 @@ int vp8cx_encode_inter_macroblock
x->zbin_mode_boost_enabled = 0;
}
vp8_rd_pick_inter_mode(cpi, x, recon_yoffset, recon_uvoffset, &rate,
&distortion, &intra_error, mb_row, mb_col);
&distortion, &intra_error);
/* switch back to the regular quantizer for the encode */
if (cpi->sf.improved_quant)


@@ -98,9 +98,6 @@ extern double vp8_calc_ssimg
#ifdef OUTPUT_YUV_SRC
FILE *yuv_file;
#endif
#ifdef OUTPUT_YUV_DENOISED
FILE *yuv_denoised_file;
#endif
#if 0
FILE *framepsnr;
@@ -1751,8 +1748,7 @@ void vp8_change_config(VP8_COMP *cpi, VP8_CONFIG *oxcf)
{
int width = (cpi->oxcf.Width + 15) & ~15;
int height = (cpi->oxcf.Height + 15) & ~15;
vp8_denoiser_allocate(&cpi->denoiser, width, height,
cpi->common.mb_rows, cpi->common.mb_cols);
vp8_denoiser_allocate(&cpi->denoiser, width, height);
}
}
#endif
@@ -1965,9 +1961,6 @@ struct VP8_COMP* vp8_create_compressor(VP8_CONFIG *oxcf)
#ifdef OUTPUT_YUV_SRC
yuv_file = fopen("bd.yuv", "ab");
#endif
#ifdef OUTPUT_YUV_DENOISED
yuv_denoised_file = fopen("denoised.yuv", "ab");
#endif
#if 0
framepsnr = fopen("framepsnr.stt", "a");
@@ -2417,9 +2410,6 @@ void vp8_remove_compressor(VP8_COMP **ptr)
#ifdef OUTPUT_YUV_SRC
fclose(yuv_file);
#endif
#ifdef OUTPUT_YUV_DENOISED
fclose(yuv_denoised_file);
#endif
#if 0
@@ -2620,7 +2610,7 @@ int vp8_update_entropy(VP8_COMP *cpi, int update)
}
#if defined(OUTPUT_YUV_SRC) || defined(OUTPUT_YUV_DENOISED)
#if OUTPUT_YUV_SRC
void vp8_write_yuv_frame(FILE *yuv_file, YV12_BUFFER_CONFIG *s)
{
unsigned char *src = s->y_buffer;
@@ -4440,11 +4430,6 @@ static void encode_frame_to_data_rate
update_reference_frames(cpi);
#ifdef OUTPUT_YUV_DENOISED
vp8_write_yuv_frame(yuv_denoised_file,
&cpi->denoiser.yv12_running_avg[INTRA_FRAME]);
#endif
#if !(CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING)
if (cpi->oxcf.error_resilient_mode)
{


@@ -1168,7 +1168,6 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
#if CONFIG_TEMPORAL_DENOISING
if (cpi->oxcf.noise_sensitivity)
{
int block_index = mb_row * cpi->common.mb_cols + mb_col;
if (x->best_sse_inter_mode == DC_PRED)
{
/* No best MV found. */
@@ -1180,9 +1179,7 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
}
x->increase_denoising = 0;
vp8_denoiser_denoise_mb(&cpi->denoiser, x, best_sse, zero_mv_sse,
recon_yoffset, recon_uvoffset,
&cpi->common.lf_info, mb_row, mb_col,
block_index);
recon_yoffset, recon_uvoffset);
/* Reevaluate ZEROMV after denoising. */


@@ -1935,8 +1935,7 @@ static void update_best_mode(BEST_MODE* best_mode, int this_rd,
void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
int recon_uvoffset, int *returnrate,
int *returndistortion, int *returnintra,
int mb_row, int mb_col)
int *returndistortion, int *returnintra)
{
BLOCK *b = &x->block[0];
BLOCKD *d = &x->e_mbd.block[0];
@@ -2511,7 +2510,6 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
#if CONFIG_TEMPORAL_DENOISING
if (cpi->oxcf.noise_sensitivity)
{
int block_index = mb_row * cpi->common.mb_cols + mb_col;
if (x->best_sse_inter_mode == DC_PRED)
{
/* No best MV found. */
@@ -2522,9 +2520,7 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
best_sse = best_rd_sse;
}
vp8_denoiser_denoise_mb(&cpi->denoiser, x, best_sse, zero_mv_sse,
recon_yoffset, recon_uvoffset,
&cpi->common.lf_info, mb_row, mb_col,
block_index);
recon_yoffset, recon_uvoffset);
/* Reevaluate ZEROMV after denoising. */


@@ -70,10 +70,7 @@ static void insertsortsad(int arr[],int idx[], int len)
}
extern void vp8_initialize_rd_consts(VP8_COMP *cpi, MACROBLOCK *x, int Qvalue);
extern void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x,
int recon_yoffset, int recon_uvoffset,
int *returnrate, int *returndistortion,
int *returnintra, int mb_row, int mb_col);
extern void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int recon_uvoffset, int *returnrate, int *returndistortion, int *returnintra);
extern void vp8_rd_pick_intra_mode(MACROBLOCK *x, int *rate);


@@ -26,10 +26,11 @@
int cmp = (x[z] < boost) | (y[z] == 0); \
zbin_boost_ptr++; \
if (cmp) \
break; \
goto select_eob_end_##i; \
qcoeff_ptr[z] = y[z]; \
eob = i; \
zbin_boost_ptr = b->zrun_zbin_boost; \
select_eob_end_##i:; \
} while (0)
void vp8_regular_quantize_b_sse2(BLOCK *b, BLOCKD *d)
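
For context on the SELECT_EOB hunk above: the macro's early break becomes a goto to a per-expansion label. Both forms skip the rest of the macro body, but break binds to the innermost enclosing loop or switch, so the label form stays correct regardless of the expansion site; that is one plausible motivation for such a rewrite, not a statement of the committer's intent. A compilable toy reduction of the rewritten pattern (the zero test stands in for the real coefficient checks):

#include <stdio.h>

/* Toy stand-in for the real per-coefficient work: skip a zero 'z',
 * otherwise record 'i' as the end-of-block position. */
#define SELECT_EOB(i, z)            \
  do {                              \
    if ((z) == 0)                   \
      goto select_eob_end_##i;      \
    eob = (i);                      \
  select_eob_end_##i:;              \
  } while (0)

int main(void) {
  int eob = 0;
  SELECT_EOB(1, 7);
  SELECT_EOB(2, 0);           /* jumps straight to its own label */
  SELECT_EOB(3, 4);
  printf("eob = %d\n", eob);  /* prints eob = 3 */
  return 0;
}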


@@ -0,0 +1,256 @@
;
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
; Use of this source code is governed by a BSD-style license and patent
; grant that can be found in the LICENSE file in the root of the source
; tree. All contributing project authors may be found in the AUTHORS
; file in the root of the source tree.
;
%include "vpx_ports/x86_abi_support.asm"
%include "vp8_asm_enc_offsets.asm"
; void vp8_regular_quantize_b_sse4 | arg
; (BLOCK *b, | 0
; BLOCKD *d) | 1
global sym(vp8_regular_quantize_b_sse4) PRIVATE
sym(vp8_regular_quantize_b_sse4):
%if ABI_IS_32BIT
push rbp
mov rbp, rsp
GET_GOT rbx
push rdi
push rsi
ALIGN_STACK 16, rax
%define qcoeff 0 ; 32
%define stack_size 32
sub rsp, stack_size
%else
%if LIBVPX_YASM_WIN64
SAVE_XMM 8, u
push rdi
push rsi
%endif
%endif
; end prolog
%if ABI_IS_32BIT
mov rdi, arg(0) ; BLOCK *b
mov rsi, arg(1) ; BLOCKD *d
%else
%if LIBVPX_YASM_WIN64
mov rdi, rcx ; BLOCK *b
mov rsi, rdx ; BLOCKD *d
%else
;mov rdi, rdi ; BLOCK *b
;mov rsi, rsi ; BLOCKD *d
%endif
%endif
mov rax, [rdi + vp8_block_coeff]
mov rcx, [rdi + vp8_block_zbin]
mov rdx, [rdi + vp8_block_round]
movd xmm7, [rdi + vp8_block_zbin_extra]
; z
movdqa xmm0, [rax]
movdqa xmm1, [rax + 16]
; duplicate zbin_oq_value
pshuflw xmm7, xmm7, 0
punpcklwd xmm7, xmm7
movdqa xmm2, xmm0
movdqa xmm3, xmm1
; sz
psraw xmm0, 15
psraw xmm1, 15
; (z ^ sz)
pxor xmm2, xmm0
pxor xmm3, xmm1
; x = abs(z)
psubw xmm2, xmm0
psubw xmm3, xmm1
; zbin
movdqa xmm4, [rcx]
movdqa xmm5, [rcx + 16]
; *zbin_ptr + zbin_oq_value
paddw xmm4, xmm7
paddw xmm5, xmm7
movdqa xmm6, xmm2
movdqa xmm7, xmm3
; x - (*zbin_ptr + zbin_oq_value)
psubw xmm6, xmm4
psubw xmm7, xmm5
; round
movdqa xmm4, [rdx]
movdqa xmm5, [rdx + 16]
mov rax, [rdi + vp8_block_quant_shift]
mov rcx, [rdi + vp8_block_quant]
mov rdx, [rdi + vp8_block_zrun_zbin_boost]
; x + round
paddw xmm2, xmm4
paddw xmm3, xmm5
; quant
movdqa xmm4, [rcx]
movdqa xmm5, [rcx + 16]
; y = x * quant_ptr >> 16
pmulhw xmm4, xmm2
pmulhw xmm5, xmm3
; y += x
paddw xmm2, xmm4
paddw xmm3, xmm5
pxor xmm4, xmm4
%if ABI_IS_32BIT
movdqa [rsp + qcoeff], xmm4
movdqa [rsp + qcoeff + 16], xmm4
%else
pxor xmm8, xmm8
%endif
; quant_shift
movdqa xmm5, [rax]
; zrun_zbin_boost
mov rax, rdx
%macro ZIGZAG_LOOP 5
; x
pextrw ecx, %4, %2
; if (x >= zbin)
sub cx, WORD PTR[rdx] ; x - zbin
lea rdx, [rdx + 2] ; zbin_boost_ptr++
jl .rq_zigzag_loop_%1 ; x < zbin
pextrw edi, %3, %2 ; y
; downshift by quant_shift[rc]
pextrb ecx, xmm5, %1 ; quant_shift[rc]
sar edi, cl ; also sets Z bit
je .rq_zigzag_loop_%1 ; !y
%if ABI_IS_32BIT
mov WORD PTR[rsp + qcoeff + %1 *2], di
%else
pinsrw %5, edi, %2 ; qcoeff[rc]
%endif
mov rdx, rax ; reset to b->zrun_zbin_boost
.rq_zigzag_loop_%1:
%endmacro
; in vp8_default_zig_zag1d order: see vp8/common/entropy.c
ZIGZAG_LOOP 0, 0, xmm2, xmm6, xmm4
ZIGZAG_LOOP 1, 1, xmm2, xmm6, xmm4
ZIGZAG_LOOP 4, 4, xmm2, xmm6, xmm4
ZIGZAG_LOOP 8, 0, xmm3, xmm7, xmm8
ZIGZAG_LOOP 5, 5, xmm2, xmm6, xmm4
ZIGZAG_LOOP 2, 2, xmm2, xmm6, xmm4
ZIGZAG_LOOP 3, 3, xmm2, xmm6, xmm4
ZIGZAG_LOOP 6, 6, xmm2, xmm6, xmm4
ZIGZAG_LOOP 9, 1, xmm3, xmm7, xmm8
ZIGZAG_LOOP 12, 4, xmm3, xmm7, xmm8
ZIGZAG_LOOP 13, 5, xmm3, xmm7, xmm8
ZIGZAG_LOOP 10, 2, xmm3, xmm7, xmm8
ZIGZAG_LOOP 7, 7, xmm2, xmm6, xmm4
ZIGZAG_LOOP 11, 3, xmm3, xmm7, xmm8
ZIGZAG_LOOP 14, 6, xmm3, xmm7, xmm8
ZIGZAG_LOOP 15, 7, xmm3, xmm7, xmm8
mov rcx, [rsi + vp8_blockd_dequant]
mov rdi, [rsi + vp8_blockd_dqcoeff]
%if ABI_IS_32BIT
movdqa xmm4, [rsp + qcoeff]
movdqa xmm5, [rsp + qcoeff + 16]
%else
%define xmm5 xmm8
%endif
; y ^ sz
pxor xmm4, xmm0
pxor xmm5, xmm1
; x = (y ^ sz) - sz
psubw xmm4, xmm0
psubw xmm5, xmm1
; dequant
movdqa xmm0, [rcx]
movdqa xmm1, [rcx + 16]
mov rcx, [rsi + vp8_blockd_qcoeff]
pmullw xmm0, xmm4
pmullw xmm1, xmm5
; store qcoeff
movdqa [rcx], xmm4
movdqa [rcx + 16], xmm5
; store dqcoeff
movdqa [rdi], xmm0
movdqa [rdi + 16], xmm1
mov rcx, [rsi + vp8_blockd_eob]
; select the last value (in zig_zag order) for EOB
pxor xmm6, xmm6
pcmpeqw xmm4, xmm6
pcmpeqw xmm5, xmm6
packsswb xmm4, xmm5
pshufb xmm4, [GLOBAL(zig_zag1d)]
pmovmskb edx, xmm4
xor rdi, rdi
mov eax, -1
xor dx, ax
bsr eax, edx
sub edi, edx
sar edi, 31
add eax, 1
and eax, edi
mov BYTE PTR [rcx], al ; store eob
; begin epilog
%if ABI_IS_32BIT
add rsp, stack_size
pop rsp
pop rsi
pop rdi
RESTORE_GOT
pop rbp
%else
%undef xmm5
%if LIBVPX_YASM_WIN64
pop rsi
pop rdi
RESTORE_XMM
%endif
%endif
ret
SECTION_RODATA
align 16
; vp8/common/entropy.c: vp8_default_zig_zag1d
zig_zag1d:
db 0, 1, 4, 8, 5, 2, 3, 6, 9, 12, 13, 10, 7, 11, 14, 15
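
The epilogue above derives the EOB without branching: the zero/nonzero mask is shuffled into zig-zag order, inverted, and bsr plus a derived sign mask produces last-nonzero-position-plus-one, or 0 for an empty block. A scalar sketch of the same computation, with a plain loop standing in for bsr:

#include <stdio.h>
#include <stdint.h>

/* Bit i of 'nonzero' is set when the i-th coefficient, already in
 * zig-zag order, survived quantization (mirrors the inverted
 * pmovmskb result above). */
static int select_eob(uint16_t nonzero) {
  int i;
  for (i = 15; i >= 0; --i) {    /* scalar stand-in for bsr */
    if (nonzero & (1u << i))
      return i + 1;
  }
  return 0;                      /* all-zero block: eob == 0 */
}

int main(void) {
  printf("%d\n", select_eob(0x0005)); /* coeffs 0 and 2 nonzero -> 3 */
  printf("%d\n", select_eob(0x0000)); /* empty block -> 0 */
  return 0;
}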


@@ -1,128 +0,0 @@
/*
* Copyright (c) 2012 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include <smmintrin.h> /* SSE4.1 */
#include "./vp8_rtcd.h"
#include "vp8/encoder/block.h"
#include "vp8/common/entropy.h" /* vp8_default_inv_zig_zag */
#define SELECT_EOB(i, z, x, y, q) \
do { \
short boost = *zbin_boost_ptr; \
short x_z = _mm_extract_epi16(x, z); \
short y_z = _mm_extract_epi16(y, z); \
int cmp = (x_z < boost) | (y_z == 0); \
zbin_boost_ptr++; \
if (cmp) \
break; \
q = _mm_insert_epi16(q, y_z, z); \
eob = i; \
zbin_boost_ptr = b->zrun_zbin_boost; \
} while (0)
void vp8_regular_quantize_b_sse4_1(BLOCK *b, BLOCKD *d) {
char eob = 0;
short *zbin_boost_ptr = b->zrun_zbin_boost;
__m128i sz0, x0, sz1, x1, y0, y1, x_minus_zbin0, x_minus_zbin1,
dqcoeff0, dqcoeff1;
__m128i quant_shift0 = _mm_load_si128((__m128i *)(b->quant_shift));
__m128i quant_shift1 = _mm_load_si128((__m128i *)(b->quant_shift + 8));
__m128i z0 = _mm_load_si128((__m128i *)(b->coeff));
__m128i z1 = _mm_load_si128((__m128i *)(b->coeff+8));
__m128i zbin_extra = _mm_cvtsi32_si128(b->zbin_extra);
__m128i zbin0 = _mm_load_si128((__m128i *)(b->zbin));
__m128i zbin1 = _mm_load_si128((__m128i *)(b->zbin + 8));
__m128i round0 = _mm_load_si128((__m128i *)(b->round));
__m128i round1 = _mm_load_si128((__m128i *)(b->round + 8));
__m128i quant0 = _mm_load_si128((__m128i *)(b->quant));
__m128i quant1 = _mm_load_si128((__m128i *)(b->quant + 8));
__m128i dequant0 = _mm_load_si128((__m128i *)(d->dequant));
__m128i dequant1 = _mm_load_si128((__m128i *)(d->dequant + 8));
__m128i qcoeff0 = _mm_setzero_si128();
__m128i qcoeff1 = _mm_setzero_si128();
/* Duplicate to all lanes. */
zbin_extra = _mm_shufflelo_epi16(zbin_extra, 0);
zbin_extra = _mm_unpacklo_epi16(zbin_extra, zbin_extra);
/* Sign of z: z >> 15 */
sz0 = _mm_srai_epi16(z0, 15);
sz1 = _mm_srai_epi16(z1, 15);
/* x = abs(z): (z ^ sz) - sz */
x0 = _mm_xor_si128(z0, sz0);
x1 = _mm_xor_si128(z1, sz1);
x0 = _mm_sub_epi16(x0, sz0);
x1 = _mm_sub_epi16(x1, sz1);
/* zbin[] + zbin_extra */
zbin0 = _mm_add_epi16(zbin0, zbin_extra);
zbin1 = _mm_add_epi16(zbin1, zbin_extra);
/* In C x is compared to zbin where zbin = zbin[] + boost + extra. Rebalance
* the equation because boost is the only value which can change:
* x - (zbin[] + extra) >= boost */
x_minus_zbin0 = _mm_sub_epi16(x0, zbin0);
x_minus_zbin1 = _mm_sub_epi16(x1, zbin1);
/* All the remaining calculations are valid whether they are done now with
* simd or later inside the loop one at a time. */
x0 = _mm_add_epi16(x0, round0);
x1 = _mm_add_epi16(x1, round1);
y0 = _mm_mulhi_epi16(x0, quant0);
y1 = _mm_mulhi_epi16(x1, quant1);
y0 = _mm_add_epi16(y0, x0);
y1 = _mm_add_epi16(y1, x1);
/* Instead of shifting each value independently we convert the scaling
* factor with 1 << (16 - shift) so we can use multiply/return high half. */
y0 = _mm_mulhi_epi16(y0, quant_shift0);
y1 = _mm_mulhi_epi16(y1, quant_shift1);
/* Return the sign: (y ^ sz) - sz */
y0 = _mm_xor_si128(y0, sz0);
y1 = _mm_xor_si128(y1, sz1);
y0 = _mm_sub_epi16(y0, sz0);
y1 = _mm_sub_epi16(y1, sz1);
/* The loop gets unrolled anyway. Avoid the vp8_default_zig_zag1d lookup. */
SELECT_EOB(1, 0, x_minus_zbin0, y0, qcoeff0);
SELECT_EOB(2, 1, x_minus_zbin0, y0, qcoeff0);
SELECT_EOB(3, 4, x_minus_zbin0, y0, qcoeff0);
SELECT_EOB(4, 0, x_minus_zbin1, y1, qcoeff1);
SELECT_EOB(5, 5, x_minus_zbin0, y0, qcoeff0);
SELECT_EOB(6, 2, x_minus_zbin0, y0, qcoeff0);
SELECT_EOB(7, 3, x_minus_zbin0, y0, qcoeff0);
SELECT_EOB(8, 6, x_minus_zbin0, y0, qcoeff0);
SELECT_EOB(9, 1, x_minus_zbin1, y1, qcoeff1);
SELECT_EOB(10, 4, x_minus_zbin1, y1, qcoeff1);
SELECT_EOB(11, 5, x_minus_zbin1, y1, qcoeff1);
SELECT_EOB(12, 2, x_minus_zbin1, y1, qcoeff1);
SELECT_EOB(13, 7, x_minus_zbin0, y0, qcoeff0);
SELECT_EOB(14, 3, x_minus_zbin1, y1, qcoeff1);
SELECT_EOB(15, 6, x_minus_zbin1, y1, qcoeff1);
SELECT_EOB(16, 7, x_minus_zbin1, y1, qcoeff1);
_mm_store_si128((__m128i *)(d->qcoeff), qcoeff0);
_mm_store_si128((__m128i *)(d->qcoeff + 8), qcoeff1);
dqcoeff0 = _mm_mullo_epi16(qcoeff0, dequant0);
dqcoeff1 = _mm_mullo_epi16(qcoeff1, dequant1);
_mm_store_si128((__m128i *)(d->dqcoeff), dqcoeff0);
_mm_store_si128((__m128i *)(d->dqcoeff + 8), dqcoeff1);
*d->eob = eob;
}
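
The removed intrinsics above, like the .asm file that supersedes them, vectorize a single scalar recipe spelled out in their comments: sign/abs split, the rebalanced zig-zag-run zbin test, rounding, two high multiplies with quant_shift prescaled to 1 << (16 - shift), then sign restore. A hedged scalar rendering with simplified names (zig_zag being vp8_default_zig_zag1d):

/* Hedged scalar sketch; names are simplified and quant_shift16[] is
 * assumed prescaled to 1 << (16 - shift), as the comment above notes. */
int quantize_block(const short *coeff, const short *zbin, short zbin_extra,
                   const short *round, const short *quant,
                   const short *quant_shift16, const short *dequant,
                   const short *zrun_zbin_boost, const int *zig_zag,
                   short *qcoeff, short *dqcoeff) {
  const short *boost = zrun_zbin_boost;
  int eob = 0, i;
  for (i = 0; i < 16; ++i) {
    const int rc = zig_zag[i];
    const int sz = coeff[rc] >> 15;       /* 0 or -1 */
    const int x = (coeff[rc] ^ sz) - sz;  /* abs(z) */
    int y = x + round[rc];
    y += (y * quant[rc]) >> 16;           /* high half of x * quant, + x */
    y = (y * quant_shift16[rc]) >> 16;    /* downshift via multiply */
    y = (y ^ sz) - sz;                    /* restore sign */
    qcoeff[rc] = 0;
    dqcoeff[rc] = 0;
    /* Rebalanced test from the comment above:
     * x - (zbin[] + extra) >= boost. */
    if (x - (zbin[rc] + zbin_extra) >= *boost++ && y != 0) {
      qcoeff[rc] = (short)y;
      dqcoeff[rc] = (short)(y * dequant[rc]);
      eob = i + 1;                        /* SELECT_EOB's i is 1-based */
      boost = zrun_zbin_boost;            /* reset the zero-run boost */
    }
  }
  return eob;
}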


@@ -107,6 +107,7 @@ VP8_COMMON_SRCS-$(HAVE_SSSE3) += common/x86/variance_impl_ssse3.asm
VP8_COMMON_SRCS-$(HAVE_SSE4_1) += common/x86/sad_sse4.asm
ifeq ($(CONFIG_POSTPROC),yes)
VP8_COMMON_SRCS-$(ARCH_X86)$(ARCH_X86_64) += common/x86/postproc_x86.c
VP8_COMMON_SRCS-$(HAVE_MMX) += common/x86/postproc_mmx.asm
VP8_COMMON_SRCS-$(HAVE_SSE2) += common/x86/mfqe_sse2.asm
VP8_COMMON_SRCS-$(HAVE_SSE2) += common/x86/postproc_sse2.asm


@@ -89,7 +89,6 @@ VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/dct_sse2.asm
VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/fwalsh_sse2.asm
VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/quantize_sse2.c
VP8_CX_SRCS-$(HAVE_SSSE3) += encoder/x86/quantize_ssse3.c
VP8_CX_SRCS-$(HAVE_SSE4_1) += encoder/x86/quantize_sse4.c
ifeq ($(CONFIG_TEMPORAL_DENOISING),yes)
VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/denoising_sse2.c
@@ -98,6 +97,7 @@ endif
VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/subtract_sse2.asm
VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/temporal_filter_apply_sse2.asm
VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp8_enc_stubs_sse2.c
VP8_CX_SRCS-$(HAVE_SSE4_1) += encoder/x86/quantize_sse4.asm
VP8_CX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += encoder/x86/quantize_mmx.asm
VP8_CX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += encoder/x86/encodeopt.asm
VP8_CX_SRCS-$(ARCH_X86_64) += encoder/x86/ssim_opt_x86_64.asm


@@ -25,14 +25,12 @@ void vp9_convolve8_neon(const uint8_t *src, ptrdiff_t src_stride,
// Account for the vertical phase needing 3 lines prior and 4 lines post
int intermediate_height = h + 7;
if (x_step_q4 != 16 || y_step_q4 != 16) {
vp9_convolve8_c(src, src_stride,
dst, dst_stride,
filter_x, x_step_q4,
filter_y, y_step_q4,
w, h);
return;
}
if (x_step_q4 != 16 || y_step_q4 != 16)
return vp9_convolve8_c(src, src_stride,
dst, dst_stride,
filter_x, x_step_q4,
filter_y, y_step_q4,
w, h);
/* Filter starting 3 lines back. The neon implementation will ignore the
* given height and filter a multiple of 4 lines. Since this goes in to
@@ -59,14 +57,12 @@ void vp9_convolve8_avg_neon(const uint8_t *src, ptrdiff_t src_stride,
DECLARE_ALIGNED_ARRAY(8, uint8_t, temp, 64 * 72);
int intermediate_height = h + 7;
if (x_step_q4 != 16 || y_step_q4 != 16) {
vp9_convolve8_avg_c(src, src_stride,
dst, dst_stride,
filter_x, x_step_q4,
filter_y, y_step_q4,
w, h);
return;
}
if (x_step_q4 != 16 || y_step_q4 != 16)
return vp9_convolve8_avg_c(src, src_stride,
dst, dst_stride,
filter_x, x_step_q4,
filter_y, y_step_q4,
w, h);
/* This implementation has the same issues as above. In addition, we only want
* to average the values after both passes.


@@ -9,7 +9,6 @@
*/
#include "./vp9_rtcd.h"
#include "vpx/vpx_integer.h"
void vp9_lpf_horizontal_8_dual_neon(uint8_t *s, int p /* pitch */,
const uint8_t *blimit0,


@@ -109,9 +109,7 @@ void vp9_free_frame_buffers(VP9_COMMON *cm) {
}
vp9_free_frame_buffer(&cm->post_proc_buffer);
}
void vp9_free_context_buffers(VP9_COMMON *cm) {
free_mi(cm);
vpx_free(cm->last_frame_seg_map);
@@ -167,55 +165,37 @@ int vp9_resize_frame_buffers(VP9_COMMON *cm, int width, int height) {
fail:
vp9_free_frame_buffers(cm);
vp9_free_context_buffers(cm);
return 1;
}
static void init_frame_bufs(VP9_COMMON *cm) {
int i;
cm->new_fb_idx = FRAME_BUFFERS - 1;
cm->frame_bufs[cm->new_fb_idx].ref_count = 1;
for (i = 0; i < REF_FRAMES; ++i) {
cm->ref_frame_map[i] = i;
cm->frame_bufs[i].ref_count = 1;
}
}
int vp9_alloc_frame_buffers(VP9_COMMON *cm, int width, int height) {
int i;
const int aligned_width = ALIGN_POWER_OF_TWO(width, MI_SIZE_LOG2);
const int aligned_height = ALIGN_POWER_OF_TWO(height, MI_SIZE_LOG2);
const int ss_x = cm->subsampling_x;
const int ss_y = cm->subsampling_y;
int i;
vp9_free_frame_buffers(cm);
for (i = 0; i < FRAME_BUFFERS; ++i) {
for (i = 0; i < FRAME_BUFFERS; i++) {
cm->frame_bufs[i].ref_count = 0;
if (vp9_alloc_frame_buffer(&cm->frame_bufs[i].buf, width, height,
ss_x, ss_y, VP9_ENC_BORDER_IN_PIXELS) < 0)
goto fail;
}
init_frame_bufs(cm);
cm->new_fb_idx = FRAME_BUFFERS - 1;
cm->frame_bufs[cm->new_fb_idx].ref_count = 1;
for (i = 0; i < REF_FRAMES; i++) {
cm->ref_frame_map[i] = i;
cm->frame_bufs[i].ref_count = 1;
}
if (vp9_alloc_frame_buffer(&cm->post_proc_buffer, width, height, ss_x, ss_y,
VP9_ENC_BORDER_IN_PIXELS) < 0)
goto fail;
return 0;
fail:
vp9_free_frame_buffers(cm);
return 1;
}
int vp9_alloc_context_buffers(VP9_COMMON *cm, int width, int height) {
const int aligned_width = ALIGN_POWER_OF_TWO(width, MI_SIZE_LOG2);
const int aligned_height = ALIGN_POWER_OF_TWO(height, MI_SIZE_LOG2);
vp9_free_context_buffers(cm);
set_mb_mi(cm, aligned_width, aligned_height);
if (alloc_mi(cm, cm->mi_stride * (cm->mi_rows + MI_BLOCK_SIZE)))
@@ -244,13 +224,12 @@ int vp9_alloc_context_buffers(VP9_COMMON *cm, int width, int height) {
return 0;
fail:
vp9_free_context_buffers(cm);
vp9_free_frame_buffers(cm);
return 1;
}
void vp9_remove_common(VP9_COMMON *cm) {
vp9_free_frame_buffers(cm);
vp9_free_context_buffers(cm);
vp9_free_internal_frame_buffers(&cm->int_frame_buffers);
}


@@ -23,12 +23,8 @@ void vp9_remove_common(struct VP9Common *cm);
int vp9_resize_frame_buffers(struct VP9Common *cm, int width, int height);
int vp9_alloc_frame_buffers(struct VP9Common *cm, int width, int height);
int vp9_alloc_state_buffers(struct VP9Common *cm, int width, int height);
int vp9_alloc_context_buffers(struct VP9Common *cm, int width, int height);
void vp9_free_frame_buffers(struct VP9Common *cm);
void vp9_free_state_buffers(struct VP9Common *cm);
void vp9_free_context_buffers(struct VP9Common *cm);
void vp9_update_frame_size(struct VP9Common *cm);


@@ -32,9 +32,6 @@ extern "C" {
#define BLOCK_SIZE_GROUPS 4
#define SKIP_CONTEXTS 3
#define INTER_MODE_CONTEXTS 7
#if CONFIG_COPY_CODING
#define COPY_MODE_CONTEXTS 5
#endif
/* Segment Feature Masks */
#define MAX_MV_REF_CANDIDATES 2
@@ -82,16 +79,6 @@ typedef enum {
MB_MODE_COUNT
} PREDICTION_MODE;
#if CONFIG_COPY_CODING
typedef enum {
NOREF,
REF0,
REF1,
REF2,
COPY_MODE_COUNT
} COPY_MODE;
#endif
static INLINE int is_inter_mode(PREDICTION_MODE mode) {
return mode >= NEARESTMV && mode <= NEWMV;
}
@@ -131,86 +118,11 @@ static INLINE int mi_width_log2(BLOCK_SIZE sb_type) {
return mi_width_log2_lookup[sb_type];
}
#if CONFIG_SUPERTX
static INLINE TX_SIZE bsize_to_tx_size(BLOCK_SIZE bsize) {
const TX_SIZE tx_size_lookup[BLOCK_SIZES] = {
TX_4X4, TX_4X4, TX_4X4,
TX_8X8, TX_8X8, TX_8X8,
TX_16X16, TX_16X16, TX_16X16,
TX_32X32, TX_32X32, TX_32X32, TX_32X32};
return tx_size_lookup[bsize];
}
#endif
#if CONFIG_MASKED_INTERINTER
#define MASK_BITS_SML 3
#define MASK_BITS_MED 4
#define MASK_BITS_BIG 5
#define MASK_NONE -1
static inline int get_mask_bits(BLOCK_SIZE sb_type) {
if (sb_type < BLOCK_8X8)
return 0;
if (sb_type <= BLOCK_8X8)
return MASK_BITS_SML;
else if (sb_type <= BLOCK_32X32)
return MASK_BITS_MED;
else
return MASK_BITS_BIG;
}
#endif
#if CONFIG_INTERINTRA
static INLINE TX_SIZE intra_size_log2_for_interintra(int bs) {
switch (bs) {
case 4:
return TX_4X4;
break;
case 8:
return TX_8X8;
break;
case 16:
return TX_16X16;
break;
case 32:
return TX_32X32;
break;
default:
return TX_32X32;
break;
}
}
static INLINE int is_interintra_allowed(BLOCK_SIZE sb_type) {
return ((sb_type >= BLOCK_8X8) && (sb_type < BLOCK_64X64));
}
#if CONFIG_MASKED_INTERINTRA
#define MASK_BITS_SML_INTERINTRA 3
#define MASK_BITS_MED_INTERINTRA 4
#define MASK_BITS_BIG_INTERINTRA 5
#define MASK_NONE_INTERINTRA -1
static INLINE int get_mask_bits_interintra(BLOCK_SIZE sb_type) {
if (sb_type == BLOCK_4X4)
return 0;
if (sb_type <= BLOCK_8X8)
return MASK_BITS_SML_INTERINTRA;
else if (sb_type <= BLOCK_32X32)
return MASK_BITS_MED_INTERINTRA;
else
return MASK_BITS_BIG_INTERINTRA;
}
#endif
#endif
// This structure now relates to 8x8 block regions.
typedef struct {
// Common for both INTER and INTRA blocks
BLOCK_SIZE sb_type;
PREDICTION_MODE mode;
#if CONFIG_FILTERINTRA
int filterbit, uv_filterbit;
#endif
TX_SIZE tx_size;
uint8_t skip;
uint8_t segment_id;
@@ -225,34 +137,10 @@ typedef struct {
int_mv ref_mvs[MAX_REF_FRAMES][MAX_MV_REF_CANDIDATES];
uint8_t mode_context[MAX_REF_FRAMES];
INTERP_FILTER interp_filter;
#if CONFIG_EXT_TX
EXT_TX_TYPE ext_txfrm;
#endif
#if CONFIG_MASKED_INTERINTER
int use_masked_interinter;
int mask_index;
#endif
#if CONFIG_INTERINTRA
PREDICTION_MODE interintra_mode, interintra_uv_mode;
#if CONFIG_MASKED_INTERINTRA
int interintra_mask_index;
int interintra_uv_mask_index;
int use_masked_interintra;
#endif
#endif
#if CONFIG_COPY_CODING
COPY_MODE copy_mode;
int inter_ref_count;
#endif
} MB_MODE_INFO;
typedef struct {
MB_MODE_INFO mbmi;
#if CONFIG_FILTERINTRA
int b_filter_info[4];
#endif
b_mode_info bmi[4];
} MODE_INFO;
@@ -261,16 +149,6 @@ static INLINE PREDICTION_MODE get_y_mode(const MODE_INFO *mi, int block) {
: mi->mbmi.mode;
}
#if CONFIG_FILTERINTRA
static INLINE int is_filter_allowed(PREDICTION_MODE mode) {
return 1;
}
static INLINE int is_filter_enabled(TX_SIZE txsize) {
return (txsize <= TX_32X32);
}
#endif
static INLINE int is_inter_block(const MB_MODE_INFO *mbmi) {
return mbmi->ref_frame[0] > INTRA_FRAME;
}
@@ -362,13 +240,6 @@ typedef struct macroblockd {
PARTITION_CONTEXT left_seg_context[8];
} MACROBLOCKD;
#if CONFIG_SUPERTX
static INLINE int supertx_enabled(const MB_MODE_INFO *mbmi) {
return mbmi->tx_size >
MIN(b_width_log2(mbmi->sb_type), b_height_log2(mbmi->sb_type));
}
#endif
static INLINE BLOCK_SIZE get_subsize(BLOCK_SIZE bsize,
PARTITION_TYPE partition) {
const BLOCK_SIZE subsize = subsize_lookup[partition][bsize];
@@ -382,20 +253,8 @@ static INLINE TX_TYPE get_tx_type(PLANE_TYPE plane_type,
const MACROBLOCKD *xd) {
const MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
#if !CONFIG_EXT_TX
if (plane_type != PLANE_TYPE_Y || is_inter_block(mbmi))
return DCT_DCT;
#else
if (plane_type != PLANE_TYPE_Y)
return DCT_DCT;
if (is_inter_block(mbmi)) {
if (mbmi->ext_txfrm == NORM || mbmi->tx_size >= TX_32X32)
return DCT_DCT;
else
return ADST_ADST;
}
#endif
return intra_mode_to_tx_type_lookup[mbmi->mode];
}
@@ -403,20 +262,8 @@ static INLINE TX_TYPE get_tx_type_4x4(PLANE_TYPE plane_type,
const MACROBLOCKD *xd, int ib) {
const MODE_INFO *const mi = xd->mi[0];
#if !CONFIG_EXT_TX
if (plane_type != PLANE_TYPE_Y || xd->lossless || is_inter_block(&mi->mbmi))
return DCT_DCT;
#else
if (plane_type != PLANE_TYPE_Y || xd->lossless)
return DCT_DCT;
if (is_inter_block(&mi->mbmi)) {
if (mi->mbmi.ext_txfrm == NORM)
return DCT_DCT;
else
return ADST_ADST;
}
#endif
return intra_mode_to_tx_type_lookup[get_y_mode(mi, ib)];
}
@@ -434,15 +281,7 @@ static INLINE TX_SIZE get_uv_tx_size_impl(TX_SIZE y_tx_size, BLOCK_SIZE bsize) {
}
static INLINE TX_SIZE get_uv_tx_size(const MB_MODE_INFO *mbmi) {
#if CONFIG_SUPERTX
if (!supertx_enabled(mbmi)) {
#endif
return get_uv_tx_size_impl(mbmi->tx_size, mbmi->sb_type);
#if CONFIG_SUPERTX
} else {
return uvsupertx_size_lookup[mbmi->tx_size];
}
#endif
}
static INLINE BLOCK_SIZE get_plane_block_size(BLOCK_SIZE bsize,


@@ -133,15 +133,6 @@ const BLOCK_SIZE ss_size_lookup[BLOCK_SIZES][2][2] = {
{{BLOCK_64X64, BLOCK_64X32}, {BLOCK_32X64, BLOCK_32X32}},
};
#if CONFIG_SUPERTX
const TX_SIZE uvsupertx_size_lookup[TX_SIZES] = {
TX_4X4,
TX_4X4,
TX_8X8,
TX_16X16
};
#endif
// Generates 4 bit field in which each bit set to 1 represents
// a blocksize partition 1111 means we split 64x64, 32x32, 16x16
// and 8x8. 1000 means we just split the 64x64 to 32x32
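
A tiny illustration of consuming that field; the helper name and its log2 convention are assumptions for the example, not code from the tree:

/* Bit 3 = the 64x64 is split, bit 2 = 32x32, bit 1 = 16x16, bit 0 = 8x8,
 * so 0xF ("1111") splits every level and 0x8 ("1000") splits only the
 * 64x64 into 32x32s. */
static int is_level_split(int field, int sq_size_log2 /* 3..6 for 8..64 */) {
  return (field >> (sq_size_log2 - 3)) & 1;
}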


@@ -31,9 +31,6 @@ extern const BLOCK_SIZE subsize_lookup[PARTITION_TYPES][BLOCK_SIZES];
extern const TX_SIZE max_txsize_lookup[BLOCK_SIZES];
extern const TX_SIZE tx_mode_to_biggest_tx_size[TX_MODES];
extern const BLOCK_SIZE ss_size_lookup[BLOCK_SIZES][2][2];
#if CONFIG_SUPERTX
extern const TX_SIZE uvsupertx_size_lookup[TX_SIZES];
#endif
#ifdef __cplusplus
} // extern "C"


@@ -117,25 +117,17 @@ static void convolve(const uint8_t *src, ptrdiff_t src_stride,
const InterpKernel *const y_filters,
int y0_q4, int y_step_q4,
int w, int h) {
// Note: Fixed size intermediate buffer, temp, places limits on parameters.
// 2d filtering proceeds in 2 steps:
// (1) Interpolate horizontally into an intermediate buffer, temp.
// (2) Interpolate temp vertically to derive the sub-pixel result.
// Deriving the maximum number of rows in the temp buffer (135):
// --Smallest scaling factor is x1/2 ==> y_step_q4 = 32 (Normative).
// --Largest block size is 64x64 pixels.
// --64 rows in the downscaled frame span a distance of (64 - 1) * 32 in the
// original frame (in 1/16th pixel units).
// --Must round-up because block may be located at sub-pixel position.
// --Require an additional SUBPEL_TAPS rows for the 8-tap filter tails.
// --((64 - 1) * 32 + 15) >> 4 + 8 = 135.
uint8_t temp[135 * 64];
// Fixed size intermediate buffer places limits on parameters.
// Maximum intermediate_height is 324, for y_step_q4 == 80,
// h == 64, taps == 8.
// y_step_q4 of 80 allows for 1/10 scale for 5 layer svc
uint8_t temp[64 * 324];
int intermediate_height = (((h - 1) * y_step_q4 + 15) >> 4) + SUBPEL_TAPS;
assert(w <= 64);
assert(h <= 64);
assert(y_step_q4 <= 32);
assert(x_step_q4 <= 32);
assert(y_step_q4 <= 80);
assert(x_step_q4 <= 80);
if (intermediate_height < h)
intermediate_height = h;
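
Plugging the quoted worst case into the formula above confirms the bound: with 8-tap filters, h == 64 and y_step_q4 == 80 give (((64 - 1) * 80 + 15) >> 4) + 8 = 323 rows, inside the 324-row temp buffer. A self-checking sketch:

#include <assert.h>

enum { SUBPEL_TAPS = 8 };  /* 8-tap filters, per the comment above */

static int intermediate_rows(int h, int y_step_q4) {
  return (((h - 1) * y_step_q4 + 15) >> 4) + SUBPEL_TAPS;
}

int main(void) {
  /* 1/10 scale for 5-layer SVC: y_step_q4 == 80, h up to 64. */
  assert(intermediate_rows(64, 80) == 323);  /* fits in temp[64 * 324] */
  return 0;
}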


@@ -13,84 +13,6 @@
#include "vp9/common/vp9_onyxc_int.h"
#include "vp9/common/vp9_seg_common.h"
#if CONFIG_MASKED_INTERINTER
static const vp9_prob default_masked_interinter_prob[BLOCK_SIZES] = {
192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192
};
#endif
#if CONFIG_INTERINTRA
static const vp9_prob default_interintra_prob[BLOCK_SIZES] = {
192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192
};
#if CONFIG_MASKED_INTERINTRA
static const vp9_prob default_masked_interintra_prob[BLOCK_SIZES] = {
192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192
};
#endif
#endif
#if CONFIG_FILTERINTRA
static const vp9_prob default_filterintra_prob[TX_SIZES][INTRA_MODES] = {
// DC V H D45 D135 D117 D153 D207 D63 TM
{153, 171, 147, 150, 129, 101, 100, 153, 132, 111},
{171, 173, 185, 131, 70, 53, 70, 148, 127, 114},
{175, 203, 213, 86, 45, 71, 41, 150, 125, 154},
{235, 230, 154, 202, 154, 205, 37, 128, 0, 202}
};
#endif
#if CONFIG_EXT_TX
static const vp9_prob default_ext_tx_prob = 178;
#endif
#if CONFIG_SUPERTX
static const vp9_prob default_supertx_prob[TX_SIZES] = {
255, 160, 160, 160
};
static const vp9_prob default_supertxsplit_prob[TX_SIZES] = {
255, 200, 200, 200
};
#endif
#if CONFIG_COPY_CODING
static const vp9_prob default_copy_noref_prob[COPY_MODE_CONTEXTS]
[BLOCK_SIZES] = {
{255, 255, 255, 82, 148, 182, 65, 193, 158, 70, 138, 101, 23},
{255, 255, 255, 118, 153, 161, 123, 169, 157, 82, 101, 123, 88},
{255, 255, 255, 130, 178, 226, 194, 196, 174, 173, 135, 144, 141},
{255, 255, 255, 178, 218, 225, 197, 230, 222, 215, 220, 220, 220},
{255, 255, 255, 243, 248, 241, 233, 249, 249, 249, 249, 249, 249}
};
static const vp9_prob default_copy_mode_probs_l2[COPY_MODE_CONTEXTS][1] = {
{207},
{135},
{141},
{189},
{209}
};
const vp9_tree_index vp9_copy_mode_tree_l2[TREE_SIZE(2)] = {
-(REF0 - REF0), -(REF1 - REF0)
};
static const vp9_prob default_copy_mode_probs[COPY_MODE_CONTEXTS]
[COPY_MODE_COUNT - 2] = {
{130, 159},
{126, 176},
{120, 150},
{158, 183},
{149, 125}
};
const vp9_tree_index vp9_copy_mode_tree[TREE_SIZE(COPY_MODE_COUNT - 1)] = {
-(REF0 - REF0), 2,
-(REF1 - REF0), -(REF2 - REF0)
};
#endif
const vp9_prob vp9_kf_y_mode_prob[INTRA_MODES][INTRA_MODES][INTRA_MODES - 1] = {
{ // above = dc
{ 137, 30, 42, 148, 151, 207, 70, 52, 91 }, // left = dc
@@ -323,11 +245,7 @@ const vp9_tree_index vp9_partition_tree[TREE_SIZE(PARTITION_TYPES)] = {
};
static const vp9_prob default_intra_inter_p[INTRA_INTER_CONTEXTS] = {
#if !CONFIG_COPY_CODING
9, 102, 187, 225
#else
35, 112, 187, 225
#endif
};
static const vp9_prob default_comp_inter_p[COMP_INTER_CONTEXTS] = {
@@ -408,30 +326,6 @@ void vp9_init_mode_probs(FRAME_CONTEXT *fc) {
fc->tx_probs = default_tx_probs;
vp9_copy(fc->skip_probs, default_skip_probs);
vp9_copy(fc->inter_mode_probs, default_inter_mode_probs);
#if CONFIG_MASKED_INTERINTER
vp9_copy(fc->masked_interinter_prob, default_masked_interinter_prob);
#endif
#if CONFIG_INTERINTRA
vp9_copy(fc->interintra_prob, default_interintra_prob);
#if CONFIG_MASKED_INTERINTRA
vp9_copy(fc->masked_interintra_prob, default_masked_interintra_prob);
#endif
#endif
#if CONFIG_FILTERINTRA
vp9_copy(fc->filterintra_prob, default_filterintra_prob);
#endif
#if CONFIG_EXT_TX
fc->ext_tx_prob = default_ext_tx_prob;
#endif
#if CONFIG_SUPERTX
vp9_copy(fc->supertx_prob, default_supertx_prob);
vp9_copy(fc->supertxsplit_prob, default_supertxsplit_prob);
#endif
#if CONFIG_COPY_CODING
vp9_copy(fc->copy_noref_prob, default_copy_noref_prob);
vp9_copy(fc->copy_mode_probs_l2, default_copy_mode_probs_l2);
vp9_copy(fc->copy_mode_probs, default_copy_mode_probs);
#endif
}
const vp9_tree_index vp9_switchable_interp_tree
@@ -522,73 +416,6 @@ void vp9_adapt_mode_probs(VP9_COMMON *cm) {
for (i = 0; i < SKIP_CONTEXTS; ++i)
fc->skip_probs[i] = adapt_prob(pre_fc->skip_probs[i], counts->skip[i]);
#if CONFIG_MASKED_INTERINTER
if (cm->use_masked_interinter) {
for (i = 0; i < BLOCK_SIZES; ++i) {
if (get_mask_bits(i))
fc->masked_interinter_prob[i] = adapt_prob
(pre_fc->masked_interinter_prob[i],
counts->masked_interinter[i]);
}
}
#endif
#if CONFIG_INTERINTRA
if (cm->use_interintra) {
for (i = 0; i < BLOCK_SIZES; ++i) {
if (is_interintra_allowed(i))
fc->interintra_prob[i] = adapt_prob(pre_fc->interintra_prob[i],
counts->interintra[i]);
}
#if CONFIG_MASKED_INTERINTRA
if (cm->use_masked_interintra) {
for (i = 0; i < BLOCK_SIZES; ++i) {
if (is_interintra_allowed(i) && get_mask_bits_interintra(i))
fc->masked_interintra_prob[i] = adapt_prob(
pre_fc->masked_interintra_prob[i],
counts->masked_interintra[i]);
}
}
#endif
}
#endif
#if CONFIG_FILTERINTRA
for (i = 0; i < TX_SIZES; ++i)
for (j = 0; j < INTRA_MODES; ++j)
fc->filterintra_prob[i][j] = adapt_prob(pre_fc->filterintra_prob[i][j],
counts->filterintra[i][j]);
#endif
#if CONFIG_EXT_TX
fc->ext_tx_prob = adapt_prob(pre_fc->ext_tx_prob, counts->ext_tx);
#endif
#if CONFIG_SUPERTX
for (i = 1; i < TX_SIZES; ++i) {
fc->supertx_prob[i] = adapt_prob(pre_fc->supertx_prob[i],
counts->supertx[i]);
}
for (i = 1; i < TX_SIZES; ++i) {
fc->supertxsplit_prob[i] = adapt_prob(pre_fc->supertxsplit_prob[i],
counts->supertxsplit[i]);
}
#endif
#if CONFIG_COPY_CODING
for (i = 0; i < COPY_MODE_CONTEXTS; i++) {
for (j = BLOCK_8X8; j < BLOCK_SIZES; j++) {
fc->copy_noref_prob[i][j] =
adapt_prob(pre_fc->copy_noref_prob[i][j], counts->copy_noref[i][j]);
}
adapt_probs(vp9_copy_mode_tree_l2, pre_fc->copy_mode_probs_l2[i],
counts->copy_mode_l2[i], fc->copy_mode_probs_l2[i]);
adapt_probs(vp9_copy_mode_tree, pre_fc->copy_mode_probs[i],
counts->copy_mode[i], fc->copy_mode_probs[i]);
}
#endif
}
static void set_default_lf_deltas(struct loopfilter *lf) {


@@ -52,30 +52,6 @@ typedef struct frame_contexts {
struct tx_probs tx_probs;
vp9_prob skip_probs[SKIP_CONTEXTS];
nmv_context nmvc;
#if CONFIG_MASKED_INTERINTER
vp9_prob masked_interinter_prob[BLOCK_SIZES];
#endif
#if CONFIG_INTERINTRA
vp9_prob interintra_prob[BLOCK_SIZES];
#if CONFIG_MASKED_INTERINTRA
vp9_prob masked_interintra_prob[BLOCK_SIZES];
#endif
#endif
#if CONFIG_FILTERINTRA
vp9_prob filterintra_prob[TX_SIZES][INTRA_MODES];
#endif
#if CONFIG_EXT_TX
vp9_prob ext_tx_prob;
#endif
#if CONFIG_SUPERTX
vp9_prob supertx_prob[TX_SIZES];
vp9_prob supertxsplit_prob[TX_SIZES];
#endif
#if CONFIG_COPY_CODING
vp9_prob copy_noref_prob[COPY_MODE_CONTEXTS][BLOCK_SIZES];
vp9_prob copy_mode_probs_l2[COPY_MODE_CONTEXTS][1];
vp9_prob copy_mode_probs[COPY_MODE_CONTEXTS][COPY_MODE_COUNT - 2];
#endif
} FRAME_CONTEXT;
typedef struct {
@@ -95,31 +71,6 @@ typedef struct {
struct tx_counts tx;
unsigned int skip[SKIP_CONTEXTS][2];
nmv_context_counts mv;
#if CONFIG_MASKED_INTERINTER
unsigned int masked_interinter[BLOCK_SIZES][2];
#endif
#if CONFIG_INTERINTRA
unsigned int interintra[BLOCK_SIZES][2];
#if CONFIG_MASKED_INTERINTRA
unsigned int masked_interintra[BLOCK_SIZES][2];
#endif
#endif
#if CONFIG_FILTERINTRA
unsigned int filterintra[TX_SIZES][INTRA_MODES][2];
#endif
#if CONFIG_EXT_TX
unsigned int ext_tx[2];
#endif
#if CONFIG_SUPERTX
unsigned int supertx[TX_SIZES][2];
unsigned int supertxsplit[TX_SIZES][2];
unsigned int supertx_size[BLOCK_SIZES];
#endif
#if CONFIG_COPY_CODING
unsigned int copy_noref[COPY_MODE_CONTEXTS][BLOCK_SIZES][2];
unsigned int copy_mode_l2[COPY_MODE_CONTEXTS][2];
unsigned int copy_mode[COPY_MODE_CONTEXTS][COPY_MODE_COUNT - 1];
#endif
} FRAME_COUNTS;
extern const vp9_prob vp9_kf_uv_mode_prob[INTRA_MODES][INTRA_MODES - 1];
@@ -132,10 +83,6 @@ extern const vp9_tree_index vp9_inter_mode_tree[TREE_SIZE(INTER_MODES)];
extern const vp9_tree_index vp9_partition_tree[TREE_SIZE(PARTITION_TYPES)];
extern const vp9_tree_index vp9_switchable_interp_tree
[TREE_SIZE(SWITCHABLE_FILTERS)];
#if CONFIG_COPY_CODING
extern const vp9_tree_index vp9_copy_mode_tree_l2[TREE_SIZE(2)];
extern const vp9_tree_index vp9_copy_mode_tree[TREE_SIZE(COPY_MODE_COUNT - 1)];
#endif
void vp9_setup_past_independence(struct VP9Common *cm);


@@ -100,14 +100,6 @@ typedef enum {
TX_TYPES = 4
} TX_TYPE;
#if CONFIG_EXT_TX
typedef enum {
NORM = 0,
ALT = 1,
EXT_TX_TYPES = 2
} EXT_TX_TYPE;
#endif
typedef enum {
UNKNOWN = 0,
BT_601 = 1, // YUV


@@ -206,13 +206,6 @@ static const int mode_lf_lut[MB_MODE_COUNT] = {
1, 1, 0, 1 // INTER_MODES (ZEROMV == 0)
};
#if CONFIG_SUPERTX
static int supertx_enabled_lpf(const MB_MODE_INFO *mbmi) {
return mbmi->tx_size >
MIN(b_width_log2(mbmi->sb_type), b_height_log2(mbmi->sb_type));
}
#endif
static void update_sharpness(loop_filter_info_n *lfi, int sharpness_lvl) {
int lvl;
@@ -579,85 +572,6 @@ static void build_masks(const loop_filter_info_n *const lfi_n,
*int_4x4_uv |= (size_mask_uv[block_size] & 0xffff) << shift_uv;
}
#if CONFIG_SUPERTX
static void build_masks_supertx(const loop_filter_info_n *const lfi_n,
const MODE_INFO *mi, const int shift_y,
const int shift_uv,
LOOP_FILTER_MASK *lfm) {
const MB_MODE_INFO *mbmi = &mi->mbmi;
const TX_SIZE tx_size_y = mbmi->tx_size;
const TX_SIZE tx_size_uv = get_uv_tx_size(mbmi);
const BLOCK_SIZE block_size = 3 * (int)tx_size_y;
const int filter_level = get_filter_level(lfi_n, mbmi);
uint64_t *const left_y = &lfm->left_y[tx_size_y];
uint64_t *const above_y = &lfm->above_y[tx_size_y];
uint64_t *const int_4x4_y = &lfm->int_4x4_y;
uint16_t *const left_uv = &lfm->left_uv[tx_size_uv];
uint16_t *const above_uv = &lfm->above_uv[tx_size_uv];
uint16_t *const int_4x4_uv = &lfm->int_4x4_uv;
int i;
// If filter level is 0 we don't loop filter.
if (!filter_level) {
return;
} else {
const int w = num_8x8_blocks_wide_lookup[block_size];
const int h = num_8x8_blocks_high_lookup[block_size];
int index = shift_y;
for (i = 0; i < h; i++) {
vpx_memset(&lfm->lfl_y[index], filter_level, w);
index += 8;
}
}
// These set 1 in the current block size for the block size edges.
// For instance if the block size is 32x16, we'll set :
// above = 1111
// 0000
// and
// left = 1000
// = 1000
// NOTE : In this example the low bit is left most ( 1000 ) is stored as
// 1, not 8...
//
// U and v set things on a 16 bit scale.
//
*above_y |= above_prediction_mask[block_size] << shift_y;
*above_uv |= above_prediction_mask_uv[block_size] << shift_uv;
*left_y |= left_prediction_mask[block_size] << shift_y;
*left_uv |= left_prediction_mask_uv[block_size] << shift_uv;
// If the block has no coefficients and is not intra we skip applying
// the loop filter on block edges.
if (mbmi->skip && is_inter_block(mbmi))
return;
// Here we are adding a mask for the transform size. The transform
// size mask is set to be correct for a 64x64 prediction block size. We
// mask to match the size of the block we are working on and then shift it
// into place..
*above_y |= (size_mask[block_size] &
above_64x64_txform_mask[tx_size_y]) << shift_y;
*above_uv |= (size_mask_uv[block_size] &
above_64x64_txform_mask_uv[tx_size_uv]) << shift_uv;
*left_y |= (size_mask[block_size] &
left_64x64_txform_mask[tx_size_y]) << shift_y;
*left_uv |= (size_mask_uv[block_size] &
left_64x64_txform_mask_uv[tx_size_uv]) << shift_uv;
// Here we are trying to determine what to do with the internal 4x4 block
// boundaries. These differ from the 4x4 boundaries on the outside edge of
// an 8x8 in that the internal ones can be skipped and don't depend on
// the prediction block size.
if (tx_size_y == TX_4X4)
*int_4x4_y |= (size_mask[block_size] & 0xffffffffffffffff) << shift_y;
if (tx_size_uv == TX_4X4)
*int_4x4_uv |= (size_mask_uv[block_size] & 0xffff) << shift_uv;
}
#endif
// This function does the same thing as the one above with the exception that
// it only affects the y masks. It exists because for blocks < 16x16 in size,
// we only update u and v masks on the first block.
@@ -701,48 +615,6 @@ static void build_y_mask(const loop_filter_info_n *const lfi_n,
*int_4x4_y |= (size_mask[block_size] & 0xffffffffffffffff) << shift_y;
}
#if CONFIG_SUPERTX
static void build_y_mask_supertx(const loop_filter_info_n *const lfi_n,
const MODE_INFO *mi, const int shift_y,
LOOP_FILTER_MASK *lfm) {
const MB_MODE_INFO *mbmi = &mi->mbmi;
const TX_SIZE tx_size_y = mbmi->tx_size;
const BLOCK_SIZE block_size = 3 * (int)tx_size_y;
const int filter_level = get_filter_level(lfi_n, mbmi);
uint64_t *const left_y = &lfm->left_y[tx_size_y];
uint64_t *const above_y = &lfm->above_y[tx_size_y];
uint64_t *const int_4x4_y = &lfm->int_4x4_y;
int i;
if (!filter_level) {
return;
} else {
const int w = num_8x8_blocks_wide_lookup[block_size];
const int h = num_8x8_blocks_high_lookup[block_size];
int index = shift_y;
for (i = 0; i < h; i++) {
vpx_memset(&lfm->lfl_y[index], filter_level, w);
index += 8;
}
}
*above_y |= above_prediction_mask[block_size] << shift_y;
*left_y |= left_prediction_mask[block_size] << shift_y;
if (mbmi->skip && is_inter_block(mbmi))
return;
*above_y |= (size_mask[block_size] &
above_64x64_txform_mask[tx_size_y]) << shift_y;
*left_y |= (size_mask[block_size] &
left_64x64_txform_mask[tx_size_y]) << shift_y;
if (tx_size_y == TX_4X4)
*int_4x4_y |= (size_mask[block_size] & 0xffffffffffffffff) << shift_y;
}
#endif
// This function sets up the bit masks for the entire 64x64 region represented
// by mi_row, mi_col.
// TODO(JBB): This function only works for yv12.
@@ -778,9 +650,6 @@ void vp9_setup_mask(VP9_COMMON *const cm, const int mi_row, const int mi_col,
cm->mi_rows - mi_row : MI_BLOCK_SIZE);
const int max_cols = (mi_col + MI_BLOCK_SIZE > cm->mi_cols ?
cm->mi_cols - mi_col : MI_BLOCK_SIZE);
#if CONFIG_SUPERTX
int supertx;
#endif
vp9_zero(*lfm);
@@ -818,43 +687,20 @@ void vp9_setup_mask(VP9_COMMON *const cm, const int mi_row, const int mi_col,
build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm);
break;
case BLOCK_32X16:
#if CONFIG_SUPERTX
supertx = supertx_enabled_lpf(&mip[0]->mbmi);
if (!supertx) {
#endif
build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm);
if (mi_32_row_offset + 2 >= max_rows)
continue;
mip2 = mip + mode_info_stride * 2;
build_masks(lfi_n, mip2[0], shift_y + 16, shift_uv + 4, lfm);
#if CONFIG_SUPERTX
} else {
build_masks_supertx(lfi_n, mip[0], shift_y, shift_uv, lfm);
}
#endif
break;
case BLOCK_16X32:
#if CONFIG_SUPERTX
supertx = supertx_enabled_lpf(&mip[0]->mbmi);
if (!supertx) {
#endif
build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm);
if (mi_32_col_offset + 2 >= max_cols)
continue;
mip2 = mip + 2;
build_masks(lfi_n, mip2[0], shift_y + 2, shift_uv + 1, lfm);
#if CONFIG_SUPERTX
} else {
build_masks_supertx(lfi_n, mip[0], shift_y, shift_uv, lfm);
}
#endif
break;
default:
#if CONFIG_SUPERTX
if (mip[0]->mbmi.tx_size == TX_32X32) {
build_masks_supertx(lfi_n, mip[0], shift_y, shift_uv, lfm);
} else {
#endif
for (idx_16 = 0; idx_16 < 4; mip += offset_16[idx_16], ++idx_16) {
const int shift_y = shift_32_y[idx_32] + shift_16_y[idx_16];
const int shift_uv = shift_32_uv[idx_32] + shift_16_uv[idx_16];
@@ -871,56 +717,24 @@ void vp9_setup_mask(VP9_COMMON *const cm, const int mi_row, const int mi_col,
build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm);
break;
case BLOCK_16X8:
#if CONFIG_SUPERTX
supertx = supertx_enabled_lpf(&mip[0]->mbmi);
if (!supertx) {
#endif
build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm);
if (mi_16_row_offset + 1 >= max_rows)
continue;
mip2 = mip + mode_info_stride;
build_y_mask(lfi_n, mip2[0], shift_y + 8, lfm);
#if CONFIG_SUPERTX
} else {
build_masks_supertx(lfi_n, mip[0], shift_y, shift_uv, lfm);
}
#endif
break;
case BLOCK_8X16:
#if CONFIG_SUPERTX
supertx = supertx_enabled_lpf(&mip[0]->mbmi);
if (!supertx) {
#endif
build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm);
if (mi_16_col_offset + 1 >= max_cols)
continue;
mip2 = mip + 1;
build_y_mask(lfi_n, mip2[0], shift_y + 1, lfm);
#if CONFIG_SUPERTX
} else {
build_masks_supertx(lfi_n, mip[0], shift_y, shift_uv, lfm);
}
#endif
break;
default: {
#if CONFIG_SUPERTX
if (mip[0]->mbmi.tx_size == TX_16X16) {
build_masks_supertx(lfi_n, mip[0], shift_y, shift_uv, lfm);
} else {
#endif
const int shift_y = shift_32_y[idx_32] +
shift_16_y[idx_16] +
shift_8_y[0];
#if CONFIG_SUPERTX
supertx = supertx_enabled_lpf(&mip[0]->mbmi);
if (!supertx) {
#endif
build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm);
#if CONFIG_SUPERTX
} else {
build_masks_supertx(lfi_n, mip[0], shift_y, shift_uv, lfm);
}
#endif
mip += offset[0];
for (idx_8 = 1; idx_8 < 4; mip += offset[idx_8], ++idx_8) {
const int shift_y = shift_32_y[idx_32] +
@@ -934,26 +748,12 @@ void vp9_setup_mask(VP9_COMMON *const cm, const int mi_row, const int mi_col,
if (mi_8_col_offset >= max_cols ||
mi_8_row_offset >= max_rows)
continue;
#if CONFIG_SUPERTX
supertx = supertx_enabled_lpf(&mip[0]->mbmi);
if (!supertx)
#endif
build_y_mask(lfi_n, mip[0], shift_y, lfm);
#if CONFIG_SUPERTX
else
build_y_mask_supertx(lfi_n, mip[0], shift_y, lfm);
#endif
}
#if CONFIG_SUPERTX
}
#endif
break;
}
}
}
#if CONFIG_SUPERTX
}
#endif
break;
}
}

View File

@@ -11,6 +11,181 @@
#include "vp9/common/vp9_mvref_common.h"
#define MVREF_NEIGHBOURS 8
typedef struct position {
int row;
int col;
} POSITION;
typedef enum {
BOTH_ZERO = 0,
ZERO_PLUS_PREDICTED = 1,
BOTH_PREDICTED = 2,
NEW_PLUS_NON_INTRA = 3,
BOTH_NEW = 4,
INTRA_PLUS_NON_INTRA = 5,
BOTH_INTRA = 6,
INVALID_CASE = 9
} motion_vector_context;
// This is used to figure out a context for the ref blocks. The code flattens
// an array that would have 3 possible counts (0, 1 & 2) for 3 choices by
// adding 9 for each intra block, 3 for each zero mv and 1 for each new
// motion vector. This single number is then converted into a context
// with a single lookup ( counter_to_context ).
static const int mode_2_counter[MB_MODE_COUNT] = {
9, // DC_PRED
9, // V_PRED
9, // H_PRED
9, // D45_PRED
9, // D135_PRED
9, // D117_PRED
9, // D153_PRED
9, // D207_PRED
9, // D63_PRED
9, // TM_PRED
0, // NEARESTMV
0, // NEARMV
3, // ZEROMV
1, // NEWMV
};
// There are 3^3 different combinations of 3 counts that can be either 0,1 or
// 2. However the actual count can never be greater than 2 so the highest
// counter we need is 18. 9 is an invalid counter that's never used.
static const int counter_to_context[19] = {
BOTH_PREDICTED, // 0
NEW_PLUS_NON_INTRA, // 1
BOTH_NEW, // 2
ZERO_PLUS_PREDICTED, // 3
NEW_PLUS_NON_INTRA, // 4
INVALID_CASE, // 5
BOTH_ZERO, // 6
INVALID_CASE, // 7
INVALID_CASE, // 8
INTRA_PLUS_NON_INTRA, // 9
INTRA_PLUS_NON_INTRA, // 10
INVALID_CASE, // 11
INTRA_PLUS_NON_INTRA, // 12
INVALID_CASE, // 13
INVALID_CASE, // 14
INVALID_CASE, // 15
INVALID_CASE, // 16
INVALID_CASE, // 17
BOTH_INTRA // 18
};
static const POSITION mv_ref_blocks[BLOCK_SIZES][MVREF_NEIGHBOURS] = {
// 4X4
{{-1, 0}, {0, -1}, {-1, -1}, {-2, 0}, {0, -2}, {-2, -1}, {-1, -2}, {-2, -2}},
// 4X8
{{-1, 0}, {0, -1}, {-1, -1}, {-2, 0}, {0, -2}, {-2, -1}, {-1, -2}, {-2, -2}},
// 8X4
{{-1, 0}, {0, -1}, {-1, -1}, {-2, 0}, {0, -2}, {-2, -1}, {-1, -2}, {-2, -2}},
// 8X8
{{-1, 0}, {0, -1}, {-1, -1}, {-2, 0}, {0, -2}, {-2, -1}, {-1, -2}, {-2, -2}},
// 8X16
{{0, -1}, {-1, 0}, {1, -1}, {-1, -1}, {0, -2}, {-2, 0}, {-2, -1}, {-1, -2}},
// 16X8
{{-1, 0}, {0, -1}, {-1, 1}, {-1, -1}, {-2, 0}, {0, -2}, {-1, -2}, {-2, -1}},
// 16X16
{{-1, 0}, {0, -1}, {-1, 1}, {1, -1}, {-1, -1}, {-3, 0}, {0, -3}, {-3, -3}},
// 16X32
{{0, -1}, {-1, 0}, {2, -1}, {-1, -1}, {-1, 1}, {0, -3}, {-3, 0}, {-3, -3}},
// 32X16
{{-1, 0}, {0, -1}, {-1, 2}, {-1, -1}, {1, -1}, {-3, 0}, {0, -3}, {-3, -3}},
// 32X32
{{-1, 1}, {1, -1}, {-1, 2}, {2, -1}, {-1, -1}, {-3, 0}, {0, -3}, {-3, -3}},
// 32X64
{{0, -1}, {-1, 0}, {4, -1}, {-1, 2}, {-1, -1}, {0, -3}, {-3, 0}, {2, -1}},
// 64X32
{{-1, 0}, {0, -1}, {-1, 4}, {2, -1}, {-1, -1}, {-3, 0}, {0, -3}, {-1, 2}},
// 64X64
{{-1, 3}, {3, -1}, {-1, 4}, {4, -1}, {-1, -1}, {-1, 0}, {0, -1}, {-1, 6}}
};
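// Editor's note (not part of the diff): the offsets above are (row, col) in
// 8x8 mi units relative to the current block, ordered roughly nearest
// first; the reference-mv search probes the first two positions with
// sub-8x8 precision (get_sub_block_mv) before scanning the rest.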
static const int idx_n_column_to_subblock[4][2] = {
{1, 2},
{1, 3},
{3, 2},
{3, 3}
};
// clamp_mv_ref
#define MV_BORDER (16 << 3) // Allow 16 pels in 1/8th pel units
static void clamp_mv_ref(MV *mv, const MACROBLOCKD *xd) {
clamp_mv(mv, xd->mb_to_left_edge - MV_BORDER,
xd->mb_to_right_edge + MV_BORDER,
xd->mb_to_top_edge - MV_BORDER,
xd->mb_to_bottom_edge + MV_BORDER);
}
// This function returns either the sub-block mv or the whole block's mv,
// depending on whether block_size < 8x8 and a valid sub-block index is
// given.
static INLINE int_mv get_sub_block_mv(const MODE_INFO *candidate, int which_mv,
int search_col, int block_idx) {
return block_idx >= 0 && candidate->mbmi.sb_type < BLOCK_8X8
? candidate->bmi[idx_n_column_to_subblock[block_idx][search_col == 0]]
.as_mv[which_mv]
: candidate->mbmi.mv[which_mv];
}
// Performs mv sign inversion if indicated by the reference frame combination.
static INLINE int_mv scale_mv(const MB_MODE_INFO *mbmi, int ref,
const MV_REFERENCE_FRAME this_ref_frame,
const int *ref_sign_bias) {
int_mv mv = mbmi->mv[ref];
if (ref_sign_bias[mbmi->ref_frame[ref]] != ref_sign_bias[this_ref_frame]) {
mv.as_mv.row *= -1;
mv.as_mv.col *= -1;
}
return mv;
}
// This macro is used to add a motion vector to the mv_ref list if it isn't
// already in the list. If it is the second distinct motion vector added, it
// also skips all additional processing and jumps to Done!
#define ADD_MV_REF_LIST(mv) \
do { \
if (refmv_count) { \
if ((mv).as_int != mv_ref_list[0].as_int) { \
mv_ref_list[refmv_count] = (mv); \
goto Done; \
} \
} else { \
mv_ref_list[refmv_count++] = (mv); \
} \
} while (0)
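// Editor's note (not part of the diff): the list holds at most two distinct
// mvs; the first call fills mv_ref_list[0], and any later call that sees a
// different mv stores it as the second candidate and jumps to Done, since
// the search is then complete.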
// For an inter (non-intra) candidate: for each of its reference frames that
// differs from the current one, scale the corresponding mv and add it to
// our list (the second mv only if it differs from the first).
#define IF_DIFF_REF_FRAME_ADD_MV(mbmi) \
do { \
if (is_inter_block(mbmi)) { \
if ((mbmi)->ref_frame[0] != ref_frame) \
ADD_MV_REF_LIST(scale_mv((mbmi), 0, ref_frame, ref_sign_bias)); \
if (has_second_ref(mbmi) && \
(mbmi)->ref_frame[1] != ref_frame && \
(mbmi)->mv[1].as_int != (mbmi)->mv[0].as_int) \
ADD_MV_REF_LIST(scale_mv((mbmi), 1, ref_frame, ref_sign_bias)); \
} \
} while (0)
// Checks that the given mi_row, mi_col and search point
// are inside the borders of the tile.
static INLINE int is_inside(const TileInfo *const tile,
int mi_col, int mi_row, int mi_rows,
const POSITION *mi_pos) {
return !(mi_row + mi_pos->row < 0 ||
mi_col + mi_pos->col < tile->mi_col_start ||
mi_row + mi_pos->row >= mi_rows ||
mi_col + mi_pos->col >= tile->mi_col_end);
}
// This function searches the neighbourhood of a given MB/SB
// to try and find candidate reference vectors.
static void find_mv_refs_idx(const VP9_COMMON *cm, const MACROBLOCKD *xd,
@@ -188,176 +363,3 @@ void vp9_append_sub8x8_mvs_for_idx(VP9_COMMON *cm, MACROBLOCKD *xd,
assert("Invalid block index.");
}
}
#if CONFIG_COPY_CODING
static int compare_interinfo(MB_MODE_INFO *mbmi, MB_MODE_INFO *ref_mbmi) {
if (mbmi == ref_mbmi) {
return 1;
} else {
int is_same;
#if CONFIG_INTERINTRA
MV_REFERENCE_FRAME mbmi_ref1_backup = mbmi->ref_frame[1];
MV_REFERENCE_FRAME refmbmi_ref1_backup = ref_mbmi->ref_frame[1];
if (mbmi->ref_frame[1] == INTRA_FRAME)
mbmi->ref_frame[1] = NONE;
if (ref_mbmi->ref_frame[1] == INTRA_FRAME)
ref_mbmi->ref_frame[1] = NONE;
#endif
if (mbmi->ref_frame[0] == ref_mbmi->ref_frame[0] &&
mbmi->ref_frame[1] == ref_mbmi->ref_frame[1]) {
if (mbmi->ref_frame[1] > INTRA_FRAME)
is_same = mbmi->mv[0].as_int == ref_mbmi->mv[0].as_int &&
mbmi->mv[1].as_int == ref_mbmi->mv[1].as_int &&
mbmi->interp_filter == ref_mbmi->interp_filter;
else
is_same = mbmi->mv[0].as_int == ref_mbmi->mv[0].as_int &&
mbmi->interp_filter == ref_mbmi->interp_filter;
} else {
is_same = 0;
}
#if CONFIG_INTERINTRA
mbmi->ref_frame[1] = mbmi_ref1_backup;
ref_mbmi->ref_frame[1] = refmbmi_ref1_backup;
#endif
return is_same;
}
}
static int check_inside(VP9_COMMON *cm, int mi_row, int mi_col) {
return mi_row >= 0 && mi_col >= 0 &&
mi_row < cm->mi_rows && mi_col < cm->mi_cols;
}
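// Determines whether the pixels above and to the right of the block at
// (mi_row, mi_col) are already reconstructed, by decoding the block's
// position within the 64x64 superblock's partition quadtree.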
static int is_right_available(BLOCK_SIZE bsize, int mi_row, int mi_col) {
int depth, max_depth = 4 - MIN(b_width_log2(bsize), b_height_log2(bsize));
int block[4] = {0};
if (bsize == BLOCK_64X64)
return 1;
mi_row = mi_row % 8;
mi_col = mi_col % 8;
for (depth = 1; depth <= max_depth; depth++) {
block[depth] = (mi_row >> (3 - depth)) * 2 + (mi_col >> (3 - depth));
mi_row = mi_row % (8 >> depth);
mi_col = mi_col % (8 >> depth);
}
if (b_width_log2(bsize) < b_height_log2(bsize)) {
if (block[max_depth] == 0)
return 1;
} else if (b_width_log2(bsize) > b_height_log2(bsize)) {
if (block[max_depth] > 0)
return 0;
} else {
if (block[max_depth] == 0 || block[max_depth] == 2)
return 1;
else if (block[max_depth] == 3)
return 0;
}
for (depth = max_depth - 1; depth > 0; depth--) {
if (block[depth] == 0 || block[depth] == 2)
return 1;
else if (block[depth] == 3)
return 0;
}
return 1;
}
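// For a rectangular block, identifies whether this is the second half of
// the enclosing square: 1 for the right half of a vertical split, 2 for the
// bottom half of a horizontal split, 0 otherwise.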
static int is_second_rec(int mi_row, int mi_col, BLOCK_SIZE bsize) {
int bw = 4 << b_width_log2(bsize);
int bh = 4 << b_height_log2(bsize);
if (bw < bh)
return (mi_col << 3) % (bw << 1) == 0 ? 0 : 1;
else if (bh < bw)
return (mi_row << 3) % (bh << 1) == 0 ? 0 : 2;
else
return 0;
}
int vp9_construct_ref_inter_list(VP9_COMMON *cm, MACROBLOCKD *xd,
BLOCK_SIZE bsize, int mi_row, int mi_col,
MB_MODE_INFO *ref_list[18]) {
int bw = 4 << b_width_log2(bsize);
int bh = 4 << b_height_log2(bsize);
int row_offset, col_offset;
int mi_offset;
MB_MODE_INFO *ref_mbmi;
int ref_index, ref_num = 0;
int row_offset_cand[18], col_offset_cand[18];
int offset_num = 0, i, switchflag;
int is_sec_rec = is_second_rec(mi_row, mi_col, bsize);
if (is_sec_rec != 2) {
row_offset_cand[offset_num] = -1; col_offset_cand[offset_num] = 0;
offset_num++;
}
if (is_sec_rec != 1) {
row_offset_cand[offset_num] = bh / 16; col_offset_cand[offset_num] = -1;
offset_num++;
}
row_offset = bh / 8 - 1;
col_offset = 1;
if (is_sec_rec < 2)
switchflag = 1;
else
switchflag = 0;
while ((is_sec_rec == 0 && ((row_offset >=0) || col_offset < (bw / 8 + 1))) ||
(is_sec_rec == 1 && col_offset < (bw / 8 + 1)) ||
(is_sec_rec == 2 && row_offset >=0)) {
switch (switchflag) {
case 0:
if (row_offset >= 0) {
if (row_offset != bh / 16) {
row_offset_cand[offset_num] = row_offset;
col_offset_cand[offset_num] = -1;
offset_num++;
}
row_offset--;
}
break;
case 1:
if (col_offset < (bw / 8 + 1)) {
row_offset_cand[offset_num] = -1;
col_offset_cand[offset_num] = col_offset;
offset_num++;
col_offset++;
}
break;
default:
assert(0);
}
if (is_sec_rec == 0)
switchflag = 1 - switchflag;
}
row_offset_cand[offset_num] = -1;
col_offset_cand[offset_num] = -1;
offset_num++;
for (i = 0; i < offset_num; i++) {
row_offset = row_offset_cand[i];
col_offset = col_offset_cand[i];
if ((col_offset < (bw / 8) ||
(col_offset == (bw / 8) && is_right_available(bsize, mi_row, mi_col)))
&& check_inside(cm, mi_row + row_offset, mi_col + col_offset)) {
mi_offset = row_offset * cm->mi_stride + col_offset;
ref_mbmi = &xd->mi[mi_offset]->mbmi;
if (is_inter_block(ref_mbmi)) {
for (ref_index = 0; ref_index < ref_num; ref_index++) {
if (compare_interinfo(ref_mbmi, ref_list[ref_index]))
break;
}
if (ref_index == ref_num) {
ref_list[ref_num] = ref_mbmi;
ref_num++;
}
}
}
}
return ref_num;
}
#endif

View File

@@ -21,181 +21,6 @@ extern "C" {
#define RIGHT_BOTTOM_MARGIN ((VP9_ENC_BORDER_IN_PIXELS -\
VP9_INTERP_EXTEND) << 3)
#define MVREF_NEIGHBOURS 8
typedef struct position {
int row;
int col;
} POSITION;
typedef enum {
BOTH_ZERO = 0,
ZERO_PLUS_PREDICTED = 1,
BOTH_PREDICTED = 2,
NEW_PLUS_NON_INTRA = 3,
BOTH_NEW = 4,
INTRA_PLUS_NON_INTRA = 5,
BOTH_INTRA = 6,
INVALID_CASE = 9
} motion_vector_context;
// This is used to figure out a context for the ref blocks. The code flattens
// an array that would have 3 possible counts (0, 1 & 2) for 3 choices by
// adding 9 for each intra block, 3 for each zero mv and 1 for each new
// motion vector. This single number is then converted into a context
// with a single lookup ( counter_to_context ).
static const int mode_2_counter[MB_MODE_COUNT] = {
9, // DC_PRED
9, // V_PRED
9, // H_PRED
9, // D45_PRED
9, // D135_PRED
9, // D117_PRED
9, // D153_PRED
9, // D207_PRED
9, // D63_PRED
9, // TM_PRED
0, // NEARESTMV
0, // NEARMV
3, // ZEROMV
1, // NEWMV
};
// There are 3^3 different combinations of 3 counts that can be either 0,1 or
// 2. However the actual count can never be greater than 2 so the highest
// counter we need is 18. 9 is an invalid counter that's never used.
static const int counter_to_context[19] = {
BOTH_PREDICTED, // 0
NEW_PLUS_NON_INTRA, // 1
BOTH_NEW, // 2
ZERO_PLUS_PREDICTED, // 3
NEW_PLUS_NON_INTRA, // 4
INVALID_CASE, // 5
BOTH_ZERO, // 6
INVALID_CASE, // 7
INVALID_CASE, // 8
INTRA_PLUS_NON_INTRA, // 9
INTRA_PLUS_NON_INTRA, // 10
INVALID_CASE, // 11
INTRA_PLUS_NON_INTRA, // 12
INVALID_CASE, // 13
INVALID_CASE, // 14
INVALID_CASE, // 15
INVALID_CASE, // 16
INVALID_CASE, // 17
BOTH_INTRA // 18
};
static const POSITION mv_ref_blocks[BLOCK_SIZES][MVREF_NEIGHBOURS] = {
// 4X4
{{-1, 0}, {0, -1}, {-1, -1}, {-2, 0}, {0, -2}, {-2, -1}, {-1, -2}, {-2, -2}},
// 4X8
{{-1, 0}, {0, -1}, {-1, -1}, {-2, 0}, {0, -2}, {-2, -1}, {-1, -2}, {-2, -2}},
// 8X4
{{-1, 0}, {0, -1}, {-1, -1}, {-2, 0}, {0, -2}, {-2, -1}, {-1, -2}, {-2, -2}},
// 8X8
{{-1, 0}, {0, -1}, {-1, -1}, {-2, 0}, {0, -2}, {-2, -1}, {-1, -2}, {-2, -2}},
// 8X16
{{0, -1}, {-1, 0}, {1, -1}, {-1, -1}, {0, -2}, {-2, 0}, {-2, -1}, {-1, -2}},
// 16X8
{{-1, 0}, {0, -1}, {-1, 1}, {-1, -1}, {-2, 0}, {0, -2}, {-1, -2}, {-2, -1}},
// 16X16
{{-1, 0}, {0, -1}, {-1, 1}, {1, -1}, {-1, -1}, {-3, 0}, {0, -3}, {-3, -3}},
// 16X32
{{0, -1}, {-1, 0}, {2, -1}, {-1, -1}, {-1, 1}, {0, -3}, {-3, 0}, {-3, -3}},
// 32X16
{{-1, 0}, {0, -1}, {-1, 2}, {-1, -1}, {1, -1}, {-3, 0}, {0, -3}, {-3, -3}},
// 32X32
{{-1, 1}, {1, -1}, {-1, 2}, {2, -1}, {-1, -1}, {-3, 0}, {0, -3}, {-3, -3}},
// 32X64
{{0, -1}, {-1, 0}, {4, -1}, {-1, 2}, {-1, -1}, {0, -3}, {-3, 0}, {2, -1}},
// 64X32
{{-1, 0}, {0, -1}, {-1, 4}, {2, -1}, {-1, -1}, {-3, 0}, {0, -3}, {-1, 2}},
// 64X64
{{-1, 3}, {3, -1}, {-1, 4}, {4, -1}, {-1, -1}, {-1, 0}, {0, -1}, {-1, 6}}
};
static const int idx_n_column_to_subblock[4][2] = {
{1, 2},
{1, 3},
{3, 2},
{3, 3}
};
// clamp_mv_ref
#define MV_BORDER (16 << 3) // Allow 16 pels in 1/8th pel units
static void clamp_mv_ref(MV *mv, const MACROBLOCKD *xd) {
clamp_mv(mv, xd->mb_to_left_edge - MV_BORDER,
xd->mb_to_right_edge + MV_BORDER,
xd->mb_to_top_edge - MV_BORDER,
xd->mb_to_bottom_edge + MV_BORDER);
}
// This function returns either the sub-block mv or the whole block's mv,
// depending on whether block_size < 8x8 and a valid sub-block index is
// given.
static INLINE int_mv get_sub_block_mv(const MODE_INFO *candidate, int which_mv,
int search_col, int block_idx) {
return block_idx >= 0 && candidate->mbmi.sb_type < BLOCK_8X8
? candidate->bmi[idx_n_column_to_subblock[block_idx][search_col == 0]]
.as_mv[which_mv]
: candidate->mbmi.mv[which_mv];
}
// Performs mv sign inversion if indicated by the reference frame combination.
static INLINE int_mv scale_mv(const MB_MODE_INFO *mbmi, int ref,
const MV_REFERENCE_FRAME this_ref_frame,
const int *ref_sign_bias) {
int_mv mv = mbmi->mv[ref];
if (ref_sign_bias[mbmi->ref_frame[ref]] != ref_sign_bias[this_ref_frame]) {
mv.as_mv.row *= -1;
mv.as_mv.col *= -1;
}
return mv;
}
// This macro is used to add a motion vector to the mv_ref list if it isn't
// already in the list. If it is the second distinct motion vector added, it
// also skips all additional processing and jumps to Done!
#define ADD_MV_REF_LIST(mv) \
do { \
if (refmv_count) { \
if ((mv).as_int != mv_ref_list[0].as_int) { \
mv_ref_list[refmv_count] = (mv); \
goto Done; \
} \
} else { \
mv_ref_list[refmv_count++] = (mv); \
} \
} while (0)
// For an inter (non-intra) candidate: for each of its reference frames that
// differs from the current one, scale the corresponding mv and add it to
// our list (the second mv only if it differs from the first).
#define IF_DIFF_REF_FRAME_ADD_MV(mbmi) \
do { \
if (is_inter_block(mbmi)) { \
if ((mbmi)->ref_frame[0] != ref_frame) \
ADD_MV_REF_LIST(scale_mv((mbmi), 0, ref_frame, ref_sign_bias)); \
if (has_second_ref(mbmi) && \
(mbmi)->ref_frame[1] != ref_frame && \
(mbmi)->mv[1].as_int != (mbmi)->mv[0].as_int) \
ADD_MV_REF_LIST(scale_mv((mbmi), 1, ref_frame, ref_sign_bias)); \
} \
} while (0)
// Checks that the given mi_row, mi_col and search point
// are inside the borders of the tile.
static INLINE int is_inside(const TileInfo *const tile,
int mi_col, int mi_row, int mi_rows,
const POSITION *mi_pos) {
return !(mi_row + mi_pos->row < 0 ||
mi_col + mi_pos->col < tile->mi_col_start ||
mi_row + mi_pos->row >= mi_rows ||
mi_col + mi_pos->col >= tile->mi_col_end);
}
// TODO(jingning): this mv clamping function should be block size dependent.
static INLINE void clamp_mv2(MV *mv, const MACROBLOCKD *xd) {
clamp_mv(mv, xd->mb_to_left_edge - LEFT_TOP_MARGIN,
@@ -220,12 +45,6 @@ void vp9_append_sub8x8_mvs_for_idx(VP9_COMMON *cm, MACROBLOCKD *xd,
int block, int ref, int mi_row, int mi_col,
int_mv *nearest, int_mv *near);
#if CONFIG_COPY_CODING
int vp9_construct_ref_inter_list(VP9_COMMON *cm, MACROBLOCKD *xd,
BLOCK_SIZE bsize, int mi_row, int mi_col,
MB_MODE_INFO *ref_list[18]);
#endif
#ifdef __cplusplus
} // extern "C"
#endif

View File

@@ -213,14 +213,8 @@ typedef struct VP9Common {
PARTITION_CONTEXT *above_seg_context;
ENTROPY_CONTEXT *above_context;
#if CONFIG_MASKED_INTERINTER
int use_masked_interinter;
#endif
#if CONFIG_INTERINTRA
int use_interintra;
#if CONFIG_MASKED_INTERINTRA
int use_masked_interintra;
#endif
#if CONFIG_TRANSCODE
FILE *mi_array_pf;
#endif
} VP9_COMMON;

View File

@@ -383,47 +383,3 @@ int vp9_get_segment_id(VP9_COMMON *cm, const uint8_t *segment_ids,
assert(segment_id >= 0 && segment_id < MAX_SEGMENTS);
return segment_id;
}
#if CONFIG_COPY_CODING
int vp9_get_copy_mode_context(const MACROBLOCKD *xd) {
const MB_MODE_INFO *const above_mbmi = get_mbmi(get_above_mi(xd));
const MB_MODE_INFO *const left_mbmi = get_mbmi(get_left_mi(xd));
const int has_above = above_mbmi != NULL;
const int has_left = left_mbmi != NULL;
if (has_above && has_left) {
const int above_intra = !is_inter_block(above_mbmi);
const int left_intra = !is_inter_block(left_mbmi);
if (above_intra && left_intra) {
return 4;
} else if (above_intra || left_intra) {
return 3;
} else {
const int above_predict = above_mbmi->copy_mode != NOREF;
const int left_predict = left_mbmi->copy_mode != NOREF;
if (above_predict && left_predict)
return 0;
else if (above_predict || left_predict)
return 1;
else
return 2;
}
} else if (has_above || has_left) {
const MB_MODE_INFO *const ref_mbmi = has_above ? above_mbmi : left_mbmi;
const int ref_intra = !is_inter_block(ref_mbmi);
if (ref_intra) {
return 3;
} else {
const int ref_predict = ref_mbmi->copy_mode != NOREF;
if (ref_predict)
return 0;
else
return 1;
}
} else {
return 0;
}
}
#endif
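// Editor's note (not part of the diff): context 4 = both neighbours intra,
// 3 = exactly one intra (or a lone intra neighbour); for inter neighbours,
// 0/1/2 reflect how many use copy mode (all/one of two/none), a lone inter
// neighbour gives 0 if it copies and 1 otherwise, and no neighbours at all
// gives 0.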

View File

@@ -134,10 +134,6 @@ static INLINE unsigned int *get_tx_counts(TX_SIZE max_tx_size, int ctx,
}
}
#if CONFIG_COPY_CODING
int vp9_get_copy_mode_context(const MACROBLOCKD *xd);
#endif
#ifdef __cplusplus
} // extern "C"
#endif

View File

@@ -12,6 +12,7 @@
#include "vp9/common/vp9_quant_common.h"
#include "vp9/common/vp9_seg_common.h"
#if 1
static const int16_t dc_qlookup[QINDEX_RANGE] = {
4, 8, 8, 9, 10, 11, 12, 12,
13, 14, 15, 16, 17, 18, 19, 19,
@@ -82,6 +83,44 @@ static const int16_t ac_qlookup[QINDEX_RANGE] = {
1597, 1628, 1660, 1692, 1725, 1759, 1793, 1828,
};
void vp9_init_quant_tables(void) { }
#else
static int16_t dc_qlookup[QINDEX_RANGE];
static int16_t ac_qlookup[QINDEX_RANGE];
#define ACDC_MIN 8
// TODO(dkovalev) move to common and reuse
static double poly3(double a, double b, double c, double d, double x) {
return a*x*x*x + b*x*x + c*x + d;
}
void vp9_init_quant_tables(void) {
int i, val = 4;
// A "real" q of 1.0 forces lossless mode.
// In practice non lossless Q's between 1.0 and 2.0 (represented here by
// integer values from 5-7 give poor rd results (lower psnr and often
// larger size than the lossless encode. To block out those "not very useful"
// values we increment the ac and dc q lookup values by 4 after position 0.
ac_qlookup[0] = val;
dc_qlookup[0] = val;
val += 4;
for (i = 1; i < QINDEX_RANGE; i++) {
const int ac_val = val;
val = (int)(val * 1.01975);
if (val == ac_val)
++val;
ac_qlookup[i] = (int16_t)ac_val;
dc_qlookup[i] = (int16_t)MAX(ACDC_MIN, poly3(0.000000305, -0.00065, 0.9,
0.5, ac_val));
}
}
#endif
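// Editor's note (not part of the diff): in the generated variant above each
// ac step multiplies the previous value by 1.01975 (stepping by at least 1),
// so the ac table grows geometrically and roughly doubles every 35 indices
// (1.01975^35 is about 2); the dc entry is derived from the ac value via
// the cubic poly3() fit and floored at ACDC_MIN.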
int16_t vp9_dc_quant(int qindex, int delta) {
return dc_qlookup[clamp(qindex + delta, 0, MAXQ)];
}

View File

@@ -22,6 +22,8 @@ extern "C" {
#define QINDEX_RANGE (MAXQ - MINQ + 1)
#define QINDEX_BITS 8
void vp9_init_quant_tables(void);
int16_t vp9_dc_quant(int qindex, int delta);
int16_t vp9_ac_quant(int qindex, int delta);

View File

@@ -139,349 +139,9 @@ MV clamp_mv_to_umv_border_sb(const MACROBLOCKD *xd, const MV *src_mv,
return clamped_mv;
}
#if CONFIG_MASKED_INTERINTER
#define MASK_WEIGHT_BITS 6
static int get_masked_weight(int m) {
#define SMOOTHER_LEN 32
static const uint8_t smoothfn[2 * SMOOTHER_LEN + 1] = {
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 1, 1, 1,
1, 1, 2, 2, 3, 4, 5, 6,
8, 9, 12, 14, 17, 21, 24, 28,
32,
36, 40, 43, 47, 50, 52, 55, 56,
58, 59, 60, 61, 62, 62, 63, 63,
63, 63, 63, 64, 64, 64, 64, 64,
64, 64, 64, 64, 64, 64, 64, 64,
};
if (m < -SMOOTHER_LEN)
return 0;
else if (m > SMOOTHER_LEN)
return (1 << MASK_WEIGHT_BITS);
else
return smoothfn[m + SMOOTHER_LEN];
}
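// Editor's sketch (not part of the diff): get_masked_weight() maps the
// signed distance m from the mask boundary to a 6-bit blend weight.
static void example_masked_weights(void) {
assert(get_masked_weight(-40) == 0); // deep on the far side: weight 0
assert(get_masked_weight(0) == 32); // on the boundary: a 50/50 blend
assert(get_masked_weight(40) == 64); // deep on the near side: full weight
}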
static int get_hard_mask(int m) {
// Binary mask: full weight (1 << MASK_WEIGHT_BITS) on one side of the
// boundary, zero on the other.
return (m > 0) << MASK_WEIGHT_BITS;
}
// Equation of line: f(x, y) = a[0]*(x - a[2]*w/4) + a[1]*(y - a[3]*h/4) = 0
// The soft mask is obtained by computing f(x, y) and then calling
// get_masked_weight(f(x, y)).
static const int mask_params_sml[1 << MASK_BITS_SML][4] = {
{-1, 2, 2, 2},
{ 1, -2, 2, 2},
{-2, 1, 2, 2},
{ 2, -1, 2, 2},
{ 2, 1, 2, 2},
{-2, -1, 2, 2},
{ 1, 2, 2, 2},
{-1, -2, 2, 2},
};
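// Editor's worked example (not part of the diff): the first row {-1, 2, 2, 2}
// on a w x h block gives f(x, y) = -(x - w/2) + 2*(y - h/2) = 0 as the
// boundary, a line through the block centre; each pixel's blend weight is
// get_masked_weight(f(x, y)).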
static const int mask_params_med_hgtw[1 << MASK_BITS_MED][4] = {
{-1, 2, 2, 2},
{ 1, -2, 2, 2},
{-2, 1, 2, 2},
{ 2, -1, 2, 2},
{ 2, 1, 2, 2},
{-2, -1, 2, 2},
{ 1, 2, 2, 2},
{-1, -2, 2, 2},
{-1, 2, 2, 1},
{ 1, -2, 2, 1},
{-1, 2, 2, 3},
{ 1, -2, 2, 3},
{ 1, 2, 2, 1},
{-1, -2, 2, 1},
{ 1, 2, 2, 3},
{-1, -2, 2, 3},
};
static const int mask_params_med_hltw[1 << MASK_BITS_MED][4] = {
{-1, 2, 2, 2},
{ 1, -2, 2, 2},
{-2, 1, 2, 2},
{ 2, -1, 2, 2},
{ 2, 1, 2, 2},
{-2, -1, 2, 2},
{ 1, 2, 2, 2},
{-1, -2, 2, 2},
{-2, 1, 1, 2},
{ 2, -1, 1, 2},
{-2, 1, 3, 2},
{ 2, -1, 3, 2},
{ 2, 1, 1, 2},
{-2, -1, 1, 2},
{ 2, 1, 3, 2},
{-2, -1, 3, 2},
};
static const int mask_params_med_heqw[1 << MASK_BITS_MED][4] = {
{-1, 2, 2, 2},
{ 1, -2, 2, 2},
{-2, 1, 2, 2},
{ 2, -1, 2, 2},
{ 2, 1, 2, 2},
{-2, -1, 2, 2},
{ 1, 2, 2, 2},
{-1, -2, 2, 2},
{ 0, 2, 0, 1},
{ 0, -2, 0, 1},
{ 0, 2, 0, 3},
{ 0, -2, 0, 3},
{ 2, 0, 1, 0},
{-2, 0, 1, 0},
{ 2, 0, 3, 0},
{-2, 0, 3, 0},
};
static const int mask_params_big_hgtw[1 << MASK_BITS_BIG][4] = {
{-1, 2, 2, 2},
{ 1, -2, 2, 2},
{-2, 1, 2, 2},
{ 2, -1, 2, 2},
{ 2, 1, 2, 2},
{-2, -1, 2, 2},
{ 1, 2, 2, 2},
{-1, -2, 2, 2},
{-1, 2, 2, 1},
{ 1, -2, 2, 1},
{-1, 2, 2, 3},
{ 1, -2, 2, 3},
{ 1, 2, 2, 1},
{-1, -2, 2, 1},
{ 1, 2, 2, 3},
{-1, -2, 2, 3},
{-2, 1, 1, 2},
{ 2, -1, 1, 2},
{-2, 1, 3, 2},
{ 2, -1, 3, 2},
{ 2, 1, 1, 2},
{-2, -1, 1, 2},
{ 2, 1, 3, 2},
{-2, -1, 3, 2},
{ 0, 2, 0, 1},
{ 0, -2, 0, 1},
{ 0, 2, 0, 2},
{ 0, -2, 0, 2},
{ 0, 2, 0, 3},
{ 0, -2, 0, 3},
{ 2, 0, 2, 0},
{-2, 0, 2, 0},
};
static const int mask_params_big_hltw[1 << MASK_BITS_BIG][4] = {
{-1, 2, 2, 2},
{ 1, -2, 2, 2},
{-2, 1, 2, 2},
{ 2, -1, 2, 2},
{ 2, 1, 2, 2},
{-2, -1, 2, 2},
{ 1, 2, 2, 2},
{-1, -2, 2, 2},
{-1, 2, 2, 1},
{ 1, -2, 2, 1},
{-1, 2, 2, 3},
{ 1, -2, 2, 3},
{ 1, 2, 2, 1},
{-1, -2, 2, 1},
{ 1, 2, 2, 3},
{-1, -2, 2, 3},
{-2, 1, 1, 2},
{ 2, -1, 1, 2},
{-2, 1, 3, 2},
{ 2, -1, 3, 2},
{ 2, 1, 1, 2},
{-2, -1, 1, 2},
{ 2, 1, 3, 2},
{-2, -1, 3, 2},
{ 0, 2, 0, 2},
{ 0, -2, 0, 2},
{ 2, 0, 1, 0},
{-2, 0, 1, 0},
{ 2, 0, 2, 0},
{-2, 0, 2, 0},
{ 2, 0, 3, 0},
{-2, 0, 3, 0},
};
static const int mask_params_big_heqw[1 << MASK_BITS_BIG][4] = {
{-1, 2, 2, 2},
{ 1, -2, 2, 2},
{-2, 1, 2, 2},
{ 2, -1, 2, 2},
{ 2, 1, 2, 2},
{-2, -1, 2, 2},
{ 1, 2, 2, 2},
{-1, -2, 2, 2},
{-1, 2, 2, 1},
{ 1, -2, 2, 1},
{-1, 2, 2, 3},
{ 1, -2, 2, 3},
{ 1, 2, 2, 1},
{-1, -2, 2, 1},
{ 1, 2, 2, 3},
{-1, -2, 2, 3},
{-2, 1, 1, 2},
{ 2, -1, 1, 2},
{-2, 1, 3, 2},
{ 2, -1, 3, 2},
{ 2, 1, 1, 2},
{-2, -1, 1, 2},
{ 2, 1, 3, 2},
{-2, -1, 3, 2},
{ 0, 2, 0, 1},
{ 0, -2, 0, 1},
{ 0, 2, 0, 3},
{ 0, -2, 0, 3},
{ 2, 0, 1, 0},
{-2, 0, 1, 0},
{ 2, 0, 3, 0},
{-2, 0, 3, 0},
};
static const int *get_mask_params(int mask_index,
BLOCK_SIZE sb_type,
int h, int w) {
const int *a;
const int mask_bits = get_mask_bits(sb_type);
if (mask_index == MASK_NONE)
return NULL;
if (mask_bits == MASK_BITS_SML) {
a = mask_params_sml[mask_index];
} else if (mask_bits == MASK_BITS_MED) {
if (h > w)
a = mask_params_med_hgtw[mask_index];
else if (h < w)
a = mask_params_med_hltw[mask_index];
else
a = mask_params_med_heqw[mask_index];
} else if (mask_bits == MASK_BITS_BIG) {
if (h > w)
a = mask_params_big_hgtw[mask_index];
else if (h < w)
a = mask_params_big_hltw[mask_index];
else
a = mask_params_big_heqw[mask_index];
} else {
assert(0);
return NULL; // keep 'a' from being returned uninitialized if asserts are off
}
return a;
}
void vp9_generate_masked_weight(int mask_index,
BLOCK_SIZE sb_type,
int h, int w,
uint8_t *mask, int stride) {
int i, j;
const int *a = get_mask_params(mask_index, sb_type, h, w);
if (!a) return;
for (i = 0; i < h; ++i)
for (j = 0; j < w; ++j) {
int x = (j - (a[2] * w) / 4);
int y = (i - (a[3] * h) / 4);
int m = a[0] * x + a[1] * y;
mask[i * stride + j] = get_masked_weight(m);
}
}
void vp9_generate_hard_mask(int mask_index, BLOCK_SIZE sb_type,
int h, int w, uint8_t *mask, int stride) {
int i, j;
const int *a = get_mask_params(mask_index, sb_type, h, w);
if (!a) return;
for (i = 0; i < h; ++i)
for (j = 0; j < w; ++j) {
int x = (j - (a[2] * w) / 4);
int y = (i - (a[3] * h) / 4);
int m = a[0] * x + a[1] * y;
mask[i * stride + j] = get_hard_mask(m);
}
}
static void build_masked_compound(uint8_t *dst, int dst_stride,
uint8_t *dst2, int dst2_stride,
int mask_index, BLOCK_SIZE sb_type,
int h, int w) {
int i, j;
uint8_t mask[4096];
vp9_generate_masked_weight(mask_index, sb_type, h, w, mask, 64);
for (i = 0; i < h; ++i)
for (j = 0; j < w; ++j) {
int m = mask[i * 64 + j];
dst[i * dst_stride + j] = (dst[i * dst_stride + j] * m +
dst2[i * dst2_stride + j] *
((1 << MASK_WEIGHT_BITS) - m) +
(1 << (MASK_WEIGHT_BITS - 1))) >>
MASK_WEIGHT_BITS;
}
}
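// Editor's worked example (not part of the diff): the blend above is a
// rounded 6-bit weighted average. With dst = 100, dst2 = 60 and m = 48:
// (100 * 48 + 60 * 16 + 32) >> 6 = (4800 + 960 + 32) >> 6 = 90.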
#if CONFIG_SUPERTX
void generate_masked_weight_extend(int mask_index, int plane,
BLOCK_SIZE sb_type, int h, int w,
int mask_offset_x, int mask_offset_y,
uint8_t *mask, int stride) {
int i, j;
int subh = (plane ? 2 : 4) << b_height_log2(sb_type);
int subw = (plane ? 2 : 4) << b_width_log2(sb_type);
const int *a = get_mask_params(mask_index, sb_type, subh, subw);
if (!a) return;
for (i = 0; i < h; ++i)
for (j = 0; j < w; ++j) {
int x = (j - (a[2] * subw) / 4 - mask_offset_x);
int y = (i - (a[3] * subh) / 4 - mask_offset_y);
int m = a[0] * x + a[1] * y;
mask[i * stride + j] = get_masked_weight(m);
}
}
static void build_masked_compound_extend(uint8_t *dst, int dst_stride,
uint8_t *dst2, int dst2_stride,
int plane,
int mask_index, BLOCK_SIZE sb_type,
int mask_offset_x, int mask_offset_y,
int h, int w) {
int i, j;
uint8_t mask[4096];
generate_masked_weight_extend(mask_index, plane, sb_type, h, w,
mask_offset_x, mask_offset_y, mask, 64);
for (i = 0; i < h; ++i)
for (j = 0; j < w; ++j) {
int m = mask[i * 64 + j];
dst[i * dst_stride + j] = (dst[i * dst_stride + j] * m +
dst2[i * dst2_stride + j] *
((1 << MASK_WEIGHT_BITS) - m) +
(1 << (MASK_WEIGHT_BITS - 1))) >>
MASK_WEIGHT_BITS;
}
}
#endif
#endif
static void build_inter_predictors(MACROBLOCKD *xd, int plane, int block,
int bw, int bh,
int x, int y, int w, int h,
#if CONFIG_SUPERTX && CONFIG_MASKED_INTERINTER
int mask_offset_x, int mask_offset_y,
#endif
int mi_x, int mi_y) {
struct macroblockd_plane *const pd = &xd->plane[plane];
const MODE_INFO *mi = xd->mi[0];
@@ -533,27 +193,8 @@ static void build_inter_predictors(MACROBLOCKD *xd, int plane, int block,
pre += (scaled_mv.row >> SUBPEL_BITS) * pre_buf->stride
+ (scaled_mv.col >> SUBPEL_BITS);
#if CONFIG_MASKED_INTERINTER
if (ref && get_mask_bits(mi->mbmi.sb_type)
&& mi->mbmi.use_masked_interinter) {
uint8_t tmp_dst[4096];
inter_predictor(pre, pre_buf->stride, tmp_dst, 64,
subpel_x, subpel_y, sf, w, h, 0, kernel, xs, ys);
#if CONFIG_SUPERTX
build_masked_compound_extend(dst, dst_buf->stride, tmp_dst, 64, plane,
mi->mbmi.mask_index, mi->mbmi.sb_type,
mask_offset_x, mask_offset_y, h, w);
#else
build_masked_compound(dst, dst_buf->stride, tmp_dst, 64,
mi->mbmi.mask_index, mi->mbmi.sb_type, h, w);
#endif
} else {
#endif
inter_predictor(pre, pre_buf->stride, dst, dst_buf->stride,
subpel_x, subpel_y, sf, w, h, ref, kernel, xs, ys);
#if CONFIG_MASKED_INTERINTER
}
#endif
}
}
@@ -577,18 +218,10 @@ static void build_inter_predictors_for_planes(MACROBLOCKD *xd, BLOCK_SIZE bsize,
for (y = 0; y < num_4x4_h; ++y)
for (x = 0; x < num_4x4_w; ++x)
build_inter_predictors(xd, plane, i++, bw, bh,
4 * x, 4 * y, 4, 4,
#if CONFIG_SUPERTX && CONFIG_MASKED_INTERINTER
0, 0,
#endif
mi_x, mi_y);
4 * x, 4 * y, 4, 4, mi_x, mi_y);
} else {
build_inter_predictors(xd, plane, 0, bw, bh,
0, 0, bw, bh,
#if CONFIG_SUPERTX && CONFIG_MASKED_INTERINTER
0, 0,
#endif
mi_x, mi_y);
0, 0, bw, bh, mi_x, mi_y);
}
}
}
@@ -596,323 +229,23 @@ static void build_inter_predictors_for_planes(MACROBLOCKD *xd, BLOCK_SIZE bsize,
void vp9_build_inter_predictors_sby(MACROBLOCKD *xd, int mi_row, int mi_col,
BLOCK_SIZE bsize) {
build_inter_predictors_for_planes(xd, bsize, mi_row, mi_col, 0, 0);
#if CONFIG_INTERINTRA
if (xd->mi[0]->mbmi.ref_frame[1] == INTRA_FRAME &&
is_interintra_allowed(xd->mi[0]->mbmi.sb_type))
vp9_build_interintra_predictors_sby(xd, xd->plane[0].dst.buf,
xd->plane[0].dst.stride, bsize);
#endif
}
void vp9_build_inter_predictors_sbuv(MACROBLOCKD *xd, int mi_row, int mi_col,
BLOCK_SIZE bsize) {
build_inter_predictors_for_planes(xd, bsize, mi_row, mi_col, 1,
MAX_MB_PLANE - 1);
#if CONFIG_INTERINTRA
if (xd->mi[0]->mbmi.ref_frame[1] == INTRA_FRAME &&
is_interintra_allowed(xd->mi[0]->mbmi.sb_type))
vp9_build_interintra_predictors_sbuv(xd, xd->plane[1].dst.buf,
xd->plane[2].dst.buf,
xd->plane[1].dst.stride,
xd->plane[2].dst.stride, bsize);
#endif
}
void vp9_build_inter_predictors_sb(MACROBLOCKD *xd, int mi_row, int mi_col,
BLOCK_SIZE bsize) {
build_inter_predictors_for_planes(xd, bsize, mi_row, mi_col, 0,
MAX_MB_PLANE - 1);
#if CONFIG_INTERINTRA
if (xd->mi[0]->mbmi.ref_frame[1] == INTRA_FRAME &&
is_interintra_allowed(xd->mi[0]->mbmi.sb_type))
vp9_build_interintra_predictors(xd, xd->plane[0].dst.buf,
xd->plane[1].dst.buf, xd->plane[2].dst.buf,
xd->plane[0].dst.stride,
xd->plane[1].dst.stride,
xd->plane[2].dst.stride, bsize);
#endif
}
#if CONFIG_SUPERTX
static int get_masked_weight_supertx(int m) {
#define SMOOTHER_LEN 32
static const uint8_t smoothfn[2 * SMOOTHER_LEN + 1] = {
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 1, 1, 1,
1, 1, 2, 2, 3, 4, 5, 6,
8, 9, 12, 14, 17, 21, 24, 28,
32,
36, 40, 43, 47, 50, 52, 55, 56,
58, 59, 60, 61, 62, 62, 63, 63,
63, 63, 63, 64, 64, 64, 64, 64,
64, 64, 64, 64, 64, 64, 64, 64,
};
if (m < -SMOOTHER_LEN)
return 0;
else if (m > SMOOTHER_LEN)
return 64;
else
return smoothfn[m + SMOOTHER_LEN];
}
static const uint8_t mask_8[8] = {
64, 64, 62, 52, 12, 2, 0, 0
};
static const uint8_t mask_16[16] = {
63, 62, 60, 58, 55, 50, 43, 36, 28, 21, 14, 9, 6, 4, 2, 1
};
static const uint8_t mask_32[32] = {
64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 63, 61, 57, 52, 45, 36,
28, 19, 12, 7, 3, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};
static void generate_1dmask(int length, uint8_t *mask) {
int i;
switch (length) {
case 8:
vpx_memcpy(mask, mask_8, length);
break;
case 16:
vpx_memcpy(mask, mask_16, length);
break;
case 32:
vpx_memcpy(mask, mask_32, length);
break;
default:
assert(0);
}
if (length > 16) {
for (i = 0; i < length; ++i)
mask[i] = get_masked_weight_supertx(-1 * (2 * i - length + 1));
}
}
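// Editor's note (not part of the diff): the 1-D masks ramp from 64 (keep
// the first predictor) down to 0 (take the second) across the seam; for
// lengths over 16 the loop above recomputes the ramp analytically,
// overwriting the copied table.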
void vp9_build_masked_inter_predictor_complex(uint8_t *dst, int dst_stride,
uint8_t *dst2, int dst2_stride,
int plane,
int mi_row, int mi_col,
int mi_row_ori, int mi_col_ori,
BLOCK_SIZE bsize,
BLOCK_SIZE top_bsize,
PARTITION_TYPE partition) {
int i, j;
uint8_t mask[32];
int top_w = 4 << b_width_log2(top_bsize),
top_h = 4 << b_height_log2(top_bsize);
int w = 4 << b_width_log2(bsize), h = 4 << b_height_log2(bsize);
int w_offset = (mi_col - mi_col_ori) << 3,
h_offset = (mi_row - mi_row_ori) << 3;
int m;
if (plane > 0) {
top_w = top_w >> 1; top_h = top_h >> 1;
w = w >> 1; h = h >> 1;
w_offset = w_offset >> 1; h_offset = h_offset >> 1;
}
switch (partition) {
case PARTITION_HORZ:
generate_1dmask(h, mask + h_offset);
vpx_memset(mask, 64, h_offset);
vpx_memset(mask + h_offset + h, 0, top_h - h_offset - h);
break;
case PARTITION_VERT:
generate_1dmask(w, mask + w_offset);
vpx_memset(mask, 64, w_offset);
vpx_memset(mask + w_offset + w, 0, top_w - w_offset - w);
break;
default:
assert(0);
}
for (i = 0; i < top_h; ++i)
for (j = 0; j < top_w; ++j) {
m = partition == PARTITION_HORZ ? mask[i] : mask[j];
if (m == 64)
continue;
if (m == 0)
dst[i * dst_stride + j] = dst2[i * dst2_stride + j];
else
dst[i * dst_stride + j] = (dst[i * dst_stride + j] * m +
dst2[i * dst2_stride + j] *
(64 - m) + 32) >> 6;
}
}
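// Editor's note (not part of the diff): for a horizontal (or vertical)
// split inside a supertx block, the function above keeps dst where the mask
// is 64, takes dst2 where it is 0, and cross-fades rows (or columns) near
// the partition seam using the 1-D mask positioned at the sub-block offset.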
#if CONFIG_MASKED_INTERINTER
void vp9_build_inter_predictors_sb_extend(MACROBLOCKD *xd,
int mi_row, int mi_col,
int mi_row_ori, int mi_col_ori,
BLOCK_SIZE bsize) {
int plane;
const int mi_x = mi_col_ori * MI_SIZE;
const int mi_y = mi_row_ori * MI_SIZE;
const int mask_offset_x = (mi_col - mi_col_ori) * MI_SIZE;
const int mask_offset_y = (mi_row - mi_row_ori) * MI_SIZE;
for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize,
&xd->plane[plane]);
const int num_4x4_w = num_4x4_blocks_wide_lookup[plane_bsize];
const int num_4x4_h = num_4x4_blocks_high_lookup[plane_bsize];
const int bw = 4 * num_4x4_w;
const int bh = 4 * num_4x4_h;
if (xd->mi[0]->mbmi.sb_type < BLOCK_8X8) {
int i = 0, x, y;
assert(bsize == BLOCK_8X8);
for (y = 0; y < num_4x4_h; ++y)
for (x = 0; x < num_4x4_w; ++x)
build_inter_predictors(xd, plane, i++, bw, bh, 4 * x, 4 * y, 4, 4,
mask_offset_x, mask_offset_y, mi_x, mi_y);
} else {
build_inter_predictors(xd, plane, 0, bw, bh, 0, 0, bw, bh,
mask_offset_x, mask_offset_y, mi_x, mi_y);
}
}
}
#endif
void vp9_build_inter_predictors_sby_sub8x8_extend(MACROBLOCKD *xd,
int mi_row, int mi_col,
int mi_row_ori,
int mi_col_ori,
BLOCK_SIZE top_bsize,
PARTITION_TYPE partition) {
const int mi_x = mi_col_ori * MI_SIZE;
const int mi_y = mi_row_ori * MI_SIZE;
#if CONFIG_MASKED_INTERINTER
const int mask_offset_x = (mi_col - mi_col_ori) * MI_SIZE;
const int mask_offset_y = (mi_row - mi_row_ori) * MI_SIZE;
#endif
uint8_t *orig_dst;
int orig_dst_stride;
int bw = 4 << b_width_log2(top_bsize);
int bh = 4 << b_height_log2(top_bsize);
DECLARE_ALIGNED_ARRAY(16, uint8_t, tmp_buf, 32 * 32);
DECLARE_ALIGNED_ARRAY(16, uint8_t, tmp_buf1, 32 * 32);
DECLARE_ALIGNED_ARRAY(16, uint8_t, tmp_buf2, 32 * 32);
orig_dst = xd->plane[0].dst.buf;
orig_dst_stride = xd->plane[0].dst.stride;
build_inter_predictors(xd, 0, 0, bw, bh, 0, 0, bw, bh,
#if CONFIG_MASKED_INTERINTER
mask_offset_x, mask_offset_y,
#endif
mi_x, mi_y);
xd->plane[0].dst.buf = tmp_buf;
xd->plane[0].dst.stride = 32;
switch (partition) {
case PARTITION_HORZ:
build_inter_predictors(xd, 0, 2, bw, bh, 0, 0, bw, bh,
#if CONFIG_MASKED_INTERINTER
mask_offset_x, mask_offset_y,
#endif
mi_x, mi_y);
break;
case PARTITION_VERT:
build_inter_predictors(xd, 0, 1, bw, bh, 0, 0, bw, bh,
#if CONFIG_MASKED_INTERINTER
mask_offset_x, mask_offset_y,
#endif
mi_x, mi_y);
break;
case PARTITION_SPLIT:
build_inter_predictors(xd, 0, 1, bw, bh, 0, 0, bw, bh,
#if CONFIG_MASKED_INTERINTER
mask_offset_x, mask_offset_y,
#endif
mi_x, mi_y);
xd->plane[0].dst.buf = tmp_buf1;
xd->plane[0].dst.stride = 32;
build_inter_predictors(xd, 0, 2, bw, bh, 0, 0, bw, bh,
#if CONFIG_MASKED_INTERINTER
mask_offset_x, mask_offset_y,
#endif
mi_x, mi_y);
xd->plane[0].dst.buf = tmp_buf2;
xd->plane[0].dst.stride = 32;
build_inter_predictors(xd, 0, 3, bw, bh, 0, 0, bw, bh,
#if CONFIG_MASKED_INTERINTER
mask_offset_x, mask_offset_y,
#endif
mi_x, mi_y);
break;
default:
assert(0);
}
if (partition != PARTITION_SPLIT) {
vp9_build_masked_inter_predictor_complex(orig_dst, orig_dst_stride,
tmp_buf, 32,
0, mi_row, mi_col,
mi_row_ori, mi_col_ori,
BLOCK_8X8, top_bsize,
partition);
xd->plane[0].dst.buf = orig_dst;
xd->plane[0].dst.stride = orig_dst_stride;
} else {
vp9_build_masked_inter_predictor_complex(orig_dst, orig_dst_stride,
tmp_buf, 32,
0, mi_row, mi_col,
mi_row_ori, mi_col_ori,
BLOCK_8X8, top_bsize,
PARTITION_VERT);
vp9_build_masked_inter_predictor_complex(tmp_buf1, 32,
tmp_buf2, 32,
0, mi_row, mi_col,
mi_row_ori, mi_col_ori,
BLOCK_8X8, top_bsize,
PARTITION_VERT);
vp9_build_masked_inter_predictor_complex(orig_dst, orig_dst_stride,
tmp_buf1, 32,
0, mi_row, mi_col,
mi_row_ori, mi_col_ori,
BLOCK_8X8, top_bsize,
PARTITION_HORZ);
xd->plane[0].dst.buf = orig_dst;
xd->plane[0].dst.stride = orig_dst_stride;
}
}
void vp9_build_inter_predictors_sbuv_sub8x8_extend(MACROBLOCKD *xd,
#if CONFIG_MASKED_INTERINTER
int mi_row, int mi_col,
#endif
int mi_row_ori,
int mi_col_ori,
BLOCK_SIZE top_bsize) {
int plane;
const int mi_x = mi_col_ori * MI_SIZE;
const int mi_y = mi_row_ori * MI_SIZE;
#if CONFIG_MASKED_INTERINTER
const int mask_offset_x = (mi_col - mi_col_ori) * MI_SIZE;
const int mask_offset_y = (mi_row - mi_row_ori) * MI_SIZE;
#endif
for (plane = 1; plane < MAX_MB_PLANE; ++plane) {
const BLOCK_SIZE plane_bsize = get_plane_block_size(top_bsize,
&xd->plane[plane]);
const int num_4x4_w = num_4x4_blocks_wide_lookup[plane_bsize];
const int num_4x4_h = num_4x4_blocks_high_lookup[plane_bsize];
const int bw = 4 * num_4x4_w;
const int bh = 4 * num_4x4_h;
build_inter_predictors(xd, plane, 0, bw, bh, 0, 0, bw, bh,
#if CONFIG_MASKED_INTERINTER
mask_offset_x, mask_offset_y,
#endif
mi_x, mi_y);
}
}
#endif
// TODO(jingning): This function serves as a placeholder for decoder prediction
// using on demand border extension. It should be moved to /decoder/ directory.
static void dec_build_inter_predictors(MACROBLOCKD *xd, int plane, int block,
int bw, int bh,
int x, int y, int w, int h,
#if CONFIG_SUPERTX && CONFIG_MASKED_INTERINTER
int mask_offset_x, int mask_offset_y,
#endif
int mi_x, int mi_y) {
struct macroblockd_plane *const pd = &xd->plane[plane];
const MODE_INFO *mi = xd->mi[0];
@@ -1044,27 +377,8 @@ static void dec_build_inter_predictors(MACROBLOCKD *xd, int plane, int block,
}
}
#if CONFIG_MASKED_INTERINTER
if (ref && get_mask_bits(mi->mbmi.sb_type)
&& mi->mbmi.use_masked_interinter) {
uint8_t tmp_dst[4096];
inter_predictor(buf_ptr, buf_stride, tmp_dst, 64,
subpel_x, subpel_y, sf, w, h, 0, kernel, xs, ys);
#if CONFIG_SUPERTX
build_masked_compound_extend(dst, dst_buf->stride, tmp_dst, 64, plane,
mi->mbmi.mask_index, mi->mbmi.sb_type,
mask_offset_x, mask_offset_y, h, w);
#else
build_masked_compound(dst, dst_buf->stride, tmp_dst, 64,
mi->mbmi.mask_index, mi->mbmi.sb_type, h, w);
#endif
} else {
#endif
inter_predictor(buf_ptr, buf_stride, dst, dst_buf->stride, subpel_x,
subpel_y, sf, w, h, ref, kernel, xs, ys);
#if CONFIG_MASKED_INTERINTER
}
#endif
}
}
@@ -1087,198 +401,13 @@ void vp9_dec_build_inter_predictors_sb(MACROBLOCKD *xd, int mi_row, int mi_col,
for (y = 0; y < num_4x4_h; ++y)
for (x = 0; x < num_4x4_w; ++x)
dec_build_inter_predictors(xd, plane, i++, bw, bh,
4 * x, 4 * y, 4, 4,
#if CONFIG_SUPERTX && CONFIG_MASKED_INTERINTER
0, 0,
#endif
mi_x, mi_y);
4 * x, 4 * y, 4, 4, mi_x, mi_y);
} else {
dec_build_inter_predictors(xd, plane, 0, bw, bh,
0, 0, bw, bh,
#if CONFIG_SUPERTX && CONFIG_MASKED_INTERINTER
0, 0,
#endif
mi_x, mi_y);
}
}
#if CONFIG_INTERINTRA
if (xd->mi[0]->mbmi.ref_frame[1] == INTRA_FRAME &&
is_interintra_allowed(xd->mi[0]->mbmi.sb_type))
vp9_build_interintra_predictors(xd, xd->plane[0].dst.buf,
xd->plane[1].dst.buf, xd->plane[2].dst.buf,
xd->plane[0].dst.stride,
xd->plane[1].dst.stride,
xd->plane[2].dst.stride, bsize);
#endif
}
#if CONFIG_SUPERTX
#if CONFIG_MASKED_INTERINTER
void vp9_dec_build_inter_predictors_sb_extend(MACROBLOCKD *xd,
int mi_row, int mi_col,
int mi_row_ori, int mi_col_ori,
BLOCK_SIZE bsize) {
int plane;
const int mi_x = mi_col_ori * MI_SIZE;
const int mi_y = mi_row_ori * MI_SIZE;
const int mask_offset_x = (mi_col - mi_col_ori) * MI_SIZE;
const int mask_offset_y = (mi_row - mi_row_ori) * MI_SIZE;
for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize,
&xd->plane[plane]);
const int num_4x4_w = num_4x4_blocks_wide_lookup[plane_bsize];
const int num_4x4_h = num_4x4_blocks_high_lookup[plane_bsize];
const int bw = 4 * num_4x4_w;
const int bh = 4 * num_4x4_h;
if (xd->mi[0]->mbmi.sb_type < BLOCK_8X8) {
int i = 0, x, y;
assert(bsize == BLOCK_8X8);
for (y = 0; y < num_4x4_h; ++y)
for (x = 0; x < num_4x4_w; ++x)
dec_build_inter_predictors(xd, plane, i++, bw, bh, 4 * x, 4 * y, 4, 4,
mask_offset_x, mask_offset_y, mi_x, mi_y);
} else {
dec_build_inter_predictors(xd, plane, 0, bw, bh, 0, 0, bw, bh,
mask_offset_x, mask_offset_y, mi_x, mi_y);
0, 0, bw, bh, mi_x, mi_y);
}
}
}
#endif
void vp9_dec_build_inter_predictors_sby_sub8x8_extend(MACROBLOCKD *xd,
int mi_row, int mi_col,
int mi_row_ori,
int mi_col_ori,
BLOCK_SIZE top_bsize,
PARTITION_TYPE partition) {
const int mi_x = mi_col_ori * MI_SIZE;
const int mi_y = mi_row_ori * MI_SIZE;
#if CONFIG_MASKED_INTERINTER
const int mask_offset_x = (mi_col - mi_col_ori) * MI_SIZE;
const int mask_offset_y = (mi_row - mi_row_ori) * MI_SIZE;
#endif
uint8_t *orig_dst;
int orig_dst_stride;
int bw = 4 << b_width_log2(top_bsize);
int bh = 4 << b_height_log2(top_bsize);
DECLARE_ALIGNED_ARRAY(16, uint8_t, tmp_buf, 32 * 32);
DECLARE_ALIGNED_ARRAY(16, uint8_t, tmp_buf1, 32 * 32);
DECLARE_ALIGNED_ARRAY(16, uint8_t, tmp_buf2, 32 * 32);
orig_dst = xd->plane[0].dst.buf;
orig_dst_stride = xd->plane[0].dst.stride;
dec_build_inter_predictors(xd, 0, 0, bw, bh, 0, 0, bw, bh,
#if CONFIG_MASKED_INTERINTER
mask_offset_x, mask_offset_y,
#endif
mi_x, mi_y);
xd->plane[0].dst.buf = tmp_buf;
xd->plane[0].dst.stride = 32;
switch (partition) {
case PARTITION_HORZ:
dec_build_inter_predictors(xd, 0, 2, bw, bh, 0, 0, bw, bh,
#if CONFIG_MASKED_INTERINTER
mask_offset_x, mask_offset_y,
#endif
mi_x, mi_y);
break;
case PARTITION_VERT:
dec_build_inter_predictors(xd, 0, 1, bw, bh, 0, 0, bw, bh,
#if CONFIG_MASKED_INTERINTER
mask_offset_x, mask_offset_y,
#endif
mi_x, mi_y);
break;
case PARTITION_SPLIT:
dec_build_inter_predictors(xd, 0, 1, bw, bh, 0, 0, bw, bh,
#if CONFIG_MASKED_INTERINTER
mask_offset_x, mask_offset_y,
#endif
mi_x, mi_y);
xd->plane[0].dst.buf = tmp_buf1;
xd->plane[0].dst.stride = 32;
dec_build_inter_predictors(xd, 0, 2, bw, bh, 0, 0, bw, bh,
#if CONFIG_MASKED_INTERINTER
mask_offset_x, mask_offset_y,
#endif
mi_x, mi_y);
xd->plane[0].dst.buf = tmp_buf2;
xd->plane[0].dst.stride = 32;
dec_build_inter_predictors(xd, 0, 3, bw, bh, 0, 0, bw, bh,
#if CONFIG_MASKED_INTERINTER
mask_offset_x, mask_offset_y,
#endif
mi_x, mi_y);
break;
default:
assert(0);
}
if (partition != PARTITION_SPLIT) {
vp9_build_masked_inter_predictor_complex(orig_dst, orig_dst_stride,
tmp_buf, 32,
0, mi_row, mi_col,
mi_row_ori, mi_col_ori,
BLOCK_8X8, top_bsize,
partition);
xd->plane[0].dst.buf = orig_dst;
xd->plane[0].dst.stride = orig_dst_stride;
} else {
vp9_build_masked_inter_predictor_complex(orig_dst, orig_dst_stride,
tmp_buf, 32,
0, mi_row, mi_col,
mi_row_ori, mi_col_ori,
BLOCK_8X8, top_bsize,
PARTITION_VERT);
vp9_build_masked_inter_predictor_complex(tmp_buf1, 32,
tmp_buf2, 32,
0, mi_row, mi_col,
mi_row_ori, mi_col_ori,
BLOCK_8X8, top_bsize,
PARTITION_VERT);
vp9_build_masked_inter_predictor_complex(orig_dst, orig_dst_stride,
tmp_buf1, 32,
0, mi_row, mi_col,
mi_row_ori, mi_col_ori,
BLOCK_8X8, top_bsize,
PARTITION_HORZ);
xd->plane[0].dst.buf = orig_dst;
xd->plane[0].dst.stride = orig_dst_stride;
}
}
void vp9_dec_build_inter_predictors_sbuv_sub8x8_extend(MACROBLOCKD *xd,
#if CONFIG_MASKED_INTERINTER
int mi_row, int mi_col,
#endif
int mi_row_ori,
int mi_col_ori,
BLOCK_SIZE top_bsize) {
int plane;
const int mi_x = mi_col_ori * MI_SIZE;
const int mi_y = mi_row_ori * MI_SIZE;
#if CONFIG_MASKED_INTERINTER
const int mask_offset_x = (mi_col - mi_col_ori) * MI_SIZE;
const int mask_offset_y = (mi_row - mi_row_ori) * MI_SIZE;
#endif
for (plane = 1; plane < MAX_MB_PLANE; ++plane) {
const BLOCK_SIZE plane_bsize = get_plane_block_size(top_bsize,
&xd->plane[plane]);
const int num_4x4_w = num_4x4_blocks_wide_lookup[plane_bsize];
const int num_4x4_h = num_4x4_blocks_high_lookup[plane_bsize];
const int bw = 4 * num_4x4_w;
const int bh = 4 * num_4x4_h;
dec_build_inter_predictors(xd, plane, 0, bw, bh, 0, 0, bw, bh,
#if CONFIG_MASKED_INTERINTER
mask_offset_x, mask_offset_y,
#endif
mi_x, mi_y);
}
}
#endif
void vp9_setup_dst_planes(struct macroblockd_plane planes[MAX_MB_PLANE],
const YV12_BUFFER_CONFIG *src,

View File

@@ -65,60 +65,6 @@ void vp9_setup_pre_planes(MACROBLOCKD *xd, int idx,
const YV12_BUFFER_CONFIG *src, int mi_row, int mi_col,
const struct scale_factors *sf);
#if CONFIG_MASKED_INTERINTER
void vp9_generate_masked_weight(int mask_index, BLOCK_SIZE sb_type,
int h, int w, uint8_t *mask, int stride);
void vp9_generate_hard_mask(int mask_index, BLOCK_SIZE sb_type,
int h, int w, uint8_t *mask, int stride);
#endif
#if CONFIG_SUPERTX
void vp9_build_inter_predictors_sby_sub8x8_extend(MACROBLOCKD *xd,
int mi_row, int mi_col,
int mi_row_ori,
int mi_col_ori,
BLOCK_SIZE top_bsize,
PARTITION_TYPE partition);
void vp9_build_inter_predictors_sbuv_sub8x8_extend(MACROBLOCKD *xd,
#if CONFIG_MASKED_INTERINTER
int mi_row, int mi_col,
#endif
int mi_row_ori,
int mi_col_ori,
BLOCK_SIZE top_bsize);
void vp9_build_masked_inter_predictor_complex(uint8_t *dst, int dst_stride,
uint8_t *dst2, int dst2_stride,
int plane,
int mi_row, int mi_col,
int mi_row_ori, int mi_col_ori,
BLOCK_SIZE bsize,
BLOCK_SIZE top_bsize,
PARTITION_TYPE partition);
void vp9_dec_build_inter_predictors_sby_sub8x8_extend(MACROBLOCKD *xd,
int mi_row, int mi_col,
int mi_row_ori,
int mi_col_ori,
BLOCK_SIZE top_bsize,
PARTITION_TYPE p);
void vp9_dec_build_inter_predictors_sbuv_sub8x8_extend(MACROBLOCKD *xd,
#if CONFIG_MASKED_INTERINTER
int mi_row, int mi_col,
#endif
int mi_row_ori,
int mi_col_ori,
BLOCK_SIZE top_bsize);
#if CONFIG_MASKED_INTERINTER
void vp9_build_inter_predictors_sb_extend(MACROBLOCKD *xd,
int mi_row, int mi_col,
int mi_row_ori, int mi_col_ori,
BLOCK_SIZE bsize);
void vp9_dec_build_inter_predictors_sb_extend(MACROBLOCKD *xd,
int mi_row, int mi_col,
int mi_row_ori, int mi_col_ori,
BLOCK_SIZE bsize);
#endif
#endif
#ifdef __cplusplus
} // extern "C"
#endif

View File

@@ -444,227 +444,8 @@ static void build_intra_predictors(const MACROBLOCKD *xd, const uint8_t *ref,
}
}
#if CONFIG_FILTERINTRA
static void filter_intra_predictors_4tap(uint8_t *ypred_ptr, int y_stride,
int bs,
const uint8_t *yabove_row,
const uint8_t *yleft_col,
int mode) {
static const int prec_bits = 10;
static const int round_val = 511;
int k, r, c;
int pred[33][33];
int mean, ipred;
static const int taps4_4[10][4] = {
{735, 881, -537, -54},
{1005, 519, -488, -11},
{383, 990, -343, -6},
{442, 805, -542, 319},
{658, 616, -133, -116},
{875, 442, -141, -151},
{386, 741, -23, -80},
{390, 1027, -446, 51},
{679, 606, -523, 262},
{903, 922, -778, -23}
};
static const int taps4_8[10][4] = {
{648, 803, -444, 16},
{972, 620, -576, 7},
{561, 967, -499, -5},
{585, 762, -468, 144},
{596, 619, -182, -9},
{895, 459, -176, -153},
{557, 722, -126, -129},
{601, 839, -523, 105},
{562, 709, -499, 251},
{803, 872, -695, 43}
};
static const int taps4_16[10][4] = {
{423, 728, -347, 111},
{963, 685, -665, 23},
{281, 1024, -480, 216},
{640, 596, -437, 78},
{429, 669, -259, 99},
{740, 646, -415, 23},
{568, 771, -346, 40},
{404, 833, -486, 209},
{398, 712, -423, 307},
{939, 935, -887, 17}
};
static const int taps4_32[10][4] = {
{477, 737, -393, 150},
{881, 630, -546, 67},
{506, 984, -443, -20},
{114, 459, -270, 528},
{433, 528, 14, 3},
{837, 470, -301, -30},
{181, 777, 89, -107},
{-29, 716, -232, 259},
{589, 646, -495, 255},
{740, 884, -728, 77}
};
const int c1 = (bs >= 32) ? taps4_32[mode][0] : ((bs >= 16) ?
taps4_16[mode][0] : ((bs >= 8) ? taps4_8[mode][0] : taps4_4[mode][0]));
const int c2 = (bs >= 32) ? taps4_32[mode][1] : ((bs >= 16) ?
taps4_16[mode][1] : ((bs >= 8) ? taps4_8[mode][1] : taps4_4[mode][1]));
const int c3 = (bs >= 32) ? taps4_32[mode][2] : ((bs >= 16) ?
taps4_16[mode][2] : ((bs >= 8) ? taps4_8[mode][2] : taps4_4[mode][2]));
const int c4 = (bs >= 32) ? taps4_32[mode][3] : ((bs >= 16) ?
taps4_16[mode][3] : ((bs >= 8) ? taps4_8[mode][3] : taps4_4[mode][3]));
k = 0;
mean = 0;
while (k < bs) {
mean = mean + (int)yleft_col[k];
mean = mean + (int)yabove_row[k];
k++;
}
mean = (mean + bs) / (2 * bs);
for (r = 0; r < bs; r++)
pred[r + 1][0] = (int)yleft_col[r] - mean;
for (c = 0; c < 2 * bs + 1; c++)
pred[0][c] = (int)yabove_row[c - 1] - mean;
for (r = 1; r < bs + 1; r++)
for (c = 1; c < 2 * bs + 1 - r; c++) {
ipred = c1 * pred[r - 1][c] + c2 * pred[r][c - 1]
+ c3 * pred[r - 1][c - 1] + c4 * pred[r - 1][c + 1];
pred[r][c] = ipred < 0 ? -((-ipred + round_val) >> prec_bits) :
((ipred + round_val) >> prec_bits);
}
for (r = 0; r < bs; r++) {
for (c = 0; c < bs; c++) {
ipred = pred[r + 1][c + 1] + mean;
ypred_ptr[c] = clip_pixel(ipred);
}
ypred_ptr += y_stride;
}
}
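// Editor's note (not part of the diff): the recursion above computes, in
// Q10 fixed point after mean removal,
//   pred[r][c] = round((c1*pred[r-1][c] + c2*pred[r][c-1]
//                       + c3*pred[r-1][c-1] + c4*pred[r-1][c+1]) / 1024)
// seeded from the left column and the above row, with taps c1..c4 chosen
// by prediction mode and block size from the tables above.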
static void build_filter_intra_predictors(const MACROBLOCKD *xd,
const uint8_t *ref, int ref_stride,
uint8_t *dst, int dst_stride,
PREDICTION_MODE mode, TX_SIZE tx_size,
int up_available, int left_available,
int right_available, int x, int y,
int plane) {
int i;
DECLARE_ALIGNED_ARRAY(16, uint8_t, left_col, 64);
DECLARE_ALIGNED_ARRAY(16, uint8_t, above_data, 128 + 16);
uint8_t *above_row = above_data + 16;
const uint8_t *const_above_row = above_row;
const int bs = 4 << tx_size;
int frame_width, frame_height;
int x0, y0;
const struct macroblockd_plane *const pd = &xd->plane[plane];
// Get current frame pointer, width and height.
if (plane == 0) {
frame_width = xd->cur_buf->y_width;
frame_height = xd->cur_buf->y_height;
} else {
frame_width = xd->cur_buf->uv_width;
frame_height = xd->cur_buf->uv_height;
}
// Get block position in current frame.
x0 = (-xd->mb_to_left_edge >> (3 + pd->subsampling_x)) + x;
y0 = (-xd->mb_to_top_edge >> (3 + pd->subsampling_y)) + y;
vpx_memset(left_col, 129, 64);
// left
if (left_available) {
if (xd->mb_to_bottom_edge < 0) {
/* slower path if the block needs border extension */
if (y0 + bs <= frame_height) {
for (i = 0; i < bs; ++i)
left_col[i] = ref[i * ref_stride - 1];
} else {
const int extend_bottom = frame_height - y0;
for (i = 0; i < extend_bottom; ++i)
left_col[i] = ref[i * ref_stride - 1];
for (; i < bs; ++i)
left_col[i] = ref[(extend_bottom - 1) * ref_stride - 1];
}
} else {
/* faster path if the block does not need extension */
for (i = 0; i < bs; ++i)
left_col[i] = ref[i * ref_stride - 1];
}
}
// TODO(hkuang) do not extend 2*bs pixels for all modes.
// above
if (up_available) {
const uint8_t *above_ref = ref - ref_stride;
if (xd->mb_to_right_edge < 0) {
/* slower path if the block needs border extension */
if (x0 + 2 * bs <= frame_width) {
if (right_available && bs == 4) {
vpx_memcpy(above_row, above_ref, 2 * bs);
} else {
vpx_memcpy(above_row, above_ref, bs);
vpx_memset(above_row + bs, above_row[bs - 1], bs);
}
} else if (x0 + bs <= frame_width) {
const int r = frame_width - x0;
if (right_available && bs == 4) {
vpx_memcpy(above_row, above_ref, r);
vpx_memset(above_row + r, above_row[r - 1],
x0 + 2 * bs - frame_width);
} else {
vpx_memcpy(above_row, above_ref, bs);
vpx_memset(above_row + bs, above_row[bs - 1], bs);
}
} else if (x0 <= frame_width) {
const int r = frame_width - x0;
if (right_available && bs == 4) {
vpx_memcpy(above_row, above_ref, r);
vpx_memset(above_row + r, above_row[r - 1],
x0 + 2 * bs - frame_width);
} else {
vpx_memcpy(above_row, above_ref, r);
vpx_memset(above_row + r, above_row[r - 1],
x0 + 2 * bs - frame_width);
}
}
above_row[-1] = left_available ? above_ref[-1] : 129;
} else {
/* faster path if the block does not need extension */
if (bs == 4 && right_available && left_available) {
const_above_row = above_ref;
} else {
vpx_memcpy(above_row, above_ref, bs);
if (bs == 4 && right_available)
vpx_memcpy(above_row + bs, above_ref + bs, bs);
else
vpx_memset(above_row + bs, above_row[bs - 1], bs);
above_row[-1] = left_available ? above_ref[-1] : 129;
}
}
} else {
vpx_memset(above_row, 127, bs * 2);
above_row[-1] = 127;
}
// predict
filter_intra_predictors_4tap(dst, dst_stride, bs, const_above_row, left_col,
mode);
}
#endif
void vp9_predict_intra_block(const MACROBLOCKD *xd, int block_idx, int bwl_in,
TX_SIZE tx_size, PREDICTION_MODE mode,
#if CONFIG_FILTERINTRA
int filterbit,
#endif
const uint8_t *ref, int ref_stride,
uint8_t *dst, int dst_stride,
int aoff, int loff, int plane) {
@@ -675,708 +456,8 @@ void vp9_predict_intra_block(const MACROBLOCKD *xd, int block_idx, int bwl_in,
const int have_right = ((block_idx & wmask) != wmask);
const int x = aoff * 4;
const int y = loff * 4;
#if CONFIG_FILTERINTRA
const int filterflag = is_filter_allowed(mode) && is_filter_enabled(tx_size)
&& filterbit;
#endif
assert(bwl >= 0);
#if CONFIG_FILTERINTRA
if (!filterflag) {
#endif
build_intra_predictors(xd, ref, ref_stride, dst, dst_stride, mode, tx_size,
have_top, have_left, have_right, x, y, plane);
#if CONFIG_FILTERINTRA
} else {
build_filter_intra_predictors(xd, ref, ref_stride, dst, dst_stride, mode,
tx_size, have_top, have_left, have_right, x, y, plane);
}
#endif
}
#if CONFIG_INTERINTRA
#if CONFIG_MASKED_INTERINTRA
#define MASK_WEIGHT_BITS_INTERINTRA 6
static int get_masked_weight_interintra(int m) {
#define SMOOTHER_LEN_INTERINTRA 32
static const uint8_t smoothfn[2 * SMOOTHER_LEN_INTERINTRA + 1] = {
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 1, 1, 1,
1, 1, 2, 2, 3, 4, 5, 6,
8, 9, 12, 14, 17, 21, 24, 28,
32,
36, 40, 43, 47, 50, 52, 55, 56,
58, 59, 60, 61, 62, 62, 63, 63,
63, 63, 63, 64, 64, 64, 64, 64,
64, 64, 64, 64, 64, 64, 64, 64,
};
if (m < -SMOOTHER_LEN_INTERINTRA)
return 0;
else if (m > SMOOTHER_LEN_INTERINTRA)
return (1 << MASK_WEIGHT_BITS_INTERINTRA);
else
return smoothfn[m + SMOOTHER_LEN_INTERINTRA];
}
static int get_hard_mask_interintra(int m) {
return m > 0;
}
// Equation of line: f(x, y) = a[0]*(x - a[2]*w/4) + a[1]*(y - a[3]*h/4) = 0
// The soft mask is obtained by computing f(x, y) and then calling
// get_masked_weight(f(x, y)).
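// Worked example (values read off smoothfn and mask_params_sml_interintra
// below): for an 8x8 block with a[] = {-1, 2, 2, 2}, f(x, y) reduces to
// -(j - 4) + 2 * (i - 4). At (i, j) = (0, 7), f = -11 and
// get_masked_weight_interintra() returns smoothfn[21] = 4, so that pixel is
// taken almost entirely from the inter predictor; at (i, j) = (7, 0),
// f = 10 gives smoothfn[42] = 59, so the intra predictor dominates (see
// combine_interintra() below, where m weights the intra side).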
static const int mask_params_sml_interintra[1 << MASK_BITS_SML_INTERINTRA]
[4] = {
{-1, 2, 2, 2},
{ 1, -2, 2, 2},
{-2, 1, 2, 2},
{ 2, -1, 2, 2},
{ 2, 1, 2, 2},
{-2, -1, 2, 2},
{ 1, 2, 2, 2},
{-1, -2, 2, 2},
};
static const int mask_params_med_hgtw_interintra[1 << MASK_BITS_MED_INTERINTRA]
[4] = {
{-1, 2, 2, 2},
{ 1, -2, 2, 2},
{-2, 1, 2, 2},
{ 2, -1, 2, 2},
{ 2, 1, 2, 2},
{-2, -1, 2, 2},
{ 1, 2, 2, 2},
{-1, -2, 2, 2},
{-1, 2, 2, 1},
{ 1, -2, 2, 1},
{-1, 2, 2, 3},
{ 1, -2, 2, 3},
{ 1, 2, 2, 1},
{-1, -2, 2, 1},
{ 1, 2, 2, 3},
{-1, -2, 2, 3},
};
static const int mask_params_med_hltw_interintra[1 << MASK_BITS_MED_INTERINTRA]
[4] = {
{-1, 2, 2, 2},
{ 1, -2, 2, 2},
{-2, 1, 2, 2},
{ 2, -1, 2, 2},
{ 2, 1, 2, 2},
{-2, -1, 2, 2},
{ 1, 2, 2, 2},
{-1, -2, 2, 2},
{-2, 1, 1, 2},
{ 2, -1, 1, 2},
{-2, 1, 3, 2},
{ 2, -1, 3, 2},
{ 2, 1, 1, 2},
{-2, -1, 1, 2},
{ 2, 1, 3, 2},
{-2, -1, 3, 2},
};
static const int mask_params_med_heqw_interintra[1 << MASK_BITS_MED_INTERINTRA]
[4] = {
{-1, 2, 2, 2},
{ 1, -2, 2, 2},
{-2, 1, 2, 2},
{ 2, -1, 2, 2},
{ 2, 1, 2, 2},
{-2, -1, 2, 2},
{ 1, 2, 2, 2},
{-1, -2, 2, 2},
{ 0, 2, 0, 1},
{ 0, -2, 0, 1},
{ 0, 2, 0, 3},
{ 0, -2, 0, 3},
{ 2, 0, 1, 0},
{-2, 0, 1, 0},
{ 2, 0, 3, 0},
{-2, 0, 3, 0},
};
static const int mask_params_big_hgtw_interintra[1 << MASK_BITS_BIG_INTERINTRA]
[4] = {
{-1, 2, 2, 2},
{ 1, -2, 2, 2},
{-2, 1, 2, 2},
{ 2, -1, 2, 2},
{ 2, 1, 2, 2},
{-2, -1, 2, 2},
{ 1, 2, 2, 2},
{-1, -2, 2, 2},
{-1, 2, 2, 1},
{ 1, -2, 2, 1},
{-1, 2, 2, 3},
{ 1, -2, 2, 3},
{ 1, 2, 2, 1},
{-1, -2, 2, 1},
{ 1, 2, 2, 3},
{-1, -2, 2, 3},
{-2, 1, 1, 2},
{ 2, -1, 1, 2},
{-2, 1, 3, 2},
{ 2, -1, 3, 2},
{ 2, 1, 1, 2},
{-2, -1, 1, 2},
{ 2, 1, 3, 2},
{-2, -1, 3, 2},
{ 0, 2, 0, 1},
{ 0, -2, 0, 1},
{ 0, 2, 0, 2},
{ 0, -2, 0, 2},
{ 0, 2, 0, 3},
{ 0, -2, 0, 3},
{ 2, 0, 2, 0},
{-2, 0, 2, 0},
};
static const int mask_params_big_hltw_interintra[1 << MASK_BITS_BIG_INTERINTRA]
[4] = {
{-1, 2, 2, 2},
{ 1, -2, 2, 2},
{-2, 1, 2, 2},
{ 2, -1, 2, 2},
{ 2, 1, 2, 2},
{-2, -1, 2, 2},
{ 1, 2, 2, 2},
{-1, -2, 2, 2},
{-1, 2, 2, 1},
{ 1, -2, 2, 1},
{-1, 2, 2, 3},
{ 1, -2, 2, 3},
{ 1, 2, 2, 1},
{-1, -2, 2, 1},
{ 1, 2, 2, 3},
{-1, -2, 2, 3},
{-2, 1, 1, 2},
{ 2, -1, 1, 2},
{-2, 1, 3, 2},
{ 2, -1, 3, 2},
{ 2, 1, 1, 2},
{-2, -1, 1, 2},
{ 2, 1, 3, 2},
{-2, -1, 3, 2},
{ 0, 2, 0, 2},
{ 0, -2, 0, 2},
{ 2, 0, 1, 0},
{-2, 0, 1, 0},
{ 2, 0, 2, 0},
{-2, 0, 2, 0},
{ 2, 0, 3, 0},
{-2, 0, 3, 0},
};
static const int mask_params_big_heqw_interintra[1 << MASK_BITS_BIG_INTERINTRA]
[4] = {
{-1, 2, 2, 2},
{ 1, -2, 2, 2},
{-2, 1, 2, 2},
{ 2, -1, 2, 2},
{ 2, 1, 2, 2},
{-2, -1, 2, 2},
{ 1, 2, 2, 2},
{-1, -2, 2, 2},
{-1, 2, 2, 1},
{ 1, -2, 2, 1},
{-1, 2, 2, 3},
{ 1, -2, 2, 3},
{ 1, 2, 2, 1},
{-1, -2, 2, 1},
{ 1, 2, 2, 3},
{-1, -2, 2, 3},
{-2, 1, 1, 2},
{ 2, -1, 1, 2},
{-2, 1, 3, 2},
{ 2, -1, 3, 2},
{ 2, 1, 1, 2},
{-2, -1, 1, 2},
{ 2, 1, 3, 2},
{-2, -1, 3, 2},
{ 0, 2, 0, 1},
{ 0, -2, 0, 1},
{ 0, 2, 0, 3},
{ 0, -2, 0, 3},
{ 2, 0, 1, 0},
{-2, 0, 1, 0},
{ 2, 0, 3, 0},
{-2, 0, 3, 0},
};
static const int *get_mask_params_interintra(int mask_index,
BLOCK_SIZE sb_type,
int h, int w) {
const int *a;
const int mask_bits = get_mask_bits_interintra(sb_type);
if (mask_index == MASK_NONE_INTERINTRA)
return NULL;
if (mask_bits == MASK_BITS_SML_INTERINTRA) {
a = mask_params_sml_interintra[mask_index];
} else if (mask_bits == MASK_BITS_MED_INTERINTRA) {
if (h > w)
a = mask_params_med_hgtw_interintra[mask_index];
else if (h < w)
a = mask_params_med_hltw_interintra[mask_index];
else
a = mask_params_med_heqw_interintra[mask_index];
} else if (mask_bits == MASK_BITS_BIG_INTERINTRA) {
if (h > w)
a = mask_params_big_hgtw_interintra[mask_index];
else if (h < w)
a = mask_params_big_hltw_interintra[mask_index];
else
a = mask_params_big_heqw_interintra[mask_index];
} else {
assert(0);
}
return a;
}
void vp9_generate_masked_weight_interintra(int mask_index,
BLOCK_SIZE sb_type,
int h, int w,
uint8_t *mask, int stride) {
int i, j;
const int *a = get_mask_params_interintra(mask_index, sb_type, h, w);
if (!a) return;
for (i = 0; i < h; ++i)
for (j = 0; j < w; ++j) {
int x = (j - (a[2] * w) / 4);
int y = (i - (a[3] * h) / 4);
int m = a[0] * x + a[1] * y;
mask[i * stride + j] = get_masked_weight_interintra(m);
}
}
void vp9_generate_hard_mask_interintra(int mask_index, BLOCK_SIZE sb_type,
int h, int w, uint8_t *mask, int stride) {
int i, j;
const int *a = get_mask_params_interintra(mask_index, sb_type, h, w);
if (!a) return;
for (i = 0; i < h; ++i)
for (j = 0; j < w; ++j) {
int x = (j - (a[2] * w) / 4);
int y = (i - (a[3] * h) / 4);
int m = a[0] * x + a[1] * y;
mask[i * stride + j] = get_hard_mask_interintra(m);
}
}
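/* For illustration only, not part of this patch: a minimal sketch of a
   harness that prints the 8x8 soft mask for mask index 0. It assumes an
   in-tree build (so that BLOCK_8X8 and the prototype in vp9_reconintra.h
   are visible) and that get_mask_bits_interintra(BLOCK_8X8) is nonzero. */
#if 0
#include <stdio.h>
#include <stdint.h>
#include "vp9/common/vp9_enums.h"      /* BLOCK_SIZE, BLOCK_8X8 */
#include "vp9/common/vp9_reconintra.h" /* mask generator prototype */
int main(void) {
uint8_t mask[64];
int i, j;
vp9_generate_masked_weight_interintra(0, BLOCK_8X8, 8, 8, mask, 8);
for (i = 0; i < 8; ++i) {
for (j = 0; j < 8; ++j)
printf("%3d ", mask[i * 8 + j]);  /* weights in [0, 64] */
printf("\n");
}
return 0;
}
#endif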
#endif
static void combine_interintra(PREDICTION_MODE mode,
#if CONFIG_MASKED_INTERINTRA
int use_masked_interintra,
int mask_index,
BLOCK_SIZE bsize,
#endif
uint8_t *comppred,
int compstride,
uint8_t *interpred,
int interstride,
uint8_t *intrapred,
int intrastride,
int bw, int bh) {
static const int scale_bits = 8;
static const int scale_max = 256;
static const int scale_round = 127;
static const int weights1d[64] = {
128, 125, 122, 119, 116, 114, 111, 109,
107, 105, 103, 101, 99, 97, 96, 94,
93, 91, 90, 89, 88, 86, 85, 84,
83, 82, 81, 81, 80, 79, 78, 78,
77, 76, 76, 75, 75, 74, 74, 73,
73, 72, 72, 71, 71, 71, 70, 70,
70, 70, 69, 69, 69, 69, 68, 68,
68, 68, 68, 67, 67, 67, 67, 67,
};
int size = MAX(bw, bh);
int size_scale = (size >= 64 ? 1 :
size == 32 ? 2 :
size == 16 ? 4 :
size == 8 ? 8 : 16);
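// Note: weights1d[] decays from 128 (an even blend at the boundary the
// intra predictor extrapolates from) down to 67, so the intra contribution
// fades with distance from that boundary. size_scale strides through the
// 64-entry table so the ramp spans the whole block, e.g. a 16x16 block
// samples weights1d[0], weights1d[4], ..., weights1d[60].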
int i, j;
#if CONFIG_MASKED_INTERINTRA
if (use_masked_interintra && get_mask_bits_interintra(bsize)) {
uint8_t mask[4096];
vp9_generate_masked_weight_interintra(mask_index, bsize, bh, bw, mask, bw);
for (i = 0; i < bh; ++i) {
for (j = 0; j < bw; ++j) {
int m = mask[i * bw + j];
comppred[i * compstride + j] =
(intrapred[i * intrastride + j] * m +
interpred[i * interstride + j] *
((1 << MASK_WEIGHT_BITS_INTERINTRA) - m) +
(1 << (MASK_WEIGHT_BITS_INTERINTRA - 1))) >>
MASK_WEIGHT_BITS_INTERINTRA;
}
}
return;
}
#endif
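// For the directional blends below, e.g. V_PRED on an 8x8 block
// (size_scale == 8): row 0 uses scale = weights1d[0] = 128, an even mix of
// inter and intra; row 7 uses scale = weights1d[56] = 68, so the intra
// predictor contributes only about 27% (68/256) at the far edge.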
switch (mode) {
case V_PRED:
for (i = 0; i < bh; ++i) {
for (j = 0; j < bw; ++j) {
int scale = weights1d[i * size_scale];
comppred[i * compstride + j] =
((scale_max - scale) * interpred[i * interstride + j] +
scale * intrapred[i * intrastride + j] + scale_round)
>> scale_bits;
}
}
break;
case H_PRED:
for (i = 0; i < bh; ++i) {
for (j = 0; j < bw; ++j) {
int scale = weights1d[j * size_scale];
comppred[i * compstride + j] =
((scale_max - scale) * interpred[i * interstride + j] +
scale * intrapred[i * intrastride + j] + scale_round)
>> scale_bits;
}
}
break;
case D63_PRED:
case D117_PRED:
for (i = 0; i < bh; ++i) {
for (j = 0; j < bw; ++j) {
int scale = (weights1d[i * size_scale] * 3 +
weights1d[j * size_scale]) >> 2;
comppred[i * compstride + j] =
((scale_max - scale) * interpred[i * interstride + j] +
scale * intrapred[i * intrastride + j] + scale_round)
>> scale_bits;
}
}
break;
case D207_PRED:
case D153_PRED:
for (i = 0; i < bh; ++i) {
for (j = 0; j < bw; ++j) {
int scale = (weights1d[j * size_scale] * 3 +
weights1d[i * size_scale]) >> 2;
comppred[i * compstride + j] =
((scale_max - scale) * interpred[i * interstride + j] +
scale * intrapred[i * intrastride + j] + scale_round)
>> scale_bits;
}
}
break;
case D135_PRED:
for (i = 0; i < bh; ++i) {
for (j = 0; j < bw; ++j) {
int scale = weights1d[(i < j ? i : j) * size_scale];
comppred[i * compstride + j] =
((scale_max - scale) * interpred[i * interstride + j] +
scale * intrapred[i * intrastride + j] + scale_round)
>> scale_bits;
}
}
break;
case D45_PRED:
for (i = 0; i < bh; ++i) {
for (j = 0; j < bw; ++j) {
int scale = (weights1d[i * size_scale] +
weights1d[j * size_scale]) >> 1;
comppred[i * compstride + j] =
((scale_max - scale) * interpred[i * interstride + j] +
scale * intrapred[i * intrastride + j] + scale_round)
>> scale_bits;
}
}
break;
case TM_PRED:
case DC_PRED:
default:
for (i = 0; i < bh; ++i) {
for (j = 0; j < bw; ++j) {
comppred[i * compstride + j] = (interpred[i * interstride + j] +
intrapred[i * intrastride + j]) >> 1;
}
}
break;
}
}
static void build_intra_predictors_for_2nd_block_interintra
(const MACROBLOCKD *xd, const uint8_t *ref,
int ref_stride, uint8_t *dst, int dst_stride,
PREDICTION_MODE mode, TX_SIZE tx_size,
int up_available, int left_available,
int right_available, int bwltbh,
int x, int y, int plane) {
int i;
DECLARE_ALIGNED_ARRAY(16, uint8_t, left_col, 64);
DECLARE_ALIGNED_ARRAY(16, uint8_t, above_data, 128 + 16);
uint8_t *above_row = above_data + 16;
const uint8_t *const_above_row = above_row;
const int bs = 4 << tx_size;
int frame_width, frame_height;
int x0, y0;
const struct macroblockd_plane *const pd = &xd->plane[plane];
const uint8_t *ref_fi;
int ref_stride_fi;
// 127 127 127 .. 127 127 127 127 127 127
// 129 A B .. Y Z
// 129 C D .. W X
// 129 E F .. U V
// 129 G H .. S T T T T T
// ..
once(init_intra_pred_fn_ptrs);
// Get current frame pointer, width and height.
if (plane == 0) {
frame_width = xd->cur_buf->y_width;
frame_height = xd->cur_buf->y_height;
} else {
frame_width = xd->cur_buf->uv_width;
frame_height = xd->cur_buf->uv_height;
}
// Get block position in current frame.
x0 = (-xd->mb_to_left_edge >> (3 + pd->subsampling_x)) + x;
y0 = (-xd->mb_to_top_edge >> (3 + pd->subsampling_y)) + y;
vpx_memset(left_col, 129, 64);
// left
if (left_available) {
if (bwltbh) {
ref_fi = ref;
ref_stride_fi = ref_stride;
} else {
ref_fi = dst;
ref_stride_fi = dst_stride;
}
if (xd->mb_to_bottom_edge < 0) {
/* slower path if the block needs border extension */
if (y0 + bs <= frame_height) {
for (i = 0; i < bs; ++i)
left_col[i] = ref_fi[i * ref_stride_fi - 1];
} else {
const int extend_bottom = frame_height - y0;
assert(extend_bottom >= 0);
for (i = 0; i < extend_bottom; ++i)
left_col[i] = ref_fi[i * ref_stride_fi - 1];
for (; i < bs; ++i)
left_col[i] = ref_fi[(extend_bottom - 1) * ref_stride_fi - 1];
}
} else {
/* faster path if the block does not need extension */
for (i = 0; i < bs; ++i)
left_col[i] = ref_fi[i * ref_stride_fi - 1];
}
}
// TODO(hkuang) do not extend 2*bs pixels for all modes.
// above
if (up_available) {
const uint8_t *above_ref;
if (bwltbh) {
ref_fi = dst;
ref_stride_fi = dst_stride;
above_row[-1] = left_available ? ref[-ref_stride-1] : 129;
} else {
ref_fi = ref;
ref_stride_fi = ref_stride;
above_row[-1] = ref[-ref_stride-1];
}
above_ref = ref_fi - ref_stride_fi;
if (xd->mb_to_right_edge < 0) {
/* slower path if the block needs border extension */
if (x0 + 2 * bs <= frame_width) {
if (right_available && bs == 4) {
vpx_memcpy(above_row, above_ref, 2 * bs);
} else {
vpx_memcpy(above_row, above_ref, bs);
vpx_memset(above_row + bs, above_row[bs - 1], bs);
}
} else if (x0 + bs <= frame_width) {
const int r = frame_width - x0;
if (right_available && bs == 4) {
vpx_memcpy(above_row, above_ref, r);
vpx_memset(above_row + r, above_row[r - 1],
x0 + 2 * bs - frame_width);
} else {
vpx_memcpy(above_row, above_ref, bs);
vpx_memset(above_row + bs, above_row[bs - 1], bs);
}
} else if (x0 <= frame_width) {
const int r = frame_width - x0;
assert(r >= 0);
/* the two branches were identical, so no right_available check is
needed here */
vpx_memcpy(above_row, above_ref, r);
vpx_memset(above_row + r, above_row[r - 1],
x0 + 2 * bs - frame_width);
}
} else {
/* faster path if the block does not need extension */
if (bs == 4 && right_available && left_available) {
const_above_row = above_ref;
} else {
vpx_memcpy(above_row, above_ref, bs);
if (bs == 4 && right_available)
vpx_memcpy(above_row + bs, above_ref + bs, bs);
else
vpx_memset(above_row + bs, above_row[bs - 1], bs);
}
}
} else {
vpx_memset(above_row, 127, bs * 2);
above_row[-1] = 127;
}
// predict
if (mode == DC_PRED) {
dc_pred[left_available][up_available][tx_size](dst, dst_stride,
const_above_row, left_col);
} else {
pred[mode][tx_size](dst, dst_stride, const_above_row, left_col);
}
}
// Break down rectangular intra prediction for joint spatio-temporal prediction
// into two square intra predictions.
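// For example, a 16x32 block (bw < bh) is predicted as a top 16x16 square
// from the true neighbors, then a bottom 16x16 square whose above row comes
// from the just-predicted pixels; the bwltbh flag in
// build_intra_predictors_for_2nd_block_interintra() selects whether ref or
// dst supplies each border.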
static void build_intra_predictors_for_interintra(MACROBLOCKD *xd,
uint8_t *src, int src_stride,
uint8_t *pred_ptr, int stride,
PREDICTION_MODE mode,
int bw, int bh,
int up_available, int left_available,
int right_available, int plane) {
if (bw == bh) {
build_intra_predictors(xd, src, src_stride, pred_ptr, stride,
mode, intra_size_log2_for_interintra(bw),
up_available, left_available, right_available,
0, 0, plane);
} else if (bw < bh) {
uint8_t *src_bottom = src + bw * src_stride;
uint8_t *pred_ptr_bottom = pred_ptr + bw * stride;
build_intra_predictors(xd, src, src_stride, pred_ptr, stride,
mode, intra_size_log2_for_interintra(bw),
up_available, left_available, right_available,
0, 0, plane);
build_intra_predictors_for_2nd_block_interintra(xd, src_bottom, src_stride,
pred_ptr_bottom, stride,
mode, intra_size_log2_for_interintra(bw),
up_available, left_available, 0, 1,
0, bw, plane);
} else {
uint8_t *src_right = src + bh;
uint8_t *pred_ptr_right = pred_ptr + bh;
build_intra_predictors(xd, src, src_stride, pred_ptr, stride,
mode, intra_size_log2_for_interintra(bh),
up_available, left_available, 1,
0, 0, plane);
build_intra_predictors_for_2nd_block_interintra(xd, src_right, src_stride,
pred_ptr_right, stride,
mode, intra_size_log2_for_interintra(bh),
up_available, left_available, right_available, 0,
bh, 0, plane);
}
}
void vp9_build_interintra_predictors_sby(MACROBLOCKD *xd,
uint8_t *ypred,
int ystride,
BLOCK_SIZE bsize) {
int bw = 4 << b_width_log2(bsize);
int bh = 4 << b_height_log2(bsize);
uint8_t intrapredictor[4096];
build_intra_predictors_for_interintra(
xd, xd->plane[0].dst.buf, xd->plane[0].dst.stride,
intrapredictor, bw,
xd->mi[0]->mbmi.interintra_mode, bw, bh,
xd->up_available, xd->left_available, 0, 0);
combine_interintra(xd->mi[0]->mbmi.interintra_mode,
#if CONFIG_MASKED_INTERINTRA
xd->mi[0]->mbmi.use_masked_interintra,
xd->mi[0]->mbmi.interintra_mask_index,
bsize,
#endif
xd->plane[0].dst.buf, xd->plane[0].dst.stride,
ypred, ystride, intrapredictor, bw, bw, bh);
}
void vp9_build_interintra_predictors_sbuv(MACROBLOCKD *xd,
uint8_t *upred,
uint8_t *vpred,
int ustride, int vstride,
BLOCK_SIZE bsize) {
int bwl = b_width_log2(bsize), bw = 2 << bwl;
int bhl = b_height_log2(bsize), bh = 2 << bhl;
uint8_t uintrapredictor[1024];
uint8_t vintrapredictor[1024];
build_intra_predictors_for_interintra(
xd, xd->plane[1].dst.buf, xd->plane[1].dst.stride,
uintrapredictor, bw,
xd->mi[0]->mbmi.interintra_uv_mode, bw, bh,
xd->up_available, xd->left_available, 0, 1);
build_intra_predictors_for_interintra(
xd, xd->plane[2].dst.buf, xd->plane[2].dst.stride,
vintrapredictor, bw,
xd->mi[0]->mbmi.interintra_uv_mode, bw, bh,
xd->up_available, xd->left_available, 0, 2);
combine_interintra(xd->mi[0]->mbmi.interintra_uv_mode,
#if CONFIG_MASKED_INTERINTRA
xd->mi[0]->mbmi.use_masked_interintra,
xd->mi[0]->mbmi.interintra_uv_mask_index,
bsize,
#endif
xd->plane[1].dst.buf, xd->plane[1].dst.stride,
upred, ustride, uintrapredictor, bw, bw, bh);
combine_interintra(xd->mi[0]->mbmi.interintra_uv_mode,
#if CONFIG_MASKED_INTERINTRA
xd->mi[0]->mbmi.use_masked_interintra,
xd->mi[0]->mbmi.interintra_uv_mask_index,
bsize,
#endif
xd->plane[2].dst.buf, xd->plane[2].dst.stride,
vpred, vstride, vintrapredictor, bw, bw, bh);
}
void vp9_build_interintra_predictors(MACROBLOCKD *xd,
uint8_t *ypred,
uint8_t *upred,
uint8_t *vpred,
int ystride, int ustride, int vstride,
BLOCK_SIZE bsize) {
vp9_build_interintra_predictors_sby(xd, ypred, ystride, bsize);
vp9_build_interintra_predictors_sbuv(xd, upred, vpred,
ustride, vstride, bsize);
}
#endif

View File

@@ -20,37 +20,9 @@ extern "C" {
void vp9_predict_intra_block(const MACROBLOCKD *xd, int block_idx, int bwl_in,
TX_SIZE tx_size, PREDICTION_MODE mode,
#if CONFIG_FILTERINTRA
int filterbit,
#endif
const uint8_t *ref, int ref_stride,
uint8_t *dst, int dst_stride,
int aoff, int loff, int plane);
#if CONFIG_INTERINTRA
void vp9_build_interintra_predictors(MACROBLOCKD *xd,
uint8_t *ypred,
uint8_t *upred,
uint8_t *vpred,
int ystride,
int ustride,
int vstride,
BLOCK_SIZE bsize);
void vp9_build_interintra_predictors_sby(MACROBLOCKD *xd,
uint8_t *ypred,
int ystride,
BLOCK_SIZE bsize);
void vp9_build_interintra_predictors_sbuv(MACROBLOCKD *xd,
uint8_t *upred,
uint8_t *vpred,
int ustride, int vstride,
BLOCK_SIZE bsize);
#if CONFIG_MASKED_INTERINTRA
void vp9_generate_masked_weight_interintra(int mask_index,
BLOCK_SIZE sb_type,
int h, int w,
uint8_t *mask, int stride);
#endif
#endif
#ifdef __cplusplus
} // extern "C"
#endif

View File

@@ -305,15 +305,15 @@ specialize qw/vp9_convolve_avg neon_asm dspr2/, "$sse2_x86inc";
$vp9_convolve_avg_neon_asm=vp9_convolve_avg_neon;
add_proto qw/void vp9_convolve8/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
specialize qw/vp9_convolve8 sse2 ssse3 neon_asm dspr2/;
specialize qw/vp9_convolve8 sse2 ssse3 avx2 neon_asm dspr2/;
$vp9_convolve8_neon_asm=vp9_convolve8_neon;
add_proto qw/void vp9_convolve8_horiz/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
specialize qw/vp9_convolve8_horiz sse2 ssse3 neon_asm dspr2/;
specialize qw/vp9_convolve8_horiz sse2 ssse3 avx2 neon_asm dspr2/;
$vp9_convolve8_horiz_neon_asm=vp9_convolve8_horiz_neon;
add_proto qw/void vp9_convolve8_vert/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
specialize qw/vp9_convolve8_vert sse2 ssse3 neon_asm dspr2/;
specialize qw/vp9_convolve8_vert sse2 ssse3 avx2 neon_asm dspr2/;
$vp9_convolve8_vert_neon_asm=vp9_convolve8_vert_neon;
add_proto qw/void vp9_convolve8_avg/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
@@ -402,25 +402,25 @@ if (vpx_config("CONFIG_VP9_ENCODER") eq "yes") {
# variance
add_proto qw/unsigned int vp9_variance32x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vp9_variance32x16 avx2/, "$sse2_x86inc";
specialize qw/vp9_variance32x16/, "$sse2_x86inc", "$avx2_x86inc";
add_proto qw/unsigned int vp9_variance16x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vp9_variance16x32/, "$sse2_x86inc";
add_proto qw/unsigned int vp9_variance64x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vp9_variance64x32 avx2/, "$sse2_x86inc";
specialize qw/vp9_variance64x32/, "$sse2_x86inc", "$avx2_x86inc";
add_proto qw/unsigned int vp9_variance32x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vp9_variance32x64/, "$sse2_x86inc";
add_proto qw/unsigned int vp9_variance32x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vp9_variance32x32 avx2/, "$sse2_x86inc";
specialize qw/vp9_variance32x32/, "$sse2_x86inc", "$avx2_x86inc";
add_proto qw/unsigned int vp9_variance64x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vp9_variance64x64 avx2/, "$sse2_x86inc";
specialize qw/vp9_variance64x64/, "$sse2_x86inc", "$avx2_x86inc";
add_proto qw/unsigned int vp9_variance16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vp9_variance16x16 mmx avx2/, "$sse2_x86inc";
specialize qw/vp9_variance16x16 mmx/, "$sse2_x86inc", "$avx2_x86inc";
add_proto qw/unsigned int vp9_variance16x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vp9_variance16x8 mmx/, "$sse2_x86inc";
@@ -447,10 +447,10 @@ add_proto qw/unsigned int vp9_variance4x4/, "const uint8_t *src_ptr, int source_
specialize qw/vp9_variance4x4 mmx/, "$sse2_x86inc";
add_proto qw/unsigned int vp9_sub_pixel_variance64x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vp9_sub_pixel_variance64x64/, "$sse2_x86inc", "$ssse3_x86inc";
specialize qw/vp9_sub_pixel_variance64x64 avx2/, "$sse2_x86inc", "$ssse3_x86inc";
add_proto qw/unsigned int vp9_sub_pixel_avg_variance64x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred";
specialize qw/vp9_sub_pixel_avg_variance64x64/, "$sse2_x86inc", "$ssse3_x86inc";
specialize qw/vp9_sub_pixel_avg_variance64x64 avx2/, "$sse2_x86inc", "$ssse3_x86inc";
add_proto qw/unsigned int vp9_sub_pixel_variance32x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vp9_sub_pixel_variance32x64/, "$sse2_x86inc", "$ssse3_x86inc";
@@ -477,10 +477,10 @@ add_proto qw/unsigned int vp9_sub_pixel_avg_variance16x32/, "const uint8_t *src_
specialize qw/vp9_sub_pixel_avg_variance16x32/, "$sse2_x86inc", "$ssse3_x86inc";
add_proto qw/unsigned int vp9_sub_pixel_variance32x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vp9_sub_pixel_variance32x32/, "$sse2_x86inc", "$ssse3_x86inc";
specialize qw/vp9_sub_pixel_variance32x32 avx2/, "$sse2_x86inc", "$ssse3_x86inc";
add_proto qw/unsigned int vp9_sub_pixel_avg_variance32x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred";
specialize qw/vp9_sub_pixel_avg_variance32x32/, "$sse2_x86inc", "$ssse3_x86inc";
specialize qw/vp9_sub_pixel_avg_variance32x32 avx2/, "$sse2_x86inc", "$ssse3_x86inc";
add_proto qw/unsigned int vp9_sub_pixel_variance16x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vp9_sub_pixel_variance16x16/, "$sse2_x86inc", "$ssse3_x86inc";
@@ -506,125 +506,6 @@ specialize qw/vp9_sub_pixel_variance8x8/, "$sse2_x86inc", "$ssse3_x86inc";
add_proto qw/unsigned int vp9_sub_pixel_avg_variance8x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred";
specialize qw/vp9_sub_pixel_avg_variance8x8/, "$sse2_x86inc", "$ssse3_x86inc";
if ((vpx_config("CONFIG_MASKED_INTERINTER") eq "yes") || ((vpx_config("CONFIG_INTERINTRA") eq "yes") && (vpx_config("CONFIG_MASKED_INTERINTRA") eq "yes"))) {
add_proto qw/unsigned int vp9_masked_variance32x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
specialize qw/vp9_masked_variance32x16/;
add_proto qw/unsigned int vp9_masked_variance16x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
specialize qw/vp9_masked_variance16x32/;
add_proto qw/unsigned int vp9_masked_variance64x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
specialize qw/vp9_masked_variance64x32/;
add_proto qw/unsigned int vp9_masked_variance32x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
specialize qw/vp9_masked_variance32x64/;
add_proto qw/unsigned int vp9_masked_variance32x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
specialize qw/vp9_masked_variance32x32/;
add_proto qw/unsigned int vp9_masked_variance64x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
specialize qw/vp9_masked_variance64x64/;
add_proto qw/unsigned int vp9_masked_variance16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
specialize qw/vp9_masked_variance16x16/;
add_proto qw/unsigned int vp9_masked_variance16x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
specialize qw/vp9_masked_variance16x8/;
add_proto qw/unsigned int vp9_masked_variance8x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
specialize qw/vp9_masked_variance8x16/;
add_proto qw/unsigned int vp9_masked_variance8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
specialize qw/vp9_masked_variance8x8/;
add_proto qw/unsigned int vp9_masked_variance8x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
specialize qw/vp9_masked_variance8x4/;
add_proto qw/unsigned int vp9_masked_variance4x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
specialize qw/vp9_masked_variance4x8/;
add_proto qw/unsigned int vp9_masked_variance4x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
specialize qw/vp9_masked_variance4x4/;
add_proto qw/unsigned int vp9_masked_sub_pixel_variance64x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
specialize qw/vp9_masked_sub_pixel_variance64x64/;
add_proto qw/unsigned int vp9_masked_sub_pixel_variance32x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
specialize qw/vp9_masked_sub_pixel_variance32x64/;
add_proto qw/unsigned int vp9_masked_sub_pixel_variance64x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
specialize qw/vp9_masked_sub_pixel_variance64x32/;
add_proto qw/unsigned int vp9_masked_sub_pixel_variance32x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
specialize qw/vp9_masked_sub_pixel_variance32x16/;
add_proto qw/unsigned int vp9_masked_sub_pixel_variance16x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
specialize qw/vp9_masked_sub_pixel_variance16x32/;
add_proto qw/unsigned int vp9_masked_sub_pixel_variance32x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
specialize qw/vp9_masked_sub_pixel_variance32x32/;
add_proto qw/unsigned int vp9_masked_sub_pixel_variance16x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
specialize qw/vp9_masked_sub_pixel_variance16x16/;
add_proto qw/unsigned int vp9_masked_sub_pixel_variance8x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
specialize qw/vp9_masked_sub_pixel_variance8x16/;
add_proto qw/unsigned int vp9_masked_sub_pixel_variance16x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
specialize qw/vp9_masked_sub_pixel_variance16x8/;
add_proto qw/unsigned int vp9_masked_sub_pixel_variance8x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
specialize qw/vp9_masked_sub_pixel_variance8x8/;
add_proto qw/unsigned int vp9_masked_sub_pixel_variance8x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
specialize qw/vp9_masked_sub_pixel_variance8x4/;
add_proto qw/unsigned int vp9_masked_sub_pixel_variance4x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
specialize qw/vp9_masked_sub_pixel_variance4x8/;
add_proto qw/unsigned int vp9_masked_sub_pixel_variance4x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
specialize qw/vp9_masked_sub_pixel_variance4x4/;
add_proto qw/unsigned int vp9_masked_sad64x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride";
specialize qw/vp9_masked_sad64x64/;
add_proto qw/unsigned int vp9_masked_sad32x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride";
specialize qw/vp9_masked_sad32x64/;
add_proto qw/unsigned int vp9_masked_sad64x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride";
specialize qw/vp9_masked_sad64x32/;
add_proto qw/unsigned int vp9_masked_sad32x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride";
specialize qw/vp9_masked_sad32x16/;
add_proto qw/unsigned int vp9_masked_sad16x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride";
specialize qw/vp9_masked_sad16x32/;
add_proto qw/unsigned int vp9_masked_sad32x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride";
specialize qw/vp9_masked_sad32x32/;
add_proto qw/unsigned int vp9_masked_sad16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride";
specialize qw/vp9_masked_sad16x16/;
add_proto qw/unsigned int vp9_masked_sad16x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride";
specialize qw/vp9_masked_sad16x8/;
add_proto qw/unsigned int vp9_masked_sad8x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride";
specialize qw/vp9_masked_sad8x16/;
add_proto qw/unsigned int vp9_masked_sad8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride";
specialize qw/vp9_masked_sad8x8/;
add_proto qw/unsigned int vp9_masked_sad8x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride";
specialize qw/vp9_masked_sad8x4/;
add_proto qw/unsigned int vp9_masked_sad4x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride";
specialize qw/vp9_masked_sad4x8/;
add_proto qw/unsigned int vp9_masked_sad4x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride";
specialize qw/vp9_masked_sad4x4/;
}
# TODO(jingning): need to convert 8x4/4x8 functions into mmx/sse form
add_proto qw/unsigned int vp9_sub_pixel_variance8x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vp9_sub_pixel_variance8x4/, "$sse2_x86inc", "$ssse3_x86inc";
@@ -772,7 +653,7 @@ add_proto qw/void vp9_sad4x4x8/, "const uint8_t *src_ptr, int src_stride, const
specialize qw/vp9_sad4x4x8 sse4/;
add_proto qw/void vp9_sad64x64x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array";
specialize qw/vp9_sad64x64x4d sse2/;
specialize qw/vp9_sad64x64x4d sse2 avx2/;
add_proto qw/void vp9_sad32x64x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array";
specialize qw/vp9_sad32x64x4d sse2/;
@@ -787,7 +668,7 @@ add_proto qw/void vp9_sad16x32x4d/, "const uint8_t *src_ptr, int src_stride, co
specialize qw/vp9_sad16x32x4d sse2/;
add_proto qw/void vp9_sad32x32x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array";
specialize qw/vp9_sad32x32x4d sse2/;
specialize qw/vp9_sad32x32x4d sse2 avx2/;
add_proto qw/void vp9_sad16x16x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array";
specialize qw/vp9_sad16x16x4d sse2/;
@@ -812,7 +693,7 @@ add_proto qw/void vp9_sad4x4x4d/, "const uint8_t *src_ptr, int src_stride, cons
specialize qw/vp9_sad4x4x4d sse/;
add_proto qw/unsigned int vp9_mse16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
specialize qw/vp9_mse16x16 mmx avx2/, "$sse2_x86inc";
specialize qw/vp9_mse16x16 mmx/, "$sse2_x86inc", "$avx2_x86inc";
add_proto qw/unsigned int vp9_mse8x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
specialize qw/vp9_mse8x16/;
@@ -858,31 +739,19 @@ add_proto qw/void vp9_fht8x8/, "const int16_t *input, int16_t *output, int strid
specialize qw/vp9_fht8x8 sse2 avx2/;
add_proto qw/void vp9_fht16x16/, "const int16_t *input, int16_t *output, int stride, int tx_type";
specialize qw/vp9_fht16x16 sse2/;
specialize qw/vp9_fht16x16 sse2 avx2/;
add_proto qw/void vp9_fwht4x4/, "const int16_t *input, int16_t *output, int stride";
specialize qw/vp9_fwht4x4/, "$mmx_x86inc";
add_proto qw/void vp9_fdct4x4_1/, "const int16_t *input, int16_t *output, int stride";
specialize qw/vp9_fdct4x4_1 sse2/;
add_proto qw/void vp9_fdct4x4/, "const int16_t *input, int16_t *output, int stride";
specialize qw/vp9_fdct4x4 sse2 avx2/;
add_proto qw/void vp9_fdct8x8_1/, "const int16_t *input, int16_t *output, int stride";
specialize qw/vp9_fdct8x8_1 sse2/;
add_proto qw/void vp9_fdct8x8/, "const int16_t *input, int16_t *output, int stride";
specialize qw/vp9_fdct8x8 sse2 avx2/, "$ssse3_x86_64";
add_proto qw/void vp9_fdct16x16_1/, "const int16_t *input, int16_t *output, int stride";
specialize qw/vp9_fdct16x16_1 sse2/;
add_proto qw/void vp9_fdct16x16/, "const int16_t *input, int16_t *output, int stride";
specialize qw/vp9_fdct16x16 sse2/;
add_proto qw/void vp9_fdct32x32_1/, "const int16_t *input, int16_t *output, int stride";
specialize qw/vp9_fdct32x32_1 sse2/;
specialize qw/vp9_fdct16x16 sse2 avx2/;
add_proto qw/void vp9_fdct32x32/, "const int16_t *input, int16_t *output, int stride";
specialize qw/vp9_fdct32x32 sse2 avx2/;

View File

@@ -46,8 +46,8 @@ static INLINE int vp9_is_valid_scale(const struct scale_factors *sf) {
}
static INLINE int vp9_is_scaled(const struct scale_factors *sf) {
return vp9_is_valid_scale(sf) &&
(sf->x_scale_fp != REF_NO_SCALE || sf->y_scale_fp != REF_NO_SCALE);
return sf->x_scale_fp != REF_NO_SCALE ||
sf->y_scale_fp != REF_NO_SCALE;
}
#ifdef __cplusplus

View File

@@ -464,6 +464,7 @@ sym(vp9_mbpost_proc_down_mmx):
; unsigned char whiteclamp[16],
; unsigned char bothclamp[16],
; unsigned int width, unsigned int height, int pitch)
extern sym(rand)
global sym(vp9_plane_add_noise_mmx) PRIVATE
sym(vp9_plane_add_noise_mmx):
push rbp
@@ -475,7 +476,7 @@ sym(vp9_plane_add_noise_mmx):
; end prolog
.addnoise_loop:
call sym(LIBVPX_RAND) WRT_PLT
call sym(rand) WRT_PLT
mov rcx, arg(1) ;noise
and rax, 0xff
add rcx, rax

View File

@@ -629,6 +629,7 @@ sym(vp9_mbpost_proc_across_ip_xmm):
; unsigned char whiteclamp[16],
; unsigned char bothclamp[16],
; unsigned int width, unsigned int height, int pitch)
extern sym(rand)
global sym(vp9_plane_add_noise_wmt) PRIVATE
sym(vp9_plane_add_noise_wmt):
push rbp
@@ -640,7 +641,7 @@ sym(vp9_plane_add_noise_wmt):
; end prolog
.addnoise_loop:
call sym(LIBVPX_RAND) WRT_PLT
call sym(rand) WRT_PLT
mov rcx, arg(1) ;noise
and rax, 0xff
add rcx, rax

View File

@@ -254,24 +254,11 @@ static void predict_and_reconstruct_intra_block(int plane, int block,
: mi->mbmi.uv_mode;
int x, y;
uint8_t *dst;
#if CONFIG_FILTERINTRA
int fbit;
if (plane == 0)
if (mi->mbmi.sb_type < BLOCK_8X8)
fbit = mi->b_filter_info[block];
else
fbit = is_filter_enabled(tx_size) ? mi->mbmi.filterbit : 0;
else
fbit = is_filter_enabled(tx_size) ? mi->mbmi.uv_filterbit : 0;
#endif
txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &x, &y);
dst = &pd->dst.buf[4 * y * pd->dst.stride + 4 * x];
vp9_predict_intra_block(xd, block >> (tx_size << 1),
b_width_log2(plane_bsize), tx_size, mode,
#if CONFIG_FILTERINTRA
fbit,
#endif
dst, pd->dst.stride, dst, pd->dst.stride,
x, y, plane);
@@ -325,6 +312,13 @@ static MB_MODE_INFO *set_offsets(VP9_COMMON *const cm, MACROBLOCKD *const xd,
for (x = !y; x < x_mis; ++x)
xd->mi[y * cm->mi_stride + x] = xd->mi[0];
#if CONFIG_TRANSCODE && WRITE_MI_ARRAY
for (y = 0; y < y_mis; ++y)
for (x = !y; x < x_mis; ++x)
vpx_memcpy(&cm->mi[offset + y * cm->mi_stride + x],
&cm->mi[offset], sizeof(MODE_INFO));
#endif
set_skip_context(xd, mi_row, mi_col);
// Distance of Mb to the various image edges. These are specified to 8th pel
@@ -335,84 +329,6 @@ static MB_MODE_INFO *set_offsets(VP9_COMMON *const cm, MACROBLOCKD *const xd,
return &xd->mi[0]->mbmi;
}
#if CONFIG_SUPERTX
static void set_offsets_extend(VP9_COMMON *const cm, MACROBLOCKD *const xd,
const TileInfo *const tile,
BLOCK_SIZE top_bsize,
int mi_row, int mi_col,
int mi_row_ori, int mi_col_ori) {
const int bw = num_8x8_blocks_wide_lookup[top_bsize];
const int bh = num_8x8_blocks_high_lookup[top_bsize];
const int offset = mi_row * cm->mi_stride + mi_col;
xd->mi = cm->mi_grid_visible + offset;
xd->mi[0] = &cm->mi[offset];
set_mi_row_col(xd, tile, mi_row_ori, bh, mi_col_ori, bw,
cm->mi_rows, cm->mi_cols);
}
static void set_mb_offsets(VP9_COMMON *const cm, MACROBLOCKD *const xd,
const TileInfo *const tile,
BLOCK_SIZE bsize, int mi_row, int mi_col) {
const int bw = num_8x8_blocks_wide_lookup[bsize];
const int bh = num_8x8_blocks_high_lookup[bsize];
const int x_mis = MIN(bw, cm->mi_cols - mi_col);
const int y_mis = MIN(bh, cm->mi_rows - mi_row);
const int offset = mi_row * cm->mi_stride + mi_col;
int x, y;
xd->mi = cm->mi_grid_visible + offset;
xd->mi[0] = &cm->mi[offset];
xd->mi[0]->mbmi.sb_type = bsize;
for (y = 0; y < y_mis; ++y)
for (x = !y; x < x_mis; ++x)
xd->mi[y * cm->mi_stride + x] = xd->mi[0];
set_mi_row_col(xd, tile, mi_row, bh, mi_col, bw, cm->mi_rows, cm->mi_cols);
}
static void set_offsets_topblock(VP9_COMMON *const cm, MACROBLOCKD *const xd,
const TileInfo *const tile,
BLOCK_SIZE bsize, int mi_row, int mi_col) {
const int bw = num_8x8_blocks_wide_lookup[bsize];
const int bh = num_8x8_blocks_high_lookup[bsize];
const int offset = mi_row * cm->mi_stride + mi_col;
xd->mi = cm->mi_grid_visible + offset;
xd->mi[0] = &cm->mi[offset];
set_mi_row_col(xd, tile, mi_row, bh, mi_col, bw, cm->mi_rows, cm->mi_cols);
vp9_setup_dst_planes(xd->plane, get_frame_new_buffer(cm), mi_row, mi_col);
}
static void set_param_topblock(VP9_COMMON *const cm, MACROBLOCKD *const xd,
BLOCK_SIZE bsize, int mi_row, int mi_col,
#if CONFIG_EXT_TX
int txfm,
#endif
int skip) {
const int bw = num_8x8_blocks_wide_lookup[bsize];
const int bh = num_8x8_blocks_high_lookup[bsize];
const int x_mis = MIN(bw, cm->mi_cols - mi_col);
const int y_mis = MIN(bh, cm->mi_rows - mi_row);
const int offset = mi_row * cm->mi_stride + mi_col;
int x, y;
xd->mi = cm->mi_grid_visible + offset;
xd->mi[0] = &cm->mi[offset];
for (y = 0; y < y_mis; ++y)
for (x = 0; x < x_mis; ++x) {
xd->mi[y * cm->mi_stride + x]->mbmi.skip = skip;
#if CONFIG_EXT_TX
xd->mi[y * cm->mi_stride + x]->mbmi.ext_txfrm = txfm;
#endif
}
}
#endif
static void set_ref(VP9_COMMON *const cm, MACROBLOCKD *const xd,
int idx, int mi_row, int mi_col) {
MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
@@ -426,246 +342,14 @@ static void set_ref(VP9_COMMON *const cm, MACROBLOCKD *const xd,
xd->corrupted |= ref_buffer->buf->corrupted;
}
#if CONFIG_SUPERTX
static void dec_predict_b_extend(VP9_COMMON *const cm, MACROBLOCKD *const xd,
const TileInfo *const tile,
int mi_row, int mi_col,
int mi_row_ori, int mi_col_ori,
BLOCK_SIZE top_bsize) {
set_offsets_extend(cm, xd, tile, top_bsize, mi_row, mi_col,
mi_row_ori, mi_col_ori);
set_ref(cm, xd, 0, mi_row_ori, mi_col_ori);
if (has_second_ref(&xd->mi[0]->mbmi))
set_ref(cm, xd, 1, mi_row_ori, mi_col_ori);
xd->mi[0]->mbmi.tx_size = b_width_log2(top_bsize);
#if !CONFIG_MASKED_INTERINTER
vp9_dec_build_inter_predictors_sb(xd, mi_row_ori, mi_col_ori, top_bsize);
#else
vp9_dec_build_inter_predictors_sb_extend(xd, mi_row, mi_col,
mi_row_ori, mi_col_ori, top_bsize);
#endif
}
static void dec_predict_b_sub8x8_extend(VP9_COMMON *const cm,
MACROBLOCKD *const xd,
const TileInfo *const tile,
int mi_row, int mi_col,
int mi_row_ori, int mi_col_ori,
BLOCK_SIZE top_bsize,
PARTITION_TYPE partition) {
set_offsets_extend(cm, xd, tile, top_bsize, mi_row, mi_col,
mi_row_ori, mi_col_ori);
set_ref(cm, xd, 0, mi_row_ori, mi_col_ori);
if (has_second_ref(&xd->mi[0]->mbmi))
set_ref(cm, xd, 1, mi_row_ori, mi_col_ori);
xd->mi[0]->mbmi.tx_size = b_width_log2(top_bsize);
vp9_dec_build_inter_predictors_sby_sub8x8_extend(xd, mi_row, mi_col,
mi_row_ori, mi_col_ori,
top_bsize, partition);
vp9_dec_build_inter_predictors_sbuv_sub8x8_extend(xd,
#if CONFIG_MASKED_INTERINTER
mi_row, mi_col,
#endif
mi_row_ori, mi_col_ori,
top_bsize);
}
static void dec_predict_sb_complex(VP9_COMMON *const cm, MACROBLOCKD *const xd,
const TileInfo *const tile,
int mi_row, int mi_col,
int mi_row_ori, int mi_col_ori,
BLOCK_SIZE bsize, BLOCK_SIZE top_bsize,
uint8_t *dst_buf[3], int dst_stride[3]) {
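// Recursively builds the prediction for a supertx block: each sub-partition
// is predicted at the full top_bsize extent into temporary 32x32 per-plane
// buffers, and the pieces are then blended along the partition seams with
// vp9_build_masked_inter_predictor_complex().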
const int bsl = b_width_log2(bsize), hbs = (1 << bsl) / 4;
PARTITION_TYPE partition;
BLOCK_SIZE subsize;
MB_MODE_INFO *mbmi;
int i, offset = mi_row * cm->mi_stride + mi_col;
DECLARE_ALIGNED_ARRAY(16, uint8_t, tmp_buf1, MAX_MB_PLANE * 32 * 32);
DECLARE_ALIGNED_ARRAY(16, uint8_t, tmp_buf2, MAX_MB_PLANE * 32 * 32);
DECLARE_ALIGNED_ARRAY(16, uint8_t, tmp_buf3, MAX_MB_PLANE * 32 * 32);
uint8_t *dst_buf1[3] = {tmp_buf1, tmp_buf1 + 32 * 32, tmp_buf1 + 2 * 32 * 32};
uint8_t *dst_buf2[3] = {tmp_buf2, tmp_buf2 + 32 * 32, tmp_buf2 + 2 * 32 * 32};
uint8_t *dst_buf3[3] = {tmp_buf3, tmp_buf3 + 32 * 32, tmp_buf3 + 2 * 32 * 32};
int dst_stride1[3] = {32, 32, 32};
int dst_stride2[3] = {32, 32, 32};
int dst_stride3[3] = {32, 32, 32};
if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols)
return;
xd->mi = cm->mi_grid_visible + offset;
xd->mi[0] = &cm->mi[offset];
mbmi = &xd->mi[0]->mbmi;
partition = partition_lookup[bsl][mbmi->sb_type];
subsize = get_subsize(bsize, partition);
for (i = 0; i < MAX_MB_PLANE; i++) {
xd->plane[i].dst.buf = dst_buf[i];
xd->plane[i].dst.stride = dst_stride[i];
}
switch (partition) {
case PARTITION_NONE:
assert(bsize < top_bsize);
dec_predict_b_extend(cm, xd, tile, mi_row, mi_col, mi_row_ori, mi_col_ori,
top_bsize);
break;
case PARTITION_HORZ:
if (bsize > BLOCK_8X8) {
dec_predict_b_extend(cm, xd, tile, mi_row, mi_col, mi_row_ori,
mi_col_ori, top_bsize);
} else {
dec_predict_b_sub8x8_extend(cm, xd, tile, mi_row, mi_col,
mi_row_ori, mi_col_ori,
top_bsize, partition);
}
if (mi_row + hbs < cm->mi_rows && bsize > BLOCK_8X8) {
for (i = 0; i < MAX_MB_PLANE; i++) {
xd->plane[i].dst.buf = tmp_buf1 + i * 32 * 32;
xd->plane[i].dst.stride = 32;
}
dec_predict_b_extend(cm, xd, tile, mi_row + hbs, mi_col,
mi_row_ori, mi_col_ori, top_bsize);
for (i = 0; i < MAX_MB_PLANE; i++) {
xd->plane[i].dst.buf = dst_buf[i];
xd->plane[i].dst.stride = dst_stride[i];
vp9_build_masked_inter_predictor_complex(dst_buf[i], dst_stride[i],
dst_buf1[i], dst_stride1[i],
i,
mi_row, mi_col,
mi_row_ori, mi_col_ori,
bsize, top_bsize,
PARTITION_HORZ);
}
}
break;
case PARTITION_VERT:
if (bsize > BLOCK_8X8) {
dec_predict_b_extend(cm, xd, tile, mi_row, mi_col, mi_row_ori,
mi_col_ori, top_bsize);
} else {
dec_predict_b_sub8x8_extend(cm, xd, tile, mi_row, mi_col,
mi_row_ori, mi_col_ori,
top_bsize, partition);
}
if (mi_col + hbs < cm->mi_cols && bsize > BLOCK_8X8) {
for (i = 0; i < MAX_MB_PLANE; i++) {
xd->plane[i].dst.buf = tmp_buf1 + i * 32 * 32;
xd->plane[i].dst.stride = 32;
}
dec_predict_b_extend(cm, xd, tile, mi_row, mi_col + hbs, mi_row_ori,
mi_col_ori, top_bsize);
for (i = 0; i < MAX_MB_PLANE; i++) {
xd->plane[i].dst.buf = dst_buf[i];
xd->plane[i].dst.stride = dst_stride[i];
vp9_build_masked_inter_predictor_complex(dst_buf[i], dst_stride[i],
dst_buf1[i], dst_stride1[i],
i,
mi_row, mi_col,
mi_row_ori, mi_col_ori,
bsize, top_bsize,
PARTITION_VERT);
}
}
break;
case PARTITION_SPLIT:
if (bsize == BLOCK_8X8) {
dec_predict_b_sub8x8_extend(cm, xd, tile, mi_row, mi_col,
mi_row_ori, mi_col_ori,
top_bsize, partition);
} else {
dec_predict_sb_complex(cm, xd, tile, mi_row, mi_col,
mi_row_ori, mi_col_ori, subsize, top_bsize,
dst_buf, dst_stride);
if (mi_row < cm->mi_rows && mi_col + hbs < cm->mi_cols)
dec_predict_sb_complex(cm, xd, tile, mi_row, mi_col + hbs,
mi_row_ori, mi_col_ori, subsize, top_bsize,
dst_buf1, dst_stride1);
if (mi_row + hbs < cm->mi_rows && mi_col < cm->mi_cols)
dec_predict_sb_complex(cm, xd, tile, mi_row + hbs, mi_col,
mi_row_ori, mi_col_ori, subsize, top_bsize,
dst_buf2, dst_stride2);
if (mi_row + hbs < cm->mi_rows && mi_col + hbs < cm->mi_cols)
dec_predict_sb_complex(cm, xd, tile, mi_row + hbs, mi_col + hbs,
mi_row_ori, mi_col_ori, subsize, top_bsize,
dst_buf3, dst_stride3);
for (i = 0; i < MAX_MB_PLANE; i++) {
if (mi_row < cm->mi_rows && mi_col + hbs < cm->mi_cols) {
vp9_build_masked_inter_predictor_complex(dst_buf[i], dst_stride[i],
dst_buf1[i],
dst_stride1[i],
i, mi_row, mi_col,
mi_row_ori, mi_col_ori,
bsize, top_bsize,
PARTITION_VERT);
if (mi_row + hbs < cm->mi_rows) {
vp9_build_masked_inter_predictor_complex(dst_buf2[i],
dst_stride2[i],
dst_buf3[i],
dst_stride3[i],
i, mi_row, mi_col,
mi_row_ori, mi_col_ori,
bsize, top_bsize,
PARTITION_VERT);
vp9_build_masked_inter_predictor_complex(dst_buf[i],
dst_stride[i],
dst_buf2[i],
dst_stride2[i],
i, mi_row, mi_col,
mi_row_ori, mi_col_ori,
bsize, top_bsize,
PARTITION_HORZ);
}
} else if (mi_row + hbs < cm->mi_rows && mi_col < cm->mi_cols) {
vp9_build_masked_inter_predictor_complex(dst_buf[i],
dst_stride[i],
dst_buf2[i],
dst_stride2[i],
i, mi_row, mi_col,
mi_row_ori, mi_col_ori,
bsize, top_bsize,
PARTITION_HORZ);
}
}
}
break;
default:
assert(0);
}
}
#endif
static void decode_block(VP9_COMMON *const cm, MACROBLOCKD *const xd,
const TileInfo *const tile,
#if CONFIG_SUPERTX
int supertx_enabled,
#endif
int mi_row, int mi_col,
vp9_reader *r, BLOCK_SIZE bsize) {
const int less8x8 = bsize < BLOCK_8X8;
#if !CONFIG_SUPERTX
MB_MODE_INFO *mbmi = set_offsets(cm, xd, tile, bsize, mi_row, mi_col);
#else
MB_MODE_INFO *mbmi;
if (!supertx_enabled) {
mbmi = set_offsets(cm, xd, tile, bsize, mi_row, mi_col);
} else {
set_mb_offsets(cm, xd, tile, bsize, mi_row, mi_col);
}
#endif
vp9_read_mode_info(cm, xd, tile,
#if CONFIG_SUPERTX
supertx_enabled,
#endif
mi_row, mi_col, r);
vp9_read_mode_info(cm, xd, tile, mi_row, mi_col, r);
#if CONFIG_SUPERTX
if (!supertx_enabled) {
#endif
if (less8x8)
bsize = BLOCK_8X8;
@@ -699,9 +383,6 @@ static void decode_block(VP9_COMMON *const cm, MACROBLOCKD *const xd,
mbmi->skip = 1; // skip loopfilter
}
}
#if CONFIG_SUPERTX
}
#endif
xd->corrupted |= vp9_reader_has_error(r);
}
@@ -732,19 +413,38 @@ static PARTITION_TYPE read_partition(VP9_COMMON *cm, MACROBLOCKD *xd, int hbs,
static void decode_partition(VP9_COMMON *const cm, MACROBLOCKD *const xd,
const TileInfo *const tile,
#if CONFIG_SUPERTX
int read_token, int supertx_enabled,
#endif
int mi_row, int mi_col,
vp9_reader* r, BLOCK_SIZE bsize) {
const int hbs = num_8x8_blocks_wide_lookup[bsize] / 2;
PARTITION_TYPE partition;
BLOCK_SIZE subsize;
#if CONFIG_SUPERTX
int skip = 0;
#if CONFIG_EXT_TX
int txfm = 0;
#endif
#if CONFIG_TRANSCODE && READ_MI_ARRAY
// This is for testing purposes only. It verifies that the external file
// contains the correct mode_info array.
if (bsize == BLOCK_64X64) {
MODE_INFO mi_array[64];
FILE *pf = cm->mi_array_pf;
if (pf) {
int i, j;
for (j = 0; j < MI_BLOCK_SIZE; ++j)
for (i = 0; i < MI_BLOCK_SIZE; ++i)
fread(&mi_array[j * 8 + i], 1, sizeof(MODE_INFO), pf);
}
if (pf && mi_row == 0 && mi_col == 8) {
int i, j;
for (j = 0; j < MI_BLOCK_SIZE; ++j) {
for (i = 0; i < MI_BLOCK_SIZE; ++i) {
MB_MODE_INFO *mbmi = &mi_array[j * 8 + i].mbmi;
b_mode_info *bmi = mi_array[j * 8 + i].bmi;
fprintf(stderr, "pos (%d, %d), bsize %d, mode %d\n",
mi_row + j, mi_col + i, mbmi->sb_type, bmi[0].as_mode);
}
}
fprintf(stderr, "\n");
}
}
#endif
if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols)
@@ -752,145 +452,54 @@ static void decode_partition(VP9_COMMON *const cm, MACROBLOCKD *const xd,
partition = read_partition(cm, xd, hbs, mi_row, mi_col, bsize, r);
subsize = get_subsize(bsize, partition);
#if CONFIG_SUPERTX
if (cm->frame_type != KEY_FRAME &&
partition != PARTITION_NONE &&
bsize <= BLOCK_32X32 &&
!supertx_enabled) {
TX_SIZE supertx_size = b_width_log2(bsize);
if (partition == PARTITION_SPLIT) {
supertx_enabled = vp9_read(r, cm->fc.supertxsplit_prob[supertx_size]);
cm->counts.supertxsplit[supertx_size][supertx_enabled]++;
} else {
supertx_enabled = vp9_read(r, cm->fc.supertx_prob[supertx_size]);
cm->counts.supertx[supertx_size][supertx_enabled]++;
}
}
if (supertx_enabled && read_token) {
int offset = mi_row * cm->mi_stride + mi_col;
xd->mi = cm->mi_grid_visible + offset;
xd->mi[0] = &cm->mi[offset];
set_mi_row_col(xd, tile, mi_row, num_8x8_blocks_high_lookup[bsize],
mi_col, num_8x8_blocks_wide_lookup[bsize],
cm->mi_rows, cm->mi_cols);
set_skip_context(xd, mi_row, mi_col);
// Here we assume mbmi->segment_id = 0
skip = read_skip(cm, xd, 0, r);
if (skip)
reset_skip_context(xd, bsize);
#if CONFIG_EXT_TX
if (bsize <= BLOCK_16X16 && !skip) {
txfm = vp9_read(r, cm->fc.ext_tx_prob);
if (!cm->frame_parallel_decoding_mode)
++cm->counts.ext_tx[txfm];
}
#endif
}
#endif
if (subsize < BLOCK_8X8) {
decode_block(cm, xd, tile,
#if CONFIG_SUPERTX
supertx_enabled,
#endif
mi_row, mi_col, r, subsize);
decode_block(cm, xd, tile, mi_row, mi_col, r, subsize);
} else {
switch (partition) {
case PARTITION_NONE:
decode_block(cm, xd, tile,
#if CONFIG_SUPERTX
supertx_enabled,
#endif
mi_row, mi_col, r, subsize);
decode_block(cm, xd, tile, mi_row, mi_col, r, subsize);
break;
case PARTITION_HORZ:
decode_block(cm, xd, tile,
#if CONFIG_SUPERTX
supertx_enabled,
#endif
mi_row, mi_col, r, subsize);
decode_block(cm, xd, tile, mi_row, mi_col, r, subsize);
if (mi_row + hbs < cm->mi_rows)
decode_block(cm, xd, tile,
#if CONFIG_SUPERTX
supertx_enabled,
#endif
mi_row + hbs, mi_col, r, subsize);
decode_block(cm, xd, tile, mi_row + hbs, mi_col, r, subsize);
break;
case PARTITION_VERT:
decode_block(cm, xd, tile,
#if CONFIG_SUPERTX
supertx_enabled,
#endif
mi_row, mi_col, r, subsize);
decode_block(cm, xd, tile, mi_row, mi_col, r, subsize);
if (mi_col + hbs < cm->mi_cols)
decode_block(cm, xd, tile,
#if CONFIG_SUPERTX
supertx_enabled,
#endif
mi_row, mi_col + hbs, r, subsize);
decode_block(cm, xd, tile, mi_row, mi_col + hbs, r, subsize);
break;
case PARTITION_SPLIT:
decode_partition(cm, xd, tile,
#if CONFIG_SUPERTX
!supertx_enabled, supertx_enabled,
#endif
mi_row, mi_col, r, subsize);
decode_partition(cm, xd, tile,
#if CONFIG_SUPERTX
!supertx_enabled, supertx_enabled,
#endif
mi_row, mi_col + hbs, r, subsize);
decode_partition(cm, xd, tile,
#if CONFIG_SUPERTX
!supertx_enabled, supertx_enabled,
#endif
mi_row + hbs, mi_col, r, subsize);
decode_partition(cm, xd, tile,
#if CONFIG_SUPERTX
!supertx_enabled, supertx_enabled,
#endif
mi_row + hbs, mi_col + hbs, r, subsize);
decode_partition(cm, xd, tile, mi_row, mi_col, r, subsize);
decode_partition(cm, xd, tile, mi_row, mi_col + hbs, r, subsize);
decode_partition(cm, xd, tile, mi_row + hbs, mi_col, r, subsize);
decode_partition(cm, xd, tile, mi_row + hbs, mi_col + hbs, r, subsize);
break;
default:
assert(0 && "Invalid partition type");
}
}
#if CONFIG_SUPERTX
if (supertx_enabled && read_token) {
uint8_t *dst_buf[3];
int dst_stride[3], i;
vp9_setup_dst_planes(xd->plane, get_frame_new_buffer(cm), mi_row, mi_col);
for (i = 0; i < MAX_MB_PLANE; i++) {
dst_buf[i] = xd->plane[i].dst.buf;
dst_stride[i] = xd->plane[i].dst.stride;
}
dec_predict_sb_complex(cm, xd, tile, mi_row, mi_col, mi_row, mi_col,
bsize, bsize, dst_buf, dst_stride);
if (!skip) {
int eobtotal = 0;
struct inter_args arg = { cm, xd, r, &eobtotal };
set_offsets_topblock(cm, xd, tile, bsize, mi_row, mi_col);
#if CONFIG_EXT_TX
xd->mi[0]->mbmi.ext_txfrm = txfm;
#endif
vp9_foreach_transformed_block(xd, bsize, reconstruct_inter_block, &arg);
if (!(subsize < BLOCK_8X8) && eobtotal == 0)
skip = 1;
}
set_param_topblock(cm, xd, bsize, mi_row, mi_col,
#if CONFIG_EXT_TX
txfm,
#endif
skip);
}
#endif
// update partition context
if (bsize >= BLOCK_8X8 &&
(bsize == BLOCK_8X8 || partition != PARTITION_SPLIT))
update_partition_context(xd, mi_row, mi_col, subsize, bsize);
#if CONFIG_TRANSCODE && WRITE_MI_ARRAY
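// Dump the mode_info entries covering this 64x64 superblock to the external
// file in raster order, so that a later encode pass can reuse the decoded
// modes (see the CONFIG_TRANSCODE read path earlier in decode_partition()).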
if (bsize == BLOCK_64X64) {
FILE *pf = cm->mi_array_pf;
if (pf) {
int i, j;
int offset = mi_row * cm->mi_stride + mi_col;
for (j = 0; j < MI_BLOCK_SIZE; ++j)
for (i = 0; i < MI_BLOCK_SIZE; ++i)
fwrite(&cm->mi[offset + j * cm->mi_stride + i],
1, sizeof(MODE_INFO), pf);
} else {
assert(0);
}
}
#endif
}
static void setup_token_decoder(const uint8_t *data,
@@ -1127,10 +736,6 @@ static void setup_tile_info(VP9_COMMON *cm, struct vp9_read_bit_buffer *rb) {
while (max_ones-- && vp9_rb_read_bit(rb))
cm->log2_tile_cols++;
if (cm->log2_tile_cols > 6)
vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME,
"Invalid number of tile columns");
// rows
cm->log2_tile_rows = vp9_rb_read_bit(rb);
if (cm->log2_tile_rows)
@@ -1284,11 +889,7 @@ static const uint8_t *decode_tiles(VP9Decoder *pbi,
vp9_zero(tile_data->xd.left_seg_context);
for (mi_col = tile.mi_col_start; mi_col < tile.mi_col_end;
mi_col += MI_BLOCK_SIZE) {
decode_partition(tile_data->cm, &tile_data->xd, &tile,
#if CONFIG_SUPERTX
1, 0,
#endif
mi_row, mi_col,
decode_partition(tile_data->cm, &tile_data->xd, &tile, mi_row, mi_col,
&tile_data->bit_reader, BLOCK_64X64);
}
}
@@ -1342,9 +943,6 @@ static int tile_worker_hook(void *arg1, void *arg2) {
for (mi_col = tile->mi_col_start; mi_col < tile->mi_col_end;
mi_col += MI_BLOCK_SIZE) {
decode_partition(tile_data->cm, &tile_data->xd, tile,
#if CONFIG_SUPERTX
1, 0,
#endif
mi_row, mi_col, &tile_data->bit_reader, BLOCK_64X64);
}
}
@@ -1530,7 +1128,7 @@ static size_t read_uncompressed_header(VP9Decoder *pbi,
// Show an existing frame directly.
const int frame_to_show = cm->ref_frame_map[vp9_rb_read_literal(rb, 3)];
if (frame_to_show < 0 || cm->frame_bufs[frame_to_show].ref_count < 1)
if (cm->frame_bufs[frame_to_show].ref_count < 1)
vpx_internal_error(&cm->error, VPX_CODEC_UNSUP_BITSTREAM,
"Buffer %d does not contain a decoded frame",
frame_to_show);
@@ -1695,62 +1293,6 @@ static int read_compressed_header(VP9Decoder *pbi, const uint8_t *data,
vp9_diff_update_prob(&r, &fc->partition_prob[j][i]);
read_mv_probs(nmvc, cm->allow_high_precision_mv, &r);
#if CONFIG_EXT_TX
vp9_diff_update_prob(&r, &fc->ext_tx_prob);
#endif
#if CONFIG_MASKED_INTERINTER
if (cm->reference_mode != SINGLE_REFERENCE) {
cm->use_masked_interinter = vp9_read_bit(&r);
if (cm->use_masked_interinter) {
for (i = 0; i < BLOCK_SIZES; i++) {
if (get_mask_bits(i))
vp9_diff_update_prob(&r, &fc->masked_interinter_prob[i]);
}
}
} else {
cm->use_masked_interinter = 0;
}
#endif
#if CONFIG_INTERINTRA
if (cm->reference_mode != COMPOUND_REFERENCE) {
cm->use_interintra = vp9_read_bit(&r);
if (cm->use_interintra) {
for (i = 0; i < BLOCK_SIZES; i++) {
if (is_interintra_allowed(i)) {
vp9_diff_update_prob(&r, &fc->interintra_prob[i]);
}
}
#if CONFIG_MASKED_INTERINTRA
cm->use_masked_interintra = vp9_read_bit(&r);
if (cm->use_masked_interintra) {
for (i = 0; i < BLOCK_SIZES; i++) {
if (is_interintra_allowed(i) && get_mask_bits_interintra(i))
vp9_diff_update_prob(&r, &fc->masked_interintra_prob[i]);
}
}
} else {
cm->use_masked_interintra = 0;
#endif
}
} else {
cm->use_interintra = 0;
#if CONFIG_MASKED_INTERINTRA
cm->use_masked_interintra = 0;
#endif
}
#endif
#if CONFIG_COPY_CODING
for (j = 0; j < COPY_MODE_CONTEXTS; j++) {
for (i = 0; i < 1; i++)
vp9_diff_update_prob(&r, &fc->copy_mode_probs_l2[j][i]);
for (i = 0; i < 2; i++)
vp9_diff_update_prob(&r, &fc->copy_mode_probs[j][i]);
}
#endif
}
return vp9_reader_has_error(&r);
@@ -1802,10 +1344,6 @@ static void debug_check_frame_counts(const VP9_COMMON *const cm) {
assert(!memcmp(&cm->counts.tx, &zero_counts.tx, sizeof(cm->counts.tx)));
assert(!memcmp(cm->counts.skip, zero_counts.skip, sizeof(cm->counts.skip)));
assert(!memcmp(&cm->counts.mv, &zero_counts.mv, sizeof(cm->counts.mv)));
#if CONFIG_EXT_TX
assert(!memcmp(cm->counts.ext_tx, zero_counts.ext_tx,
sizeof(cm->counts.ext_tx)));
#endif
}
#endif // NDEBUG


@@ -54,36 +54,6 @@ static PREDICTION_MODE read_inter_mode(VP9_COMMON *cm, vp9_reader *r, int ctx) {
return NEARESTMV + mode;
}
#if CONFIG_COPY_CODING
static COPY_MODE read_copy_mode(VP9_COMMON *cm, vp9_reader *r,
int num_candidate, int ctx) {
COPY_MODE mode;
switch (num_candidate) {
case 0:
assert(0);
break;
case 1:
mode = REF0;
break;
case 2:
mode = REF0 + vp9_read_tree(r, vp9_copy_mode_tree_l2,
cm->fc.copy_mode_probs_l2[ctx]);
if (!cm->frame_parallel_decoding_mode)
++cm->counts.copy_mode_l2[ctx][mode - REF0];
break;
default:
mode = REF0 + vp9_read_tree(r, vp9_copy_mode_tree,
cm->fc.copy_mode_probs[ctx]);
if (!cm->frame_parallel_decoding_mode)
++cm->counts.copy_mode[ctx][mode - REF0];
break;
}
return mode;
}
#endif
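/* Note: read_copy_mode() spends bits only when there is a real choice:
 * with one candidate, REF0 is implied and nothing is read; two candidates
 * use the two-entry vp9_copy_mode_tree_l2; three or more use the full
 * vp9_copy_mode_tree. Counts are accumulated only when frame-parallel
 * decoding is off, since backward probability adaptation is disabled in
 * that mode. */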
static int read_segment_id(vp9_reader *r, const struct segmentation *seg) {
return vp9_read_tree(r, vp9_segment_tree, seg->tree_probs);
}
@@ -174,11 +144,7 @@ static int read_inter_segment_id(VP9_COMMON *const cm, MACROBLOCKD *const xd,
return segment_id;
}
#if CONFIG_SUPERTX
int read_skip(VP9_COMMON *cm, const MACROBLOCKD *xd,
#else
static int read_skip(VP9_COMMON *cm, const MACROBLOCKD *xd,
#endif
int segment_id, vp9_reader *r) {
if (vp9_segfeature_active(&cm->seg, segment_id, SEG_LVL_SKIP)) {
return 1;
@@ -209,85 +175,29 @@ static void read_intra_frame_mode_info(VP9_COMMON *const cm,
switch (bsize) {
case BLOCK_4X4:
#if !CONFIG_FILTERINTRA
for (i = 0; i < 4; ++i)
#else
for (i = 0; i < 4; ++i) {
#endif
mi->bmi[i].as_mode =
read_intra_mode(r, get_y_mode_probs(mi, above_mi, left_mi, i));
#if CONFIG_FILTERINTRA
if (is_filter_allowed(mi->bmi[i].as_mode))
mi->b_filter_info[i] =
vp9_read(r, cm->fc.filterintra_prob[0][mi->bmi[i].as_mode]);
else
mi->b_filter_info[i] = 0;
}
mbmi->filterbit = mi->b_filter_info[3];
#endif
mbmi->mode = mi->bmi[3].as_mode;
break;
case BLOCK_4X8:
mi->bmi[0].as_mode = mi->bmi[2].as_mode =
read_intra_mode(r, get_y_mode_probs(mi, above_mi, left_mi, 0));
#if CONFIG_FILTERINTRA
if (is_filter_allowed(mi->bmi[0].as_mode))
mi->b_filter_info[0] = mi->b_filter_info[2] =
vp9_read(r, cm->fc.filterintra_prob[0][mi->bmi[0].as_mode]);
else
mi->b_filter_info[0] = mi->b_filter_info[2] = 0;
#endif
mi->bmi[1].as_mode = mi->bmi[3].as_mode = mbmi->mode =
read_intra_mode(r, get_y_mode_probs(mi, above_mi, left_mi, 1));
#if CONFIG_FILTERINTRA
if (is_filter_allowed(mi->bmi[1].as_mode))
mi->b_filter_info[1] = mi->b_filter_info[3] = mbmi->filterbit =
vp9_read(r, cm->fc.filterintra_prob[0][mi->bmi[1].as_mode]);
else
mi->b_filter_info[1] = mi->b_filter_info[3] = mbmi->filterbit = 0;
#endif
break;
case BLOCK_8X4:
mi->bmi[0].as_mode = mi->bmi[1].as_mode =
read_intra_mode(r, get_y_mode_probs(mi, above_mi, left_mi, 0));
#if CONFIG_FILTERINTRA
if (is_filter_allowed(mi->bmi[0].as_mode))
mi->b_filter_info[0] = mi->b_filter_info[1] =
vp9_read(r, cm->fc.filterintra_prob[0][mi->bmi[0].as_mode]);
else
mi->b_filter_info[0] = mi->b_filter_info[1] = 0;
#endif
mi->bmi[2].as_mode = mi->bmi[3].as_mode = mbmi->mode =
read_intra_mode(r, get_y_mode_probs(mi, above_mi, left_mi, 2));
#if CONFIG_FILTERINTRA
if (is_filter_allowed(mi->bmi[2].as_mode))
mi->b_filter_info[2] = mi->b_filter_info[3] = mbmi->filterbit =
vp9_read(r, cm->fc.filterintra_prob[0][mi->bmi[2].as_mode]);
else
mi->b_filter_info[2] = mi->b_filter_info[3] = mbmi->filterbit = 0;
#endif
break;
default:
mbmi->mode = read_intra_mode(r,
get_y_mode_probs(mi, above_mi, left_mi, 0));
#if CONFIG_FILTERINTRA
if (is_filter_enabled(mbmi->tx_size) && is_filter_allowed(mbmi->mode))
mbmi->filterbit = vp9_read(r,
cm->fc.filterintra_prob[mbmi->tx_size][mbmi->mode]);
else
mbmi->filterbit = 0;
#endif
}
mbmi->uv_mode = read_intra_mode(r, vp9_kf_uv_mode_prob[mbmi->mode]);
#if CONFIG_FILTERINTRA
if (is_filter_enabled(get_uv_tx_size(mbmi)) &&
is_filter_allowed(mbmi->uv_mode))
mbmi->uv_filterbit = vp9_read(r,
cm->fc.filterintra_prob[get_uv_tx_size(mbmi)][mbmi->uv_mode]);
else
mbmi->uv_filterbit = 0;
#endif
}
static int read_mv_component(vp9_reader *r,
@@ -425,97 +335,25 @@ static void read_intra_block_mode_info(VP9_COMMON *const cm, MODE_INFO *mi,
switch (bsize) {
case BLOCK_4X4:
#if !CONFIG_FILTERINTRA
for (i = 0; i < 4; ++i)
#else
for (i = 0; i < 4; ++i) {
#endif
mi->bmi[i].as_mode = read_intra_mode_y(cm, r, 0);
#if CONFIG_FILTERINTRA
if (is_filter_allowed(mi->bmi[i].as_mode)) {
mi->b_filter_info[i] =
vp9_read(r, cm->fc.filterintra_prob[0][mi->bmi[i].as_mode]);
cm->counts.filterintra[0][mi->bmi[i].as_mode]
[mi->b_filter_info[i]]++;
} else {
mi->b_filter_info[i] = 0;
}
}
mbmi->filterbit = mi->b_filter_info[3];
#endif
mbmi->mode = mi->bmi[3].as_mode;
break;
case BLOCK_4X8:
mi->bmi[0].as_mode = mi->bmi[2].as_mode = read_intra_mode_y(cm, r, 0);
#if CONFIG_FILTERINTRA
if (is_filter_allowed(mi->bmi[0].as_mode)) {
mi->b_filter_info[0] = mi->b_filter_info[2] =
vp9_read(r, cm->fc.filterintra_prob[0][mi->bmi[0].as_mode]);
cm->counts.filterintra[0][mi->bmi[0].as_mode][mi->b_filter_info[0]]++;
} else {
mi->b_filter_info[0] = mi->b_filter_info[2] = 0;
}
#endif
mi->bmi[1].as_mode = mi->bmi[3].as_mode = mbmi->mode =
read_intra_mode_y(cm, r, 0);
#if CONFIG_FILTERINTRA
if (is_filter_allowed(mi->bmi[1].as_mode)) {
mi->b_filter_info[1] = mi->b_filter_info[3] = mbmi->filterbit =
vp9_read(r, cm->fc.filterintra_prob[0][mi->bmi[1].as_mode]);
cm->counts.filterintra[0][mi->bmi[1].as_mode][mi->b_filter_info[1]]++;
} else {
mi->b_filter_info[1] = mi->b_filter_info[3] = mbmi->filterbit = 0;
}
#endif
break;
case BLOCK_8X4:
mi->bmi[0].as_mode = mi->bmi[1].as_mode = read_intra_mode_y(cm, r, 0);
#if CONFIG_FILTERINTRA
if (is_filter_allowed(mi->bmi[0].as_mode)) {
mi->b_filter_info[0] = mi->b_filter_info[1] =
vp9_read(r, cm->fc.filterintra_prob[0][mi->bmi[0].as_mode]);
cm->counts.filterintra[0][mi->bmi[0].as_mode][mi->b_filter_info[0]]++;
} else {
mi->b_filter_info[0] = mi->b_filter_info[1] = 0;
}
#endif
mi->bmi[2].as_mode = mi->bmi[3].as_mode = mbmi->mode =
read_intra_mode_y(cm, r, 0);
#if CONFIG_FILTERINTRA
if (is_filter_allowed(mi->bmi[2].as_mode)) {
mi->b_filter_info[2] = mi->b_filter_info[3] = mbmi->filterbit =
vp9_read(r, cm->fc.filterintra_prob[0][mi->bmi[2].as_mode]);
cm->counts.filterintra[0][mi->bmi[2].as_mode][mi->b_filter_info[2]]++;
} else {
mi->b_filter_info[2] = mi->b_filter_info[3] = mbmi->filterbit = 0;
}
#endif
break;
default:
mbmi->mode = read_intra_mode_y(cm, r, size_group_lookup[bsize]);
#if CONFIG_FILTERINTRA
if (is_filter_allowed(mbmi->mode) && is_filter_enabled(mbmi->tx_size)) {
mbmi->filterbit = vp9_read(r,
cm->fc.filterintra_prob[mbmi->tx_size][mbmi->mode]);
cm->counts.filterintra[mbmi->tx_size][mbmi->mode][mbmi->filterbit]++;
} else {
mbmi->filterbit = 0;
}
#endif
}
mbmi->uv_mode = read_intra_mode_uv(cm, r, mbmi->mode);
#if CONFIG_FILTERINTRA
if (is_filter_allowed(mbmi->uv_mode) &&
is_filter_enabled(get_uv_tx_size(mbmi))) {
mbmi->uv_filterbit = vp9_read(r,
cm->fc.filterintra_prob[get_uv_tx_size(mbmi)][mbmi->uv_mode]);
cm->counts.filterintra[get_uv_tx_size(mbmi)]
[mbmi->uv_mode][mbmi->uv_filterbit]++;
} else {
mbmi->uv_filterbit = 0;
}
#endif
}
static INLINE int is_mv_valid(const MV *mv) {
@@ -584,9 +422,6 @@ static void read_inter_block_mode_info(VP9_COMMON *const cm,
MACROBLOCKD *const xd,
const TileInfo *const tile,
MODE_INFO *const mi,
#if CONFIG_SUPERTX && CONFIG_EXT_TX
int supertx_enabled,
#endif
int mi_row, int mi_col, vp9_reader *r) {
MB_MODE_INFO *const mbmi = &mi->mbmi;
const BLOCK_SIZE bsize = mbmi->sb_type;
@@ -629,37 +464,6 @@ static void read_inter_block_mode_info(VP9_COMMON *const cm,
? read_switchable_interp_filter(cm, xd, r)
: cm->interp_filter;
#if CONFIG_INTERINTRA
if ((cm->use_interintra) &&
is_interintra_allowed(bsize) &&
is_inter_mode(mbmi->mode) &&
(mbmi->ref_frame[1] <= INTRA_FRAME)) {
mbmi->ref_frame[1] = vp9_read(r, cm->fc.interintra_prob[bsize]) ?
INTRA_FRAME : NONE;
cm->counts.interintra[bsize][mbmi->ref_frame[1] == INTRA_FRAME]++;
#if CONFIG_MASKED_INTERINTRA
mbmi->use_masked_interintra = 0;
#endif
if (mbmi->ref_frame[1] == INTRA_FRAME) {
mbmi->interintra_mode =
read_intra_mode_y(cm, r, size_group_lookup[bsize]);
mbmi->interintra_uv_mode = mbmi->interintra_mode;
#if CONFIG_MASKED_INTERINTRA
if (cm->use_masked_interintra && get_mask_bits_interintra(bsize)) {
mbmi->use_masked_interintra = vp9_read(r,
cm->fc.masked_interintra_prob[bsize]);
cm->counts.masked_interintra[bsize][mbmi->use_masked_interintra]++;
if (mbmi->use_masked_interintra) {
mbmi->interintra_mask_index = vp9_read_literal(r,
get_mask_bits_interintra(bsize));
mbmi->interintra_uv_mask_index = mbmi->interintra_mask_index;
}
}
#endif
}
}
#endif
if (bsize < BLOCK_8X8) {
const int num_4x4_w = num_4x4_blocks_wide_lookup[bsize]; // 1 or 2
const int num_4x4_h = num_4x4_blocks_high_lookup[bsize]; // 1 or 2
@@ -693,6 +497,10 @@ static void read_inter_block_mode_info(VP9_COMMON *const cm,
mi->bmi[j + 2] = mi->bmi[j];
if (num_4x4_w == 2)
mi->bmi[j + 1] = mi->bmi[j];
#if CONFIG_TRANSCODE
mi->bmi[j].as_mode = b_mode;
#endif
}
}
@@ -704,160 +512,35 @@ static void read_inter_block_mode_info(VP9_COMMON *const cm,
xd->corrupted |= !assign_mv(cm, mbmi->mode, mbmi->mv, nearestmv,
nearestmv, nearmv, is_compound, allow_hp, r);
}
#if CONFIG_MASKED_INTERINTER
mbmi->use_masked_interinter = 0;
if (cm->use_masked_interinter &&
cm->reference_mode != SINGLE_REFERENCE &&
is_inter_mode(mbmi->mode) &&
get_mask_bits(bsize) &&
mbmi->ref_frame[1] > INTRA_FRAME) {
mbmi->use_masked_interinter =
vp9_read(r, cm->fc.masked_interinter_prob[bsize]);
cm->counts.masked_interinter[bsize][mbmi->use_masked_interinter]++;
if (mbmi->use_masked_interinter) {
mbmi->mask_index = vp9_read_literal(r, get_mask_bits(bsize));
}
}
#endif
}
static void read_inter_frame_mode_info(VP9_COMMON *const cm,
MACROBLOCKD *const xd,
const TileInfo *const tile,
#if CONFIG_SUPERTX
int supertx_enabled,
#endif
int mi_row, int mi_col, vp9_reader *r) {
MODE_INFO *const mi = xd->mi[0];
MB_MODE_INFO *const mbmi = &mi->mbmi;
int inter_block;
#if CONFIG_COPY_CODING
int num_candidate = 0;
MB_MODE_INFO *inter_ref_list[18] = {NULL};
#endif
mbmi->mv[0].as_int = 0;
mbmi->mv[1].as_int = 0;
#if CONFIG_COPY_CODING
if (mbmi->sb_type >= BLOCK_8X8)
num_candidate = vp9_construct_ref_inter_list(cm, xd, mbmi->sb_type,
mi_row, mi_col, inter_ref_list);
if (mbmi->sb_type >= BLOCK_8X8 && num_candidate > 0) {
int ctx = vp9_get_copy_mode_context(xd);
int is_copy = vp9_read(r, cm->fc.copy_noref_prob[ctx][mbmi->sb_type]);
++cm->counts.copy_noref[ctx][mbmi->sb_type][is_copy];
if (!is_copy) {
mbmi->copy_mode = NOREF;
} else {
mbmi->copy_mode = read_copy_mode(cm, r, num_candidate, ctx);
}
} else {
mbmi->copy_mode = NOREF;
}
if (mbmi->copy_mode != NOREF) {
BLOCK_SIZE bsize_backup = mbmi->sb_type;
int skip_backup = mbmi->skip;
COPY_MODE copy_mode_backup = mbmi->copy_mode;
#if CONFIG_SUPERTX
TX_SIZE tx_size_backup = mbmi->tx_size;
#endif
#if CONFIG_EXT_TX
EXT_TX_TYPE ext_txfrm_backup = mbmi->ext_txfrm;
#endif
inter_block = 1;
*mbmi = *inter_ref_list[mbmi->copy_mode - REF0];
#if CONFIG_MASKED_INTERINTER
mbmi->use_masked_interinter = 0;
#endif
#if CONFIG_INTERINTRA
if (mbmi->ref_frame[1] == INTRA_FRAME)
mbmi->ref_frame[1] = NONE;
#endif
#if CONFIG_SUPERTX
mbmi->tx_size = tx_size_backup;
#endif
#if CONFIG_EXT_TX
mbmi->ext_txfrm = ext_txfrm_backup;
#endif
mbmi->sb_type = bsize_backup;
mbmi->mode = NEARESTMV;
mbmi->skip = skip_backup;
mbmi->copy_mode = copy_mode_backup;
}
#endif
#if CONFIG_SUPERTX
if (!supertx_enabled) {
#endif
mbmi->segment_id = read_inter_segment_id(cm, xd, mi_row, mi_col, r);
mbmi->skip = read_skip(cm, xd, mbmi->segment_id, r);
#if CONFIG_COPY_CODING
if (mbmi->copy_mode == NOREF)
#endif
inter_block = read_is_inter_block(cm, xd, mbmi->segment_id, r);
mbmi->tx_size = read_tx_size(cm, xd, cm->tx_mode, mbmi->sb_type,
!mbmi->skip || !inter_block, r);
#if CONFIG_EXT_TX
if (inter_block &&
mbmi->tx_size <= TX_16X16 &&
mbmi->sb_type >= BLOCK_8X8 &&
#if CONFIG_SUPERTX
!supertx_enabled &&
#endif
!vp9_segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP) &&
!mbmi->skip) {
mbmi->ext_txfrm = vp9_read(r, cm->fc.ext_tx_prob);
if (!cm->frame_parallel_decoding_mode)
++cm->counts.ext_tx[mbmi->ext_txfrm];
} else {
mbmi->ext_txfrm = NORM;
}
#endif
#if CONFIG_SUPERTX
} else {
const int ctx = vp9_get_intra_inter_context(xd);
mbmi->segment_id = 0;
inter_block = 1;
if (!cm->frame_parallel_decoding_mode)
#if CONFIG_COPY_CODING
if (mbmi->copy_mode == NOREF)
#endif
++cm->counts.intra_inter[ctx][1];
}
#endif
#if CONFIG_COPY_CODING
if (mbmi->copy_mode == NOREF) {
#endif
if (inter_block)
read_inter_block_mode_info(cm, xd, tile, mi,
#if CONFIG_SUPERTX && CONFIG_EXT_TX
supertx_enabled,
#endif
mi_row, mi_col, r);
read_inter_block_mode_info(cm, xd, tile, mi, mi_row, mi_col, r);
else
read_intra_block_mode_info(cm, mi, r);
#if CONFIG_COPY_CODING
}
#endif
}
void vp9_read_mode_info(VP9_COMMON *cm, MACROBLOCKD *xd,
const TileInfo *const tile,
#if CONFIG_SUPERTX
int supertx_enabled,
#endif
int mi_row, int mi_col, vp9_reader *r) {
if (frame_is_intra_only(cm))
read_intra_frame_mode_info(cm, xd, mi_row, mi_col, r);
else
read_inter_frame_mode_info(cm, xd, tile,
#if CONFIG_SUPERTX
supertx_enabled,
#endif
mi_row, mi_col, r);
read_inter_frame_mode_info(cm, xd, tile, mi_row, mi_col, r);
}


@@ -21,16 +21,8 @@ struct TileInfo;
void vp9_read_mode_info(VP9_COMMON *cm, MACROBLOCKD *xd,
const struct TileInfo *const tile,
#if CONFIG_SUPERTX
int supertx_enabled,
#endif
int mi_row, int mi_col, vp9_reader *r);
#if CONFIG_SUPERTX
int read_skip(VP9_COMMON *cm, const MACROBLOCKD *xd,
int segment_id, vp9_reader *r);
#endif
#ifdef __cplusplus
} // extern "C"
#endif


@@ -32,11 +32,14 @@
#include "vp9/decoder/vp9_detokenize.h"
#include "vp9/decoder/vp9_dthread.h"
#include <stdio.h>
static void initialize_dec() {
static int init_done = 0;
if (!init_done) {
vp9_init_neighbors();
vp9_init_quant_tables();
init_done = 1;
}
}
@@ -78,6 +81,9 @@ VP9Decoder *vp9_decoder_create() {
vp9_worker_init(&pbi->lf_worker);
#if CONFIG_TRANSCODE && WRITE_MI_ARRAY
cm->mi_array_pf = fopen("mode_info_array_2.bin", "rb");
#endif
return pbi;
}
@@ -85,6 +91,10 @@ void vp9_decoder_remove(VP9Decoder *pbi) {
VP9_COMMON *const cm = &pbi->common;
int i;
#if CONFIG_TRANSCODE && WRITE_MI_ARRAY
fclose(cm->mi_array_pf);
#endif
vp9_remove_common(cm);
vp9_worker_end(&pbi->lf_worker);
vpx_free(pbi->lf_worker.data1);
@@ -210,10 +220,7 @@ static void swap_frame_buffers(VP9Decoder *pbi) {
}
cm->frame_to_show = get_frame_new_buffer(cm);
if (!pbi->frame_parallel_decode || !cm->show_frame) {
--cm->frame_bufs[cm->new_fb_idx].ref_count;
}
cm->frame_bufs[cm->new_fb_idx].ref_count--;
// Invalidate these references until the next frame starts.
for (ref_index = 0; ref_index < 3; ref_index++)
@@ -242,9 +249,7 @@ int vp9_receive_compressed_data(VP9Decoder *pbi,
}
// Check if the previous frame was a frame without any references to it.
// Release frame buffer if not decoding in frame parallel mode.
if (!pbi->frame_parallel_decode && cm->new_fb_idx >= 0
&& cm->frame_bufs[cm->new_fb_idx].ref_count == 0)
if (cm->new_fb_idx >= 0 && cm->frame_bufs[cm->new_fb_idx].ref_count == 0)
cm->release_fb_cb(cm->cb_priv,
&cm->frame_bufs[cm->new_fb_idx].raw_frame_buffer);
cm->new_fb_idx = get_free_fb(cm);
@@ -259,10 +264,10 @@ int vp9_receive_compressed_data(VP9Decoder *pbi,
// TODO(jkoleszar): Error concealment is undefined and non-normative
// at this point, but if it becomes so, [0] may not always be the correct
// thing to do here.
if (cm->frame_refs[0].idx != INT_MAX && cm->frame_refs[0].buf != NULL)
if (cm->frame_refs[0].idx != INT_MAX)
cm->frame_refs[0].buf->corrupted = 1;
if (cm->new_fb_idx > 0 && cm->frame_bufs[cm->new_fb_idx].ref_count > 0)
if (cm->frame_bufs[cm->new_fb_idx].ref_count > 0)
cm->frame_bufs[cm->new_fb_idx].ref_count--;
return -1;


@@ -27,6 +27,11 @@
extern "C" {
#endif
#if CONFIG_TRANSCODE
#define WRITE_MI_ARRAY 0
#define READ_MI_ARRAY 0
#endif
// TODO(hkuang): combine this with TileWorkerData.
typedef struct TileData {
VP9_COMMON *cm;
@@ -43,8 +48,6 @@ typedef struct VP9Decoder {
int refresh_frame_flags;
int frame_parallel_decode; // frame-based threading.
VP9Worker lf_worker;
VP9Worker *tile_workers;
int num_tile_workers;


@@ -40,23 +40,6 @@ typedef struct VP9LfSyncData {
int sync_range;
} VP9LfSync;
// WorkerData for the FrameWorker thread. It contains all the information of
// the worker and decode structures for decoding a frame.
typedef struct FrameWorkerData {
struct VP9Decoder *pbi;
const uint8_t *data;
const uint8_t *data_end;
size_t data_size;
void *user_priv;
int result;
int worker_id;
// scratch_buffer is used in frame parallel mode only.
// It is used to make a copy of the compressed data.
uint8_t *scratch_buffer;
size_t scratch_buffer_size;
} FrameWorkerData;
// Allocate memory for loopfilter row synchronization.
void vp9_loop_filter_alloc(struct VP9Common *cm, VP9LfSync *lf_sync,
int rows, int width);


@@ -10,7 +10,7 @@
#include "vp9/decoder/vp9_read_bit_buffer.h"
size_t vp9_rb_bytes_read(struct vp9_read_bit_buffer *rb) {
return (rb->bit_offset + CHAR_BIT - 1) / CHAR_BIT;
return rb->bit_offset / CHAR_BIT + (rb->bit_offset % CHAR_BIT > 0);
}
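/* Note: the two return expressions above are equivalent ceiling
 * divisions. With bit_offset = 9 and CHAR_BIT = 8: (9 + 8 - 1) / 8 = 2,
 * and 9 / 8 + (9 % 8 > 0) = 1 + 1 = 2 -- a partially consumed byte counts
 * as fully read either way. */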
int vp9_rb_read_bit(struct vp9_read_bit_buffer *rb) {


@@ -38,32 +38,12 @@ static struct vp9_token intra_mode_encodings[INTRA_MODES];
static struct vp9_token switchable_interp_encodings[SWITCHABLE_FILTERS];
static struct vp9_token partition_encodings[PARTITION_TYPES];
static struct vp9_token inter_mode_encodings[INTER_MODES];
#if CONFIG_COPY_CODING
static struct vp9_token copy_mode_encodings_l2[2];
static struct vp9_token copy_mode_encodings[COPY_MODE_COUNT - 1];
#endif
#if CONFIG_SUPERTX
static int vp9_check_supertx(VP9_COMMON *cm, int mi_row, int mi_col,
BLOCK_SIZE bsize) {
MODE_INFO **mi;
mi = cm->mi_grid_visible + (mi_row * cm->mi_stride + mi_col);
return mi[0]->mbmi.tx_size == bsize_to_tx_size(bsize) &&
mi[0]->mbmi.sb_type < bsize;
}
#endif
void vp9_entropy_mode_init() {
vp9_tokens_from_tree(intra_mode_encodings, vp9_intra_mode_tree);
vp9_tokens_from_tree(switchable_interp_encodings, vp9_switchable_interp_tree);
vp9_tokens_from_tree(partition_encodings, vp9_partition_tree);
vp9_tokens_from_tree(inter_mode_encodings, vp9_inter_mode_tree);
#if CONFIG_COPY_CODING
vp9_tokens_from_tree(copy_mode_encodings_l2, vp9_copy_mode_tree_l2);
vp9_tokens_from_tree(copy_mode_encodings, vp9_copy_mode_tree);
#endif
}
static void write_intra_mode(vp9_writer *w, PREDICTION_MODE mode,
@@ -78,21 +58,6 @@ static void write_inter_mode(vp9_writer *w, PREDICTION_MODE mode,
&inter_mode_encodings[INTER_OFFSET(mode)]);
}
#if CONFIG_COPY_CODING
static void write_copy_mode(VP9_COMMON *cm, vp9_writer *w, COPY_MODE mode,
int inter_ref_count, int copy_mode_context) {
if (inter_ref_count == 2) {
vp9_write_token(w, vp9_copy_mode_tree_l2,
cm->fc.copy_mode_probs_l2[copy_mode_context],
&copy_mode_encodings_l2[mode - REF0]);
} else if (inter_ref_count > 2) {
vp9_write_token(w, vp9_copy_mode_tree,
cm->fc.copy_mode_probs[copy_mode_context],
&copy_mode_encodings[mode - REF0]);
}
}
#endif
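/* Note: write_copy_mode() is the encoder-side mirror of read_copy_mode()
 * in the decoder. When inter_ref_count == 1 no token is written at all,
 * since REF0 is the only possible choice and the decoder infers it. */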
static void encode_unsigned_max(struct vp9_write_bit_buffer *wb,
int data, int max) {
vp9_wb_write_literal(wb, data, get_unsigned_bits(max));
@@ -260,9 +225,6 @@ static void write_ref_frames(const VP9_COMP *cpi, vp9_writer *w) {
}
static void pack_inter_mode_mvs(VP9_COMP *cpi, const MODE_INFO *mi,
#if CONFIG_SUPERTX
int supertx_enabled,
#endif
vp9_writer *w) {
VP9_COMMON *const cm = &cpi->common;
const nmv_context *nmvc = &cm->fc.nmvc;
@@ -277,19 +239,7 @@ static void pack_inter_mode_mvs(VP9_COMP *cpi, const MODE_INFO *mi,
const int is_inter = is_inter_block(mbmi);
const int is_compound = has_second_ref(mbmi);
int skip, ref;
#if CONFIG_COPY_CODING
int copy_mode_context = vp9_get_copy_mode_context(xd);
#endif
#if CONFIG_COPY_CODING
if (bsize >= BLOCK_8X8 && mbmi->inter_ref_count > 0) {
vp9_write(w, mbmi->copy_mode != NOREF,
cm->fc.copy_noref_prob[copy_mode_context][bsize]);
if (mbmi->copy_mode != NOREF)
write_copy_mode(cm, w, mbmi->copy_mode, mbmi->inter_ref_count,
copy_mode_context);
}
#endif
if (seg->update_map) {
if (seg->temporal_update) {
const int pred_flag = mbmi->seg_id_predicted;
@@ -302,57 +252,20 @@ static void pack_inter_mode_mvs(VP9_COMP *cpi, const MODE_INFO *mi,
}
}
#if CONFIG_SUPERTX
if (!supertx_enabled)
#endif
skip = write_skip(cpi, segment_id, mi, w);
#if CONFIG_SUPERTX
else
skip = mbmi->skip;
#endif
#if CONFIG_SUPERTX
if (!supertx_enabled) {
#endif
#if CONFIG_COPY_CODING
if (mbmi->copy_mode == NOREF)
#endif
if (!vp9_segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME))
vp9_write(w, is_inter, vp9_get_intra_inter_prob(cm, xd));
#if CONFIG_SUPERTX
}
#endif
if (bsize >= BLOCK_8X8 && cm->tx_mode == TX_MODE_SELECT &&
#if CONFIG_SUPERTX
!supertx_enabled &&
#endif
!(is_inter &&
(skip || vp9_segfeature_active(seg, segment_id, SEG_LVL_SKIP)))) {
write_selected_tx_size(cpi, mbmi->tx_size, bsize, w);
}
#if CONFIG_EXT_TX
if (is_inter &&
mbmi->tx_size <= TX_16X16 &&
bsize >= BLOCK_8X8 &&
#if CONFIG_SUPERTX
!supertx_enabled &&
#endif
!mbmi->skip &&
!vp9_segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) {
vp9_write(w, mbmi->ext_txfrm, cm->fc.ext_tx_prob);
}
#endif
if (!is_inter) {
if (bsize >= BLOCK_8X8) {
write_intra_mode(w, mode, cm->fc.y_mode_prob[size_group_lookup[bsize]]);
#if CONFIG_FILTERINTRA
if (is_filter_allowed(mode) && is_filter_enabled(mbmi->tx_size)) {
vp9_write(w, mbmi->filterbit,
cm->fc.filterintra_prob[mbmi->tx_size][mode]);
}
#endif
} else {
int idx, idy;
const int num_4x4_w = num_4x4_blocks_wide_lookup[bsize];
@@ -361,28 +274,11 @@ static void pack_inter_mode_mvs(VP9_COMP *cpi, const MODE_INFO *mi,
for (idx = 0; idx < 2; idx += num_4x4_w) {
const PREDICTION_MODE b_mode = mi->bmi[idy * 2 + idx].as_mode;
write_intra_mode(w, b_mode, cm->fc.y_mode_prob[0]);
#if CONFIG_FILTERINTRA
if (is_filter_allowed(b_mode)) {
vp9_write(w, mi->b_filter_info[idy * 2 + idx],
cm->fc.filterintra_prob[0][b_mode]);
}
#endif
}
}
}
write_intra_mode(w, mbmi->uv_mode, cm->fc.uv_mode_prob[mode]);
#if CONFIG_FILTERINTRA
if (is_filter_allowed(mbmi->uv_mode) &&
is_filter_enabled(get_uv_tx_size(mbmi))) {
vp9_write(w, mbmi->uv_filterbit,
cm->fc.filterintra_prob[get_uv_tx_size(mbmi)][mbmi->uv_mode]);
}
#endif
#if !CONFIG_COPY_CODING
} else {
#else
} else if (mbmi->copy_mode == NOREF) {
#endif
const int mode_ctx = mbmi->mode_context[mbmi->ref_frame[0]];
const vp9_prob *const inter_probs = cm->fc.inter_mode_probs[mode_ctx];
write_ref_frames(cpi, w);
@@ -404,32 +300,6 @@ static void pack_inter_mode_mvs(VP9_COMP *cpi, const MODE_INFO *mi,
assert(mbmi->interp_filter == cm->interp_filter);
}
#if CONFIG_INTERINTRA
if ((cm->use_interintra) &&
cpi->common.reference_mode != COMPOUND_REFERENCE &&
is_interintra_allowed(bsize) &&
is_inter_mode(mode) &&
(mbmi->ref_frame[1] <= INTRA_FRAME)) {
vp9_write(w, mbmi->ref_frame[1] == INTRA_FRAME,
cm->fc.interintra_prob[bsize]);
if (mbmi->ref_frame[1] == INTRA_FRAME) {
write_intra_mode(w, mbmi->interintra_mode,
cm->fc.y_mode_prob[size_group_lookup[bsize]]);
#if CONFIG_MASKED_INTERINTRA
if (get_mask_bits_interintra(bsize) &&
cm->use_masked_interintra) {
vp9_write(w, mbmi->use_masked_interintra,
cm->fc.masked_interintra_prob[bsize]);
if (mbmi->use_masked_interintra) {
vp9_write_literal(w, mbmi->interintra_mask_index,
get_mask_bits_interintra(bsize));
}
}
#endif
}
}
#endif
if (bsize < BLOCK_8X8) {
const int num_4x4_w = num_4x4_blocks_wide_lookup[bsize];
const int num_4x4_h = num_4x4_blocks_high_lookup[bsize];
@@ -456,18 +326,6 @@ static void pack_inter_mode_mvs(VP9_COMP *cpi, const MODE_INFO *mi,
allow_hp);
}
}
#if CONFIG_MASKED_INTERINTER
if (cm->use_masked_interinter &&
cm->reference_mode != SINGLE_REFERENCE &&
is_inter_mode(mode) &&
get_mask_bits(mbmi->sb_type) &&
mbmi->ref_frame[1] > INTRA_FRAME) {
vp9_write(w, mbmi->use_masked_interinter,
cm->fc.masked_interinter_prob[bsize]);
if (mbmi->use_masked_interinter)
vp9_write_literal(w, mbmi->mask_index, get_mask_bits(mbmi->sb_type));
}
#endif
}
}
@@ -492,11 +350,6 @@ static void write_mb_modes_kf(const VP9_COMP *cpi, MODE_INFO **mi_8x8,
if (bsize >= BLOCK_8X8) {
write_intra_mode(w, mbmi->mode, get_y_mode_probs(mi, above_mi, left_mi, 0));
#if CONFIG_FILTERINTRA
if (is_filter_allowed(mbmi->mode) && is_filter_enabled(mbmi->tx_size))
vp9_write(w, mbmi->filterbit,
cm->fc.filterintra_prob[mbmi->tx_size][mbmi->mode]);
#endif
} else {
const int num_4x4_w = num_4x4_blocks_wide_lookup[bsize];
const int num_4x4_h = num_4x4_blocks_high_lookup[bsize];
@@ -507,29 +360,15 @@ static void write_mb_modes_kf(const VP9_COMP *cpi, MODE_INFO **mi_8x8,
const int block = idy * 2 + idx;
write_intra_mode(w, mi->bmi[block].as_mode,
get_y_mode_probs(mi, above_mi, left_mi, block));
#if CONFIG_FILTERINTRA
if (is_filter_allowed(mi->bmi[block].as_mode))
vp9_write(w, mi->b_filter_info[block],
cm->fc.filterintra_prob[0][mi->bmi[block].as_mode]);
#endif
}
}
}
write_intra_mode(w, mbmi->uv_mode, vp9_kf_uv_mode_prob[mbmi->mode]);
#if CONFIG_FILTERINTRA
if (is_filter_allowed(mbmi->uv_mode) &&
is_filter_enabled(get_uv_tx_size(mbmi)))
vp9_write(w, mbmi->uv_filterbit,
cm->fc.filterintra_prob[get_uv_tx_size(mbmi)][mbmi->uv_mode]);
#endif
}
static void write_modes_b(VP9_COMP *cpi, const TileInfo *const tile,
vp9_writer *w, TOKENEXTRA **tok, TOKENEXTRA *tok_end,
#if CONFIG_SUPERTX
int supertx_enabled,
#endif
int mi_row, int mi_col) {
VP9_COMMON *const cm = &cpi->common;
MACROBLOCKD *const xd = &cpi->mb.e_mbd;
@@ -545,21 +384,11 @@ static void write_modes_b(VP9_COMP *cpi, const TileInfo *const tile,
if (frame_is_intra_only(cm)) {
write_mb_modes_kf(cpi, xd->mi, w);
} else {
#if CONFIG_SUPERTX
pack_inter_mode_mvs(cpi, m, supertx_enabled, w);
#else
pack_inter_mode_mvs(cpi, m, w);
#endif
}
#if CONFIG_SUPERTX
if (!supertx_enabled) {
#endif
assert(*tok < tok_end);
pack_mb_tokens(w, tok, tok_end);
#if CONFIG_SUPERTX
}
#endif
}
static void write_partition(VP9_COMMON *cm, MACROBLOCKD *xd,
@@ -586,9 +415,6 @@ static void write_partition(VP9_COMMON *cm, MACROBLOCKD *xd,
static void write_modes_sb(VP9_COMP *cpi,
const TileInfo *const tile,
vp9_writer *w, TOKENEXTRA **tok, TOKENEXTRA *tok_end,
#if CONFIG_SUPERTX
int pack_token, int supertx_enabled,
#endif
int mi_row, int mi_col, BLOCK_SIZE bsize) {
VP9_COMMON *const cm = &cpi->common;
MACROBLOCKD *const xd = &cpi->mb.e_mbd;
@@ -605,105 +431,36 @@ static void write_modes_sb(VP9_COMP *cpi,
partition = partition_lookup[bsl][m->mbmi.sb_type];
write_partition(cm, xd, bs, mi_row, mi_col, partition, bsize, w);
subsize = get_subsize(bsize, partition);
#if CONFIG_SUPERTX
xd->mi = cm->mi_grid_visible + (mi_row * cm->mi_stride + mi_col);
set_mi_row_col(xd, tile,
mi_row, num_8x8_blocks_high_lookup[bsize],
mi_col, num_8x8_blocks_wide_lookup[bsize],
cm->mi_rows, cm->mi_cols);
if (!supertx_enabled && cm->frame_type != KEY_FRAME &&
partition != PARTITION_NONE && bsize <= BLOCK_32X32) {
TX_SIZE supertx_size = bsize_to_tx_size(bsize); // b_width_log2(bsize);
vp9_prob prob = partition == PARTITION_SPLIT ?
cm->fc.supertxsplit_prob[supertx_size] :
cm->fc.supertx_prob[supertx_size];
supertx_enabled = (xd->mi[0]->mbmi.tx_size == supertx_size);
vp9_write(w, supertx_enabled, prob);
if (supertx_enabled) {
vp9_write(w, xd->mi[0]->mbmi.skip, vp9_get_skip_prob(cm, xd));
#if CONFIG_EXT_TX
if (supertx_size <= TX_16X16 && !xd->mi[0]->mbmi.skip)
vp9_write(w, xd->mi[0]->mbmi.ext_txfrm, cm->fc.ext_tx_prob);
#endif
}
}
#endif
if (subsize < BLOCK_8X8) {
write_modes_b(cpi, tile, w, tok, tok_end,
#if CONFIG_SUPERTX
supertx_enabled,
#endif
mi_row, mi_col);
write_modes_b(cpi, tile, w, tok, tok_end, mi_row, mi_col);
} else {
switch (partition) {
case PARTITION_NONE:
write_modes_b(cpi, tile, w, tok, tok_end,
#if CONFIG_SUPERTX
supertx_enabled,
#endif
mi_row, mi_col);
write_modes_b(cpi, tile, w, tok, tok_end, mi_row, mi_col);
break;
case PARTITION_HORZ:
write_modes_b(cpi, tile, w, tok, tok_end,
#if CONFIG_SUPERTX
supertx_enabled,
#endif
mi_row, mi_col);
write_modes_b(cpi, tile, w, tok, tok_end, mi_row, mi_col);
if (mi_row + bs < cm->mi_rows)
write_modes_b(cpi, tile, w, tok, tok_end,
#if CONFIG_SUPERTX
supertx_enabled,
#endif
mi_row + bs, mi_col);
write_modes_b(cpi, tile, w, tok, tok_end, mi_row + bs, mi_col);
break;
case PARTITION_VERT:
write_modes_b(cpi, tile, w, tok, tok_end,
#if CONFIG_SUPERTX
supertx_enabled,
#endif
mi_row, mi_col);
write_modes_b(cpi, tile, w, tok, tok_end, mi_row, mi_col);
if (mi_col + bs < cm->mi_cols)
write_modes_b(cpi, tile, w, tok, tok_end,
#if CONFIG_SUPERTX
supertx_enabled,
#endif
mi_row, mi_col + bs);
write_modes_b(cpi, tile, w, tok, tok_end, mi_row, mi_col + bs);
break;
case PARTITION_SPLIT:
write_modes_sb(cpi, tile, w, tok, tok_end,
#if CONFIG_SUPERTX
!supertx_enabled, supertx_enabled,
#endif
mi_row, mi_col, subsize);
write_modes_sb(cpi, tile, w, tok, tok_end,
#if CONFIG_SUPERTX
!supertx_enabled, supertx_enabled,
#endif
mi_row, mi_col + bs,
write_modes_sb(cpi, tile, w, tok, tok_end, mi_row, mi_col, subsize);
write_modes_sb(cpi, tile, w, tok, tok_end, mi_row, mi_col + bs,
subsize);
write_modes_sb(cpi, tile, w, tok, tok_end,
#if CONFIG_SUPERTX
!supertx_enabled, supertx_enabled,
#endif
mi_row + bs, mi_col,
write_modes_sb(cpi, tile, w, tok, tok_end, mi_row + bs, mi_col,
subsize);
write_modes_sb(cpi, tile, w, tok, tok_end,
#if CONFIG_SUPERTX
!supertx_enabled, supertx_enabled,
#endif
mi_row + bs, mi_col + bs,
write_modes_sb(cpi, tile, w, tok, tok_end, mi_row + bs, mi_col + bs,
subsize);
break;
default:
assert(0);
}
}
#if CONFIG_SUPERTX
if (partition != PARTITION_NONE && supertx_enabled && pack_token) {
assert(*tok < tok_end);
pack_mb_tokens(w, tok, tok_end);
}
#endif
// update partition context
if (bsize >= BLOCK_8X8 &&
@@ -721,11 +478,7 @@ static void write_modes(VP9_COMP *cpi,
vp9_zero(cpi->mb.e_mbd.left_seg_context);
for (mi_col = tile->mi_col_start; mi_col < tile->mi_col_end;
mi_col += MI_BLOCK_SIZE)
write_modes_sb(cpi, tile, w, tok, tok_end,
#if CONFIG_SUPERTX
1, 0,
#endif
mi_row, mi_col,
write_modes_sb(cpi, tile, w, tok, tok_end, mi_row, mi_col,
BLOCK_64X64);
}
}
@@ -1137,8 +890,14 @@ static void write_tile_info(VP9_COMMON *cm, struct vp9_write_bit_buffer *wb) {
}
static int get_refresh_mask(VP9_COMP *cpi) {
if (!cpi->multi_arf_allowed && cpi->refresh_golden_frame &&
cpi->rc.is_src_frame_alt_ref && !cpi->use_svc) {
// Should the GF or ARF be updated using the transmitted frame or buffer?
#if CONFIG_MULTIPLE_ARF
if (!cpi->multi_arf_enabled && cpi->refresh_golden_frame &&
!cpi->refresh_alt_ref_frame) {
#else
if (cpi->refresh_golden_frame && !cpi->refresh_alt_ref_frame &&
!cpi->use_svc) {
#endif
// Preserve the previously existing golden frame and update the frame in
// the alt ref slot instead. This is highly specific to the use of
// alt-ref as a forward reference, and this needs to be generalized as
@@ -1151,10 +910,15 @@ static int get_refresh_mask(VP9_COMP *cpi) {
(cpi->refresh_golden_frame << cpi->alt_fb_idx);
} else {
int arf_idx = cpi->alt_fb_idx;
if ((cpi->pass == 2) && cpi->multi_arf_allowed) {
const GF_GROUP *const gf_group = &cpi->twopass.gf_group;
arf_idx = gf_group->arf_update_idx[gf_group->index];
#if CONFIG_MULTIPLE_ARF
// Determine which ARF buffer to use to encode this ARF frame.
if (cpi->multi_arf_enabled) {
int sn = cpi->sequence_number;
arf_idx = (cpi->frame_coding_order[sn] < 0) ?
cpi->arf_buffer_idx[sn + 1] :
cpi->arf_buffer_idx[sn];
}
#endif
return (cpi->refresh_last_frame << cpi->lst_fb_idx) |
(cpi->refresh_golden_frame << cpi->gld_fb_idx) |
(cpi->refresh_alt_ref_frame << arf_idx);
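/* Worked example for the mask above (illustrative values): with
 * refresh_last_frame = 1, lst_fb_idx = 0, refresh_golden_frame = 0, and
 * refresh_alt_ref_frame = 1 with arf_idx = 6, the result is
 * (1 << 0) | (0 << gld_fb_idx) | (1 << 6) = 0x41, i.e. frame buffers 0
 * and 6 are refreshed by this frame. */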
@@ -1423,104 +1187,6 @@ static size_t write_compressed_header(VP9_COMP *cpi, uint8_t *data) {
cm->counts.partition[i], PARTITION_TYPES, &header_bc);
vp9_write_nmv_probs(cm, cm->allow_high_precision_mv, &header_bc);
#if CONFIG_EXT_TX
vp9_cond_prob_diff_update(&header_bc, &fc->ext_tx_prob, cm->counts.ext_tx);
#endif
#if CONFIG_MASKED_INTERINTER
if (cm->reference_mode != SINGLE_REFERENCE) {
if (!cpi->dummy_packing && cm->use_masked_interinter) {
cm->use_masked_interinter = 0;
for (i = 0; i < BLOCK_SIZES; i++)
if (get_mask_bits(i) && (cm->counts.masked_interinter[i][1] > 0)) {
cm->use_masked_interinter = 1;
break;
}
}
vp9_write_bit(&header_bc, cm->use_masked_interinter);
if (cm->use_masked_interinter) {
for (i = 0; i < BLOCK_SIZES; i++)
if (get_mask_bits(i))
vp9_cond_prob_diff_update(&header_bc,
&fc->masked_interinter_prob[i],
cm->counts.masked_interinter[i]);
} else {
vp9_zero(cm->counts.masked_interinter);
}
} else {
if (!cpi->dummy_packing)
cm->use_masked_interinter = 0;
vp9_zero(cm->counts.masked_interinter);
}
#endif
#if CONFIG_INTERINTRA
if (cm->reference_mode != COMPOUND_REFERENCE) {
if (!cpi->dummy_packing && cm->use_interintra) {
cm->use_interintra = 0;
for (i = 0; i < BLOCK_SIZES; i++) {
if (is_interintra_allowed(i) && (cm->counts.interintra[i][1] > 0)) {
cm->use_interintra = 1;
break;
}
}
}
vp9_write_bit(&header_bc, cm->use_interintra);
if (cm->use_interintra) {
for (i = 0; i < BLOCK_SIZES; i++) {
if (is_interintra_allowed(i)) {
vp9_cond_prob_diff_update(&header_bc,
&fc->interintra_prob[i],
cm->counts.interintra[i]);
}
}
#if CONFIG_MASKED_INTERINTRA
if (!cpi->dummy_packing && cm->use_masked_interintra) {
cm->use_masked_interintra = 0;
for (i = 0; i < BLOCK_SIZES; i++) {
if (is_interintra_allowed(i) && get_mask_bits_interintra(i) &&
(cm->counts.masked_interintra[i][1] > 0)) {
cm->use_masked_interintra = 1;
break;
}
}
}
vp9_write_bit(&header_bc, cm->use_masked_interintra);
if (cm->use_masked_interintra) {
for (i = 0; i < BLOCK_SIZES; i++) {
if (is_interintra_allowed(i) && get_mask_bits_interintra(i))
vp9_cond_prob_diff_update(&header_bc,
&fc->masked_interintra_prob[i],
cm->counts.masked_interintra[i]);
}
} else {
vp9_zero(cm->counts.masked_interintra);
}
#endif
} else {
vp9_zero(cm->counts.interintra);
}
} else {
if (!cpi->dummy_packing)
cm->use_interintra = 0;
vp9_zero(cm->counts.interintra);
#if CONFIG_MASKED_INTERINTRA
if (!cpi->dummy_packing)
cm->use_masked_interintra = 0;
vp9_zero(cm->counts.masked_interintra);
#endif
}
#endif
#if CONFIG_COPY_CODING
for (i = 0; i < COPY_MODE_CONTEXTS; i++) {
prob_diff_update(vp9_copy_mode_tree_l2, cm->fc.copy_mode_probs_l2[i],
cm->counts.copy_mode_l2[i], 2, &header_bc);
prob_diff_update(vp9_copy_mode_tree, cm->fc.copy_mode_probs[i],
cm->counts.copy_mode[i], 3, &header_bc);
}
#endif
}
vp9_stop_encode(&header_bc);


@@ -28,7 +28,6 @@ struct macroblock_plane {
struct buf_2d src;
// Quantizer settings
int16_t *quant_fp;
int16_t *quant;
int16_t *quant_shift;
int16_t *zbin;
@@ -49,7 +48,7 @@ struct macroblock {
MACROBLOCKD e_mbd;
int skip_block;
int select_tx_size;
int select_txfm_size;
int skip_recode;
int skip_optimize;
int q_index;
@@ -93,6 +92,8 @@ struct macroblock {
int encode_breakout;
int in_active_map;
// note that token_costs is the cost when eob node is skipped
vp9_coeff_cost token_costs[TX_SIZES];
@@ -104,9 +105,6 @@ struct macroblock {
int use_lp32x32fdct;
int skip_encode;
// skip forward transform and quantization
int skip_txfm;
// Used to store sub partition's choices.
MV pred_mv[MAX_REF_FRAMES];


@@ -100,10 +100,15 @@ void vp9_setup_pc_tree(VP9_COMMON *cm, VP9_COMP *cpi) {
vpx_free(cpi->leaf_tree);
CHECK_MEM_ERROR(cm, cpi->leaf_tree, vpx_calloc(leaf_nodes,
sizeof(*cpi->leaf_tree)));
#if CONFIG_TRANSCODE
vpx_memset(cpi->leaf_tree, 0, sizeof(*cpi->leaf_tree));
#endif
vpx_free(cpi->pc_tree);
CHECK_MEM_ERROR(cm, cpi->pc_tree, vpx_calloc(tree_nodes,
sizeof(*cpi->pc_tree)));
#if CONFIG_TRANSCODE
vpx_memset(cpi->pc_tree, 0, sizeof(*cpi->pc_tree));
#endif
this_pc = &cpi->pc_tree[0];
this_leaf = &cpi->leaf_tree[0];


@@ -33,7 +33,6 @@ typedef struct {
int is_coded;
int num_4x4_blk;
int skip;
int skip_txfm;
int best_mode_index;
int hybrid_pred_diff;
int comp_pred_diff;


@@ -43,17 +43,6 @@ static void fdct4(const int16_t *input, int16_t *output) {
output[3] = fdct_round_shift(temp2);
}
void vp9_fdct4x4_1_c(const int16_t *input, int16_t *output, int stride) {
int r, c;
int16_t sum = 0;
for (r = 0; r < 4; ++r)
for (c = 0; c < 4; ++c)
sum += input[r * stride + c];
output[0] = sum << 1;
output[1] = 0;
}
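/* Note: this and the 8x8/16x16/32x32 *_1_c variants below compute only
 * the DC term -- the plain sum of the residual block with a final shift
 * matching the DC gain of the corresponding full transform (<< 1 for 4x4,
 * no shift for 8x8, >> 1 for 16x16, >> 3 for 32x32). They pair with the
 * quantize_dc paths in vp9_xform_quant_fp(). */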
void vp9_fdct4x4_c(const int16_t *input, int16_t *output, int stride) {
// The 2D transform is done with two passes which are actually pretty
// similar. In the first one, we transform the columns and transpose
@@ -251,17 +240,6 @@ static void fdct8(const int16_t *input, int16_t *output) {
output[7] = fdct_round_shift(t3);
}
void vp9_fdct8x8_1_c(const int16_t *input, int16_t *output, int stride) {
int r, c;
int16_t sum = 0;
for (r = 0; r < 8; ++r)
for (c = 0; c < 8; ++c)
sum += input[r * stride + c];
output[0] = sum;
output[1] = 0;
}
void vp9_fdct8x8_c(const int16_t *input, int16_t *final_output, int stride) {
int i, j;
int16_t intermediate[64];
@@ -333,17 +311,6 @@ void vp9_fdct8x8_c(const int16_t *input, int16_t *final_output, int stride) {
}
}
void vp9_fdct16x16_1_c(const int16_t *input, int16_t *output, int stride) {
int r, c;
int16_t sum = 0;
for (r = 0; r < 16; ++r)
for (c = 0; c < 16; ++c)
sum += input[r * stride + c];
output[0] = sum >> 1;
output[1] = 0;
}
void vp9_fdct16x16_c(const int16_t *input, int16_t *output, int stride) {
// The 2D transform is done with two passes which are actually pretty
// similar. In the first one, we transform the columns and transpose
@@ -1362,17 +1329,6 @@ static void fdct32(const int *input, int *output, int round) {
output[31] = dct_32_round(step[31] * cospi_31_64 + step[16] * -cospi_1_64);
}
void vp9_fdct32x32_1_c(const int16_t *input, int16_t *output, int stride) {
int r, c;
int16_t sum = 0;
for (r = 0; r < 32; ++r)
for (c = 0; c < 32; ++c)
sum += input[r * stride + c];
output[0] = sum >> 3;
output[1] = 0;
}
void vp9_fdct32x32_c(const int16_t *input, int16_t *out, int stride) {
int i, j;
int output[32 * 32];


@@ -1,199 +0,0 @@
/*
* Copyright (c) 2012 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include <assert.h>
#include "vpx_scale/yv12config.h"
#include "vpx/vpx_integer.h"
#include "vp9/encoder/vp9_denoiser.h"
static const int widths[] = {4, 4, 8, 8, 8, 16, 16, 16, 32, 32, 32, 64, 64};
static const int heights[] = {4, 8, 4, 8, 16, 8, 16, 32, 16, 32, 64, 32, 64};
int vp9_denoiser_filter() {
return 0;
}
static int update_running_avg(const uint8_t *mc_avg, int mc_avg_stride,
uint8_t *avg, int avg_stride,
const uint8_t *sig, int sig_stride,
int increase_denoising, BLOCK_SIZE bs) {
int r, c;
int diff, adj, absdiff;
int shift_inc1 = 0, shift_inc2 = 1;
int adj_val[] = {3, 4, 6};
int total_adj = 0;
if (increase_denoising) {
shift_inc1 = 1;
shift_inc2 = 2;
}
for (r = 0; r < heights[bs]; ++r) {
for (c = 0; c < widths[bs]; ++c) {
diff = mc_avg[c] - sig[c];
absdiff = abs(diff);
if (absdiff <= 3 + shift_inc1) {
avg[c] = mc_avg[c];
total_adj += diff;
} else {
switch (absdiff) {
case 4: case 5: case 6: case 7:
adj = adj_val[0];
break;
case 8: case 9: case 10: case 11:
case 12: case 13: case 14: case 15:
adj = adj_val[1];
break;
default:
adj = adj_val[2];
}
if (diff > 0) {
avg[c] = MIN(UINT8_MAX, sig[c] + adj);
total_adj += adj;
} else {
avg[c] = MAX(0, sig[c] - adj);
total_adj -= adj;
}
}
}
sig += sig_stride;
avg += avg_stride;
mc_avg += mc_avg_stride;
}
return total_adj;
}
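/* Worked example for update_running_avg() (illustrative): with
 * increase_denoising = 0 and mc_avg[c] - sig[c] = 10, absdiff = 10 falls
 * in the 8..15 band, so adj = adj_val[1] = 4 and the output pixel becomes
 * MIN(UINT8_MAX, sig[c] + 4), with total_adj accumulating +4. Small
 * differences (absdiff <= 3) adopt the motion-compensated average
 * outright. */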
static uint8_t *block_start(uint8_t *framebuf, int stride,
int mi_row, int mi_col) {
return framebuf + (stride * mi_row * 8) + (mi_col * 8);
}
void copy_block(uint8_t *dest, int dest_stride,
uint8_t *src, int src_stride, BLOCK_SIZE bs) {
int r, c;
for (r = 0; r < heights[bs]; ++r) {
for (c = 0; c < widths[bs]; ++c) {
dest[c] = src[c];
}
dest += dest_stride;
src += src_stride;
}
}
void vp9_denoiser_denoise(VP9_DENOISER *denoiser, MACROBLOCK *mb,
int mi_row, int mi_col, BLOCK_SIZE bs) {
int decision = COPY_BLOCK;
YV12_BUFFER_CONFIG avg = denoiser->running_avg_y[INTRA_FRAME];
YV12_BUFFER_CONFIG mc_avg = denoiser->mc_running_avg_y;
uint8_t *avg_start = block_start(avg.y_buffer, avg.y_stride, mi_row, mi_col);
uint8_t *mc_avg_start = block_start(mc_avg.y_buffer, mc_avg.y_stride,
mi_row, mi_col);
struct buf_2d src = mb->plane[0].src;
update_running_avg(mc_avg_start, mc_avg.y_stride, avg_start, avg.y_stride,
mb->plane[0].src.buf, mb->plane[0].src.stride, 0, bs);
if (decision == FILTER_BLOCK) {
// TODO(tkopp)
}
if (decision == COPY_BLOCK) {
copy_block(avg_start, avg.y_stride, src.buf, src.stride, bs);
}
}
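/* Note: decision is hard-coded to COPY_BLOCK here, so the running average
 * written by update_running_avg() is immediately overwritten with the
 * source block; the FILTER_BLOCK branch is an empty TODO(tkopp). The
 * total_adj return value looks intended to drive that decision later. */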
static void copy_frame(YV12_BUFFER_CONFIG dest, const YV12_BUFFER_CONFIG src) {
int r, c;
const uint8_t *srcbuf = src.y_buffer;
uint8_t *destbuf = dest.y_buffer;
assert(dest.y_width == src.y_width);
assert(dest.y_height == src.y_height);
for (r = 0; r < dest.y_height; ++r) {
for (c = 0; c < dest.y_width; ++c) {
destbuf[c] = srcbuf[c];
}
destbuf += dest.y_stride;
srcbuf += src.y_stride;
}
}
void vp9_denoiser_update_frame_info(VP9_DENOISER *denoiser,
YV12_BUFFER_CONFIG src,
FRAME_TYPE frame_type,
int refresh_alt_ref_frame,
int refresh_golden_frame,
int refresh_last_frame) {
if (frame_type == KEY_FRAME) {
int i;
copy_frame(denoiser->running_avg_y[LAST_FRAME], src);
for (i = 2; i < MAX_REF_FRAMES - 1; i++) {
copy_frame(denoiser->running_avg_y[i],
denoiser->running_avg_y[LAST_FRAME]);
}
} else { /* For non key frames */
if (refresh_alt_ref_frame) {
copy_frame(denoiser->running_avg_y[ALTREF_FRAME],
denoiser->running_avg_y[INTRA_FRAME]);
}
if (refresh_golden_frame) {
copy_frame(denoiser->running_avg_y[GOLDEN_FRAME],
denoiser->running_avg_y[INTRA_FRAME]);
}
if (refresh_last_frame) {
copy_frame(denoiser->running_avg_y[LAST_FRAME],
denoiser->running_avg_y[INTRA_FRAME]);
}
}
}
void vp9_denoiser_update_frame_stats() {
}
int vp9_denoiser_alloc(VP9_DENOISER *denoiser, int width, int height,
int ssx, int ssy, int border) {
int i, fail;
assert(denoiser != NULL);
for (i = 0; i < MAX_REF_FRAMES; ++i) {
fail = vp9_alloc_frame_buffer(&denoiser->running_avg_y[i], width, height,
ssx, ssy, border);
if (fail) {
vp9_denoiser_free(denoiser);
return 1;
}
}
fail = vp9_alloc_frame_buffer(&denoiser->mc_running_avg_y, width, height,
ssx, ssy, border);
if (fail) {
vp9_denoiser_free(denoiser);
return 1;
}
return 0;
}
void vp9_denoiser_free(VP9_DENOISER *denoiser) {
int i;
if (denoiser == NULL) {
return;
}
for (i = 0; i < MAX_REF_FRAMES; ++i) {
if (&denoiser->running_avg_y[i] != NULL) {
vp9_free_frame_buffer(&denoiser->running_avg_y[i]);
}
}
if (&denoiser->mc_running_avg_y != NULL) {
vp9_free_frame_buffer(&denoiser->mc_running_avg_y);
}
}
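/* Note: the guards `&denoiser->running_avg_y[i] != NULL` and
 * `&denoiser->mc_running_avg_y != NULL` above take the address of an
 * embedded array element/member, which can never be NULL, so both checks
 * are vacuous and vp9_free_frame_buffer() runs unconditionally. */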


@@ -1,52 +0,0 @@
/*
* Copyright (c) 2012 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef VP9_ENCODER_DENOISER_H_
#define VP9_ENCODER_DENOISER_H_
#include "vp9/encoder/vp9_block.h"
#include "vpx_scale/yv12config.h"
#ifdef __cplusplus
extern "C" {
#endif
enum vp9_denoiser_decision {
COPY_BLOCK,
FILTER_BLOCK
};
typedef struct vp9_denoiser {
YV12_BUFFER_CONFIG running_avg_y[MAX_REF_FRAMES];
YV12_BUFFER_CONFIG mc_running_avg_y;
} VP9_DENOISER;
void vp9_denoiser_update_frame_info(VP9_DENOISER *denoiser,
YV12_BUFFER_CONFIG src,
FRAME_TYPE frame_type,
int refresh_alt_ref_frame,
int refresh_golden_frame,
int refresh_last_frame);
void vp9_denoiser_denoise(VP9_DENOISER *denoiser, MACROBLOCK *mb,
int mi_row, int mi_col, BLOCK_SIZE bs);
void vp9_denoiser_update_frame_stats();
int vp9_denoiser_alloc(VP9_DENOISER *denoiser, int width, int height,
int ssx, int ssy, int border);
void vp9_denoiser_free(VP9_DENOISER *denoiser);
#ifdef __cplusplus
} // extern "C"
#endif
#endif // VP9_ENCODER_DENOISER_H_

File diff suppressed because it is too large.


@@ -301,52 +301,6 @@ static INLINE void fdct32x32(int rd_transform,
vp9_fdct32x32(src, dst, src_stride);
}
void vp9_xform_quant_fp(MACROBLOCK *x, int plane, int block,
BLOCK_SIZE plane_bsize, TX_SIZE tx_size) {
MACROBLOCKD *const xd = &x->e_mbd;
const struct macroblock_plane *const p = &x->plane[plane];
const struct macroblockd_plane *const pd = &xd->plane[plane];
int16_t *const coeff = BLOCK_OFFSET(p->coeff, block);
int16_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
int16_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
uint16_t *const eob = &p->eobs[block];
const int diff_stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
int i, j;
const int16_t *src_diff;
txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &i, &j);
src_diff = &p->src_diff[4 * (j * diff_stride + i)];
switch (tx_size) {
case TX_32X32:
vp9_fdct32x32_1(src_diff, coeff, diff_stride);
vp9_quantize_dc_32x32(coeff, x->skip_block, p->round,
p->quant_fp[0], qcoeff, dqcoeff,
pd->dequant[0], eob);
break;
case TX_16X16:
vp9_fdct16x16_1(src_diff, coeff, diff_stride);
vp9_quantize_dc(coeff, x->skip_block, p->round,
p->quant_fp[0], qcoeff, dqcoeff,
pd->dequant[0], eob);
break;
case TX_8X8:
vp9_fdct8x8_1(src_diff, coeff, diff_stride);
vp9_quantize_dc(coeff, x->skip_block, p->round,
p->quant_fp[0], qcoeff, dqcoeff,
pd->dequant[0], eob);
break;
case TX_4X4:
x->fwd_txm4x4(src_diff, coeff, diff_stride);
vp9_quantize_dc(coeff, x->skip_block, p->round,
p->quant_fp[0], qcoeff, dqcoeff,
pd->dequant[0], eob);
break;
default:
assert(0);
}
}
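/* Note: vp9_xform_quant_fp() is the fast-path companion to
 * vp9_xform_quant() below -- it computes only the DC coefficient via the
 * vp9_fdct*_1 transforms and quantizes it with vp9_quantize_dc(_32x32),
 * trading some coding efficiency for speed. */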
void vp9_xform_quant(MACROBLOCK *x, int plane, int block,
BLOCK_SIZE plane_bsize, TX_SIZE tx_size) {
MACROBLOCKD *const xd = &x->e_mbd;
@@ -360,9 +314,6 @@ void vp9_xform_quant(MACROBLOCK *x, int plane, int block,
const int diff_stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
int i, j;
const int16_t *src_diff;
#if CONFIG_EXT_TX
MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
#endif
txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &i, &j);
src_diff = &p->src_diff[4 * (j * diff_stride + i)];
@@ -375,45 +326,21 @@ void vp9_xform_quant(MACROBLOCK *x, int plane, int block,
scan_order->iscan);
break;
case TX_16X16:
#if CONFIG_EXT_TX
if (plane != 0 || mbmi->ext_txfrm == NORM) {
#endif
vp9_fdct16x16(src_diff, coeff, diff_stride);
#if CONFIG_EXT_TX
} else {
vp9_fht16x16(src_diff, coeff, diff_stride, ADST_ADST);
}
#endif
vp9_quantize_b(coeff, 256, x->skip_block, p->zbin, p->round,
p->quant, p->quant_shift, qcoeff, dqcoeff,
pd->dequant, p->zbin_extra, eob,
scan_order->scan, scan_order->iscan);
break;
case TX_8X8:
#if CONFIG_EXT_TX
if (plane != 0 || mbmi->ext_txfrm == NORM) {
#endif
vp9_fdct8x8(src_diff, coeff, diff_stride);
#if CONFIG_EXT_TX
} else {
vp9_fht8x8(src_diff, coeff, diff_stride, ADST_ADST);
}
#endif
vp9_quantize_b(coeff, 64, x->skip_block, p->zbin, p->round,
p->quant, p->quant_shift, qcoeff, dqcoeff,
pd->dequant, p->zbin_extra, eob,
scan_order->scan, scan_order->iscan);
break;
case TX_4X4:
#if CONFIG_EXT_TX
if (plane != 0 || mbmi->ext_txfrm == NORM) {
#endif
x->fwd_txm4x4(src_diff, coeff, diff_stride);
#if CONFIG_EXT_TX
} else {
vp9_fht4x4(src_diff, coeff, diff_stride, ADST_ADST);
}
#endif
vp9_quantize_b(coeff, 16, x->skip_block, p->zbin, p->round,
p->quant, p->quant_shift, qcoeff, dqcoeff,
pd->dequant, p->zbin_extra, eob,
@@ -436,9 +363,6 @@ static void encode_block(int plane, int block, BLOCK_SIZE plane_bsize,
int i, j;
uint8_t *dst;
ENTROPY_CONTEXT *a, *l;
#if CONFIG_EXT_TX
MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
#endif
txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &i, &j);
dst = &pd->dst.buf[4 * j * pd->dst.stride + 4 * i];
a = &ctx->ta[plane][i];
@@ -452,19 +376,8 @@ static void encode_block(int plane, int block, BLOCK_SIZE plane_bsize,
return;
}
if (x->skip_txfm == 0) {
// full forward transform and quantization
if (!x->skip_recode)
vp9_xform_quant(x, plane, block, plane_bsize, tx_size);
} else if (x->skip_txfm == 2) {
// fast path forward transform and quantization
vp9_xform_quant_fp(x, plane, block, plane_bsize, tx_size);
} else {
// skip forward transform
p->eobs[block] = 0;
*a = *l = 0;
return;
}
if (!x->skip_recode)
vp9_xform_quant(x, plane, block, plane_bsize, tx_size);
if (x->optimize && (!x->skip_recode || !x->skip_optimize)) {
const int ctx = combine_entropy_contexts(*a, *l);
@@ -484,43 +397,16 @@ static void encode_block(int plane, int block, BLOCK_SIZE plane_bsize,
vp9_idct32x32_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]);
break;
case TX_16X16:
#if CONFIG_EXT_TX
if (plane != 0 || mbmi->ext_txfrm == NORM) {
#endif
vp9_idct16x16_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]);
#if CONFIG_EXT_TX
} else {
vp9_iht16x16_add(ADST_ADST, dqcoeff, dst, pd->dst.stride,
p->eobs[block]);
}
#endif
break;
case TX_8X8:
#if CONFIG_EXT_TX
if (plane != 0 || mbmi->ext_txfrm == NORM) {
#endif
vp9_idct8x8_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]);
#if CONFIG_EXT_TX
} else {
vp9_iht8x8_add(ADST_ADST, dqcoeff, dst, pd->dst.stride,
p->eobs[block]);
}
#endif
break;
case TX_4X4:
#if CONFIG_EXT_TX
if (plane != 0 || mbmi->ext_txfrm == NORM) {
#endif
// this is like vp9_short_idct4x4 but has a special case around eob<=1
// which is significant (not just an optimization) for the lossless
// case.
x->itxm_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]);
#if CONFIG_EXT_TX
} else {
vp9_iht4x4_add(ADST_ADST, dqcoeff, dst, pd->dst.stride,
p->eobs[block]);
}
#endif
break;
default:
assert(0 && "Invalid transform size");
@@ -536,10 +422,6 @@ static void encode_block_pass1(int plane, int block, BLOCK_SIZE plane_bsize,
int16_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
int i, j;
uint8_t *dst;
#if CONFIG_EXT_TX
MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
mbmi->ext_txfrm = NORM;
#endif
txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &i, &j);
dst = &pd->dst.buf[4 * j * pd->dst.stride + 4 * i];
@@ -578,26 +460,6 @@ void vp9_encode_sb(MACROBLOCK *x, BLOCK_SIZE bsize) {
}
}
#if CONFIG_SUPERTX
void vp9_encode_sb_supertx(MACROBLOCK *x, BLOCK_SIZE bsize) {
MACROBLOCKD *const xd = &x->e_mbd;
struct optimize_ctx ctx;
MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
struct encode_b_args arg = {x, &ctx, &mbmi->skip};
int plane;
for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
BLOCK_SIZE plane_size = bsize - 3 * (plane > 0);
const struct macroblockd_plane* const pd = &xd->plane[plane];
const TX_SIZE tx_size = plane ? get_uv_tx_size(mbmi) : mbmi->tx_size;
vp9_subtract_plane(x, bsize, plane);
vp9_get_entropy_contexts(bsize, tx_size, pd,
ctx.ta[plane], ctx.tl[plane]);
encode_block(plane, 0, plane_size, bsize_to_tx_size(plane_size), &arg);
}
}
#endif
static void encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize,
TX_SIZE tx_size, void *arg) {
struct encode_b_args* const args = arg;
@@ -612,9 +474,6 @@ static void encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize,
const scan_order *scan_order;
TX_TYPE tx_type;
PREDICTION_MODE mode;
#if CONFIG_FILTERINTRA
int fbit = 0;
#endif
const int bwl = b_width_log2(plane_bsize);
const int diff_stride = 4 * (1 << bwl);
uint8_t *src, *dst;
@@ -628,20 +487,11 @@ static void encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize,
src = &p->src.buf[4 * (j * src_stride + i)];
src_diff = &p->src_diff[4 * (j * diff_stride + i)];
#if CONFIG_FILTERINTRA
if (mbmi->sb_type < BLOCK_8X8 && plane == 0)
fbit = xd->mi[0]->b_filter_info[block];
else
fbit = plane == 0 ? mbmi->filterbit : mbmi->uv_filterbit;
#endif
switch (tx_size) {
case TX_32X32:
scan_order = &vp9_default_scan_orders[TX_32X32];
mode = plane == 0 ? mbmi->mode : mbmi->uv_mode;
vp9_predict_intra_block(xd, block >> 6, bwl, TX_32X32, mode,
#if CONFIG_FILTERINTRA
fbit,
#endif
x->skip_encode ? src : dst,
x->skip_encode ? src_stride : dst_stride,
dst, dst_stride, i, j, plane);
@@ -662,9 +512,6 @@ static void encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize,
scan_order = &vp9_scan_orders[TX_16X16][tx_type];
mode = plane == 0 ? mbmi->mode : mbmi->uv_mode;
vp9_predict_intra_block(xd, block >> 4, bwl, TX_16X16, mode,
#if CONFIG_FILTERINTRA
fbit,
#endif
x->skip_encode ? src : dst,
x->skip_encode ? src_stride : dst_stride,
dst, dst_stride, i, j, plane);
@@ -685,9 +532,6 @@ static void encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize,
scan_order = &vp9_scan_orders[TX_8X8][tx_type];
mode = plane == 0 ? mbmi->mode : mbmi->uv_mode;
vp9_predict_intra_block(xd, block >> 2, bwl, TX_8X8, mode,
#if CONFIG_FILTERINTRA
fbit,
#endif
x->skip_encode ? src : dst,
x->skip_encode ? src_stride : dst_stride,
dst, dst_stride, i, j, plane);
@@ -708,9 +552,6 @@ static void encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize,
scan_order = &vp9_scan_orders[TX_4X4][tx_type];
mode = plane == 0 ? get_y_mode(xd->mi[0], block) : mbmi->uv_mode;
vp9_predict_intra_block(xd, block, bwl, TX_4X4, mode,
#if CONFIG_FILTERINTRA
fbit,
#endif
x->skip_encode ? src : dst,
x->skip_encode ? src_stride : dst_stride,
dst, dst_stride, i, j, plane);

View File

@@ -21,12 +21,8 @@ extern "C" {
#endif
void vp9_encode_sb(MACROBLOCK *x, BLOCK_SIZE bsize);
#if CONFIG_SUPERTX
void vp9_encode_sb_supertx(MACROBLOCK *x, BLOCK_SIZE bsize);
#endif
void vp9_encode_sby_pass1(MACROBLOCK *x, BLOCK_SIZE bsize);
void vp9_xform_quant_fp(MACROBLOCK *x, int plane, int block,
BLOCK_SIZE plane_bsize, TX_SIZE tx_size);
void vp9_xform_quant(MACROBLOCK *x, int plane, int block,
BLOCK_SIZE plane_bsize, TX_SIZE tx_size);

View File

@@ -216,7 +216,7 @@ void vp9_encode_mv(VP9_COMP* cpi, vp9_writer* w,
// If auto_mv_step_size is enabled then keep track of the largest
// motion vector component used.
if (!cpi->dummy_packing && cpi->sf.mv.auto_mv_step_size) {
if (!cpi->dummy_packing && cpi->sf.auto_mv_step_size) {
unsigned int maxv = MAX(abs(mv->row), abs(mv->col)) >> 3;
cpi->max_mv_magnitude = MAX(maxv, cpi->max_mv_magnitude);
}

File diff suppressed because it is too large.


@@ -32,14 +32,10 @@
#include "vp9/encoder/vp9_mcomp.h"
#include "vp9/encoder/vp9_quantize.h"
#include "vp9/encoder/vp9_ratectrl.h"
#include "vp9/encoder/vp9_rdopt.h"
#include "vp9/encoder/vp9_speed_features.h"
#include "vp9/encoder/vp9_svc_layercontext.h"
#include "vp9/encoder/vp9_tokenize.h"
#include "vp9/encoder/vp9_variance.h"
#if CONFIG_DENOISING
#include "vp9/encoder/vp9_denoiser.h"
#endif
#ifdef __cplusplus
extern "C" {
@@ -47,6 +43,9 @@ extern "C" {
#define DEFAULT_GF_INTERVAL 10
#define MAX_MODES 30
#define MAX_REFS 6
typedef struct {
int nmvjointcost[MV_JOINTS];
int nmvcosts[2][MV_VALS];
@@ -64,6 +63,57 @@ typedef struct {
FRAME_CONTEXT fc;
} CODING_CONTEXT;
// This enumerator type needs to be kept aligned with the mode order in
// const MODE_DEFINITION vp9_mode_order[MAX_MODES] used in the rd code.
typedef enum {
THR_NEARESTMV,
THR_NEARESTA,
THR_NEARESTG,
THR_DC,
THR_NEWMV,
THR_NEWA,
THR_NEWG,
THR_NEARMV,
THR_NEARA,
THR_COMP_NEARESTLA,
THR_COMP_NEARESTGA,
THR_TM,
THR_COMP_NEARLA,
THR_COMP_NEWLA,
THR_NEARG,
THR_COMP_NEARGA,
THR_COMP_NEWGA,
THR_ZEROMV,
THR_ZEROG,
THR_ZEROA,
THR_COMP_ZEROLA,
THR_COMP_ZEROGA,
THR_H_PRED,
THR_V_PRED,
THR_D135_PRED,
THR_D207_PRED,
THR_D153_PRED,
THR_D63_PRED,
THR_D117_PRED,
THR_D45_PRED,
} THR_MODES;
typedef enum {
THR_LAST,
THR_GOLD,
THR_ALTR,
THR_COMP_LA,
THR_COMP_GA,
THR_INTRA,
} THR_MODES_SUB8X8;
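The comment above stresses that THR_MODES must stay aligned with vp9_mode_order[] in the rd code, and the same implicitly holds for THR_MODES_SUB8X8 and the reference list it indexes. A cheap guard, meant to sit right after these definitions (a sketch; the negative-array-size trick breaks the build if an enumerator count drifts away from MAX_MODES or MAX_REFS):

/* Assumes the two enums and the MAX_MODES / MAX_REFS defines above are in
 * scope. THR_D45_PRED and THR_INTRA are the last enumerators of each list. */
typedef char thr_modes_count_check[(THR_D45_PRED + 1 == MAX_MODES) ? 1 : -1];
typedef char thr_refs_count_check[(THR_INTRA + 1 == MAX_REFS) ? 1 : -1];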
typedef enum {
// encode_breakout is disabled.
ENCODE_BREAKOUT_DISABLED = 0,
@@ -80,6 +130,13 @@ typedef enum {
ONETWO = 3
} VPX_SCALING;
typedef enum {
RC_MODE_VBR = 0,
RC_MODE_CBR = 1,
RC_MODE_CONSTRAINED_QUALITY = 2,
RC_MODE_CONSTANT_QUALITY = 3,
} RC_MODE;
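This RC_MODE enum shadows the public vpx_rc_mode values (VPX_VBR, VPX_CBR, VPX_CQ, VPX_Q from vpx/vpx_encoder.h) that the later hunks replace. A hedged sketch of the mapping an adapter might apply; map_rc_mode() is hypothetical, not a function in this branch, and assumes the RC_MODE definition above is in scope:

#include "vpx/vpx_encoder.h"  /* enum vpx_rc_mode: VPX_VBR, VPX_CBR, VPX_CQ, VPX_Q */

static RC_MODE map_rc_mode(enum vpx_rc_mode mode) {
  switch (mode) {
    case VPX_VBR: return RC_MODE_VBR;
    case VPX_CBR: return RC_MODE_CBR;
    case VPX_CQ:  return RC_MODE_CONSTRAINED_QUALITY;
    case VPX_Q:   return RC_MODE_CONSTANT_QUALITY;
  }
  return RC_MODE_VBR;  /* unreachable for valid input */
}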
typedef enum {
// Good Quality Fast Encoding. The encoder balances quality with the
// amount of time it takes to encode the output. (speed setting
@@ -152,8 +209,7 @@ typedef struct VP9EncoderConfig {
// ----------------------------------------------------------------
// DATARATE CONTROL OPTIONS
// vbr, cbr, constrained quality or constant quality
enum vpx_rc_mode rc_mode;
RC_MODE rc_mode; // vbr, cbr, constrained quality or constant quality
// buffer targeting aggressiveness
int under_shoot_pct;
@@ -182,6 +238,8 @@ typedef struct VP9EncoderConfig {
// Enable feature to reduce the frame quantization every x frames.
int frame_periodic_boost;
int kf_extern_coding;
// two pass datarate control
int two_pass_vbrbias; // two pass datarate control tweaks
int two_pass_vbrmin_section;
@@ -228,10 +286,6 @@ typedef struct VP9EncoderConfig {
vp8e_tuning tuning;
} VP9EncoderConfig;
static INLINE int is_altref_enabled(const VP9EncoderConfig *cfg) {
return cfg->mode != REALTIME && cfg->play_alternate && cfg->lag_in_frames > 0;
}
static INLINE int is_lossless_requested(const VP9EncoderConfig *cfg) {
return cfg->best_allowed_q == 0 && cfg->worst_allowed_q == 0;
}
@@ -240,6 +294,32 @@ static INLINE int is_best_mode(MODE mode) {
return mode == ONE_PASS_BEST || mode == TWO_PASS_SECOND_BEST;
}
typedef struct RD_OPT {
// Thresh_mult is used to set a threshold for the rd score. A higher value
// means that we will accept the best mode so far more often. This number
// is used in combination with the current block size, and thresh_freq_fact
// to pick a threshold.
int thresh_mult[MAX_MODES];
int thresh_mult_sub8x8[MAX_REFS];
int threshes[MAX_SEGMENTS][BLOCK_SIZES][MAX_MODES];
int thresh_freq_fact[BLOCK_SIZES][MAX_MODES];
int64_t comp_pred_diff[REFERENCE_MODES];
int64_t prediction_type_threshes[MAX_REF_FRAMES][REFERENCE_MODES];
int64_t tx_select_diff[TX_MODES];
// FIXME(rbultje) can this overflow?
int tx_select_threshes[MAX_REF_FRAMES][TX_MODES];
int64_t filter_diff[SWITCHABLE_FILTER_CONTEXTS];
int64_t filter_threshes[MAX_REF_FRAMES][SWITCHABLE_FILTER_CONTEXTS];
int64_t filter_cache[SWITCHABLE_FILTER_CONTEXTS];
int64_t mask_filter;
int RDMULT;
int RDDIV;
} RD_OPT;
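The thresh_mult comment describes a skip test: a mode is not even evaluated when the best rd cost so far already beats that mode's adaptively scaled threshold. A minimal sketch of that comparison, assuming the usual vp9 convention that thresh_freq_fact is a fixed-point factor scaled by 32 (hence the >> 5):

#include <limits.h>
#include <stdint.h>

static int rd_less_than_thresh(int64_t best_rd, int thresh, int thresh_fact) {
  /* thresh == INT_MAX marks a disabled mode, which is always skipped. */
  return best_rd < ((int64_t)thresh * thresh_fact >> 5) || thresh == INT_MAX;
}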
typedef struct VP9_COMP {
QUANTS quants;
MACROBLOCK mb;
@@ -247,7 +327,11 @@ typedef struct VP9_COMP {
VP9EncoderConfig oxcf;
struct lookahead_ctx *lookahead;
struct lookahead_entry *source;
#if CONFIG_MULTIPLE_ARF
struct lookahead_entry *alt_ref_source[REF_FRAMES];
#else
struct lookahead_entry *alt_ref_source;
#endif
struct lookahead_entry *last_source;
YV12_BUFFER_CONFIG *Source;
@@ -266,6 +350,9 @@ typedef struct VP9_COMP {
int gld_fb_idx;
int alt_fb_idx;
#if CONFIG_MULTIPLE_ARF
int alt_ref_fb_idx[REF_FRAMES - 3];
#endif
int refresh_last_frame;
int refresh_golden_frame;
int refresh_alt_ref_frame;
@@ -283,6 +370,13 @@ typedef struct VP9_COMP {
TOKENEXTRA *tok;
unsigned int tok_count[4][1 << 6];
#if CONFIG_MULTIPLE_ARF
// Position within a frame coding order (including any additional ARF frames).
unsigned int sequence_number;
// Next frame in naturally occurring order that has not yet been coded.
int next_frame_in_order;
#endif
// Ambient reconstruction err target for forced key frames
int ambient_err;
@@ -332,6 +426,9 @@ typedef struct VP9_COMP {
unsigned char *complexity_map;
unsigned char *active_map;
unsigned int active_map_enabled;
CYCLIC_REFRESH *cyclic_refresh;
fractional_mv_step_fp *find_fractional_mv_step;
@@ -409,31 +506,23 @@ typedef struct VP9_COMP {
int intra_uv_mode_cost[FRAME_TYPES][INTRA_MODES];
int y_mode_costs[INTRA_MODES][INTRA_MODES][INTRA_MODES];
int switchable_interp_costs[SWITCHABLE_FILTER_CONTEXTS][SWITCHABLE_FILTERS];
#if CONFIG_COPY_CODING
int copy_mode_cost_l2[COPY_MODE_CONTEXTS][2];
int copy_mode_cost[COPY_MODE_CONTEXTS][COPY_MODE_COUNT - 1];
#endif
PICK_MODE_CONTEXT *leaf_tree;
PC_TREE *pc_tree;
PC_TREE *pc_root;
int partition_cost[PARTITION_CONTEXTS][PARTITION_TYPES];
int multi_arf_allowed;
#if CONFIG_MULTIPLE_ARF
// ARF tracking variables.
int multi_arf_enabled;
#if CONFIG_DENOISING
VP9_DENOISER denoiser;
#endif
#if CONFIG_MASKED_INTERINTER
unsigned int masked_interinter_select_counts[2];
#endif
#if CONFIG_INTERINTRA
unsigned int interintra_select_count[2];
#if CONFIG_MASKED_INTERINTRA
unsigned int masked_interintra_select_count[2];
#endif
unsigned int frame_coding_order_period;
unsigned int new_frame_coding_order_period;
int frame_coding_order[MAX_LAG_BUFFERS * 2];
int arf_buffer_idx[MAX_LAG_BUFFERS * 3 / 2];
int arf_weight[MAX_LAG_BUFFERS];
int arf_buffered;
int this_frame_weight;
int max_arf_level;
#endif
} VP9_COMP;
@@ -530,14 +619,10 @@ void vp9_update_reference_frames(VP9_COMP *cpi);
int64_t vp9_rescale(int64_t val, int64_t num, int denom);
void vp9_set_high_precision_mv(VP9_COMP *cpi, int allow_high_precision_mv);
YV12_BUFFER_CONFIG *vp9_scale_if_required(VP9_COMMON *cm,
YV12_BUFFER_CONFIG *unscaled,
YV12_BUFFER_CONFIG *scaled);
void vp9_apply_encoding_flags(VP9_COMP *cpi, vpx_enc_frame_flags_t flags);
static INLINE void set_ref_ptrs(VP9_COMMON *cm, MACROBLOCKD *xd,
MV_REFERENCE_FRAME ref0,
MV_REFERENCE_FRAME ref1) {


@@ -33,6 +33,7 @@
#include "vp9/encoder/vp9_firstpass.h"
#include "vp9/encoder/vp9_mcomp.h"
#include "vp9/encoder/vp9_quantize.h"
#include "vp9/encoder/vp9_ratectrl.h"
#include "vp9/encoder/vp9_rdopt.h"
#include "vp9/encoder/vp9_variance.h"
@@ -55,7 +56,14 @@
#define DOUBLE_DIVIDE_CHECK(x) ((x) < 0 ? (x) - 0.000001 : (x) + 0.000001)
#define MIN_KF_BOOST 300
#define MIN_GF_INTERVAL 4
#if CONFIG_MULTIPLE_ARF
// Set MIN_GF_INTERVAL to 1 for the full decomposition.
#define MIN_GF_INTERVAL 2
#else
#define MIN_GF_INTERVAL 4
#endif
#define LONG_TERM_VBR_CORRECTION
static void swap_yv12(YV12_BUFFER_CONFIG *a, YV12_BUFFER_CONFIG *b) {
@@ -129,13 +137,14 @@ static void output_stats(FIRSTPASS_STATS *stats,
FILE *fpfile;
fpfile = fopen("firstpass.stt", "a");
fprintf(fpfile, "%12.0f %12.0f %12.0f %12.0f %12.4f %12.4f"
fprintf(fpfile, "%12.0f %12.0f %12.0f %12.0f %12.0f %12.4f %12.4f"
"%12.4f %12.4f %12.4f %12.4f %12.4f %12.4f %12.4f"
"%12.0f %12.0f %12.4f %12.0f %12.0f %12.4f\n",
stats->frame,
stats->intra_error,
stats->coded_error,
stats->sr_coded_error,
stats->ssim_weighted_pred_err,
stats->pcnt_inter,
stats->pcnt_motion,
stats->pcnt_second_ref,
@@ -489,8 +498,6 @@ void vp9_first_pass(VP9_COMP *cpi) {
&cpi->scaled_source);
}
vp9_setup_block_planes(&x->e_mbd, cm->subsampling_x, cm->subsampling_y);
vp9_setup_src_planes(x, cpi->Source, 0, 0);
vp9_setup_pre_planes(xd, 0, first_ref_buf, 0, 0, NULL);
vp9_setup_dst_planes(xd->plane, new_yv12, 0, 0);
@@ -498,6 +505,8 @@ void vp9_first_pass(VP9_COMP *cpi) {
xd->mi = cm->mi_grid_visible;
xd->mi[0] = cm->mi;
vp9_setup_block_planes(&x->e_mbd, cm->subsampling_x, cm->subsampling_y);
vp9_frame_init_quantizer(cpi);
for (i = 0; i < MAX_MB_PLANE; ++i) {
@@ -586,9 +595,8 @@ void vp9_first_pass(VP9_COMP *cpi) {
// Other than for the first frame do a motion search.
if (cm->current_video_frame > 0) {
int tmp_err, motion_error, raw_motion_error;
int tmp_err, motion_error;
int_mv mv, tmp_mv;
struct buf_2d unscaled_last_source_buf_2d;
xd->plane[0].pre[0].buf = first_ref_buf->y_buffer + recon_yoffset;
motion_error = get_prediction_error(bsize, &x->plane[0].src,
@@ -596,83 +604,67 @@ void vp9_first_pass(VP9_COMP *cpi) {
// Assume 0,0 motion with no mv overhead.
mv.as_int = tmp_mv.as_int = 0;
// Compute the motion error of the 0,0 motion using the last source
// frame as the reference. Skip the further motion search on the
// reconstructed frame if this error is small.
unscaled_last_source_buf_2d.buf =
cpi->unscaled_last_source->y_buffer + recon_yoffset;
unscaled_last_source_buf_2d.stride =
cpi->unscaled_last_source->y_stride;
raw_motion_error = get_prediction_error(bsize, &x->plane[0].src,
&unscaled_last_source_buf_2d);
// Test last reference frame using the previous best mv as the
// starting point (best reference) for the search.
first_pass_motion_search(cpi, x, &best_ref_mv.as_mv, &mv.as_mv,
&motion_error);
if (cpi->oxcf.aq_mode == VARIANCE_AQ) {
vp9_clear_system_state();
motion_error = (int)(motion_error * error_weight);
}
// TODO(pengchong): Replace the hard-coded threshold
if (raw_motion_error > 25 ||
(cpi->use_svc && cpi->svc.number_temporal_layers == 1)) {
// Test last reference frame using the previous best mv as the
// starting point (best reference) for the search.
first_pass_motion_search(cpi, x, &best_ref_mv.as_mv, &mv.as_mv,
&motion_error);
// If the current best reference mv is not centered on 0,0 then do a 0,0
// based search as well.
if (best_ref_mv.as_int) {
tmp_err = INT_MAX;
first_pass_motion_search(cpi, x, &zero_mv, &tmp_mv.as_mv,
&tmp_err);
if (cpi->oxcf.aq_mode == VARIANCE_AQ) {
vp9_clear_system_state();
motion_error = (int)(motion_error * error_weight);
tmp_err = (int)(tmp_err * error_weight);
}
// If the current best reference mv is not centered on 0,0 then do a
// 0,0 based search as well.
if (best_ref_mv.as_int) {
tmp_err = INT_MAX;
first_pass_motion_search(cpi, x, &zero_mv, &tmp_mv.as_mv, &tmp_err);
if (cpi->oxcf.aq_mode == VARIANCE_AQ) {
vp9_clear_system_state();
tmp_err = (int)(tmp_err * error_weight);
}
if (tmp_err < motion_error) {
motion_error = tmp_err;
mv.as_int = tmp_mv.as_int;
}
}
if (tmp_err < motion_error) {
motion_error = tmp_err;
mv.as_int = tmp_mv.as_int;
}
// Search in an older reference frame.
if (cm->current_video_frame > 1 && gld_yv12 != NULL) {
// Assume 0,0 motion with no mv overhead.
int gf_motion_error;
xd->plane[0].pre[0].buf = gld_yv12->y_buffer + recon_yoffset;
gf_motion_error = get_prediction_error(bsize, &x->plane[0].src,
&xd->plane[0].pre[0]);
first_pass_motion_search(cpi, x, &zero_mv, &tmp_mv.as_mv,
&gf_motion_error);
if (cpi->oxcf.aq_mode == VARIANCE_AQ) {
vp9_clear_system_state();
gf_motion_error = (int)(gf_motion_error * error_weight);
}
// Search in an older reference frame.
if (cm->current_video_frame > 1 && gld_yv12 != NULL) {
// Assume 0,0 motion with no mv overhead.
int gf_motion_error;
if (gf_motion_error < motion_error && gf_motion_error < this_error)
++second_ref_count;
xd->plane[0].pre[0].buf = gld_yv12->y_buffer + recon_yoffset;
gf_motion_error = get_prediction_error(bsize, &x->plane[0].src,
&xd->plane[0].pre[0]);
// Reset to last frame as reference buffer.
xd->plane[0].pre[0].buf = first_ref_buf->y_buffer + recon_yoffset;
xd->plane[1].pre[0].buf = first_ref_buf->u_buffer + recon_uvoffset;
xd->plane[2].pre[0].buf = first_ref_buf->v_buffer + recon_uvoffset;
first_pass_motion_search(cpi, x, &zero_mv, &tmp_mv.as_mv,
&gf_motion_error);
if (cpi->oxcf.aq_mode == VARIANCE_AQ) {
vp9_clear_system_state();
gf_motion_error = (int)(gf_motion_error * error_weight);
}
if (gf_motion_error < motion_error && gf_motion_error < this_error)
++second_ref_count;
// Reset to last frame as reference buffer.
xd->plane[0].pre[0].buf = first_ref_buf->y_buffer + recon_yoffset;
xd->plane[1].pre[0].buf = first_ref_buf->u_buffer + recon_uvoffset;
xd->plane[2].pre[0].buf = first_ref_buf->v_buffer + recon_uvoffset;
// In accumulating a score for the older reference frame, take the
// best of the motion-predicted score and the intra-coded error
// (just as is done when accumulating "coded_error" for the last
// frame).
if (gf_motion_error < this_error)
sr_coded_error += gf_motion_error;
else
sr_coded_error += this_error;
} else {
sr_coded_error += motion_error;
}
// In accumulating a score for the older reference frame, take the
// best of the motion-predicted score and the intra-coded error
// (just as is done when accumulating "coded_error" for the last
// frame).
if (gf_motion_error < this_error)
sr_coded_error += gf_motion_error;
else
sr_coded_error += this_error;
} else {
sr_coded_error += motion_error;
}
// Start by assuming that intra mode is best.
best_ref_mv.as_int = 0;
@@ -913,7 +905,7 @@ static int get_twopass_worst_quality(const VP9_COMP *cpi,
}
// Restriction on active max q for constrained quality mode.
if (cpi->oxcf.rc_mode == VPX_CQ)
if (cpi->oxcf.rc_mode == RC_MODE_CONSTRAINED_QUALITY)
q = MAX(q, oxcf->cq_level);
return q;
}
@@ -1074,30 +1066,38 @@ static int detect_flash(const TWO_PASS *twopass, int offset) {
}
// Update the motion related elements to the GF arf boost calculation.
static void accumulate_frame_motion_stats(const FIRSTPASS_STATS *stats,
double *mv_in_out,
double *mv_in_out_accumulator,
double *abs_mv_in_out_accumulator,
double *mv_ratio_accumulator) {
const double pct = stats->pcnt_motion;
static void accumulate_frame_motion_stats(
FIRSTPASS_STATS *this_frame,
double *this_frame_mv_in_out,
double *mv_in_out_accumulator,
double *abs_mv_in_out_accumulator,
double *mv_ratio_accumulator) {
double motion_pct;
// Accumulate motion stats.
motion_pct = this_frame->pcnt_motion;
// Accumulate Motion In/Out of frame stats.
*mv_in_out = stats->mv_in_out_count * pct;
*mv_in_out_accumulator += *mv_in_out;
*abs_mv_in_out_accumulator += fabs(*mv_in_out);
*this_frame_mv_in_out = this_frame->mv_in_out_count * motion_pct;
*mv_in_out_accumulator += this_frame->mv_in_out_count * motion_pct;
*abs_mv_in_out_accumulator += fabs(this_frame->mv_in_out_count * motion_pct);
// Accumulate a measure of how uniform (or conversely how random) the motion
// field is (a ratio of abs(mv) / mv).
if (pct > 0.05) {
const double mvr_ratio = fabs(stats->mvr_abs) /
DOUBLE_DIVIDE_CHECK(fabs(stats->MVr));
const double mvc_ratio = fabs(stats->mvc_abs) /
DOUBLE_DIVIDE_CHECK(fabs(stats->MVc));
// Accumulate a measure of how uniform (or conversely how random)
// the motion field is (a ratio of absmv / mv).
if (motion_pct > 0.05) {
const double this_frame_mvr_ratio = fabs(this_frame->mvr_abs) /
DOUBLE_DIVIDE_CHECK(fabs(this_frame->MVr));
*mv_ratio_accumulator += pct * (mvr_ratio < stats->mvr_abs ?
mvr_ratio : stats->mvr_abs);
*mv_ratio_accumulator += pct * (mvc_ratio < stats->mvc_abs ?
mvc_ratio : stats->mvc_abs);
const double this_frame_mvc_ratio = fabs(this_frame->mvc_abs) /
DOUBLE_DIVIDE_CHECK(fabs(this_frame->MVc));
*mv_ratio_accumulator += (this_frame_mvr_ratio < this_frame->mvr_abs)
? (this_frame_mvr_ratio * motion_pct)
: this_frame->mvr_abs * motion_pct;
*mv_ratio_accumulator += (this_frame_mvc_ratio < this_frame->mvc_abs)
? (this_frame_mvc_ratio * motion_pct)
: this_frame->mvc_abs * motion_pct;
}
}
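Both versions of accumulate_frame_motion_stats() add, per frame and per direction, the smaller of the motion ratio and the absolute motion, weighted by the fraction of motion blocks. A self-contained worked example of one row-direction step, with made-up stats values:

#include <math.h>
#include <stdio.h>

#define DOUBLE_DIVIDE_CHECK(x) ((x) < 0 ? (x) - 0.000001 : (x) + 0.000001)

int main(void) {
  const double motion_pct = 0.40;          /* pcnt_motion (illustrative) */
  const double mvr_abs = 6.0, MVr = 1.5;   /* abs vs net row motion */
  const double ratio = fabs(mvr_abs) / DOUBLE_DIVIDE_CHECK(fabs(MVr)); /* ~4 */
  /* take the smaller of ratio and mvr_abs, weighted by the motion fraction */
  const double contrib = (ratio < mvr_abs ? ratio : mvr_abs) * motion_pct;
  printf("row contribution: %f\n", contrib); /* ~1.6 */
  return 0;
}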
@@ -1214,6 +1214,144 @@ static int calc_arf_boost(VP9_COMP *cpi, int offset,
return arf_boost;
}
#if CONFIG_MULTIPLE_ARF
// Work out the frame coding order for a GF or an ARF group.
// The current implementation codes frames in their natural order for a
// GF group, and inserts additional ARFs into an ARF group using a
// binary split approach.
// NOTE: this function is currently implemented recursively.
static void schedule_frames(VP9_COMP *cpi, const int start, const int end,
const int arf_idx, const int gf_or_arf_group,
const int level) {
int i, abs_end, half_range;
int *cfo = cpi->frame_coding_order;
int idx = cpi->new_frame_coding_order_period;
// If (end < 0) an ARF should be coded at position (-end).
assert(start >= 0);
// printf("start:%d end:%d\n", start, end);
// GF Group: code frames in logical order.
if (gf_or_arf_group == 0) {
assert(end >= start);
for (i = start; i <= end; ++i) {
cfo[idx] = i;
cpi->arf_buffer_idx[idx] = arf_idx;
cpi->arf_weight[idx] = -1;
++idx;
}
cpi->new_frame_coding_order_period = idx;
return;
}
// ARF Group: Work out the ARF schedule and mark ARF frames as negative.
if (end < 0) {
// printf("start:%d end:%d\n", -end, -end);
// ARF frame is at the end of the range.
cfo[idx] = end;
// Which ARF buffer this ARF uses as its predictor.
cpi->arf_buffer_idx[idx] = (arf_idx > 2) ? (arf_idx - 1) : 2;
cpi->arf_weight[idx] = level;
++idx;
abs_end = -end;
} else {
abs_end = end;
}
half_range = (abs_end - start) >> 1;
// ARFs may not be adjacent; they must be separated by at least
// MIN_GF_INTERVAL non-ARF frames.
if ((start + MIN_GF_INTERVAL) >= (abs_end - MIN_GF_INTERVAL)) {
// printf("start:%d end:%d\n", start, abs_end);
// Update the coding order and active ARF.
for (i = start; i <= abs_end; ++i) {
cfo[idx] = i;
cpi->arf_buffer_idx[idx] = arf_idx;
cpi->arf_weight[idx] = -1;
++idx;
}
cpi->new_frame_coding_order_period = idx;
} else {
// Place a new ARF at the mid-point of the range.
cpi->new_frame_coding_order_period = idx;
schedule_frames(cpi, start, -(start + half_range), arf_idx + 1,
gf_or_arf_group, level + 1);
schedule_frames(cpi, start + half_range + 1, abs_end, arf_idx,
gf_or_arf_group, level + 1);
}
}
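To see what the binary split produces, here is a self-contained sketch that mirrors schedule_frames() for the ARF case only (no cpi state, no buffer indices); negative entries mark ARF frames, following the end < 0 convention above:

#include <stdio.h>

#define MIN_GF_INTERVAL 2  /* the CONFIG_MULTIPLE_ARF value defined earlier */

static int order[64];
static int idx;

static void split(int start, int end) {
  const int abs_end = end < 0 ? -end : end;
  if (end < 0) order[idx++] = end;  /* the ARF is coded ahead of its range */
  if (start + MIN_GF_INTERVAL >= abs_end - MIN_GF_INTERVAL) {
    int i;
    for (i = start; i <= abs_end; ++i) order[idx++] = i;
  } else {
    const int half = (abs_end - start) >> 1;
    split(start, -(start + half));
    split(start + half + 1, abs_end);
  }
}

int main(void) {
  int i;
  split(0, -15);  /* a 16-frame ARF group, as with FIXED_ARF_GROUP_SIZE */
  for (i = 0; i < idx; ++i) printf("%d ", order[i]);
  printf("\n");  /* -15 -7 -3 0 1 2 3 4 5 6 7 -11 8 9 10 11 12 13 14 15 */
  return 0;
}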
#define FIXED_ARF_GROUP_SIZE 16
void define_fixed_arf_period(VP9_COMP *cpi) {
int i;
int max_level = INT_MIN;
assert(cpi->multi_arf_enabled);
assert(cpi->oxcf.lag_in_frames >= FIXED_ARF_GROUP_SIZE);
// Save the weight of the last frame in the sequence before the next
// sequence pattern overwrites it.
cpi->this_frame_weight = cpi->arf_weight[cpi->sequence_number];
assert(cpi->this_frame_weight >= 0);
cpi->twopass.gf_zeromotion_pct = 0;
// Initialize frame coding order variables.
cpi->new_frame_coding_order_period = 0;
cpi->next_frame_in_order = 0;
cpi->arf_buffered = 0;
vp9_zero(cpi->frame_coding_order);
vp9_zero(cpi->arf_buffer_idx);
vpx_memset(cpi->arf_weight, -1, sizeof(cpi->arf_weight));
if (cpi->rc.frames_to_key <= (FIXED_ARF_GROUP_SIZE + 8)) {
// Setup a GF group close to the keyframe.
cpi->rc.source_alt_ref_pending = 0;
cpi->rc.baseline_gf_interval = cpi->rc.frames_to_key;
schedule_frames(cpi, 0, (cpi->rc.baseline_gf_interval - 1), 2, 0, 0);
} else {
// Setup a fixed period ARF group.
cpi->rc.source_alt_ref_pending = 1;
cpi->rc.baseline_gf_interval = FIXED_ARF_GROUP_SIZE;
schedule_frames(cpi, 0, -(cpi->rc.baseline_gf_interval - 1), 2, 1, 0);
}
// Replace level indicator of -1 with correct level.
for (i = 0; i < cpi->new_frame_coding_order_period; ++i) {
if (cpi->arf_weight[i] > max_level) {
max_level = cpi->arf_weight[i];
}
}
++max_level;
for (i = 0; i < cpi->new_frame_coding_order_period; ++i) {
if (cpi->arf_weight[i] == -1) {
cpi->arf_weight[i] = max_level;
}
}
cpi->max_arf_level = max_level;
#if 0
printf("\nSchedule: ");
for (i = 0; i < cpi->new_frame_coding_order_period; ++i) {
printf("%4d ", cpi->frame_coding_order[i]);
}
printf("\n");
printf("ARFref: ");
for (i = 0; i < cpi->new_frame_coding_order_period; ++i) {
printf("%4d ", cpi->arf_buffer_idx[i]);
}
printf("\n");
printf("Weight: ");
for (i = 0; i < cpi->new_frame_coding_order_period; ++i) {
printf("%4d ", cpi->arf_weight[i]);
}
printf("\n");
#endif
}
#endif
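The -1-to-level relabeling at the end of define_fixed_arf_period() gives every non-ARF frame a weight one above the deepest ARF level. A small self-contained example with illustrative weights:

#include <limits.h>
#include <stdio.h>

int main(void) {
  int w[] = { -1, 0, 1, -1, 1, -1 };  /* -1 marks non-ARF frames */
  const int n = (int)(sizeof(w) / sizeof(w[0]));
  int i, max_level = INT_MIN;
  for (i = 0; i < n; ++i)
    if (w[i] > max_level) max_level = w[i];
  ++max_level;  /* one above the deepest ARF level */
  for (i = 0; i < n; ++i)
    if (w[i] == -1) w[i] = max_level;
  for (i = 0; i < n; ++i) printf("%d ", w[i]);  /* 2 0 1 2 1 2 */
  printf("\n");
  return 0;
}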
// Calculate a section intra ratio used in setting max loop filter.
static int calculate_section_intra_ratio(const FIRSTPASS_STATS *begin,
const FIRSTPASS_STATS *end,
@@ -1283,18 +1421,6 @@ static int calculate_boost_bits(int frame_count,
return MAX((int)(((int64_t)boost * total_group_bits) / allocation_chunks), 0);
}
// Current limit on maximum number of active arfs in a GF/ARF group.
#define MAX_ACTIVE_ARFS 2
#define ARF_SLOT1 2
#define ARF_SLOT2 3
// This function provides an indirection for the choice of buffers for ARFs.
// At the moment the values are fixed but this may change as part of
// the integration process with other codec features that swap buffers around.
static void get_arf_buffer_indices(unsigned char *arf_buffer_indices) {
arf_buffer_indices[0] = ARF_SLOT1;
arf_buffer_indices[1] = ARF_SLOT2;
}
static void allocate_gf_group_bits(VP9_COMP *cpi, int64_t gf_group_bits,
double group_error, int gf_arf_bits) {
RATE_CONTROL *const rc = &cpi->rc;
@@ -1302,85 +1428,42 @@ static void allocate_gf_group_bits(VP9_COMP *cpi, int64_t gf_group_bits,
TWO_PASS *twopass = &cpi->twopass;
FIRSTPASS_STATS frame_stats;
int i;
int frame_index = 1;
int group_frame_index = 1;
int target_frame_size;
int key_frame;
const int max_bits = frame_max_bits(&cpi->rc, &cpi->oxcf);
int64_t total_group_bits = gf_group_bits;
double modified_err = 0.0;
double err_fraction;
int mid_boost_bits = 0;
int mid_frame_idx;
unsigned char arf_buffer_indices[MAX_ACTIVE_ARFS];
key_frame = cpi->common.frame_type == KEY_FRAME ||
vp9_is_upper_layer_key_frame(cpi);
get_arf_buffer_indices(arf_buffer_indices);
// For key frames the frame target rate is already set and it
// is also the golden frame.
// NOTE: We don't bother to check for the special case of ARF overlay
// frames here, as there is clamping code for this in the function
// vp9_rc_clamp_pframe_target_size(), which applies to one and two pass
// encodes.
if (!key_frame) {
if (rc->source_alt_ref_active) {
twopass->gf_group.update_type[0] = OVERLAY_UPDATE;
twopass->gf_group.rf_level[0] = INTER_NORMAL;
twopass->gf_group.bit_allocation[0] = 0;
twopass->gf_group.arf_update_idx[0] = arf_buffer_indices[0];
twopass->gf_group.arf_ref_idx[0] = arf_buffer_indices[0];
} else {
twopass->gf_group.update_type[0] = GF_UPDATE;
twopass->gf_group.rf_level[0] = GF_ARF_STD;
twopass->gf_group.bit_allocation[0] = gf_arf_bits;
twopass->gf_group.arf_update_idx[0] = arf_buffer_indices[0];
twopass->gf_group.arf_ref_idx[0] = arf_buffer_indices[0];
}
twopass->gf_group_bit_allocation[0] = gf_arf_bits;
// Step over the golden frame / overlay frame
if (EOF == input_stats(twopass, &frame_stats))
return;
}
// Deduct the boost bits for arf (or gf if it is not a key frame)
// from the group total.
// Store the bits to spend on the ARF if there is one.
if (rc->source_alt_ref_pending) {
twopass->gf_group_bit_allocation[group_frame_index++] = gf_arf_bits;
}
// Deduct the boost bits for arf or gf if it is not a key frame.
if (rc->source_alt_ref_pending || !key_frame)
total_group_bits -= gf_arf_bits;
// Store the bits to spend on the ARF if there is one.
if (rc->source_alt_ref_pending) {
if (cpi->multi_arf_enabled) {
// A portion of the gf / arf extra bits is set aside for lower-level
// boosted frames in the middle of the group.
mid_boost_bits += gf_arf_bits >> 5;
gf_arf_bits -= (gf_arf_bits >> 5);
}
twopass->gf_group.update_type[frame_index] = ARF_UPDATE;
twopass->gf_group.rf_level[frame_index] = GF_ARF_STD;
twopass->gf_group.bit_allocation[frame_index] = gf_arf_bits;
twopass->gf_group.arf_src_offset[frame_index] =
(unsigned char)(rc->baseline_gf_interval - 1);
twopass->gf_group.arf_update_idx[frame_index] = arf_buffer_indices[0];
twopass->gf_group.arf_ref_idx[frame_index] = arf_buffer_indices[0];
++frame_index;
if (cpi->multi_arf_enabled) {
// Set aside a slot for a level 1 arf.
twopass->gf_group.update_type[frame_index] = ARF_UPDATE;
twopass->gf_group.rf_level[frame_index] = GF_ARF_LOW;
twopass->gf_group.arf_src_offset[frame_index] =
(unsigned char)((rc->baseline_gf_interval >> 1) - 1);
twopass->gf_group.arf_update_idx[frame_index] = arf_buffer_indices[1];
twopass->gf_group.arf_ref_idx[frame_index] = arf_buffer_indices[0];
++frame_index;
}
}
// Define middle frame
mid_frame_idx = frame_index + (rc->baseline_gf_interval >> 1) - 1;
// Allocate bits to the other frames in the group.
for (i = 0; i < rc->baseline_gf_interval - 1; ++i) {
int arf_idx = 0;
if (EOF == input_stats(twopass, &frame_stats))
break;
@@ -1392,48 +1475,10 @@ static void allocate_gf_group_bits(VP9_COMP *cpi, int64_t gf_group_bits,
err_fraction = 0.0;
target_frame_size = (int)((double)total_group_bits * err_fraction);
if (rc->source_alt_ref_pending && cpi->multi_arf_enabled) {
mid_boost_bits += (target_frame_size >> 4);
target_frame_size -= (target_frame_size >> 4);
if (frame_index <= mid_frame_idx)
arf_idx = 1;
}
twopass->gf_group.arf_update_idx[frame_index] = arf_buffer_indices[arf_idx];
twopass->gf_group.arf_ref_idx[frame_index] = arf_buffer_indices[arf_idx];
target_frame_size = clamp(target_frame_size, 0,
MIN(max_bits, (int)total_group_bits));
twopass->gf_group.update_type[frame_index] = LF_UPDATE;
twopass->gf_group.rf_level[frame_index] = INTER_NORMAL;
twopass->gf_group.bit_allocation[frame_index] = target_frame_size;
++frame_index;
}
// Note:
// We need to configure the frame at position (end of sequence + 1), which
// will be the start frame for the next group. Otherwise, prior to the call
// to vp9_rc_get_second_pass_params(), the data will be undefined.
twopass->gf_group.arf_update_idx[frame_index] = arf_buffer_indices[0];
twopass->gf_group.arf_ref_idx[frame_index] = arf_buffer_indices[0];
if (rc->source_alt_ref_pending) {
twopass->gf_group.update_type[frame_index] = OVERLAY_UPDATE;
twopass->gf_group.rf_level[frame_index] = INTER_NORMAL;
// Final setup for second arf and its overlay.
if (cpi->multi_arf_enabled) {
twopass->gf_group.bit_allocation[2] =
twopass->gf_group.bit_allocation[mid_frame_idx] + mid_boost_bits;
twopass->gf_group.update_type[mid_frame_idx] = OVERLAY_UPDATE;
twopass->gf_group.bit_allocation[mid_frame_idx] = 0;
}
} else {
twopass->gf_group.update_type[frame_index] = GF_UPDATE;
twopass->gf_group.rf_level[frame_index] = GF_ARF_STD;
twopass->gf_group_bit_allocation[group_frame_index++] = target_frame_size;
}
}
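Stripped of the gf_group bookkeeping that this hunk removes, the per-frame allocation reduces to a share of the group budget proportional to the frame's modified error, clamped as in the code above. A minimal sketch (frame_bits() is a name local to this sketch):

#include <stdint.h>

static int frame_bits(double modified_err, double group_error,
                      int64_t total_group_bits, int max_bits) {
  const double err_fraction =
      group_error > 0.0 ? modified_err / group_error : 0.0;
  int target = (int)((double)total_group_bits * err_fraction);
  /* clamp(target, 0, MIN(max_bits, total_group_bits)) as in the hunk above */
  if (target < 0) target = 0;
  if (target > max_bits) target = max_bits;
  if (target > (int)total_group_bits) target = (int)total_group_bits;
  return target;
}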
@@ -1463,7 +1508,7 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
double mv_in_out_accumulator = 0.0;
double abs_mv_in_out_accumulator = 0.0;
double mv_ratio_accumulator_thresh;
unsigned int allow_alt_ref = is_altref_enabled(oxcf);
unsigned int allow_alt_ref = oxcf->play_alternate && oxcf->lag_in_frames;
int f_boost = 0;
int b_boost = 0;
@@ -1476,7 +1521,8 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
// Reset the GF group data structures unless this is a key
// frame in which case it will already have been done.
if (cpi->common.frame_type != KEY_FRAME) {
vp9_zero(twopass->gf_group);
twopass->gf_group_index = 0;
vp9_zero(twopass->gf_group_bit_allocation);
}
vp9_clear_system_state();
@@ -1598,14 +1644,24 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
}
}
#if CONFIG_MULTIPLE_ARF
if (cpi->multi_arf_enabled) {
// Initialize frame coding order variables.
cpi->new_frame_coding_order_period = 0;
cpi->next_frame_in_order = 0;
cpi->arf_buffered = 0;
vp9_zero(cpi->frame_coding_order);
vp9_zero(cpi->arf_buffer_idx);
vpx_memset(cpi->arf_weight, -1, sizeof(cpi->arf_weight));
}
#endif
// Set the interval until the next gf.
if (cpi->common.frame_type == KEY_FRAME || rc->source_alt_ref_active)
rc->baseline_gf_interval = i - 1;
else
rc->baseline_gf_interval = i;
rc->frames_till_gf_update_due = rc->baseline_gf_interval;
// Should we use the alternate reference frame.
if (allow_alt_ref &&
(i < cpi->oxcf.lag_in_frames) &&
@@ -1618,11 +1674,62 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
&b_boost);
rc->source_alt_ref_pending = 1;
#if CONFIG_MULTIPLE_ARF
// Set the ARF schedule.
if (cpi->multi_arf_enabled) {
schedule_frames(cpi, 0, -(rc->baseline_gf_interval - 1), 2, 1, 0);
}
#endif
} else {
rc->gfu_boost = (int)boost_score;
rc->source_alt_ref_pending = 0;
#if CONFIG_MULTIPLE_ARF
// Set the GF schedule.
if (cpi->multi_arf_enabled) {
schedule_frames(cpi, 0, rc->baseline_gf_interval - 1, 2, 0, 0);
assert(cpi->new_frame_coding_order_period ==
rc->baseline_gf_interval);
}
#endif
}
#if CONFIG_MULTIPLE_ARF
if (cpi->multi_arf_enabled && (cpi->common.frame_type != KEY_FRAME)) {
int max_level = INT_MIN;
// Replace level indicator of -1 with correct level.
for (i = 0; i < cpi->frame_coding_order_period; ++i) {
if (cpi->arf_weight[i] > max_level) {
max_level = cpi->arf_weight[i];
}
}
++max_level;
for (i = 0; i < cpi->frame_coding_order_period; ++i) {
if (cpi->arf_weight[i] == -1) {
cpi->arf_weight[i] = max_level;
}
}
cpi->max_arf_level = max_level;
}
#if 0
if (cpi->multi_arf_enabled) {
printf("\nSchedule: ");
for (i = 0; i < cpi->new_frame_coding_order_period; ++i) {
printf("%4d ", cpi->frame_coding_order[i]);
}
printf("\n");
printf("ARFref: ");
for (i = 0; i < cpi->new_frame_coding_order_period; ++i) {
printf("%4d ", cpi->arf_buffer_idx[i]);
}
printf("\n");
printf("Weight: ");
for (i = 0; i < cpi->new_frame_coding_order_period; ++i) {
printf("%4d ", cpi->arf_weight[i]);
}
printf("\n");
}
#endif
#endif
// Reset the file position.
reset_fpf_position(twopass, start_pos);
@@ -1772,7 +1879,8 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
cpi->common.frame_type = KEY_FRAME;
// Reset the GF group data structures.
vp9_zero(twopass->gf_group);
twopass->gf_group_index = 0;
vp9_zero(twopass->gf_group_bit_allocation);
// Is this a forced key frame by interval.
rc->this_key_frame_forced = rc->next_key_frame_forced;
@@ -1963,9 +2071,7 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
twopass->kf_group_bits -= kf_bits;
// Save the bits to spend on the key frame.
twopass->gf_group.bit_allocation[0] = kf_bits;
twopass->gf_group.update_type[0] = KF_UPDATE;
twopass->gf_group.rf_level[0] = KF_STD;
twopass->gf_group_bit_allocation[0] = kf_bits;
// Note the total error score of the kf group minus the key frame itself.
twopass->kf_group_error_left = (int)(kf_group_err - kf_mod_err);
@@ -1993,44 +2099,6 @@ void vbr_rate_correction(int * this_frame_target,
}
}
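The body of vbr_rate_correction() is outside this hunk, so the following is only a plausible sketch of the LONG_TERM_VBR_CORRECTION idea it serves, not this branch's code: fold a bounded share of the accumulated VBR over/undershoot back into the current frame target.

#include <stdint.h>

/* Hypothetical; the real function may differ in both fraction and bounds. */
static void vbr_rate_correction_sketch(int *this_frame_target,
                                       int64_t vbr_bits_off_target) {
  int64_t delta = vbr_bits_off_target / 10;  /* repay ~10% of the debt */
  const int64_t bound = *this_frame_target / 2;
  if (delta > bound) delta = bound;    /* keep the adjusted target within */
  if (delta < -bound) delta = -bound;  /* [T/2, 3T/2] of the original T   */
  *this_frame_target += (int)delta;
}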
// Define the reference buffers that will be updated post encode.
void configure_buffer_updates(VP9_COMP *cpi) {
TWO_PASS *const twopass = &cpi->twopass;
cpi->rc.is_src_frame_alt_ref = 0;
switch (twopass->gf_group.update_type[twopass->gf_group.index]) {
case KF_UPDATE:
cpi->refresh_last_frame = 1;
cpi->refresh_golden_frame = 1;
cpi->refresh_alt_ref_frame = 1;
break;
case LF_UPDATE:
cpi->refresh_last_frame = 1;
cpi->refresh_golden_frame = 0;
cpi->refresh_alt_ref_frame = 0;
break;
case GF_UPDATE:
cpi->refresh_last_frame = 1;
cpi->refresh_golden_frame = 1;
cpi->refresh_alt_ref_frame = 0;
break;
case OVERLAY_UPDATE:
cpi->refresh_last_frame = 0;
cpi->refresh_golden_frame = 1;
cpi->refresh_alt_ref_frame = 0;
cpi->rc.is_src_frame_alt_ref = 1;
break;
case ARF_UPDATE:
cpi->refresh_last_frame = 0;
cpi->refresh_golden_frame = 0;
cpi->refresh_alt_ref_frame = 1;
break;
default:
assert(0);
}
}
void vp9_rc_get_second_pass_params(VP9_COMP *cpi) {
VP9_COMMON *const cm = &cpi->common;
RATE_CONTROL *const rc = &cpi->rc;
@@ -2055,17 +2123,19 @@ void vp9_rc_get_second_pass_params(VP9_COMP *cpi) {
if (!twopass->stats_in)
return;
// Increment the gf group index.
++twopass->gf_group_index;
// If this is an arf frame then we don't want to read the stats file or
// advance the input pointer as we already have what we need.
if (twopass->gf_group.update_type[twopass->gf_group.index] == ARF_UPDATE) {
if (cpi->refresh_alt_ref_frame) {
int target_rate;
configure_buffer_updates(cpi);
target_rate = twopass->gf_group.bit_allocation[twopass->gf_group.index];
target_rate = twopass->gf_group_bit_allocation[twopass->gf_group_index];
target_rate = vp9_rc_clamp_pframe_target_size(cpi, target_rate);
rc->base_frame_target = target_rate;
#ifdef LONG_TERM_VBR_CORRECTION
// Correction to rate target based on prior over- or under-shoot.
if (cpi->oxcf.rc_mode == VPX_VBR)
if (cpi->oxcf.rc_mode == RC_MODE_VBR)
vbr_rate_correction(&target_rate, rc->vbr_bits_off_target);
#endif
vp9_rc_set_frame_target(cpi, target_rate);
@@ -2080,7 +2150,7 @@ void vp9_rc_get_second_pass_params(VP9_COMP *cpi) {
twopass->gf_intra_err_min = GF_MB_INTRA_MIN * cpi->common.MBs;
}
if (cpi->oxcf.rc_mode == VPX_Q) {
if (cpi->oxcf.rc_mode == RC_MODE_CONSTANT_QUALITY) {
twopass->active_worst_quality = cpi->oxcf.cq_level;
} else if (cm->current_video_frame == 0 ||
(is_spatial_svc && lc->current_video_frame_in_layer == 0)) {
@@ -2124,7 +2194,15 @@ void vp9_rc_get_second_pass_params(VP9_COMP *cpi) {
// Define a new GF/ARF group. (Should always enter here for key frames).
if (rc->frames_till_gf_update_due == 0) {
define_gf_group(cpi, &this_frame_copy);
#if CONFIG_MULTIPLE_ARF
if (cpi->multi_arf_enabled) {
define_fixed_arf_period(cpi);
} else {
#endif
define_gf_group(cpi, &this_frame_copy);
#if CONFIG_MULTIPLE_ARF
}
#endif
if (twopass->gf_zeromotion_pct > 995) {
// As long as max_thresh for encode breakout is small enough, it is ok
@@ -2148,9 +2226,7 @@ void vp9_rc_get_second_pass_params(VP9_COMP *cpi) {
}
}
configure_buffer_updates(cpi);
target_rate = twopass->gf_group.bit_allocation[twopass->gf_group.index];
target_rate = twopass->gf_group_bit_allocation[twopass->gf_group_index];
if (cpi->common.frame_type == KEY_FRAME)
target_rate = vp9_rc_clamp_iframe_target_size(cpi, target_rate);
else
@@ -2159,7 +2235,7 @@ void vp9_rc_get_second_pass_params(VP9_COMP *cpi) {
rc->base_frame_target = target_rate;
#ifdef LONG_TERM_VBR_CORRECTION
// Correction to rate target based on prior over- or under-shoot.
if (cpi->oxcf.rc_mode == VPX_VBR)
if (cpi->oxcf.rc_mode == RC_MODE_VBR)
vbr_rate_correction(&target_rate, rc->vbr_bits_off_target);
#endif
vp9_rc_set_frame_target(cpi, target_rate);
@@ -2213,7 +2289,4 @@ void vp9_twopass_postencode_update(VP9_COMP *cpi) {
twopass->kf_group_bits -= bits_used;
}
twopass->kf_group_bits = MAX(twopass->kf_group_bits, 0);
// Increment the gf group index ready for the next frame.
++twopass->gf_group.index;
}

Some files were not shown because too many files have changed in this diff