Release v1.6.0 Khaki Campbell Duck

Change-Id: I08da365dd889093f9919476a02ee96ae9615f140
Fix encoder crashes for odd size input
2016-07-20 18:15:41 -07:00 · 2016-07-20 15:02:13 -07:00 · 2016-06-25 11:40:26 -07:00 · 2016-06-25 11:37:20 -07:00 · 2016-06-25 00:43:40 +00:00 · 2016-06-25 00:36:05 +00:00
1355 changed files with 216195 additions and 227172 deletions
--- a/.clang-format
+++ b/.clang-format
@@ -1,91 +0,0 @@
---
-Language:        Cpp
-# BasedOnStyle:  Google
-# Generated with clang-format 3.8.1
-AccessModifierOffset: -1
-AlignAfterOpenBracket: Align
-AlignConsecutiveAssignments: false
-AlignConsecutiveDeclarations: false
-AlignEscapedNewlinesLeft: true
-AlignOperands:   true
-AlignTrailingComments: true
-AllowAllParametersOfDeclarationOnNextLine: true
-AllowShortBlocksOnASingleLine: false
-AllowShortCaseLabelsOnASingleLine: true
-AllowShortFunctionsOnASingleLine: All
-AllowShortIfStatementsOnASingleLine: true
-AllowShortLoopsOnASingleLine: true
-AlwaysBreakAfterDefinitionReturnType: None
-AlwaysBreakAfterReturnType: None
-AlwaysBreakBeforeMultilineStrings: true
-AlwaysBreakTemplateDeclarations: true
-BinPackArguments: true
-BinPackParameters: true
-BraceWrapping:
-  AfterClass:      false
-  AfterControlStatement: false
-  AfterEnum:       false
-  AfterFunction:   false
-  AfterNamespace:  false
-  AfterObjCDeclaration: false
-  AfterStruct:     false
-  AfterUnion:      false
-  BeforeCatch:     false
-  BeforeElse:      false
-  IndentBraces:    false
-BreakBeforeBinaryOperators: None
-BreakBeforeBraces: Attach
-BreakBeforeTernaryOperators: true
-BreakConstructorInitializersBeforeComma: false
-ColumnLimit:     80
-CommentPragmas:  '^ IWYU pragma:'
-ConstructorInitializerAllOnOneLineOrOnePerLine: false
-ConstructorInitializerIndentWidth: 4
-ContinuationIndentWidth: 4
-Cpp11BracedListStyle: false
-DerivePointerAlignment: false
-DisableFormat:   false
-ExperimentalAutoDetectBinPacking: false
-ForEachMacros:   [ foreach, Q_FOREACH, BOOST_FOREACH ]
-IncludeCategories:
-  - Regex:           '^<.*\.h>'
-    Priority:        1
-  - Regex:           '^<.*'
-    Priority:        2
-  - Regex:           '.*'
-    Priority:        3
-IndentCaseLabels: true
-IndentWidth:     2
-IndentWrappedFunctionNames: false
-KeepEmptyLinesAtTheStartOfBlocks: false
-MacroBlockBegin: ''
-MacroBlockEnd:   ''
-MaxEmptyLinesToKeep: 1
-NamespaceIndentation: None
-ObjCBlockIndentWidth: 2
-ObjCSpaceAfterProperty: false
-ObjCSpaceBeforeProtocolList: false
-PenaltyBreakBeforeFirstCallParameter: 1
-PenaltyBreakComment: 300
-PenaltyBreakFirstLessLess: 120
-PenaltyBreakString: 1000
-PenaltyExcessCharacter: 1000000
-PenaltyReturnTypeOnItsOwnLine: 200
-PointerAlignment: Right
-ReflowComments:  true
-SortIncludes:    false
-SpaceAfterCStyleCast: false
-SpaceBeforeAssignmentOperators: true
-SpaceBeforeParens: ControlStatements
-SpaceInEmptyParentheses: false
-SpacesBeforeTrailingComments: 2
-SpacesInAngles:  false
-SpacesInContainerLiterals: true
-SpacesInCStyleCastParentheses: false
-SpacesInParentheses: false
-SpacesInSquareBrackets: false
-Standard:        Auto
-TabWidth:        8
-UseTab:          Never
-...
-
--- a/.gitignore
+++ b/.gitignore
@@ -29,36 +29,37 @@
 /examples/decode_with_drops
 /examples/decode_with_partial_drops
 /examples/example_xma
-/examples/lossless_encoder
 /examples/postproc
 /examples/resize_util
 /examples/set_maps
 /examples/simple_decoder
 /examples/simple_encoder
 /examples/twopass_encoder
-/examples/aom_cx_set_ref
-/examples/av1_spatial_scalable_encoder
-/examples/aom_temporal_scalable_patterns
-/examples/aom_temporal_svc_encoder
+/examples/vp8_multi_resolution_encoder
+/examples/vp8cx_set_ref
+/examples/vp9_lossless_encoder
+/examples/vp9_spatial_scalable_encoder
+/examples/vpx_temporal_scalable_patterns
+/examples/vpx_temporal_svc_encoder
 /ivfdec
 /ivfdec.dox
 /ivfenc
 /ivfenc.dox
-/libaom.so*
-/libaom.ver
+/libvpx.so*
+/libvpx.ver
 /samples.dox
 /test_intra_pred_speed
-/test_libaom
-/aom_api1_migration.dox
-/av1_rtcd.h
-/aom.pc
-/aom_config.c
-/aom_config.h
-/aom_dsp_rtcd.h
-/aom_scale_rtcd.h
-/aom_version.h
-/aomdec
-/aomdec.dox
-/aomenc
-/aomenc.dox
+/test_libvpx
+/vp8_api1_migration.dox
+/vp[89x]_rtcd.h
+/vpx.pc
+/vpx_config.c
+/vpx_config.h
+/vpx_dsp_rtcd.h
+/vpx_scale_rtcd.h
+/vpx_version.h
+/vpxdec
+/vpxdec.dox
+/vpxenc
+/vpxenc.dox
 TAGS
--- a/.mailmap
+++ b/.mailmap
@@ -3,6 +3,7 @@ Aℓex Converse <aconverse@google.com>
 Aℓex Converse <aconverse@google.com> <alex.converse@gmail.com>
 Alexis Ballier <aballier@gentoo.org> <alexis.ballier@gmail.com>
 Alpha Lam <hclam@google.com> <hclam@chromium.org>
+Daniele Castagna <dcastagna@chromium.org> <dcastagna@google.com>
 Deb Mukherjee <debargha@google.com>
 Erik Niemeyer <erik.a.niemeyer@intel.com> <erik.a.niemeyer@gmail.com>
 Guillaume Martres <gmartres@google.com> <smarter3@gmail.com>
@@ -13,12 +14,15 @@ Jim Bankoski <jimbankoski@google.com>
 Johann Koenig <johannkoenig@google.com>
 Johann Koenig <johannkoenig@google.com> <johann.koenig@duck.com>
 Johann Koenig <johannkoenig@google.com> <johann.koenig@gmail.com>
+Johann Koenig <johannkoenig@google.com> <johannkoenig@chromium.org>
 John Koleszar <jkoleszar@google.com>
 Joshua Litt <joshualitt@google.com> <joshualitt@chromium.org>
 Marco Paniconi <marpan@google.com>
 Marco Paniconi <marpan@google.com> <marpan@chromium.org>
 Pascal Massimino <pascal.massimino@gmail.com>
 Paul Wilkins <paulwilkins@google.com>
+Peter de Rivaz <peter.derivaz@gmail.com>
+Peter de Rivaz <peter.derivaz@gmail.com> <peter.derivaz@argondesign.com>
 Ralph Giles <giles@xiph.org> <giles@entropywave.com>
 Ralph Giles <giles@xiph.org> <giles@mozilla.com>
 Ronald S. Bultje <rsbultje@gmail.com> <rbultje@google.com>
@@ -26,7 +30,8 @@ Sami Pietilä <samipietila@google.com>
 Tamar Levy <tamar.levy@intel.com>
 Tamar Levy <tamar.levy@intel.com> <levytamar82@gmail.com>
 Tero Rintaluoma <teror@google.com> <tero.rintaluoma@on2.com>
-Timothy B. Terriberry <tterribe@xiph.org> Tim Terriberry <tterriberry@mozilla.com>
+Timothy B. Terriberry <tterribe@xiph.org> <tterriberry@mozilla.com>
 Tom Finegan <tomfinegan@google.com>
 Tom Finegan <tomfinegan@google.com> <tomfinegan@chromium.org>
 Yaowu Xu <yaowu@google.com> <yaowu@xuyaowu.com>
+Yaowu Xu <yaowu@google.com> <Yaowu Xu>
--- a/18
+++ b/18
@@ -24,6 +24,7 @@ changjun.yang <changjun.yang@intel.com>
 Charles 'Buck' Krasic <ckrasic@google.com>
 chm <chm@rock-chips.com>
 Christian Duvivier <cduvivier@google.com>
+Daniele Castagna <dcastagna@chromium.org>
 Daniel Kang <ddkang@google.com>
 Deb Mukherjee <debargha@google.com>
 Dim Temp <dimtemp0@gmail.com>
@@ -56,7 +57,7 @@ James Zern <jzern@google.com>
 Jan Gerber <j@mailb.org>
 Jan Kratochvil <jan.kratochvil@redhat.com>
 Janne Salonen <jsalonen@google.com>
-Jean-Marc Valin <jmvalin@jmvalin.ca>
+Jean-Yves Avenard <jyavenard@mozilla.com>
 Jeff Faust <jfaust@google.com>
 Jeff Muizelaar <jmuizelaar@mozilla.com>
 Jeff Petkau <jpet@chromium.org>
@@ -65,7 +66,6 @@ Jian Zhou <zhoujian@google.com>
 Jim Bankoski <jimbankoski@google.com>
 Jingning Han <jingning@google.com>
 Joey Parrish <joeyparrish@google.com>
-Johann Koenig <johannkoenig@chromium.org>
 Johann Koenig <johannkoenig@google.com>
 John Koleszar <jkoleszar@google.com>
 Johnny Klonaris <google@jawknee.com>
@@ -77,6 +77,7 @@ Justin Clift <justin@salasaga.org>
 Justin Lebar <justin.lebar@gmail.com>
 KO Myung-Hun <komh@chollian.net>
 Lawrence Velázquez <larryv@macports.org>
+Linfeng Zhang <linfengz@google.com>
 Lou Quillio <louquillio@google.com>
 Luca Barbato <lu_zero@gentoo.org>
 Makoto Kato <makoto.kt@gmail.com>
@@ -92,7 +93,6 @@ Mike Hommey <mhommey@mozilla.com>
 Mikhal Shemer <mikhal@google.com>
 Minghai Shang <minghai@google.com>
 Morton Jonuschat <yabawock@gmail.com>
-Nathan E. Egge <negge@dgql.org>
 Nico Weber <thakis@chromium.org>
 Parag Salasakar <img.mips1@gmail.com>
 Pascal Massimino <pascal.massimino@gmail.com>
@@ -101,7 +101,6 @@ Paul Wilkins <paulwilkins@google.com>
 Pavol Rusnak <stick@gk2.sk>
 Paweł Hajdan <phajdan@google.com>
 Pengchong Jin <pengchong@google.com>
-Peter de Rivaz <peter.derivaz@argondesign.com>
 Peter de Rivaz <peter.derivaz@gmail.com>
 Philip Jägenstedt <philipj@opera.com>
 Priit Laes <plaes@plaes.org>
@@ -121,7 +120,6 @@ Sergey Ulanov <sergeyu@chromium.org>
 Shimon Doodkin <helpmepro1@gmail.com>
 Shunyao Li <shunyaoli@google.com>
 Stefan Holmer <holmer@google.com>
-Steinar Midtskogen <stemidts@cisco.com>
 Suman Sunkara <sunkaras@google.com>
 Taekhyun Kim <takim@nvidia.com>
 Takanori MATSUURA <t.matsuu@gmail.com>
@@ -129,16 +127,16 @@ Tamar Levy <tamar.levy@intel.com>
 Tao Bai <michaelbai@chromium.org>
 Tero Rintaluoma <teror@google.com>
 Thijs Vermeir <thijsvermeir@gmail.com>
-Thomas Daede <tdaede@mozilla.com>
-Thomas Davies <thdavies@cisco.com>
-Thomas <thdavies@cisco.com>
 Tim Kopp <tkopp@google.com>
 Timothy B. Terriberry <tterribe@xiph.org>
 Tom Finegan <tomfinegan@google.com>
-Tristan Matthews <le.businessman@gmail.com>
-Tristan Matthews <tmatth@videolan.org>
 Vignesh Venkatasubramanian <vigneshv@google.com>
 Yaowu Xu <yaowu@google.com>
+Yi Luo <luoyi@google.com>
 Yongzhe Wang <yongzhe@google.com>
 Yunqing Wang <yunqingwang@google.com>
+Yury Gitman <yuryg@google.com>
 Zoe Liu <zoeliu@google.com>
+Google Inc.
+The Mozilla Foundation
+The Xiph.Org Foundation
--- a/34
+++ b/34
@@ -1,9 +1,33 @@
-Next Release
-  - Incompatible changes:
-    The AV1 encoder's default keyframe interval changed to 128 from 9999.
+2016-07-20 v1.6.0 "Khaki Campbell Duck"
+  This release improves upon the VP9 encoder and speeds up the encoding and
+  decoding processes.
+
+  - Upgrading:
+    This release is ABI incompatible with 1.5.0 due to a new 'color_range' enum
+    in vpx_image and some minor changes to the VP8_COMP structure.
+
+    The default key frame interval for VP9 has changed from 128 to 9999.
+
+  - Enhancement:
+    A core focus has been performance for low end Intel processors. SSSE3
+    instructions such as 'pshufb' have been avoided and instructions have been
+    reordered to better accommodate the more constrained pipelines.
+
+    As a result, devices based on Celeron processors have seen substantial
+    decoding improvements. From Indian Runner Duck to Javan Whistling Duck,
+    decoding speed improved between 10 and 30%. Between Javan Whistling Duck
+    and Khaki Campbell Duck, it improved another 10 to 15%.
+
+    While Celeron benefited most, Core-i5 also improved 5% and 10% between the
+    respective releases.
+
+    Realtime performance for WebRTC for both speed and quality has received a
+    lot of attention.
+
+  - Bug Fixes:
+    A number of fuzzing issues, found variously by Mozilla, Chromium and others,
+    have been fixed and we strongly recommend updating.

-2016-04-07 v0.1.0 "AOMedia Codec 1"
-  This release is the first Alliance for Open Media codec.
 2015-11-09 v1.5.0 "Javan Whistling Duck"
  This release improves upon the VP9 encoder and speeds up the encoding and
  decoding processes.
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,270 +0,0 @@
-##
-## Copyright (c) 2016, Alliance for Open Media. All rights reserved
-##
-## This source code is subject to the terms of the BSD 2 Clause License and
-## the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
-## was not distributed with this source code in the LICENSE file, you can
-## obtain it at www.aomedia.org/license/software. If the Alliance for Open
-## Media Patent License 1.0 was not distributed with this source code in the
-## PATENTS file, you can obtain it at www.aomedia.org/license/patent.
-##
-cmake_minimum_required(VERSION 3.2)
-project(AOM C CXX)
-
-set(AOM_ROOT "${CMAKE_CURRENT_SOURCE_DIR}")
-set(AOM_CONFIG_DIR "${CMAKE_CURRENT_BINARY_DIR}")
-include("${AOM_ROOT}/build/cmake/aom_configure.cmake")
-
-set(AOM_SRCS
-    "${AOM_CONFIG_DIR}/aom_config.c"
-    "${AOM_CONFIG_DIR}/aom_config.h"
-    "${AOM_ROOT}/aom/aom.h"
-    "${AOM_ROOT}/aom/aom_codec.h"
-    "${AOM_ROOT}/aom/aom_decoder.h"
-    "${AOM_ROOT}/aom/aom_encoder.h"
-    "${AOM_ROOT}/aom/aom_frame_buffer.h"
-    "${AOM_ROOT}/aom/aom_image.h"
-    "${AOM_ROOT}/aom/aom_integer.h"
-    "${AOM_ROOT}/aom/aomcx.h"
-    "${AOM_ROOT}/aom/aomdx.h"
-    "${AOM_ROOT}/aom/internal/aom_codec_internal.h"
-    "${AOM_ROOT}/aom/src/aom_codec.c"
-    "${AOM_ROOT}/aom/src/aom_decoder.c"
-    "${AOM_ROOT}/aom/src/aom_encoder.c"
-    "${AOM_ROOT}/aom/src/aom_image.c")
-
-set(AOM_DSP_SRCS
-    "${AOM_ROOT}/aom_dsp/aom_convolve.c"
-    "${AOM_ROOT}/aom_dsp/aom_convolve.h"
-    "${AOM_ROOT}/aom_dsp/aom_dsp_common.h"
-    "${AOM_ROOT}/aom_dsp/aom_dsp_rtcd.c"
-    "${AOM_ROOT}/aom_dsp/aom_filter.h"
-    "${AOM_ROOT}/aom_dsp/aom_simd.c"
-    "${AOM_ROOT}/aom_dsp/aom_simd.h"
-    "${AOM_ROOT}/aom_dsp/aom_simd_inline.h"
-    "${AOM_ROOT}/aom_dsp/avg.c"
-    "${AOM_ROOT}/aom_dsp/bitreader.h"
-    "${AOM_ROOT}/aom_dsp/bitreader_buffer.c"
-    "${AOM_ROOT}/aom_dsp/bitreader_buffer.h"
-    "${AOM_ROOT}/aom_dsp/bitwriter.h"
-    "${AOM_ROOT}/aom_dsp/bitwriter_buffer.c"
-    "${AOM_ROOT}/aom_dsp/bitwriter_buffer.h"
-    "${AOM_ROOT}/aom_dsp/blend.h"
-    "${AOM_ROOT}/aom_dsp/blend_a64_hmask.c"
-    "${AOM_ROOT}/aom_dsp/blend_a64_mask.c"
-    "${AOM_ROOT}/aom_dsp/blend_a64_vmask.c"
-    "${AOM_ROOT}/aom_dsp/dkboolreader.c"
-    "${AOM_ROOT}/aom_dsp/dkboolreader.h"
-    "${AOM_ROOT}/aom_dsp/dkboolwriter.c"
-    "${AOM_ROOT}/aom_dsp/dkboolwriter.h"
-    "${AOM_ROOT}/aom_dsp/fwd_txfm.c"
-    "${AOM_ROOT}/aom_dsp/fwd_txfm.h"
-    "${AOM_ROOT}/aom_dsp/intrapred.c"
-    "${AOM_ROOT}/aom_dsp/inv_txfm.c"
-    "${AOM_ROOT}/aom_dsp/inv_txfm.h"
-    "${AOM_ROOT}/aom_dsp/loopfilter.c"
-    "${AOM_ROOT}/aom_dsp/prob.c"
-    "${AOM_ROOT}/aom_dsp/prob.h"
-    "${AOM_ROOT}/aom_dsp/psnr.c"
-    "${AOM_ROOT}/aom_dsp/psnr.h"
-    "${AOM_ROOT}/aom_dsp/quantize.c"
-    "${AOM_ROOT}/aom_dsp/quantize.h"
-    "${AOM_ROOT}/aom_dsp/sad.c"
-    "${AOM_ROOT}/aom_dsp/simd/v128_intrinsics.h"
-    "${AOM_ROOT}/aom_dsp/simd/v128_intrinsics_c.h"
-    "${AOM_ROOT}/aom_dsp/simd/v256_intrinsics.h"
-    "${AOM_ROOT}/aom_dsp/simd/v256_intrinsics_c.h"
-    "${AOM_ROOT}/aom_dsp/simd/v64_intrinsics.h"
-    "${AOM_ROOT}/aom_dsp/simd/v64_intrinsics_c.h"
-    "${AOM_ROOT}/aom_dsp/subtract.c"
-    "${AOM_ROOT}/aom_dsp/txfm_common.h"
-    "${AOM_ROOT}/aom_dsp/variance.c"
-    "${AOM_ROOT}/aom_dsp/variance.h")
-
-set(AOM_MEM_SRCS
-    "${AOM_ROOT}/aom_mem/aom_mem.c"
-    "${AOM_ROOT}/aom_mem/aom_mem.h"
-    "${AOM_ROOT}/aom_mem/include/aom_mem_intrnl.h")
-
-set(AOM_SCALE_SRCS
-    "${AOM_ROOT}/aom_scale/aom_scale.h"
-    "${AOM_ROOT}/aom_scale/aom_scale_rtcd.c"
-    "${AOM_ROOT}/aom_scale/generic/aom_scale.c"
-    "${AOM_ROOT}/aom_scale/generic/gen_scalers.c"
-    "${AOM_ROOT}/aom_scale/generic/yv12config.c"
-    "${AOM_ROOT}/aom_scale/generic/yv12extend.c"
-    "${AOM_ROOT}/aom_scale/yv12config.h")
-
-# TODO(tomfinegan): Extract aom_ports from aom_util if possible.
-set(AOM_UTIL_SRCS
-    "${AOM_ROOT}/aom_ports/aom_once.h"
-    "${AOM_ROOT}/aom_ports/aom_timer.h"
-    "${AOM_ROOT}/aom_ports/bitops.h"
-    "${AOM_ROOT}/aom_ports/emmintrin_compat.h"
-    "${AOM_ROOT}/aom_ports/mem.h"
-    "${AOM_ROOT}/aom_ports/mem_ops.h"
-    "${AOM_ROOT}/aom_ports/mem_ops_aligned.h"
-    "${AOM_ROOT}/aom_ports/msvc.h"
-    "${AOM_ROOT}/aom_ports/system_state.h"
-    "${AOM_ROOT}/aom_util/aom_thread.c"
-    "${AOM_ROOT}/aom_util/aom_thread.h"
-    "${AOM_ROOT}/aom_util/endian_inl.h")
-
-set(AOM_AV1_COMMON_SRCS
-    "${AOM_ROOT}/av1/av1_iface_common.h"
-    "${AOM_ROOT}/av1/common/alloccommon.c"
-    "${AOM_ROOT}/av1/common/alloccommon.h"
-    "${AOM_ROOT}/av1/common/av1_fwd_txfm.c"
-    "${AOM_ROOT}/av1/common/av1_fwd_txfm.h"
-    "${AOM_ROOT}/av1/common/av1_inv_txfm.c"
-    "${AOM_ROOT}/av1/common/av1_inv_txfm.h"
-    "${AOM_ROOT}/av1/common/av1_rtcd.c"
-    "${AOM_ROOT}/av1/common/blockd.c"
-    "${AOM_ROOT}/av1/common/blockd.h"
-    "${AOM_ROOT}/av1/common/common.h"
-    "${AOM_ROOT}/av1/common/common_data.h"
-    "${AOM_ROOT}/av1/common/convolve.c"
-    "${AOM_ROOT}/av1/common/convolve.h"
-    "${AOM_ROOT}/av1/common/debugmodes.c"
-    "${AOM_ROOT}/av1/common/entropy.c"
-    "${AOM_ROOT}/av1/common/entropy.h"
-    "${AOM_ROOT}/av1/common/entropymode.c"
-    "${AOM_ROOT}/av1/common/entropymode.h"
-    "${AOM_ROOT}/av1/common/entropymv.c"
-    "${AOM_ROOT}/av1/common/entropymv.h"
-    "${AOM_ROOT}/av1/common/enums.h"
-    "${AOM_ROOT}/av1/common/filter.c"
-    "${AOM_ROOT}/av1/common/filter.h"
-    "${AOM_ROOT}/av1/common/frame_buffers.c"
-    "${AOM_ROOT}/av1/common/frame_buffers.h"
-    "${AOM_ROOT}/av1/common/idct.c"
-    "${AOM_ROOT}/av1/common/idct.h"
-    "${AOM_ROOT}/av1/common/loopfilter.c"
-    "${AOM_ROOT}/av1/common/loopfilter.h"
-    "${AOM_ROOT}/av1/common/mv.h"
-    "${AOM_ROOT}/av1/common/mvref_common.c"
-    "${AOM_ROOT}/av1/common/mvref_common.h"
-    "${AOM_ROOT}/av1/common/odintrin.c"
-    "${AOM_ROOT}/av1/common/odintrin.h"
-    "${AOM_ROOT}/av1/common/onyxc_int.h"
-    "${AOM_ROOT}/av1/common/pred_common.c"
-    "${AOM_ROOT}/av1/common/pred_common.h"
-    "${AOM_ROOT}/av1/common/quant_common.c"
-    "${AOM_ROOT}/av1/common/quant_common.h"
-    "${AOM_ROOT}/av1/common/reconinter.c"
-    "${AOM_ROOT}/av1/common/reconinter.h"
-    "${AOM_ROOT}/av1/common/reconintra.c"
-    "${AOM_ROOT}/av1/common/reconintra.h"
-    "${AOM_ROOT}/av1/common/scale.c"
-    "${AOM_ROOT}/av1/common/scale.h"
-    "${AOM_ROOT}/av1/common/scan.c"
-    "${AOM_ROOT}/av1/common/scan.h"
-    "${AOM_ROOT}/av1/common/seg_common.c"
-    "${AOM_ROOT}/av1/common/seg_common.h"
-    "${AOM_ROOT}/av1/common/thread_common.c"
-    "${AOM_ROOT}/av1/common/thread_common.h"
-    "${AOM_ROOT}/av1/common/tile_common.c"
-    "${AOM_ROOT}/av1/common/tile_common.h")
-
-set(AOM_AV1_DECODER_SRCS
-    "${AOM_ROOT}/av1/av1_dx_iface.c"
-    "${AOM_ROOT}/av1/decoder/decodeframe.c"
-    "${AOM_ROOT}/av1/decoder/decodeframe.h"
-    "${AOM_ROOT}/av1/decoder/decodemv.c"
-    "${AOM_ROOT}/av1/decoder/decodemv.h"
-    "${AOM_ROOT}/av1/decoder/decoder.c"
-    "${AOM_ROOT}/av1/decoder/decoder.h"
-    "${AOM_ROOT}/av1/decoder/detokenize.c"
-    "${AOM_ROOT}/av1/decoder/detokenize.h"
-    "${AOM_ROOT}/av1/decoder/dsubexp.c"
-    "${AOM_ROOT}/av1/decoder/dsubexp.h"
-    "${AOM_ROOT}/av1/decoder/dthread.c"
-    "${AOM_ROOT}/av1/decoder/dthread.h")
-
-set(AOM_AV1_ENCODER_SRCS
-    "${AOM_ROOT}/av1/av1_cx_iface.c"
-    "${AOM_ROOT}/av1/encoder/aq_complexity.c"
-    "${AOM_ROOT}/av1/encoder/aq_complexity.h"
-    "${AOM_ROOT}/av1/encoder/aq_cyclicrefresh.c"
-    "${AOM_ROOT}/av1/encoder/aq_cyclicrefresh.h"
-    "${AOM_ROOT}/av1/encoder/aq_variance.c"
-    "${AOM_ROOT}/av1/encoder/aq_variance.h"
-    "${AOM_ROOT}/av1/encoder/bitstream.c"
-    "${AOM_ROOT}/av1/encoder/bitstream.h"
-    "${AOM_ROOT}/av1/encoder/block.h"
-    "${AOM_ROOT}/av1/encoder/context_tree.c"
-    "${AOM_ROOT}/av1/encoder/context_tree.h"
-    "${AOM_ROOT}/av1/encoder/cost.c"
-    "${AOM_ROOT}/av1/encoder/cost.h"
-    "${AOM_ROOT}/av1/encoder/dct.c"
-    "${AOM_ROOT}/av1/encoder/encodeframe.c"
-    "${AOM_ROOT}/av1/encoder/encodeframe.h"
-    "${AOM_ROOT}/av1/encoder/encodemb.c"
-    "${AOM_ROOT}/av1/encoder/encodemb.h"
-    "${AOM_ROOT}/av1/encoder/encodemv.c"
-    "${AOM_ROOT}/av1/encoder/encodemv.h"
-    "${AOM_ROOT}/av1/encoder/encoder.c"
-    "${AOM_ROOT}/av1/encoder/encoder.h"
-    "${AOM_ROOT}/av1/encoder/ethread.c"
-    "${AOM_ROOT}/av1/encoder/ethread.h"
-    "${AOM_ROOT}/av1/encoder/extend.c"
-    "${AOM_ROOT}/av1/encoder/extend.h"
-    "${AOM_ROOT}/av1/encoder/firstpass.c"
-    "${AOM_ROOT}/av1/encoder/firstpass.h"
-    "${AOM_ROOT}/av1/encoder/hybrid_fwd_txfm.c"
-    "${AOM_ROOT}/av1/encoder/hybrid_fwd_txfm.h"
-    "${AOM_ROOT}/av1/encoder/lookahead.c"
-    "${AOM_ROOT}/av1/encoder/lookahead.h"
-    "${AOM_ROOT}/av1/encoder/mbgraph.c"
-    "${AOM_ROOT}/av1/encoder/mbgraph.h"
-    "${AOM_ROOT}/av1/encoder/mcomp.c"
-    "${AOM_ROOT}/av1/encoder/mcomp.h"
-    "${AOM_ROOT}/av1/encoder/picklpf.c"
-    "${AOM_ROOT}/av1/encoder/picklpf.h"
-    "${AOM_ROOT}/av1/encoder/quantize.c"
-    "${AOM_ROOT}/av1/encoder/quantize.h"
-    "${AOM_ROOT}/av1/encoder/ratectrl.c"
-    "${AOM_ROOT}/av1/encoder/ratectrl.h"
-    "${AOM_ROOT}/av1/encoder/rd.c"
-    "${AOM_ROOT}/av1/encoder/rd.h"
-    "${AOM_ROOT}/av1/encoder/rdopt.c"
-    "${AOM_ROOT}/av1/encoder/rdopt.h"
-    "${AOM_ROOT}/av1/encoder/resize.c"
-    "${AOM_ROOT}/av1/encoder/resize.h"
-    "${AOM_ROOT}/av1/encoder/segmentation.c"
-    "${AOM_ROOT}/av1/encoder/segmentation.h"
-    "${AOM_ROOT}/av1/encoder/speed_features.c"
-    "${AOM_ROOT}/av1/encoder/speed_features.h"
-    "${AOM_ROOT}/av1/encoder/subexp.c"
-    "${AOM_ROOT}/av1/encoder/subexp.h"
-    "${AOM_ROOT}/av1/encoder/temporal_filter.c"
-    "${AOM_ROOT}/av1/encoder/temporal_filter.h"
-    "${AOM_ROOT}/av1/encoder/tokenize.c"
-    "${AOM_ROOT}/av1/encoder/tokenize.h"
-    "${AOM_ROOT}/av1/encoder/treewriter.c"
-    "${AOM_ROOT}/av1/encoder/treewriter.h")
-
-# Targets
-add_library(aom_dsp ${AOM_DSP_SRCS})
-include_directories(${AOM_ROOT} ${AOM_CONFIG_DIR})
-add_library(aom_mem ${AOM_MEM_SRCS})
-add_library(aom_scale ${AOM_SCALE_SRCS})
-include_directories(${AOM_ROOT} ${AOM_CONFIG_DIR})
-add_library(aom_util ${AOM_UTIL_SRCS})
-add_library(aom_av1_decoder ${AOM_AV1_DECODER_SRCS})
-add_library(aom_av1_encoder ${AOM_AV1_ENCODER_SRCS})
-add_library(aom ${AOM_SRCS})
-target_link_libraries(aom LINK_PUBLIC
-                      aom_dsp
-                      aom_mem
-                      aom_scale
-                      aom_util
-                      aom_av1_decoder
-                      aom_av1_encoder)
-add_executable(simple_decoder examples/simple_decoder.c)
-include_directories(${AOM_ROOT})
-target_link_libraries(simple_decoder LINK_PUBLIC aom)
-add_executable(simple_encoder examples/simple_encoder.c)
-include_directories(${AOM_ROOT})
-target_link_libraries(simple_encoder LINK_PUBLIC aom)
-
--- a/42
+++ b/42
@@ -1,27 +1,31 @@
-Copyright (c) 2016, Alliance for Open Media. All rights reserved.
+Copyright (c) 2010, The WebM Project authors. All rights reserved.

 Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions
-are met:
+modification, are permitted provided that the following conditions are
+met:

-1. Redistributions of source code must retain the above copyright
-   notice, this list of conditions and the following disclaimer.
+  * Redistributions of source code must retain the above copyright
+    notice, this list of conditions and the following disclaimer.

-2. Redistributions in binary form must reproduce the above copyright
-   notice, this list of conditions and the following disclaimer in
-   the documentation and/or other materials provided with the
-   distribution.
+  * Redistributions in binary form must reproduce the above copyright
+    notice, this list of conditions and the following disclaimer in
+    the documentation and/or other materials provided with the
+    distribution.
+
+  * Neither the name of Google, nor the WebM Project, nor the names
+    of its contributors may be used to endorse or promote products
+    derived from this software without specific prior written
+    permission.

 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
-FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
-COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
-INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
-BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
-LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
-CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
-LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
-ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-POSSIBILITY OF SUCH DAMAGE.
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

--- a/127
+++ b/127
@@ -1,108 +1,23 @@
-Alliance for Open Media Patent License 1.0
+Additional IP Rights Grant (Patents)
+------------------------------------

-1. License Terms.
-
-1.1. Patent License. Subject to the terms and conditions of this License, each
-     Licensor, on behalf of itself and successors in interest and assigns,
-     grants Licensee a non-sublicensable, perpetual, worldwide, non-exclusive,
-     no-charge, royalty-free, irrevocable (except as expressly stated in this
-     License) patent license to its Necessary Claims to make, use, sell, offer
-     for sale, import or distribute any Implementation.
-
-1.2. Conditions.
-
-1.2.1. Availability. As a condition to the grant of rights to Licensee to make,
-       sell, offer for sale, import or distribute an Implementation under
-       Section 1.1, Licensee must make its Necessary Claims available under
-       this License, and must reproduce this License with any Implementation
-       as follows:
-
-       a. For distribution in source code, by including this License in the
-          root directory of the source code with its Implementation.
-
-       b. For distribution in any other form (including binary, object form,
-          and/or hardware description code (e.g., HDL, RTL, Gate Level Netlist,
-          GDSII, etc.)), by including this License in the documentation, legal
-          notices, and/or other written materials provided with the
-          Implementation.
-
-1.2.2. Additional Conditions. This license is directly from Licensor to
-       Licensee.  Licensee acknowledges as a condition of benefiting from it
-       that no rights from Licensor are received from suppliers, distributors,
-       or otherwise in connection with this License.
-
-1.3. Defensive Termination. If any Licensee, its Affiliates, or its agents
-     initiates patent litigation or files, maintains, or voluntarily
-     participates in a lawsuit against another entity or any person asserting
-     that any Implementation infringes Necessary Claims, any patent licenses
-     granted under this License directly to the Licensee are immediately
-     terminated as of the date of the initiation of action unless 1) that suit
-     was in response to a corresponding suit regarding an Implementation first
-     brought against an initiating entity, or 2) that suit was brought to
-     enforce the terms of this License (including intervention in a third-party
-     action by a Licensee).
-
-1.4. Disclaimers. The Reference Implementation and Specification are provided
-     "AS IS" and without warranty. The entire risk as to implementing or
-     otherwise using the Reference Implementation or Specification is assumed
-     by the implementer and user. Licensor expressly disclaims any warranties
-     (express, implied, or otherwise), including implied warranties of
-     merchantability, non-infringement, fitness for a particular purpose, or
-     title, related to the material. IN NO EVENT WILL LICENSOR BE LIABLE TO
-     ANY OTHER PARTY FOR LOST PROFITS OR ANY FORM OF INDIRECT, SPECIAL,
-     INCIDENTAL, OR CONSEQUENTIAL DAMAGES OF ANY CHARACTER FROM ANY CAUSES OF
-     ACTION OF ANY KIND WITH RESPECT TO THIS LICENSE, WHETHER BASED ON BREACH
-     OF CONTRACT, TORT (INCLUDING NEGLIGENCE), OR OTHERWISE, AND WHETHER OR
-     NOT THE OTHER PARTRY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-2. Definitions.
-
-2.1. Affiliate.  “Affiliate” means an entity that directly or indirectly
-     Controls, is Controlled by, or is under common Control of that party.
-
-2.2. Control. “Control” means direct or indirect control of more than 50% of
-     the voting power to elect directors of that corporation, or for any other
-     entity, the power to direct management of such entity.
-
-2.3. Decoder.  "Decoder" means any decoder that conforms fully with all
-     non-optional portions of the Specification.
-
-2.4. Encoder.  "Encoder" means any encoder that produces a bitstream that can
-     be decoded by a Decoder only to the extent it produces such a bitstream.
-
-2.5. Final Deliverable.  “Final Deliverable” means the final version of a
-     deliverable approved by the Alliance for Open Media as a Final
-     Deliverable.
-
-2.6. Implementation.  "Implementation" means any implementation, including the
-     Reference Implementation, that is an Encoder and/or a Decoder. An
-     Implementation also includes components of an Implementation only to the
-     extent they are used as part of an Implementation.
-
-2.7. License. “License” means this license.
-
-2.8. Licensee. “Licensee” means any person or entity who exercises patent
-     rights granted under this License.
-
-2.9. Licensor.  "Licensor" means (i) any Licensee that makes, sells, offers
-     for sale, imports or distributes any Implementation, or (ii) a person
-     or entity that has a licensing obligation to the Implementation as a
-     result of its membership and/or participation in the Alliance for Open
-     Media working group that developed the Specification.
-
-2.10. Necessary Claims.  "Necessary Claims" means all claims of patents or
-      patent applications, (a) that currently or at any time in the future,
-      are owned or controlled by the Licensor, and (b) (i) would be an
-      Essential Claim as defined by the W3C Policy as of February 5, 2004
-      (https://www.w3.org/Consortium/Patent-Policy-20040205/#def-essential)
-      as if the Specification was a W3C Recommendation; or (ii) are infringed
-      by the Reference Implementation.
-
-2.11. Reference Implementation. “Reference Implementation” means an Encoder
-      and/or Decoder released by the Alliance for Open Media as a Final
-      Deliverable.
-
-2.12. Specification. “Specification” means the specification designated by
-      the Alliance for Open Media as a Final Deliverable for which this
-      License was issued.
+"These implementations" means the copyrightable works that implement the WebM
+codecs distributed by Google as part of the WebM Project.

+Google hereby grants to you a perpetual, worldwide, non-exclusive, no-charge,
+royalty-free, irrevocable (except as stated in this section) patent license to
+make, have made, use, offer to sell, sell, import, transfer, and otherwise
+run, modify and propagate the contents of these implementations of WebM, where
+such license applies only to those patent claims, both currently owned by
+Google and acquired in the future, licensable by Google that are necessarily
+infringed by these implementations of WebM. This grant does not include claims
+that would be infringed only as a consequence of further modification of these
+implementations. If you or your agent or exclusive licensee institute or order
+or agree to the institution of patent litigation or any other patent
+enforcement activity against any entity (including a cross-claim or
+counterclaim in a lawsuit) alleging that any of these implementations of WebM
+or any code incorporated within any of these implementations of WebM
+constitute direct or contributory patent infringement, or inducement of
+patent infringement, then any patent rights granted to you under this License
+for these implementations of WebM shall terminate as of the date such
+litigation is filed.
--- a/36
+++ b/36
@@ -1,6 +1,6 @@
-README - 23 March 2015
+README - 20 July 2016

-Welcome to the WebM VP8/AV1 Codec SDK!
+Welcome to the WebM VP8/VP9 Codec SDK!

 COMPILING THE APPLICATIONS/LIBRARIES:
  The build system used is similar to autotools. Building generally consists of
@@ -33,13 +33,13 @@ COMPILING THE APPLICATIONS/LIBRARIES:

    $ mkdir build
    $ cd build
-    $ ../libaom/configure <options>
+    $ ../libvpx/configure <options>
    $ make

  3. Configuration options
  The 'configure' script supports a number of options. The --help option can be
  used to get a list of supported options:
-    $ ../libaom/configure --help
+    $ ../libvpx/configure --help

  4. Cross development
  For cross development, the most notable option is the --target option. The
@@ -79,6 +79,9 @@ COMPILING THE APPLICATIONS/LIBRARIES:
    x86-os2-gcc
    x86-solaris-gcc
    x86-win32-gcc
+    x86-win32-vs7
+    x86-win32-vs8
+    x86-win32-vs9
    x86-win32-vs10
    x86-win32-vs11
    x86-win32-vs12
@@ -95,6 +98,8 @@ COMPILING THE APPLICATIONS/LIBRARIES:
    x86_64-linux-icc
    x86_64-solaris-gcc
    x86_64-win64-gcc
+    x86_64-win64-vs8
+    x86_64-win64-vs9
    x86_64-win64-vs10
    x86_64-win64-vs11
    x86_64-win64-vs12
@@ -108,7 +113,7 @@ COMPILING THE APPLICATIONS/LIBRARIES:
  toolchain, the following command could be used (note, POSIX SH syntax, adapt
  to your shell as necessary):

-    $ CROSS=mipsel-linux-uclibc- ../libaom/configure
+    $ CROSS=mipsel-linux-uclibc- ../libvpx/configure

  In addition, the executables to be invoked can be overridden by specifying the
  environment variables: CC, AR, LD, AS, STRIP, NM. Additional flags can be
@@ -119,28 +124,13 @@ COMPILING THE APPLICATIONS/LIBRARIES:
  This defaults to config.log. This should give a good indication of what went
  wrong. If not, contact us for support.

-VP8/AV1 TEST VECTORS:
+VP8/VP9 TEST VECTORS:
  The test vectors can be downloaded and verified using the build system after
  running configure. To specify an alternate directory the
-  LIBAOM_TEST_DATA_PATH environment variable can be used.
+  LIBVPX_TEST_DATA_PATH environment variable can be used.

  $ ./configure --enable-unit-tests
-  $ LIBAOM_TEST_DATA_PATH=../-test-data make testdata
-
-CODE STYLE:
-  The coding style used by this project is enforced with clang-format using the
-  configuration contained in the .clang-format file in the root of the
-  repository.
-
-  Before pushing changes for review you can format your code with:
-  # Apply clang-format to modified .c, .h and .cc files
-  $ clang-format -i --style=file \
-    $(git diff --name-only --diff-filter=ACMR '*.[hc]' '*.cc')
-
-  Check the .clang-format file for the version used to generate it if there is
-  any difference between your local formatting and the review system.
-
-  See also: http://clang.llvm.org/docs/ClangFormat.html
+  $ LIBVPX_TEST_DATA_PATH=../libvpx-test-data make testdata

 SUPPORT
  This library is an open source project supported by its community. Please
--- a/aom/aom.h
+++ b/aom/aom.h
@@ -1,160 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-/*!\defgroup aom AOM
- * \ingroup codecs
- * AOM is aom's newest video compression algorithm that uses motion
- * compensated prediction, Discrete Cosine Transform (DCT) coding of the
- * prediction error signal and context dependent entropy coding techniques
- * based on arithmetic principles. It features:
- *  - YUV 4:2:0 image format
- *  - Macro-block based coding (16x16 luma plus two 8x8 chroma)
- *  - 1/4 (1/8) pixel accuracy motion compensated prediction
- *  - 4x4 DCT transform
- *  - 128 level linear quantizer
- *  - In loop deblocking filter
- *  - Context-based entropy coding
- *
- * @{
- */
-/*!\file
- * \brief Provides controls common to both the AOM encoder and decoder.
- */
-#ifndef AOM_AOM_H_
-#define AOM_AOM_H_
-
-#include "./aom_codec.h"
-#include "./aom_image.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/*!\brief Control functions
- *
- * This set of macros defines the control functions of the AOM interface
- */
-enum aom_com_control_id {
-  /*!\brief pass in an external frame into decoder to be used as reference frame
-   */
-  AOM_SET_REFERENCE = 1,
-  AOM_COPY_REFERENCE = 2, /**< get a copy of reference frame from the decoder */
-  AOM_SET_POSTPROC = 3,   /**< set the decoder's post processing settings  */
-  AOM_SET_DBG_COLOR_REF_FRAME =
-      4, /**< set the reference frames to color for each macroblock */
-  AOM_SET_DBG_COLOR_MB_MODES = 5, /**< set which macro block modes to color */
-  AOM_SET_DBG_COLOR_B_MODES = 6,  /**< set which blocks modes to color */
-  AOM_SET_DBG_DISPLAY_MV = 7,     /**< set which motion vector modes to draw */
-
-  /* TODO(jkoleszar): The encoder incorrectly reuses some of these values (5+)
-   * for its control ids. These should be migrated to something like the
-   * AOM_DECODER_CTRL_ID_START range next time we're ready to break the ABI.
-   */
-  AV1_GET_REFERENCE = 128, /**< get a pointer to a reference frame */
-  AOM_COMMON_CTRL_ID_MAX,
-
-  AV1_GET_NEW_FRAME_IMAGE = 192, /**< get a pointer to the new frame */
-
-  AOM_DECODER_CTRL_ID_START = 256
-};
-
-/*!\brief post process flags
- *
- * The set of macros define AOM decoder post processing flags
- */
-enum aom_postproc_level {
-  AOM_NOFILTERING = 0,
-  AOM_DEBLOCK = 1 << 0,
-  AOM_DEMACROBLOCK = 1 << 1,
-  AOM_ADDNOISE = 1 << 2,
-  AOM_DEBUG_TXT_FRAME_INFO = 1 << 3, /**< print frame information */
-  AOM_DEBUG_TXT_MBLK_MODES =
-      1 << 4, /**< print macro block modes over each macro block */
-  AOM_DEBUG_TXT_DC_DIFF = 1 << 5,   /**< print dc diff for each macro block */
-  AOM_DEBUG_TXT_RATE_INFO = 1 << 6, /**< print video rate info (encoder only) */
-  AOM_MFQE = 1 << 10
-};
-
-/*!\brief post process flags
- *
- * This defines a structure that describes the post processing settings. For
- * the best objective measure (using the PSNR metric) set post_proc_flag
- * to AOM_DEBLOCK and deblocking_level to 1.
- */
-
-typedef struct aom_postproc_cfg {
-  /*!\brief the types of post processing to be done, should be combination of
-   * "aom_postproc_level" */
-  int post_proc_flag;
-  int deblocking_level; /**< the strength of deblocking, valid range [0, 16] */
-  int noise_level; /**< the strength of additive noise, valid range [0, 16] */
-} aom_postproc_cfg_t;
-
-/*!\brief reference frame type
- *
- * The set of macros define the type of AOM reference frames
- */
-typedef enum aom_ref_frame_type {
-  AOM_LAST_FRAME = 1,
-  AOM_GOLD_FRAME = 2,
-  AOM_ALTR_FRAME = 4
-} aom_ref_frame_type_t;
-
-/*!\brief reference frame data struct
- *
- * Define the data struct to access aom reference frames.
- */
-typedef struct aom_ref_frame {
-  aom_ref_frame_type_t frame_type; /**< which reference frame */
-  aom_image_t img;                 /**< reference frame data in image format */
-} aom_ref_frame_t;
-
-/*!\brief AV1 specific reference frame data struct
- *
- * Define the data struct to access av1 reference frames.
- */
-typedef struct av1_ref_frame {
-  int idx;         /**< frame index to get (input) */
-  aom_image_t img; /**< img structure to populate (output) */
-} av1_ref_frame_t;
-
-/*!\cond */
-/*!\brief aom decoder control function parameter type
- *
- * Defines the data type that each AOM decoder control function requires
- */
-AOM_CTRL_USE_TYPE(AOM_SET_REFERENCE, aom_ref_frame_t *)
-#define AOM_CTRL_AOM_SET_REFERENCE
-AOM_CTRL_USE_TYPE(AOM_COPY_REFERENCE, aom_ref_frame_t *)
-#define AOM_CTRL_AOM_COPY_REFERENCE
-AOM_CTRL_USE_TYPE(AOM_SET_POSTPROC, aom_postproc_cfg_t *)
-#define AOM_CTRL_AOM_SET_POSTPROC
-AOM_CTRL_USE_TYPE(AOM_SET_DBG_COLOR_REF_FRAME, int)
-#define AOM_CTRL_AOM_SET_DBG_COLOR_REF_FRAME
-AOM_CTRL_USE_TYPE(AOM_SET_DBG_COLOR_MB_MODES, int)
-#define AOM_CTRL_AOM_SET_DBG_COLOR_MB_MODES
-AOM_CTRL_USE_TYPE(AOM_SET_DBG_COLOR_B_MODES, int)
-#define AOM_CTRL_AOM_SET_DBG_COLOR_B_MODES
-AOM_CTRL_USE_TYPE(AOM_SET_DBG_DISPLAY_MV, int)
-#define AOM_CTRL_AOM_SET_DBG_DISPLAY_MV
-AOM_CTRL_USE_TYPE(AV1_GET_REFERENCE, av1_ref_frame_t *)
-#define AOM_CTRL_AV1_GET_REFERENCE
-AOM_CTRL_USE_TYPE(AV1_GET_NEW_FRAME_IMAGE, aom_image_t *)
-#define AOM_CTRL_AV1_GET_NEW_FRAME_IMAGE
-
-/*!\endcond */
-/*! @} - end defgroup aom */
-
-#ifdef __cplusplus
-}  // extern "C"
-#endif
-
-#endif  // AOM_AOM_H_
--- a/aom/aom_codec.h
+++ b/aom/aom_codec.h
@@ -1,487 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-/*!\defgroup codec Common Algorithm Interface
- * This abstraction allows applications to easily support multiple video
- * formats with minimal code duplication. This section describes the interface
- * common to all codecs (both encoders and decoders).
- * @{
- */
-
-/*!\file
- * \brief Describes the codec algorithm interface to applications.
- *
- * This file describes the interface between an application and a
- * video codec algorithm.
- *
- * An application instantiates a specific codec instance by using
- * aom_codec_init() and a pointer to the algorithm's interface structure:
- *     <pre>
- *     my_app.c:
- *       extern aom_codec_iface_t my_codec;
- *       {
- *           aom_codec_ctx_t algo;
- *           res = aom_codec_init(&algo, &my_codec);
- *       }
- *     </pre>
- *
- * Once initialized, the instance is managed using other functions from
- * the aom_codec_* family.
- */
-#ifndef AOM_AOM_CODEC_H_
-#define AOM_AOM_CODEC_H_
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#include "./aom_integer.h"
-#include "./aom_image.h"
-
-/*!\brief Decorator indicating a function is deprecated */
-#ifndef DEPRECATED
-#if defined(__GNUC__) && __GNUC__
-#define DEPRECATED __attribute__((deprecated))
-#elif defined(_MSC_VER)
-#define DEPRECATED
-#else
-#define DEPRECATED
-#endif
-#endif /* DEPRECATED */
-
-#ifndef DECLSPEC_DEPRECATED
-#if defined(__GNUC__) && __GNUC__
-#define DECLSPEC_DEPRECATED /**< \copydoc #DEPRECATED */
-#elif defined(_MSC_VER)
-/*!\brief \copydoc #DEPRECATED */
-#define DECLSPEC_DEPRECATED __declspec(deprecated)
-#else
-#define DECLSPEC_DEPRECATED /**< \copydoc #DEPRECATED */
-#endif
-#endif /* DECLSPEC_DEPRECATED */
-
-/*!\brief Decorator indicating a function is potentially unused */
-#ifdef UNUSED
-#elif defined(__GNUC__) || defined(__clang__)
-#define UNUSED __attribute__((unused))
-#else
-#define UNUSED
-#endif
-
-/*!\brief Decorator indicating that given struct/union/enum is packed */
-#ifndef ATTRIBUTE_PACKED
-#if defined(__GNUC__) && __GNUC__
-#define ATTRIBUTE_PACKED __attribute__((packed))
-#elif defined(_MSC_VER)
-#define ATTRIBUTE_PACKED
-#else
-#define ATTRIBUTE_PACKED
-#endif
-#endif /* ATTRIBUTE_PACKED */
-
-/*!\brief Current ABI version number
- *
- * \internal
- * If this file is altered in any way that changes the ABI, this value
- * must be bumped.  Examples include, but are not limited to, changing
- * types, removing or reassigning enums, adding/removing/rearranging
- * fields to structures
- */
-#define AOM_CODEC_ABI_VERSION (3 + AOM_IMAGE_ABI_VERSION) /**<\hideinitializer*/
-
-/*!\brief Algorithm return codes */
-typedef enum {
-  /*!\brief Operation completed without error */
-  AOM_CODEC_OK,
-
-  /*!\brief Unspecified error */
-  AOM_CODEC_ERROR,
-
-  /*!\brief Memory operation failed */
-  AOM_CODEC_MEM_ERROR,
-
-  /*!\brief ABI version mismatch */
-  AOM_CODEC_ABI_MISMATCH,
-
-  /*!\brief Algorithm does not have required capability */
-  AOM_CODEC_INCAPABLE,
-
-  /*!\brief The given bitstream is not supported.
-   *
-   * The bitstream was unable to be parsed at the highest level. The decoder
-   * is unable to proceed. This error \ref SHOULD be treated as fatal to the
-   * stream. */
-  AOM_CODEC_UNSUP_BITSTREAM,
-
-  /*!\brief Encoded bitstream uses an unsupported feature
-   *
-   * The decoder does not implement a feature required by the encoder. This
-   * return code should only be used for features that prevent future
-   * pictures from being properly decoded. This error \ref MAY be treated as
-   * fatal to the stream or \ref MAY be treated as fatal to the current GOP.
-   */
-  AOM_CODEC_UNSUP_FEATURE,
-
-  /*!\brief The coded data for this stream is corrupt or incomplete
-   *
-   * There was a problem decoding the current frame.  This return code
-   * should only be used for failures that prevent future pictures from
-   * being properly decoded. This error \ref MAY be treated as fatal to the
-   * stream or \ref MAY be treated as fatal to the current GOP. If decoding
-   * is continued for the current GOP, artifacts may be present.
-   */
-  AOM_CODEC_CORRUPT_FRAME,
-
-  /*!\brief An application-supplied parameter is not valid.
-   *
-   */
-  AOM_CODEC_INVALID_PARAM,
-
-  /*!\brief An iterator reached the end of list.
-   *
-   */
-  AOM_CODEC_LIST_END
-
-} aom_codec_err_t;
-
-/*! \brief Codec capabilities bitfield
- *
- *  Each codec advertises the capabilities it supports as part of its
- *  ::aom_codec_iface_t interface structure. Capabilities are extra interfaces
- *  or functionality, and are not required to be supported.
- *
- *  The available flags are specified by AOM_CODEC_CAP_* defines.
- */
-typedef long aom_codec_caps_t;
-#define AOM_CODEC_CAP_DECODER 0x1 /**< Is a decoder */
-#define AOM_CODEC_CAP_ENCODER 0x2 /**< Is an encoder */
-
-/*! \brief Initialization-time Feature Enabling
- *
- *  Certain codec features must be known at initialization time, to allow for
- *  proper memory allocation.
- *
- *  The available flags are specified by AOM_CODEC_USE_* defines.
- */
-typedef long aom_codec_flags_t;
-
-/*!\brief Codec interface structure.
- *
- * Contains function pointers and other data private to the codec
- * implementation. This structure is opaque to the application.
- */
-typedef const struct aom_codec_iface aom_codec_iface_t;
-
-/*!\brief Codec private data structure.
- *
- * Contains data private to the codec implementation. This structure is opaque
- * to the application.
- */
-typedef struct aom_codec_priv aom_codec_priv_t;
-
-/*!\brief Iterator
- *
- * Opaque storage used for iterating over lists.
- */
-typedef const void *aom_codec_iter_t;
-
-/*!\brief Codec context structure
- *
- * All codecs \ref MUST support this context structure fully. In general,
- * this data should be considered private to the codec algorithm, and
- * not be manipulated or examined by the calling application. Applications
- * may reference the 'name' member to get a printable description of the
- * algorithm.
- */
-typedef struct aom_codec_ctx {
-  const char *name;             /**< Printable interface name */
-  aom_codec_iface_t *iface;     /**< Interface pointers */
-  aom_codec_err_t err;          /**< Last returned error */
-  const char *err_detail;       /**< Detailed info, if available */
-  aom_codec_flags_t init_flags; /**< Flags passed at init time */
-  union {
-    /**< Decoder Configuration Pointer */
-    const struct aom_codec_dec_cfg *dec;
-    /**< Encoder Configuration Pointer */
-    const struct aom_codec_enc_cfg *enc;
-    const void *raw;
-  } config;               /**< Configuration pointer aliasing union */
-  aom_codec_priv_t *priv; /**< Algorithm private storage */
-} aom_codec_ctx_t;
-
-/*!\brief Bit depth for codec
- *
- * This enumeration determines the bit depth of the codec.
- */
-typedef enum aom_bit_depth {
-  AOM_BITS_8 = 8,   /**<  8 bits */
-  AOM_BITS_10 = 10, /**< 10 bits */
-  AOM_BITS_12 = 12, /**< 12 bits */
-} aom_bit_depth_t;
-
-/*!\brief Superblock size selection.
- *
- * Defines the superblock size used for encoding. The superblock size can
- * either be fixed at 64x64 or 128x128 pixels, or it can be dynamically
- * selected by the encoder for each frame.
- */
-typedef enum aom_superblock_size {
-  AOM_SUPERBLOCK_SIZE_64X64,   /**< Always use 64x64 superblocks. */
-  AOM_SUPERBLOCK_SIZE_128X128, /**< Always use 128x128 superblocks. */
-  AOM_SUPERBLOCK_SIZE_DYNAMIC  /**< Select superblock size dynamically. */
-} aom_superblock_size_t;
-
-/*
- * Library Version Number Interface
- *
- * For example, see the following sample return values:
- *     aom_codec_version()           (1<<16 | 2<<8 | 3)
- *     aom_codec_version_str()       "v1.2.3-rc1-16-gec6a1ba"
- *     aom_codec_version_extra_str() "rc1-16-gec6a1ba"
- */
-
-/*!\brief Return the version information (as an integer)
- *
- * Returns a packed encoding of the library version number. This will only
- * include
- * the major.minor.patch component of the version number. Note that this encoded
- * value should be accessed through the macros provided, as the encoding may
- * change
- * in the future.
- *
- */
-int aom_codec_version(void);
-#define AOM_VERSION_MAJOR(v) \
-  ((v >> 16) & 0xff) /**< extract major from packed version */
-#define AOM_VERSION_MINOR(v) \
-  ((v >> 8) & 0xff) /**< extract minor from packed version */
-#define AOM_VERSION_PATCH(v) \
-  ((v >> 0) & 0xff) /**< extract patch from packed version */
-
-/*!\brief Return the version major number */
-#define aom_codec_version_major() ((aom_codec_version() >> 16) & 0xff)
-
-/*!\brief Return the version minor number */
-#define aom_codec_version_minor() ((aom_codec_version() >> 8) & 0xff)
-
-/*!\brief Return the version patch number */
-#define aom_codec_version_patch() ((aom_codec_version() >> 0) & 0xff)
-
-/*!\brief Return the version information (as a string)
- *
- * Returns a printable string containing the full library version number. This
- * may
- * contain additional text following the three digit version number, as to
- * indicate
- * release candidates, prerelease versions, etc.
- *
- */
-const char *aom_codec_version_str(void);
-
-/*!\brief Return the version information (as a string)
- *
- * Returns a printable "extra string". This is the component of the string
- * returned
- * by aom_codec_version_str() following the three digit version number.
- *
- */
-const char *aom_codec_version_extra_str(void);
-
-/*!\brief Return the build configuration
- *
- * Returns a printable string containing an encoded version of the build
- * configuration. This may be useful to aom support.
- *
- */
-const char *aom_codec_build_config(void);
-
-/*!\brief Return the name for a given interface
- *
- * Returns a human readable string for name of the given codec interface.
- *
- * \param[in]    iface     Interface pointer
- *
- */
-const char *aom_codec_iface_name(aom_codec_iface_t *iface);
-
-/*!\brief Convert error number to printable string
- *
- * Returns a human readable string for the last error returned by the
- * algorithm. The returned error will be one line and will not contain
- * any newline characters.
- *
- *
- * \param[in]    err     Error number.
- *
- */
-const char *aom_codec_err_to_string(aom_codec_err_t err);
-
-/*!\brief Retrieve error synopsis for codec context
- *
- * Returns a human readable string for the last error returned by the
- * algorithm. The returned error will be one line and will not contain
- * any newline characters.
- *
- *
- * \param[in]    ctx     Pointer to this instance's context.
- *
- */
-const char *aom_codec_error(aom_codec_ctx_t *ctx);
-
-/*!\brief Retrieve detailed error information for codec context
- *
- * Returns a human readable string providing detailed information about
- * the last error.
- *
- * \param[in]    ctx     Pointer to this instance's context.
- *
- * \retval NULL
- *     No detailed information is available.
- */
-const char *aom_codec_error_detail(aom_codec_ctx_t *ctx);
-
-/* REQUIRED FUNCTIONS
- *
- * The following functions are required to be implemented for all codecs.
- * They represent the base case functionality expected of all codecs.
- */
-
-/*!\brief Destroy a codec instance
- *
- * Destroys a codec context, freeing any associated memory buffers.
- *
- * \param[in] ctx   Pointer to this instance's context
- *
- * \retval #AOM_CODEC_OK
- *     The codec instance has been destroyed.
- * \retval #AOM_CODEC_MEM_ERROR
- *     Memory allocation failed.
- */
-aom_codec_err_t aom_codec_destroy(aom_codec_ctx_t *ctx);
-
-/*!\brief Get the capabilities of an algorithm.
- *
- * Retrieves the capabilities bitfield from the algorithm's interface.
- *
- * \param[in] iface   Pointer to the algorithm interface
- *
- */
-aom_codec_caps_t aom_codec_get_caps(aom_codec_iface_t *iface);
-
-/*!\brief Control algorithm
- *
- * This function is used to exchange algorithm specific data with the codec
- * instance. This can be used to implement features specific to a particular
- * algorithm.
- *
- * This wrapper function dispatches the request to the helper function
- * associated with the given ctrl_id. It tries to call this function
- * transparently, but will return #AOM_CODEC_ERROR if the request could not
- * be dispatched.
- *
- * Note that this function should not be used directly. Call the
- * #aom_codec_control wrapper macro instead.
- *
- * \param[in]     ctx              Pointer to this instance's context
- * \param[in]     ctrl_id          Algorithm specific control identifier
- *
- * \retval #AOM_CODEC_OK
- *     The control request was processed.
- * \retval #AOM_CODEC_ERROR
- *     The control request was not processed.
- * \retval #AOM_CODEC_INVALID_PARAM
- *     The data was not valid.
- */
-aom_codec_err_t aom_codec_control_(aom_codec_ctx_t *ctx, int ctrl_id, ...);
-#if defined(AOM_DISABLE_CTRL_TYPECHECKS) && AOM_DISABLE_CTRL_TYPECHECKS
-#define aom_codec_control(ctx, id, data) aom_codec_control_(ctx, id, data)
-#define AOM_CTRL_USE_TYPE(id, typ)
-#define AOM_CTRL_USE_TYPE_DEPRECATED(id, typ)
-#define AOM_CTRL_VOID(id, typ)
-
-#else
-/*!\brief aom_codec_control wrapper macro
- *
- * This macro allows for type safe conversions across the variadic parameter
- * to aom_codec_control_().
- *
- * \internal
- * It works by dispatching the call to the control function through a wrapper
- * function named with the id parameter.
- */
-#define aom_codec_control(ctx, id, data) \
-  aom_codec_control_##id(ctx, id, data) /**<\hideinitializer*/
-
-/*!\brief aom_codec_control type definition macro
- *
- * This macro allows for type safe conversions across the variadic parameter
- * to aom_codec_control_(). It defines the type of the argument for a given
- * control identifier.
- *
- * \internal
- * It defines a static function with
- * the correctly typed arguments as a wrapper to the type-unsafe internal
- * function.
- */
-#define AOM_CTRL_USE_TYPE(id, typ)                                           \
-  static aom_codec_err_t aom_codec_control_##id(aom_codec_ctx_t *, int, typ) \
-      UNUSED;                                                                \
-                                                                             \
-  static aom_codec_err_t aom_codec_control_##id(aom_codec_ctx_t *ctx,        \
-                                                int ctrl_id, typ data) {     \
-    return aom_codec_control_(ctx, ctrl_id, data);                           \
-  } /**<\hideinitializer*/
-
-/*!\brief aom_codec_control deprecated type definition macro
- *
- * Like #AOM_CTRL_USE_TYPE, but indicates that the specified control is
- * deprecated and should not be used. Consult the documentation for your
- * codec for more information.
- *
- * \internal
- * It defines a static function with the correctly typed arguments as a
- * wrapper to the type-unsafe internal function.
- */
-#define AOM_CTRL_USE_TYPE_DEPRECATED(id, typ)                        \
-  DECLSPEC_DEPRECATED static aom_codec_err_t aom_codec_control_##id( \
-      aom_codec_ctx_t *, int, typ) DEPRECATED UNUSED;                \
-                                                                     \
-  DECLSPEC_DEPRECATED static aom_codec_err_t aom_codec_control_##id( \
-      aom_codec_ctx_t *ctx, int ctrl_id, typ data) {                 \
-    return aom_codec_control_(ctx, ctrl_id, data);                   \
-  } /**<\hideinitializer*/
-
-/*!\brief aom_codec_control void type definition macro
- *
- * This macro allows for type safe conversions across the variadic parameter
- * to aom_codec_control_(). It indicates that a given control identifier takes
- * no argument.
- *
- * \internal
- * It defines a static function without a data argument as a wrapper to the
- * type-unsafe internal function.
- */
-#define AOM_CTRL_VOID(id)                                               \
-  static aom_codec_err_t aom_codec_control_##id(aom_codec_ctx_t *, int) \
-      UNUSED;                                                           \
-                                                                        \
-  static aom_codec_err_t aom_codec_control_##id(aom_codec_ctx_t *ctx,   \
-                                                int ctrl_id) {          \
-    return aom_codec_control_(ctx, ctrl_id);                            \
-  } /**<\hideinitializer*/
-
-#endif
-
-/*!@} - end defgroup codec*/
-#ifdef __cplusplus
-}
-#endif
-#endif  // AOM_AOM_CODEC_H_
--- a/aom/aom_codec.mk
+++ b/aom/aom_codec.mk
@@ -1,42 +0,0 @@
-##
-## Copyright (c) 2016, Alliance for Open Media. All rights reserved
-##
-## This source code is subject to the terms of the BSD 2 Clause License and
-## the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
-## was not distributed with this source code in the LICENSE file, you can
-## obtain it at www.aomedia.org/license/software. If the Alliance for Open
-## Media Patent License 1.0 was not distributed with this source code in the
-## PATENTS file, you can obtain it at www.aomedia.org/license/patent.
-##
-
-
-API_EXPORTS += exports
-
-API_SRCS-$(CONFIG_AV1_ENCODER) += aom.h
-API_SRCS-$(CONFIG_AV1_ENCODER) += aomcx.h
-API_DOC_SRCS-$(CONFIG_AV1_ENCODER) += aom.h
-API_DOC_SRCS-$(CONFIG_AV1_ENCODER) += aomcx.h
-
-API_SRCS-$(CONFIG_AV1_DECODER) += aom.h
-API_SRCS-$(CONFIG_AV1_DECODER) += aomdx.h
-API_DOC_SRCS-$(CONFIG_AV1_DECODER) += aom.h
-API_DOC_SRCS-$(CONFIG_AV1_DECODER) += aomdx.h
-
-API_DOC_SRCS-yes += aom_codec.h
-API_DOC_SRCS-yes += aom_decoder.h
-API_DOC_SRCS-yes += aom_encoder.h
-API_DOC_SRCS-yes += aom_frame_buffer.h
-API_DOC_SRCS-yes += aom_image.h
-
-API_SRCS-yes += src/aom_decoder.c
-API_SRCS-yes += aom_decoder.h
-API_SRCS-yes += src/aom_encoder.c
-API_SRCS-yes += aom_encoder.h
-API_SRCS-yes += internal/aom_codec_internal.h
-API_SRCS-yes += src/aom_codec.c
-API_SRCS-yes += src/aom_image.c
-API_SRCS-yes += aom_codec.h
-API_SRCS-yes += aom_codec.mk
-API_SRCS-yes += aom_frame_buffer.h
-API_SRCS-yes += aom_image.h
-API_SRCS-yes += aom_integer.h
--- a/aom/aom_decoder.h
+++ b/aom/aom_decoder.h
@@ -1,366 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-#ifndef AOM_AOM_DECODER_H_
-#define AOM_AOM_DECODER_H_
-
-/*!\defgroup decoder Decoder Algorithm Interface
- * \ingroup codec
- * This abstraction allows applications using this decoder to easily support
- * multiple video formats with minimal code duplication. This section describes
- * the interface common to all decoders.
- * @{
- */
-
-/*!\file
- * \brief Describes the decoder algorithm interface to applications.
- *
- * This file describes the interface between an application and a
- * video decoder algorithm.
- *
- */
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#include "./aom_codec.h"
-#include "./aom_frame_buffer.h"
-
-/*!\brief Current ABI version number
- *
- * \internal
- * If this file is altered in any way that changes the ABI, this value
- * must be bumped.  Examples include, but are not limited to, changing
- * types, removing or reassigning enums, adding/removing/rearranging
- * fields to structures
- */
-#define AOM_DECODER_ABI_VERSION \
-  (3 + AOM_CODEC_ABI_VERSION) /**<\hideinitializer*/
-
-/*! \brief Decoder capabilities bitfield
- *
- *  Each decoder advertises the capabilities it supports as part of its
- *  ::aom_codec_iface_t interface structure. Capabilities are extra interfaces
- *  or functionality, and are not required to be supported by a decoder.
- *
- *  The available flags are specified by AOM_CODEC_CAP_* defines.
- */
-#define AOM_CODEC_CAP_PUT_SLICE 0x10000 /**< Will issue put_slice callbacks */
-#define AOM_CODEC_CAP_PUT_FRAME 0x20000 /**< Will issue put_frame callbacks */
-#define AOM_CODEC_CAP_POSTPROC 0x40000  /**< Can postprocess decoded frame */
-/*!\brief Can conceal errors due to packet loss */
-#define AOM_CODEC_CAP_ERROR_CONCEALMENT 0x80000
-/*!\brief Can receive encoded frames one fragment at a time */
-#define AOM_CODEC_CAP_INPUT_FRAGMENTS 0x100000
-
-/*! \brief Initialization-time Feature Enabling
- *
- *  Certain codec features must be known at initialization time, to allow for
- *  proper memory allocation.
- *
- *  The available flags are specified by AOM_CODEC_USE_* defines.
- */
-/*!\brief Can support frame-based multi-threading */
-#define AOM_CODEC_CAP_FRAME_THREADING 0x200000
-/*!\brief Can support external frame buffers */
-#define AOM_CODEC_CAP_EXTERNAL_FRAME_BUFFER 0x400000
-
-#define AOM_CODEC_USE_POSTPROC 0x10000 /**< Postprocess decoded frame */
-/*!\brief Conceal errors in decoded frames */
-#define AOM_CODEC_USE_ERROR_CONCEALMENT 0x20000
-/*!\brief The input frame should be passed to the decoder one fragment at a
- * time */
-#define AOM_CODEC_USE_INPUT_FRAGMENTS 0x40000
-/*!\brief Enable frame-based multi-threading */
-#define AOM_CODEC_USE_FRAME_THREADING 0x80000
-
-/*!\brief Stream properties
- *
- * This structure is used to query or set properties of the decoded
- * stream. Algorithms may extend this structure with data specific
- * to their bitstream by setting the sz member appropriately.
- */
-typedef struct aom_codec_stream_info {
-  unsigned int sz;    /**< Size of this structure */
-  unsigned int w;     /**< Width (or 0 for unknown/default) */
-  unsigned int h;     /**< Height (or 0 for unknown/default) */
-  unsigned int is_kf; /**< Current frame is a keyframe */
-} aom_codec_stream_info_t;
-
-/* REQUIRED FUNCTIONS
- *
- * The following functions are required to be implemented for all decoders.
- * They represent the base case functionality expected of all decoders.
- */
-
-/*!\brief Initialization Configurations
- *
- * This structure is used to pass init time configuration options to the
- * decoder.
- */
-typedef struct aom_codec_dec_cfg {
-  unsigned int threads; /**< Maximum number of threads to use, default 1 */
-  unsigned int w;       /**< Width */
-  unsigned int h;       /**< Height */
-} aom_codec_dec_cfg_t;  /**< alias for struct aom_codec_dec_cfg */
-
-/*!\brief Initialize a decoder instance
- *
- * Initializes a decoder context using the given interface. Applications
- * should call the aom_codec_dec_init convenience macro instead of this
- * function directly, to ensure that the ABI version number parameter
- * is properly initialized.
- *
- * If the library was configured with --disable-multithread, this call
- * is not thread safe and should be guarded with a lock if being used
- * in a multithreaded context.
- *
- * \param[in]    ctx     Pointer to this instance's context.
- * \param[in]    iface   Pointer to the algorithm interface to use.
- * \param[in]    cfg     Configuration to use, if known. May be NULL.
- * \param[in]    flags   Bitfield of AOM_CODEC_USE_* flags
- * \param[in]    ver     ABI version number. Must be set to
- *                       AOM_DECODER_ABI_VERSION
- * \retval #AOM_CODEC_OK
- *     The decoder algorithm initialized.
- * \retval #AOM_CODEC_MEM_ERROR
- *     Memory allocation failed.
- */
-aom_codec_err_t aom_codec_dec_init_ver(aom_codec_ctx_t *ctx,
-                                       aom_codec_iface_t *iface,
-                                       const aom_codec_dec_cfg_t *cfg,
-                                       aom_codec_flags_t flags, int ver);
-
-/*!\brief Convenience macro for aom_codec_dec_init_ver()
- *
- * Ensures the ABI version parameter is properly set.
- */
-#define aom_codec_dec_init(ctx, iface, cfg, flags) \
-  aom_codec_dec_init_ver(ctx, iface, cfg, flags, AOM_DECODER_ABI_VERSION)
-
-/*!\brief Parse stream info from a buffer
- *
- * Performs high level parsing of the bitstream. Construction of a decoder
- * context is not necessary. Can be used to determine if the bitstream is
- * of the proper format, and to extract information from the stream.
- *
- * \param[in]      iface   Pointer to the algorithm interface
- * \param[in]      data    Pointer to a block of data to parse
- * \param[in]      data_sz Size of the data buffer
- * \param[in,out]  si      Pointer to stream info to update. The size member
- *                         \ref MUST be properly initialized, but \ref MAY be
- *                         clobbered by the algorithm. This parameter \ref MAY
- *                         be NULL.
- *
- * \retval #AOM_CODEC_OK
- *     Bitstream is parsable and stream information updated
- */
-aom_codec_err_t aom_codec_peek_stream_info(aom_codec_iface_t *iface,
-                                           const uint8_t *data,
-                                           unsigned int data_sz,
-                                           aom_codec_stream_info_t *si);
-
-/*!\brief Return information about the current stream.
- *
- * Returns information about the stream that has been parsed during decoding.
- *
- * \param[in]      ctx     Pointer to this instance's context
- * \param[in,out]  si      Pointer to stream info to update. The size member
- *                         \ref MUST be properly initialized, but \ref MAY be
- *                         clobbered by the algorithm. This parameter \ref MAY
- *                         be NULL.
- *
- * \retval #AOM_CODEC_OK
- *     Bitstream is parsable and stream information updated
- */
-aom_codec_err_t aom_codec_get_stream_info(aom_codec_ctx_t *ctx,
-                                          aom_codec_stream_info_t *si);
-
-/*!\brief Decode data
- *
- * Processes a buffer of coded data. If the processing results in a new
- * decoded frame becoming available, PUT_SLICE and PUT_FRAME events may be
- * generated, as appropriate. Encoded data \ref MUST be passed in DTS (decode
- * time stamp) order. Frames produced will always be in PTS (presentation
- * time stamp) order.
- * If the decoder is configured with AOM_CODEC_USE_INPUT_FRAGMENTS enabled,
- * data and data_sz can contain a fragment of the encoded frame. Fragment
- * \#n must contain at least partition \#n, but can also contain subsequent
- * partitions (\#n+1 - \#n+i), and if so, fragments \#n+1, .., \#n+i must
- * be empty. When no more data is available, this function should be called
- * with NULL as data and 0 as data_sz. The memory passed to this function
- * must be available until the frame has been decoded.
- *
- * \param[in] ctx          Pointer to this instance's context
- * \param[in] data         Pointer to this block of new coded data. If
- *                         NULL, an AOM_CODEC_CB_PUT_FRAME event is posted
- *                         for the previously decoded frame.
- * \param[in] data_sz      Size of the coded data, in bytes.
- * \param[in] user_priv    Application specific data to associate with
- *                         this frame.
- * \param[in] deadline     Soft deadline the decoder should attempt to meet,
- *                         in us. Set to zero for unlimited.
- *
- * \return Returns #AOM_CODEC_OK if the coded data was processed completely
- *         and future pictures can be decoded without error. Otherwise,
- *         see the descriptions of the other error codes in ::aom_codec_err_t
- *         for recoverability capabilities.
- */
-aom_codec_err_t aom_codec_decode(aom_codec_ctx_t *ctx, const uint8_t *data,
-                                 unsigned int data_sz, void *user_priv,
-                                 long deadline);
-
-/*!\brief Decoded frames iterator
- *
- * Iterates over a list of the frames available for display. The iterator
- * storage should be initialized to NULL to start the iteration. Iteration is
- * complete when this function returns NULL.
- *
- * The list of available frames becomes valid upon completion of the
- * aom_codec_decode call, and remains valid until the next call to
- * aom_codec_decode.
- *
- * \param[in]     ctx      Pointer to this instance's context
- * \param[in,out] iter     Iterator storage, initialized to NULL
- *
- * \return Returns a pointer to an image, if one is ready for display. Frames
- *         produced will always be in PTS (presentation time stamp) order.
- */
-aom_image_t *aom_codec_get_frame(aom_codec_ctx_t *ctx, aom_codec_iter_t *iter);
-
-/*!\defgroup cap_put_frame Frame-Based Decoding Functions
- *
- * The following functions are required to be implemented for all decoders
- * that advertise the AOM_CODEC_CAP_PUT_FRAME capability. Calling these
- * functions
- * for codecs that don't advertise this capability will result in an error
- * code being returned, usually AOM_CODEC_ERROR
- * @{
- */
-
-/*!\brief put frame callback prototype
- *
- * This callback is invoked by the decoder to notify the application of
- * the availability of decoded image data.
- */
-typedef void (*aom_codec_put_frame_cb_fn_t)(void *user_priv,
-                                            const aom_image_t *img);
-
-/*!\brief Register for notification of frame completion.
- *
- * Registers a given function to be called when a decoded frame is
- * available.
- *
- * \param[in] ctx          Pointer to this instance's context
- * \param[in] cb           Pointer to the callback function
- * \param[in] user_priv    User's private data
- *
- * \retval #AOM_CODEC_OK
- *     Callback successfully registered.
- * \retval #AOM_CODEC_ERROR
- *     Decoder context not initialized, or algorithm not capable of
- *     posting frame completion.
- */
-aom_codec_err_t aom_codec_register_put_frame_cb(aom_codec_ctx_t *ctx,
-                                                aom_codec_put_frame_cb_fn_t cb,
-                                                void *user_priv);
-
-/*!@} - end defgroup cap_put_frame */
-
-/*!\defgroup cap_put_slice Slice-Based Decoding Functions
- *
- * The following functions are required to be implemented for all decoders
- * that advertise the AOM_CODEC_CAP_PUT_SLICE capability. Calling these
- * functions
- * for codecs that don't advertise this capability will result in an error
- * code being returned, usually AOM_CODEC_ERROR
- * @{
- */
-
-/*!\brief put slice callback prototype
- *
- * This callback is invoked by the decoder to notify the application of
- * the availability of partially decoded image data.
- */
-typedef void (*aom_codec_put_slice_cb_fn_t)(void *user_priv,
-                                            const aom_image_t *img,
-                                            const aom_image_rect_t *valid,
-                                            const aom_image_rect_t *update);
-
-/*!\brief Register for notification of slice completion.
- *
- * Registers a given function to be called when a decoded slice is
- * available.
- *
- * \param[in] ctx          Pointer to this instance's context
- * \param[in] cb           Pointer to the callback function
- * \param[in] user_priv    User's private data
- *
- * \retval #AOM_CODEC_OK
- *     Callback successfully registered.
- * \retval #AOM_CODEC_ERROR
- *     Decoder context not initialized, or algorithm not capable of
- *     posting slice completion.
- */
-aom_codec_err_t aom_codec_register_put_slice_cb(aom_codec_ctx_t *ctx,
-                                                aom_codec_put_slice_cb_fn_t cb,
-                                                void *user_priv);
-
-/*!@} - end defgroup cap_put_slice*/
-
-/*!\defgroup cap_external_frame_buffer External Frame Buffer Functions
- *
- * The following section is required to be implemented for all decoders
- * that advertise the AOM_CODEC_CAP_EXTERNAL_FRAME_BUFFER capability.
- * Calling this function for codecs that don't advertise this capability
- * will result in an error code being returned, usually AOM_CODEC_ERROR.
- *
- * \note
- * Currently this only works with AV1.
- * @{
- */
-
-/*!\brief Pass in external frame buffers for the decoder to use.
- *
- * Registers functions to be called when libaom needs a frame buffer
- * to decode the current frame and a function to be called when libaom does
- * not internally reference the frame buffer. This set function must
- * be called before the first call to decode or libaom will assume the
- * default behavior of allocating frame buffers internally.
- *
- * \param[in] ctx          Pointer to this instance's context
- * \param[in] cb_get       Pointer to the get callback function
- * \param[in] cb_release   Pointer to the release callback function
- * \param[in] cb_priv      Callback's private data
- *
- * \retval #AOM_CODEC_OK
- *     External frame buffers will be used by libaom.
- * \retval #AOM_CODEC_INVALID_PARAM
- *     One or more of the callbacks were NULL.
- * \retval #AOM_CODEC_ERROR
- *     Decoder context not initialized, or algorithm not capable of
- *     using external frame buffers.
- *
- * \note
- * When decoding AV1, the application may be required to pass in at least
- * #AOM_MAXIMUM_WORK_BUFFERS external frame
- * buffers.
- */
-aom_codec_err_t aom_codec_set_frame_buffer_functions(
-    aom_codec_ctx_t *ctx, aom_get_frame_buffer_cb_fn_t cb_get,
-    aom_release_frame_buffer_cb_fn_t cb_release, void *cb_priv);
-
-/*!@} - end defgroup cap_external_frame_buffer */
-
-/*!@} - end defgroup decoder*/
-#ifdef __cplusplus
-}
-#endif
-#endif  // AOM_AOM_DECODER_H_
--- a/aom/aom_encoder.h
+++ b/aom/aom_encoder.h
@@ -1,837 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-#ifndef AOM_AOM_ENCODER_H_
-#define AOM_AOM_ENCODER_H_
-
-/*!\defgroup encoder Encoder Algorithm Interface
- * \ingroup codec
- * This abstraction allows applications using this encoder to easily support
- * multiple video formats with minimal code duplication. This section describes
- * the interface common to all encoders.
- * @{
- */
-
-/*!\file
- * \brief Describes the encoder algorithm interface to applications.
- *
- * This file describes the interface between an application and a
- * video encoder algorithm.
- *
- */
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#include "./aom_codec.h"
-
-/*!\brief Current ABI version number
- *
- * \internal
- * If this file is altered in any way that changes the ABI, this value
- * must be bumped.  Examples include, but are not limited to, changing
- * types, removing or reassigning enums, adding/removing/rearranging
- * fields to structures
- */
-#define AOM_ENCODER_ABI_VERSION \
-  (5 + AOM_CODEC_ABI_VERSION) /**<\hideinitializer*/
-
-/*! \brief Encoder capabilities bitfield
- *
- *  Each encoder advertises the capabilities it supports as part of its
- *  ::aom_codec_iface_t interface structure. Capabilities are extra
- *  interfaces or functionality, and are not required to be supported
- *  by an encoder.
- *
- *  The available flags are specified by AOM_CODEC_CAP_* defines.
- */
-#define AOM_CODEC_CAP_PSNR 0x10000 /**< Can issue PSNR packets */
-
-/*! Can output one partition at a time. Each partition is returned in its
- *  own AOM_CODEC_CX_FRAME_PKT, with the FRAME_IS_FRAGMENT flag set for
- *  every partition but the last. In this mode all frames are always
- *  returned partition by partition.
- */
-#define AOM_CODEC_CAP_OUTPUT_PARTITION 0x20000
-
-/*! Can support input images at greater than 8 bitdepth.
- */
-#define AOM_CODEC_CAP_HIGHBITDEPTH 0x40000
-
-/*! \brief Initialization-time Feature Enabling
- *
- *  Certain codec features must be known at initialization time, to allow
- *  for proper memory allocation.
- *
- *  The available flags are specified by AOM_CODEC_USE_* defines.
- */
-#define AOM_CODEC_USE_PSNR 0x10000 /**< Calculate PSNR on each frame */
-/*!\brief Make the encoder output one partition at a time. */
-#define AOM_CODEC_USE_OUTPUT_PARTITION 0x20000
-#define AOM_CODEC_USE_HIGHBITDEPTH 0x40000 /**< Use high bitdepth */
-
-/*!\brief Generic fixed size buffer structure
- *
- * This structure is able to hold a reference to any fixed size buffer.
- */
-typedef struct aom_fixed_buf {
-  void *buf;       /**< Pointer to the data */
-  size_t sz;       /**< Length of the buffer, in chars */
-} aom_fixed_buf_t; /**< alias for struct aom_fixed_buf */
-
-/*!\brief Time Stamp Type
- *
- * An integer, which when multiplied by the stream's time base, provides
- * the absolute time of a sample.
- */
-typedef int64_t aom_codec_pts_t;
-
-/*!\brief Compressed Frame Flags
- *
- * This type represents a bitfield containing information about a compressed
- * frame that may be useful to an application. The most significant 16 bits
- * can be used by an algorithm to provide additional detail, for example to
- * support frame types that are codec specific (MPEG-1 D-frames for example)
- */
-typedef uint32_t aom_codec_frame_flags_t;
-#define AOM_FRAME_IS_KEY 0x1 /**< frame is the start of a GOP */
-/*!\brief frame can be dropped without affecting the stream (no future frame
- * depends on this one) */
-#define AOM_FRAME_IS_DROPPABLE 0x2
-/*!\brief frame should be decoded but will not be shown */
-#define AOM_FRAME_IS_INVISIBLE 0x4
-/*!\brief this is a fragment of the encoded frame */
-#define AOM_FRAME_IS_FRAGMENT 0x8
-
-/*!\brief Error Resilient flags
- *
- * These flags define which error resilient features to enable in the
- * encoder. The flags are specified through the
- * aom_codec_enc_cfg::g_error_resilient variable.
- */
-typedef uint32_t aom_codec_er_flags_t;
-/*!\brief Improve resiliency against losses of whole frames */
-#define AOM_ERROR_RESILIENT_DEFAULT 0x1
-/*!\brief The frame partitions are independently decodable by the bool decoder,
- * meaning that partitions can be decoded even though earlier partitions have
- * been lost. Note that intra prediction is still done over the partition
- * boundary. */
-#define AOM_ERROR_RESILIENT_PARTITIONS 0x2
-
-/*!\brief Encoder output packet variants
- *
- * This enumeration lists the different kinds of data packets that can be
- * returned by calls to aom_codec_get_cx_data(). Algorithms \ref MAY
- * extend this list to provide additional functionality.
- */
-enum aom_codec_cx_pkt_kind {
-  AOM_CODEC_CX_FRAME_PKT,    /**< Compressed video frame */
-  AOM_CODEC_STATS_PKT,       /**< Two-pass statistics for this frame */
-  AOM_CODEC_FPMB_STATS_PKT,  /**< first pass mb statistics for this frame */
-  AOM_CODEC_PSNR_PKT,        /**< PSNR statistics for this frame */
-  AOM_CODEC_CUSTOM_PKT = 256 /**< Algorithm extensions  */
-};
-
-/*!\brief Encoder output packet
- *
- * This structure contains the different kinds of output data the encoder
- * may produce while compressing a frame.
- */
-typedef struct aom_codec_cx_pkt {
-  enum aom_codec_cx_pkt_kind kind; /**< packet variant */
-  union {
-    struct {
-      void *buf; /**< compressed data buffer */
-      size_t sz; /**< length of compressed data */
-      /*!\brief time stamp to show frame (in timebase units) */
-      aom_codec_pts_t pts;
-      /*!\brief duration to show frame (in timebase units) */
-      unsigned long duration;
-      aom_codec_frame_flags_t flags; /**< flags for this frame */
-      /*!\brief the partition id defines the decoding order of the partitions.
-       * Only applicable when "output partition" mode is enabled. First
-       * partition has id 0.*/
-      int partition_id;
-    } frame;                            /**< data for compressed frame packet */
-    aom_fixed_buf_t twopass_stats;      /**< data for two-pass packet */
-    aom_fixed_buf_t firstpass_mb_stats; /**< first pass mb packet */
-    struct aom_psnr_pkt {
-      unsigned int samples[4]; /**< Number of samples, total/y/u/v */
-      uint64_t sse[4];         /**< sum squared error, total/y/u/v */
-      double psnr[4];          /**< PSNR, total/y/u/v */
-    } psnr;                    /**< data for PSNR packet */
-    aom_fixed_buf_t raw;       /**< data for arbitrary packets */
-
-    /* This packet size is fixed to allow codecs to extend this
-     * interface without having to manage storage for raw packets,
-     * i.e., if it's smaller than 128 bytes, you can store in the
-     * packet list directly.
-     */
-    char pad[128 - sizeof(enum aom_codec_cx_pkt_kind)]; /**< fixed sz */
-  } data;                                               /**< packet data */
-} aom_codec_cx_pkt_t; /**< alias for struct aom_codec_cx_pkt */
-
-/*!\brief Rational Number
- *
- * This structure holds a fractional value.
- */
-typedef struct aom_rational {
-  int num;        /**< fraction numerator */
-  int den;        /**< fraction denominator */
-} aom_rational_t; /**< alias for struct aom_rational */
-
-/*!\brief Multi-pass Encoding Pass */
-enum aom_enc_pass {
-  AOM_RC_ONE_PASS,   /**< Single pass mode */
-  AOM_RC_FIRST_PASS, /**< First pass of multi-pass mode */
-  AOM_RC_LAST_PASS   /**< Final pass of multi-pass mode */
-};
-
-/*!\brief Rate control mode */
-enum aom_rc_mode {
-  AOM_VBR, /**< Variable Bit Rate (VBR) mode */
-  AOM_CBR, /**< Constant Bit Rate (CBR) mode */
-  AOM_CQ,  /**< Constrained Quality (CQ)  mode */
-  AOM_Q,   /**< Constant Quality (Q) mode */
-};
-
-/*!\brief Keyframe placement mode.
- *
- * This enumeration determines whether keyframes are placed automatically by
- * the encoder or whether this behavior is disabled. Older releases of this
- * SDK were implemented such that AOM_KF_FIXED meant keyframes were disabled.
- * This name is confusing for this behavior, so the new symbols to be used
- * are AOM_KF_AUTO and AOM_KF_DISABLED.
- */
-enum aom_kf_mode {
-  AOM_KF_FIXED,       /**< deprecated, implies AOM_KF_DISABLED */
-  AOM_KF_AUTO,        /**< Encoder determines optimal placement automatically */
-  AOM_KF_DISABLED = 0 /**< Encoder does not place keyframes. */
-};
-
-/*!\brief Encoded Frame Flags
- *
- * This type indicates a bitfield to be passed to aom_codec_encode(), defining
- * per-frame boolean values. By convention, bits common to all codecs will be
- * named AOM_EFLAG_*, and bits specific to an algorithm will be named
- * /algo/_eflag_*. The lower order 16 bits are reserved for common use.
- */
-typedef long aom_enc_frame_flags_t;
-#define AOM_EFLAG_FORCE_KF (1 << 0) /**< Force this frame to be a keyframe */
-
-/*!\brief Encoder configuration structure
- *
- * This structure contains the encoder settings that have common representations
- * across all codecs. This doesn't imply that all codecs support all features,
- * however.
- */
-typedef struct aom_codec_enc_cfg {
-  /*
-   * generic settings (g)
-   */
-
-  /*!\brief Algorithm specific "usage" value
-   *
-   * Algorithms may define multiple values for usage, which may convey the
-   * intent of how the application intends to use the stream. If this value
-   * is non-zero, consult the documentation for the codec to determine its
-   * meaning.
-   */
-  unsigned int g_usage;
-
-  /*!\brief Maximum number of threads to use
-   *
-   * For multi-threaded implementations, use no more than this number of
-   * threads. The codec may use fewer threads than allowed. The value
-   * 0 is equivalent to the value 1.
-   */
-  unsigned int g_threads;
-
-  /*!\brief Bitstream profile to use
-   *
-   * Some codecs support a notion of multiple bitstream profiles. Typically
-   * this maps to a set of features that are turned on or off. Often the
-   * profile to use is determined by the features of the intended decoder.
-   * Consult the documentation for the codec to determine the valid values
-   * for this parameter, or set to zero for a sane default.
-   */
-  unsigned int g_profile; /**< profile of bitstream to use */
-
-  /*!\brief Width of the frame
-   *
-   * This value identifies the presentation resolution of the frame,
-   * in pixels. Note that the frames passed as input to the encoder must
-   * have this resolution. Frames will be presented by the decoder in this
-   * resolution, independent of any spatial resampling the encoder may do.
-   */
-  unsigned int g_w;
-
-  /*!\brief Height of the frame
-   *
-   * This value identifies the presentation resolution of the frame,
-   * in pixels. Note that the frames passed as input to the encoder must
-   * have this resolution. Frames will be presented by the decoder in this
-   * resolution, independent of any spatial resampling the encoder may do.
-   */
-  unsigned int g_h;
-
-  /*!\brief Bit-depth of the codec
-   *
-   * This value identifies the bit_depth of the codec.
-   * Only certain bit-depths are supported as identified in the
-   * aom_bit_depth_t enum.
-   */
-  aom_bit_depth_t g_bit_depth;
-
-  /*!\brief Bit-depth of the input frames
-   *
-   * This value identifies the bit_depth of the input frames in bits.
-   * Note that the frames passed as input to the encoder must have
-   * this bit-depth.
-   */
-  unsigned int g_input_bit_depth;
-
-  /*!\brief Stream timebase units
-   *
-   * Indicates the smallest interval of time, in seconds, used by the stream.
-   * For fixed frame rate material, or variable frame rate material where
-   * frames are timed at a multiple of a given clock (ex: video capture),
-   * the \ref RECOMMENDED method is to set the timebase to the reciprocal
-   * of the frame rate (ex: 1001/30000 for 29.970 Hz NTSC). This allows the
-   * pts to correspond to the frame number, which can be handy. For
-   * re-encoding video from containers with absolute time timestamps, the
-   * \ref RECOMMENDED method is to set the timebase to that of the parent
-   * container or multimedia framework (ex: 1/1000 for ms, as in FLV).
-   */
-  struct aom_rational g_timebase;
-
-  /*!\brief Enable error resilient modes.
-   *
-   * The error resilient bitfield indicates to the encoder which features
-   * it should enable to take measures for streaming over lossy or noisy
-   * links.
-   */
-  aom_codec_er_flags_t g_error_resilient;
-
-  /*!\brief Multi-pass Encoding Mode
-   *
-   * This value should be set to the current phase for multi-pass encoding.
-   * For single pass, set to #AOM_RC_ONE_PASS.
-   */
-  enum aom_enc_pass g_pass;
-
-  /*!\brief Allow lagged encoding
-   *
-   * If set, this value allows the encoder to consume a number of input
-   * frames before producing output frames. This allows the encoder to
-   * base decisions for the current frame on future frames. This does
-   * increase the latency of the encoding pipeline, so it is not appropriate
-   * in all situations (ex: realtime encoding).
-   *
-   * Note that this is a maximum value -- the encoder may produce frames
-   * sooner than the given limit. Set this value to 0 to disable this
-   * feature.
-   */
-  unsigned int g_lag_in_frames;
-
-  /*
-   * rate control settings (rc)
-   */
-
-  /*!\brief Temporal resampling configuration, if supported by the codec.
-   *
-   * Temporal resampling allows the codec to "drop" frames as a strategy to
-   * meet its target data rate. This can cause temporal discontinuities in
-   * the encoded video, which may appear as stuttering during playback. This
-   * trade-off is often acceptable, but for many applications is not. It can
-   * be disabled in these cases.
-   *
-   * Note that not all codecs support this feature. All aom AVx codecs do.
-   * For other codecs, consult the documentation for that algorithm.
-   *
-   * This threshold is described as a percentage of the target data buffer.
-   * When the data buffer falls below this percentage of fullness, a
-   * dropped frame is indicated. Set the threshold to zero (0) to disable
-   * this feature.
-   */
-  unsigned int rc_dropframe_thresh;
-
-  /*!\brief Enable/disable spatial resampling, if supported by the codec.
-   *
-   * Spatial resampling allows the codec to compress a lower resolution
-   * version of the frame, which is then upscaled by the decoder to the
-   * correct presentation resolution. This increases visual quality at
-   * low data rates, at the expense of CPU time on the encoder/decoder.
-   */
-  unsigned int rc_resize_allowed;
-
-  /*!\brief Internal coded frame width.
-   *
-   * If spatial resampling is enabled this specifies the width of the
-   * encoded frame.
-   */
-  unsigned int rc_scaled_width;
-
-  /*!\brief Internal coded frame height.
-   *
-   * If spatial resampling is enabled this specifies the height of the
-   * encoded frame.
-   */
-  unsigned int rc_scaled_height;
-
-  /*!\brief Spatial resampling up watermark.
-   *
-   * This threshold is described as a percentage of the target data buffer.
-   * When the data buffer rises above this percentage of fullness, the
-   * encoder will step up to a higher resolution version of the frame.
-   */
-  unsigned int rc_resize_up_thresh;
-
-  /*!\brief Spatial resampling down watermark.
-   *
-   * This threshold is described as a percentage of the target data buffer.
-   * When the data buffer falls below this percentage of fullness, the
-   * encoder will step down to a lower resolution version of the frame.
-   */
-  unsigned int rc_resize_down_thresh;
-
-  /*!\brief Rate control algorithm to use.
-   *
-   * Indicates whether the end usage of this stream is to be streamed over
-   * a bandwidth constrained link, indicating that Constant Bit Rate (CBR)
-   * mode should be used, or whether it will be played back on a high
-   * bandwidth link, as from a local disk, where higher variations in
-   * bitrate are acceptable.
-   */
-  enum aom_rc_mode rc_end_usage;
-
-  /*!\brief Two-pass stats buffer.
-   *
-   * A buffer containing all of the stats packets produced in the first
-   * pass, concatenated.
-   */
-  aom_fixed_buf_t rc_twopass_stats_in;
-
-  /*!\brief first pass mb stats buffer.
-   *
-   * A buffer containing all of the first pass mb stats packets produced
-   * in the first pass, concatenated.
-   */
-  aom_fixed_buf_t rc_firstpass_mb_stats_in;
-
-  /*!\brief Target data rate
-   *
-   * Target bandwidth to use for this stream, in kilobits per second.
-   */
-  unsigned int rc_target_bitrate;
-
-  /*
-   * quantizer settings
-   */
-
-  /*!\brief Minimum (Best Quality) Quantizer
-   *
-   * The quantizer is the most direct control over the quality of the
-   * encoded image. The range of valid values for the quantizer is codec
-   * specific. Consult the documentation for the codec to determine the
-   * values to use. To determine the range programmatically, call
-   * aom_codec_enc_config_default() with a usage value of 0.
-   */
-  unsigned int rc_min_quantizer;
-
-  /*!\brief Maximum (Worst Quality) Quantizer
-   *
-   * The quantizer is the most direct control over the quality of the
-   * encoded image. The range of valid values for the quantizer is codec
-   * specific. Consult the documentation for the codec to determine the
-   * values to use. To determine the range programmatically, call
-   * aom_codec_enc_config_default() with a usage value of 0.
-   */
-  unsigned int rc_max_quantizer;
-
-  /*
-   * bitrate tolerance
-   */
-
-  /*!\brief Rate control adaptation undershoot control
-   *
-   * This value, expressed as a percentage of the target bitrate,
-   * controls the maximum allowed adaptation speed of the codec.
-   * This factor controls the maximum amount of bits that can
-   * be subtracted from the target bitrate in order to compensate
-   * for prior overshoot.
-   *
-   * Valid values in the range 0-1000.
-   */
-  unsigned int rc_undershoot_pct;
-
-  /*!\brief Rate control adaptation overshoot control
-   *
-   * This value, expressed as a percentage of the target bitrate,
-   * controls the maximum allowed adaptation speed of the codec.
-   * This factor controls the maximum amount of bits that can
-   * be added to the target bitrate in order to compensate for
-   * prior undershoot.
-   *
-   * Valid values in the range 0-1000.
-   */
-  unsigned int rc_overshoot_pct;
-
-  /*
-   * decoder buffer model parameters
-   */
-
-  /*!\brief Decoder Buffer Size
-   *
-   * This value indicates the amount of data that may be buffered by the
-   * decoding application. Note that this value is expressed in units of
-   * time (milliseconds). For example, a value of 5000 indicates that the
-   * client will buffer (at least) 5000ms worth of encoded data. Use the
-   * target bitrate (#rc_target_bitrate) to convert to bits/bytes, if
-   * necessary.
-   */
-  unsigned int rc_buf_sz;
-
-  /*!\brief Decoder Buffer Initial Size
-   *
-   * This value indicates the amount of data that will be buffered by the
-   * decoding application prior to beginning playback. This value is
-   * expressed in units of time (milliseconds). Use the target bitrate
-   * (#rc_target_bitrate) to convert to bits/bytes, if necessary.
-   */
-  unsigned int rc_buf_initial_sz;
-
-  /*!\brief Decoder Buffer Optimal Size
-   *
-   * This value indicates the amount of data that the encoder should try
-   * to maintain in the decoder's buffer. This value is expressed in units
-   * of time (milliseconds). Use the target bitrate (#rc_target_bitrate)
-   * to convert to bits/bytes, if necessary.
-   */
-  unsigned int rc_buf_optimal_sz;
-
-  /*
-   * 2 pass rate control parameters
-   */
-
-  /*!\brief Two-pass mode CBR/VBR bias
-   *
-   * Bias, expressed on a scale of 0 to 100, for determining target size
-   * for the current frame. The value 0 indicates the optimal CBR mode
-   * value should be used. The value 100 indicates the optimal VBR mode
-   * value should be used. Values in between indicate which way the
-   * encoder should "lean."
-   */
-  unsigned int rc_2pass_vbr_bias_pct;
-
-  /*!\brief Two-pass mode per-GOP minimum bitrate
-   *
-   * This value, expressed as a percentage of the target bitrate, indicates
-   * the minimum bitrate to be used for a single GOP (aka "section")
-   */
-  unsigned int rc_2pass_vbr_minsection_pct;
-
-  /*!\brief Two-pass mode per-GOP maximum bitrate
-   *
-   * This value, expressed as a percentage of the target bitrate, indicates
-   * the maximum bitrate to be used for a single GOP (aka "section")
-   */
-  unsigned int rc_2pass_vbr_maxsection_pct;
-
-  /*
-   * keyframing settings (kf)
-   */
-
-  /*!\brief Keyframe placement mode
-   *
-   * This value indicates whether the encoder should place keyframes at a
-   * fixed interval, or determine the optimal placement automatically
-   * (as governed by the #kf_min_dist and #kf_max_dist parameters)
-   */
-  enum aom_kf_mode kf_mode;
-
-  /*!\brief Keyframe minimum interval
-   *
-   * This value, expressed as a number of frames, prevents the encoder from
-   * placing a keyframe nearer than kf_min_dist to the previous keyframe. At
-   * least kf_min_dist non-keyframes will be coded before the next
-   * keyframe. Set kf_min_dist equal to kf_max_dist for a fixed interval.
-   */
-  unsigned int kf_min_dist;
-
-  /*!\brief Keyframe maximum interval
-   *
-   * This value, expressed as a number of frames, forces the encoder to code
-   * a keyframe if one has not been coded in the last kf_max_dist frames.
-   * A value of 0 implies all frames will be keyframes. Set kf_min_dist
-   * equal to kf_max_dist for a fixed interval.
-   */
-  unsigned int kf_max_dist;
-} aom_codec_enc_cfg_t; /**< alias for struct aom_codec_enc_cfg */
-
-/*!\brief Initialize an encoder instance
- *
- * Initializes an encoder context using the given interface. Applications
- * should call the aom_codec_enc_init convenience macro instead of this
- * function directly, to ensure that the ABI version number parameter
- * is properly initialized.
- *
- * If the library was configured with --disable-multithread, this call
- * is not thread safe and should be guarded with a lock if being used
- * in a multithreaded context.
- *
- * \param[in]    ctx     Pointer to this instance's context.
- * \param[in]    iface   Pointer to the algorithm interface to use.
- * \param[in]    cfg     Configuration to use, if known. May be NULL.
- * \param[in]    flags   Bitfield of AOM_CODEC_USE_* flags
- * \param[in]    ver     ABI version number. Must be set to
- *                       AOM_ENCODER_ABI_VERSION
- * \retval #AOM_CODEC_OK
- *     The encoder algorithm initialized.
- * \retval #AOM_CODEC_MEM_ERROR
- *     Memory allocation failed.
- */
-aom_codec_err_t aom_codec_enc_init_ver(aom_codec_ctx_t *ctx,
-                                       aom_codec_iface_t *iface,
-                                       const aom_codec_enc_cfg_t *cfg,
-                                       aom_codec_flags_t flags, int ver);
-
-/*!\brief Convenience macro for aom_codec_enc_init_ver()
- *
- * Ensures the ABI version parameter is properly set.
- */
-#define aom_codec_enc_init(ctx, iface, cfg, flags) \
-  aom_codec_enc_init_ver(ctx, iface, cfg, flags, AOM_ENCODER_ABI_VERSION)
-
-/*!\brief Initialize multi-encoder instance
- *
- * Initializes multi-encoder context using the given interface.
- * Applications should call the aom_codec_enc_init_multi convenience macro
- * instead of this function directly, to ensure that the ABI version number
- * parameter is properly initialized.
- *
- * \param[in]    ctx     Pointer to this instance's context.
- * \param[in]    iface   Pointer to the algorithm interface to use.
- * \param[in]    cfg     Configuration to use, if known. May be NULL.
- * \param[in]    num_enc Total number of encoders.
- * \param[in]    flags   Bitfield of AOM_CODEC_USE_* flags
- * \param[in]    dsf     Pointer to down-sampling factors.
- * \param[in]    ver     ABI version number. Must be set to
- *                       AOM_ENCODER_ABI_VERSION
- * \retval #AOM_CODEC_OK
- *     The encoder algorithm initialized.
- * \retval #AOM_CODEC_MEM_ERROR
- *     Memory allocation failed.
- */
-aom_codec_err_t aom_codec_enc_init_multi_ver(
-    aom_codec_ctx_t *ctx, aom_codec_iface_t *iface, aom_codec_enc_cfg_t *cfg,
-    int num_enc, aom_codec_flags_t flags, aom_rational_t *dsf, int ver);
-
-/*!\brief Convenience macro for aom_codec_enc_init_multi_ver()
- *
- * Ensures the ABI version parameter is properly set.
- */
-#define aom_codec_enc_init_multi(ctx, iface, cfg, num_enc, flags, dsf) \
-  aom_codec_enc_init_multi_ver(ctx, iface, cfg, num_enc, flags, dsf,   \
-                               AOM_ENCODER_ABI_VERSION)
-
-/*!\brief Get a default configuration
- *
- * Initializes an encoder configuration structure with default values. Supports
- * the notion of "usages" so that an algorithm may offer different default
- * settings depending on the user's intended goal. This function \ref SHOULD
- * be called by all applications to initialize the configuration structure
- * before specializing the configuration with application specific values.
- *
- * \param[in]    iface     Pointer to the algorithm interface to use.
- * \param[out]   cfg       Configuration buffer to populate.
- * \param[in]    reserved  Must be set to 0 for VP8 and AV1.
- *
- * \retval #AOM_CODEC_OK
- *     The configuration was populated.
- * \retval #AOM_CODEC_INCAPABLE
- *     Interface is not an encoder interface.
- * \retval #AOM_CODEC_INVALID_PARAM
- *     A parameter was NULL, or the usage value was not recognized.
- */
-aom_codec_err_t aom_codec_enc_config_default(aom_codec_iface_t *iface,
-                                             aom_codec_enc_cfg_t *cfg,
-                                             unsigned int reserved);
-
-/*!\brief Set or change configuration
- *
- * Reconfigures an encoder instance according to the given configuration.
- *
- * \param[in]    ctx     Pointer to this instance's context
- * \param[in]    cfg     Configuration buffer to use
- *
- * \retval #AOM_CODEC_OK
- *     The encoder was reconfigured.
- * \retval #AOM_CODEC_INCAPABLE
- *     Interface is not an encoder interface.
- * \retval #AOM_CODEC_INVALID_PARAM
- *     A parameter was NULL, or the usage value was not recognized.
- */
-aom_codec_err_t aom_codec_enc_config_set(aom_codec_ctx_t *ctx,
-                                         const aom_codec_enc_cfg_t *cfg);
-
-/*!\brief Get global stream headers
- *
- * Retrieves a stream level global header packet, if supported by the codec.
- *
- * \param[in]    ctx     Pointer to this instance's context
- *
- * \retval NULL
- *     Encoder does not support global header
- * \retval Non-NULL
- *     Pointer to buffer containing global header packet
- */
-aom_fixed_buf_t *aom_codec_get_global_headers(aom_codec_ctx_t *ctx);
-
-/*!\brief deadline parameter analogous to AVx REALTIME mode. */
-#define AOM_DL_REALTIME (1)
-/*!\brief deadline parameter analogous to AVx GOOD QUALITY mode. */
-#define AOM_DL_GOOD_QUALITY (1000000)
-/*!\brief deadline parameter analogous to AVx BEST QUALITY mode. */
-#define AOM_DL_BEST_QUALITY (0)
-/*!\brief Encode a frame
- *
- * Encodes a video frame at the given "presentation time." The presentation
- * time stamp (PTS) \ref MUST be strictly increasing.
- *
- * The encoder supports the notion of a soft real-time deadline. Given a
- * non-zero value to the deadline parameter, the encoder will make a "best
- * effort" guarantee to return before the given time slice expires. It is
- * implicit that limiting the available time to encode will degrade the
- * output quality. The encoder can be given an unlimited time to produce the
- * best possible frame by specifying a deadline of '0'. This deadline
- * supersedes the AVx notion of "best quality, good quality, realtime".
- * Applications that wish to map these former settings to the new deadline
- * based system can use the symbols #AOM_DL_REALTIME, #AOM_DL_GOOD_QUALITY,
- * and #AOM_DL_BEST_QUALITY.
- *
- * When the last frame has been passed to the encoder, this function should
- * continue to be called, with the img parameter set to NULL. This will
- * signal the end-of-stream condition to the encoder and allow it to encode
- * any held buffers. Encoding is complete when aom_codec_encode() is called
- * and aom_codec_get_cx_data() returns no data.
- *
- * \param[in]    ctx       Pointer to this instance's context
- * \param[in]    img       Image data to encode, NULL to flush.
- * \param[in]    pts       Presentation time stamp, in timebase units.
- * \param[in]    duration  Duration to show frame, in timebase units.
- * \param[in]    flags     Flags to use for encoding this frame.
- * \param[in]    deadline  Time to spend encoding, in microseconds. (0=infinite)
- *
- * \retval #AOM_CODEC_OK
- *     The encode (or flush) call succeeded.
- * \retval #AOM_CODEC_INCAPABLE
- *     Interface is not an encoder interface.
- * \retval #AOM_CODEC_INVALID_PARAM
- *     A parameter was NULL, the image format is unsupported, etc.
- */
-aom_codec_err_t aom_codec_encode(aom_codec_ctx_t *ctx, const aom_image_t *img,
-                                 aom_codec_pts_t pts, unsigned long duration,
-                                 aom_enc_frame_flags_t flags,
-                                 unsigned long deadline);
-
-/*!\brief Set compressed data output buffer
- *
- * Sets the buffer that the codec should output the compressed data
- * into. This call effectively sets the buffer pointer returned in the
- * next AOM_CODEC_CX_FRAME_PKT packet. Subsequent packets will be
- * appended into this buffer. The buffer is preserved across frames,
- * so applications must periodically call this function after flushing
- * the accumulated compressed data to disk or to the network to reset
- * the pointer to the buffer's head.
- *
- * `pad_before` bytes will be skipped before writing the compressed
- * data, and `pad_after` bytes will be appended to the packet. The size
- * of the packet will be the sum of the size of the actual compressed
- * data, pad_before, and pad_after. The padding bytes will be preserved
- * (not overwritten).
- *
- * Note that calling this function does not guarantee that the returned
- * compressed data will be placed into the specified buffer. In the
- * event that the encoded data will not fit into the buffer provided,
- * the returned packet \ref MAY point to an internal buffer, as it would
- * if this call were never used. In this event, the output packet will
- * NOT have any padding, and the application must free space and copy it
- * to the proper place. This is of particular note in configurations
- * that may output multiple packets for a single encoded frame (e.g., lagged
- * encoding) or if the application does not reset the buffer periodically.
- *
- * Applications may restore the default behavior of the codec providing
- * the compressed data buffer by calling this function with a NULL
- * buffer.
- *
- * Applications \ref MUSTNOT call this function during iteration of
- * aom_codec_get_cx_data().
- *
- * \param[in]    ctx         Pointer to this instance's context
- * \param[in]    buf         Buffer to store compressed data into
- * \param[in]    pad_before  Bytes to skip before writing compressed data
- * \param[in]    pad_after   Bytes to skip after writing compressed data
- *
- * \retval #AOM_CODEC_OK
- *     The buffer was set successfully.
- * \retval #AOM_CODEC_INVALID_PARAM
- *     A parameter was NULL, the image format is unsupported, etc.
- */
-aom_codec_err_t aom_codec_set_cx_data_buf(aom_codec_ctx_t *ctx,
-                                          const aom_fixed_buf_t *buf,
-                                          unsigned int pad_before,
-                                          unsigned int pad_after);
-
-/*!\brief Encoded data iterator
- *
- * Iterates over a list of data packets to be passed from the encoder to the
- * application. The different kinds of packets available are enumerated in
- * #aom_codec_cx_pkt_kind.
- *
- * #AOM_CODEC_CX_FRAME_PKT packets should be passed to the application's
- * muxer. Multiple compressed frames may be in the list.
- * #AOM_CODEC_STATS_PKT packets should be appended to a global buffer.
- *
- * The application \ref MUST silently ignore any packet kinds that it does
- * not recognize or support.
- *
- * The data buffers returned from this function are only guaranteed to be
- * valid until the application makes another call to any aom_codec_* function.
- *
- * \param[in]     ctx      Pointer to this instance's context
- * \param[in,out] iter     Iterator storage, initialized to NULL
- *
- * \return Returns a pointer to an output data packet (compressed frame data,
- *         two-pass statistics, etc.) or NULL to signal end-of-list.
- *
- */
-const aom_codec_cx_pkt_t *aom_codec_get_cx_data(aom_codec_ctx_t *ctx,
-                                                aom_codec_iter_t *iter);
-
-/*!\brief Get Preview Frame
- *
- * Returns an image that can be used as a preview. Shows the image as it would
- * exist at the decompressor. The application \ref MUSTNOT write into this
- * image buffer.
- *
- * \param[in]     ctx      Pointer to this instance's context
- *
- * \return Returns a pointer to a preview image, or NULL if no image is
- *         available.
- *
- */
-const aom_image_t *aom_codec_get_preview_frame(aom_codec_ctx_t *ctx);
-
-/*!@} - end defgroup encoder*/
-#ifdef __cplusplus
-}
-#endif
-#endif  // AOM_AOM_ENCODER_H_
--- a/aom/aom_image.h
+++ b/aom/aom_image.h
@@ -1,225 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-/*!\file
- * \brief Describes the aom image descriptor and associated operations
- *
- */
-#ifndef AOM_AOM_IMAGE_H_
-#define AOM_AOM_IMAGE_H_
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/*!\brief Current ABI version number
- *
- * \internal
- * If this file is altered in any way that changes the ABI, this value
- * must be bumped.  Examples include, but are not limited to, changing
- * types, removing or reassigning enums, adding/removing/rearranging
- * fields to structures
- */
-#define AOM_IMAGE_ABI_VERSION (4) /**<\hideinitializer*/
-
-#define AOM_IMG_FMT_PLANAR 0x100       /**< Image is a planar format. */
-#define AOM_IMG_FMT_UV_FLIP 0x200      /**< V plane precedes U in memory. */
-#define AOM_IMG_FMT_HAS_ALPHA 0x400    /**< Image has an alpha channel. */
-#define AOM_IMG_FMT_HIGHBITDEPTH 0x800 /**< Image uses 16bit framebuffer. */
-
-/*!\brief List of supported image formats */
-typedef enum aom_img_fmt {
-  AOM_IMG_FMT_NONE,
-  AOM_IMG_FMT_RGB24,     /**< 24 bit per pixel packed RGB */
-  AOM_IMG_FMT_RGB32,     /**< 32 bit per pixel packed 0RGB */
-  AOM_IMG_FMT_RGB565,    /**< 16 bit per pixel, 565 */
-  AOM_IMG_FMT_RGB555,    /**< 16 bit per pixel, 555 */
-  AOM_IMG_FMT_UYVY,      /**< UYVY packed YUV */
-  AOM_IMG_FMT_YUY2,      /**< YUYV packed YUV */
-  AOM_IMG_FMT_YVYU,      /**< YVYU packed YUV */
-  AOM_IMG_FMT_BGR24,     /**< 24 bit per pixel packed BGR */
-  AOM_IMG_FMT_RGB32_LE,  /**< 32 bit packed BGR0 */
-  AOM_IMG_FMT_ARGB,      /**< 32 bit packed ARGB, alpha=255 */
-  AOM_IMG_FMT_ARGB_LE,   /**< 32 bit packed BGRA, alpha=255 */
-  AOM_IMG_FMT_RGB565_LE, /**< 16 bit per pixel, gggbbbbb rrrrrggg */
-  AOM_IMG_FMT_RGB555_LE, /**< 16 bit per pixel, gggbbbbb 0rrrrrgg */
-  AOM_IMG_FMT_YV12 =
-      AOM_IMG_FMT_PLANAR | AOM_IMG_FMT_UV_FLIP | 1, /**< planar YVU */
-  AOM_IMG_FMT_I420 = AOM_IMG_FMT_PLANAR | 2, /**< planar YUV */
-  AOM_IMG_FMT_AOMYV12 = AOM_IMG_FMT_PLANAR | AOM_IMG_FMT_UV_FLIP |
-                        3, /**< planar 4:2:0 format with aom color space */
-  AOM_IMG_FMT_AOMI420 = AOM_IMG_FMT_PLANAR | 4,
-  AOM_IMG_FMT_I422 = AOM_IMG_FMT_PLANAR | 5,
-  AOM_IMG_FMT_I444 = AOM_IMG_FMT_PLANAR | 6,
-  AOM_IMG_FMT_I440 = AOM_IMG_FMT_PLANAR | 7,
-  AOM_IMG_FMT_444A = AOM_IMG_FMT_PLANAR | AOM_IMG_FMT_HAS_ALPHA | 6,
-  AOM_IMG_FMT_I42016 = AOM_IMG_FMT_I420 | AOM_IMG_FMT_HIGHBITDEPTH,
-  AOM_IMG_FMT_I42216 = AOM_IMG_FMT_I422 | AOM_IMG_FMT_HIGHBITDEPTH,
-  AOM_IMG_FMT_I44416 = AOM_IMG_FMT_I444 | AOM_IMG_FMT_HIGHBITDEPTH,
-  AOM_IMG_FMT_I44016 = AOM_IMG_FMT_I440 | AOM_IMG_FMT_HIGHBITDEPTH
-} aom_img_fmt_t; /**< alias for enum aom_img_fmt */
-
-/*!\brief List of supported color spaces */
-typedef enum aom_color_space {
-  AOM_CS_UNKNOWN = 0,   /**< Unknown */
-  AOM_CS_BT_601 = 1,    /**< BT.601 */
-  AOM_CS_BT_709 = 2,    /**< BT.709 */
-  AOM_CS_SMPTE_170 = 3, /**< SMPTE.170 */
-  AOM_CS_SMPTE_240 = 4, /**< SMPTE.240 */
-  AOM_CS_BT_2020 = 5,   /**< BT.2020 */
-  AOM_CS_RESERVED = 6,  /**< Reserved */
-  AOM_CS_SRGB = 7       /**< sRGB */
-} aom_color_space_t;    /**< alias for enum aom_color_space */
-
-/*!\brief List of supported color range */
-typedef enum aom_color_range {
-  AOM_CR_STUDIO_RANGE = 0, /**< Y [16..235], UV [16..240] */
-  AOM_CR_FULL_RANGE = 1    /**< YUV/RGB [0..255] */
-} aom_color_range_t;       /**< alias for enum aom_color_range */
-
-/**\brief Image Descriptor */
-typedef struct aom_image {
-  aom_img_fmt_t fmt;       /**< Image Format */
-  aom_color_space_t cs;    /**< Color Space */
-  aom_color_range_t range; /**< Color Range */
-
-  /* Image storage dimensions */
-  unsigned int w;         /**< Stored image width */
-  unsigned int h;         /**< Stored image height */
-  unsigned int bit_depth; /**< Stored image bit-depth */
-
-  /* Image display dimensions */
-  unsigned int d_w; /**< Displayed image width */
-  unsigned int d_h; /**< Displayed image height */
-
-  /* Image intended rendering dimensions */
-  unsigned int r_w; /**< Intended rendering image width */
-  unsigned int r_h; /**< Intended rendering image height */
-
-  /* Chroma subsampling info */
-  unsigned int x_chroma_shift; /**< subsampling order, X */
-  unsigned int y_chroma_shift; /**< subsampling order, Y */
-
-/* Image data pointers. */
-#define AOM_PLANE_PACKED 0  /**< To be used for all packed formats */
-#define AOM_PLANE_Y 0       /**< Y (Luminance) plane */
-#define AOM_PLANE_U 1       /**< U (Chroma) plane */
-#define AOM_PLANE_V 2       /**< V (Chroma) plane */
-#define AOM_PLANE_ALPHA 3   /**< A (Transparency) plane */
-  unsigned char *planes[4]; /**< pointer to the top left pixel for each plane */
-  int stride[4];            /**< stride between rows for each plane */
-
-  int bps; /**< bits per sample (for packed formats) */
-
-  /*!\brief The following member may be set by the application to associate
-   * data with this image.
-   */
-  void *user_priv;
-
-  /* The following members should be treated as private. */
-  unsigned char *img_data; /**< private */
-  int img_data_owner;      /**< private */
-  int self_allocd;         /**< private */
-
-  void *fb_priv; /**< Frame buffer data associated with the image. */
-} aom_image_t;   /**< alias for struct aom_image */
-
-/**\brief Representation of a rectangle on a surface */
-typedef struct aom_image_rect {
-  unsigned int x;   /**< leftmost column */
-  unsigned int y;   /**< topmost row */
-  unsigned int w;   /**< width */
-  unsigned int h;   /**< height */
-} aom_image_rect_t; /**< alias for struct aom_image_rect */
-
-/*!\brief Open a descriptor, allocating storage for the underlying image
- *
- * Returns a descriptor for storing an image of the given format. The
- * storage for the underlying image is allocated on the heap.
- *
- * \param[in]    img       Pointer to storage for descriptor. If this parameter
- *                         is NULL, the storage for the descriptor will be
- *                         allocated on the heap.
- * \param[in]    fmt       Format for the image
- * \param[in]    d_w       Width of the image
- * \param[in]    d_h       Height of the image
- * \param[in]    align     Alignment, in bytes, of the image buffer and
- *                         each row in the image (stride).
- *
- * \return Returns a pointer to the initialized image descriptor. If the img
- *         parameter is non-null, the value of the img parameter will be
- *         returned.
- */
-aom_image_t *aom_img_alloc(aom_image_t *img, aom_img_fmt_t fmt,
-                           unsigned int d_w, unsigned int d_h,
-                           unsigned int align);
-
-/*!\brief Open a descriptor, using existing storage for the underlying image
- *
- * Returns a descriptor for storing an image of the given format. The
- * storage for descriptor has been allocated elsewhere, and a descriptor is
- * desired to "wrap" that storage.
- *
- * \param[in]    img       Pointer to storage for descriptor. If this parameter
- *                         is NULL, the storage for the descriptor will be
- *                         allocated on the heap.
- * \param[in]    fmt       Format for the image
- * \param[in]    d_w       Width of the image
- * \param[in]    d_h       Height of the image
- * \param[in]    align     Alignment, in bytes, of each row in the image.
- * \param[in]    img_data  Storage to use for the image
- *
- * \return Returns a pointer to the initialized image descriptor. If the img
- *         parameter is non-null, the value of the img parameter will be
- *         returned.
- */
-aom_image_t *aom_img_wrap(aom_image_t *img, aom_img_fmt_t fmt, unsigned int d_w,
-                          unsigned int d_h, unsigned int align,
-                          unsigned char *img_data);
-
-/*!\brief Set the rectangle identifying the displayed portion of the image
- *
- * Updates the displayed rectangle (aka viewport) on the image surface to
- * match the specified coordinates and size.
- *
- * \param[in]    img       Image descriptor
- * \param[in]    x         leftmost column
- * \param[in]    y         topmost row
- * \param[in]    w         width
- * \param[in]    h         height
- *
- * \return 0 if the requested rectangle is valid, nonzero otherwise.
- */
-int aom_img_set_rect(aom_image_t *img, unsigned int x, unsigned int y,
-                     unsigned int w, unsigned int h);
-
-/*!\brief Flip the image vertically (top for bottom)
- *
- * Adjusts the image descriptor's pointers and strides to make the image
- * be referenced upside-down.
- *
- * \param[in]    img       Image descriptor
- */
-void aom_img_flip(aom_image_t *img);
-
-/*!\brief Close an image descriptor
- *
- * Frees all allocated storage associated with an image descriptor.
- *
- * \param[in]    img       Image descriptor
- */
-void aom_img_free(aom_image_t *img);
-
-#ifdef __cplusplus
-}  // extern "C"
-#endif
-
-#endif  // AOM_AOM_IMAGE_H_
--- a/aom/aom_integer.h
+++ b/aom/aom_integer.h
@@ -1,64 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AOM_INTEGER_H_
-#define AOM_AOM_INTEGER_H_
-
-/* get ptrdiff_t, size_t, wchar_t, NULL */
-#include <stddef.h>
-
-#if defined(_MSC_VER)
-#define AOM_FORCE_INLINE __forceinline
-#define AOM_INLINE __inline
-#else
-#define AOM_FORCE_INLINE __inline__ __attribute__((always_inline))
-// TODO(jbb): Allow a way to force inline off for older compilers.
-#define AOM_INLINE inline
-#endif
-
-#if defined(AOM_EMULATE_INTTYPES)
-typedef signed char int8_t;
-typedef signed short int16_t;
-typedef signed int int32_t;
-
-typedef unsigned char uint8_t;
-typedef unsigned short uint16_t;
-typedef unsigned int uint32_t;
-
-#ifndef _UINTPTR_T_DEFINED
-typedef size_t uintptr_t;
-#endif
-
-#else
-
-/* Most platforms have the C99 standard integer types. */
-
-#if defined(__cplusplus)
-#if !defined(__STDC_FORMAT_MACROS)
-#define __STDC_FORMAT_MACROS
-#endif
-#if !defined(__STDC_LIMIT_MACROS)
-#define __STDC_LIMIT_MACROS
-#endif
-#endif  // __cplusplus
-
-#include <stdint.h>
-
-#endif
-
-/* VS2010 defines stdint.h, but not inttypes.h */
-#if defined(_MSC_VER) && _MSC_VER < 1800
-#define PRId64 "I64d"
-#else
-#include <inttypes.h>
-#endif
-
-#endif  // AOM_AOM_INTEGER_H_
--- a/aom/aomcx.h
+++ b/aom/aomcx.h
@@ -1,759 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-#ifndef AOM_AOMCX_H_
-#define AOM_AOMCX_H_
-
-/*!\defgroup aom_encoder AOMedia AOM/AV1 Encoder
- * \ingroup aom
- *
- * @{
- */
-#include "./aom.h"
-#include "./aom_encoder.h"
-
-/*!\file
- * \brief Provides definitions for using AOM or AV1 encoder algorithm within the
- *        aom Codec Interface.
- */
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/*!\name Algorithm interface for AV1
- *
- * This interface provides the capability to encode raw AV1 streams.
- * @{
- */
-extern aom_codec_iface_t aom_codec_av1_cx_algo;
-extern aom_codec_iface_t *aom_codec_av1_cx(void);
-/*!@} - end algorithm interface member group*/
-
-/*
- * Algorithm Flags
- */
-
-/*!\brief Don't reference the last frame
- *
- * When this flag is set, the encoder will not use the last frame as a
- * predictor. When not set, the encoder will choose whether to use the
- * last frame or not automatically.
- */
-#define AOM_EFLAG_NO_REF_LAST (1 << 16)
-
-/*!\brief Don't reference the golden frame
- *
- * When this flag is set, the encoder will not use the golden frame as a
- * predictor. When not set, the encoder will choose whether to use the
- * golden frame or not automatically.
- */
-#define AOM_EFLAG_NO_REF_GF (1 << 17)
-
-/*!\brief Don't reference the alternate reference frame
- *
- * When this flag is set, the encoder will not use the alt ref frame as a
- * predictor. When not set, the encoder will choose whether to use the
- * alt ref frame or not automatically.
- */
-#define AOM_EFLAG_NO_REF_ARF (1 << 21)
-
-/*!\brief Don't update the last frame
- *
- * When this flag is set, the encoder will not update the last frame with
- * the contents of the current frame.
- */
-#define AOM_EFLAG_NO_UPD_LAST (1 << 18)
-
-/*!\brief Don't update the golden frame
- *
- * When this flag is set, the encoder will not update the golden frame with
- * the contents of the current frame.
- */
-#define AOM_EFLAG_NO_UPD_GF (1 << 22)
-
-/*!\brief Don't update the alternate reference frame
- *
- * When this flag is set, the encoder will not update the alt ref frame with
- * the contents of the current frame.
- */
-#define AOM_EFLAG_NO_UPD_ARF (1 << 23)
-
-/*!\brief Force golden frame update
- *
- * When this flag is set, the encoder copies the contents of the current frame
- * to the golden frame buffer.
- */
-#define AOM_EFLAG_FORCE_GF (1 << 19)
-
-/*!\brief Force alternate reference frame update
- *
- * When this flag is set, the encoder copies the contents of the current frame
- * to the alternate reference frame buffer.
- */
-#define AOM_EFLAG_FORCE_ARF (1 << 24)
-
-/*!\brief Disable entropy update
- *
- * When this flag is set, the encoder will not update its internal entropy
- * model based on the entropy of this frame.
- */
-#define AOM_EFLAG_NO_UPD_ENTROPY (1 << 20)
-
-/*!\brief AVx encoder control functions
- *
- * This set of macros define the control functions available for AVx
- * encoder interface.
- *
- * \sa #aom_codec_control
- */
-enum aome_enc_control_id {
-  /*!\brief Codec control function to set which reference frame encoder can use.
-   *
-   * Supported in codecs: VP8, AV1
-   */
-  AOME_USE_REFERENCE = 7,
-
-  /*!\brief Codec control function to pass an ROI map to encoder.
-   *
-   * Supported in codecs: VP8, AV1
-   */
-  AOME_SET_ROI_MAP = 8,
-
-  /*!\brief Codec control function to pass an Active map to encoder.
-   *
-   * Supported in codecs: VP8, AV1
-   */
-  AOME_SET_ACTIVEMAP,
-
-  /*!\brief Codec control function to set encoder scaling mode.
-   *
-   * Supported in codecs: VP8, AV1
-   */
-  AOME_SET_SCALEMODE = 11,
-
-  /*!\brief Codec control function to set encoder internal speed settings.
-   *
-   * Changes in this value influence, among others, the encoder's selection
-   * of motion estimation methods. Values greater than 0 will increase encoder
-   * speed at the expense of quality.
-   *
-   * \note Valid range for VP8: -16..16
-   * \note Valid range for AV1: -8..8
-   *
-   * Supported in codecs: VP8, AV1
-   */
-  AOME_SET_CPUUSED = 13,
-
-  /*!\brief Codec control function to automatically set and use alt-ref frames.
-   *
-   * Supported in codecs: VP8, AV1
-   */
-  AOME_SET_ENABLEAUTOALTREF,
-
-#if CONFIG_EXT_REFS
-  /*!\brief Codec control function to enable automatic set and use
-   * bwd-pred frames.
-   *
-   * Supported in codecs: AV1
-   */
-  AOME_SET_ENABLEAUTOBWDREF,
-#endif  // CONFIG_EXT_REFS
-
-  /*!\brief control function to set noise sensitivity
-   *
-   * 0: off, 1: OnYOnly, 2: OnYUV,
-   * 3: OnYUVAggressive, 4: Adaptive
-   *
-   * Supported in codecs: VP8
-   */
-  AOME_SET_NOISE_SENSITIVITY,
-
-  /*!\brief Codec control function to set sharpness.
-   *
-   * Supported in codecs: VP8, AV1
-   */
-  AOME_SET_SHARPNESS,
-
-  /*!\brief Codec control function to set the threshold for MBs treated static.
-   *
-   * Supported in codecs: VP8, AV1
-   */
-  AOME_SET_STATIC_THRESHOLD,
-
-  /*!\brief Codec control function to set the number of token partitions.
-   *
-   * Supported in codecs: VP8
-   */
-  AOME_SET_TOKEN_PARTITIONS,
-
-  /*!\brief Codec control function to get last quantizer chosen by the encoder.
-   *
-   * Return value uses internal quantizer scale defined by the codec.
-   *
-   * Supported in codecs: VP8, AV1
-   */
-  AOME_GET_LAST_QUANTIZER,
-
-  /*!\brief Codec control function to get last quantizer chosen by the encoder.
-   *
-   * Return value uses the 0..63 scale as used by the rc_*_quantizer config
-   * parameters.
-   *
-   * Supported in codecs: VP8, AV1
-   */
-  AOME_GET_LAST_QUANTIZER_64,
-
-  /*!\brief Codec control function to set the max no of frames to create arf.
-   *
-   * Supported in codecs: VP8, AV1
-   */
-  AOME_SET_ARNR_MAXFRAMES,
-
-  /*!\brief Codec control function to set the filter strength for the arf.
-   *
-   * Supported in codecs: VP8, AV1
-   */
-  AOME_SET_ARNR_STRENGTH,
-
-  /*!\deprecated control function to set the filter type to use for the arf. */
-  AOME_SET_ARNR_TYPE,
-
-  /*!\brief Codec control function to set visual tuning.
-   *
-   * Supported in codecs: VP8, AV1
-   */
-  AOME_SET_TUNING,
-
-  /*!\brief Codec control function to set constrained quality level.
-   *
-   * \attention For this value to be used aom_codec_enc_cfg_t::g_usage must be
-   *            set to #AOM_CQ.
-   * \note Valid range: 0..63
-   *
-   * Supported in codecs: VP8, AV1
-   */
-  AOME_SET_CQ_LEVEL,
-
-  /*!\brief Codec control function to set Max data rate for Intra frames.
-   *
-   * This value controls additional clamping on the maximum size of a
-   * keyframe. It is expressed as a percentage of the average
-   * per-frame bitrate, with the special (and default) value 0 meaning
-   * unlimited, or no additional clamping beyond the codec's built-in
-   * algorithm.
-   *
-   * For example, to allocate no more than 4.5 frames worth of bitrate
-   * to a keyframe, set this to 450.
-   *
-   * Supported in codecs: VP8, AV1
-   */
-  AOME_SET_MAX_INTRA_BITRATE_PCT,
-
-  /*!\brief Codec control function to set reference and update frame flags.
-   *
-   *  Supported in codecs: VP8
-   */
-  AOME_SET_FRAME_FLAGS,
-
-  /*!\brief Codec control function to set max data rate for Inter frames.
-   *
-   * This value controls additional clamping on the maximum size of an
-   * inter frame. It is expressed as a percentage of the average
-   * per-frame bitrate, with the special (and default) value 0 meaning
-   * unlimited, or no additional clamping beyond the codec's built-in
-   * algorithm.
-   *
-   * For example, to allow no more than 4.5 frames worth of bitrate
-   * to an inter frame, set this to 450.
-   *
-   * Supported in codecs: AV1
-   */
-  AV1E_SET_MAX_INTER_BITRATE_PCT,
-
-  /*!\brief Boost percentage for Golden Frame in CBR mode.
-   *
-   * This value controls the amount of boost given to Golden Frame in
-   * CBR mode. It is expressed as a percentage of the average
-   * per-frame bitrate, with the special (and default) value 0 meaning
-   * the feature is off, i.e., no golden frame boost in CBR mode and
-   * average bitrate target is used.
-   *
-   * For example, to allow 100% more bits, i.e, 2X, in a golden frame
-   * than average frame, set this to 100.
-   *
-   * Supported in codecs: AV1
-   */
-  AV1E_SET_GF_CBR_BOOST_PCT,
-
-  /*!\brief Codec control function to set encoder screen content mode.
-   *
-   * 0: off, 1: On, 2: On with more aggressive rate control.
-   *
-   * Supported in codecs: VP8
-   */
-  AOME_SET_SCREEN_CONTENT_MODE,
-
-  /*!\brief Codec control function to set lossless encoding mode.
-   *
-   * AV1 can operate in lossless encoding mode, in which the bitstream
-   * produced will be able to decode and reconstruct a perfect copy of
-   * input source. This control function provides a means to switch encoder
-   * into lossless coding mode(1) or normal coding mode(0) that may be lossy.
-   *                          0 = lossy coding mode
-   *                          1 = lossless coding mode
-   *
-   *  By default, encoder operates in normal coding mode (maybe lossy).
-   *
-   * Supported in codecs: AV1
-   */
-  AV1E_SET_LOSSLESS,
-#if CONFIG_AOM_QM
-  /*!\brief Codec control function to encode with quantisation matrices.
-   *
-   * AOM can operate with default quantisation matrices dependent on
-   * quantisation level and block type.
-   *                          0 = do not use quantisation matrices
-   *                          1 = use quantisation matrices
-   *
-   *  By default, the encoder operates without quantisation matrices.
-   *
-   * Supported in codecs: AOM
-   */
-
-  AV1E_SET_ENABLE_QM,
-
-  /*!\brief Codec control function to set the min quant matrix flatness.
-   *
-   * AOM can operate with different ranges of quantisation matrices.
-   * As quantisation levels increase, the matrices get flatter. This
-   * control sets the minimum level of flatness from which the matrices
-   * are determined.
-   *
-   *  By default, the encoder sets this minimum at half the available
-   *  range.
-   *
-   * Supported in codecs: AOM
-   */
-  AV1E_SET_QM_MIN,
-
-  /*!\brief Codec control function to set the max quant matrix flatness.
-   *
-   * AOM can operate with different ranges of quantisation matrices.
-   * As quantisation levels increase, the matrices get flatter. This
-   * control sets the maximum level of flatness possible.
-   *
-   * By default, the encoder sets this maximum at the top of the
-   * available range.
-   *
-   * Supported in codecs: AOM
-   */
-  AV1E_SET_QM_MAX,
-#endif
-
-  /*!\brief Codec control function to set number of tile columns.
-   *
-   * In encoding and decoding, AV1 allows an input image frame be partitioned
-   * into separated vertical tile columns, which can be encoded or decoded
-   * independently. This enables easy implementation of parallel encoding and
-   * decoding. This control requests the encoder to use column tiles in
-   * encoding an input frame, with number of tile columns (in Log2 unit) as
-   * the parameter:
-   *             0 = 1 tile column
-   *             1 = 2 tile columns
-   *             2 = 4 tile columns
-   *             .....
-   *             n = 2**n tile columns
-   * The requested tile columns will be capped by encoder based on image size
-   * limitation (The minimum width of a tile column is 256 pixel, the maximum
-   * is 4096).
-   *
-   * By default, the value is 0, i.e. one single column tile for entire image.
-   *
-   * Supported in codecs: AV1
-   */
-  AV1E_SET_TILE_COLUMNS,
-
-  /*!\brief Codec control function to set number of tile rows.
-   *
-   * In encoding and decoding, AV1 allows an input image frame be partitioned
-   * into separated horizontal tile rows. Tile rows are encoded or decoded
-   * sequentially. Even though encoding/decoding of later tile rows depends on
-   * earlier ones, this allows the encoder to output data packets for tile rows
-   * prior to completely processing all tile rows in a frame, thereby reducing
-   * the latency in processing between input and output. The parameter
-   * for this control describes the number of tile rows, which has a valid
-   * range [0, 2]:
-   *            0 = 1 tile row
-   *            1 = 2 tile rows
-   *            2 = 4 tile rows
-   *
-   * By default, the value is 0, i.e. one single row tile for entire image.
-   *
-   * Supported in codecs: AV1
-   */
-  AV1E_SET_TILE_ROWS,
-
-  /*!\brief Codec control function to enable frame parallel decoding feature.
-   *
-   * AV1 has a bitstream feature to reduce decoding dependency between frames
-   * by turning off backward update of probability context used in encoding
-   * and decoding. This allows staged parallel processing of more than one
-   * video frames in the decoder. This control function provides a means to
-   * turn this feature on or off for bitstreams produced by encoder.
-   *
-   * By default, this feature is off.
-   *
-   * Supported in codecs: AV1
-   */
-  AV1E_SET_FRAME_PARALLEL_DECODING,
-
-  /*!\brief Codec control function to set adaptive quantization mode.
-   *
-   * AV1 has a segment based feature that allows encoder to adaptively change
-   * quantization parameter for each segment within a frame to improve the
-   * subjective quality. This control makes encoder operate in one of the
-   * several AQ_modes supported.
-   *
-   * By default, encoder operates with AQ_Mode 0(adaptive quantization off).
-   *
-   * Supported in codecs: AV1
-   */
-  AV1E_SET_AQ_MODE,
-
-  /*!\brief Codec control function to enable/disable periodic Q boost.
-   *
-   * One AV1 encoder speed feature is to enable quality boost by lowering
-   * frame level Q periodically. This control function provides a means to
-   * turn on/off this feature.
-   *               0 = off
-   *               1 = on
-   *
-   * By default, the encoder is allowed to use this feature for appropriate
-   * encoding modes.
-   *
-   * Supported in codecs: AV1
-   */
-  AV1E_SET_FRAME_PERIODIC_BOOST,
-
-  /*!\brief Codec control function to set noise sensitivity.
-   *
-   *  0: off, 1: On(YOnly)
-   *
-   * Supported in codecs: AV1
-   */
-  AV1E_SET_NOISE_SENSITIVITY,
-
-  /*!\brief Codec control function to set content type.
-   * \note Valid parameter range:
-   *              AOM_CONTENT_DEFAULT = Regular video content (Default)
-   *              AOM_CONTENT_SCREEN  = Screen capture content
-   *
-   * Supported in codecs: AV1
-   */
-  AV1E_SET_TUNE_CONTENT,
-
-  /*!\brief Codec control function to set color space info.
-   * \note Valid ranges: 0..7, default is "UNKNOWN".
-   *                     0 = UNKNOWN,
-   *                     1 = BT_601
-   *                     2 = BT_709
-   *                     3 = SMPTE_170
-   *                     4 = SMPTE_240
-   *                     5 = BT_2020
-   *                     6 = RESERVED
-   *                     7 = SRGB
-   *
-   * Supported in codecs: AV1
-   */
-  AV1E_SET_COLOR_SPACE,
-
-  /*!\brief Codec control function to set minimum interval between GF/ARF frames
-   *
-   * By default the value is set as 4.
-   *
-   * Supported in codecs: AV1
-   */
-  AV1E_SET_MIN_GF_INTERVAL,
-
-  /*!\brief Codec control function to set maximum interval between GF/ARF frames
-   *
-   * By default the value is set as 16.
-   *
-   * Supported in codecs: AV1
-   */
-  AV1E_SET_MAX_GF_INTERVAL,
-
-  /*!\brief Codec control function to get an Active map back from the encoder.
-   *
-   * Supported in codecs: AV1
-   */
-  AV1E_GET_ACTIVEMAP,
-
-  /*!\brief Codec control function to set color range bit.
-   * \note Valid ranges: 0..1, default is 0
-   *                     0 = Limited range (16..235 or HBD equivalent)
-   *                     1 = Full range (0..255 or HBD equivalent)
-   *
-   * Supported in codecs: AV1
-   */
-  AV1E_SET_COLOR_RANGE,
-
-  /*!\brief Codec control function to set intended rendering image size.
-   *
-   * By default, this is identical to the image size in pixels.
-   *
-   * Supported in codecs: AV1
-   */
-  AV1E_SET_RENDER_SIZE,
-
-  /*!\brief Codec control function to set target level.
-   *
-   * 255: off (default); 0: only keep level stats; 10: target for level 1.0;
-   * 11: target for level 1.1; ... 62: target for level 6.2
-   *
-   * Supported in codecs: AV1
-   */
-  AV1E_SET_TARGET_LEVEL,
-
-  /*!\brief Codec control function to get bitstream level.
-   *
-   * Supported in codecs: AV1
-   */
-  AV1E_GET_LEVEL,
-
-  /*!\brief Codec control function to set intended superblock size.
-   *
-   * By default, the superblock size is determined separately for each
-   * frame by the encoder.
-   *
-   * Supported in codecs: AV1
-   */
-  AV1E_SET_SUPERBLOCK_SIZE,
-};
-
-/*!\brief aom 1-D scaling mode
- *
- * This set of constants define 1-D aom scaling modes
- */
-typedef enum aom_scaling_mode_1d {
-  AOME_NORMAL = 0,
-  AOME_FOURFIVE = 1,
-  AOME_THREEFIVE = 2,
-  AOME_ONETWO = 3
-} AOM_SCALING_MODE;
-
-/*!\brief  aom region of interest map
- *
- * This defines the data structures for the region of interest map
- *
- */
-
-typedef struct aom_roi_map {
-  /*! An id between 0 and 3 for each 16x16 region within a frame. */
-  unsigned char *roi_map;
-  unsigned int rows; /**< Number of rows. */
-  unsigned int cols; /**< Number of columns. */
-  // TODO(paulwilkins): broken for AV1 which has 8 segments
-  // q and loop filter deltas for each segment
-  // (see MAX_MB_SEGMENTS)
-  int delta_q[4];  /**< Quantizer deltas. */
-  int delta_lf[4]; /**< Loop filter deltas. */
-  /*! Static breakout threshold for each segment. */
-  unsigned int static_threshold[4];
-} aom_roi_map_t;
-
-/*!\brief  aom active region map
- *
- * This defines the data structures for the active region map
- *
- */
-
-typedef struct aom_active_map {
-  /*!\brief specify an on (1) or off (0) each 16x16 region within a frame */
-  unsigned char *active_map;
-  unsigned int rows; /**< number of rows */
-  unsigned int cols; /**< number of cols */
-} aom_active_map_t;
-
-/*!\brief  aom image scaling mode
- *
- * This defines the data structure for image scaling mode
- *
- */
-typedef struct aom_scaling_mode {
-  AOM_SCALING_MODE h_scaling_mode; /**< horizontal scaling mode */
-  AOM_SCALING_MODE v_scaling_mode; /**< vertical scaling mode   */
-} aom_scaling_mode_t;
-
-/*!\brief VP8 token partition mode
- *
- * This defines VP8 partitioning mode for compressed data, i.e., the number of
- * sub-streams in the bitstream. Used for parallelized decoding.
- *
- */
-
-typedef enum {
-  AOM_ONE_TOKENPARTITION = 0,
-  AOM_TWO_TOKENPARTITION = 1,
-  AOM_FOUR_TOKENPARTITION = 2,
-  AOM_EIGHT_TOKENPARTITION = 3
-} aome_token_partitions;
-
-/*!\brief AV1 encoder content type */
-typedef enum {
-  AOM_CONTENT_DEFAULT,
-  AOM_CONTENT_SCREEN,
-  AOM_CONTENT_INVALID
-} aom_tune_content;
-
-/*!\brief VP8 model tuning parameters
- *
- * Changes the encoder to tune for certain types of input material.
- *
- */
-typedef enum { AOM_TUNE_PSNR, AOM_TUNE_SSIM } aom_tune_metric;
-
-/*!\cond */
-/*!\brief AOM/AV1 encoder control function parameter type
- *
- * Defines the data types that AOME/AV1E control functions take. Note that
- * additional common controls are defined in aom.h
- *
- */
-
-AOM_CTRL_USE_TYPE_DEPRECATED(AOME_USE_REFERENCE, int)
-#define AOM_CTRL_AOME_USE_REFERENCE
-AOM_CTRL_USE_TYPE(AOME_SET_FRAME_FLAGS, int)
-#define AOM_CTRL_AOME_SET_FRAME_FLAGS
-AOM_CTRL_USE_TYPE(AOME_SET_ROI_MAP, aom_roi_map_t *)
-#define AOM_CTRL_AOME_SET_ROI_MAP
-AOM_CTRL_USE_TYPE(AOME_SET_ACTIVEMAP, aom_active_map_t *)
-#define AOM_CTRL_AOME_SET_ACTIVEMAP
-AOM_CTRL_USE_TYPE(AOME_SET_SCALEMODE, aom_scaling_mode_t *)
-#define AOM_CTRL_AOME_SET_SCALEMODE
-
-AOM_CTRL_USE_TYPE(AOME_SET_CPUUSED, int)
-#define AOM_CTRL_AOME_SET_CPUUSED
-AOM_CTRL_USE_TYPE(AOME_SET_ENABLEAUTOALTREF, unsigned int)
-#define AOM_CTRL_AOME_SET_ENABLEAUTOALTREF
-
-#if CONFIG_EXT_REFS
-AOM_CTRL_USE_TYPE(AOME_SET_ENABLEAUTOBWDREF, unsigned int)
-#define AOM_CTRL_AOME_SET_ENABLEAUTOBWDREF
-#endif  // CONFIG_EXT_REFS
-
-AOM_CTRL_USE_TYPE(AOME_SET_NOISE_SENSITIVITY, unsigned int)
-#define AOM_CTRL_AOME_SET_NOISE_SENSITIVITY
-AOM_CTRL_USE_TYPE(AOME_SET_SHARPNESS, unsigned int)
-#define AOM_CTRL_AOME_SET_SHARPNESS
-AOM_CTRL_USE_TYPE(AOME_SET_STATIC_THRESHOLD, unsigned int)
-#define AOM_CTRL_AOME_SET_STATIC_THRESHOLD
-AOM_CTRL_USE_TYPE(AOME_SET_TOKEN_PARTITIONS, int) /* aome_token_partitions */
-#define AOM_CTRL_AOME_SET_TOKEN_PARTITIONS
-
-AOM_CTRL_USE_TYPE(AOME_SET_ARNR_MAXFRAMES, unsigned int)
-#define AOM_CTRL_AOME_SET_ARNR_MAXFRAMES
-AOM_CTRL_USE_TYPE(AOME_SET_ARNR_STRENGTH, unsigned int)
-#define AOM_CTRL_AOME_SET_ARNR_STRENGTH
-AOM_CTRL_USE_TYPE_DEPRECATED(AOME_SET_ARNR_TYPE, unsigned int)
-#define AOM_CTRL_AOME_SET_ARNR_TYPE
-AOM_CTRL_USE_TYPE(AOME_SET_TUNING, int) /* aom_tune_metric */
-#define AOM_CTRL_AOME_SET_TUNING
-AOM_CTRL_USE_TYPE(AOME_SET_CQ_LEVEL, unsigned int)
-#define AOM_CTRL_AOME_SET_CQ_LEVEL
-
-AOM_CTRL_USE_TYPE(AV1E_SET_TILE_COLUMNS, int)
-#define AOM_CTRL_AV1E_SET_TILE_COLUMNS
-AOM_CTRL_USE_TYPE(AV1E_SET_TILE_ROWS, int)
-#define AOM_CTRL_AV1E_SET_TILE_ROWS
-
-AOM_CTRL_USE_TYPE(AOME_GET_LAST_QUANTIZER, int *)
-#define AOM_CTRL_AOME_GET_LAST_QUANTIZER
-AOM_CTRL_USE_TYPE(AOME_GET_LAST_QUANTIZER_64, int *)
-#define AOM_CTRL_AOME_GET_LAST_QUANTIZER_64
-
-AOM_CTRL_USE_TYPE(AOME_SET_MAX_INTRA_BITRATE_PCT, unsigned int)
-#define AOM_CTRL_AOME_SET_MAX_INTRA_BITRATE_PCT
-AOM_CTRL_USE_TYPE(AV1E_SET_MAX_INTER_BITRATE_PCT, unsigned int) /* percent */
-#define AOM_CTRL_AV1E_SET_MAX_INTER_BITRATE_PCT
-
-AOM_CTRL_USE_TYPE(AOME_SET_SCREEN_CONTENT_MODE, unsigned int)
-#define AOM_CTRL_AOME_SET_SCREEN_CONTENT_MODE
-
-AOM_CTRL_USE_TYPE(AV1E_SET_GF_CBR_BOOST_PCT, unsigned int)
-#define AOM_CTRL_AV1E_SET_GF_CBR_BOOST_PCT
-
-AOM_CTRL_USE_TYPE(AV1E_SET_LOSSLESS, unsigned int)
-#define AOM_CTRL_AV1E_SET_LOSSLESS
-
-#if CONFIG_AOM_QM
-AOM_CTRL_USE_TYPE(AV1E_SET_ENABLE_QM, unsigned int)
-#define AOM_CTRL_AV1E_SET_ENABLE_QM
-
-AOM_CTRL_USE_TYPE(AV1E_SET_QM_MIN, unsigned int)
-#define AOM_CTRL_AV1E_SET_QM_MIN
-
-AOM_CTRL_USE_TYPE(AV1E_SET_QM_MAX, unsigned int)
-#define AOM_CTRL_AV1E_SET_QM_MAX
-#endif
-
-AOM_CTRL_USE_TYPE(AV1E_SET_FRAME_PARALLEL_DECODING, unsigned int)
-#define AOM_CTRL_AV1E_SET_FRAME_PARALLEL_DECODING
-
-AOM_CTRL_USE_TYPE(AV1E_SET_AQ_MODE, unsigned int)
-#define AOM_CTRL_AV1E_SET_AQ_MODE
-
-AOM_CTRL_USE_TYPE(AV1E_SET_FRAME_PERIODIC_BOOST, unsigned int)
-#define AOM_CTRL_AV1E_SET_FRAME_PERIODIC_BOOST
-
-AOM_CTRL_USE_TYPE(AV1E_SET_NOISE_SENSITIVITY, unsigned int)
-#define AOM_CTRL_AV1E_SET_NOISE_SENSITIVITY
-
-AOM_CTRL_USE_TYPE(AV1E_SET_TUNE_CONTENT, int) /* aom_tune_content */
-#define AOM_CTRL_AV1E_SET_TUNE_CONTENT
-
-AOM_CTRL_USE_TYPE(AV1E_SET_COLOR_SPACE, int)
-#define AOM_CTRL_AV1E_SET_COLOR_SPACE
-
-AOM_CTRL_USE_TYPE(AV1E_SET_MIN_GF_INTERVAL, unsigned int)
-#define AOM_CTRL_AV1E_SET_MIN_GF_INTERVAL
-
-AOM_CTRL_USE_TYPE(AV1E_SET_MAX_GF_INTERVAL, unsigned int)
-#define AOM_CTRL_AV1E_SET_MAX_GF_INTERVAL
-
-AOM_CTRL_USE_TYPE(AV1E_GET_ACTIVEMAP, aom_active_map_t *)
-#define AOM_CTRL_AV1E_GET_ACTIVEMAP
-
-AOM_CTRL_USE_TYPE(AV1E_SET_COLOR_RANGE, int)
-#define AOM_CTRL_AV1E_SET_COLOR_RANGE
-
-/*!\brief
- *
- * TODO(rbultje) : add support of the control in ffmpeg
- */
-#define AOM_CTRL_AV1E_SET_RENDER_SIZE
-AOM_CTRL_USE_TYPE(AV1E_SET_RENDER_SIZE, int *)
-
-AOM_CTRL_USE_TYPE(AV1E_SET_SUPERBLOCK_SIZE, unsigned int)
-#define AOM_CTRL_AV1E_SET_SUPERBLOCK_SIZE
-
-AOM_CTRL_USE_TYPE(AV1E_SET_TARGET_LEVEL, unsigned int)
-#define AOM_CTRL_AV1E_SET_TARGET_LEVEL
-
-AOM_CTRL_USE_TYPE(AV1E_GET_LEVEL, int *)
-#define AOM_CTRL_AV1E_GET_LEVEL
-/*!\endcond */
-/*! @} - end defgroup aom_encoder */
-#ifdef __cplusplus
-}  // extern "C"
-#endif
-
-#endif  // AOM_AOMCX_H_
--- a/aom/aomdx.h
+++ b/aom/aomdx.h
@@ -1,191 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-/*!\defgroup aom_decoder AOMedia AOM/AV1 Decoder
- * \ingroup aom
- *
- * @{
- */
-/*!\file
- * \brief Provides definitions for using AOM or AV1 within the aom Decoder
- *        interface.
- */
-#ifndef AOM_AOMDX_H_
-#define AOM_AOMDX_H_
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/* Include controls common to both the encoder and decoder */
-#include "./aom.h"
-
-/*!\name Algorithm interface for AV1
- *
- * This interface provides the capability to decode AV1 streams.
- * @{
- */
-extern aom_codec_iface_t aom_codec_av1_dx_algo;
-extern aom_codec_iface_t *aom_codec_av1_dx(void);
-/*!@} - end algorithm interface member group*/
-
-/** Data structure that stores bit accounting for debug
- */
-typedef struct Accounting Accounting;
-
-/*!\enum aom_dec_control_id
- * \brief AOM decoder control functions
- *
- * This set of macros define the control functions available for the AOM
- * decoder interface.
- *
- * \sa #aom_codec_control
- */
-enum aom_dec_control_id {
-  /** control function to get info on which reference frames were updated
-   *  by the last decode
-   */
-  AOMD_GET_LAST_REF_UPDATES = AOM_DECODER_CTRL_ID_START,
-
-  /** check if the indicated frame is corrupted */
-  AOMD_GET_FRAME_CORRUPTED,
-
-  /** control function to get info on which reference frames were used
-   *  by the last decode
-   */
-  AOMD_GET_LAST_REF_USED,
-
-  /** decryption function to decrypt encoded buffer data immediately
-   * before decoding. Takes an aom_decrypt_init, which contains
-   * a callback function and opaque context pointer.
-   */
-  AOMD_SET_DECRYPTOR,
-  // AOMD_SET_DECRYPTOR = AOMD_SET_DECRYPTOR,
-
-  /** control function to get the dimensions that the current frame is decoded
-   * at. This may be different to the intended display size for the frame as
-   * specified in the wrapper or frame header (see AV1D_GET_DISPLAY_SIZE). */
-  AV1D_GET_FRAME_SIZE,
-
-  /** control function to get the current frame's intended display dimensions
-   * (as specified in the wrapper or frame header). This may be different to
-   * the decoded dimensions of this frame (see AV1D_GET_FRAME_SIZE). */
-  AV1D_GET_DISPLAY_SIZE,
-
-  /** control function to get the bit depth of the stream. */
-  AV1D_GET_BIT_DEPTH,
-
-  /** control function to set the byte alignment of the planes in the reference
-   * buffers. Valid values are power of 2, from 32 to 1024. A value of 0 sets
-   * legacy alignment. I.e. Y plane is aligned to 32 bytes, U plane directly
-   * follows Y plane, and V plane directly follows U plane. Default value is 0.
-   */
-  AV1_SET_BYTE_ALIGNMENT,
-
-  /** control function to invert the decoding order (from right to left). The
-   * function is used in a test to confirm the decoding independence of tile
-   * columns. The function may be used in application where this order
-   * of decoding is desired.
-   *
-   * TODO(yaowu): Rework the unit test that uses this control, and in a future
-   *              release, this test-only control shall be removed.
-   */
-  AV1_INVERT_TILE_DECODE_ORDER,
-
-  /** control function to set the skip loop filter flag. Valid values are
-   * integers. The decoder will skip the loop filter when its value is set to
-   * nonzero. If the loop filter is skipped the decoder may accumulate decode
-   * artifacts. The default value is 0.
-   */
-  AV1_SET_SKIP_LOOP_FILTER,
-
-  /** control function to retrieve a pointer to the Accounting struct.  When
-   * compiled without --enable-accounting, this returns AOM_CODEC_INCAPABLE.
-   * If called before a frame has been decoded, this returns AOM_CODEC_ERROR.
-   * The caller should ensure that AOM_CODEC_OK is returned before attempting
-   * to dereference the Accounting pointer.
-   */
-  AV1_GET_ACCOUNTING,
-
-  AOM_DECODER_CTRL_ID_MAX,
-
-  /** control function to set the range of tile decoding. A value that is
-   * greater than or equal to zero indicates only the specific row/column is
-   * decoded. A value that is -1 indicates the whole row/column is decoded.
-   * A special case is both values are -1 that means the whole frame is
-   * decoded.
-   */
-  AV1_SET_DECODE_TILE_ROW,
-  AV1_SET_DECODE_TILE_COL
-};
-
-/** Decrypt n bytes of data from input -> output, using the decrypt_state
- *  passed in AOMD_SET_DECRYPTOR.
- */
-typedef void (*aom_decrypt_cb)(void *decrypt_state, const unsigned char *input,
-                               unsigned char *output, int count);
-
-/*!\brief Structure to hold decryption state
- *
- * Defines a structure to hold the decryption state and access function.
- */
-typedef struct aom_decrypt_init {
-  /*! Decrypt callback. */
-  aom_decrypt_cb decrypt_cb;
-
-  /*! Decryption state. */
-  void *decrypt_state;
-} aom_decrypt_init;
-
-/* No separate deprecated alias is declared here: aom_decrypt_init is already
- * defined by the struct typedef above, and repeating the typedef is a
- * redefinition that C99 rejects (only C11 and later permit it). */
-
-/*!\cond */
-/*!\brief AOM decoder control function parameter type
- *
- * Defines the data types that AOMD control functions take. Note that
- * additional common controls are defined in aom.h
- *
- */
-
-AOM_CTRL_USE_TYPE(AOMD_GET_LAST_REF_UPDATES, int *)
-#define AOM_CTRL_AOMD_GET_LAST_REF_UPDATES
-AOM_CTRL_USE_TYPE(AOMD_GET_FRAME_CORRUPTED, int *)
-#define AOM_CTRL_AOMD_GET_FRAME_CORRUPTED
-AOM_CTRL_USE_TYPE(AOMD_GET_LAST_REF_USED, int *)
-#define AOM_CTRL_AOMD_GET_LAST_REF_USED
-AOM_CTRL_USE_TYPE(AOMD_SET_DECRYPTOR, aom_decrypt_init *)
-#define AOM_CTRL_AOMD_SET_DECRYPTOR
-// AOM_CTRL_USE_TYPE(AOMD_SET_DECRYPTOR, aom_decrypt_init *)
-//#define AOM_CTRL_AOMD_SET_DECRYPTOR
-AOM_CTRL_USE_TYPE(AV1D_GET_DISPLAY_SIZE, int *)
-#define AOM_CTRL_AV1D_GET_DISPLAY_SIZE
-AOM_CTRL_USE_TYPE(AV1D_GET_BIT_DEPTH, unsigned int *)
-#define AOM_CTRL_AV1D_GET_BIT_DEPTH
-AOM_CTRL_USE_TYPE(AV1D_GET_FRAME_SIZE, int *)
-#define AOM_CTRL_AV1D_GET_FRAME_SIZE
-AOM_CTRL_USE_TYPE(AV1_INVERT_TILE_DECODE_ORDER, int)
-#define AOM_CTRL_AV1_INVERT_TILE_DECODE_ORDER
-AOM_CTRL_USE_TYPE(AV1_GET_ACCOUNTING, Accounting **)
-#define AOM_CTRL_AV1_GET_ACCOUNTING
-AOM_CTRL_USE_TYPE(AV1_SET_DECODE_TILE_ROW, int)
-#define AOM_CTRL_AV1_SET_DECODE_TILE_ROW
-AOM_CTRL_USE_TYPE(AV1_SET_DECODE_TILE_COL, int)
-#define AOM_CTRL_AV1_SET_DECODE_TILE_COL
-/*!\endcond */
-/*! @} - end defgroup aom_decoder */
-
-#ifdef __cplusplus
-}  // extern "C"
-#endif
-
-#endif  // AOM_AOMDX_H_
--- a/aom/exports_com
+++ b/aom/exports_com
@@ -1,16 +0,0 @@
-text aom_codec_build_config
-text aom_codec_control_
-text aom_codec_destroy
-text aom_codec_err_to_string
-text aom_codec_error
-text aom_codec_error_detail
-text aom_codec_get_caps
-text aom_codec_iface_name
-text aom_codec_version
-text aom_codec_version_extra_str
-text aom_codec_version_str
-text aom_img_alloc
-text aom_img_flip
-text aom_img_free
-text aom_img_set_rect
-text aom_img_wrap
--- a/aom/exports_dec
+++ b/aom/exports_dec
@@ -1,8 +0,0 @@
-text aom_codec_dec_init_ver
-text aom_codec_decode
-text aom_codec_get_frame
-text aom_codec_get_stream_info
-text aom_codec_peek_stream_info
-text aom_codec_register_put_frame_cb
-text aom_codec_register_put_slice_cb
-text aom_codec_set_frame_buffer_functions
--- a/aom/exports_enc
+++ b/aom/exports_enc
@@ -1,9 +0,0 @@
-text aom_codec_enc_config_default
-text aom_codec_enc_config_set
-text aom_codec_enc_init_multi_ver
-text aom_codec_enc_init_ver
-text aom_codec_encode
-text aom_codec_get_cx_data
-text aom_codec_get_global_headers
-text aom_codec_get_preview_frame
-text aom_codec_set_cx_data_buf
--- a/aom/internal/aom_codec_internal.h
+++ b/aom/internal/aom_codec_internal.h
@@ -1,465 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-/*!\file
- * \brief Describes the decoder algorithm interface for algorithm
- *        implementations.
- *
- * This file defines the private structures and data types that are only
- * relevant to implementing an algorithm, as opposed to using it.
- *
- * To create a decoder algorithm class, an interface structure is put
- * into the global namespace:
- *     <pre>
- *     my_codec.c:
- *       aom_codec_iface_t my_codec = {
- *           "My Codec v1.0",
- *           AOM_CODEC_ALG_ABI_VERSION,
- *           ...
- *       };
- *     </pre>
- *
- * An application instantiates a specific decoder instance by using
- * aom_codec_init() and a pointer to the algorithm's interface structure:
- *     <pre>
- *     my_app.c:
- *       extern aom_codec_iface_t my_codec;
- *       {
- *           aom_codec_ctx_t algo;
- *           res = aom_codec_init(&algo, &my_codec);
- *       }
- *     </pre>
- *
- * Once initialized, the instance is managed using other functions from
- * the aom_codec_* family.
- */
-#ifndef AOM_INTERNAL_AOM_CODEC_INTERNAL_H_
-#define AOM_INTERNAL_AOM_CODEC_INTERNAL_H_
-#include "./aom_config.h"
-#include "../aom_decoder.h"
-#include "../aom_encoder.h"
-#include <stdarg.h>
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/*!\brief Current ABI version number
- *
- * \internal
- * If this file is altered in any way that changes the ABI, this value
- * must be bumped.  Examples include, but are not limited to, changing
- * types, removing or reassigning enums, adding/removing/rearranging
- * fields to structures
- */
-#define AOM_CODEC_INTERNAL_ABI_VERSION (5) /**<\hideinitializer*/
-
-typedef struct aom_codec_alg_priv aom_codec_alg_priv_t;
-typedef struct aom_codec_priv_enc_mr_cfg aom_codec_priv_enc_mr_cfg_t;
-
-/*!\brief init function pointer prototype
- *
- * Performs algorithm-specific initialization of the decoder context. This
- * function is called by the generic aom_codec_init() wrapper function, so
- * plugins implementing this interface may trust the input parameters to be
- * properly initialized.
- *
- * \param[in] ctx   Pointer to this instance's context
- * \retval #AOM_CODEC_OK
- *     The input stream was recognized and decoder initialized.
- * \retval #AOM_CODEC_MEM_ERROR
- *     Memory operation failed.
- */
-typedef aom_codec_err_t (*aom_codec_init_fn_t)(
-    aom_codec_ctx_t *ctx, aom_codec_priv_enc_mr_cfg_t *data);
-
-/*!\brief destroy function pointer prototype
- *
- * Performs algorithm-specific destruction of the decoder context. This
- * function is called by the generic aom_codec_destroy() wrapper function,
- * so plugins implementing this interface may trust the input parameters
- * to be properly initialized.
- *
- * \param[in] ctx   Pointer to this instance's context
- * \retval #AOM_CODEC_OK
- *     The algorithm-specific context was destroyed.
- * \retval #AOM_CODEC_MEM_ERROR
- *     Memory operation failed.
- */
-typedef aom_codec_err_t (*aom_codec_destroy_fn_t)(aom_codec_alg_priv_t *ctx);
-
-/*!\brief parse stream info function pointer prototype
- *
- * Performs high level parsing of the bitstream. This function is called by the
- * generic aom_codec_peek_stream_info() wrapper function, so plugins
- * implementing this interface may trust the input parameters to be properly
- * initialized.
- *
- * \param[in]      data    Pointer to a block of data to parse
- * \param[in]      data_sz Size of the data buffer
- * \param[in,out]  si      Pointer to stream info to update. The size member
- *                         \ref MUST be properly initialized, but \ref MAY be
- *                         clobbered by the algorithm. This parameter \ref MAY
- *                         be NULL.
- *
- * \retval #AOM_CODEC_OK
- *     Bitstream is parsable and stream information updated
- */
-typedef aom_codec_err_t (*aom_codec_peek_si_fn_t)(const uint8_t *data,
-                                                  unsigned int data_sz,
-                                                  aom_codec_stream_info_t *si);
-
-/*!\brief Return information about the current stream.
- *
- * Returns information about the stream that has been parsed during decoding.
- *
- * \param[in]      ctx     Pointer to this instance's context
- * \param[in,out]  si      Pointer to stream info to update. The size member
- *                         \ref MUST be properly initialized, but \ref MAY be
- *                         clobbered by the algorithm. This parameter \ref MAY
- *                         be NULL.
- *
- * \retval #AOM_CODEC_OK
- *     Bitstream is parsable and stream information updated
- */
-typedef aom_codec_err_t (*aom_codec_get_si_fn_t)(aom_codec_alg_priv_t *ctx,
-                                                 aom_codec_stream_info_t *si);
-
-/*!\brief control function pointer prototype
- *
- * This function is used to exchange algorithm specific data with the decoder
- * instance. This can be used to implement features specific to a particular
- * algorithm.
- *
- * This function is called by the generic aom_codec_control() wrapper
- * function, so plugins implementing this interface may trust the input
- * parameters to be properly initialized. However,  this interface does not
- * provide type safety for the exchanged data or assign meanings to the
- * control codes. Those details should be specified in the algorithm's
- * header file. In particular, the ctrl_id parameter is guaranteed to exist
- * in the algorithm's control mapping table, and the data parameter may be NULL.
- *
- *
- * \param[in]     ctx              Pointer to this instance's context
- * \param[in,out] ap               Data to exchange with algorithm instance,
- *                                 i.e. the arguments that followed ctrl_id.
- *
- * \retval #AOM_CODEC_OK
- *     The internal state data was deserialized.
- */
-typedef aom_codec_err_t (*aom_codec_control_fn_t)(aom_codec_alg_priv_t *ctx,
-                                                  va_list ap);
-
-/*!\brief control function pointer mapping
- *
- * This structure stores the mapping between control identifiers and
- * implementing functions. Each algorithm provides a list of these
- * mappings. This list is searched by the aom_codec_control() wrapper
- * function to determine which function to invoke. The special
- * value {0, NULL} is used to indicate end-of-list, and must be
- * present. The special value {0, <non-null>} can be used as a catch-all
- * mapping. This implies that ctrl_id values chosen by the algorithm
- * \ref MUST be non-zero.
- */
-typedef const struct aom_codec_ctrl_fn_map {
-  int ctrl_id;
-  aom_codec_control_fn_t fn;
-} aom_codec_ctrl_fn_map_t;
-
-/*!\brief decode data function pointer prototype
- *
- * Processes a buffer of coded data. If the processing results in a new
- * decoded frame becoming available, #AOM_CODEC_CB_PUT_SLICE and
- * #AOM_CODEC_CB_PUT_FRAME events are generated as appropriate. This
- * function is called by the generic aom_codec_decode() wrapper function,
- * so plugins implementing this interface may trust the input parameters
- * to be properly initialized.
- *
- * \param[in] ctx          Pointer to this instance's context
- * \param[in] data         Pointer to this block of new coded data. If
- *                         NULL, a #AOM_CODEC_CB_PUT_FRAME event is posted
- *                         for the previously decoded frame.
- * \param[in] data_sz      Size of the coded data, in bytes.
- *
- * \return Returns #AOM_CODEC_OK if the coded data was processed completely
- *         and future pictures can be decoded without error. Otherwise,
- *         see the descriptions of the other error codes in ::aom_codec_err_t
- *         for recoverability capabilities.
- */
-typedef aom_codec_err_t (*aom_codec_decode_fn_t)(aom_codec_alg_priv_t *ctx,
-                                                 const uint8_t *data,
-                                                 unsigned int data_sz,
-                                                 void *user_priv,
-                                                 long deadline);
-
-/*!\brief Decoded frames iterator
- *
- * Iterates over a list of the frames available for display. The iterator
- * storage should be initialized to NULL to start the iteration. Iteration is
- * complete when this function returns NULL.
- *
- * The list of available frames becomes valid upon completion of the
- * aom_codec_decode call, and remains valid until the next call to
- * aom_codec_decode.
- *
- * \param[in]     ctx      Pointer to this instance's context
- * \param[in,out] iter     Iterator storage, initialized to NULL
- *
- * \return Returns a pointer to an image, if one is ready for display. Frames
- *         produced will always be in PTS (presentation time stamp) order.
- */
-typedef aom_image_t *(*aom_codec_get_frame_fn_t)(aom_codec_alg_priv_t *ctx,
-                                                 aom_codec_iter_t *iter);
-
-/*!\brief Pass in external frame buffers for the decoder to use.
- *
- * Registers functions to be called when libaom needs a frame buffer
- * to decode the current frame and a function to be called when libaom does
- * not internally reference the frame buffer. This set function must
- * be called before the first call to decode or libaom will assume the
- * default behavior of allocating frame buffers internally.
- *
- * \param[in] ctx          Pointer to this instance's context
- * \param[in] cb_get       Pointer to the get callback function
- * \param[in] cb_release   Pointer to the release callback function
- * \param[in] cb_priv      Callback's private data
- *
- * \retval #AOM_CODEC_OK
- *     External frame buffers will be used by libaom.
- * \retval #AOM_CODEC_INVALID_PARAM
- *     One or more of the callbacks were NULL.
- * \retval #AOM_CODEC_ERROR
- *     Decoder context not initialized, or algorithm not capable of
- *     using external frame buffers.
- *
- * \note
- * When decoding AV1, the application may be required to pass in at least
- * #AOM_MAXIMUM_WORK_BUFFERS external frame
- * buffers.
- */
-typedef aom_codec_err_t (*aom_codec_set_fb_fn_t)(
-    aom_codec_alg_priv_t *ctx, aom_get_frame_buffer_cb_fn_t cb_get,
-    aom_release_frame_buffer_cb_fn_t cb_release, void *cb_priv);
-
-typedef aom_codec_err_t (*aom_codec_encode_fn_t)(aom_codec_alg_priv_t *ctx,
-                                                 const aom_image_t *img,
-                                                 aom_codec_pts_t pts,
-                                                 unsigned long duration,
-                                                 aom_enc_frame_flags_t flags,
-                                                 unsigned long deadline);
-typedef const aom_codec_cx_pkt_t *(*aom_codec_get_cx_data_fn_t)(
-    aom_codec_alg_priv_t *ctx, aom_codec_iter_t *iter);
-
-typedef aom_codec_err_t (*aom_codec_enc_config_set_fn_t)(
-    aom_codec_alg_priv_t *ctx, const aom_codec_enc_cfg_t *cfg);
-typedef aom_fixed_buf_t *(*aom_codec_get_global_headers_fn_t)(
-    aom_codec_alg_priv_t *ctx);
-
-typedef aom_image_t *(*aom_codec_get_preview_frame_fn_t)(
-    aom_codec_alg_priv_t *ctx);
-
-typedef aom_codec_err_t (*aom_codec_enc_mr_get_mem_loc_fn_t)(
-    const aom_codec_enc_cfg_t *cfg, void **mem_loc);
-
-/*!\brief usage configuration mapping
- *
- * This structure stores the mapping between usage identifiers and
- * configuration structures. Each algorithm provides a list of these
- * mappings. This list is searched by the aom_codec_enc_config_default()
- * wrapper function to determine which config to return. The special value
- * {-1, {0}} is used to indicate end-of-list, and must be present. At least
- * one mapping must be present, in addition to the end-of-list.
- *
- */
-typedef const struct aom_codec_enc_cfg_map {
-  int usage;
-  aom_codec_enc_cfg_t cfg;
-} aom_codec_enc_cfg_map_t;
-
-/*!\brief Decoder algorithm interface
- *
- * All decoders \ref MUST expose a variable of this type.
- */
-struct aom_codec_iface {
-  const char *name;                   /**< Identification String  */
-  int abi_version;                    /**< Implemented ABI version */
-  aom_codec_caps_t caps;              /**< Decoder capabilities */
-  aom_codec_init_fn_t init;           /**< \copydoc ::aom_codec_init_fn_t */
-  aom_codec_destroy_fn_t destroy;     /**< \copydoc ::aom_codec_destroy_fn_t */
-  aom_codec_ctrl_fn_map_t *ctrl_maps; /**< \copydoc ::aom_codec_ctrl_fn_map_t */
-  struct aom_codec_dec_iface {
-    aom_codec_peek_si_fn_t peek_si; /**< \copydoc ::aom_codec_peek_si_fn_t */
-    aom_codec_get_si_fn_t get_si;   /**< \copydoc ::aom_codec_get_si_fn_t */
-    aom_codec_decode_fn_t decode;   /**< \copydoc ::aom_codec_decode_fn_t */
-    aom_codec_get_frame_fn_t
-        get_frame;                   /**< \copydoc ::aom_codec_get_frame_fn_t */
-    aom_codec_set_fb_fn_t set_fb_fn; /**< \copydoc ::aom_codec_set_fb_fn_t */
-  } dec;
-  struct aom_codec_enc_iface {
-    int cfg_map_count;
-    aom_codec_enc_cfg_map_t
-        *cfg_maps;                /**< \copydoc ::aom_codec_enc_cfg_map_t */
-    aom_codec_encode_fn_t encode; /**< \copydoc ::aom_codec_encode_fn_t */
-    aom_codec_get_cx_data_fn_t
-        get_cx_data; /**< \copydoc ::aom_codec_get_cx_data_fn_t */
-    aom_codec_enc_config_set_fn_t
-        cfg_set; /**< \copydoc ::aom_codec_enc_config_set_fn_t */
-    aom_codec_get_global_headers_fn_t
-        get_glob_hdrs; /**< \copydoc ::aom_codec_get_global_headers_fn_t */
-    aom_codec_get_preview_frame_fn_t
-        get_preview; /**< \copydoc ::aom_codec_get_preview_frame_fn_t */
-    aom_codec_enc_mr_get_mem_loc_fn_t
-        mr_get_mem_loc; /**< \copydoc ::aom_codec_enc_mr_get_mem_loc_fn_t */
-  } enc;
-};
-
-/*!\brief Callback function pointer / user data pair storage */
-typedef struct aom_codec_priv_cb_pair {
-  union {
-    aom_codec_put_frame_cb_fn_t put_frame;
-    aom_codec_put_slice_cb_fn_t put_slice;
-  } u;
-  void *user_priv;
-} aom_codec_priv_cb_pair_t;
-
-/*!\brief Instance private storage
- *
- * This structure is allocated by the algorithm's init function. It can be
- * extended in one of two ways. First, a second, algorithm specific structure
- * can be allocated and the priv member pointed to it. Alternatively, this
- * structure can be made the first member of the algorithm specific structure,
- * and the pointer cast to the proper type.
- */
-struct aom_codec_priv {
-  const char *err_detail;
-  aom_codec_flags_t init_flags;
-  struct {
-    aom_codec_priv_cb_pair_t put_frame_cb;
-    aom_codec_priv_cb_pair_t put_slice_cb;
-  } dec;
-  struct {
-    aom_fixed_buf_t cx_data_dst_buf;
-    unsigned int cx_data_pad_before;
-    unsigned int cx_data_pad_after;
-    aom_codec_cx_pkt_t cx_data_pkt;
-    unsigned int total_encoders;
-  } enc;
-};
-
-/*
- * Multi-resolution encoding internal configuration
- */
-struct aom_codec_priv_enc_mr_cfg {
-  unsigned int mr_total_resolutions;
-  unsigned int mr_encoder_id;
-  struct aom_rational mr_down_sampling_factor;
-  void *mr_low_res_mode_info;
-};
-
-#undef AOM_CTRL_USE_TYPE
-#define AOM_CTRL_USE_TYPE(id, typ) \
-  static AOM_INLINE typ id##__value(va_list args) { return va_arg(args, typ); }
-
-#undef AOM_CTRL_USE_TYPE_DEPRECATED
-#define AOM_CTRL_USE_TYPE_DEPRECATED(id, typ) \
-  static AOM_INLINE typ id##__value(va_list args) { return va_arg(args, typ); }
-
-#define CAST(id, arg) id##__value(arg)
-
-/* CODEC_INTERFACE convenience macro
- *
- * By convention, each codec interface is a struct with extern linkage, where
- * the symbol is suffixed with _algo. A getter function is also defined to
- * return a pointer to the struct, since in some cases it's easier to work
- * with text symbols than data symbols (see issue #169). This function has
- * the same name as the struct, less the _algo suffix. The CODEC_INTERFACE
- * macro is provided to define this getter function automatically.
- */
-#define CODEC_INTERFACE(id)                          \
-  aom_codec_iface_t *id(void) { return &id##_algo; } \
-  aom_codec_iface_t id##_algo
-
-/* Internal Utility Functions
- *
- * The following functions are intended to be used inside algorithms as
- * utilities for manipulating aom_codec_* data structures.
- */
-struct aom_codec_pkt_list {
-  unsigned int cnt;
-  unsigned int max;
-  struct aom_codec_cx_pkt pkts[1];
-};
-
-#define aom_codec_pkt_list_decl(n)     \
-  union {                              \
-    struct aom_codec_pkt_list head;    \
-    struct {                           \
-      struct aom_codec_pkt_list head;  \
-      struct aom_codec_cx_pkt pkts[n]; \
-    } alloc;                           \
-  }
-
-#define aom_codec_pkt_list_init(m) \
-  ((m)->alloc.head.cnt = 0,        \
-   (m)->alloc.head.max = sizeof((m)->alloc.pkts) / sizeof((m)->alloc.pkts[0]))
-
-int aom_codec_pkt_list_add(struct aom_codec_pkt_list *,
-                           const struct aom_codec_cx_pkt *);
-
-const aom_codec_cx_pkt_t *aom_codec_pkt_list_get(
-    struct aom_codec_pkt_list *list, aom_codec_iter_t *iter);
-
-#include <stdio.h>
-#include <setjmp.h>
-
-struct aom_internal_error_info {
-  aom_codec_err_t error_code;
-  int has_detail;
-  char detail[80];
-  int setjmp;
-  jmp_buf jmp;
-};
-
-#define CLANG_ANALYZER_NORETURN
-#if defined(__has_feature)
-#if __has_feature(attribute_analyzer_noreturn)
-#undef CLANG_ANALYZER_NORETURN
-#define CLANG_ANALYZER_NORETURN __attribute__((analyzer_noreturn))
-#endif
-#endif
-
-void aom_internal_error(struct aom_internal_error_info *info,
-                        aom_codec_err_t error, const char *fmt,
-                        ...) CLANG_ANALYZER_NORETURN;
-
-#if CONFIG_DEBUG
-#define AOM_CHECK_MEM_ERROR(error_info, lval, expr)                         \
-  do {                                                                      \
-    lval = (expr);                                                          \
-    if (!lval)                                                              \
-      aom_internal_error(error_info, AOM_CODEC_MEM_ERROR,                   \
-                         "Failed to allocate " #lval " at %s:%d", __FILE__, \
-                         __LINE__);                                         \
-  } while (0)
-#else
-#define AOM_CHECK_MEM_ERROR(error_info, lval, expr)       \
-  do {                                                    \
-    lval = (expr);                                        \
-    if (!lval)                                            \
-      aom_internal_error(error_info, AOM_CODEC_MEM_ERROR, \
-                         "Failed to allocate " #lval);    \
-  } while (0)
-#endif
-#ifdef __cplusplus
-}  // extern "C"
-#endif
-
-#endif  // AOM_INTERNAL_AOM_CODEC_INTERNAL_H_
--- a/aom/src/aom_codec.c
+++ b/aom/src/aom_codec.c
@@ -1,134 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-/*!\file
- * \brief Provides the high level interface to wrap decoder algorithms.
- *
- */
-#include <stdarg.h>
-#include <stdlib.h>
-#include "aom/aom_integer.h"
-#include "aom/internal/aom_codec_internal.h"
-#include "aom_version.h"
-
-#define SAVE_STATUS(ctx, var) ((ctx) ? ((ctx)->err = (var)) : (var))
-
-int aom_codec_version(void) { return VERSION_PACKED; }
-
-const char *aom_codec_version_str(void) { return VERSION_STRING_NOSP; }
-
-const char *aom_codec_version_extra_str(void) { return VERSION_EXTRA; }
-
-const char *aom_codec_iface_name(aom_codec_iface_t *iface) {
-  return iface ? iface->name : "<invalid interface>";
-}
-
-const char *aom_codec_err_to_string(aom_codec_err_t err) {
-  switch (err) {
-    case AOM_CODEC_OK: return "Success";
-    case AOM_CODEC_ERROR: return "Unspecified internal error";
-    case AOM_CODEC_MEM_ERROR: return "Memory allocation error";
-    case AOM_CODEC_ABI_MISMATCH: return "ABI version mismatch";
-    case AOM_CODEC_INCAPABLE:
-      return "Codec does not implement requested capability";
-    case AOM_CODEC_UNSUP_BITSTREAM:
-      return "Bitstream not supported by this decoder";
-    case AOM_CODEC_UNSUP_FEATURE:
-      return "Bitstream required feature not supported by this decoder";
-    case AOM_CODEC_CORRUPT_FRAME: return "Corrupt frame detected";
-    case AOM_CODEC_INVALID_PARAM: return "Invalid parameter";
-    case AOM_CODEC_LIST_END: return "End of iterated list";
-  }
-
-  return "Unrecognized error code";
-}
-
-const char *aom_codec_error(aom_codec_ctx_t *ctx) {
-  return (ctx) ? aom_codec_err_to_string(ctx->err)
-               : aom_codec_err_to_string(AOM_CODEC_INVALID_PARAM);
-}
-
-const char *aom_codec_error_detail(aom_codec_ctx_t *ctx) {
-  if (ctx && ctx->err)
-    return ctx->priv ? ctx->priv->err_detail : ctx->err_detail;
-
-  return NULL;
-}
-
-aom_codec_err_t aom_codec_destroy(aom_codec_ctx_t *ctx) {
-  aom_codec_err_t res;
-
-  if (!ctx)
-    res = AOM_CODEC_INVALID_PARAM;
-  else if (!ctx->iface || !ctx->priv)
-    res = AOM_CODEC_ERROR;
-  else {
-    ctx->iface->destroy((aom_codec_alg_priv_t *)ctx->priv);
-
-    ctx->iface = NULL;
-    ctx->name = NULL;
-    ctx->priv = NULL;
-    res = AOM_CODEC_OK;
-  }
-
-  return SAVE_STATUS(ctx, res);
-}
-
-aom_codec_caps_t aom_codec_get_caps(aom_codec_iface_t *iface) {
-  return (iface) ? iface->caps : 0;
-}
-
-aom_codec_err_t aom_codec_control_(aom_codec_ctx_t *ctx, int ctrl_id, ...) {
-  aom_codec_err_t res;
-
-  if (!ctx || !ctrl_id)
-    res = AOM_CODEC_INVALID_PARAM;
-  else if (!ctx->iface || !ctx->priv || !ctx->iface->ctrl_maps)
-    res = AOM_CODEC_ERROR;
-  else {
-    aom_codec_ctrl_fn_map_t *entry;
-
-    res = AOM_CODEC_ERROR;
-
-    for (entry = ctx->iface->ctrl_maps; entry && entry->fn; entry++) {
-      if (!entry->ctrl_id || entry->ctrl_id == ctrl_id) {
-        va_list ap;
-
-        va_start(ap, ctrl_id);
-        res = entry->fn((aom_codec_alg_priv_t *)ctx->priv, ap);
-        va_end(ap);
-        break;
-      }
-    }
-  }
-
-  return SAVE_STATUS(ctx, res);
-}
-
-void aom_internal_error(struct aom_internal_error_info *info,
-                        aom_codec_err_t error, const char *fmt, ...) {
-  va_list ap;
-
-  info->error_code = error;
-  info->has_detail = 0;
-
-  if (fmt) {
-    size_t sz = sizeof(info->detail);
-
-    info->has_detail = 1;
-    va_start(ap, fmt);
-    vsnprintf(info->detail, sz - 1, fmt, ap);
-    va_end(ap);
-    info->detail[sz - 1] = '\0';
-  }
-
-  if (info->setjmp) longjmp(info->jmp, info->error_code);
-}
--- a/aom/src/aom_decoder.c
+++ b/aom/src/aom_decoder.c
@@ -1,189 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-/*!\file
- * \brief Provides the high level interface to wrap decoder algorithms.
- *
- */
-#include <string.h>
-#include "aom/internal/aom_codec_internal.h"
-
-#define SAVE_STATUS(ctx, var) ((ctx) ? ((ctx)->err = (var)) : (var))
-
-static aom_codec_alg_priv_t *get_alg_priv(aom_codec_ctx_t *ctx) {
-  return (aom_codec_alg_priv_t *)ctx->priv;
-}
-
-aom_codec_err_t aom_codec_dec_init_ver(aom_codec_ctx_t *ctx,
-                                       aom_codec_iface_t *iface,
-                                       const aom_codec_dec_cfg_t *cfg,
-                                       aom_codec_flags_t flags, int ver) {
-  aom_codec_err_t res;
-
-  if (ver != AOM_DECODER_ABI_VERSION)
-    res = AOM_CODEC_ABI_MISMATCH;
-  else if (!ctx || !iface)
-    res = AOM_CODEC_INVALID_PARAM;
-  else if (iface->abi_version != AOM_CODEC_INTERNAL_ABI_VERSION)
-    res = AOM_CODEC_ABI_MISMATCH;
-  else if ((flags & AOM_CODEC_USE_POSTPROC) &&
-           !(iface->caps & AOM_CODEC_CAP_POSTPROC))
-    res = AOM_CODEC_INCAPABLE;
-  else if ((flags & AOM_CODEC_USE_ERROR_CONCEALMENT) &&
-           !(iface->caps & AOM_CODEC_CAP_ERROR_CONCEALMENT))
-    res = AOM_CODEC_INCAPABLE;
-  else if ((flags & AOM_CODEC_USE_INPUT_FRAGMENTS) &&
-           !(iface->caps & AOM_CODEC_CAP_INPUT_FRAGMENTS))
-    res = AOM_CODEC_INCAPABLE;
-  else if (!(iface->caps & AOM_CODEC_CAP_DECODER))
-    res = AOM_CODEC_INCAPABLE;
-  else {
-    memset(ctx, 0, sizeof(*ctx));
-    ctx->iface = iface;
-    ctx->name = iface->name;
-    ctx->priv = NULL;
-    ctx->init_flags = flags;
-    ctx->config.dec = cfg;
-
-    res = ctx->iface->init(ctx, NULL);
-    if (res) {
-      ctx->err_detail = ctx->priv ? ctx->priv->err_detail : NULL;
-      aom_codec_destroy(ctx);
-    }
-  }
-
-  return SAVE_STATUS(ctx, res);
-}
-
-aom_codec_err_t aom_codec_peek_stream_info(aom_codec_iface_t *iface,
-                                           const uint8_t *data,
-                                           unsigned int data_sz,
-                                           aom_codec_stream_info_t *si) {
-  aom_codec_err_t res;
-
-  if (!iface || !data || !data_sz || !si ||
-      si->sz < sizeof(aom_codec_stream_info_t))
-    res = AOM_CODEC_INVALID_PARAM;
-  else {
-    /* Set default/unknown values */
-    si->w = 0;
-    si->h = 0;
-
-    res = iface->dec.peek_si(data, data_sz, si);
-  }
-
-  return res;
-}
-
-aom_codec_err_t aom_codec_get_stream_info(aom_codec_ctx_t *ctx,
-                                          aom_codec_stream_info_t *si) {
-  aom_codec_err_t res;
-
-  if (!ctx || !si || si->sz < sizeof(aom_codec_stream_info_t))
-    res = AOM_CODEC_INVALID_PARAM;
-  else if (!ctx->iface || !ctx->priv)
-    res = AOM_CODEC_ERROR;
-  else {
-    /* Set default/unknown values */
-    si->w = 0;
-    si->h = 0;
-
-    res = ctx->iface->dec.get_si(get_alg_priv(ctx), si);
-  }
-
-  return SAVE_STATUS(ctx, res);
-}
-
-aom_codec_err_t aom_codec_decode(aom_codec_ctx_t *ctx, const uint8_t *data,
-                                 unsigned int data_sz, void *user_priv,
-                                 long deadline) {
-  aom_codec_err_t res;
-
-  /* Sanity checks */
-  /* NULL data ptr allowed if data_sz is 0 too */
-  if (!ctx || (!data && data_sz) || (data && !data_sz))
-    res = AOM_CODEC_INVALID_PARAM;
-  else if (!ctx->iface || !ctx->priv)
-    res = AOM_CODEC_ERROR;
-  else {
-    res = ctx->iface->dec.decode(get_alg_priv(ctx), data, data_sz, user_priv,
-                                 deadline);
-  }
-
-  return SAVE_STATUS(ctx, res);
-}
-
-aom_image_t *aom_codec_get_frame(aom_codec_ctx_t *ctx, aom_codec_iter_t *iter) {
-  aom_image_t *img;
-
-  if (!ctx || !iter || !ctx->iface || !ctx->priv)
-    img = NULL;
-  else
-    img = ctx->iface->dec.get_frame(get_alg_priv(ctx), iter);
-
-  return img;
-}
-
-aom_codec_err_t aom_codec_register_put_frame_cb(aom_codec_ctx_t *ctx,
-                                                aom_codec_put_frame_cb_fn_t cb,
-                                                void *user_priv) {
-  aom_codec_err_t res;
-
-  if (!ctx || !cb)
-    res = AOM_CODEC_INVALID_PARAM;
-  else if (!ctx->iface || !ctx->priv ||
-           !(ctx->iface->caps & AOM_CODEC_CAP_PUT_FRAME))
-    res = AOM_CODEC_ERROR;
-  else {
-    ctx->priv->dec.put_frame_cb.u.put_frame = cb;
-    ctx->priv->dec.put_frame_cb.user_priv = user_priv;
-    res = AOM_CODEC_OK;
-  }
-
-  return SAVE_STATUS(ctx, res);
-}
-
-aom_codec_err_t aom_codec_register_put_slice_cb(aom_codec_ctx_t *ctx,
-                                                aom_codec_put_slice_cb_fn_t cb,
-                                                void *user_priv) {
-  aom_codec_err_t res;
-
-  if (!ctx || !cb)
-    res = AOM_CODEC_INVALID_PARAM;
-  else if (!ctx->iface || !ctx->priv ||
-           !(ctx->iface->caps & AOM_CODEC_CAP_PUT_SLICE))
-    res = AOM_CODEC_ERROR;
-  else {
-    ctx->priv->dec.put_slice_cb.u.put_slice = cb;
-    ctx->priv->dec.put_slice_cb.user_priv = user_priv;
-    res = AOM_CODEC_OK;
-  }
-
-  return SAVE_STATUS(ctx, res);
-}
-
-aom_codec_err_t aom_codec_set_frame_buffer_functions(
-    aom_codec_ctx_t *ctx, aom_get_frame_buffer_cb_fn_t cb_get,
-    aom_release_frame_buffer_cb_fn_t cb_release, void *cb_priv) {
-  aom_codec_err_t res;
-
-  if (!ctx || !cb_get || !cb_release) {
-    res = AOM_CODEC_INVALID_PARAM;
-  } else if (!ctx->iface || !ctx->priv ||
-             !(ctx->iface->caps & AOM_CODEC_CAP_EXTERNAL_FRAME_BUFFER)) {
-    res = AOM_CODEC_ERROR;
-  } else {
-    res = ctx->iface->dec.set_fb_fn(get_alg_priv(ctx), cb_get, cb_release,
-                                    cb_priv);
-  }
-
-  return SAVE_STATUS(ctx, res);
-}
--- a/aom/src/aom_encoder.c
+++ b/aom/src/aom_encoder.c
@@ -1,380 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-/*!\file
- * \brief Provides the high level interface to wrap encoder algorithms.
- *
- */
-#include <limits.h>
-#include <string.h>
-#include "aom_config.h"
-#include "aom/internal/aom_codec_internal.h"
-
-#define SAVE_STATUS(ctx, var) (ctx ? (ctx->err = var) : var)
-
-static aom_codec_alg_priv_t *get_alg_priv(aom_codec_ctx_t *ctx) {
-  return (aom_codec_alg_priv_t *)ctx->priv;
-}
-
-aom_codec_err_t aom_codec_enc_init_ver(aom_codec_ctx_t *ctx,
-                                       aom_codec_iface_t *iface,
-                                       const aom_codec_enc_cfg_t *cfg,
-                                       aom_codec_flags_t flags, int ver) {
-  aom_codec_err_t res;
-
-  if (ver != AOM_ENCODER_ABI_VERSION)
-    res = AOM_CODEC_ABI_MISMATCH;
-  else if (!ctx || !iface || !cfg)
-    res = AOM_CODEC_INVALID_PARAM;
-  else if (iface->abi_version != AOM_CODEC_INTERNAL_ABI_VERSION)
-    res = AOM_CODEC_ABI_MISMATCH;
-  else if (!(iface->caps & AOM_CODEC_CAP_ENCODER))
-    res = AOM_CODEC_INCAPABLE;
-  else if ((flags & AOM_CODEC_USE_PSNR) && !(iface->caps & AOM_CODEC_CAP_PSNR))
-    res = AOM_CODEC_INCAPABLE;
-  else if ((flags & AOM_CODEC_USE_OUTPUT_PARTITION) &&
-           !(iface->caps & AOM_CODEC_CAP_OUTPUT_PARTITION))
-    res = AOM_CODEC_INCAPABLE;
-  else {
-    ctx->iface = iface;
-    ctx->name = iface->name;
-    ctx->priv = NULL;
-    ctx->init_flags = flags;
-    ctx->config.enc = cfg;
-    res = ctx->iface->init(ctx, NULL);
-
-    if (res) {
-      ctx->err_detail = ctx->priv ? ctx->priv->err_detail : NULL;
-      aom_codec_destroy(ctx);
-    }
-  }
-
-  return SAVE_STATUS(ctx, res);
-}
-
-aom_codec_err_t aom_codec_enc_init_multi_ver(
-    aom_codec_ctx_t *ctx, aom_codec_iface_t *iface, aom_codec_enc_cfg_t *cfg,
-    int num_enc, aom_codec_flags_t flags, aom_rational_t *dsf, int ver) {
-  aom_codec_err_t res = AOM_CODEC_OK;
-
-  if (ver != AOM_ENCODER_ABI_VERSION)
-    res = AOM_CODEC_ABI_MISMATCH;
-  else if (!ctx || !iface || !cfg || (num_enc > 16 || num_enc < 1))
-    res = AOM_CODEC_INVALID_PARAM;
-  else if (iface->abi_version != AOM_CODEC_INTERNAL_ABI_VERSION)
-    res = AOM_CODEC_ABI_MISMATCH;
-  else if (!(iface->caps & AOM_CODEC_CAP_ENCODER))
-    res = AOM_CODEC_INCAPABLE;
-  else if ((flags & AOM_CODEC_USE_PSNR) && !(iface->caps & AOM_CODEC_CAP_PSNR))
-    res = AOM_CODEC_INCAPABLE;
-  else if ((flags & AOM_CODEC_USE_OUTPUT_PARTITION) &&
-           !(iface->caps & AOM_CODEC_CAP_OUTPUT_PARTITION))
-    res = AOM_CODEC_INCAPABLE;
-  else {
-    int i;
-    void *mem_loc = NULL;
-
-    if (!(res = iface->enc.mr_get_mem_loc(cfg, &mem_loc))) {
-      for (i = 0; i < num_enc; i++) {
-        aom_codec_priv_enc_mr_cfg_t mr_cfg;
-
-        /* Validate down-sampling factor. */
-        if (dsf->num < 1 || dsf->num > 4096 || dsf->den < 1 ||
-            dsf->den > dsf->num) {
-          res = AOM_CODEC_INVALID_PARAM;
-          break;
-        }
-
-        mr_cfg.mr_low_res_mode_info = mem_loc;
-        mr_cfg.mr_total_resolutions = num_enc;
-        mr_cfg.mr_encoder_id = num_enc - 1 - i;
-        mr_cfg.mr_down_sampling_factor.num = dsf->num;
-        mr_cfg.mr_down_sampling_factor.den = dsf->den;
-
-        /* Force Key-frame synchronization. Namely, encoder at higher
-         * resolution always use the same frame_type chosen by the
-         * lowest-resolution encoder.
-         */
-        if (mr_cfg.mr_encoder_id) cfg->kf_mode = AOM_KF_DISABLED;
-
-        ctx->iface = iface;
-        ctx->name = iface->name;
-        ctx->priv = NULL;
-        ctx->init_flags = flags;
-        ctx->config.enc = cfg;
-        res = ctx->iface->init(ctx, &mr_cfg);
-
-        if (res) {
-          const char *error_detail = ctx->priv ? ctx->priv->err_detail : NULL;
-          /* Destroy current ctx */
-          ctx->err_detail = error_detail;
-          aom_codec_destroy(ctx);
-
-          /* Destroy already allocated high-level ctx */
-          while (i) {
-            ctx--;
-            ctx->err_detail = error_detail;
-            aom_codec_destroy(ctx);
-            i--;
-          }
-        }
-
-        if (res) break;
-
-        ctx++;
-        cfg++;
-        dsf++;
-      }
-      ctx--;
-    }
-  }
-
-  return SAVE_STATUS(ctx, res);
-}
-
-aom_codec_err_t aom_codec_enc_config_default(aom_codec_iface_t *iface,
-                                             aom_codec_enc_cfg_t *cfg,
-                                             unsigned int usage) {
-  aom_codec_err_t res;
-  aom_codec_enc_cfg_map_t *map;
-  int i;
-
-  if (!iface || !cfg || usage > INT_MAX)
-    res = AOM_CODEC_INVALID_PARAM;
-  else if (!(iface->caps & AOM_CODEC_CAP_ENCODER))
-    res = AOM_CODEC_INCAPABLE;
-  else {
-    res = AOM_CODEC_INVALID_PARAM;
-
-    for (i = 0; i < iface->enc.cfg_map_count; ++i) {
-      map = iface->enc.cfg_maps + i;
-      if (map->usage == (int)usage) {
-        *cfg = map->cfg;
-        cfg->g_usage = usage;
-        res = AOM_CODEC_OK;
-        break;
-      }
-    }
-  }
-
-  return res;
-}
-
-#if ARCH_X86 || ARCH_X86_64
-/* On X86, disable the x87 unit's internal 80 bit precision for better
- * consistency with the SSE unit's 64 bit precision.
- */
-#include "aom_ports/x86.h"
-#define FLOATING_POINT_INIT() \
-  do {                        \
-    unsigned short x87_orig_mode = x87_set_double_precision();
-#define FLOATING_POINT_RESTORE()       \
-  x87_set_control_word(x87_orig_mode); \
-  }                                    \
-  while (0)
-
-#else
-static void FLOATING_POINT_INIT() {}
-static void FLOATING_POINT_RESTORE() {}
-#endif
-
-aom_codec_err_t aom_codec_encode(aom_codec_ctx_t *ctx, const aom_image_t *img,
-                                 aom_codec_pts_t pts, unsigned long duration,
-                                 aom_enc_frame_flags_t flags,
-                                 unsigned long deadline) {
-  aom_codec_err_t res = AOM_CODEC_OK;
-
-  if (!ctx || (img && !duration))
-    res = AOM_CODEC_INVALID_PARAM;
-  else if (!ctx->iface || !ctx->priv)
-    res = AOM_CODEC_ERROR;
-  else if (!(ctx->iface->caps & AOM_CODEC_CAP_ENCODER))
-    res = AOM_CODEC_INCAPABLE;
-  else {
-    unsigned int num_enc = ctx->priv->enc.total_encoders;
-
-    /* Execute in a normalized floating point environment, if the platform
-     * requires it.
-     */
-    FLOATING_POINT_INIT();
-
-    if (num_enc == 1)
-      res = ctx->iface->enc.encode(get_alg_priv(ctx), img, pts, duration, flags,
-                                   deadline);
-    else {
-      /* Multi-resolution encoding:
-       * Encode multi-levels in reverse order. For example,
-       * if mr_total_resolutions = 3, first encode level 2,
-       * then encode level 1, and finally encode level 0.
-       */
-      int i;
-
-      ctx += num_enc - 1;
-      if (img) img += num_enc - 1;
-
-      for (i = num_enc - 1; i >= 0; i--) {
-        if ((res = ctx->iface->enc.encode(get_alg_priv(ctx), img, pts, duration,
-                                          flags, deadline)))
-          break;
-
-        ctx--;
-        if (img) img--;
-      }
-      ctx++;
-    }
-
-    FLOATING_POINT_RESTORE();
-  }
-
-  return SAVE_STATUS(ctx, res);
-}
-
-const aom_codec_cx_pkt_t *aom_codec_get_cx_data(aom_codec_ctx_t *ctx,
-                                                aom_codec_iter_t *iter) {
-  const aom_codec_cx_pkt_t *pkt = NULL;
-
-  if (ctx) {
-    if (!iter)
-      ctx->err = AOM_CODEC_INVALID_PARAM;
-    else if (!ctx->iface || !ctx->priv)
-      ctx->err = AOM_CODEC_ERROR;
-    else if (!(ctx->iface->caps & AOM_CODEC_CAP_ENCODER))
-      ctx->err = AOM_CODEC_INCAPABLE;
-    else
-      pkt = ctx->iface->enc.get_cx_data(get_alg_priv(ctx), iter);
-  }
-
-  if (pkt && pkt->kind == AOM_CODEC_CX_FRAME_PKT) {
-    // If the application has specified a destination area for the
-    // compressed data, and the codec has not placed the data there,
-    // and it fits, copy it.
-    aom_codec_priv_t *const priv = ctx->priv;
-    char *const dst_buf = (char *)priv->enc.cx_data_dst_buf.buf;
-
-    if (dst_buf && pkt->data.raw.buf != dst_buf &&
-        pkt->data.raw.sz + priv->enc.cx_data_pad_before +
-                priv->enc.cx_data_pad_after <=
-            priv->enc.cx_data_dst_buf.sz) {
-      aom_codec_cx_pkt_t *modified_pkt = &priv->enc.cx_data_pkt;
-
-      memcpy(dst_buf + priv->enc.cx_data_pad_before, pkt->data.raw.buf,
-             pkt->data.raw.sz);
-      *modified_pkt = *pkt;
-      modified_pkt->data.raw.buf = dst_buf;
-      modified_pkt->data.raw.sz +=
-          priv->enc.cx_data_pad_before + priv->enc.cx_data_pad_after;
-      pkt = modified_pkt;
-    }
-
-    if (dst_buf == pkt->data.raw.buf) {
-      priv->enc.cx_data_dst_buf.buf = dst_buf + pkt->data.raw.sz;
-      priv->enc.cx_data_dst_buf.sz -= pkt->data.raw.sz;
-    }
-  }
-
-  return pkt;
-}
-
-aom_codec_err_t aom_codec_set_cx_data_buf(aom_codec_ctx_t *ctx,
-                                          const aom_fixed_buf_t *buf,
-                                          unsigned int pad_before,
-                                          unsigned int pad_after) {
-  if (!ctx || !ctx->priv) return AOM_CODEC_INVALID_PARAM;
-
-  if (buf) {
-    ctx->priv->enc.cx_data_dst_buf = *buf;
-    ctx->priv->enc.cx_data_pad_before = pad_before;
-    ctx->priv->enc.cx_data_pad_after = pad_after;
-  } else {
-    ctx->priv->enc.cx_data_dst_buf.buf = NULL;
-    ctx->priv->enc.cx_data_dst_buf.sz = 0;
-    ctx->priv->enc.cx_data_pad_before = 0;
-    ctx->priv->enc.cx_data_pad_after = 0;
-  }
-
-  return AOM_CODEC_OK;
-}
-
-const aom_image_t *aom_codec_get_preview_frame(aom_codec_ctx_t *ctx) {
-  aom_image_t *img = NULL;
-
-  if (ctx) {
-    if (!ctx->iface || !ctx->priv)
-      ctx->err = AOM_CODEC_ERROR;
-    else if (!(ctx->iface->caps & AOM_CODEC_CAP_ENCODER))
-      ctx->err = AOM_CODEC_INCAPABLE;
-    else if (!ctx->iface->enc.get_preview)
-      ctx->err = AOM_CODEC_INCAPABLE;
-    else
-      img = ctx->iface->enc.get_preview(get_alg_priv(ctx));
-  }
-
-  return img;
-}
-
-aom_fixed_buf_t *aom_codec_get_global_headers(aom_codec_ctx_t *ctx) {
-  aom_fixed_buf_t *buf = NULL;
-
-  if (ctx) {
-    if (!ctx->iface || !ctx->priv)
-      ctx->err = AOM_CODEC_ERROR;
-    else if (!(ctx->iface->caps & AOM_CODEC_CAP_ENCODER))
-      ctx->err = AOM_CODEC_INCAPABLE;
-    else if (!ctx->iface->enc.get_glob_hdrs)
-      ctx->err = AOM_CODEC_INCAPABLE;
-    else
-      buf = ctx->iface->enc.get_glob_hdrs(get_alg_priv(ctx));
-  }
-
-  return buf;
-}
-
-aom_codec_err_t aom_codec_enc_config_set(aom_codec_ctx_t *ctx,
-                                         const aom_codec_enc_cfg_t *cfg) {
-  aom_codec_err_t res;
-
-  if (!ctx || !ctx->iface || !ctx->priv || !cfg)
-    res = AOM_CODEC_INVALID_PARAM;
-  else if (!(ctx->iface->caps & AOM_CODEC_CAP_ENCODER))
-    res = AOM_CODEC_INCAPABLE;
-  else
-    res = ctx->iface->enc.cfg_set(get_alg_priv(ctx), cfg);
-
-  return SAVE_STATUS(ctx, res);
-}
-
-int aom_codec_pkt_list_add(struct aom_codec_pkt_list *list,
-                           const struct aom_codec_cx_pkt *pkt) {
-  if (list->cnt < list->max) {
-    list->pkts[list->cnt++] = *pkt;
-    return 0;
-  }
-
-  return 1;
-}
-
-const aom_codec_cx_pkt_t *aom_codec_pkt_list_get(
-    struct aom_codec_pkt_list *list, aom_codec_iter_t *iter) {
-  const aom_codec_cx_pkt_t *pkt;
-
-  if (!(*iter)) {
-    *iter = list->pkts;
-  }
-
-  pkt = (const aom_codec_cx_pkt_t *)*iter;
-
-  if ((size_t)(pkt - list->pkts) < list->cnt)
-    *iter = pkt + 1;
-  else
-    pkt = NULL;
-
-  return pkt;
-}
--- a/aom/src/aom_image.c
+++ b/aom/src/aom_image.c
@@ -1,240 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <stdlib.h>
-#include <string.h>
-
-#include "aom/aom_image.h"
-#include "aom/aom_integer.h"
-#include "aom_mem/aom_mem.h"
-
-static aom_image_t *img_alloc_helper(aom_image_t *img, aom_img_fmt_t fmt,
-                                     unsigned int d_w, unsigned int d_h,
-                                     unsigned int buf_align,
-                                     unsigned int stride_align,
-                                     unsigned char *img_data) {
-  unsigned int h, w, s, xcs, ycs, bps;
-  unsigned int stride_in_bytes;
-  int align;
-
-  /* Treat align==0 like align==1 */
-  if (!buf_align) buf_align = 1;
-
-  /* Validate alignment (must be power of 2) */
-  if (buf_align & (buf_align - 1)) goto fail;
-
-  /* Treat align==0 like align==1 */
-  if (!stride_align) stride_align = 1;
-
-  /* Validate alignment (must be power of 2) */
-  if (stride_align & (stride_align - 1)) goto fail;
-
-  /* Get sample size for this format */
-  switch (fmt) {
-    case AOM_IMG_FMT_RGB32:
-    case AOM_IMG_FMT_RGB32_LE:
-    case AOM_IMG_FMT_ARGB:
-    case AOM_IMG_FMT_ARGB_LE: bps = 32; break;
-    case AOM_IMG_FMT_RGB24:
-    case AOM_IMG_FMT_BGR24: bps = 24; break;
-    case AOM_IMG_FMT_RGB565:
-    case AOM_IMG_FMT_RGB565_LE:
-    case AOM_IMG_FMT_RGB555:
-    case AOM_IMG_FMT_RGB555_LE:
-    case AOM_IMG_FMT_UYVY:
-    case AOM_IMG_FMT_YUY2:
-    case AOM_IMG_FMT_YVYU: bps = 16; break;
-    case AOM_IMG_FMT_I420:
-    case AOM_IMG_FMT_YV12:
-    case AOM_IMG_FMT_AOMI420:
-    case AOM_IMG_FMT_AOMYV12: bps = 12; break;
-    case AOM_IMG_FMT_I422:
-    case AOM_IMG_FMT_I440: bps = 16; break;
-    case AOM_IMG_FMT_I444: bps = 24; break;
-    case AOM_IMG_FMT_I42016: bps = 24; break;
-    case AOM_IMG_FMT_I42216:
-    case AOM_IMG_FMT_I44016: bps = 32; break;
-    case AOM_IMG_FMT_I44416: bps = 48; break;
-    default: bps = 16; break;
-  }
-
-  /* Get chroma shift values for this format */
-  switch (fmt) {
-    case AOM_IMG_FMT_I420:
-    case AOM_IMG_FMT_YV12:
-    case AOM_IMG_FMT_AOMI420:
-    case AOM_IMG_FMT_AOMYV12:
-    case AOM_IMG_FMT_I422:
-    case AOM_IMG_FMT_I42016:
-    case AOM_IMG_FMT_I42216: xcs = 1; break;
-    default: xcs = 0; break;
-  }
-
-  switch (fmt) {
-    case AOM_IMG_FMT_I420:
-    case AOM_IMG_FMT_I440:
-    case AOM_IMG_FMT_YV12:
-    case AOM_IMG_FMT_AOMI420:
-    case AOM_IMG_FMT_AOMYV12:
-    case AOM_IMG_FMT_I42016:
-    case AOM_IMG_FMT_I44016: ycs = 1; break;
-    default: ycs = 0; break;
-  }
-
-  /* Calculate storage sizes given the chroma subsampling */
-  align = (1 << xcs) - 1;
-  w = (d_w + align) & ~align;
-  align = (1 << ycs) - 1;
-  h = (d_h + align) & ~align;
-  s = (fmt & AOM_IMG_FMT_PLANAR) ? w : bps * w / 8;
-  s = (s + stride_align - 1) & ~(stride_align - 1);
-  stride_in_bytes = (fmt & AOM_IMG_FMT_HIGHBITDEPTH) ? s * 2 : s;
-
-  /* Allocate the new image */
-  if (!img) {
-    img = (aom_image_t *)calloc(1, sizeof(aom_image_t));
-
-    if (!img) goto fail;
-
-    img->self_allocd = 1;
-  } else {
-    memset(img, 0, sizeof(aom_image_t));
-  }
-
-  img->img_data = img_data;
-
-  if (!img_data) {
-    const uint64_t alloc_size = (fmt & AOM_IMG_FMT_PLANAR)
-                                    ? (uint64_t)h * s * bps / 8
-                                    : (uint64_t)h * s;
-
-    if (alloc_size != (size_t)alloc_size) goto fail;
-
-    img->img_data = (uint8_t *)aom_memalign(buf_align, (size_t)alloc_size);
-    img->img_data_owner = 1;
-  }
-
-  if (!img->img_data) goto fail;
-
-  img->fmt = fmt;
-  img->bit_depth = (fmt & AOM_IMG_FMT_HIGHBITDEPTH) ? 16 : 8;
-  img->w = w;
-  img->h = h;
-  img->x_chroma_shift = xcs;
-  img->y_chroma_shift = ycs;
-  img->bps = bps;
-
-  /* Calculate strides */
-  img->stride[AOM_PLANE_Y] = img->stride[AOM_PLANE_ALPHA] = stride_in_bytes;
-  img->stride[AOM_PLANE_U] = img->stride[AOM_PLANE_V] = stride_in_bytes >> xcs;
-
-  /* Default viewport to entire image */
-  if (!aom_img_set_rect(img, 0, 0, d_w, d_h)) return img;
-
-fail:
-  aom_img_free(img);
-  return NULL;
-}
-
-aom_image_t *aom_img_alloc(aom_image_t *img, aom_img_fmt_t fmt,
-                           unsigned int d_w, unsigned int d_h,
-                           unsigned int align) {
-  return img_alloc_helper(img, fmt, d_w, d_h, align, align, NULL);
-}
-
-aom_image_t *aom_img_wrap(aom_image_t *img, aom_img_fmt_t fmt, unsigned int d_w,
-                          unsigned int d_h, unsigned int stride_align,
-                          unsigned char *img_data) {
-  /* By setting buf_align = 1, we don't change buffer alignment in this
-   * function. */
-  return img_alloc_helper(img, fmt, d_w, d_h, 1, stride_align, img_data);
-}
-
-int aom_img_set_rect(aom_image_t *img, unsigned int x, unsigned int y,
-                     unsigned int w, unsigned int h) {
-  unsigned char *data;
-
-  if (x + w <= img->w && y + h <= img->h) {
-    img->d_w = w;
-    img->d_h = h;
-
-    /* Calculate plane pointers */
-    if (!(img->fmt & AOM_IMG_FMT_PLANAR)) {
-      img->planes[AOM_PLANE_PACKED] =
-          img->img_data + x * img->bps / 8 + y * img->stride[AOM_PLANE_PACKED];
-    } else {
-      const int bytes_per_sample =
-          (img->fmt & AOM_IMG_FMT_HIGHBITDEPTH) ? 2 : 1;
-      data = img->img_data;
-
-      if (img->fmt & AOM_IMG_FMT_HAS_ALPHA) {
-        img->planes[AOM_PLANE_ALPHA] =
-            data + x * bytes_per_sample + y * img->stride[AOM_PLANE_ALPHA];
-        data += img->h * img->stride[AOM_PLANE_ALPHA];
-      }
-
-      img->planes[AOM_PLANE_Y] =
-          data + x * bytes_per_sample + y * img->stride[AOM_PLANE_Y];
-      data += img->h * img->stride[AOM_PLANE_Y];
-
-      if (!(img->fmt & AOM_IMG_FMT_UV_FLIP)) {
-        img->planes[AOM_PLANE_U] =
-            data + (x >> img->x_chroma_shift) * bytes_per_sample +
-            (y >> img->y_chroma_shift) * img->stride[AOM_PLANE_U];
-        data += (img->h >> img->y_chroma_shift) * img->stride[AOM_PLANE_U];
-        img->planes[AOM_PLANE_V] =
-            data + (x >> img->x_chroma_shift) * bytes_per_sample +
-            (y >> img->y_chroma_shift) * img->stride[AOM_PLANE_V];
-      } else {
-        img->planes[AOM_PLANE_V] =
-            data + (x >> img->x_chroma_shift) * bytes_per_sample +
-            (y >> img->y_chroma_shift) * img->stride[AOM_PLANE_V];
-        data += (img->h >> img->y_chroma_shift) * img->stride[AOM_PLANE_V];
-        img->planes[AOM_PLANE_U] =
-            data + (x >> img->x_chroma_shift) * bytes_per_sample +
-            (y >> img->y_chroma_shift) * img->stride[AOM_PLANE_U];
-      }
-    }
-    return 0;
-  }
-  return -1;
-}
-
-void aom_img_flip(aom_image_t *img) {
-  /* Note: In the calculation pointer adjustment calculation, we want the
-   * rhs to be promoted to a signed type. Section 6.3.1.8 of the ISO C99
-   * standard indicates that if the adjustment parameter is unsigned, the
-   * stride parameter will be promoted to unsigned, causing errors when
-   * the lhs is a larger type than the rhs.
-   */
-  img->planes[AOM_PLANE_Y] += (signed)(img->d_h - 1) * img->stride[AOM_PLANE_Y];
-  img->stride[AOM_PLANE_Y] = -img->stride[AOM_PLANE_Y];
-
-  img->planes[AOM_PLANE_U] += (signed)((img->d_h >> img->y_chroma_shift) - 1) *
-                              img->stride[AOM_PLANE_U];
-  img->stride[AOM_PLANE_U] = -img->stride[AOM_PLANE_U];
-
-  img->planes[AOM_PLANE_V] += (signed)((img->d_h >> img->y_chroma_shift) - 1) *
-                              img->stride[AOM_PLANE_V];
-  img->stride[AOM_PLANE_V] = -img->stride[AOM_PLANE_V];
-
-  img->planes[AOM_PLANE_ALPHA] +=
-      (signed)(img->d_h - 1) * img->stride[AOM_PLANE_ALPHA];
-  img->stride[AOM_PLANE_ALPHA] = -img->stride[AOM_PLANE_ALPHA];
-}
-
-void aom_img_free(aom_image_t *img) {
-  if (img) {
-    if (img->img_data && img->img_data_owner) aom_free(img->img_data);
-
-    if (img->self_allocd) free(img);
-  }
-}
--- a/aom_dsp/add_noise.c
+++ b/aom_dsp/add_noise.c
@@ -1,72 +0,0 @@
-/*
- *  Copyright (c) 2015 The WebM project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-#include <math.h>
-#include <stdlib.h>
-
-#include "./aom_config.h"
-#include "./aom_dsp_rtcd.h"
-
-#include "aom/aom_integer.h"
-#include "aom_ports/mem.h"
-
-void aom_plane_add_noise_c(uint8_t *start, char *noise, char blackclamp[16],
-                           char whiteclamp[16], char bothclamp[16],
-                           unsigned int width, unsigned int height, int pitch) {
-  unsigned int i, j;
-
-  for (i = 0; i < height; ++i) {
-    uint8_t *pos = start + i * pitch;
-    char *ref = (char *)(noise + (rand() & 0xff));  // NOLINT
-
-    for (j = 0; j < width; ++j) {
-      int v = pos[j];
-
-      v = clamp(v - blackclamp[0], 0, 255);
-      v = clamp(v + bothclamp[0], 0, 255);
-      v = clamp(v - whiteclamp[0], 0, 255);
-
-      pos[j] = v + ref[j];
-    }
-  }
-}
-
-static double gaussian(double sigma, double mu, double x) {
-  return 1 / (sigma * sqrt(2.0 * 3.14159265)) *
-         (exp(-(x - mu) * (x - mu) / (2 * sigma * sigma)));
-}
-
-int aom_setup_noise(double sigma, int size, char *noise) {
-  char char_dist[256];
-  int next = 0, i, j;
-
-  // set up a 256 entry lookup that matches gaussian distribution
-  for (i = -32; i < 32; ++i) {
-    const int a_i = (int)(0.5 + 256 * gaussian(sigma, 0, i));
-    if (a_i) {
-      for (j = 0; j < a_i; ++j) {
-        char_dist[next + j] = (char)i;
-      }
-      next = next + j;
-    }
-  }
-
-  // Rounding error - might mean we have less than 256.
-  for (; next < 256; ++next) {
-    char_dist[next] = 0;
-  }
-
-  for (i = 0; i < size; ++i) {
-    noise[i] = char_dist[rand() & 0xff];  // NOLINT
-  }
-
-  // Returns the highest non 0 value used in distribution.
-  return -char_dist[0];
-}
--- a/aom_dsp/ans.c
+++ b/aom_dsp/ans.c
@@ -1,64 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <assert.h>
-#include "./aom_config.h"
-#include "aom/aom_integer.h"
-#include "aom_dsp/ans.h"
-#include "aom_dsp/prob.h"
-
-static int find_largest(const aom_cdf_prob *const pdf_tab, int num_syms) {
-  int largest_idx = -1;
-  int largest_p = -1;
-  int i;
-  for (i = 0; i < num_syms; ++i) {
-    int p = pdf_tab[i];
-    if (p > largest_p) {
-      largest_p = p;
-      largest_idx = i;
-    }
-  }
-  return largest_idx;
-}
-
-void aom_rans_merge_prob8_pdf(aom_cdf_prob *const out_pdf,
-                              const AnsP8 node_prob,
-                              const aom_cdf_prob *const src_pdf, int in_syms) {
-  int i;
-  int adjustment = RANS_PRECISION;
-  const int round_fact = ANS_P8_PRECISION >> 1;
-  const AnsP8 p1 = ANS_P8_PRECISION - node_prob;
-  const int out_syms = in_syms + 1;
-  assert(src_pdf != out_pdf);
-
-  out_pdf[0] = node_prob << (RANS_PROB_BITS - ANS_P8_SHIFT);
-  adjustment -= out_pdf[0];
-  for (i = 0; i < in_syms; ++i) {
-    int p = (p1 * src_pdf[i] + round_fact) >> ANS_P8_SHIFT;
-    p = AOMMIN(p, (int)RANS_PRECISION - in_syms);
-    p = AOMMAX(p, 1);
-    out_pdf[i + 1] = p;
-    adjustment -= p;
-  }
-
-  // Adjust probabilities so they sum to the total probability
-  if (adjustment > 0) {
-    i = find_largest(out_pdf, out_syms);
-    out_pdf[i] += adjustment;
-  } else {
-    while (adjustment < 0) {
-      i = find_largest(out_pdf, out_syms);
-      --out_pdf[i];
-      assert(out_pdf[i] > 0);
-      adjustment++;
-    }
-  }
-}
--- a/aom_dsp/ans.h
+++ b/aom_dsp/ans.h
@@ -1,44 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_DSP_ANS_H_
-#define AOM_DSP_ANS_H_
-// Constants, types and utilities for Asymmetric Numeral Systems
-// http://arxiv.org/abs/1311.2540v2
-
-#include <assert.h>
-#include "./aom_config.h"
-#include "aom/aom_integer.h"
-#include "aom_dsp/prob.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif  // __cplusplus
-
-typedef uint8_t AnsP8;
-#define ANS_P8_PRECISION 256u
-#define ANS_P8_SHIFT 8
-#define RANS_PROB_BITS 15
-#define RANS_PRECISION (1u << RANS_PROB_BITS)
-
-// L_BASE % PRECISION must be 0. Increasing L_BASE beyond 2**15 will cause uabs
-// to overflow.
-#define L_BASE (RANS_PRECISION)
-#define IO_BASE 256
-// Range I = { L_BASE, L_BASE + 1, ..., L_BASE * IO_BASE - 1 }
-
-void aom_rans_merge_prob8_pdf(aom_cdf_prob *const out_pdf,
-                              const AnsP8 node_prob,
-                              const aom_cdf_prob *const src_pdf, int in_syms);
-#ifdef __cplusplus
-}  // extern "C"
-#endif  // __cplusplus
-#endif  // AOM_DSP_ANS_H_
--- a/aom_dsp/ansreader.h
+++ b/aom_dsp/ansreader.h
@@ -1,146 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_DSP_ANSREADER_H_
-#define AOM_DSP_ANSREADER_H_
-// A uABS and rANS decoder implementation of Asymmetric Numeral Systems
-// http://arxiv.org/abs/1311.2540v2
-
-#include <assert.h>
-#include "./aom_config.h"
-#include "aom/aom_integer.h"
-#include "aom_dsp/prob.h"
-#include "aom_dsp/ans.h"
-#include "aom_ports/mem_ops.h"
-#if CONFIG_ACCOUNTING
-#include "av1/common/accounting.h"
-#endif
-
-#ifdef __cplusplus
-extern "C" {
-#endif  // __cplusplus
-
-struct AnsDecoder {
-  const uint8_t *buf;
-  int buf_offset;
-  uint32_t state;
-#if CONFIG_ACCOUNTING
-  Accounting *accounting;
-#endif
-};
-
-static INLINE int uabs_read(struct AnsDecoder *ans, AnsP8 p0) {
-  AnsP8 p = ANS_P8_PRECISION - p0;
-  int s;
-  unsigned xp, sp;
-  unsigned state = ans->state;
-  while (state < L_BASE && ans->buf_offset > 0) {
-    state = state * IO_BASE + ans->buf[--ans->buf_offset];
-  }
-  sp = state * p;
-  xp = sp / ANS_P8_PRECISION;
-  s = (sp & 0xFF) >= p0;
-  if (s)
-    ans->state = xp;
-  else
-    ans->state = state - xp;
-  return s;
-}
-
-static INLINE int uabs_read_bit(struct AnsDecoder *ans) {
-  int s;
-  unsigned state = ans->state;
-  while (state < L_BASE && ans->buf_offset > 0) {
-    state = state * IO_BASE + ans->buf[--ans->buf_offset];
-  }
-  s = (int)(state & 1);
-  ans->state = state >> 1;
-  return s;
-}
-
-struct rans_dec_sym {
-  uint8_t val;
-  aom_cdf_prob prob;
-  aom_cdf_prob cum_prob;  // not-inclusive
-};
-
-static INLINE void fetch_sym(struct rans_dec_sym *out, const aom_cdf_prob *cdf,
-                             aom_cdf_prob rem) {
-  int i;
-  aom_cdf_prob cum_prob = 0, top_prob;
-  // TODO(skal): if critical, could be a binary search.
-  // Or, better, an O(1) alias-table.
-  for (i = 0; rem >= (top_prob = cdf[i]); ++i) {
-    cum_prob = top_prob;
-  }
-  out->val = i;
-  out->prob = top_prob - cum_prob;
-  out->cum_prob = cum_prob;
-}
-
-static INLINE int rans_read(struct AnsDecoder *ans, const aom_cdf_prob *tab) {
-  unsigned rem;
-  unsigned quo;
-  struct rans_dec_sym sym;
-  while (ans->state < L_BASE && ans->buf_offset > 0) {
-    ans->state = ans->state * IO_BASE + ans->buf[--ans->buf_offset];
-  }
-  quo = ans->state / RANS_PRECISION;
-  rem = ans->state % RANS_PRECISION;
-  fetch_sym(&sym, tab, rem);
-  ans->state = quo * sym.prob + rem - sym.cum_prob;
-  return sym.val;
-}
-
-static INLINE int ans_read_init(struct AnsDecoder *const ans,
-                                const uint8_t *const buf, int offset) {
-  unsigned x;
-  if (offset < 1) return 1;
-  ans->buf = buf;
-  x = buf[offset - 1] >> 6;
-  if (x == 0) {
-    ans->buf_offset = offset - 1;
-    ans->state = buf[offset - 1] & 0x3F;
-  } else if (x == 1) {
-    if (offset < 2) return 1;
-    ans->buf_offset = offset - 2;
-    ans->state = mem_get_le16(buf + offset - 2) & 0x3FFF;
-  } else if (x == 2) {
-    if (offset < 3) return 1;
-    ans->buf_offset = offset - 3;
-    ans->state = mem_get_le24(buf + offset - 3) & 0x3FFFFF;
-  } else if ((buf[offset - 1] & 0xE0) == 0xE0) {
-    if (offset < 4) return 1;
-    ans->buf_offset = offset - 4;
-    ans->state = mem_get_le32(buf + offset - 4) & 0x1FFFFFFF;
-  } else {
-    // 110xxxxx implies this byte is a superframe marker
-    return 1;
-  }
-#if CONFIG_ACCOUNTING
-  ans->accounting = NULL;
-#endif
-  ans->state += L_BASE;
-  if (ans->state >= L_BASE * IO_BASE) return 1;
-  return 0;
-}
-
-static INLINE int ans_read_end(struct AnsDecoder *const ans) {
-  return ans->state == L_BASE;
-}
-
-static INLINE int ans_reader_has_error(const struct AnsDecoder *const ans) {
-  return ans->state < L_BASE && ans->buf_offset == 0;
-}
-#ifdef __cplusplus
-}  // extern "C"
-#endif  // __cplusplus
-#endif  // AOM_DSP_ANSREADER_H_
--- a/aom_dsp/answriter.h
+++ b/aom_dsp/answriter.h
@@ -1,120 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_DSP_ANSWRITER_H_
-#define AOM_DSP_ANSWRITER_H_
-// A uABS and rANS encoder implementation of Asymmetric Numeral Systems
-// http://arxiv.org/abs/1311.2540v2
-
-#include <assert.h>
-#include "./aom_config.h"
-#include "aom/aom_integer.h"
-#include "aom_dsp/ans.h"
-#include "aom_dsp/prob.h"
-#include "aom_ports/mem_ops.h"
-#include "av1/common/odintrin.h"
-
-#if RANS_PRECISION <= OD_DIVU_DMAX
-#define ANS_DIVREM(quotient, remainder, dividend, divisor) \
-  do {                                                     \
-    quotient = OD_DIVU_SMALL((dividend), (divisor));       \
-    remainder = (dividend) - (quotient) * (divisor);       \
-  } while (0)
-#else
-#define ANS_DIVREM(quotient, remainder, dividend, divisor) \
-  do {                                                     \
-    quotient = (dividend) / (divisor);                     \
-    remainder = (dividend) % (divisor);                    \
-  } while (0)
-#endif
-
-#define ANS_DIV8(dividend, divisor) OD_DIVU_SMALL((dividend), (divisor))
-
-#ifdef __cplusplus
-extern "C" {
-#endif  // __cplusplus
-
-struct AnsCoder {
-  uint8_t *buf;
-  int buf_offset;
-  uint32_t state;
-};
-
-static INLINE void ans_write_init(struct AnsCoder *const ans,
-                                  uint8_t *const buf) {
-  ans->buf = buf;
-  ans->buf_offset = 0;
-  ans->state = L_BASE;
-}
-
-static INLINE int ans_write_end(struct AnsCoder *const ans) {
-  uint32_t state;
-  assert(ans->state >= L_BASE);
-  assert(ans->state < L_BASE * IO_BASE);
-  state = ans->state - L_BASE;
-  if (state < (1 << 6)) {
-    ans->buf[ans->buf_offset] = (0x00 << 6) + state;
-    return ans->buf_offset + 1;
-  } else if (state < (1 << 14)) {
-    mem_put_le16(ans->buf + ans->buf_offset, (0x01 << 14) + state);
-    return ans->buf_offset + 2;
-  } else if (state < (1 << 22)) {
-    mem_put_le24(ans->buf + ans->buf_offset, (0x02 << 22) + state);
-    return ans->buf_offset + 3;
-  } else if (state < (1 << 29)) {
-    mem_put_le32(ans->buf + ans->buf_offset, (0x07 << 29) + state);
-    return ans->buf_offset + 4;
-  } else {
-    assert(0 && "State is too large to be serialized");
-    return ans->buf_offset;
-  }
-}
-
-// uABS with normalization
-static INLINE void uabs_write(struct AnsCoder *ans, int val, AnsP8 p0) {
-  AnsP8 p = ANS_P8_PRECISION - p0;
-  const unsigned l_s = val ? p : p0;
-  while (ans->state >= L_BASE / ANS_P8_PRECISION * IO_BASE * l_s) {
-    ans->buf[ans->buf_offset++] = ans->state % IO_BASE;
-    ans->state /= IO_BASE;
-  }
-  if (!val)
-    ans->state = ANS_DIV8(ans->state * ANS_P8_PRECISION, p0);
-  else
-    ans->state = ANS_DIV8((ans->state + 1) * ANS_P8_PRECISION + p - 1, p) - 1;
-}
-
-struct rans_sym {
-  aom_cdf_prob prob;
-  aom_cdf_prob cum_prob;  // not-inclusive
-};
-
-// rANS with normalization
-// sym->prob takes the place of l_s from the paper
-// ANS_P10_PRECISION is m
-static INLINE void rans_write(struct AnsCoder *ans,
-                              const struct rans_sym *const sym) {
-  const aom_cdf_prob p = sym->prob;
-  unsigned quot, rem;
-  while (ans->state >= L_BASE / RANS_PRECISION * IO_BASE * p) {
-    ans->buf[ans->buf_offset++] = ans->state % IO_BASE;
-    ans->state /= IO_BASE;
-  }
-  ANS_DIVREM(quot, rem, ans->state, p);
-  ans->state = quot * RANS_PRECISION + rem + sym->cum_prob;
-}
-
-#undef ANS_DIV8
-#undef ANS_DIVREM
-#ifdef __cplusplus
-}  // extern "C"
-#endif  // __cplusplus
-#endif  // AOM_DSP_ANSWRITER_H_
--- a/aom_dsp/aom_convolve.h
+++ b/aom_dsp/aom_convolve.h
@@ -1,57 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-#ifndef AOM_DSP_AOM_CONVOLVE_H_
-#define AOM_DSP_AOM_CONVOLVE_H_
-
-#include "./aom_config.h"
-#include "aom/aom_integer.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-// Note: Fixed size intermediate buffers, place limits on parameters
-// of some functions. 2d filtering proceeds in 2 steps:
-//   (1) Interpolate horizontally into an intermediate buffer, temp.
-//   (2) Interpolate temp vertically to derive the sub-pixel result.
-// Deriving the maximum number of rows in the temp buffer (135):
-// --Smallest scaling factor is x1/2 ==> y_step_q4 = 32 (Normative).
-// --Largest block size is 64x64 pixels.
-// --64 rows in the downscaled frame span a distance of (64 - 1) * 32 in the
-//   original frame (in 1/16th pixel units).
-// --Must round-up because block may be located at sub-pixel position.
-// --Require an additional SUBPEL_TAPS rows for the 8-tap filter tails.
-// --((64 - 1) * 32 + 15) >> 4 + 8 = 135.
-#if CONFIG_AV1 && CONFIG_EXT_PARTITION
-#define MAX_EXT_SIZE 263
-#else
-#define MAX_EXT_SIZE 135
-#endif  // CONFIG_AV1 && CONFIG_EXT_PARTITION
-
-typedef void (*convolve_fn_t)(const uint8_t *src, ptrdiff_t src_stride,
-                              uint8_t *dst, ptrdiff_t dst_stride,
-                              const int16_t *filter_x, int x_step_q4,
-                              const int16_t *filter_y, int y_step_q4, int w,
-                              int h);
-
-#if CONFIG_AOM_HIGHBITDEPTH
-typedef void (*highbd_convolve_fn_t)(const uint8_t *src, ptrdiff_t src_stride,
-                                     uint8_t *dst, ptrdiff_t dst_stride,
-                                     const int16_t *filter_x, int x_step_q4,
-                                     const int16_t *filter_y, int y_step_q4,
-                                     int w, int h, int bd);
-#endif
-
-#ifdef __cplusplus
-}  // extern "C"
-#endif
-
-#endif  // AOM_DSP_AOM_CONVOLVE_H_
--- a/aom_dsp/aom_dsp_common.h
+++ b/aom_dsp/aom_dsp_common.h
@@ -1,102 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_DSP_AOM_DSP_COMMON_H_
-#define AOM_DSP_AOM_DSP_COMMON_H_
-
-#include "./aom_config.h"
-#include "aom/aom_integer.h"
-#include "aom_ports/mem.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#ifndef MAX_SB_SIZE
-#if CONFIG_AV1 && CONFIG_EXT_PARTITION
-#define MAX_SB_SIZE 128
-#else
-#define MAX_SB_SIZE 64
-#endif  // CONFIG_AV1 && CONFIG_EXT_PARTITION
-#endif  // ndef MAX_SB_SIZE
-
-#define AOMMIN(x, y) (((x) < (y)) ? (x) : (y))
-#define AOMMAX(x, y) (((x) > (y)) ? (x) : (y))
-
-#define IMPLIES(a, b) (!(a) || (b))  //  Logical 'a implies b' (or 'a -> b')
-
-#define IS_POWER_OF_TWO(x) (((x) & ((x)-1)) == 0)
-
-// These can be used to give a hint about branch outcomes.
-// This can have an effect, even if your target processor has a
-// good branch predictor, as these hints can affect basic block
-// ordering by the compiler.
-#ifdef __GNUC__
-#define LIKELY(v) __builtin_expect(v, 1)
-#define UNLIKELY(v) __builtin_expect(v, 0)
-#else
-#define LIKELY(v) (v)
-#define UNLIKELY(v) (v)
-#endif
-
-#define AOM_SWAP(type, a, b) \
-  do {                       \
-    type c = (b);            \
-    b = a;                   \
-    a = c;                   \
-  } while (0)
-
-#if CONFIG_AOM_QM
-typedef uint16_t qm_val_t;
-#define AOM_QM_BITS 6
-#endif
-#if CONFIG_AOM_HIGHBITDEPTH
-// Note:
-// tran_low_t  is the datatype used for final transform coefficients.
-// tran_high_t is the datatype used for intermediate transform stages.
-typedef int64_t tran_high_t;
-typedef int32_t tran_low_t;
-#else
-// Note:
-// tran_low_t  is the datatype used for final transform coefficients.
-// tran_high_t is the datatype used for intermediate transform stages.
-typedef int32_t tran_high_t;
-typedef int16_t tran_low_t;
-#endif  // CONFIG_AOM_HIGHBITDEPTH
-
-static INLINE uint8_t clip_pixel(int val) {
-  return (val > 255) ? 255 : (val < 0) ? 0 : val;
-}
-
-static INLINE int clamp(int value, int low, int high) {
-  return value < low ? low : (value > high ? high : value);
-}
-
-static INLINE double fclamp(double value, double low, double high) {
-  return value < low ? low : (value > high ? high : value);
-}
-
-#if CONFIG_AOM_HIGHBITDEPTH
-static INLINE uint16_t clip_pixel_highbd(int val, int bd) {
-  switch (bd) {
-    case 8:
-    default: return (uint16_t)clamp(val, 0, 255);
-    case 10: return (uint16_t)clamp(val, 0, 1023);
-    case 12: return (uint16_t)clamp(val, 0, 4095);
-  }
-}
-#endif  // CONFIG_AOM_HIGHBITDEPTH
-
-#ifdef __cplusplus
-}  // extern "C"
-#endif
-
-#endif  // AOM_DSP_AOM_DSP_COMMON_H_
--- a/aom_dsp/aom_dsp_rtcd.c
+++ b/aom_dsp/aom_dsp_rtcd.c
@@ -1,16 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-#include "./aom_config.h"
-#define RTCD_C
-#include "./aom_dsp_rtcd.h"
-#include "aom_ports/aom_once.h"
-
-void aom_dsp_rtcd() { once(setup_rtcd_internal); }
--- a/aom_dsp/aom_dsp_rtcd_defs.pl
+++ b/aom_dsp/aom_dsp_rtcd_defs.pl
--- a/aom_dsp/aom_filter.h
+++ b/aom_dsp/aom_filter.h
@@ -1,43 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_DSP_AOM_FILTER_H_
-#define AOM_DSP_AOM_FILTER_H_
-
-#include "aom/aom_integer.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#define FILTER_BITS 7
-
-#define SUBPEL_BITS 4
-#define SUBPEL_MASK ((1 << SUBPEL_BITS) - 1)
-#define SUBPEL_SHIFTS (1 << SUBPEL_BITS)
-#define SUBPEL_TAPS 8
-
-typedef int16_t InterpKernel[SUBPEL_TAPS];
-
-#define BIL_SUBPEL_BITS 3
-#define BIL_SUBPEL_SHIFTS (1 << BIL_SUBPEL_BITS)
-
-// 2 tap bilinear filters
-static const uint8_t bilinear_filters_2t[BIL_SUBPEL_SHIFTS][2] = {
-  { 128, 0 }, { 112, 16 }, { 96, 32 }, { 80, 48 },
-  { 64, 64 }, { 48, 80 },  { 32, 96 }, { 16, 112 },
-};
-
-#ifdef __cplusplus
-}  // extern "C"
-#endif
-
-#endif  // AOM_DSP_AOM_FILTER_H_
--- a/aom_dsp/aom_simd.c
+++ b/aom_dsp/aom_simd.c
@@ -1,13 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-// Set to 1 to add some sanity checks in the fallback C code
-const int simd_check = 1;
--- a/aom_dsp/aom_simd.h
+++ b/aom_dsp/aom_simd.h
@@ -1,32 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_DSP_AOM_AOM_SIMD_H_
-#define AOM_DSP_AOM_AOM_SIMD_H_
-
-#include <stdint.h>
-
-#if defined(_WIN32)
-#include <intrin.h>
-#endif
-
-#include "./aom_config.h"
-#include "./aom_simd_inline.h"
-
-#if HAVE_NEON
-#include "simd/v256_intrinsics_arm.h"
-#elif HAVE_SSE2
-#include "simd/v256_intrinsics_x86.h"
-#else
-#include "simd/v256_intrinsics.h"
-#endif
-
-#endif  // AOM_DSP_AOM_AOM_SIMD_H_
--- a/aom_dsp/aom_simd_inline.h
+++ b/aom_dsp/aom_simd_inline.h
@@ -1,21 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_DSP_AOM_SIMD_INLINE_H_
-#define AOM_DSP_AOM_SIMD_INLINE_H_
-
-#include "aom/aom_integer.h"
-
-#ifndef SIMD_INLINE
-#define SIMD_INLINE static AOM_FORCE_INLINE
-#endif
-
-#endif  // AOM_DSP_AOM_SIMD_INLINE_H_
--- a/aom_dsp/arm/aom_convolve_neon.c
+++ b/aom_dsp/arm/aom_convolve_neon.c
@@ -1,66 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <assert.h>
-
-#include "./aom_dsp_rtcd.h"
-#include "aom_dsp/aom_dsp_common.h"
-#include "aom_ports/mem.h"
-
-void aom_convolve8_neon(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst,
-                        ptrdiff_t dst_stride, const int16_t *filter_x,
-                        int x_step_q4, const int16_t *filter_y, int y_step_q4,
-                        int w, int h) {
-  /* Given our constraints: w <= 64, h <= 64, taps == 8 we can reduce the
-   * maximum buffer size to 64 * 64 + 7 (+ 1 to make it divisible by 4).
-   */
-  DECLARE_ALIGNED(8, uint8_t, temp[64 * 72]);
-
-  // Account for the vertical phase needing 3 lines prior and 4 lines post
-  int intermediate_height = h + 7;
-
-  assert(y_step_q4 == 16);
-  assert(x_step_q4 == 16);
-
-  /* Filter starting 3 lines back. The neon implementation will ignore the
-   * given height and filter a multiple of 4 lines. Since this goes in to
-   * the temp buffer which has lots of extra room and is subsequently discarded
-   * this is safe if somewhat less than ideal.
-   */
-  aom_convolve8_horiz_neon(src - src_stride * 3, src_stride, temp, 64, filter_x,
-                           x_step_q4, filter_y, y_step_q4, w,
-                           intermediate_height);
-
-  /* Step into the temp buffer 3 lines to get the actual frame data */
-  aom_convolve8_vert_neon(temp + 64 * 3, 64, dst, dst_stride, filter_x,
-                          x_step_q4, filter_y, y_step_q4, w, h);
-}
-
-void aom_convolve8_avg_neon(const uint8_t *src, ptrdiff_t src_stride,
-                            uint8_t *dst, ptrdiff_t dst_stride,
-                            const int16_t *filter_x, int x_step_q4,
-                            const int16_t *filter_y, int y_step_q4, int w,
-                            int h) {
-  DECLARE_ALIGNED(8, uint8_t, temp[64 * 72]);
-  int intermediate_height = h + 7;
-
-  assert(y_step_q4 == 16);
-  assert(x_step_q4 == 16);
-
-  /* This implementation has the same issues as above. In addition, we only want
-   * to average the values after both passes.
-   */
-  aom_convolve8_horiz_neon(src - src_stride * 3, src_stride, temp, 64, filter_x,
-                           x_step_q4, filter_y, y_step_q4, w,
-                           intermediate_height);
-  aom_convolve8_avg_vert_neon(temp + 64 * 3, 64, dst, dst_stride, filter_x,
-                              x_step_q4, filter_y, y_step_q4, w, h);
-}
--- a/aom_dsp/arm/idct16x16_1_add_neon.c
+++ b/aom_dsp/arm/idct16x16_1_add_neon.c
@@ -1,59 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <arm_neon.h>
-
-#include "aom_dsp/inv_txfm.h"
-#include "aom_ports/mem.h"
-
-void aom_idct16x16_1_add_neon(int16_t *input, uint8_t *dest, int dest_stride) {
-  uint8x8_t d2u8, d3u8, d30u8, d31u8;
-  uint64x1_t d2u64, d3u64, d4u64, d5u64;
-  uint16x8_t q0u16, q9u16, q10u16, q11u16, q12u16;
-  int16x8_t q0s16;
-  uint8_t *d1, *d2;
-  int16_t i, j, a1;
-  int16_t out = dct_const_round_shift(input[0] * cospi_16_64);
-  out = dct_const_round_shift(out * cospi_16_64);
-  a1 = ROUND_POWER_OF_TWO(out, 6);
-
-  q0s16 = vdupq_n_s16(a1);
-  q0u16 = vreinterpretq_u16_s16(q0s16);
-
-  for (d1 = d2 = dest, i = 0; i < 4; i++) {
-    for (j = 0; j < 2; j++) {
-      d2u64 = vld1_u64((const uint64_t *)d1);
-      d3u64 = vld1_u64((const uint64_t *)(d1 + 8));
-      d1 += dest_stride;
-      d4u64 = vld1_u64((const uint64_t *)d1);
-      d5u64 = vld1_u64((const uint64_t *)(d1 + 8));
-      d1 += dest_stride;
-
-      q9u16 = vaddw_u8(q0u16, vreinterpret_u8_u64(d2u64));
-      q10u16 = vaddw_u8(q0u16, vreinterpret_u8_u64(d3u64));
-      q11u16 = vaddw_u8(q0u16, vreinterpret_u8_u64(d4u64));
-      q12u16 = vaddw_u8(q0u16, vreinterpret_u8_u64(d5u64));
-
-      d2u8 = vqmovun_s16(vreinterpretq_s16_u16(q9u16));
-      d3u8 = vqmovun_s16(vreinterpretq_s16_u16(q10u16));
-      d30u8 = vqmovun_s16(vreinterpretq_s16_u16(q11u16));
-      d31u8 = vqmovun_s16(vreinterpretq_s16_u16(q12u16));
-
-      vst1_u64((uint64_t *)d2, vreinterpret_u64_u8(d2u8));
-      vst1_u64((uint64_t *)(d2 + 8), vreinterpret_u64_u8(d3u8));
-      d2 += dest_stride;
-      vst1_u64((uint64_t *)d2, vreinterpret_u64_u8(d30u8));
-      vst1_u64((uint64_t *)(d2 + 8), vreinterpret_u64_u8(d31u8));
-      d2 += dest_stride;
-    }
-  }
-  return;
-}
--- a/aom_dsp/arm/idct16x16_add_neon.c
+++ b/aom_dsp/arm/idct16x16_add_neon.c
--- a/aom_dsp/arm/idct16x16_neon.c
+++ b/aom_dsp/arm/idct16x16_neon.c
@@ -1,152 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include "aom_dsp/aom_dsp_common.h"
-
-void aom_idct16x16_256_add_neon_pass1(const int16_t *input, int16_t *output,
-                                      int output_stride);
-void aom_idct16x16_256_add_neon_pass2(const int16_t *src, int16_t *output,
-                                      int16_t *pass1Output, int16_t skip_adding,
-                                      uint8_t *dest, int dest_stride);
-void aom_idct16x16_10_add_neon_pass1(const int16_t *input, int16_t *output,
-                                     int output_stride);
-void aom_idct16x16_10_add_neon_pass2(const int16_t *src, int16_t *output,
-                                     int16_t *pass1Output, int16_t skip_adding,
-                                     uint8_t *dest, int dest_stride);
-
-#if HAVE_NEON_ASM
-/* For ARM NEON, d8-d15 are callee-saved registers, and need to be saved. */
-extern void aom_push_neon(int64_t *store);
-extern void aom_pop_neon(int64_t *store);
-#endif  // HAVE_NEON_ASM
-
-void aom_idct16x16_256_add_neon(const int16_t *input, uint8_t *dest,
-                                int dest_stride) {
-#if HAVE_NEON_ASM
-  int64_t store_reg[8];
-#endif
-  int16_t pass1_output[16 * 16] = { 0 };
-  int16_t row_idct_output[16 * 16] = { 0 };
-
-#if HAVE_NEON_ASM
-  // save d8-d15 register values.
-  aom_push_neon(store_reg);
-#endif
-
-  /* Parallel idct on the upper 8 rows */
-  // First pass processes even elements 0, 2, 4, 6, 8, 10, 12, 14 and save the
-  // stage 6 result in pass1_output.
-  aom_idct16x16_256_add_neon_pass1(input, pass1_output, 8);
-
-  // Second pass processes odd elements 1, 3, 5, 7, 9, 11, 13, 15 and combines
-  // with result in pass1(pass1_output) to calculate final result in stage 7
-  // which will be saved into row_idct_output.
-  aom_idct16x16_256_add_neon_pass2(input + 1, row_idct_output, pass1_output, 0,
-                                   dest, dest_stride);
-
-  /* Parallel idct on the lower 8 rows */
-  // First pass processes even elements 0, 2, 4, 6, 8, 10, 12, 14 and save the
-  // stage 6 result in pass1_output.
-  aom_idct16x16_256_add_neon_pass1(input + 8 * 16, pass1_output, 8);
-
-  // Second pass processes odd elements 1, 3, 5, 7, 9, 11, 13, 15 and combines
-  // with result in pass1(pass1_output) to calculate final result in stage 7
-  // which will be saved into row_idct_output.
-  aom_idct16x16_256_add_neon_pass2(input + 8 * 16 + 1, row_idct_output + 8,
-                                   pass1_output, 0, dest, dest_stride);
-
-  /* Parallel idct on the left 8 columns */
-  // First pass processes even elements 0, 2, 4, 6, 8, 10, 12, 14 and save the
-  // stage 6 result in pass1_output.
-  aom_idct16x16_256_add_neon_pass1(row_idct_output, pass1_output, 8);
-
-  // Second pass processes odd elements 1, 3, 5, 7, 9, 11, 13, 15 and combines
-  // with result in pass1(pass1_output) to calculate final result in stage 7.
-  // Then add the result to the destination data.
-  aom_idct16x16_256_add_neon_pass2(row_idct_output + 1, row_idct_output,
-                                   pass1_output, 1, dest, dest_stride);
-
-  /* Parallel idct on the right 8 columns */
-  // First pass processes even elements 0, 2, 4, 6, 8, 10, 12, 14 and save the
-  // stage 6 result in pass1_output.
-  aom_idct16x16_256_add_neon_pass1(row_idct_output + 8 * 16, pass1_output, 8);
-
-  // Second pass processes odd elements 1, 3, 5, 7, 9, 11, 13, 15 and combines
-  // with result in pass1(pass1_output) to calculate final result in stage 7.
-  // Then add the result to the destination data.
-  aom_idct16x16_256_add_neon_pass2(row_idct_output + 8 * 16 + 1,
-                                   row_idct_output + 8, pass1_output, 1,
-                                   dest + 8, dest_stride);
-
-#if HAVE_NEON_ASM
-  // restore d8-d15 register values.
-  aom_pop_neon(store_reg);
-#endif
-
-  return;
-}
-
-void aom_idct16x16_10_add_neon(const int16_t *input, uint8_t *dest,
-                               int dest_stride) {
-#if HAVE_NEON_ASM
-  int64_t store_reg[8];
-#endif
-  int16_t pass1_output[16 * 16] = { 0 };
-  int16_t row_idct_output[16 * 16] = { 0 };
-
-#if HAVE_NEON_ASM
-  // save d8-d15 register values.
-  aom_push_neon(store_reg);
-#endif
-
-  /* Parallel idct on the upper 8 rows */
-  // First pass processes even elements 0, 2, 4, 6, 8, 10, 12, 14 and save the
-  // stage 6 result in pass1_output.
-  aom_idct16x16_10_add_neon_pass1(input, pass1_output, 8);
-
-  // Second pass processes odd elements 1, 3, 5, 7, 9, 11, 13, 15 and combines
-  // with result in pass1(pass1_output) to calculate final result in stage 7
-  // which will be saved into row_idct_output.
-  aom_idct16x16_10_add_neon_pass2(input + 1, row_idct_output, pass1_output, 0,
-                                  dest, dest_stride);
-
-  /* Skip Parallel idct on the lower 8 rows as they are all 0s */
-
-  /* Parallel idct on the left 8 columns */
-  // First pass processes even elements 0, 2, 4, 6, 8, 10, 12, 14 and save the
-  // stage 6 result in pass1_output.
-  aom_idct16x16_256_add_neon_pass1(row_idct_output, pass1_output, 8);
-
-  // Second pass processes odd elements 1, 3, 5, 7, 9, 11, 13, 15 and combines
-  // with result in pass1(pass1_output) to calculate final result in stage 7.
-  // Then add the result to the destination data.
-  aom_idct16x16_256_add_neon_pass2(row_idct_output + 1, row_idct_output,
-                                   pass1_output, 1, dest, dest_stride);
-
-  /* Parallel idct on the right 8 columns */
-  // First pass processes even elements 0, 2, 4, 6, 8, 10, 12, 14 and save the
-  // stage 6 result in pass1_output.
-  aom_idct16x16_256_add_neon_pass1(row_idct_output + 8 * 16, pass1_output, 8);
-
-  // Second pass processes odd elements 1, 3, 5, 7, 9, 11, 13, 15 and combines
-  // with result in pass1(pass1_output) to calculate final result in stage 7.
-  // Then add the result to the destination data.
-  aom_idct16x16_256_add_neon_pass2(row_idct_output + 8 * 16 + 1,
-                                   row_idct_output + 8, pass1_output, 1,
-                                   dest + 8, dest_stride);
-
-#if HAVE_NEON_ASM
-  // restore d8-d15 register values.
-  aom_pop_neon(store_reg);
-#endif
-
-  return;
-}
--- a/aom_dsp/arm/idct32x32_1_add_neon.c
+++ b/aom_dsp/arm/idct32x32_1_add_neon.c
@@ -1,141 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <arm_neon.h>
-
-#include "./aom_config.h"
-
-#include "aom_dsp/inv_txfm.h"
-#include "aom_ports/mem.h"
-
-static INLINE void LD_16x8(uint8_t *d, int d_stride, uint8x16_t *q8u8,
-                           uint8x16_t *q9u8, uint8x16_t *q10u8,
-                           uint8x16_t *q11u8, uint8x16_t *q12u8,
-                           uint8x16_t *q13u8, uint8x16_t *q14u8,
-                           uint8x16_t *q15u8) {
-  *q8u8 = vld1q_u8(d);
-  d += d_stride;
-  *q9u8 = vld1q_u8(d);
-  d += d_stride;
-  *q10u8 = vld1q_u8(d);
-  d += d_stride;
-  *q11u8 = vld1q_u8(d);
-  d += d_stride;
-  *q12u8 = vld1q_u8(d);
-  d += d_stride;
-  *q13u8 = vld1q_u8(d);
-  d += d_stride;
-  *q14u8 = vld1q_u8(d);
-  d += d_stride;
-  *q15u8 = vld1q_u8(d);
-  return;
-}
-
-static INLINE void ADD_DIFF_16x8(uint8x16_t qdiffu8, uint8x16_t *q8u8,
-                                 uint8x16_t *q9u8, uint8x16_t *q10u8,
-                                 uint8x16_t *q11u8, uint8x16_t *q12u8,
-                                 uint8x16_t *q13u8, uint8x16_t *q14u8,
-                                 uint8x16_t *q15u8) {
-  *q8u8 = vqaddq_u8(*q8u8, qdiffu8);
-  *q9u8 = vqaddq_u8(*q9u8, qdiffu8);
-  *q10u8 = vqaddq_u8(*q10u8, qdiffu8);
-  *q11u8 = vqaddq_u8(*q11u8, qdiffu8);
-  *q12u8 = vqaddq_u8(*q12u8, qdiffu8);
-  *q13u8 = vqaddq_u8(*q13u8, qdiffu8);
-  *q14u8 = vqaddq_u8(*q14u8, qdiffu8);
-  *q15u8 = vqaddq_u8(*q15u8, qdiffu8);
-  return;
-}
-
-static INLINE void SUB_DIFF_16x8(uint8x16_t qdiffu8, uint8x16_t *q8u8,
-                                 uint8x16_t *q9u8, uint8x16_t *q10u8,
-                                 uint8x16_t *q11u8, uint8x16_t *q12u8,
-                                 uint8x16_t *q13u8, uint8x16_t *q14u8,
-                                 uint8x16_t *q15u8) {
-  *q8u8 = vqsubq_u8(*q8u8, qdiffu8);
-  *q9u8 = vqsubq_u8(*q9u8, qdiffu8);
-  *q10u8 = vqsubq_u8(*q10u8, qdiffu8);
-  *q11u8 = vqsubq_u8(*q11u8, qdiffu8);
-  *q12u8 = vqsubq_u8(*q12u8, qdiffu8);
-  *q13u8 = vqsubq_u8(*q13u8, qdiffu8);
-  *q14u8 = vqsubq_u8(*q14u8, qdiffu8);
-  *q15u8 = vqsubq_u8(*q15u8, qdiffu8);
-  return;
-}
-
-static INLINE void ST_16x8(uint8_t *d, int d_stride, uint8x16_t *q8u8,
-                           uint8x16_t *q9u8, uint8x16_t *q10u8,
-                           uint8x16_t *q11u8, uint8x16_t *q12u8,
-                           uint8x16_t *q13u8, uint8x16_t *q14u8,
-                           uint8x16_t *q15u8) {
-  vst1q_u8(d, *q8u8);
-  d += d_stride;
-  vst1q_u8(d, *q9u8);
-  d += d_stride;
-  vst1q_u8(d, *q10u8);
-  d += d_stride;
-  vst1q_u8(d, *q11u8);
-  d += d_stride;
-  vst1q_u8(d, *q12u8);
-  d += d_stride;
-  vst1q_u8(d, *q13u8);
-  d += d_stride;
-  vst1q_u8(d, *q14u8);
-  d += d_stride;
-  vst1q_u8(d, *q15u8);
-  return;
-}
-
-void aom_idct32x32_1_add_neon(int16_t *input, uint8_t *dest, int dest_stride) {
-  uint8x16_t q0u8, q8u8, q9u8, q10u8, q11u8, q12u8, q13u8, q14u8, q15u8;
-  int i, j, dest_stride8;
-  uint8_t *d;
-  int16_t a1;
-  int16_t out = dct_const_round_shift(input[0] * cospi_16_64);
-
-  out = dct_const_round_shift(out * cospi_16_64);
-  a1 = ROUND_POWER_OF_TWO(out, 6);
-
-  dest_stride8 = dest_stride * 8;
-  if (a1 >= 0) {  // diff_positive_32_32
-    a1 = a1 < 0 ? 0 : a1 > 255 ? 255 : a1;
-    q0u8 = vdupq_n_u8(a1);
-    for (i = 0; i < 2; i++, dest += 16) {  // diff_positive_32_32_loop
-      d = dest;
-      for (j = 0; j < 4; j++) {
-        LD_16x8(d, dest_stride, &q8u8, &q9u8, &q10u8, &q11u8, &q12u8, &q13u8,
-                &q14u8, &q15u8);
-        ADD_DIFF_16x8(q0u8, &q8u8, &q9u8, &q10u8, &q11u8, &q12u8, &q13u8,
-                      &q14u8, &q15u8);
-        ST_16x8(d, dest_stride, &q8u8, &q9u8, &q10u8, &q11u8, &q12u8, &q13u8,
-                &q14u8, &q15u8);
-        d += dest_stride8;
-      }
-    }
-  } else {  // diff_negative_32_32
-    a1 = -a1;
-    a1 = a1 < 0 ? 0 : a1 > 255 ? 255 : a1;
-    q0u8 = vdupq_n_u8(a1);
-    for (i = 0; i < 2; i++, dest += 16) {  // diff_negative_32_32_loop
-      d = dest;
-      for (j = 0; j < 4; j++) {
-        LD_16x8(d, dest_stride, &q8u8, &q9u8, &q10u8, &q11u8, &q12u8, &q13u8,
-                &q14u8, &q15u8);
-        SUB_DIFF_16x8(q0u8, &q8u8, &q9u8, &q10u8, &q11u8, &q12u8, &q13u8,
-                      &q14u8, &q15u8);
-        ST_16x8(d, dest_stride, &q8u8, &q9u8, &q10u8, &q11u8, &q12u8, &q13u8,
-                &q14u8, &q15u8);
-        d += dest_stride8;
-      }
-    }
-  }
-  return;
-}
--- a/aom_dsp/arm/idct32x32_add_neon.c
+++ b/aom_dsp/arm/idct32x32_add_neon.c
@@ -1,686 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <arm_neon.h>
-
-#include "./aom_config.h"
-#include "aom_dsp/txfm_common.h"
-
-#define LOAD_FROM_TRANSPOSED(prev, first, second) \
-  q14s16 = vld1q_s16(trans_buf + first * 8);      \
-  q13s16 = vld1q_s16(trans_buf + second * 8);
-
-#define LOAD_FROM_OUTPUT(prev, first, second, qA, qB) \
-  qA = vld1q_s16(out + first * 32);                   \
-  qB = vld1q_s16(out + second * 32);
-
-#define STORE_IN_OUTPUT(prev, first, second, qA, qB) \
-  vst1q_s16(out + first * 32, qA);                   \
-  vst1q_s16(out + second * 32, qB);
-
-#define STORE_COMBINE_CENTER_RESULTS(r10, r9) \
-  __STORE_COMBINE_CENTER_RESULTS(r10, r9, stride, q6s16, q7s16, q8s16, q9s16);
-static INLINE void __STORE_COMBINE_CENTER_RESULTS(uint8_t *p1, uint8_t *p2,
-                                                  int stride, int16x8_t q6s16,
-                                                  int16x8_t q7s16,
-                                                  int16x8_t q8s16,
-                                                  int16x8_t q9s16) {
-  int16x4_t d8s16, d9s16, d10s16, d11s16;
-
-  d8s16 = vld1_s16((int16_t *)p1);
-  p1 += stride;
-  d11s16 = vld1_s16((int16_t *)p2);
-  p2 -= stride;
-  d9s16 = vld1_s16((int16_t *)p1);
-  d10s16 = vld1_s16((int16_t *)p2);
-
-  q7s16 = vrshrq_n_s16(q7s16, 6);
-  q8s16 = vrshrq_n_s16(q8s16, 6);
-  q9s16 = vrshrq_n_s16(q9s16, 6);
-  q6s16 = vrshrq_n_s16(q6s16, 6);
-
-  q7s16 = vreinterpretq_s16_u16(
-      vaddw_u8(vreinterpretq_u16_s16(q7s16), vreinterpret_u8_s16(d9s16)));
-  q8s16 = vreinterpretq_s16_u16(
-      vaddw_u8(vreinterpretq_u16_s16(q8s16), vreinterpret_u8_s16(d10s16)));
-  q9s16 = vreinterpretq_s16_u16(
-      vaddw_u8(vreinterpretq_u16_s16(q9s16), vreinterpret_u8_s16(d11s16)));
-  q6s16 = vreinterpretq_s16_u16(
-      vaddw_u8(vreinterpretq_u16_s16(q6s16), vreinterpret_u8_s16(d8s16)));
-
-  d9s16 = vreinterpret_s16_u8(vqmovun_s16(q7s16));
-  d10s16 = vreinterpret_s16_u8(vqmovun_s16(q8s16));
-  d11s16 = vreinterpret_s16_u8(vqmovun_s16(q9s16));
-  d8s16 = vreinterpret_s16_u8(vqmovun_s16(q6s16));
-
-  vst1_s16((int16_t *)p1, d9s16);
-  p1 -= stride;
-  vst1_s16((int16_t *)p2, d10s16);
-  p2 += stride;
-  vst1_s16((int16_t *)p1, d8s16);
-  vst1_s16((int16_t *)p2, d11s16);
-  return;
-}
-
-#define STORE_COMBINE_EXTREME_RESULTS(r7, r6) \
-  ;                                           \
-  __STORE_COMBINE_EXTREME_RESULTS(r7, r6, stride, q4s16, q5s16, q6s16, q7s16);
-static INLINE void __STORE_COMBINE_EXTREME_RESULTS(uint8_t *p1, uint8_t *p2,
-                                                   int stride, int16x8_t q4s16,
-                                                   int16x8_t q5s16,
-                                                   int16x8_t q6s16,
-                                                   int16x8_t q7s16) {
-  int16x4_t d4s16, d5s16, d6s16, d7s16;
-
-  d4s16 = vld1_s16((int16_t *)p1);
-  p1 += stride;
-  d7s16 = vld1_s16((int16_t *)p2);
-  p2 -= stride;
-  d5s16 = vld1_s16((int16_t *)p1);
-  d6s16 = vld1_s16((int16_t *)p2);
-
-  q5s16 = vrshrq_n_s16(q5s16, 6);
-  q6s16 = vrshrq_n_s16(q6s16, 6);
-  q7s16 = vrshrq_n_s16(q7s16, 6);
-  q4s16 = vrshrq_n_s16(q4s16, 6);
-
-  q5s16 = vreinterpretq_s16_u16(
-      vaddw_u8(vreinterpretq_u16_s16(q5s16), vreinterpret_u8_s16(d5s16)));
-  q6s16 = vreinterpretq_s16_u16(
-      vaddw_u8(vreinterpretq_u16_s16(q6s16), vreinterpret_u8_s16(d6s16)));
-  q7s16 = vreinterpretq_s16_u16(
-      vaddw_u8(vreinterpretq_u16_s16(q7s16), vreinterpret_u8_s16(d7s16)));
-  q4s16 = vreinterpretq_s16_u16(
-      vaddw_u8(vreinterpretq_u16_s16(q4s16), vreinterpret_u8_s16(d4s16)));
-
-  d5s16 = vreinterpret_s16_u8(vqmovun_s16(q5s16));
-  d6s16 = vreinterpret_s16_u8(vqmovun_s16(q6s16));
-  d7s16 = vreinterpret_s16_u8(vqmovun_s16(q7s16));
-  d4s16 = vreinterpret_s16_u8(vqmovun_s16(q4s16));
-
-  vst1_s16((int16_t *)p1, d5s16);
-  p1 -= stride;
-  vst1_s16((int16_t *)p2, d6s16);
-  p2 += stride;
-  vst1_s16((int16_t *)p2, d7s16);
-  vst1_s16((int16_t *)p1, d4s16);
-  return;
-}
-
-#define DO_BUTTERFLY_STD(const_1, const_2, qA, qB) \
-  DO_BUTTERFLY(q14s16, q13s16, const_1, const_2, qA, qB);
-static INLINE void DO_BUTTERFLY(int16x8_t q14s16, int16x8_t q13s16,
-                                int16_t first_const, int16_t second_const,
-                                int16x8_t *qAs16, int16x8_t *qBs16) {
-  int16x4_t d30s16, d31s16;
-  int32x4_t q8s32, q9s32, q10s32, q11s32, q12s32, q15s32;
-  int16x4_t dCs16, dDs16, dAs16, dBs16;
-
-  dCs16 = vget_low_s16(q14s16);
-  dDs16 = vget_high_s16(q14s16);
-  dAs16 = vget_low_s16(q13s16);
-  dBs16 = vget_high_s16(q13s16);
-
-  d30s16 = vdup_n_s16(first_const);
-  d31s16 = vdup_n_s16(second_const);
-
-  q8s32 = vmull_s16(dCs16, d30s16);
-  q10s32 = vmull_s16(dAs16, d31s16);
-  q9s32 = vmull_s16(dDs16, d30s16);
-  q11s32 = vmull_s16(dBs16, d31s16);
-  q12s32 = vmull_s16(dCs16, d31s16);
-
-  q8s32 = vsubq_s32(q8s32, q10s32);
-  q9s32 = vsubq_s32(q9s32, q11s32);
-
-  q10s32 = vmull_s16(dDs16, d31s16);
-  q11s32 = vmull_s16(dAs16, d30s16);
-  q15s32 = vmull_s16(dBs16, d30s16);
-
-  q11s32 = vaddq_s32(q12s32, q11s32);
-  q10s32 = vaddq_s32(q10s32, q15s32);
-
-  *qAs16 = vcombine_s16(vqrshrn_n_s32(q8s32, 14), vqrshrn_n_s32(q9s32, 14));
-  *qBs16 = vcombine_s16(vqrshrn_n_s32(q11s32, 14), vqrshrn_n_s32(q10s32, 14));
-  return;
-}
-
-static INLINE void idct32_transpose_pair(int16_t *input, int16_t *t_buf) {
-  int16_t *in;
-  int i;
-  const int stride = 32;
-  int16x4_t d16s16, d17s16, d18s16, d19s16, d20s16, d21s16, d22s16, d23s16;
-  int16x4_t d24s16, d25s16, d26s16, d27s16, d28s16, d29s16, d30s16, d31s16;
-  int16x8_t q8s16, q9s16, q10s16, q11s16, q12s16, q13s16, q14s16, q15s16;
-  int32x4x2_t q0x2s32, q1x2s32, q2x2s32, q3x2s32;
-  int16x8x2_t q0x2s16, q1x2s16, q2x2s16, q3x2s16;
-
-  for (i = 0; i < 4; i++, input += 8) {
-    in = input;
-    q8s16 = vld1q_s16(in);
-    in += stride;
-    q9s16 = vld1q_s16(in);
-    in += stride;
-    q10s16 = vld1q_s16(in);
-    in += stride;
-    q11s16 = vld1q_s16(in);
-    in += stride;
-    q12s16 = vld1q_s16(in);
-    in += stride;
-    q13s16 = vld1q_s16(in);
-    in += stride;
-    q14s16 = vld1q_s16(in);
-    in += stride;
-    q15s16 = vld1q_s16(in);
-
-    d16s16 = vget_low_s16(q8s16);
-    d17s16 = vget_high_s16(q8s16);
-    d18s16 = vget_low_s16(q9s16);
-    d19s16 = vget_high_s16(q9s16);
-    d20s16 = vget_low_s16(q10s16);
-    d21s16 = vget_high_s16(q10s16);
-    d22s16 = vget_low_s16(q11s16);
-    d23s16 = vget_high_s16(q11s16);
-    d24s16 = vget_low_s16(q12s16);
-    d25s16 = vget_high_s16(q12s16);
-    d26s16 = vget_low_s16(q13s16);
-    d27s16 = vget_high_s16(q13s16);
-    d28s16 = vget_low_s16(q14s16);
-    d29s16 = vget_high_s16(q14s16);
-    d30s16 = vget_low_s16(q15s16);
-    d31s16 = vget_high_s16(q15s16);
-
-    q8s16 = vcombine_s16(d16s16, d24s16);   // vswp d17, d24
-    q9s16 = vcombine_s16(d18s16, d26s16);   // vswp d19, d26
-    q10s16 = vcombine_s16(d20s16, d28s16);  // vswp d21, d28
-    q11s16 = vcombine_s16(d22s16, d30s16);  // vswp d23, d30
-    q12s16 = vcombine_s16(d17s16, d25s16);
-    q13s16 = vcombine_s16(d19s16, d27s16);
-    q14s16 = vcombine_s16(d21s16, d29s16);
-    q15s16 = vcombine_s16(d23s16, d31s16);
-
-    q0x2s32 =
-        vtrnq_s32(vreinterpretq_s32_s16(q8s16), vreinterpretq_s32_s16(q10s16));
-    q1x2s32 =
-        vtrnq_s32(vreinterpretq_s32_s16(q9s16), vreinterpretq_s32_s16(q11s16));
-    q2x2s32 =
-        vtrnq_s32(vreinterpretq_s32_s16(q12s16), vreinterpretq_s32_s16(q14s16));
-    q3x2s32 =
-        vtrnq_s32(vreinterpretq_s32_s16(q13s16), vreinterpretq_s32_s16(q15s16));
-
-    q0x2s16 = vtrnq_s16(vreinterpretq_s16_s32(q0x2s32.val[0]),   // q8
-                        vreinterpretq_s16_s32(q1x2s32.val[0]));  // q9
-    q1x2s16 = vtrnq_s16(vreinterpretq_s16_s32(q0x2s32.val[1]),   // q10
-                        vreinterpretq_s16_s32(q1x2s32.val[1]));  // q11
-    q2x2s16 = vtrnq_s16(vreinterpretq_s16_s32(q2x2s32.val[0]),   // q12
-                        vreinterpretq_s16_s32(q3x2s32.val[0]));  // q13
-    q3x2s16 = vtrnq_s16(vreinterpretq_s16_s32(q2x2s32.val[1]),   // q14
-                        vreinterpretq_s16_s32(q3x2s32.val[1]));  // q15
-
-    vst1q_s16(t_buf, q0x2s16.val[0]);
-    t_buf += 8;
-    vst1q_s16(t_buf, q0x2s16.val[1]);
-    t_buf += 8;
-    vst1q_s16(t_buf, q1x2s16.val[0]);
-    t_buf += 8;
-    vst1q_s16(t_buf, q1x2s16.val[1]);
-    t_buf += 8;
-    vst1q_s16(t_buf, q2x2s16.val[0]);
-    t_buf += 8;
-    vst1q_s16(t_buf, q2x2s16.val[1]);
-    t_buf += 8;
-    vst1q_s16(t_buf, q3x2s16.val[0]);
-    t_buf += 8;
-    vst1q_s16(t_buf, q3x2s16.val[1]);
-    t_buf += 8;
-  }
-  return;
-}
-
-static INLINE void idct32_bands_end_1st_pass(int16_t *out, int16x8_t q2s16,
-                                             int16x8_t q3s16, int16x8_t q6s16,
-                                             int16x8_t q7s16, int16x8_t q8s16,
-                                             int16x8_t q9s16, int16x8_t q10s16,
-                                             int16x8_t q11s16, int16x8_t q12s16,
-                                             int16x8_t q13s16, int16x8_t q14s16,
-                                             int16x8_t q15s16) {
-  int16x8_t q0s16, q1s16, q4s16, q5s16;
-
-  STORE_IN_OUTPUT(17, 16, 17, q6s16, q7s16);
-  STORE_IN_OUTPUT(17, 14, 15, q8s16, q9s16);
-
-  LOAD_FROM_OUTPUT(15, 30, 31, q0s16, q1s16);
-  q4s16 = vaddq_s16(q2s16, q1s16);
-  q5s16 = vaddq_s16(q3s16, q0s16);
-  q6s16 = vsubq_s16(q3s16, q0s16);
-  q7s16 = vsubq_s16(q2s16, q1s16);
-  STORE_IN_OUTPUT(31, 30, 31, q6s16, q7s16);
-  STORE_IN_OUTPUT(31, 0, 1, q4s16, q5s16);
-
-  LOAD_FROM_OUTPUT(1, 12, 13, q0s16, q1s16);
-  q2s16 = vaddq_s16(q10s16, q1s16);
-  q3s16 = vaddq_s16(q11s16, q0s16);
-  q4s16 = vsubq_s16(q11s16, q0s16);
-  q5s16 = vsubq_s16(q10s16, q1s16);
-
-  LOAD_FROM_OUTPUT(13, 18, 19, q0s16, q1s16);
-  q8s16 = vaddq_s16(q4s16, q1s16);
-  q9s16 = vaddq_s16(q5s16, q0s16);
-  q6s16 = vsubq_s16(q5s16, q0s16);
-  q7s16 = vsubq_s16(q4s16, q1s16);
-  STORE_IN_OUTPUT(19, 18, 19, q6s16, q7s16);
-  STORE_IN_OUTPUT(19, 12, 13, q8s16, q9s16);
-
-  LOAD_FROM_OUTPUT(13, 28, 29, q0s16, q1s16);
-  q4s16 = vaddq_s16(q2s16, q1s16);
-  q5s16 = vaddq_s16(q3s16, q0s16);
-  q6s16 = vsubq_s16(q3s16, q0s16);
-  q7s16 = vsubq_s16(q2s16, q1s16);
-  STORE_IN_OUTPUT(29, 28, 29, q6s16, q7s16);
-  STORE_IN_OUTPUT(29, 2, 3, q4s16, q5s16);
-
-  LOAD_FROM_OUTPUT(3, 10, 11, q0s16, q1s16);
-  q2s16 = vaddq_s16(q12s16, q1s16);
-  q3s16 = vaddq_s16(q13s16, q0s16);
-  q4s16 = vsubq_s16(q13s16, q0s16);
-  q5s16 = vsubq_s16(q12s16, q1s16);
-
-  LOAD_FROM_OUTPUT(11, 20, 21, q0s16, q1s16);
-  q8s16 = vaddq_s16(q4s16, q1s16);
-  q9s16 = vaddq_s16(q5s16, q0s16);
-  q6s16 = vsubq_s16(q5s16, q0s16);
-  q7s16 = vsubq_s16(q4s16, q1s16);
-  STORE_IN_OUTPUT(21, 20, 21, q6s16, q7s16);
-  STORE_IN_OUTPUT(21, 10, 11, q8s16, q9s16);
-
-  LOAD_FROM_OUTPUT(11, 26, 27, q0s16, q1s16);
-  q4s16 = vaddq_s16(q2s16, q1s16);
-  q5s16 = vaddq_s16(q3s16, q0s16);
-  q6s16 = vsubq_s16(q3s16, q0s16);
-  q7s16 = vsubq_s16(q2s16, q1s16);
-  STORE_IN_OUTPUT(27, 26, 27, q6s16, q7s16);
-  STORE_IN_OUTPUT(27, 4, 5, q4s16, q5s16);
-
-  LOAD_FROM_OUTPUT(5, 8, 9, q0s16, q1s16);
-  q2s16 = vaddq_s16(q14s16, q1s16);
-  q3s16 = vaddq_s16(q15s16, q0s16);
-  q4s16 = vsubq_s16(q15s16, q0s16);
-  q5s16 = vsubq_s16(q14s16, q1s16);
-
-  LOAD_FROM_OUTPUT(9, 22, 23, q0s16, q1s16);
-  q8s16 = vaddq_s16(q4s16, q1s16);
-  q9s16 = vaddq_s16(q5s16, q0s16);
-  q6s16 = vsubq_s16(q5s16, q0s16);
-  q7s16 = vsubq_s16(q4s16, q1s16);
-  STORE_IN_OUTPUT(23, 22, 23, q6s16, q7s16);
-  STORE_IN_OUTPUT(23, 8, 9, q8s16, q9s16);
-
-  LOAD_FROM_OUTPUT(9, 24, 25, q0s16, q1s16);
-  q4s16 = vaddq_s16(q2s16, q1s16);
-  q5s16 = vaddq_s16(q3s16, q0s16);
-  q6s16 = vsubq_s16(q3s16, q0s16);
-  q7s16 = vsubq_s16(q2s16, q1s16);
-  STORE_IN_OUTPUT(25, 24, 25, q6s16, q7s16);
-  STORE_IN_OUTPUT(25, 6, 7, q4s16, q5s16);
-  return;
-}
-
-static INLINE void idct32_bands_end_2nd_pass(
-    int16_t *out, uint8_t *dest, int stride, int16x8_t q2s16, int16x8_t q3s16,
-    int16x8_t q6s16, int16x8_t q7s16, int16x8_t q8s16, int16x8_t q9s16,
-    int16x8_t q10s16, int16x8_t q11s16, int16x8_t q12s16, int16x8_t q13s16,
-    int16x8_t q14s16, int16x8_t q15s16) {
-  uint8_t *r6 = dest + 31 * stride;
-  uint8_t *r7 = dest /* +  0 * stride*/;
-  uint8_t *r9 = dest + 15 * stride;
-  uint8_t *r10 = dest + 16 * stride;
-  int str2 = stride << 1;
-  int16x8_t q0s16, q1s16, q4s16, q5s16;
-
-  STORE_COMBINE_CENTER_RESULTS(r10, r9);
-  r10 += str2;
-  r9 -= str2;
-
-  LOAD_FROM_OUTPUT(17, 30, 31, q0s16, q1s16)
-  q4s16 = vaddq_s16(q2s16, q1s16);
-  q5s16 = vaddq_s16(q3s16, q0s16);
-  q6s16 = vsubq_s16(q3s16, q0s16);
-  q7s16 = vsubq_s16(q2s16, q1s16);
-  STORE_COMBINE_EXTREME_RESULTS(r7, r6);
-  r7 += str2;
-  r6 -= str2;
-
-  LOAD_FROM_OUTPUT(31, 12, 13, q0s16, q1s16)
-  q2s16 = vaddq_s16(q10s16, q1s16);
-  q3s16 = vaddq_s16(q11s16, q0s16);
-  q4s16 = vsubq_s16(q11s16, q0s16);
-  q5s16 = vsubq_s16(q10s16, q1s16);
-
-  LOAD_FROM_OUTPUT(13, 18, 19, q0s16, q1s16)
-  q8s16 = vaddq_s16(q4s16, q1s16);
-  q9s16 = vaddq_s16(q5s16, q0s16);
-  q6s16 = vsubq_s16(q5s16, q0s16);
-  q7s16 = vsubq_s16(q4s16, q1s16);
-  STORE_COMBINE_CENTER_RESULTS(r10, r9);
-  r10 += str2;
-  r9 -= str2;
-
-  LOAD_FROM_OUTPUT(19, 28, 29, q0s16, q1s16)
-  q4s16 = vaddq_s16(q2s16, q1s16);
-  q5s16 = vaddq_s16(q3s16, q0s16);
-  q6s16 = vsubq_s16(q3s16, q0s16);
-  q7s16 = vsubq_s16(q2s16, q1s16);
-  STORE_COMBINE_EXTREME_RESULTS(r7, r6);
-  r7 += str2;
-  r6 -= str2;
-
-  LOAD_FROM_OUTPUT(29, 10, 11, q0s16, q1s16)
-  q2s16 = vaddq_s16(q12s16, q1s16);
-  q3s16 = vaddq_s16(q13s16, q0s16);
-  q4s16 = vsubq_s16(q13s16, q0s16);
-  q5s16 = vsubq_s16(q12s16, q1s16);
-
-  LOAD_FROM_OUTPUT(11, 20, 21, q0s16, q1s16)
-  q8s16 = vaddq_s16(q4s16, q1s16);
-  q9s16 = vaddq_s16(q5s16, q0s16);
-  q6s16 = vsubq_s16(q5s16, q0s16);
-  q7s16 = vsubq_s16(q4s16, q1s16);
-  STORE_COMBINE_CENTER_RESULTS(r10, r9);
-  r10 += str2;
-  r9 -= str2;
-
-  LOAD_FROM_OUTPUT(21, 26, 27, q0s16, q1s16)
-  q4s16 = vaddq_s16(q2s16, q1s16);
-  q5s16 = vaddq_s16(q3s16, q0s16);
-  q6s16 = vsubq_s16(q3s16, q0s16);
-  q7s16 = vsubq_s16(q2s16, q1s16);
-  STORE_COMBINE_EXTREME_RESULTS(r7, r6);
-  r7 += str2;
-  r6 -= str2;
-
-  LOAD_FROM_OUTPUT(27, 8, 9, q0s16, q1s16)
-  q2s16 = vaddq_s16(q14s16, q1s16);
-  q3s16 = vaddq_s16(q15s16, q0s16);
-  q4s16 = vsubq_s16(q15s16, q0s16);
-  q5s16 = vsubq_s16(q14s16, q1s16);
-
-  LOAD_FROM_OUTPUT(9, 22, 23, q0s16, q1s16)
-  q8s16 = vaddq_s16(q4s16, q1s16);
-  q9s16 = vaddq_s16(q5s16, q0s16);
-  q6s16 = vsubq_s16(q5s16, q0s16);
-  q7s16 = vsubq_s16(q4s16, q1s16);
-  STORE_COMBINE_CENTER_RESULTS(r10, r9);
-
-  LOAD_FROM_OUTPUT(23, 24, 25, q0s16, q1s16)
-  q4s16 = vaddq_s16(q2s16, q1s16);
-  q5s16 = vaddq_s16(q3s16, q0s16);
-  q6s16 = vsubq_s16(q3s16, q0s16);
-  q7s16 = vsubq_s16(q2s16, q1s16);
-  STORE_COMBINE_EXTREME_RESULTS(r7, r6);
-  return;
-}
-
-void aom_idct32x32_1024_add_neon(int16_t *input, uint8_t *dest, int stride) {
-  int i, idct32_pass_loop;
-  int16_t trans_buf[32 * 8];
-  int16_t pass1[32 * 32];
-  int16_t pass2[32 * 32];
-  int16_t *out;
-  int16x8_t q0s16, q1s16, q2s16, q3s16, q4s16, q5s16, q6s16, q7s16;
-  int16x8_t q8s16, q9s16, q10s16, q11s16, q12s16, q13s16, q14s16, q15s16;
-
-  for (idct32_pass_loop = 0, out = pass1; idct32_pass_loop < 2;
-       idct32_pass_loop++,
-      input = pass1,  // the input of pass2 is the result of pass1
-       out = pass2) {
-    for (i = 0; i < 4; i++, input += 32 * 8, out += 8) {  // idct32_bands_loop
-      idct32_transpose_pair(input, trans_buf);
-
-      // -----------------------------------------
-      // BLOCK A: 16-19,28-31
-      // -----------------------------------------
-      // generate 16,17,30,31
-      // part of stage 1
-      LOAD_FROM_TRANSPOSED(0, 1, 31)
-      DO_BUTTERFLY_STD(cospi_31_64, cospi_1_64, &q0s16, &q2s16)
-      LOAD_FROM_TRANSPOSED(31, 17, 15)
-      DO_BUTTERFLY_STD(cospi_15_64, cospi_17_64, &q1s16, &q3s16)
-      // part of stage 2
-      q4s16 = vaddq_s16(q0s16, q1s16);
-      q13s16 = vsubq_s16(q0s16, q1s16);
-      q6s16 = vaddq_s16(q2s16, q3s16);
-      q14s16 = vsubq_s16(q2s16, q3s16);
-      // part of stage 3
-      DO_BUTTERFLY_STD(cospi_28_64, cospi_4_64, &q5s16, &q7s16)
-
-      // generate 18,19,28,29
-      // part of stage 1
-      LOAD_FROM_TRANSPOSED(15, 9, 23)
-      DO_BUTTERFLY_STD(cospi_23_64, cospi_9_64, &q0s16, &q2s16)
-      LOAD_FROM_TRANSPOSED(23, 25, 7)
-      DO_BUTTERFLY_STD(cospi_7_64, cospi_25_64, &q1s16, &q3s16)
-      // part of stage 2
-      q13s16 = vsubq_s16(q3s16, q2s16);
-      q3s16 = vaddq_s16(q3s16, q2s16);
-      q14s16 = vsubq_s16(q1s16, q0s16);
-      q2s16 = vaddq_s16(q1s16, q0s16);
-      // part of stage 3
-      DO_BUTTERFLY_STD(-cospi_4_64, -cospi_28_64, &q1s16, &q0s16)
-      // part of stage 4
-      q8s16 = vaddq_s16(q4s16, q2s16);
-      q9s16 = vaddq_s16(q5s16, q0s16);
-      q10s16 = vaddq_s16(q7s16, q1s16);
-      q15s16 = vaddq_s16(q6s16, q3s16);
-      q13s16 = vsubq_s16(q5s16, q0s16);
-      q14s16 = vsubq_s16(q7s16, q1s16);
-      STORE_IN_OUTPUT(0, 16, 31, q8s16, q15s16)
-      STORE_IN_OUTPUT(31, 17, 30, q9s16, q10s16)
-      // part of stage 5
-      DO_BUTTERFLY_STD(cospi_24_64, cospi_8_64, &q0s16, &q1s16)
-      STORE_IN_OUTPUT(30, 29, 18, q1s16, q0s16)
-      // part of stage 4
-      q13s16 = vsubq_s16(q4s16, q2s16);
-      q14s16 = vsubq_s16(q6s16, q3s16);
-      // part of stage 5
-      DO_BUTTERFLY_STD(cospi_24_64, cospi_8_64, &q4s16, &q6s16)
-      STORE_IN_OUTPUT(18, 19, 28, q4s16, q6s16)
-
-      // -----------------------------------------
-      // BLOCK B: 20-23,24-27
-      // -----------------------------------------
-      // generate 20,21,26,27
-      // part of stage 1
-      LOAD_FROM_TRANSPOSED(7, 5, 27)
-      DO_BUTTERFLY_STD(cospi_27_64, cospi_5_64, &q0s16, &q2s16)
-      LOAD_FROM_TRANSPOSED(27, 21, 11)
-      DO_BUTTERFLY_STD(cospi_11_64, cospi_21_64, &q1s16, &q3s16)
-      // part of stage 2
-      q13s16 = vsubq_s16(q0s16, q1s16);
-      q0s16 = vaddq_s16(q0s16, q1s16);
-      q14s16 = vsubq_s16(q2s16, q3s16);
-      q2s16 = vaddq_s16(q2s16, q3s16);
-      // part of stage 3
-      DO_BUTTERFLY_STD(cospi_12_64, cospi_20_64, &q1s16, &q3s16)
-
-      // generate 22,23,24,25
-      // part of stage 1
-      LOAD_FROM_TRANSPOSED(11, 13, 19)
-      DO_BUTTERFLY_STD(cospi_19_64, cospi_13_64, &q5s16, &q7s16)
-      LOAD_FROM_TRANSPOSED(19, 29, 3)
-      DO_BUTTERFLY_STD(cospi_3_64, cospi_29_64, &q4s16, &q6s16)
-      // part of stage 2
-      q14s16 = vsubq_s16(q4s16, q5s16);
-      q5s16 = vaddq_s16(q4s16, q5s16);
-      q13s16 = vsubq_s16(q6s16, q7s16);
-      q6s16 = vaddq_s16(q6s16, q7s16);
-      // part of stage 3
-      DO_BUTTERFLY_STD(-cospi_20_64, -cospi_12_64, &q4s16, &q7s16)
-      // part of stage 4
-      q10s16 = vaddq_s16(q7s16, q1s16);
-      q11s16 = vaddq_s16(q5s16, q0s16);
-      q12s16 = vaddq_s16(q6s16, q2s16);
-      q15s16 = vaddq_s16(q4s16, q3s16);
-      // part of stage 6
-      LOAD_FROM_OUTPUT(28, 16, 17, q14s16, q13s16)
-      q8s16 = vaddq_s16(q14s16, q11s16);
-      q9s16 = vaddq_s16(q13s16, q10s16);
-      q13s16 = vsubq_s16(q13s16, q10s16);
-      q11s16 = vsubq_s16(q14s16, q11s16);
-      STORE_IN_OUTPUT(17, 17, 16, q9s16, q8s16)
-      LOAD_FROM_OUTPUT(16, 30, 31, q14s16, q9s16)
-      q8s16 = vsubq_s16(q9s16, q12s16);
-      q10s16 = vaddq_s16(q14s16, q15s16);
-      q14s16 = vsubq_s16(q14s16, q15s16);
-      q12s16 = vaddq_s16(q9s16, q12s16);
-      STORE_IN_OUTPUT(31, 30, 31, q10s16, q12s16)
-      // part of stage 7
-      DO_BUTTERFLY_STD(cospi_16_64, cospi_16_64, &q13s16, &q14s16)
-      STORE_IN_OUTPUT(31, 25, 22, q14s16, q13s16)
-      q13s16 = q11s16;
-      q14s16 = q8s16;
-      DO_BUTTERFLY_STD(cospi_16_64, cospi_16_64, &q13s16, &q14s16)
-      STORE_IN_OUTPUT(22, 24, 23, q14s16, q13s16)
-      // part of stage 4
-      q14s16 = vsubq_s16(q5s16, q0s16);
-      q13s16 = vsubq_s16(q6s16, q2s16);
-      DO_BUTTERFLY_STD(-cospi_8_64, -cospi_24_64, &q5s16, &q6s16);
-      q14s16 = vsubq_s16(q7s16, q1s16);
-      q13s16 = vsubq_s16(q4s16, q3s16);
-      DO_BUTTERFLY_STD(-cospi_8_64, -cospi_24_64, &q0s16, &q1s16);
-      // part of stage 6
-      LOAD_FROM_OUTPUT(23, 18, 19, q14s16, q13s16)
-      q8s16 = vaddq_s16(q14s16, q1s16);
-      q9s16 = vaddq_s16(q13s16, q6s16);
-      q13s16 = vsubq_s16(q13s16, q6s16);
-      q1s16 = vsubq_s16(q14s16, q1s16);
-      STORE_IN_OUTPUT(19, 18, 19, q8s16, q9s16)
-      LOAD_FROM_OUTPUT(19, 28, 29, q8s16, q9s16)
-      q14s16 = vsubq_s16(q8s16, q5s16);
-      q10s16 = vaddq_s16(q8s16, q5s16);
-      q11s16 = vaddq_s16(q9s16, q0s16);
-      q0s16 = vsubq_s16(q9s16, q0s16);
-      STORE_IN_OUTPUT(29, 28, 29, q10s16, q11s16)
-      // part of stage 7
-      DO_BUTTERFLY_STD(cospi_16_64, cospi_16_64, &q13s16, &q14s16)
-      STORE_IN_OUTPUT(29, 20, 27, q13s16, q14s16)
-      DO_BUTTERFLY(q0s16, q1s16, cospi_16_64, cospi_16_64, &q1s16, &q0s16);
-      STORE_IN_OUTPUT(27, 21, 26, q1s16, q0s16)
-
-      // -----------------------------------------
-      // BLOCK C: 8-10,11-15
-      // -----------------------------------------
-      // generate 8,9,14,15
-      // part of stage 2
-      LOAD_FROM_TRANSPOSED(3, 2, 30)
-      DO_BUTTERFLY_STD(cospi_30_64, cospi_2_64, &q0s16, &q2s16)
-      LOAD_FROM_TRANSPOSED(30, 18, 14)
-      DO_BUTTERFLY_STD(cospi_14_64, cospi_18_64, &q1s16, &q3s16)
-      // part of stage 3
-      q13s16 = vsubq_s16(q0s16, q1s16);
-      q0s16 = vaddq_s16(q0s16, q1s16);
-      q14s16 = vsubq_s16(q2s16, q3s16);
-      q2s16 = vaddq_s16(q2s16, q3s16);
-      // part of stage 4
-      DO_BUTTERFLY_STD(cospi_24_64, cospi_8_64, &q1s16, &q3s16)
-
-      // generate 10,11,12,13
-      // part of stage 2
-      LOAD_FROM_TRANSPOSED(14, 10, 22)
-      DO_BUTTERFLY_STD(cospi_22_64, cospi_10_64, &q5s16, &q7s16)
-      LOAD_FROM_TRANSPOSED(22, 26, 6)
-      DO_BUTTERFLY_STD(cospi_6_64, cospi_26_64, &q4s16, &q6s16)
-      // part of stage 3
-      q14s16 = vsubq_s16(q4s16, q5s16);
-      q5s16 = vaddq_s16(q4s16, q5s16);
-      q13s16 = vsubq_s16(q6s16, q7s16);
-      q6s16 = vaddq_s16(q6s16, q7s16);
-      // part of stage 4
-      DO_BUTTERFLY_STD(-cospi_8_64, -cospi_24_64, &q4s16, &q7s16)
-      // part of stage 5
-      q8s16 = vaddq_s16(q0s16, q5s16);
-      q9s16 = vaddq_s16(q1s16, q7s16);
-      q13s16 = vsubq_s16(q1s16, q7s16);
-      q14s16 = vsubq_s16(q3s16, q4s16);
-      q10s16 = vaddq_s16(q3s16, q4s16);
-      q15s16 = vaddq_s16(q2s16, q6s16);
-      STORE_IN_OUTPUT(26, 8, 15, q8s16, q15s16)
-      STORE_IN_OUTPUT(15, 9, 14, q9s16, q10s16)
-      // part of stage 6
-      DO_BUTTERFLY_STD(cospi_16_64, cospi_16_64, &q1s16, &q3s16)
-      STORE_IN_OUTPUT(14, 13, 10, q3s16, q1s16)
-      q13s16 = vsubq_s16(q0s16, q5s16);
-      q14s16 = vsubq_s16(q2s16, q6s16);
-      DO_BUTTERFLY_STD(cospi_16_64, cospi_16_64, &q1s16, &q3s16)
-      STORE_IN_OUTPUT(10, 11, 12, q1s16, q3s16)
-
-      // -----------------------------------------
-      // BLOCK D: 0-3,4-7
-      // -----------------------------------------
-      // generate 4,5,6,7
-      // part of stage 3
-      LOAD_FROM_TRANSPOSED(6, 4, 28)
-      DO_BUTTERFLY_STD(cospi_28_64, cospi_4_64, &q0s16, &q2s16)
-      LOAD_FROM_TRANSPOSED(28, 20, 12)
-      DO_BUTTERFLY_STD(cospi_12_64, cospi_20_64, &q1s16, &q3s16)
-      // part of stage 4
-      q13s16 = vsubq_s16(q0s16, q1s16);
-      q0s16 = vaddq_s16(q0s16, q1s16);
-      q14s16 = vsubq_s16(q2s16, q3s16);
-      q2s16 = vaddq_s16(q2s16, q3s16);
-      // part of stage 5
-      DO_BUTTERFLY_STD(cospi_16_64, cospi_16_64, &q1s16, &q3s16)
-
-      // generate 0,1,2,3
-      // part of stage 4
-      LOAD_FROM_TRANSPOSED(12, 0, 16)
-      DO_BUTTERFLY_STD(cospi_16_64, cospi_16_64, &q5s16, &q7s16)
-      LOAD_FROM_TRANSPOSED(16, 8, 24)
-      DO_BUTTERFLY_STD(cospi_24_64, cospi_8_64, &q14s16, &q6s16)
-      // part of stage 5
-      q4s16 = vaddq_s16(q7s16, q6s16);
-      q7s16 = vsubq_s16(q7s16, q6s16);
-      q6s16 = vsubq_s16(q5s16, q14s16);
-      q5s16 = vaddq_s16(q5s16, q14s16);
-      // part of stage 6
-      q8s16 = vaddq_s16(q4s16, q2s16);
-      q9s16 = vaddq_s16(q5s16, q3s16);
-      q10s16 = vaddq_s16(q6s16, q1s16);
-      q11s16 = vaddq_s16(q7s16, q0s16);
-      q12s16 = vsubq_s16(q7s16, q0s16);
-      q13s16 = vsubq_s16(q6s16, q1s16);
-      q14s16 = vsubq_s16(q5s16, q3s16);
-      q15s16 = vsubq_s16(q4s16, q2s16);
-      // part of stage 7
-      LOAD_FROM_OUTPUT(12, 14, 15, q0s16, q1s16)
-      q2s16 = vaddq_s16(q8s16, q1s16);
-      q3s16 = vaddq_s16(q9s16, q0s16);
-      q4s16 = vsubq_s16(q9s16, q0s16);
-      q5s16 = vsubq_s16(q8s16, q1s16);
-      LOAD_FROM_OUTPUT(15, 16, 17, q0s16, q1s16)
-      q8s16 = vaddq_s16(q4s16, q1s16);
-      q9s16 = vaddq_s16(q5s16, q0s16);
-      q6s16 = vsubq_s16(q5s16, q0s16);
-      q7s16 = vsubq_s16(q4s16, q1s16);
-
-      if (idct32_pass_loop == 0) {
-        idct32_bands_end_1st_pass(out, q2s16, q3s16, q6s16, q7s16, q8s16, q9s16,
-                                  q10s16, q11s16, q12s16, q13s16, q14s16,
-                                  q15s16);
-      } else {
-        idct32_bands_end_2nd_pass(out, dest, stride, q2s16, q3s16, q6s16, q7s16,
-                                  q8s16, q9s16, q10s16, q11s16, q12s16, q13s16,
-                                  q14s16, q15s16);
-        dest += 8;
-      }
-    }
-  }
-  return;
-}
--- a/aom_dsp/arm/idct4x4_1_add_neon.c
+++ b/aom_dsp/arm/idct4x4_1_add_neon.c
@@ -1,47 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <arm_neon.h>
-
-#include "aom_dsp/inv_txfm.h"
-#include "aom_ports/mem.h"
-
-void aom_idct4x4_1_add_neon(int16_t *input, uint8_t *dest, int dest_stride) {
-  uint8x8_t d6u8;
-  uint32x2_t d2u32 = vdup_n_u32(0);
-  uint16x8_t q8u16;
-  int16x8_t q0s16;
-  uint8_t *d1, *d2;
-  int16_t i, a1;
-  int16_t out = dct_const_round_shift(input[0] * cospi_16_64);
-  out = dct_const_round_shift(out * cospi_16_64);
-  a1 = ROUND_POWER_OF_TWO(out, 4);
-
-  q0s16 = vdupq_n_s16(a1);
-
-  // dc_only_idct_add
-  d1 = d2 = dest;
-  for (i = 0; i < 2; i++) {
-    d2u32 = vld1_lane_u32((const uint32_t *)d1, d2u32, 0);
-    d1 += dest_stride;
-    d2u32 = vld1_lane_u32((const uint32_t *)d1, d2u32, 1);
-    d1 += dest_stride;
-
-    q8u16 = vaddw_u8(vreinterpretq_u16_s16(q0s16), vreinterpret_u8_u32(d2u32));
-    d6u8 = vqmovun_s16(vreinterpretq_s16_u16(q8u16));
-
-    vst1_lane_u32((uint32_t *)d2, vreinterpret_u32_u8(d6u8), 0);
-    d2 += dest_stride;
-    vst1_lane_u32((uint32_t *)d2, vreinterpret_u32_u8(d6u8), 1);
-    d2 += dest_stride;
-  }
-  return;
-}
--- a/aom_dsp/arm/idct4x4_add_neon.c
+++ b/aom_dsp/arm/idct4x4_add_neon.c
@@ -1,146 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <arm_neon.h>
-
-#include "aom_dsp/txfm_common.h"
-
-void aom_idct4x4_16_add_neon(int16_t *input, uint8_t *dest, int dest_stride) {
-  uint8x8_t d26u8, d27u8;
-  uint32x2_t d26u32, d27u32;
-  uint16x8_t q8u16, q9u16;
-  int16x4_t d16s16, d17s16, d18s16, d19s16, d20s16, d21s16;
-  int16x4_t d22s16, d23s16, d24s16, d26s16, d27s16, d28s16, d29s16;
-  int16x8_t q8s16, q9s16, q13s16, q14s16;
-  int32x4_t q1s32, q13s32, q14s32, q15s32;
-  int16x4x2_t d0x2s16, d1x2s16;
-  int32x4x2_t q0x2s32;
-  uint8_t *d;
-
-  d26u32 = d27u32 = vdup_n_u32(0);
-
-  q8s16 = vld1q_s16(input);
-  q9s16 = vld1q_s16(input + 8);
-
-  d16s16 = vget_low_s16(q8s16);
-  d17s16 = vget_high_s16(q8s16);
-  d18s16 = vget_low_s16(q9s16);
-  d19s16 = vget_high_s16(q9s16);
-
-  d0x2s16 = vtrn_s16(d16s16, d17s16);
-  d1x2s16 = vtrn_s16(d18s16, d19s16);
-  q8s16 = vcombine_s16(d0x2s16.val[0], d0x2s16.val[1]);
-  q9s16 = vcombine_s16(d1x2s16.val[0], d1x2s16.val[1]);
-
-  d20s16 = vdup_n_s16((int16_t)cospi_8_64);
-  d21s16 = vdup_n_s16((int16_t)cospi_16_64);
-
-  q0x2s32 =
-      vtrnq_s32(vreinterpretq_s32_s16(q8s16), vreinterpretq_s32_s16(q9s16));
-  d16s16 = vget_low_s16(vreinterpretq_s16_s32(q0x2s32.val[0]));
-  d17s16 = vget_high_s16(vreinterpretq_s16_s32(q0x2s32.val[0]));
-  d18s16 = vget_low_s16(vreinterpretq_s16_s32(q0x2s32.val[1]));
-  d19s16 = vget_high_s16(vreinterpretq_s16_s32(q0x2s32.val[1]));
-
-  d22s16 = vdup_n_s16((int16_t)cospi_24_64);
-
-  // stage 1
-  d23s16 = vadd_s16(d16s16, d18s16);
-  d24s16 = vsub_s16(d16s16, d18s16);
-
-  q15s32 = vmull_s16(d17s16, d22s16);
-  q1s32 = vmull_s16(d17s16, d20s16);
-  q13s32 = vmull_s16(d23s16, d21s16);
-  q14s32 = vmull_s16(d24s16, d21s16);
-
-  q15s32 = vmlsl_s16(q15s32, d19s16, d20s16);
-  q1s32 = vmlal_s16(q1s32, d19s16, d22s16);
-
-  d26s16 = vqrshrn_n_s32(q13s32, 14);
-  d27s16 = vqrshrn_n_s32(q14s32, 14);
-  d29s16 = vqrshrn_n_s32(q15s32, 14);
-  d28s16 = vqrshrn_n_s32(q1s32, 14);
-  q13s16 = vcombine_s16(d26s16, d27s16);
-  q14s16 = vcombine_s16(d28s16, d29s16);
-
-  // stage 2
-  q8s16 = vaddq_s16(q13s16, q14s16);
-  q9s16 = vsubq_s16(q13s16, q14s16);
-
-  d16s16 = vget_low_s16(q8s16);
-  d17s16 = vget_high_s16(q8s16);
-  d18s16 = vget_high_s16(q9s16);  // vswp d18 d19
-  d19s16 = vget_low_s16(q9s16);
-
-  d0x2s16 = vtrn_s16(d16s16, d17s16);
-  d1x2s16 = vtrn_s16(d18s16, d19s16);
-  q8s16 = vcombine_s16(d0x2s16.val[0], d0x2s16.val[1]);
-  q9s16 = vcombine_s16(d1x2s16.val[0], d1x2s16.val[1]);
-
-  q0x2s32 =
-      vtrnq_s32(vreinterpretq_s32_s16(q8s16), vreinterpretq_s32_s16(q9s16));
-  d16s16 = vget_low_s16(vreinterpretq_s16_s32(q0x2s32.val[0]));
-  d17s16 = vget_high_s16(vreinterpretq_s16_s32(q0x2s32.val[0]));
-  d18s16 = vget_low_s16(vreinterpretq_s16_s32(q0x2s32.val[1]));
-  d19s16 = vget_high_s16(vreinterpretq_s16_s32(q0x2s32.val[1]));
-
-  // do the transform on columns
-  // stage 1
-  d23s16 = vadd_s16(d16s16, d18s16);
-  d24s16 = vsub_s16(d16s16, d18s16);
-
-  q15s32 = vmull_s16(d17s16, d22s16);
-  q1s32 = vmull_s16(d17s16, d20s16);
-  q13s32 = vmull_s16(d23s16, d21s16);
-  q14s32 = vmull_s16(d24s16, d21s16);
-
-  q15s32 = vmlsl_s16(q15s32, d19s16, d20s16);
-  q1s32 = vmlal_s16(q1s32, d19s16, d22s16);
-
-  d26s16 = vqrshrn_n_s32(q13s32, 14);
-  d27s16 = vqrshrn_n_s32(q14s32, 14);
-  d29s16 = vqrshrn_n_s32(q15s32, 14);
-  d28s16 = vqrshrn_n_s32(q1s32, 14);
-  q13s16 = vcombine_s16(d26s16, d27s16);
-  q14s16 = vcombine_s16(d28s16, d29s16);
-
-  // stage 2
-  q8s16 = vaddq_s16(q13s16, q14s16);
-  q9s16 = vsubq_s16(q13s16, q14s16);
-
-  q8s16 = vrshrq_n_s16(q8s16, 4);
-  q9s16 = vrshrq_n_s16(q9s16, 4);
-
-  d = dest;
-  d26u32 = vld1_lane_u32((const uint32_t *)d, d26u32, 0);
-  d += dest_stride;
-  d26u32 = vld1_lane_u32((const uint32_t *)d, d26u32, 1);
-  d += dest_stride;
-  d27u32 = vld1_lane_u32((const uint32_t *)d, d27u32, 1);
-  d += dest_stride;
-  d27u32 = vld1_lane_u32((const uint32_t *)d, d27u32, 0);
-
-  q8u16 = vaddw_u8(vreinterpretq_u16_s16(q8s16), vreinterpret_u8_u32(d26u32));
-  q9u16 = vaddw_u8(vreinterpretq_u16_s16(q9s16), vreinterpret_u8_u32(d27u32));
-
-  d26u8 = vqmovun_s16(vreinterpretq_s16_u16(q8u16));
-  d27u8 = vqmovun_s16(vreinterpretq_s16_u16(q9u16));
-
-  d = dest;
-  vst1_lane_u32((uint32_t *)d, vreinterpret_u32_u8(d26u8), 0);
-  d += dest_stride;
-  vst1_lane_u32((uint32_t *)d, vreinterpret_u32_u8(d26u8), 1);
-  d += dest_stride;
-  vst1_lane_u32((uint32_t *)d, vreinterpret_u32_u8(d27u8), 1);
-  d += dest_stride;
-  vst1_lane_u32((uint32_t *)d, vreinterpret_u32_u8(d27u8), 0);
-  return;
-}
--- a/aom_dsp/arm/idct8x8_1_add_neon.c
+++ b/aom_dsp/arm/idct8x8_1_add_neon.c
@@ -1,62 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <arm_neon.h>
-
-#include "aom_dsp/inv_txfm.h"
-#include "aom_ports/mem.h"
-
-void aom_idct8x8_1_add_neon(int16_t *input, uint8_t *dest, int dest_stride) {  // DC-only 8x8 inverse transform + add: only input[0] is read
-  uint8x8_t d2u8, d3u8, d30u8, d31u8;
-  uint64x1_t d2u64, d3u64, d4u64, d5u64;
-  uint16x8_t q0u16, q9u16, q10u16, q11u16, q12u16;
-  int16x8_t q0s16;
-  uint8_t *d1, *d2;  // d1 is the read cursor, d2 the write cursor; both step through dest by dest_stride
-  int16_t i, a1;
-  int16_t out = dct_const_round_shift(input[0] * cospi_16_64);  // first scaling of the DC coefficient by cospi_16_64
-  out = dct_const_round_shift(out * cospi_16_64);  // second scaling by cospi_16_64
-  a1 = ROUND_POWER_OF_TWO(out, 5);  // NOTE(review): presumably a rounding right shift by 5; the macro is defined outside this file
-
-  q0s16 = vdupq_n_s16(a1);  // broadcast the single residual value to all 8 lanes
-  q0u16 = vreinterpretq_u16_s16(q0s16);  // same bits viewed as unsigned so vaddw_u8 can be used
-
-  d1 = d2 = dest;
-  for (i = 0; i < 2; i++) {  // two iterations of 4 rows each = 8 rows
-    d2u64 = vld1_u64((const uint64_t *)d1);  // load 8 destination bytes (one row)
-    d1 += dest_stride;
-    d3u64 = vld1_u64((const uint64_t *)d1);
-    d1 += dest_stride;
-    d4u64 = vld1_u64((const uint64_t *)d1);
-    d1 += dest_stride;
-    d5u64 = vld1_u64((const uint64_t *)d1);
-    d1 += dest_stride;
-
-    q9u16 = vaddw_u8(q0u16, vreinterpret_u8_u64(d2u64));  // widen pixels to 16 bits and add the residual
-    q10u16 = vaddw_u8(q0u16, vreinterpret_u8_u64(d3u64));
-    q11u16 = vaddw_u8(q0u16, vreinterpret_u8_u64(d4u64));
-    q12u16 = vaddw_u8(q0u16, vreinterpret_u8_u64(d5u64));
-
-    d2u8 = vqmovun_s16(vreinterpretq_s16_u16(q9u16));  // treat sums as signed and saturate to unsigned 8 bits
-    d3u8 = vqmovun_s16(vreinterpretq_s16_u16(q10u16));
-    d30u8 = vqmovun_s16(vreinterpretq_s16_u16(q11u16));
-    d31u8 = vqmovun_s16(vreinterpretq_s16_u16(q12u16));
-
-    vst1_u64((uint64_t *)d2, vreinterpret_u64_u8(d2u8));  // write the 4 reconstructed rows back in the same order
-    d2 += dest_stride;
-    vst1_u64((uint64_t *)d2, vreinterpret_u64_u8(d3u8));
-    d2 += dest_stride;
-    vst1_u64((uint64_t *)d2, vreinterpret_u64_u8(d30u8));
-    d2 += dest_stride;
-    vst1_u64((uint64_t *)d2, vreinterpret_u64_u8(d31u8));
-    d2 += dest_stride;
-  }
-  return;
-}
--- a/aom_dsp/arm/idct8x8_add_neon.c
+++ b/aom_dsp/arm/idct8x8_add_neon.c
@@ -1,509 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <arm_neon.h>
-
-#include "./aom_config.h"
-#include "aom_dsp/txfm_common.h"
-
-static INLINE void TRANSPOSE8X8(int16x8_t *q8s16, int16x8_t *q9s16,  // in-place transpose of an 8x8 int16 block, one row per vector
-                                int16x8_t *q10s16, int16x8_t *q11s16,
-                                int16x8_t *q12s16, int16x8_t *q13s16,
-                                int16x8_t *q14s16, int16x8_t *q15s16) {
-  int16x4_t d16s16, d17s16, d18s16, d19s16, d20s16, d21s16, d22s16, d23s16;
-  int16x4_t d24s16, d25s16, d26s16, d27s16, d28s16, d29s16, d30s16, d31s16;
-  int32x4x2_t q0x2s32, q1x2s32, q2x2s32, q3x2s32;
-  int16x8x2_t q0x2s16, q1x2s16, q2x2s16, q3x2s16;
-  // Split every row into its low and high 4-lane halves.
-  d16s16 = vget_low_s16(*q8s16);
-  d17s16 = vget_high_s16(*q8s16);
-  d18s16 = vget_low_s16(*q9s16);
-  d19s16 = vget_high_s16(*q9s16);
-  d20s16 = vget_low_s16(*q10s16);
-  d21s16 = vget_high_s16(*q10s16);
-  d22s16 = vget_low_s16(*q11s16);
-  d23s16 = vget_high_s16(*q11s16);
-  d24s16 = vget_low_s16(*q12s16);
-  d25s16 = vget_high_s16(*q12s16);
-  d26s16 = vget_low_s16(*q13s16);
-  d27s16 = vget_high_s16(*q13s16);
-  d28s16 = vget_low_s16(*q14s16);
-  d29s16 = vget_high_s16(*q14s16);
-  d30s16 = vget_low_s16(*q15s16);
-  d31s16 = vget_high_s16(*q15s16);
-  // Step 1: exchange 4x4 quadrants; row k gets the low half of row k+4, row k+4 gets the high half of row k.
-  *q8s16 = vcombine_s16(d16s16, d24s16);   // vswp d17, d24
-  *q9s16 = vcombine_s16(d18s16, d26s16);   // vswp d19, d26
-  *q10s16 = vcombine_s16(d20s16, d28s16);  // vswp d21, d28
-  *q11s16 = vcombine_s16(d22s16, d30s16);  // vswp d23, d30
-  *q12s16 = vcombine_s16(d17s16, d25s16);
-  *q13s16 = vcombine_s16(d19s16, d27s16);
-  *q14s16 = vcombine_s16(d21s16, d29s16);
-  *q15s16 = vcombine_s16(d23s16, d31s16);
-  // Step 2: transpose at 32-bit granularity between rows two apart (q8/q10, q9/q11, q12/q14, q13/q15).
-  q0x2s32 =
-      vtrnq_s32(vreinterpretq_s32_s16(*q8s16), vreinterpretq_s32_s16(*q10s16));
-  q1x2s32 =
-      vtrnq_s32(vreinterpretq_s32_s16(*q9s16), vreinterpretq_s32_s16(*q11s16));
-  q2x2s32 =
-      vtrnq_s32(vreinterpretq_s32_s16(*q12s16), vreinterpretq_s32_s16(*q14s16));
-  q3x2s32 =
-      vtrnq_s32(vreinterpretq_s32_s16(*q13s16), vreinterpretq_s32_s16(*q15s16));
-  // Step 3: transpose at 16-bit granularity between adjacent rows.
-  q0x2s16 = vtrnq_s16(vreinterpretq_s16_s32(q0x2s32.val[0]),   // q8
-                      vreinterpretq_s16_s32(q1x2s32.val[0]));  // q9
-  q1x2s16 = vtrnq_s16(vreinterpretq_s16_s32(q0x2s32.val[1]),   // q10
-                      vreinterpretq_s16_s32(q1x2s32.val[1]));  // q11
-  q2x2s16 = vtrnq_s16(vreinterpretq_s16_s32(q2x2s32.val[0]),   // q12
-                      vreinterpretq_s16_s32(q3x2s32.val[0]));  // q13
-  q3x2s16 = vtrnq_s16(vreinterpretq_s16_s32(q2x2s32.val[1]),   // q14
-                      vreinterpretq_s16_s32(q3x2s32.val[1]));  // q15
-  // Write the transposed rows back through the caller's pointers.
-  *q8s16 = q0x2s16.val[0];
-  *q9s16 = q0x2s16.val[1];
-  *q10s16 = q1x2s16.val[0];
-  *q11s16 = q1x2s16.val[1];
-  *q12s16 = q2x2s16.val[0];
-  *q13s16 = q2x2s16.val[1];
-  *q14s16 = q3x2s16.val[0];
-  *q15s16 = q3x2s16.val[1];
-  return;
-}
-
-static INLINE void IDCT8x8_1D(int16x8_t *q8s16, int16x8_t *q9s16,  // one 1-D 8-point inverse DCT pass over eight vectors, updated in place
-                              int16x8_t *q10s16, int16x8_t *q11s16,
-                              int16x8_t *q12s16, int16x8_t *q13s16,
-                              int16x8_t *q14s16, int16x8_t *q15s16) {
-  int16x4_t d0s16, d1s16, d2s16, d3s16;
-  int16x4_t d8s16, d9s16, d10s16, d11s16, d12s16, d13s16, d14s16, d15s16;
-  int16x4_t d16s16, d17s16, d18s16, d19s16, d20s16, d21s16, d22s16, d23s16;
-  int16x4_t d24s16, d25s16, d26s16, d27s16, d28s16, d29s16, d30s16, d31s16;
-  int16x8_t q0s16, q1s16, q2s16, q3s16, q4s16, q5s16, q6s16, q7s16;
-  int32x4_t q2s32, q3s32, q5s32, q6s32, q8s32, q9s32;
-  int32x4_t q10s32, q11s32, q12s32, q13s32, q15s32;
-  // Constants for the odd-indexed inputs (q9, q11, q13, q15).
-  d0s16 = vdup_n_s16((int16_t)cospi_28_64);
-  d1s16 = vdup_n_s16((int16_t)cospi_4_64);
-  d2s16 = vdup_n_s16((int16_t)cospi_12_64);
-  d3s16 = vdup_n_s16((int16_t)cospi_20_64);
-  // Split each input vector into 4-lane halves so the 16x16->32 widening multiplies can be used.
-  d16s16 = vget_low_s16(*q8s16);
-  d17s16 = vget_high_s16(*q8s16);
-  d18s16 = vget_low_s16(*q9s16);
-  d19s16 = vget_high_s16(*q9s16);
-  d20s16 = vget_low_s16(*q10s16);
-  d21s16 = vget_high_s16(*q10s16);
-  d22s16 = vget_low_s16(*q11s16);
-  d23s16 = vget_high_s16(*q11s16);
-  d24s16 = vget_low_s16(*q12s16);
-  d25s16 = vget_high_s16(*q12s16);
-  d26s16 = vget_low_s16(*q13s16);
-  d27s16 = vget_high_s16(*q13s16);
-  d28s16 = vget_low_s16(*q14s16);
-  d29s16 = vget_high_s16(*q14s16);
-  d30s16 = vget_low_s16(*q15s16);
-  d31s16 = vget_high_s16(*q15s16);
-  // q4 = q9 * cospi_28_64 - q15 * cospi_4_64;  q5 = q13 * cospi_12_64 - q11 * cospi_20_64
-  q2s32 = vmull_s16(d18s16, d0s16);
-  q3s32 = vmull_s16(d19s16, d0s16);
-  q5s32 = vmull_s16(d26s16, d2s16);
-  q6s32 = vmull_s16(d27s16, d2s16);
-
-  q2s32 = vmlsl_s16(q2s32, d30s16, d1s16);
-  q3s32 = vmlsl_s16(q3s32, d31s16, d1s16);
-  q5s32 = vmlsl_s16(q5s32, d22s16, d3s16);
-  q6s32 = vmlsl_s16(q6s32, d23s16, d3s16);
-  // Rounding, saturating shift right by 14 narrows the 32-bit products back to 16 bits.
-  d8s16 = vqrshrn_n_s32(q2s32, 14);
-  d9s16 = vqrshrn_n_s32(q3s32, 14);
-  d10s16 = vqrshrn_n_s32(q5s32, 14);
-  d11s16 = vqrshrn_n_s32(q6s32, 14);
-  q4s16 = vcombine_s16(d8s16, d9s16);
-  q5s16 = vcombine_s16(d10s16, d11s16);
-  // q7 = q9 * cospi_4_64 + q15 * cospi_28_64;  q6 = q13 * cospi_20_64 + q11 * cospi_12_64
-  q2s32 = vmull_s16(d18s16, d1s16);
-  q3s32 = vmull_s16(d19s16, d1s16);
-  q9s32 = vmull_s16(d26s16, d3s16);
-  q13s32 = vmull_s16(d27s16, d3s16);
-
-  q2s32 = vmlal_s16(q2s32, d30s16, d0s16);
-  q3s32 = vmlal_s16(q3s32, d31s16, d0s16);
-  q9s32 = vmlal_s16(q9s32, d22s16, d2s16);
-  q13s32 = vmlal_s16(q13s32, d23s16, d2s16);
-
-  d14s16 = vqrshrn_n_s32(q2s32, 14);
-  d15s16 = vqrshrn_n_s32(q3s32, 14);
-  d12s16 = vqrshrn_n_s32(q9s32, 14);
-  d13s16 = vqrshrn_n_s32(q13s32, 14);
-  q6s16 = vcombine_s16(d12s16, d13s16);
-  q7s16 = vcombine_s16(d14s16, d15s16);
-
-  d0s16 = vdup_n_s16((int16_t)cospi_16_64);  // d0 is reused: now holds cospi_16_64
-  // *q9 = (q8 + q12) * cospi_16_64;  *q11 = (q8 - q12) * cospi_16_64 (q8/q12 are the original inputs)
-  q2s32 = vmull_s16(d16s16, d0s16);
-  q3s32 = vmull_s16(d17s16, d0s16);
-  q13s32 = vmull_s16(d16s16, d0s16);
-  q15s32 = vmull_s16(d17s16, d0s16);
-
-  q2s32 = vmlal_s16(q2s32, d24s16, d0s16);
-  q3s32 = vmlal_s16(q3s32, d25s16, d0s16);
-  q13s32 = vmlsl_s16(q13s32, d24s16, d0s16);
-  q15s32 = vmlsl_s16(q15s32, d25s16, d0s16);
-
-  d0s16 = vdup_n_s16((int16_t)cospi_24_64);  // d0/d1 reused again for the q10/q14 rotation below
-  d1s16 = vdup_n_s16((int16_t)cospi_8_64);
-
-  d18s16 = vqrshrn_n_s32(q2s32, 14);
-  d19s16 = vqrshrn_n_s32(q3s32, 14);
-  d22s16 = vqrshrn_n_s32(q13s32, 14);
-  d23s16 = vqrshrn_n_s32(q15s32, 14);
-  *q9s16 = vcombine_s16(d18s16, d19s16);
-  *q11s16 = vcombine_s16(d22s16, d23s16);
-  // *q13 = q10 * cospi_24_64 - q14 * cospi_8_64;  *q15 = q10 * cospi_8_64 + q14 * cospi_24_64
-  q2s32 = vmull_s16(d20s16, d0s16);
-  q3s32 = vmull_s16(d21s16, d0s16);
-  q8s32 = vmull_s16(d20s16, d1s16);
-  q12s32 = vmull_s16(d21s16, d1s16);
-
-  q2s32 = vmlsl_s16(q2s32, d28s16, d1s16);
-  q3s32 = vmlsl_s16(q3s32, d29s16, d1s16);
-  q8s32 = vmlal_s16(q8s32, d28s16, d0s16);
-  q12s32 = vmlal_s16(q12s32, d29s16, d0s16);
-
-  d26s16 = vqrshrn_n_s32(q2s32, 14);
-  d27s16 = vqrshrn_n_s32(q3s32, 14);
-  d30s16 = vqrshrn_n_s32(q8s32, 14);
-  d31s16 = vqrshrn_n_s32(q12s32, 14);
-  *q13s16 = vcombine_s16(d26s16, d27s16);
-  *q15s16 = vcombine_s16(d30s16, d31s16);
-  // Butterflies on the four terms derived from the even-indexed inputs -> q0..q3.
-  q0s16 = vaddq_s16(*q9s16, *q15s16);
-  q1s16 = vaddq_s16(*q11s16, *q13s16);
-  q2s16 = vsubq_s16(*q11s16, *q13s16);
-  q3s16 = vsubq_s16(*q9s16, *q15s16);
-  // Butterflies on the four terms derived from the odd-indexed inputs (q4..q7); differences go to *q13/*q14.
-  *q13s16 = vsubq_s16(q4s16, q5s16);
-  q4s16 = vaddq_s16(q4s16, q5s16);
-  *q14s16 = vsubq_s16(q7s16, q6s16);
-  q7s16 = vaddq_s16(q7s16, q6s16);
-  d26s16 = vget_low_s16(*q13s16);
-  d27s16 = vget_high_s16(*q13s16);
-  d28s16 = vget_low_s16(*q14s16);
-  d29s16 = vget_high_s16(*q14s16);
-
-  d16s16 = vdup_n_s16((int16_t)cospi_16_64);
-  // q5 = (*q14 - *q13) * cospi_16_64;  q6 = (*q14 + *q13) * cospi_16_64
-  q9s32 = vmull_s16(d28s16, d16s16);
-  q10s32 = vmull_s16(d29s16, d16s16);
-  q11s32 = vmull_s16(d28s16, d16s16);
-  q12s32 = vmull_s16(d29s16, d16s16);
-
-  q9s32 = vmlsl_s16(q9s32, d26s16, d16s16);
-  q10s32 = vmlsl_s16(q10s32, d27s16, d16s16);
-  q11s32 = vmlal_s16(q11s32, d26s16, d16s16);
-  q12s32 = vmlal_s16(q12s32, d27s16, d16s16);
-
-  d10s16 = vqrshrn_n_s32(q9s32, 14);
-  d11s16 = vqrshrn_n_s32(q10s32, 14);
-  d12s16 = vqrshrn_n_s32(q11s32, 14);
-  d13s16 = vqrshrn_n_s32(q12s32, 14);
-  q5s16 = vcombine_s16(d10s16, d11s16);
-  q6s16 = vcombine_s16(d12s16, d13s16);
-  // Final butterflies: outputs k and 7-k are the sum and difference of q(k) and q(7-k), k = 0..3.
-  *q8s16 = vaddq_s16(q0s16, q7s16);
-  *q9s16 = vaddq_s16(q1s16, q6s16);
-  *q10s16 = vaddq_s16(q2s16, q5s16);
-  *q11s16 = vaddq_s16(q3s16, q4s16);
-  *q12s16 = vsubq_s16(q3s16, q4s16);
-  *q13s16 = vsubq_s16(q2s16, q5s16);
-  *q14s16 = vsubq_s16(q1s16, q6s16);
-  *q15s16 = vsubq_s16(q0s16, q7s16);
-  return;
-}
-
-void aom_idct8x8_64_add_neon(int16_t *input, uint8_t *dest, int dest_stride) {  // full 8x8 inverse DCT (all 64 coefficients read) added to dest
-  uint8_t *d1, *d2;  // d1: read cursor into dest, d2: write cursor into dest
-  uint8x8_t d0u8, d1u8, d2u8, d3u8;
-  uint64x1_t d0u64, d1u64, d2u64, d3u64;
-  int16x8_t q8s16, q9s16, q10s16, q11s16, q12s16, q13s16, q14s16, q15s16;
-  uint16x8_t q8u16, q9u16, q10u16, q11u16;
-  // Load the 64 coefficients, eight consecutive int16 values per vector.
-  q8s16 = vld1q_s16(input);
-  q9s16 = vld1q_s16(input + 8);
-  q10s16 = vld1q_s16(input + 16);
-  q11s16 = vld1q_s16(input + 24);
-  q12s16 = vld1q_s16(input + 32);
-  q13s16 = vld1q_s16(input + 40);
-  q14s16 = vld1q_s16(input + 48);
-  q15s16 = vld1q_s16(input + 56);
-  // First pass: transpose, then 1-D transform.
-  TRANSPOSE8X8(&q8s16, &q9s16, &q10s16, &q11s16, &q12s16, &q13s16, &q14s16,
-               &q15s16);
-
-  IDCT8x8_1D(&q8s16, &q9s16, &q10s16, &q11s16, &q12s16, &q13s16, &q14s16,
-             &q15s16);
-  // Second pass: transpose again, then 1-D transform along the other dimension.
-  TRANSPOSE8X8(&q8s16, &q9s16, &q10s16, &q11s16, &q12s16, &q13s16, &q14s16,
-               &q15s16);
-
-  IDCT8x8_1D(&q8s16, &q9s16, &q10s16, &q11s16, &q12s16, &q13s16, &q14s16,
-             &q15s16);
-  // Final rounding shift right by 5 on every output vector.
-  q8s16 = vrshrq_n_s16(q8s16, 5);
-  q9s16 = vrshrq_n_s16(q9s16, 5);
-  q10s16 = vrshrq_n_s16(q10s16, 5);
-  q11s16 = vrshrq_n_s16(q11s16, 5);
-  q12s16 = vrshrq_n_s16(q12s16, 5);
-  q13s16 = vrshrq_n_s16(q13s16, 5);
-  q14s16 = vrshrq_n_s16(q14s16, 5);
-  q15s16 = vrshrq_n_s16(q15s16, 5);
-
-  d1 = d2 = dest;
-  // Destination rows 0-3: load 8 bytes per row, add q8..q11, saturate, store.
-  d0u64 = vld1_u64((uint64_t *)d1);
-  d1 += dest_stride;
-  d1u64 = vld1_u64((uint64_t *)d1);
-  d1 += dest_stride;
-  d2u64 = vld1_u64((uint64_t *)d1);
-  d1 += dest_stride;
-  d3u64 = vld1_u64((uint64_t *)d1);
-  d1 += dest_stride;
-
-  q8u16 = vaddw_u8(vreinterpretq_u16_s16(q8s16), vreinterpret_u8_u64(d0u64));  // widen pixels to 16 bits and add the residual
-  q9u16 = vaddw_u8(vreinterpretq_u16_s16(q9s16), vreinterpret_u8_u64(d1u64));
-  q10u16 = vaddw_u8(vreinterpretq_u16_s16(q10s16), vreinterpret_u8_u64(d2u64));
-  q11u16 = vaddw_u8(vreinterpretq_u16_s16(q11s16), vreinterpret_u8_u64(d3u64));
-
-  d0u8 = vqmovun_s16(vreinterpretq_s16_u16(q8u16));  // signed 16-bit -> unsigned 8-bit with saturation
-  d1u8 = vqmovun_s16(vreinterpretq_s16_u16(q9u16));
-  d2u8 = vqmovun_s16(vreinterpretq_s16_u16(q10u16));
-  d3u8 = vqmovun_s16(vreinterpretq_s16_u16(q11u16));
-
-  vst1_u64((uint64_t *)d2, vreinterpret_u64_u8(d0u8));
-  d2 += dest_stride;
-  vst1_u64((uint64_t *)d2, vreinterpret_u64_u8(d1u8));
-  d2 += dest_stride;
-  vst1_u64((uint64_t *)d2, vreinterpret_u64_u8(d2u8));
-  d2 += dest_stride;
-  vst1_u64((uint64_t *)d2, vreinterpret_u64_u8(d3u8));
-  d2 += dest_stride;
-  // Destination rows 4-7: move q12..q15 into q8..q11 and repeat the same add/saturate/store sequence.
-  q8s16 = q12s16;
-  q9s16 = q13s16;
-  q10s16 = q14s16;
-  q11s16 = q15s16;
-
-  d0u64 = vld1_u64((uint64_t *)d1);
-  d1 += dest_stride;
-  d1u64 = vld1_u64((uint64_t *)d1);
-  d1 += dest_stride;
-  d2u64 = vld1_u64((uint64_t *)d1);
-  d1 += dest_stride;
-  d3u64 = vld1_u64((uint64_t *)d1);
-  d1 += dest_stride;
-
-  q8u16 = vaddw_u8(vreinterpretq_u16_s16(q8s16), vreinterpret_u8_u64(d0u64));
-  q9u16 = vaddw_u8(vreinterpretq_u16_s16(q9s16), vreinterpret_u8_u64(d1u64));
-  q10u16 = vaddw_u8(vreinterpretq_u16_s16(q10s16), vreinterpret_u8_u64(d2u64));
-  q11u16 = vaddw_u8(vreinterpretq_u16_s16(q11s16), vreinterpret_u8_u64(d3u64));
-
-  d0u8 = vqmovun_s16(vreinterpretq_s16_u16(q8u16));
-  d1u8 = vqmovun_s16(vreinterpretq_s16_u16(q9u16));
-  d2u8 = vqmovun_s16(vreinterpretq_s16_u16(q10u16));
-  d3u8 = vqmovun_s16(vreinterpretq_s16_u16(q11u16));
-
-  vst1_u64((uint64_t *)d2, vreinterpret_u64_u8(d0u8));
-  d2 += dest_stride;
-  vst1_u64((uint64_t *)d2, vreinterpret_u64_u8(d1u8));
-  d2 += dest_stride;
-  vst1_u64((uint64_t *)d2, vreinterpret_u64_u8(d2u8));
-  d2 += dest_stride;
-  vst1_u64((uint64_t *)d2, vreinterpret_u64_u8(d3u8));
-  d2 += dest_stride;
-  return;
-}
-
-void aom_idct8x8_12_add_neon(int16_t *input, uint8_t *dest, int dest_stride) {  // reduced 8x8 inverse DCT + add: the first pass reads only q8..q11 after the transpose
-  uint8_t *d1, *d2;  // d1: read cursor into dest, d2: write cursor into dest
-  uint8x8_t d0u8, d1u8, d2u8, d3u8;
-  int16x4_t d10s16, d11s16, d12s16, d13s16, d16s16;
-  int16x4_t d26s16, d27s16, d28s16, d29s16;
-  uint64x1_t d0u64, d1u64, d2u64, d3u64;
-  int16x8_t q0s16, q1s16, q2s16, q3s16, q4s16, q5s16, q6s16, q7s16;
-  int16x8_t q8s16, q9s16, q10s16, q11s16, q12s16, q13s16, q14s16, q15s16;
-  uint16x8_t q8u16, q9u16, q10u16, q11u16;
-  int32x4_t q9s32, q10s32, q11s32, q12s32;
-  // Load the 64 coefficients, eight consecutive int16 values per vector.
-  q8s16 = vld1q_s16(input);
-  q9s16 = vld1q_s16(input + 8);
-  q10s16 = vld1q_s16(input + 16);
-  q11s16 = vld1q_s16(input + 24);
-  q12s16 = vld1q_s16(input + 32);
-  q13s16 = vld1q_s16(input + 40);
-  q14s16 = vld1q_s16(input + 48);
-  q15s16 = vld1q_s16(input + 56);
-
-  TRANSPOSE8X8(&q8s16, &q9s16, &q10s16, &q11s16, &q12s16, &q13s16, &q14s16,
-               &q15s16);
-
-  // First transform rows
-  // stage 1
-  q0s16 = vdupq_n_s16((int16_t)cospi_28_64 * 2);  // constants are doubled so vqrdmulh (2*a*b + 2^15) >> 16 equals a rounded (a * cospi) >> 14
-  q1s16 = vdupq_n_s16((int16_t)cospi_4_64 * 2);
-
-  q4s16 = vqrdmulhq_s16(q9s16, q0s16);  // q4 = q9 * cospi_28_64
-
-  q0s16 = vdupq_n_s16(-(int16_t)cospi_20_64 * 2);
-
-  q7s16 = vqrdmulhq_s16(q9s16, q1s16);  // q7 = q9 * cospi_4_64
-
-  q1s16 = vdupq_n_s16((int16_t)cospi_12_64 * 2);
-
-  q5s16 = vqrdmulhq_s16(q11s16, q0s16);  // q5 = q11 * -cospi_20_64
-
-  q0s16 = vdupq_n_s16((int16_t)cospi_16_64 * 2);
-
-  q6s16 = vqrdmulhq_s16(q11s16, q1s16);  // q6 = q11 * cospi_12_64
-
-  // stage 2 & stage 3 - even half
-  q1s16 = vdupq_n_s16((int16_t)cospi_24_64 * 2);
-
-  q9s16 = vqrdmulhq_s16(q8s16, q0s16);  // q9 = q8 * cospi_16_64
-
-  q0s16 = vdupq_n_s16((int16_t)cospi_8_64 * 2);
-
-  q13s16 = vqrdmulhq_s16(q10s16, q1s16);  // q13 = q10 * cospi_24_64
-
-  q15s16 = vqrdmulhq_s16(q10s16, q0s16);  // q15 = q10 * cospi_8_64
-
-  // stage 3 - even half: butterflies of the even-half terms q9, q13, q15 computed above
-  q0s16 = vaddq_s16(q9s16, q15s16);
-  q1s16 = vaddq_s16(q9s16, q13s16);
-  q2s16 = vsubq_s16(q9s16, q13s16);
-  q3s16 = vsubq_s16(q9s16, q15s16);
-
-  // stage 2 - odd half
-  q13s16 = vsubq_s16(q4s16, q5s16);
-  q4s16 = vaddq_s16(q4s16, q5s16);
-  q14s16 = vsubq_s16(q7s16, q6s16);
-  q7s16 = vaddq_s16(q7s16, q6s16);
-  d26s16 = vget_low_s16(q13s16);
-  d27s16 = vget_high_s16(q13s16);
-  d28s16 = vget_low_s16(q14s16);
-  d29s16 = vget_high_s16(q14s16);
-  // q5 = (q14 - q13) * cospi_16_64;  q6 = (q14 + q13) * cospi_16_64, narrowed with a rounding shift by 14
-  d16s16 = vdup_n_s16((int16_t)cospi_16_64);
-  q9s32 = vmull_s16(d28s16, d16s16);
-  q10s32 = vmull_s16(d29s16, d16s16);
-  q11s32 = vmull_s16(d28s16, d16s16);
-  q12s32 = vmull_s16(d29s16, d16s16);
-
-  q9s32 = vmlsl_s16(q9s32, d26s16, d16s16);
-  q10s32 = vmlsl_s16(q10s32, d27s16, d16s16);
-  q11s32 = vmlal_s16(q11s32, d26s16, d16s16);
-  q12s32 = vmlal_s16(q12s32, d27s16, d16s16);
-
-  d10s16 = vqrshrn_n_s32(q9s32, 14);
-  d11s16 = vqrshrn_n_s32(q10s32, 14);
-  d12s16 = vqrshrn_n_s32(q11s32, 14);
-  d13s16 = vqrshrn_n_s32(q12s32, 14);
-  q5s16 = vcombine_s16(d10s16, d11s16);
-  q6s16 = vcombine_s16(d12s16, d13s16);
-
-  // stage 4
-  q8s16 = vaddq_s16(q0s16, q7s16);
-  q9s16 = vaddq_s16(q1s16, q6s16);
-  q10s16 = vaddq_s16(q2s16, q5s16);
-  q11s16 = vaddq_s16(q3s16, q4s16);
-  q12s16 = vsubq_s16(q3s16, q4s16);
-  q13s16 = vsubq_s16(q2s16, q5s16);
-  q14s16 = vsubq_s16(q1s16, q6s16);
-  q15s16 = vsubq_s16(q0s16, q7s16);
-  // Second pass: transpose and run the general 1-D transform.
-  TRANSPOSE8X8(&q8s16, &q9s16, &q10s16, &q11s16, &q12s16, &q13s16, &q14s16,
-               &q15s16);
-
-  IDCT8x8_1D(&q8s16, &q9s16, &q10s16, &q11s16, &q12s16, &q13s16, &q14s16,
-             &q15s16);
-  // Final rounding shift right by 5 on every output vector.
-  q8s16 = vrshrq_n_s16(q8s16, 5);
-  q9s16 = vrshrq_n_s16(q9s16, 5);
-  q10s16 = vrshrq_n_s16(q10s16, 5);
-  q11s16 = vrshrq_n_s16(q11s16, 5);
-  q12s16 = vrshrq_n_s16(q12s16, 5);
-  q13s16 = vrshrq_n_s16(q13s16, 5);
-  q14s16 = vrshrq_n_s16(q14s16, 5);
-  q15s16 = vrshrq_n_s16(q15s16, 5);
-
-  d1 = d2 = dest;
-  // Destination rows 0-3: load 8 bytes per row, add q8..q11, saturate, store.
-  d0u64 = vld1_u64((uint64_t *)d1);
-  d1 += dest_stride;
-  d1u64 = vld1_u64((uint64_t *)d1);
-  d1 += dest_stride;
-  d2u64 = vld1_u64((uint64_t *)d1);
-  d1 += dest_stride;
-  d3u64 = vld1_u64((uint64_t *)d1);
-  d1 += dest_stride;
-
-  q8u16 = vaddw_u8(vreinterpretq_u16_s16(q8s16), vreinterpret_u8_u64(d0u64));  // widen pixels to 16 bits and add the residual
-  q9u16 = vaddw_u8(vreinterpretq_u16_s16(q9s16), vreinterpret_u8_u64(d1u64));
-  q10u16 = vaddw_u8(vreinterpretq_u16_s16(q10s16), vreinterpret_u8_u64(d2u64));
-  q11u16 = vaddw_u8(vreinterpretq_u16_s16(q11s16), vreinterpret_u8_u64(d3u64));
-
-  d0u8 = vqmovun_s16(vreinterpretq_s16_u16(q8u16));  // signed 16-bit -> unsigned 8-bit with saturation
-  d1u8 = vqmovun_s16(vreinterpretq_s16_u16(q9u16));
-  d2u8 = vqmovun_s16(vreinterpretq_s16_u16(q10u16));
-  d3u8 = vqmovun_s16(vreinterpretq_s16_u16(q11u16));
-
-  vst1_u64((uint64_t *)d2, vreinterpret_u64_u8(d0u8));
-  d2 += dest_stride;
-  vst1_u64((uint64_t *)d2, vreinterpret_u64_u8(d1u8));
-  d2 += dest_stride;
-  vst1_u64((uint64_t *)d2, vreinterpret_u64_u8(d2u8));
-  d2 += dest_stride;
-  vst1_u64((uint64_t *)d2, vreinterpret_u64_u8(d3u8));
-  d2 += dest_stride;
-  // Destination rows 4-7: move q12..q15 into q8..q11 and repeat the same add/saturate/store sequence.
-  q8s16 = q12s16;
-  q9s16 = q13s16;
-  q10s16 = q14s16;
-  q11s16 = q15s16;
-
-  d0u64 = vld1_u64((uint64_t *)d1);
-  d1 += dest_stride;
-  d1u64 = vld1_u64((uint64_t *)d1);
-  d1 += dest_stride;
-  d2u64 = vld1_u64((uint64_t *)d1);
-  d1 += dest_stride;
-  d3u64 = vld1_u64((uint64_t *)d1);
-  d1 += dest_stride;
-
-  q8u16 = vaddw_u8(vreinterpretq_u16_s16(q8s16), vreinterpret_u8_u64(d0u64));
-  q9u16 = vaddw_u8(vreinterpretq_u16_s16(q9s16), vreinterpret_u8_u64(d1u64));
-  q10u16 = vaddw_u8(vreinterpretq_u16_s16(q10s16), vreinterpret_u8_u64(d2u64));
-  q11u16 = vaddw_u8(vreinterpretq_u16_s16(q11s16), vreinterpret_u8_u64(d3u64));
-
-  d0u8 = vqmovun_s16(vreinterpretq_s16_u16(q8u16));
-  d1u8 = vqmovun_s16(vreinterpretq_s16_u16(q9u16));
-  d2u8 = vqmovun_s16(vreinterpretq_s16_u16(q10u16));
-  d3u8 = vqmovun_s16(vreinterpretq_s16_u16(q11u16));
-
-  vst1_u64((uint64_t *)d2, vreinterpret_u64_u8(d0u8));
-  d2 += dest_stride;
-  vst1_u64((uint64_t *)d2, vreinterpret_u64_u8(d1u8));
-  d2 += dest_stride;
-  vst1_u64((uint64_t *)d2, vreinterpret_u64_u8(d2u8));
-  d2 += dest_stride;
-  vst1_u64((uint64_t *)d2, vreinterpret_u64_u8(d3u8));
-  d2 += dest_stride;
-  return;
-}
--- a/aom_dsp/arm/loopfilter_16_neon.c
+++ b/aom_dsp/arm/loopfilter_16_neon.c
@@ -1,174 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <arm_neon.h>
-
-#include "./aom_dsp_rtcd.h"
-#include "./aom_config.h"
-#include "aom/aom_integer.h"
-
-static INLINE void loop_filter_neon_16(uint8x16_t qblimit,  // blimit
-                                       uint8x16_t qlimit,   // limit
-                                       uint8x16_t qthresh,  // thresh
-                                       uint8x16_t q3,       // p3
-                                       uint8x16_t q4,       // p2
-                                       uint8x16_t q5,       // p1
-                                       uint8x16_t q6,       // p0
-                                       uint8x16_t q7,       // q0
-                                       uint8x16_t q8,       // q1
-                                       uint8x16_t q9,       // q2
-                                       uint8x16_t q10,      // q3
-                                       uint8x16_t *q5r,     // out: filtered p1
-                                       uint8x16_t *q6r,     // out: filtered p0
-                                       uint8x16_t *q7r,     // out: filtered q0
-                                       uint8x16_t *q8r) {   // out: filtered q1
-  uint8x16_t q1u8, q2u8, q11u8, q12u8, q13u8, q14u8, q15u8;
-  int16x8_t q2s16, q11s16;
-  uint16x8_t q4u16;
-  int8x16_t q0s8, q1s8, q2s8, q11s8, q12s8, q13s8;
-  int8x8_t d2s8, d3s8;
-  // Absolute differences between neighbouring taps on each side of the edge (16 positions at once).
-  q11u8 = vabdq_u8(q3, q4);   // |p3 - p2|
-  q12u8 = vabdq_u8(q4, q5);   // |p2 - p1|
-  q13u8 = vabdq_u8(q5, q6);   // |p1 - p0|
-  q14u8 = vabdq_u8(q8, q7);   // |q1 - q0|
-  q3 = vabdq_u8(q9, q8);      // |q2 - q1| (q3 is reused as scratch)
-  q4 = vabdq_u8(q10, q9);     // |q3 - q2| (q4 is reused as scratch)
-  // Reduce to the largest neighbour difference per position.
-  q11u8 = vmaxq_u8(q11u8, q12u8);
-  q12u8 = vmaxq_u8(q13u8, q14u8);
-  q3 = vmaxq_u8(q3, q4);
-  q15u8 = vmaxq_u8(q11u8, q12u8);
-
-  q9 = vabdq_u8(q6, q7);  // |p0 - q0|
-
-  // aom_hevmask
-  q13u8 = vcgtq_u8(q13u8, qthresh);  // |p1 - p0| > thresh
-  q14u8 = vcgtq_u8(q14u8, qthresh);  // |q1 - q0| > thresh
-  q15u8 = vmaxq_u8(q15u8, q3);       // max over all six neighbour differences
-
-  q2u8 = vabdq_u8(q5, q8);  // |p1 - q1|
-  q9 = vqaddq_u8(q9, q9);   // saturating 2 * |p0 - q0|
-
-  q15u8 = vcgeq_u8(qlimit, q15u8);  // all-ones where every neighbour difference <= limit
-
-  // aom_filter() function
-  // convert to signed
-  q10 = vdupq_n_u8(0x80);
-  q8 = veorq_u8(q8, q10);
-  q7 = veorq_u8(q7, q10);
-  q6 = veorq_u8(q6, q10);
-  q5 = veorq_u8(q5, q10);
-
-  q2u8 = vshrq_n_u8(q2u8, 1);  // |p1 - q1| / 2
-  q9 = vqaddq_u8(q9, q2u8);    // 2 * |p0 - q0| + |p1 - q1| / 2
-  // q0 - p0 widened to 16 bits (low and high halves handled separately).
-  q2s16 = vsubl_s8(vget_low_s8(vreinterpretq_s8_u8(q7)),
-                   vget_low_s8(vreinterpretq_s8_u8(q6)));
-  q11s16 = vsubl_s8(vget_high_s8(vreinterpretq_s8_u8(q7)),
-                    vget_high_s8(vreinterpretq_s8_u8(q6)));
-
-  q9 = vcgeq_u8(qblimit, q9);  // all-ones where the edge test value <= blimit
-
-  q1s8 = vqsubq_s8(vreinterpretq_s8_u8(q5), vreinterpretq_s8_u8(q8));  // saturating p1 - q1
-
-  q14u8 = vorrq_u8(q13u8, q14u8);  // combined high-edge-variance mask
-
-  q4u16 = vdupq_n_u16(3);
-  q2s16 = vmulq_s16(q2s16, vreinterpretq_s16_u16(q4u16));    // 3 * (q0 - p0)
-  q11s16 = vmulq_s16(q11s16, vreinterpretq_s16_u16(q4u16));
-
-  q1u8 = vandq_u8(vreinterpretq_u8_s8(q1s8), q14u8);  // keep (p1 - q1) only where hev is set
-  q15u8 = vandq_u8(q15u8, q9);                        // final filter mask: limit test AND blimit test
-
-  q1s8 = vreinterpretq_s8_u8(q1u8);
-  q2s16 = vaddw_s8(q2s16, vget_low_s8(q1s8));
-  q11s16 = vaddw_s8(q11s16, vget_high_s8(q1s8));
-
-  q4 = vdupq_n_u8(3);  // q4 and q9 are reused as the rounding constants 3 and 4
-  q9 = vdupq_n_u8(4);
-  // aom_filter = clamp(aom_filter + 3 * ( qs0 - ps0))
-  d2s8 = vqmovn_s16(q2s16);
-  d3s8 = vqmovn_s16(q11s16);
-  q1s8 = vcombine_s8(d2s8, d3s8);
-  q1u8 = vandq_u8(vreinterpretq_u8_s8(q1s8), q15u8);  // zero the filter value where the mask is clear
-  q1s8 = vreinterpretq_s8_u8(q1u8);
-
-  q2s8 = vqaddq_s8(q1s8, vreinterpretq_s8_u8(q4));  // filter + 3 (saturating)
-  q1s8 = vqaddq_s8(q1s8, vreinterpretq_s8_u8(q9));  // filter + 4 (saturating)
-  q2s8 = vshrq_n_s8(q2s8, 3);
-  q1s8 = vshrq_n_s8(q1s8, 3);
-
-  q11s8 = vqaddq_s8(vreinterpretq_s8_u8(q6), q2s8);  // p0 + ((filter + 3) >> 3)
-  q0s8 = vqsubq_s8(vreinterpretq_s8_u8(q7), q1s8);   // q0 - ((filter + 4) >> 3)
-
-  q1s8 = vrshrq_n_s8(q1s8, 1);                         // rounding halve for the outer taps
-  q1s8 = vbicq_s8(q1s8, vreinterpretq_s8_u8(q14u8));  // outer-tap adjustment is cleared where hev is set
-
-  q13s8 = vqaddq_s8(vreinterpretq_s8_u8(q5), q1s8);  // p1 + adjustment
-  q12s8 = vqsubq_s8(vreinterpretq_s8_u8(q8), q1s8);  // q1 - adjustment
-  // Flip the sign bit again to return to unsigned pixel values.
-  *q8r = veorq_u8(vreinterpretq_u8_s8(q12s8), q10);
-  *q7r = veorq_u8(vreinterpretq_u8_s8(q0s8), q10);
-  *q6r = veorq_u8(vreinterpretq_u8_s8(q11s8), q10);
-  *q5r = veorq_u8(vreinterpretq_u8_s8(q13s8), q10);
-  return;
-}
-
-void aom_lpf_horizontal_4_dual_neon(  // filters 16 adjacent columns across one horizontal edge; bytes 0-7 use the *0 thresholds, bytes 8-15 the *1 thresholds
-    uint8_t *s, int p /* pitch */, const uint8_t *blimit0,
-    const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1,
-    const uint8_t *limit1, const uint8_t *thresh1) {
-  uint8x8_t dblimit0, dlimit0, dthresh0, dblimit1, dlimit1, dthresh1;
-  uint8x16_t qblimit, qlimit, qthresh;
-  uint8x16_t q3u8, q4u8, q5u8, q6u8, q7u8, q8u8, q9u8, q10u8;
-  // Each threshold pointer is read as 8 bytes; the two 8-byte sets are joined into one 16-lane vector.
-  dblimit0 = vld1_u8(blimit0);
-  dlimit0 = vld1_u8(limit0);
-  dthresh0 = vld1_u8(thresh0);
-  dblimit1 = vld1_u8(blimit1);
-  dlimit1 = vld1_u8(limit1);
-  dthresh1 = vld1_u8(thresh1);
-  qblimit = vcombine_u8(dblimit0, dblimit1);
-  qlimit = vcombine_u8(dlimit0, dlimit1);
-  qthresh = vcombine_u8(dthresh0, dthresh1);
-
-  s -= (p << 2);  // start 4 rows above s
-  // Load 8 rows of 16 bytes: p3, p2, p1, p0, q0, q1, q2, q3.
-  q3u8 = vld1q_u8(s);
-  s += p;
-  q4u8 = vld1q_u8(s);
-  s += p;
-  q5u8 = vld1q_u8(s);
-  s += p;
-  q6u8 = vld1q_u8(s);
-  s += p;
-  q7u8 = vld1q_u8(s);
-  s += p;
-  q8u8 = vld1q_u8(s);
-  s += p;
-  q9u8 = vld1q_u8(s);
-  s += p;
-  q10u8 = vld1q_u8(s);
-
-  loop_filter_neon_16(qblimit, qlimit, qthresh, q3u8, q4u8, q5u8, q6u8, q7u8,
-                      q8u8, q9u8, q10u8, &q5u8, &q6u8, &q7u8, &q8u8);
-
-  s -= (p * 5);  // s was on the q3 row; five rows back is the p1 row
-  vst1q_u8(s, q5u8);  // only p1, p0, q0, q1 are written back
-  s += p;
-  vst1q_u8(s, q6u8);
-  s += p;
-  vst1q_u8(s, q7u8);
-  s += p;
-  vst1q_u8(s, q8u8);
-  return;
-}
--- a/aom_dsp/arm/loopfilter_4_neon.c
+++ b/aom_dsp/arm/loopfilter_4_neon.c
@@ -1,250 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <arm_neon.h>
-
-#include "./aom_dsp_rtcd.h"
-
-static INLINE void loop_filter_neon(uint8x8_t dblimit,   // blimit
-                                    uint8x8_t dlimit,    // limit
-                                    uint8x8_t dthresh,   // thresh
-                                    uint8x8_t d3u8,      // p3
-                                    uint8x8_t d4u8,      // p2
-                                    uint8x8_t d5u8,      // p1
-                                    uint8x8_t d6u8,      // p0
-                                    uint8x8_t d7u8,      // q0
-                                    uint8x8_t d16u8,     // q1
-                                    uint8x8_t d17u8,     // q2
-                                    uint8x8_t d18u8,     // q3
-                                    uint8x8_t *d4ru8,    // out: filtered p1
-                                    uint8x8_t *d5ru8,    // out: filtered p0
-                                    uint8x8_t *d6ru8,    // out: filtered q0
-                                    uint8x8_t *d7ru8) {  // out: filtered q1
-  uint8x8_t d19u8, d20u8, d21u8, d22u8, d23u8, d27u8, d28u8;
-  int16x8_t q12s16;
-  int8x8_t d19s8, d20s8, d21s8, d26s8, d27s8, d28s8;
-  // Absolute differences between neighbouring taps on each side of the edge (8 positions at once).
-  d19u8 = vabd_u8(d3u8, d4u8);    // |p3 - p2|
-  d20u8 = vabd_u8(d4u8, d5u8);    // |p2 - p1|
-  d21u8 = vabd_u8(d5u8, d6u8);    // |p1 - p0|
-  d22u8 = vabd_u8(d16u8, d7u8);   // |q1 - q0|
-  d3u8 = vabd_u8(d17u8, d16u8);   // |q2 - q1| (d3u8 is reused as scratch)
-  d4u8 = vabd_u8(d18u8, d17u8);   // |q3 - q2| (d4u8 is reused as scratch)
-  // Reduce to the largest neighbour difference per position.
-  d19u8 = vmax_u8(d19u8, d20u8);
-  d20u8 = vmax_u8(d21u8, d22u8);
-  d3u8 = vmax_u8(d3u8, d4u8);
-  d23u8 = vmax_u8(d19u8, d20u8);
-
-  d17u8 = vabd_u8(d6u8, d7u8);  // |p0 - q0|
-  // High-edge-variance tests and the overall maximum.
-  d21u8 = vcgt_u8(d21u8, dthresh);  // |p1 - p0| > thresh
-  d22u8 = vcgt_u8(d22u8, dthresh);  // |q1 - q0| > thresh
-  d23u8 = vmax_u8(d23u8, d3u8);     // max over all six neighbour differences
-
-  d28u8 = vabd_u8(d5u8, d16u8);    // |p1 - q1|
-  d17u8 = vqadd_u8(d17u8, d17u8);  // saturating 2 * |p0 - q0|
-
-  d23u8 = vcge_u8(dlimit, d23u8);  // all-ones where every neighbour difference <= limit
-  // Flip the sign bit so the pixels can be processed as signed 8-bit values.
-  d18u8 = vdup_n_u8(0x80);
-  d5u8 = veor_u8(d5u8, d18u8);
-  d6u8 = veor_u8(d6u8, d18u8);
-  d7u8 = veor_u8(d7u8, d18u8);
-  d16u8 = veor_u8(d16u8, d18u8);
-
-  d28u8 = vshr_n_u8(d28u8, 1);     // |p1 - q1| / 2
-  d17u8 = vqadd_u8(d17u8, d28u8);  // 2 * |p0 - q0| + |p1 - q1| / 2
-
-  d19u8 = vdup_n_u8(3);  // constant 3, used both as multiplier and as rounding offset below
-
-  d28s8 = vsub_s8(vreinterpret_s8_u8(d7u8), vreinterpret_s8_u8(d6u8));  // q0 - p0 (8-bit, non-saturating)
-
-  d17u8 = vcge_u8(dblimit, d17u8);  // all-ones where the edge test value <= blimit
-
-  d27s8 = vqsub_s8(vreinterpret_s8_u8(d5u8), vreinterpret_s8_u8(d16u8));  // saturating p1 - q1
-
-  d22u8 = vorr_u8(d21u8, d22u8);  // combined high-edge-variance mask
-
-  q12s16 = vmull_s8(d28s8, vreinterpret_s8_u8(d19u8));  // 3 * (q0 - p0), widened to 16 bits
-
-  d27u8 = vand_u8(vreinterpret_u8_s8(d27s8), d22u8);  // keep (p1 - q1) only where hev is set
-  d23u8 = vand_u8(d23u8, d17u8);                      // final filter mask: limit test AND blimit test
-
-  q12s16 = vaddw_s8(q12s16, vreinterpret_s8_u8(d27u8));
-
-  d17u8 = vdup_n_u8(4);  // d17u8 reused as the rounding constant 4
-
-  d27s8 = vqmovn_s16(q12s16);                         // saturate the filter value back to 8 bits
-  d27u8 = vand_u8(vreinterpret_u8_s8(d27s8), d23u8);  // zero it where the mask is clear
-  d27s8 = vreinterpret_s8_u8(d27u8);
-
-  d28s8 = vqadd_s8(d27s8, vreinterpret_s8_u8(d19u8));  // filter + 3 (saturating)
-  d27s8 = vqadd_s8(d27s8, vreinterpret_s8_u8(d17u8));  // filter + 4 (saturating)
-  d28s8 = vshr_n_s8(d28s8, 3);
-  d27s8 = vshr_n_s8(d27s8, 3);
-
-  d19s8 = vqadd_s8(vreinterpret_s8_u8(d6u8), d28s8);  // p0 + ((filter + 3) >> 3)
-  d26s8 = vqsub_s8(vreinterpret_s8_u8(d7u8), d27s8);  // q0 - ((filter + 4) >> 3)
-
-  d27s8 = vrshr_n_s8(d27s8, 1);                        // rounding halve for the outer taps
-  d27s8 = vbic_s8(d27s8, vreinterpret_s8_u8(d22u8));  // outer-tap adjustment is cleared where hev is set
-
-  d21s8 = vqadd_s8(vreinterpret_s8_u8(d5u8), d27s8);   // p1 + adjustment
-  d20s8 = vqsub_s8(vreinterpret_s8_u8(d16u8), d27s8);  // q1 - adjustment
-  // Flip the sign bit again to return to unsigned pixel values.
-  *d4ru8 = veor_u8(vreinterpret_u8_s8(d21s8), d18u8);
-  *d5ru8 = veor_u8(vreinterpret_u8_s8(d19s8), d18u8);
-  *d6ru8 = veor_u8(vreinterpret_u8_s8(d26s8), d18u8);
-  *d7ru8 = veor_u8(vreinterpret_u8_s8(d20s8), d18u8);
-  return;
-}
-
-void aom_lpf_horizontal_4_neon(uint8_t *src, int pitch, const uint8_t *blimit,  // filters 8 adjacent columns across the horizontal edge at src
-                               const uint8_t *limit, const uint8_t *thresh) {
-  int i;
-  uint8_t *s, *psrc;
-  uint8x8_t dblimit, dlimit, dthresh;
-  uint8x8_t d3u8, d4u8, d5u8, d6u8, d7u8, d16u8, d17u8, d18u8;
-  // Each threshold pointer is read as 8 bytes.
-  dblimit = vld1_u8(blimit);
-  dlimit = vld1_u8(limit);
-  dthresh = vld1_u8(thresh);
-
-  psrc = src - (pitch << 2);  // 4 rows above the edge
-  for (i = 0; i < 1; i++) {  // the bound is fixed at 1, so exactly one 8-pixel-wide segment is processed
-    s = psrc + i * 8;
-    // Load 8 rows of 8 bytes: p3, p2, p1, p0, q0, q1, q2, q3.
-    d3u8 = vld1_u8(s);
-    s += pitch;
-    d4u8 = vld1_u8(s);
-    s += pitch;
-    d5u8 = vld1_u8(s);
-    s += pitch;
-    d6u8 = vld1_u8(s);
-    s += pitch;
-    d7u8 = vld1_u8(s);
-    s += pitch;
-    d16u8 = vld1_u8(s);
-    s += pitch;
-    d17u8 = vld1_u8(s);
-    s += pitch;
-    d18u8 = vld1_u8(s);
-
-    loop_filter_neon(dblimit, dlimit, dthresh, d3u8, d4u8, d5u8, d6u8, d7u8,
-                     d16u8, d17u8, d18u8, &d4u8, &d5u8, &d6u8, &d7u8);  // outputs p1, p0, q0, q1 land in d4u8..d7u8
-
-    s -= (pitch * 5);  // s was on the q3 row; five rows back is the p1 row
-    vst1_u8(s, d4u8);
-    s += pitch;
-    vst1_u8(s, d5u8);
-    s += pitch;
-    vst1_u8(s, d6u8);
-    s += pitch;
-    vst1_u8(s, d7u8);
-  }
-  return;
-}
-
-void aom_lpf_vertical_4_neon(uint8_t *src, int pitch, const uint8_t *blimit,  // filters 8 rows across the vertical edge at src
-                             const uint8_t *limit, const uint8_t *thresh) {
-  int i, pitch8;
-  uint8_t *s;
-  uint8x8_t dblimit, dlimit, dthresh;
-  uint8x8_t d3u8, d4u8, d5u8, d6u8, d7u8, d16u8, d17u8, d18u8;
-  uint32x2x2_t d2tmp0, d2tmp1, d2tmp2, d2tmp3;
-  uint16x4x2_t d2tmp4, d2tmp5, d2tmp6, d2tmp7;
-  uint8x8x2_t d2tmp8, d2tmp9, d2tmp10, d2tmp11;
-  uint8x8x4_t d4Result;
-  // Each threshold pointer is read as 8 bytes.
-  dblimit = vld1_u8(blimit);
-  dlimit = vld1_u8(limit);
-  dthresh = vld1_u8(thresh);
-
-  pitch8 = pitch * 8;
-  for (i = 0; i < 1; i++, src += pitch8) {  // the bound is fixed at 1, so exactly one 8-row segment is processed
-    s = src - (i + 1) * 4;  // with i == 0 this is src - 4: four pixels left of the edge
-    // Load 8 rows of 8 bytes straddling the edge.
-    d3u8 = vld1_u8(s);
-    s += pitch;
-    d4u8 = vld1_u8(s);
-    s += pitch;
-    d5u8 = vld1_u8(s);
-    s += pitch;
-    d6u8 = vld1_u8(s);
-    s += pitch;
-    d7u8 = vld1_u8(s);
-    s += pitch;
-    d16u8 = vld1_u8(s);
-    s += pitch;
-    d17u8 = vld1_u8(s);
-    s += pitch;
-    d18u8 = vld1_u8(s);
-    // 8x8 byte transpose in three steps (32-bit, 16-bit, then 8-bit) so each vector holds one column.
-    d2tmp0 = vtrn_u32(vreinterpret_u32_u8(d3u8), vreinterpret_u32_u8(d7u8));
-    d2tmp1 = vtrn_u32(vreinterpret_u32_u8(d4u8), vreinterpret_u32_u8(d16u8));
-    d2tmp2 = vtrn_u32(vreinterpret_u32_u8(d5u8), vreinterpret_u32_u8(d17u8));
-    d2tmp3 = vtrn_u32(vreinterpret_u32_u8(d6u8), vreinterpret_u32_u8(d18u8));
-
-    d2tmp4 = vtrn_u16(vreinterpret_u16_u32(d2tmp0.val[0]),
-                      vreinterpret_u16_u32(d2tmp2.val[0]));
-    d2tmp5 = vtrn_u16(vreinterpret_u16_u32(d2tmp1.val[0]),
-                      vreinterpret_u16_u32(d2tmp3.val[0]));
-    d2tmp6 = vtrn_u16(vreinterpret_u16_u32(d2tmp0.val[1]),
-                      vreinterpret_u16_u32(d2tmp2.val[1]));
-    d2tmp7 = vtrn_u16(vreinterpret_u16_u32(d2tmp1.val[1]),
-                      vreinterpret_u16_u32(d2tmp3.val[1]));
-
-    d2tmp8 = vtrn_u8(vreinterpret_u8_u16(d2tmp4.val[0]),
-                     vreinterpret_u8_u16(d2tmp5.val[0]));
-    d2tmp9 = vtrn_u8(vreinterpret_u8_u16(d2tmp4.val[1]),
-                     vreinterpret_u8_u16(d2tmp5.val[1]));
-    d2tmp10 = vtrn_u8(vreinterpret_u8_u16(d2tmp6.val[0]),
-                      vreinterpret_u8_u16(d2tmp7.val[0]));
-    d2tmp11 = vtrn_u8(vreinterpret_u8_u16(d2tmp6.val[1]),
-                      vreinterpret_u8_u16(d2tmp7.val[1]));
-    // Columns in order: p3, p2, p1, p0, q0, q1, q2, q3 (the order loop_filter_neon expects).
-    d3u8 = d2tmp8.val[0];
-    d4u8 = d2tmp8.val[1];
-    d5u8 = d2tmp9.val[0];
-    d6u8 = d2tmp9.val[1];
-    d7u8 = d2tmp10.val[0];
-    d16u8 = d2tmp10.val[1];
-    d17u8 = d2tmp11.val[0];
-    d18u8 = d2tmp11.val[1];
-
-    loop_filter_neon(dblimit, dlimit, dthresh, d3u8, d4u8, d5u8, d6u8, d7u8,
-                     d16u8, d17u8, d18u8, &d4u8, &d5u8, &d6u8, &d7u8);  // outputs p1, p0, q0, q1 land in d4u8..d7u8
-    // Pack the four filtered columns so vst4_lane_u8 can write one 4-byte row (p1 p0 q0 q1) per lane.
-    d4Result.val[0] = d4u8;
-    d4Result.val[1] = d5u8;
-    d4Result.val[2] = d6u8;
-    d4Result.val[3] = d7u8;
-
-    src -= 2;  // the 4 modified bytes of each row start two pixels left of the edge
-    vst4_lane_u8(src, d4Result, 0);
-    src += pitch;
-    vst4_lane_u8(src, d4Result, 1);
-    src += pitch;
-    vst4_lane_u8(src, d4Result, 2);
-    src += pitch;
-    vst4_lane_u8(src, d4Result, 3);
-    src += pitch;
-    vst4_lane_u8(src, d4Result, 4);
-    src += pitch;
-    vst4_lane_u8(src, d4Result, 5);
-    src += pitch;
-    vst4_lane_u8(src, d4Result, 6);
-    src += pitch;
-    vst4_lane_u8(src, d4Result, 7);
-  }
-  return;
-}
--- a/aom_dsp/arm/loopfilter_8_neon.c
+++ b/aom_dsp/arm/loopfilter_8_neon.c
@@ -1,430 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <arm_neon.h>
-
-#include "./aom_dsp_rtcd.h"
-
-// Runs the 8-sample-wide loop filter on eight independent pixel positions, one
-// per byte lane. The inputs are the eight samples straddling the edge
-// (p3 p2 p1 p0 | q0 q1 q2 q3); the outputs are the six samples that may change
-// (p2 p1 p0 q0 q1 q2), written through the d0ru8..d5ru8 pointers.
-//
-// Per lane three decisions are computed:
-//   mask - every neighbouring difference is <= limit and
-//          2*|p0-q0| + |p1-q1|/2 is <= blimit,
-//   flat - mask holds and |p1-p0|, |q1-q0|, |p2-p0|, |q2-q0|, |p3-p0|, |q3-q0|
-//          are all <= 1,
-//   hev  - |p1-p0| or |q1-q0| exceeds thresh.
-// Lanes with flat set receive the 8-tap averaging filter; the remaining lanes
-// receive the 4-tap filter gated by mask (p2/q2 are passed through unchanged).
-static INLINE void mbloop_filter_neon(uint8x8_t dblimit,   // blimit
-                                      uint8x8_t dlimit,    // limit
-                                      uint8x8_t dthresh,   // thresh
-                                      uint8x8_t d3u8,      // p3
-                                      uint8x8_t d4u8,      // p2
-                                      uint8x8_t d5u8,      // p1
-                                      uint8x8_t d6u8,      // p0
-                                      uint8x8_t d7u8,      // q0
-                                      uint8x8_t d16u8,     // q1
-                                      uint8x8_t d17u8,     // q2
-                                      uint8x8_t d18u8,     // q3
-                                      uint8x8_t *d0ru8,    // p2 (out)
-                                      uint8x8_t *d1ru8,    // p1 (out)
-                                      uint8x8_t *d2ru8,    // p0 (out)
-                                      uint8x8_t *d3ru8,    // q0 (out)
-                                      uint8x8_t *d4ru8,    // q1 (out)
-                                      uint8x8_t *d5ru8) {  // q2 (out)
-  uint32_t flat;
-  uint8x8_t d0u8, d1u8, d2u8, d19u8, d20u8, d21u8, d22u8, d23u8, d24u8;
-  uint8x8_t d25u8, d26u8, d27u8, d28u8, d29u8, d30u8, d31u8;
-  int16x8_t q15s16;
-  uint16x8_t q10u16, q14u16;
-  int8x8_t d21s8, d24s8, d25s8, d26s8, d28s8, d29s8, d30s8;
-
-  // Absolute differences between adjacent samples on each side of the edge.
-  d19u8 = vabd_u8(d3u8, d4u8);    // |p3 - p2|
-  d20u8 = vabd_u8(d4u8, d5u8);    // |p2 - p1|
-  d21u8 = vabd_u8(d5u8, d6u8);    // |p1 - p0|
-  d22u8 = vabd_u8(d16u8, d7u8);   // |q1 - q0|
-  d23u8 = vabd_u8(d17u8, d16u8);  // |q2 - q1|
-  d24u8 = vabd_u8(d18u8, d17u8);  // |q3 - q2|
-
-  d19u8 = vmax_u8(d19u8, d20u8);
-  d20u8 = vmax_u8(d21u8, d22u8);  // max(|p1 - p0|, |q1 - q0|)
-
-  d25u8 = vabd_u8(d6u8, d4u8);  // |p0 - p2|
-
-  d23u8 = vmax_u8(d23u8, d24u8);
-
-  d26u8 = vabd_u8(d7u8, d17u8);  // |q0 - q2|
-
-  d19u8 = vmax_u8(d19u8, d20u8);
-
-  d24u8 = vabd_u8(d6u8, d7u8);   // |p0 - q0|
-  d27u8 = vabd_u8(d3u8, d6u8);   // |p3 - p0|
-  d28u8 = vabd_u8(d18u8, d7u8);  // |q3 - q0|
-
-  // d19u8 now holds the largest of the six adjacent-sample differences.
-  d19u8 = vmax_u8(d19u8, d23u8);
-
-  d23u8 = vabd_u8(d5u8, d16u8);    // |p1 - q1|
-  d24u8 = vqadd_u8(d24u8, d24u8);  // 2 * |p0 - q0|, saturating
-
-  d19u8 = vcge_u8(dlimit, d19u8);  // limit >= largest adjacent difference
-
-  d25u8 = vmax_u8(d25u8, d26u8);
-  d26u8 = vmax_u8(d27u8, d28u8);
-
-  d23u8 = vshr_n_u8(d23u8, 1);  // |p1 - q1| / 2
-
-  d25u8 = vmax_u8(d25u8, d26u8);
-
-  d24u8 = vqadd_u8(d24u8, d23u8);  // 2 * |p0 - q0| + |p1 - q1| / 2
-
-  // d20u8 becomes the largest difference considered by the flatness test.
-  d20u8 = vmax_u8(d20u8, d25u8);
-
-  d23u8 = vdup_n_u8(1);
-  d24u8 = vcge_u8(dblimit, d24u8);  // edge difference within blimit
-
-  d21u8 = vcgt_u8(d21u8, dthresh);  // |p1 - p0| > thresh
-
-  d20u8 = vcge_u8(d23u8, d20u8);  // all flatness differences <= 1
-
-  d19u8 = vand_u8(d19u8, d24u8);  // mask
-
-  d23u8 = vcgt_u8(d22u8, dthresh);  // |q1 - q0| > thresh
-
-  d20u8 = vand_u8(d20u8, d19u8);  // flat & mask
-
-  d22u8 = vdup_n_u8(0x80);  // sign-bias constant for the signed 4-tap filter
-
-  d23u8 = vorr_u8(d21u8, d23u8);  // hev
-
-  // Squeeze the eight flat bytes (each 0x00 or 0xff) into one 32-bit word so
-  // the all-flat and none-flat cases can be tested with a scalar compare.
-  // Only lane 0 of the narrowed result (derived from d20u8) is read.
-  q10u16 = vcombine_u16(vreinterpret_u16_u8(d20u8), vreinterpret_u16_u8(d21u8));
-
-  d30u8 = vshrn_n_u16(q10u16, 4);
-  flat = vget_lane_u32(vreinterpret_u32_u8(d30u8), 0);
-
-  if (flat == 0xffffffff) {  // Check for all 1's, power_branch_only
-    // Every lane is flat: only the 8-tap filter is needed. A running sum is
-    // updated incrementally from one output to the next.
-    d27u8 = vdup_n_u8(3);
-    d21u8 = vdup_n_u8(2);
-    // 3*p3 + 2*p2 + p1 + p0 + q0
-    q14u16 = vaddl_u8(d6u8, d7u8);
-    q14u16 = vmlal_u8(q14u16, d3u8, d27u8);
-    q14u16 = vmlal_u8(q14u16, d4u8, d21u8);
-    q14u16 = vaddw_u8(q14u16, d5u8);
-    *d0ru8 = vqrshrn_n_u16(q14u16, 3);
-
-    // 2*p3 + p2 + 2*p1 + p0 + q0 + q1
-    q14u16 = vsubw_u8(q14u16, d3u8);
-    q14u16 = vsubw_u8(q14u16, d4u8);
-    q14u16 = vaddw_u8(q14u16, d5u8);
-    q14u16 = vaddw_u8(q14u16, d16u8);
-    *d1ru8 = vqrshrn_n_u16(q14u16, 3);
-
-    // p3 + p2 + p1 + 2*p0 + q0 + q1 + q2
-    q14u16 = vsubw_u8(q14u16, d3u8);
-    q14u16 = vsubw_u8(q14u16, d5u8);
-    q14u16 = vaddw_u8(q14u16, d6u8);
-    q14u16 = vaddw_u8(q14u16, d17u8);
-    *d2ru8 = vqrshrn_n_u16(q14u16, 3);
-
-    // p2 + p1 + p0 + 2*q0 + q1 + q2 + q3
-    q14u16 = vsubw_u8(q14u16, d3u8);
-    q14u16 = vsubw_u8(q14u16, d6u8);
-    q14u16 = vaddw_u8(q14u16, d7u8);
-    q14u16 = vaddw_u8(q14u16, d18u8);
-    *d3ru8 = vqrshrn_n_u16(q14u16, 3);
-
-    // p1 + p0 + q0 + 2*q1 + q2 + 2*q3
-    q14u16 = vsubw_u8(q14u16, d4u8);
-    q14u16 = vsubw_u8(q14u16, d7u8);
-    q14u16 = vaddw_u8(q14u16, d16u8);
-    q14u16 = vaddw_u8(q14u16, d18u8);
-    *d4ru8 = vqrshrn_n_u16(q14u16, 3);
-
-    // p0 + q0 + q1 + 2*q2 + 3*q3
-    q14u16 = vsubw_u8(q14u16, d5u8);
-    q14u16 = vsubw_u8(q14u16, d16u8);
-    q14u16 = vaddw_u8(q14u16, d17u8);
-    q14u16 = vaddw_u8(q14u16, d18u8);
-    *d5ru8 = vqrshrn_n_u16(q14u16, 3);
-  } else {
-    // 4-tap filter. Samples are XORed with 0x80 so they can be processed as
-    // signed 8-bit values; the bias is removed again before the results are
-    // stored.
-    d21u8 = veor_u8(d7u8, d22u8);   // qs0
-    d24u8 = veor_u8(d6u8, d22u8);   // ps0
-    d25u8 = veor_u8(d5u8, d22u8);   // ps1
-    d26u8 = veor_u8(d16u8, d22u8);  // qs1
-
-    d27u8 = vdup_n_u8(3);
-
-    d28s8 = vsub_s8(vreinterpret_s8_u8(d21u8), vreinterpret_s8_u8(d24u8));
-    d29s8 = vqsub_s8(vreinterpret_s8_u8(d25u8), vreinterpret_s8_u8(d26u8));
-
-    q15s16 = vmull_s8(d28s8, vreinterpret_s8_u8(d27u8));  // 3 * (qs0 - ps0)
-
-    d29s8 = vand_s8(d29s8, vreinterpret_s8_u8(d23u8));  // (ps1 - qs1) & hev
-
-    q15s16 = vaddw_s8(q15s16, d29s8);
-
-    d29u8 = vdup_n_u8(4);
-
-    d28s8 = vqmovn_s16(q15s16);  // saturate the filter value to 8 bits
-
-    d28s8 = vand_s8(d28s8, vreinterpret_s8_u8(d19u8));  // apply mask
-
-    // Two rounded corrections: (filter + 3) >> 3 is added to ps0 and
-    // (filter + 4) >> 3 is subtracted from qs0.
-    d30s8 = vqadd_s8(d28s8, vreinterpret_s8_u8(d27u8));
-    d29s8 = vqadd_s8(d28s8, vreinterpret_s8_u8(d29u8));
-    d30s8 = vshr_n_s8(d30s8, 3);
-    d29s8 = vshr_n_s8(d29s8, 3);
-
-    d24s8 = vqadd_s8(vreinterpret_s8_u8(d24u8), d30s8);
-    d21s8 = vqsub_s8(vreinterpret_s8_u8(d21u8), d29s8);
-
-    // The outer taps get half of the qs0 correction, and only where hev is
-    // not set.
-    d29s8 = vrshr_n_s8(d29s8, 1);
-    d29s8 = vbic_s8(d29s8, vreinterpret_s8_u8(d23u8));
-
-    d25s8 = vqadd_s8(vreinterpret_s8_u8(d25u8), d29s8);
-    d26s8 = vqsub_s8(vreinterpret_s8_u8(d26u8), d29s8);
-
-    if (flat == 0) {  // filter_branch_only
-      // No lane is flat: p2 and q2 pass through, the inner four samples take
-      // the 4-tap result.
-      *d0ru8 = d4u8;
-      *d1ru8 = veor_u8(vreinterpret_u8_s8(d25s8), d22u8);
-      *d2ru8 = veor_u8(vreinterpret_u8_s8(d24s8), d22u8);
-      *d3ru8 = veor_u8(vreinterpret_u8_s8(d21s8), d22u8);
-      *d4ru8 = veor_u8(vreinterpret_u8_s8(d26s8), d22u8);
-      *d5ru8 = d17u8;
-      return;
-    }
-
-    // Mixed case: compute both filters and pick per lane with the flat mask.
-    d21u8 = veor_u8(vreinterpret_u8_s8(d21s8), d22u8);
-    d24u8 = veor_u8(vreinterpret_u8_s8(d24s8), d22u8);
-    d25u8 = veor_u8(vreinterpret_u8_s8(d25s8), d22u8);
-    d26u8 = veor_u8(vreinterpret_u8_s8(d26s8), d22u8);
-
-    d23u8 = vdup_n_u8(2);
-    q14u16 = vaddl_u8(d6u8, d7u8);
-    q14u16 = vmlal_u8(q14u16, d3u8, d27u8);
-    q14u16 = vmlal_u8(q14u16, d4u8, d23u8);
-
-    // The first-stage selects below (d0u8..d2u8, and later d3u8..d5u8) only
-    // matter in the non-flat lanes; whatever they place in the flat lanes is
-    // overwritten by the second select that produces the final output.
-    d0u8 = vbsl_u8(d20u8, dblimit, d4u8);
-
-    q14u16 = vaddw_u8(q14u16, d5u8);
-
-    d1u8 = vbsl_u8(d20u8, dlimit, d25u8);
-
-    d30u8 = vqrshrn_n_u16(q14u16, 3);  // 8-tap p2
-
-    q14u16 = vsubw_u8(q14u16, d3u8);
-    q14u16 = vsubw_u8(q14u16, d4u8);
-    q14u16 = vaddw_u8(q14u16, d5u8);
-    q14u16 = vaddw_u8(q14u16, d16u8);
-
-    d2u8 = vbsl_u8(d20u8, dthresh, d24u8);
-
-    d31u8 = vqrshrn_n_u16(q14u16, 3);  // 8-tap p1
-
-    q14u16 = vsubw_u8(q14u16, d3u8);
-    q14u16 = vsubw_u8(q14u16, d5u8);
-    q14u16 = vaddw_u8(q14u16, d6u8);
-    q14u16 = vaddw_u8(q14u16, d17u8);
-
-    *d0ru8 = vbsl_u8(d20u8, d30u8, d0u8);
-
-    d23u8 = vqrshrn_n_u16(q14u16, 3);  // 8-tap p0
-
-    q14u16 = vsubw_u8(q14u16, d3u8);
-    q14u16 = vsubw_u8(q14u16, d6u8);
-    q14u16 = vaddw_u8(q14u16, d7u8);
-
-    *d1ru8 = vbsl_u8(d20u8, d31u8, d1u8);
-
-    q14u16 = vaddw_u8(q14u16, d18u8);
-
-    *d2ru8 = vbsl_u8(d20u8, d23u8, d2u8);
-
-    d22u8 = vqrshrn_n_u16(q14u16, 3);  // 8-tap q0
-
-    q14u16 = vsubw_u8(q14u16, d4u8);
-    q14u16 = vsubw_u8(q14u16, d7u8);
-    q14u16 = vaddw_u8(q14u16, d16u8);
-
-    d3u8 = vbsl_u8(d20u8, d3u8, d21u8);
-
-    q14u16 = vaddw_u8(q14u16, d18u8);
-
-    d4u8 = vbsl_u8(d20u8, d4u8, d26u8);
-
-    d6u8 = vqrshrn_n_u16(q14u16, 3);  // 8-tap q1
-
-    q14u16 = vsubw_u8(q14u16, d5u8);
-    q14u16 = vsubw_u8(q14u16, d16u8);
-    q14u16 = vaddw_u8(q14u16, d17u8);
-    q14u16 = vaddw_u8(q14u16, d18u8);
-
-    d5u8 = vbsl_u8(d20u8, d5u8, d17u8);
-
-    d7u8 = vqrshrn_n_u16(q14u16, 3);  // 8-tap q2
-
-    *d3ru8 = vbsl_u8(d20u8, d22u8, d3u8);
-    *d4ru8 = vbsl_u8(d20u8, d6u8, d4u8);
-    *d5ru8 = vbsl_u8(d20u8, d7u8, d5u8);
-  }
-  return;
-}
-
-// Filters one 8-pixel-wide horizontal edge. `src` addresses the first row
-// below the edge; the four rows above it and the four rows starting at it are
-// read, and the three rows on either side of the edge are rewritten.
-void aom_lpf_horizontal_8_neon(uint8_t *src, int pitch, const uint8_t *blimit,
-                               const uint8_t *limit, const uint8_t *thresh) {
-  const uint8x8_t blimit_vec = vld1_u8(blimit);
-  const uint8x8_t limit_vec = vld1_u8(limit);
-  const uint8x8_t thresh_vec = vld1_u8(thresh);
-  uint8x8_t p3, p2, p1, p0, q0, q1, q2, q3;
-  uint8x8_t out_p2, out_p1, out_p0, out_q0, out_q1, out_q2;
-  uint8_t *row = src - (pitch << 2);
-
-  // Gather the eight rows straddling the edge, top to bottom.
-  p3 = vld1_u8(row);
-  row += pitch;
-  p2 = vld1_u8(row);
-  row += pitch;
-  p1 = vld1_u8(row);
-  row += pitch;
-  p0 = vld1_u8(row);
-  row += pitch;
-  q0 = vld1_u8(row);
-  row += pitch;
-  q1 = vld1_u8(row);
-  row += pitch;
-  q2 = vld1_u8(row);
-  row += pitch;
-  q3 = vld1_u8(row);
-
-  mbloop_filter_neon(blimit_vec, limit_vec, thresh_vec, p3, p2, p1, p0, q0, q1,
-                     q2, q3, &out_p2, &out_p1, &out_p0, &out_q0, &out_q1,
-                     &out_q2);
-
-  // Rewind from the q3 row to the p2 row and write the six filtered rows.
-  row -= (pitch * 6);
-  vst1_u8(row, out_p2);
-  row += pitch;
-  vst1_u8(row, out_p1);
-  row += pitch;
-  vst1_u8(row, out_p0);
-  row += pitch;
-  vst1_u8(row, out_q0);
-  row += pitch;
-  vst1_u8(row, out_q1);
-  row += pitch;
-  vst1_u8(row, out_q2);
-}
-
-// Filters one vertical edge across eight rows. `src` addresses the first
-// pixel to the right of the edge in the top row. Eight bytes are read from
-// each row starting four pixels left of the edge, the 8x8 tile is transposed
-// so each vector holds one column, the filter is applied, and the six
-// modified columns are scattered back with lane stores.
-void aom_lpf_vertical_8_neon(uint8_t *src, int pitch, const uint8_t *blimit,
-                             const uint8_t *limit, const uint8_t *thresh) {
-  int i;
-  uint8_t *s;
-  uint8x8_t dblimit, dlimit, dthresh;
-  uint8x8_t d0u8, d1u8, d2u8, d3u8, d4u8, d5u8, d6u8, d7u8;
-  uint8x8_t d16u8, d17u8, d18u8;
-  uint32x2x2_t d2tmp0, d2tmp1, d2tmp2, d2tmp3;
-  uint16x4x2_t d2tmp4, d2tmp5, d2tmp6, d2tmp7;
-  uint8x8x2_t d2tmp8, d2tmp9, d2tmp10, d2tmp11;
-  uint8x8x4_t d4Result;
-  uint8x8x2_t d2Result;
-
-  dblimit = vld1_u8(blimit);
-  dlimit = vld1_u8(limit);
-  dthresh = vld1_u8(thresh);
-
-  // The loop body executes exactly once (i == 0).
-  for (i = 0; i < 1; i++) {
-    s = src + (i * (pitch << 3)) - 4;
-
-    // Load eight rows of eight pixels, four on each side of the edge.
-    d3u8 = vld1_u8(s);
-    s += pitch;
-    d4u8 = vld1_u8(s);
-    s += pitch;
-    d5u8 = vld1_u8(s);
-    s += pitch;
-    d6u8 = vld1_u8(s);
-    s += pitch;
-    d7u8 = vld1_u8(s);
-    s += pitch;
-    d16u8 = vld1_u8(s);
-    s += pitch;
-    d17u8 = vld1_u8(s);
-    s += pitch;
-    d18u8 = vld1_u8(s);
-
-    // 8x8 byte transpose in three stages: 32-bit, then 16-bit, then 8-bit
-    // element interleaves.
-    d2tmp0 = vtrn_u32(vreinterpret_u32_u8(d3u8), vreinterpret_u32_u8(d7u8));
-    d2tmp1 = vtrn_u32(vreinterpret_u32_u8(d4u8), vreinterpret_u32_u8(d16u8));
-    d2tmp2 = vtrn_u32(vreinterpret_u32_u8(d5u8), vreinterpret_u32_u8(d17u8));
-    d2tmp3 = vtrn_u32(vreinterpret_u32_u8(d6u8), vreinterpret_u32_u8(d18u8));
-
-    d2tmp4 = vtrn_u16(vreinterpret_u16_u32(d2tmp0.val[0]),
-                      vreinterpret_u16_u32(d2tmp2.val[0]));
-    d2tmp5 = vtrn_u16(vreinterpret_u16_u32(d2tmp1.val[0]),
-                      vreinterpret_u16_u32(d2tmp3.val[0]));
-    d2tmp6 = vtrn_u16(vreinterpret_u16_u32(d2tmp0.val[1]),
-                      vreinterpret_u16_u32(d2tmp2.val[1]));
-    d2tmp7 = vtrn_u16(vreinterpret_u16_u32(d2tmp1.val[1]),
-                      vreinterpret_u16_u32(d2tmp3.val[1]));
-
-    d2tmp8 = vtrn_u8(vreinterpret_u8_u16(d2tmp4.val[0]),
-                     vreinterpret_u8_u16(d2tmp5.val[0]));
-    d2tmp9 = vtrn_u8(vreinterpret_u8_u16(d2tmp4.val[1]),
-                     vreinterpret_u8_u16(d2tmp5.val[1]));
-    d2tmp10 = vtrn_u8(vreinterpret_u8_u16(d2tmp6.val[0]),
-                      vreinterpret_u8_u16(d2tmp7.val[0]));
-    d2tmp11 = vtrn_u8(vreinterpret_u8_u16(d2tmp6.val[1]),
-                      vreinterpret_u8_u16(d2tmp7.val[1]));
-
-    // After the transpose each vector is one pixel column: p3 .. q3.
-    d3u8 = d2tmp8.val[0];
-    d4u8 = d2tmp8.val[1];
-    d5u8 = d2tmp9.val[0];
-    d6u8 = d2tmp9.val[1];
-    d7u8 = d2tmp10.val[0];
-    d16u8 = d2tmp10.val[1];
-    d17u8 = d2tmp11.val[0];
-    d18u8 = d2tmp11.val[1];
-
-    // Inputs are passed by value, so reusing d3u8..d5u8 as output slots does
-    // not disturb the filter's view of p3/p2/p1.
-    mbloop_filter_neon(dblimit, dlimit, dthresh, d3u8, d4u8, d5u8, d6u8, d7u8,
-                       d16u8, d17u8, d18u8, &d0u8, &d1u8, &d2u8, &d3u8, &d4u8,
-                       &d5u8);
-
-    // First four output columns (p2, p1, p0, q0) ...
-    d4Result.val[0] = d0u8;
-    d4Result.val[1] = d1u8;
-    d4Result.val[2] = d2u8;
-    d4Result.val[3] = d3u8;
-
-    // ... and the remaining two (q1, q2).
-    d2Result.val[0] = d4u8;
-    d2Result.val[1] = d5u8;
-
-    // Each vst4_lane_u8 writes one row's four bytes at src - 3 .. src, which
-    // transposes the columns back into rows on the way out.
-    s = src - 3;
-    vst4_lane_u8(s, d4Result, 0);
-    s += pitch;
-    vst4_lane_u8(s, d4Result, 1);
-    s += pitch;
-    vst4_lane_u8(s, d4Result, 2);
-    s += pitch;
-    vst4_lane_u8(s, d4Result, 3);
-    s += pitch;
-    vst4_lane_u8(s, d4Result, 4);
-    s += pitch;
-    vst4_lane_u8(s, d4Result, 5);
-    s += pitch;
-    vst4_lane_u8(s, d4Result, 6);
-    s += pitch;
-    vst4_lane_u8(s, d4Result, 7);
-
-    // Each vst2_lane_u8 writes one row's two bytes at src + 1 and src + 2.
-    s = src + 1;
-    vst2_lane_u8(s, d2Result, 0);
-    s += pitch;
-    vst2_lane_u8(s, d2Result, 1);
-    s += pitch;
-    vst2_lane_u8(s, d2Result, 2);
-    s += pitch;
-    vst2_lane_u8(s, d2Result, 3);
-    s += pitch;
-    vst2_lane_u8(s, d2Result, 4);
-    s += pitch;
-    vst2_lane_u8(s, d2Result, 5);
-    s += pitch;
-    vst2_lane_u8(s, d2Result, 6);
-    s += pitch;
-    vst2_lane_u8(s, d2Result, 7);
-  }
-  return;
-}
--- a/aom_dsp/arm/loopfilter_neon.c
+++ b/aom_dsp/arm/loopfilter_neon.c
@@ -1,49 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <arm_neon.h>
-
-#include "./aom_dsp_rtcd.h"
-#include "./aom_config.h"
-#include "aom/aom_integer.h"
-
-// Filters two vertically stacked 8-row segments of a vertical edge by calling
-// aom_lpf_vertical_4_neon twice: rows 0-7 use the first parameter set and
-// rows 8-15 (s + 8 * p) use the second.
-void aom_lpf_vertical_4_dual_neon(uint8_t *s, int p, const uint8_t *blimit0,
-                                  const uint8_t *limit0, const uint8_t *thresh0,
-                                  const uint8_t *blimit1, const uint8_t *limit1,
-                                  const uint8_t *thresh1) {
-  aom_lpf_vertical_4_neon(s, p, blimit0, limit0, thresh0);
-  aom_lpf_vertical_4_neon(s + 8 * p, p, blimit1, limit1, thresh1);
-}
-
-#if HAVE_NEON_ASM
-// Filters two horizontally adjacent 8-pixel segments of a horizontal edge by
-// calling aom_lpf_horizontal_8_neon twice: columns 0-7 use the first
-// parameter set and columns 8-15 (s + 8) use the second.
-void aom_lpf_horizontal_8_dual_neon(
-    uint8_t *s, int p /* pitch */, const uint8_t *blimit0,
-    const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1,
-    const uint8_t *limit1, const uint8_t *thresh1) {
-  aom_lpf_horizontal_8_neon(s, p, blimit0, limit0, thresh0);
-  aom_lpf_horizontal_8_neon(s + 8, p, blimit1, limit1, thresh1);
-}
-
-// Filters two vertically stacked 8-row segments of a vertical edge by calling
-// aom_lpf_vertical_8_neon twice: rows 0-7 use the first parameter set and
-// rows 8-15 (s + 8 * p) use the second.
-void aom_lpf_vertical_8_dual_neon(uint8_t *s, int p, const uint8_t *blimit0,
-                                  const uint8_t *limit0, const uint8_t *thresh0,
-                                  const uint8_t *blimit1, const uint8_t *limit1,
-                                  const uint8_t *thresh1) {
-  aom_lpf_vertical_8_neon(s, p, blimit0, limit0, thresh0);
-  aom_lpf_vertical_8_neon(s + 8 * p, p, blimit1, limit1, thresh1);
-}
-
-// Filters sixteen rows of a vertical edge by calling aom_lpf_vertical_16_neon
-// on rows 0-7 and again on rows 8-15 (s + 8 * p). Unlike the other dual
-// wrappers, both halves share a single blimit/limit/thresh set.
-void aom_lpf_vertical_16_dual_neon(uint8_t *s, int p, const uint8_t *blimit,
-                                   const uint8_t *limit,
-                                   const uint8_t *thresh) {
-  aom_lpf_vertical_16_neon(s, p, blimit, limit, thresh);
-  aom_lpf_vertical_16_neon(s + 8 * p, p, blimit, limit, thresh);
-}
-#endif  // HAVE_NEON_ASM
--- a/aom_dsp/arm/save_reg_neon.asm
+++ b/aom_dsp/arm/save_reg_neon.asm
@@ -1,39 +0,0 @@
-;
-; Copyright (c) 2016, Alliance for Open Media. All rights reserved
-;
-; This source code is subject to the terms of the BSD 2 Clause License and
-; the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
-; was not distributed with this source code in the LICENSE file, you can
-; obtain it at www.aomedia.org/license/software. If the Alliance for Open
-; Media Patent License 1.0 was not distributed with this source code in the
-; PATENTS file, you can obtain it at www.aomedia.org/license/patent.
-;
-
-;
-
-
-    EXPORT  |aom_push_neon|
-    EXPORT  |aom_pop_neon|
-
-    ARM
-    REQUIRE8
-    PRESERVE8
-
-    AREA ||.text||, CODE, READONLY, ALIGN=2
-
-; aom_push_neon: stores NEON registers d8-d15 (64 bytes) to the buffer whose
-; address is in r0. r0 is post-incremented by each store, so it points just
-; past the saved data on return.
-; NOTE(review): d8-d15 are the callee-saved NEON registers under the ARM
-; procedure call standard, which is presumably why exactly these are saved.
-|aom_push_neon| PROC
-    vst1.i64            {d8, d9, d10, d11}, [r0]!
-    vst1.i64            {d12, d13, d14, d15}, [r0]!
-    bx              lr
-
-    ENDP
-
-; aom_pop_neon: reloads NEON registers d8-d15 (64 bytes) from the buffer whose
-; address is in r0, in the same order aom_push_neon wrote them. r0 is
-; post-incremented by each load.
-|aom_pop_neon| PROC
-    vld1.i64            {d8, d9, d10, d11}, [r0]!
-    vld1.i64            {d12, d13, d14, d15}, [r0]!
-    bx              lr
-
-    ENDP
-
-    END
-
--- a/aom_dsp/arm/subpel_variance_media.c
+++ b/aom_dsp/arm/subpel_variance_media.c
@@ -1,81 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include "./aom_config.h"
-#include "./aom_dsp_rtcd.h"
-#include "aom/aom_integer.h"
-#include "aom_ports/mem.h"
-
-#if HAVE_MEDIA
-// Two-tap bilinear filter coefficients, indexed by sub-pixel offset 0..7.
-// Each pair sums to 128; entry 0 is the identity filter and entry 4 weights
-// both taps equally.
-static const int16_t bilinear_filters_media[8][2] = { { 128, 0 }, { 112, 16 },
-                                                      { 96, 32 }, { 80, 48 },
-                                                      { 64, 64 }, { 48, 80 },
-                                                      { 32, 96 }, { 16, 112 } };
-
-// First filtering pass, defined outside this file: reads 8-bit pixels from
-// src_ptr and writes 16-bit intermediates to dst_ptr.
-extern void aom_filter_block2d_bil_first_pass_media(
-    const uint8_t *src_ptr, uint16_t *dst_ptr, uint32_t src_pitch,
-    uint32_t height, uint32_t width, const int16_t *filter);
-
-// Second filtering pass, defined outside this file: reads the 16-bit
-// intermediates and writes 8-bit pixels to dst_ptr.
-extern void aom_filter_block2d_bil_second_pass_media(
-    const uint16_t *src_ptr, uint8_t *dst_ptr, int32_t src_pitch,
-    uint32_t height, uint32_t width, const int16_t *filter);
-
-// Variance of an 8x8 block after bilinear sub-pixel interpolation of the
-// source. xoffset and yoffset index bilinear_filters_media directly and are
-// not range-checked here. The sum of squared differences is stored in *sse.
-unsigned int aom_sub_pixel_variance8x8_media(
-    const uint8_t *src_ptr, int src_pixels_per_line, int xoffset, int yoffset,
-    const uint8_t *dst_ptr, int dst_pixels_per_line, unsigned int *sse) {
-  const int16_t *const horiz_taps = bilinear_filters_media[xoffset];
-  const int16_t *const vert_taps = bilinear_filters_media[yoffset];
-  uint16_t horiz_out[10 * 8];
-  uint8_t filtered[8 * 8];
-
-  // The first pass is asked for 9 rows, one more than the block height,
-  // presumably because the two-tap second pass reads one row below.
-  aom_filter_block2d_bil_first_pass_media(src_ptr, horiz_out,
-                                          src_pixels_per_line, 9, 8,
-                                          horiz_taps);
-  aom_filter_block2d_bil_second_pass_media(horiz_out, filtered, 8, 8, 8,
-                                           vert_taps);
-
-  return aom_variance8x8_media(filtered, 8, dst_ptr, dst_pixels_per_line, sse);
-}
-
-// Variance of a 16x16 block after bilinear sub-pixel interpolation of the
-// source. The three half-pixel cases (offset 4 in x, y, or both) are routed
-// to dedicated routines; every other offset pair goes through the generic
-// two-pass bilinear filter. xoffset and yoffset are not range-checked here.
-unsigned int aom_sub_pixel_variance16x16_media(
-    const uint8_t *src_ptr, int src_pixels_per_line, int xoffset, int yoffset,
-    const uint8_t *dst_ptr, int dst_pixels_per_line, unsigned int *sse) {
-  uint16_t horiz_out[36 * 16];
-  uint8_t filtered[20 * 16];
-
-  if (xoffset == 4 && yoffset == 0) {
-    return aom_variance_halfpixvar16x16_h_media(
-        src_ptr, src_pixels_per_line, dst_ptr, dst_pixels_per_line, sse);
-  }
-  if (xoffset == 0 && yoffset == 4) {
-    return aom_variance_halfpixvar16x16_v_media(
-        src_ptr, src_pixels_per_line, dst_ptr, dst_pixels_per_line, sse);
-  }
-  if (xoffset == 4 && yoffset == 4) {
-    return aom_variance_halfpixvar16x16_hv_media(
-        src_ptr, src_pixels_per_line, dst_ptr, dst_pixels_per_line, sse);
-  }
-
-  // Generic path: 17 rows are filtered horizontally, then 16 vertically.
-  aom_filter_block2d_bil_first_pass_media(src_ptr, horiz_out,
-                                          src_pixels_per_line, 17, 16,
-                                          bilinear_filters_media[xoffset]);
-  aom_filter_block2d_bil_second_pass_media(horiz_out, filtered, 16, 16, 16,
-                                           bilinear_filters_media[yoffset]);
-
-  return aom_variance16x16_media(filtered, 16, dst_ptr, dst_pixels_per_line,
-                                 sse);
-}
-#endif  // HAVE_MEDIA
--- a/aom_dsp/arm/subtract_neon.c
+++ b/aom_dsp/arm/subtract_neon.c
@@ -1,80 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <arm_neon.h>
-
-#include "./aom_config.h"
-#include "aom/aom_integer.h"
-
-// Writes diff = src - pred for a rows x cols block as signed 16-bit values.
-// The vector width is chosen from cols: more than 16 columns are processed in
-// steps of 32, 9..16 columns as one 16-wide vector, 5..8 columns as one
-// 8-wide vector, and 4 or fewer columns with scalar code. The vector paths
-// always touch their full width regardless of the exact value of cols.
-void aom_subtract_block_neon(int rows, int cols, int16_t *diff,
-                             ptrdiff_t diff_stride, const uint8_t *src,
-                             ptrdiff_t src_stride, const uint8_t *pred,
-                             ptrdiff_t pred_stride) {
-  int row;
-
-  for (row = 0; row < rows; ++row) {
-    if (cols > 16) {
-      int x;
-      for (x = 0; x < cols; x += 32) {
-        const uint8x16_t s_lo = vld1q_u8(src + x);
-        const uint8x16_t s_hi = vld1q_u8(src + x + 16);
-        const uint8x16_t p_lo = vld1q_u8(pred + x);
-        const uint8x16_t p_hi = vld1q_u8(pred + x + 16);
-        const uint16x8_t d0 = vsubl_u8(vget_low_u8(s_lo), vget_low_u8(p_lo));
-        const uint16x8_t d1 = vsubl_u8(vget_high_u8(s_lo), vget_high_u8(p_lo));
-        const uint16x8_t d2 = vsubl_u8(vget_low_u8(s_hi), vget_low_u8(p_hi));
-        const uint16x8_t d3 = vsubl_u8(vget_high_u8(s_hi), vget_high_u8(p_hi));
-        // The widened unsigned difference wraps modulo 2^16, so its bit
-        // pattern is already the correct signed result.
-        vst1q_s16(diff + x, vreinterpretq_s16_u16(d0));
-        vst1q_s16(diff + x + 8, vreinterpretq_s16_u16(d1));
-        vst1q_s16(diff + x + 16, vreinterpretq_s16_u16(d2));
-        vst1q_s16(diff + x + 24, vreinterpretq_s16_u16(d3));
-      }
-    } else if (cols > 8) {
-      const uint8x16_t s16 = vld1q_u8(src);
-      const uint8x16_t p16 = vld1q_u8(pred);
-      const uint16x8_t d_lo = vsubl_u8(vget_low_u8(s16), vget_low_u8(p16));
-      const uint16x8_t d_hi = vsubl_u8(vget_high_u8(s16), vget_high_u8(p16));
-      vst1q_s16(diff, vreinterpretq_s16_u16(d_lo));
-      vst1q_s16(diff + 8, vreinterpretq_s16_u16(d_hi));
-    } else if (cols > 4) {
-      const uint8x8_t s8 = vld1_u8(src);
-      const uint8x8_t p8 = vld1_u8(pred);
-      vst1q_s16(diff, vreinterpretq_s16_u16(vsubl_u8(s8, p8)));
-    } else {
-      int x;
-      for (x = 0; x < cols; ++x) diff[x] = src[x] - pred[x];
-    }
-
-    diff += diff_stride;
-    pred += pred_stride;
-    src += src_stride;
-  }
-}
--- a/aom_dsp/arm/variance_neon.c
+++ b/aom_dsp/arm/variance_neon.c
@@ -1,400 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <arm_neon.h>
-
-#include "./aom_dsp_rtcd.h"
-#include "./aom_config.h"
-
-#include "aom/aom_integer.h"
-#include "aom_ports/mem.h"
-
-// Returns the sum of the eight signed 16-bit lanes of v_16x8.
-static INLINE int horizontal_add_s16x8(const int16x8_t v_16x8) {
-  // Two widening pairwise adds (16 -> 32 -> 64 bits) leave two partial sums.
-  const int64x2_t halves = vpaddlq_s32(vpaddlq_s16(v_16x8));
-  const int32x2_t lo = vreinterpret_s32_s64(vget_low_s64(halves));
-  const int32x2_t hi = vreinterpret_s32_s64(vget_high_s64(halves));
-  // Lane 0 of each reinterpreted half is combined to form the total.
-  return vget_lane_s32(vadd_s32(lo, hi), 0);
-}
-
-// Returns the sum of the four signed 32-bit lanes of v_32x4, as an int.
-static INLINE int horizontal_add_s32x4(const int32x4_t v_32x4) {
-  // One widening pairwise add leaves two 64-bit partial sums.
-  const int64x2_t halves = vpaddlq_s32(v_32x4);
-  const int32x2_t lo = vreinterpret_s32_s64(vget_low_s64(halves));
-  const int32x2_t hi = vreinterpret_s32_s64(vget_high_s64(halves));
-  // Lane 0 of each reinterpreted half is combined to form the total.
-  return vget_lane_s32(vadd_s32(lo, hi), 0);
-}
-
-// Accumulates the sum of differences (*sum) and the sum of squared
-// differences (*sse) between two w x h blocks, eight pixels at a time; w is
-// consumed in steps of 8.
-//
-// w * h must not exceed 1024 or local variable v_sum may overflow: each of
-// its eight int16 lanes accumulates w * h / 8 differences of magnitude up to
-// 255, and 128 * 255 = 32640 is the largest such total that fits in int16.
-// Larger blocks must be split by the caller.
-static void variance_neon_w8(const uint8_t *a, int a_stride, const uint8_t *b,
-                             int b_stride, int w, int h, uint32_t *sse,
-                             int *sum) {
-  int i, j;
-  int16x8_t v_sum = vdupq_n_s16(0);
-  int32x4_t v_sse_lo = vdupq_n_s32(0);
-  int32x4_t v_sse_hi = vdupq_n_s32(0);
-
-  for (i = 0; i < h; ++i) {
-    for (j = 0; j < w; j += 8) {
-      const uint8x8_t v_a = vld1_u8(&a[j]);
-      const uint8x8_t v_b = vld1_u8(&b[j]);
-      // The unsigned widening subtract wraps modulo 2^16, so reinterpreting
-      // the result as signed yields the true difference in [-255, 255].
-      const uint16x8_t v_diff = vsubl_u8(v_a, v_b);
-      const int16x8_t sv_diff = vreinterpretq_s16_u16(v_diff);
-      v_sum = vaddq_s16(v_sum, sv_diff);
-      // Squares are accumulated at 32 bits, low and high halves separately.
-      v_sse_lo =
-          vmlal_s16(v_sse_lo, vget_low_s16(sv_diff), vget_low_s16(sv_diff));
-      v_sse_hi =
-          vmlal_s16(v_sse_hi, vget_high_s16(sv_diff), vget_high_s16(sv_diff));
-    }
-    a += a_stride;
-    b += b_stride;
-  }
-
-  *sum = horizontal_add_s16x8(v_sum);
-  *sse = (unsigned int)horizontal_add_s32x4(vaddq_s32(v_sse_lo, v_sse_hi));
-}
-
-// Stores the sum of squared differences (*sse) and the sum of differences
-// (*sum) between two 8x8 blocks; no variance is computed here.
-void aom_get8x8var_neon(const uint8_t *a, int a_stride, const uint8_t *b,
-                        int b_stride, unsigned int *sse, int *sum) {
-  variance_neon_w8(a, a_stride, b, b_stride, 8, 8, sse, sum);
-}
-
-// Stores the sum of squared differences (*sse) and the sum of differences
-// (*sum) between two 16x16 blocks; no variance is computed here.
-void aom_get16x16var_neon(const uint8_t *a, int a_stride, const uint8_t *b,
-                          int b_stride, unsigned int *sse, int *sum) {
-  variance_neon_w8(a, a_stride, b, b_stride, 16, 16, sse, sum);
-}
-
-// Returns the variance of an 8x8 block pair, sse - sum^2 / 64, and stores the
-// sum of squared differences in *sse.
-unsigned int aom_variance8x8_neon(const uint8_t *a, int a_stride,
-                                  const uint8_t *b, int b_stride,
-                                  unsigned int *sse) {
-  int diff_sum;
-  int64_t mean_sq;
-
-  variance_neon_w8(a, a_stride, b, b_stride, 8, 8, sse, &diff_sum);
-  mean_sq = ((int64_t)diff_sum * diff_sum) >> 6;  // divide by 8 * 8 pixels
-  return *sse - mean_sq;
-}
-
-// Returns the variance of a 16x16 block pair, sse - sum^2 / 256, and stores
-// the sum of squared differences in *sse.
-unsigned int aom_variance16x16_neon(const uint8_t *a, int a_stride,
-                                    const uint8_t *b, int b_stride,
-                                    unsigned int *sse) {
-  int diff_sum;
-  int64_t mean_sq;
-
-  variance_neon_w8(a, a_stride, b, b_stride, 16, 16, sse, &diff_sum);
-  mean_sq = ((int64_t)diff_sum * diff_sum) >> 8;  // divide by 16 * 16 pixels
-  return *sse - mean_sq;
-}
-
-// Returns the variance of a 32x32 block pair, sse - sum^2 / 1024, and stores
-// the sum of squared differences in *sse.
-unsigned int aom_variance32x32_neon(const uint8_t *a, int a_stride,
-                                    const uint8_t *b, int b_stride,
-                                    unsigned int *sse) {
-  int diff_sum;
-  int64_t mean_sq;
-
-  variance_neon_w8(a, a_stride, b, b_stride, 32, 32, sse, &diff_sum);
-  mean_sq = ((int64_t)diff_sum * diff_sum) >> 10;  // divide by 32 * 32 pixels
-  return *sse - mean_sq;
-}
-
-// Returns the variance of a 32-wide, 64-tall block pair and stores the sum
-// of squared differences in *sse. The block is measured as two 32x32 halves
-// so each helper call stays at 1024 pixels.
-unsigned int aom_variance32x64_neon(const uint8_t *a, int a_stride,
-                                    const uint8_t *b, int b_stride,
-                                    unsigned int *sse) {
-  const uint8_t *const a_lower = a + (32 * a_stride);
-  const uint8_t *const b_lower = b + (32 * b_stride);
-  uint32_t sse_upper, sse_lower;
-  int sum_upper, sum_lower, sum_total;
-
-  variance_neon_w8(a, a_stride, b, b_stride, 32, 32, &sse_upper, &sum_upper);
-  variance_neon_w8(a_lower, a_stride, b_lower, b_stride, 32, 32, &sse_lower,
-                   &sum_lower);
-
-  sum_total = sum_upper + sum_lower;
-  *sse = sse_upper + sse_lower;
-  // >> 11 divides by the 32 * 64 pixel count.
-  return *sse - (((int64_t)sum_total * sum_total) >> 11);
-}
-
-// Returns the variance of a 64-wide, 32-tall block pair and stores the sum
-// of squared differences in *sse. The block is measured as two 64x16 strips
-// so each helper call stays at 1024 pixels.
-unsigned int aom_variance64x32_neon(const uint8_t *a, int a_stride,
-                                    const uint8_t *b, int b_stride,
-                                    unsigned int *sse) {
-  const uint8_t *const a_lower = a + (16 * a_stride);
-  const uint8_t *const b_lower = b + (16 * b_stride);
-  uint32_t sse_upper, sse_lower;
-  int sum_upper, sum_lower, sum_total;
-
-  variance_neon_w8(a, a_stride, b, b_stride, 64, 16, &sse_upper, &sum_upper);
-  variance_neon_w8(a_lower, a_stride, b_lower, b_stride, 64, 16, &sse_lower,
-                   &sum_lower);
-
-  sum_total = sum_upper + sum_lower;
-  *sse = sse_upper + sse_lower;
-  // >> 11 divides by the 64 * 32 pixel count.
-  return *sse - (((int64_t)sum_total * sum_total) >> 11);
-}
-
-// Returns the variance of a 64x64 block pair and stores the sum of squared
-// differences in *sse. The block is measured as four 64x16 strips so each
-// helper call stays at 1024 pixels.
-unsigned int aom_variance64x64_neon(const uint8_t *a, int a_stride,
-                                    const uint8_t *b, int b_stride,
-                                    unsigned int *sse) {
-  uint32_t sse_total = 0;
-  int sum_total = 0;
-  int strip;
-
-  for (strip = 0; strip < 4; ++strip) {
-    uint32_t strip_sse;
-    int strip_sum;
-
-    variance_neon_w8(a + (16 * strip * a_stride), a_stride,
-                     b + (16 * strip * b_stride), b_stride, 64, 16, &strip_sse,
-                     &strip_sum);
-    sse_total += strip_sse;
-    sum_total += strip_sum;
-  }
-
-  *sse = sse_total;
-  // >> 12 divides by the 64 * 64 pixel count.
-  return *sse - (((int64_t)sum_total * sum_total) >> 12);
-}
-
-// Returns the variance of a 16-wide, 8-tall block pair and stores the sum of
-// squared differences in *sse. Two rows are processed per loop iteration.
-// Unlike the variance_neon_w8-based routines, the running sum is kept in
-// 32-bit lanes (q8s32) via pairwise add-accumulate.
-unsigned int aom_variance16x8_neon(const unsigned char *src_ptr,
-                                   int source_stride,
-                                   const unsigned char *ref_ptr,
-                                   int recon_stride, unsigned int *sse) {
-  int i;
-  int16x4_t d22s16, d23s16, d24s16, d25s16, d26s16, d27s16, d28s16, d29s16;
-  uint32x2_t d0u32, d10u32;
-  int64x1_t d0s64, d1s64;
-  uint8x16_t q0u8, q1u8, q2u8, q3u8;
-  uint16x8_t q11u16, q12u16, q13u16, q14u16;
-  int32x4_t q8s32, q9s32, q10s32;
-  int64x2_t q0s64, q1s64, q5s64;
-
-  q8s32 = vdupq_n_s32(0);   // running sum of differences
-  q9s32 = vdupq_n_s32(0);   // running sum of squares, low halves
-  q10s32 = vdupq_n_s32(0);  // running sum of squares, high halves
-
-  // 4 iterations x 2 rows = 8 rows.
-  for (i = 0; i < 4; i++) {
-    q0u8 = vld1q_u8(src_ptr);
-    src_ptr += source_stride;
-    q1u8 = vld1q_u8(src_ptr);
-    src_ptr += source_stride;
-    __builtin_prefetch(src_ptr);
-
-    q2u8 = vld1q_u8(ref_ptr);
-    ref_ptr += recon_stride;
-    q3u8 = vld1q_u8(ref_ptr);
-    ref_ptr += recon_stride;
-    __builtin_prefetch(ref_ptr);
-
-    // Widening subtracts; the results are reinterpreted as signed below.
-    q11u16 = vsubl_u8(vget_low_u8(q0u8), vget_low_u8(q2u8));
-    q12u16 = vsubl_u8(vget_high_u8(q0u8), vget_high_u8(q2u8));
-    q13u16 = vsubl_u8(vget_low_u8(q1u8), vget_low_u8(q3u8));
-    q14u16 = vsubl_u8(vget_high_u8(q1u8), vget_high_u8(q3u8));
-
-    // For each group of eight differences: add them into the running sum and
-    // accumulate their squares.
-    d22s16 = vreinterpret_s16_u16(vget_low_u16(q11u16));
-    d23s16 = vreinterpret_s16_u16(vget_high_u16(q11u16));
-    q8s32 = vpadalq_s16(q8s32, vreinterpretq_s16_u16(q11u16));
-    q9s32 = vmlal_s16(q9s32, d22s16, d22s16);
-    q10s32 = vmlal_s16(q10s32, d23s16, d23s16);
-
-    d24s16 = vreinterpret_s16_u16(vget_low_u16(q12u16));
-    d25s16 = vreinterpret_s16_u16(vget_high_u16(q12u16));
-    q8s32 = vpadalq_s16(q8s32, vreinterpretq_s16_u16(q12u16));
-    q9s32 = vmlal_s16(q9s32, d24s16, d24s16);
-    q10s32 = vmlal_s16(q10s32, d25s16, d25s16);
-
-    d26s16 = vreinterpret_s16_u16(vget_low_u16(q13u16));
-    d27s16 = vreinterpret_s16_u16(vget_high_u16(q13u16));
-    q8s32 = vpadalq_s16(q8s32, vreinterpretq_s16_u16(q13u16));
-    q9s32 = vmlal_s16(q9s32, d26s16, d26s16);
-    q10s32 = vmlal_s16(q10s32, d27s16, d27s16);
-
-    d28s16 = vreinterpret_s16_u16(vget_low_u16(q14u16));
-    d29s16 = vreinterpret_s16_u16(vget_high_u16(q14u16));
-    q8s32 = vpadalq_s16(q8s32, vreinterpretq_s16_u16(q14u16));
-    q9s32 = vmlal_s16(q9s32, d28s16, d28s16);
-    q10s32 = vmlal_s16(q10s32, d29s16, d29s16);
-  }
-
-  // Horizontal reductions: d0s64 = sum of differences, d1s64 = sum of squares.
-  q10s32 = vaddq_s32(q10s32, q9s32);
-  q0s64 = vpaddlq_s32(q8s32);
-  q1s64 = vpaddlq_s32(q10s32);
-
-  d0s64 = vadd_s64(vget_low_s64(q0s64), vget_high_s64(q0s64));
-  d1s64 = vadd_s64(vget_low_s64(q1s64), vget_high_s64(q1s64));
-
-  // Square the sum and store lane 0 of the squared-difference total as *sse.
-  q5s64 = vmull_s32(vreinterpret_s32_s64(d0s64), vreinterpret_s32_s64(d0s64));
-  vst1_lane_u32((uint32_t *)sse, vreinterpret_u32_s64(d1s64), 0);
-
-  // variance = sse - sum^2 / 128, where 128 = 16 * 8 pixels.
-  d10u32 = vshr_n_u32(vreinterpret_u32_s64(vget_low_s64(q5s64)), 7);
-  d0u32 = vsub_u32(vreinterpret_u32_s64(d1s64), d10u32);
-
-  return vget_lane_u32(d0u32, 0);
-}
-
-unsigned int aom_variance8x16_neon(const unsigned char *src_ptr,
-                                   int source_stride,
-                                   const unsigned char *ref_ptr,
-                                   int recon_stride, unsigned int *sse) {
-  int i;
-  uint8x8_t d0u8, d2u8, d4u8, d6u8;
-  int16x4_t d22s16, d23s16, d24s16, d25s16;
-  uint32x2_t d0u32, d10u32;
-  int64x1_t d0s64, d1s64;
-  uint16x8_t q11u16, q12u16;
-  int32x4_t q8s32, q9s32, q10s32;
-  int64x2_t q0s64, q1s64, q5s64;
-
-  q8s32 = vdupq_n_s32(0);
-  q9s32 = vdupq_n_s32(0);
-  q10s32 = vdupq_n_s32(0);
-
-  for (i = 0; i < 8; i++) {
-    d0u8 = vld1_u8(src_ptr);
-    src_ptr += source_stride;
-    d2u8 = vld1_u8(src_ptr);
-    src_ptr += source_stride;
-    __builtin_prefetch(src_ptr);
-
-    d4u8 = vld1_u8(ref_ptr);
-    ref_ptr += recon_stride;
-    d6u8 = vld1_u8(ref_ptr);
-    ref_ptr += recon_stride;
-    __builtin_prefetch(ref_ptr);
-
-    q11u16 = vsubl_u8(d0u8, d4u8);
-    q12u16 = vsubl_u8(d2u8, d6u8);
-
-    d22s16 = vreinterpret_s16_u16(vget_low_u16(q11u16));
-    d23s16 = vreinterpret_s16_u16(vget_high_u16(q11u16));
-    q8s32 = vpadalq_s16(q8s32, vreinterpretq_s16_u16(q11u16));
-    q9s32 = vmlal_s16(q9s32, d22s16, d22s16);
-    q10s32 = vmlal_s16(q10s32, d23s16, d23s16);
-
-    d24s16 = vreinterpret_s16_u16(vget_low_u16(q12u16));
-    d25s16 = vreinterpret_s16_u16(vget_high_u16(q12u16));
-    q8s32 = vpadalq_s16(q8s32, vreinterpretq_s16_u16(q12u16));
-    q9s32 = vmlal_s16(q9s32, d24s16, d24s16);
-    q10s32 = vmlal_s16(q10s32, d25s16, d25s16);
-  }
-
-  q10s32 = vaddq_s32(q10s32, q9s32);
-  q0s64 = vpaddlq_s32(q8s32);
-  q1s64 = vpaddlq_s32(q10s32);
-
-  d0s64 = vadd_s64(vget_low_s64(q0s64), vget_high_s64(q0s64));
-  d1s64 = vadd_s64(vget_low_s64(q1s64), vget_high_s64(q1s64));
-
-  q5s64 = vmull_s32(vreinterpret_s32_s64(d0s64), vreinterpret_s32_s64(d0s64));
-  vst1_lane_u32((uint32_t *)sse, vreinterpret_u32_s64(d1s64), 0);
-
-  d10u32 = vshr_n_u32(vreinterpret_u32_s64(vget_low_s64(q5s64)), 7);
-  d0u32 = vsub_u32(vreinterpret_u32_s64(d1s64), d10u32);
-
-  return vget_lane_u32(d0u32, 0);
-}
-
-unsigned int aom_mse16x16_neon(const unsigned char *src_ptr, int source_stride,
-                               const unsigned char *ref_ptr, int recon_stride,
-                               unsigned int *sse) {
-  int i;
-  int16x4_t d22s16, d23s16, d24s16, d25s16, d26s16, d27s16, d28s16, d29s16;
-  int64x1_t d0s64;
-  uint8x16_t q0u8, q1u8, q2u8, q3u8;
-  int32x4_t q7s32, q8s32, q9s32, q10s32;
-  uint16x8_t q11u16, q12u16, q13u16, q14u16;
-  int64x2_t q1s64;
-
-  q7s32 = vdupq_n_s32(0);
-  q8s32 = vdupq_n_s32(0);
-  q9s32 = vdupq_n_s32(0);
-  q10s32 = vdupq_n_s32(0);
-
-  for (i = 0; i < 8; i++) {  // mse16x16_neon_loop
-    q0u8 = vld1q_u8(src_ptr);
-    src_ptr += source_stride;
-    q1u8 = vld1q_u8(src_ptr);
-    src_ptr += source_stride;
-    q2u8 = vld1q_u8(ref_ptr);
-    ref_ptr += recon_stride;
-    q3u8 = vld1q_u8(ref_ptr);
-    ref_ptr += recon_stride;
-
-    q11u16 = vsubl_u8(vget_low_u8(q0u8), vget_low_u8(q2u8));
-    q12u16 = vsubl_u8(vget_high_u8(q0u8), vget_high_u8(q2u8));
-    q13u16 = vsubl_u8(vget_low_u8(q1u8), vget_low_u8(q3u8));
-    q14u16 = vsubl_u8(vget_high_u8(q1u8), vget_high_u8(q3u8));
-
-    d22s16 = vreinterpret_s16_u16(vget_low_u16(q11u16));
-    d23s16 = vreinterpret_s16_u16(vget_high_u16(q11u16));
-    q7s32 = vmlal_s16(q7s32, d22s16, d22s16);
-    q8s32 = vmlal_s16(q8s32, d23s16, d23s16);
-
-    d24s16 = vreinterpret_s16_u16(vget_low_u16(q12u16));
-    d25s16 = vreinterpret_s16_u16(vget_high_u16(q12u16));
-    q9s32 = vmlal_s16(q9s32, d24s16, d24s16);
-    q10s32 = vmlal_s16(q10s32, d25s16, d25s16);
-
-    d26s16 = vreinterpret_s16_u16(vget_low_u16(q13u16));
-    d27s16 = vreinterpret_s16_u16(vget_high_u16(q13u16));
-    q7s32 = vmlal_s16(q7s32, d26s16, d26s16);
-    q8s32 = vmlal_s16(q8s32, d27s16, d27s16);
-
-    d28s16 = vreinterpret_s16_u16(vget_low_u16(q14u16));
-    d29s16 = vreinterpret_s16_u16(vget_high_u16(q14u16));
-    q9s32 = vmlal_s16(q9s32, d28s16, d28s16);
-    q10s32 = vmlal_s16(q10s32, d29s16, d29s16);
-  }
-
-  q7s32 = vaddq_s32(q7s32, q8s32);
-  q9s32 = vaddq_s32(q9s32, q10s32);
-  q10s32 = vaddq_s32(q7s32, q9s32);
-
-  q1s64 = vpaddlq_s32(q10s32);
-  d0s64 = vadd_s64(vget_low_s64(q1s64), vget_high_s64(q1s64));
-
-  vst1_lane_u32((uint32_t *)sse, vreinterpret_u32_s64(d0s64), 0);
-  return vget_lane_u32(vreinterpret_u32_s64(d0s64), 0);
-}
-
-unsigned int aom_get4x4sse_cs_neon(const unsigned char *src_ptr,
-                                   int source_stride,
-                                   const unsigned char *ref_ptr,
-                                   int recon_stride) {
-  int16x4_t d22s16, d24s16, d26s16, d28s16;
-  int64x1_t d0s64;
-  uint8x8_t d0u8, d1u8, d2u8, d3u8, d4u8, d5u8, d6u8, d7u8;
-  int32x4_t q7s32, q8s32, q9s32, q10s32;
-  uint16x8_t q11u16, q12u16, q13u16, q14u16;
-  int64x2_t q1s64;
-
-  d0u8 = vld1_u8(src_ptr);
-  src_ptr += source_stride;
-  d4u8 = vld1_u8(ref_ptr);
-  ref_ptr += recon_stride;
-  d1u8 = vld1_u8(src_ptr);
-  src_ptr += source_stride;
-  d5u8 = vld1_u8(ref_ptr);
-  ref_ptr += recon_stride;
-  d2u8 = vld1_u8(src_ptr);
-  src_ptr += source_stride;
-  d6u8 = vld1_u8(ref_ptr);
-  ref_ptr += recon_stride;
-  d3u8 = vld1_u8(src_ptr);
-  src_ptr += source_stride;
-  d7u8 = vld1_u8(ref_ptr);
-  ref_ptr += recon_stride;
-
-  q11u16 = vsubl_u8(d0u8, d4u8);
-  q12u16 = vsubl_u8(d1u8, d5u8);
-  q13u16 = vsubl_u8(d2u8, d6u8);
-  q14u16 = vsubl_u8(d3u8, d7u8);
-
-  d22s16 = vget_low_s16(vreinterpretq_s16_u16(q11u16));
-  d24s16 = vget_low_s16(vreinterpretq_s16_u16(q12u16));
-  d26s16 = vget_low_s16(vreinterpretq_s16_u16(q13u16));
-  d28s16 = vget_low_s16(vreinterpretq_s16_u16(q14u16));
-
-  q7s32 = vmull_s16(d22s16, d22s16);
-  q8s32 = vmull_s16(d24s16, d24s16);
-  q9s32 = vmull_s16(d26s16, d26s16);
-  q10s32 = vmull_s16(d28s16, d28s16);
-
-  q7s32 = vaddq_s32(q7s32, q8s32);
-  q9s32 = vaddq_s32(q9s32, q10s32);
-  q9s32 = vaddq_s32(q7s32, q9s32);
-
-  q1s64 = vpaddlq_s32(q9s32);
-  d0s64 = vadd_s64(vget_low_s64(q1s64), vget_high_s64(q1s64));
-
-  return vget_lane_u32(vreinterpret_u32_s64(d0s64), 0);
-}
--- a/aom_dsp/bitreader.h
+++ b/aom_dsp/bitreader.h
@@ -1,240 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_DSP_BITREADER_H_
-#define AOM_DSP_BITREADER_H_
-
-#include <assert.h>
-#include <limits.h>
-
-#include "./aom_config.h"
-#if CONFIG_EC_ADAPT && !CONFIG_EC_MULTISYMBOL
-#error "CONFIG_EC_ADAPT is enabled without enabling CONFIG_EC_MULTISYMBOL."
-#endif
-
-#include "aom/aomdx.h"
-#include "aom/aom_integer.h"
-#if CONFIG_ANS
-#include "aom_dsp/ansreader.h"
-#elif CONFIG_DAALA_EC
-#include "aom_dsp/daalaboolreader.h"
-#else
-#include "aom_dsp/dkboolreader.h"
-#endif
-#include "aom_dsp/prob.h"
-#include "av1/common/odintrin.h"
-
-#if CONFIG_ACCOUNTING
-#include "av1/common/accounting.h"
-#define ACCT_STR_NAME acct_str
-#define ACCT_STR_PARAM , const char *ACCT_STR_NAME
-#define ACCT_STR_ARG(s) , s
-#else
-#define ACCT_STR_PARAM
-#define ACCT_STR_ARG(s)
-#endif
-
-#define aom_read(r, prob, ACCT_STR_NAME) \
-  aom_read_(r, prob ACCT_STR_ARG(ACCT_STR_NAME))
-#define aom_read_bit(r, ACCT_STR_NAME) \
-  aom_read_bit_(r ACCT_STR_ARG(ACCT_STR_NAME))
-#define aom_read_tree(r, tree, probs, ACCT_STR_NAME) \
-  aom_read_tree_(r, tree, probs ACCT_STR_ARG(ACCT_STR_NAME))
-#define aom_read_literal(r, bits, ACCT_STR_NAME) \
-  aom_read_literal_(r, bits ACCT_STR_ARG(ACCT_STR_NAME))
-#define aom_read_tree_bits(r, tree, probs, ACCT_STR_NAME) \
-  aom_read_tree_bits_(r, tree, probs ACCT_STR_ARG(ACCT_STR_NAME))
-#define aom_read_symbol(r, cdf, nsymbs, ACCT_STR_NAME) \
-  aom_read_symbol_(r, cdf, nsymbs ACCT_STR_ARG(ACCT_STR_NAME))
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#if CONFIG_ANS
-typedef struct AnsDecoder aom_reader;
-#elif CONFIG_DAALA_EC
-typedef struct daala_reader aom_reader;
-#else
-typedef struct aom_dk_reader aom_reader;
-#endif
-
-static INLINE int aom_reader_init(aom_reader *r, const uint8_t *buffer,
-                                  size_t size, aom_decrypt_cb decrypt_cb,
-                                  void *decrypt_state) {
-#if CONFIG_ANS
-  (void)decrypt_cb;
-  (void)decrypt_state;
-  assert(size <= INT_MAX);
-  return ans_read_init(r, buffer, size);
-#elif CONFIG_DAALA_EC
-  (void)decrypt_cb;
-  (void)decrypt_state;
-  return aom_daala_reader_init(r, buffer, size);
-#else
-  return aom_dk_reader_init(r, buffer, size, decrypt_cb, decrypt_state);
-#endif
-}
-
-static INLINE const uint8_t *aom_reader_find_end(aom_reader *r) {
-#if CONFIG_ANS
-  (void)r;
-  assert(0 && "Use the raw buffer size with ANS");
-  return NULL;
-#elif CONFIG_DAALA_EC
-  return aom_daala_reader_find_end(r);
-#else
-  return aom_dk_reader_find_end(r);
-#endif
-}
-
-static INLINE int aom_reader_has_error(aom_reader *r) {
-#if CONFIG_ANS
-  return ans_reader_has_error(r);
-#elif CONFIG_DAALA_EC
-  return aom_daala_reader_has_error(r);
-#else
-  return aom_dk_reader_has_error(r);
-#endif
-}
-
-// Returns the position in the bit reader in bits.
-static INLINE uint32_t aom_reader_tell(const aom_reader *r) {
-#if CONFIG_ANS
-  (void)r;
-  assert(0 && "aom_reader_tell() is unimplemented for ANS");
-  return 0;
-#elif CONFIG_DAALA_EC
-  return aom_daala_reader_tell(r);
-#else
-  return aom_dk_reader_tell(r);
-#endif
-}
-
-// Returns the position in the bit reader in 1/8th bits.
-static INLINE uint32_t aom_reader_tell_frac(const aom_reader *r) {
-#if CONFIG_ANS
-  (void)r;
-  assert(0 && "aom_reader_tell_frac() is unimplemented for ANS");
-  return 0;
-#elif CONFIG_DAALA_EC
-  return aom_daala_reader_tell_frac(r);
-#else
-  return aom_dk_reader_tell_frac(r);
-#endif
-}
-
-#if CONFIG_ACCOUNTING
-static INLINE void aom_process_accounting(const aom_reader *r ACCT_STR_PARAM) {
-  if (r->accounting != NULL) {
-    uint32_t tell_frac;
-    tell_frac = aom_reader_tell_frac(r);
-    aom_accounting_record(r->accounting, ACCT_STR_NAME,
-                          tell_frac - r->accounting->last_tell_frac);
-    r->accounting->last_tell_frac = tell_frac;
-  }
-}
-#endif
-
-static INLINE int aom_read_(aom_reader *r, int prob ACCT_STR_PARAM) {
-  int ret;
-#if CONFIG_ANS
-  ret = uabs_read(r, prob);
-#elif CONFIG_DAALA_EC
-  ret = aom_daala_read(r, prob);
-#else
-  ret = aom_dk_read(r, prob);
-#endif
-#if CONFIG_ACCOUNTING
-  if (ACCT_STR_NAME) aom_process_accounting(r, ACCT_STR_NAME);
-#endif
-  return ret;
-}
-
-static INLINE int aom_read_bit_(aom_reader *r ACCT_STR_PARAM) {
-  int ret;
-#if CONFIG_ANS
-  ret = uabs_read_bit(r);  // Non trivial optimization at half probability
-#else
-  ret = aom_read(r, 128, NULL);  // aom_prob_half
-#endif
-#if CONFIG_ACCOUNTING
-  if (ACCT_STR_NAME) aom_process_accounting(r, ACCT_STR_NAME);
-#endif
-  return ret;
-}
-
-static INLINE int aom_read_literal_(aom_reader *r, int bits ACCT_STR_PARAM) {
-  int literal = 0, bit;
-
-  for (bit = bits - 1; bit >= 0; bit--) literal |= aom_read_bit(r, NULL) << bit;
-#if CONFIG_ACCOUNTING
-  if (ACCT_STR_NAME) aom_process_accounting(r, ACCT_STR_NAME);
-#endif
-  return literal;
-}
-
-static INLINE int aom_read_tree_bits_(aom_reader *r, const aom_tree_index *tree,
-                                      const aom_prob *probs ACCT_STR_PARAM) {
-  aom_tree_index i = 0;
-
-  while ((i = tree[i + aom_read(r, probs[i >> 1], NULL)]) > 0) continue;
-#if CONFIG_ACCOUNTING
-  if (ACCT_STR_NAME) aom_process_accounting(r, ACCT_STR_NAME);
-#endif
-  return -i;
-}
-
-static INLINE int aom_read_tree_(aom_reader *r, const aom_tree_index *tree,
-                                 const aom_prob *probs ACCT_STR_PARAM) {
-  int ret;
-#if CONFIG_DAALA_EC
-  ret = daala_read_tree_bits(r, tree, probs);
-#else
-  ret = aom_read_tree_bits(r, tree, probs, NULL);
-#endif
-#if CONFIG_ACCOUNTING
-  if (ACCT_STR_NAME) aom_process_accounting(r, ACCT_STR_NAME);
-#endif
-  return ret;
-}
-
-#if CONFIG_EC_MULTISYMBOL
-static INLINE int aom_read_symbol_(aom_reader *r, aom_cdf_prob *cdf,
-                                   int nsymbs ACCT_STR_PARAM) {
-  int ret;
-#if CONFIG_RANS
-  (void)nsymbs;
-  ret = rans_read(r, cdf);
-#elif CONFIG_DAALA_EC
-  ret = daala_read_symbol(r, cdf, nsymbs);
-#else
-#error \
-    "CONFIG_EC_MULTISYMBOL is selected without a valid backing entropy " \
-  "coder. Enable daala_ec or ans for a valid configuration."
-#endif
-
-#if CONFIG_EC_ADAPT
-  update_cdf(cdf, ret, nsymbs);
-#endif
-
-#if CONFIG_ACCOUNTING
-  if (ACCT_STR_NAME) aom_process_accounting(r, ACCT_STR_NAME);
-#endif
-  return ret;
-}
-#endif  // CONFIG_EC_MULTISYMBOL
-
-#ifdef __cplusplus
-}  // extern "C"
-#endif
-
-#endif  // AOM_DSP_BITREADER_H_
--- a/aom_dsp/bitreader_buffer.c
+++ b/aom_dsp/bitreader_buffer.c
@@ -1,47 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-#include "./aom_config.h"
-#include "./bitreader_buffer.h"
-
-size_t aom_rb_bytes_read(struct aom_read_bit_buffer *rb) {
-  return (rb->bit_offset + 7) >> 3;
-}
-
-int aom_rb_read_bit(struct aom_read_bit_buffer *rb) {
-  const size_t off = rb->bit_offset;
-  const size_t p = off >> 3;
-  const int q = 7 - (int)(off & 0x7);
-  if (rb->bit_buffer + p < rb->bit_buffer_end) {
-    const int bit = (rb->bit_buffer[p] >> q) & 1;
-    rb->bit_offset = off + 1;
-    return bit;
-  } else {
-    rb->error_handler(rb->error_handler_data);
-    return 0;
-  }
-}
-
-int aom_rb_read_literal(struct aom_read_bit_buffer *rb, int bits) {
-  int value = 0, bit;
-  for (bit = bits - 1; bit >= 0; bit--) value |= aom_rb_read_bit(rb) << bit;
-  return value;
-}
-
-int aom_rb_read_signed_literal(struct aom_read_bit_buffer *rb, int bits) {
-  const int value = aom_rb_read_literal(rb, bits);
-  return aom_rb_read_bit(rb) ? -value : value;
-}
-
-int aom_rb_read_inv_signed_literal(struct aom_read_bit_buffer *rb, int bits) {
-  const int nbits = sizeof(unsigned) * 8 - bits - 1;
-  const unsigned value = (unsigned)aom_rb_read_literal(rb, bits + 1) << nbits;
-  return ((int)value) >> nbits;
-}
--- a/aom_dsp/bitreader_buffer.h
+++ b/aom_dsp/bitreader_buffer.h
@@ -1,48 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_DSP_BITREADER_BUFFER_H_
-#define AOM_DSP_BITREADER_BUFFER_H_
-
-#include <limits.h>
-
-#include "aom/aom_integer.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-typedef void (*aom_rb_error_handler)(void *data);
-
-struct aom_read_bit_buffer {
-  const uint8_t *bit_buffer;
-  const uint8_t *bit_buffer_end;
-  size_t bit_offset;
-
-  void *error_handler_data;
-  aom_rb_error_handler error_handler;
-};
-
-size_t aom_rb_bytes_read(struct aom_read_bit_buffer *rb);
-
-int aom_rb_read_bit(struct aom_read_bit_buffer *rb);
-
-int aom_rb_read_literal(struct aom_read_bit_buffer *rb, int bits);
-
-int aom_rb_read_signed_literal(struct aom_read_bit_buffer *rb, int bits);
-
-int aom_rb_read_inv_signed_literal(struct aom_read_bit_buffer *rb, int bits);
-
-#ifdef __cplusplus
-}  // extern "C"
-#endif
-
-#endif  // AOM_DSP_BITREADER_BUFFER_H_
--- a/aom_dsp/bitwriter.h
+++ b/aom_dsp/bitwriter.h
@@ -1,179 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_DSP_BITWRITER_H_
-#define AOM_DSP_BITWRITER_H_
-
-#include <assert.h>
-#include "./aom_config.h"
-#if CONFIG_EC_ADAPT && !CONFIG_EC_MULTISYMBOL
-#error "CONFIG_EC_ADAPT is enabled without enabling CONFIG_EC_MULTISYMBOL"
-#endif
-
-#if CONFIG_ANS
-#include "aom_dsp/buf_ans.h"
-#elif CONFIG_DAALA_EC
-#include "aom_dsp/daalaboolwriter.h"
-#else
-#include "aom_dsp/dkboolwriter.h"
-#endif
-#include "aom_dsp/prob.h"
-
-#if CONFIG_RD_DEBUG
-#include "av1/encoder/cost.h"
-#endif
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#if CONFIG_ANS
-typedef struct BufAnsCoder aom_writer;
-#elif CONFIG_DAALA_EC
-typedef struct daala_writer aom_writer;
-#else
-typedef struct aom_dk_writer aom_writer;
-#endif
-
-typedef struct TOKEN_STATS { int64_t cost; } TOKEN_STATS;
-
-static INLINE void aom_start_encode(aom_writer *bc, uint8_t *buffer) {
-#if CONFIG_ANS
-  (void)bc;
-  (void)buffer;
-  assert(0 && "buf_ans requires a more complicated startup procedure");
-#elif CONFIG_DAALA_EC
-  aom_daala_start_encode(bc, buffer);
-#else
-  aom_dk_start_encode(bc, buffer);
-#endif
-}
-
-static INLINE void aom_stop_encode(aom_writer *bc) {
-#if CONFIG_ANS
-  (void)bc;
-  assert(0 && "buf_ans requires a more complicated shutdown procedure");
-#elif CONFIG_DAALA_EC
-  aom_daala_stop_encode(bc);
-#else
-  aom_dk_stop_encode(bc);
-#endif
-}
-
-static INLINE void aom_write(aom_writer *br, int bit, int probability) {
-#if CONFIG_ANS
-  buf_uabs_write(br, bit, probability);
-#elif CONFIG_DAALA_EC
-  aom_daala_write(br, bit, probability);
-#else
-  aom_dk_write(br, bit, probability);
-#endif
-}
-
-static INLINE void aom_write_record(aom_writer *br, int bit, int probability,
-                                    TOKEN_STATS *token_stats) {
-  aom_write(br, bit, probability);
-#if CONFIG_RD_DEBUG
-  token_stats->cost += av1_cost_bit(probability, bit);
-#else
-  (void)token_stats;
-#endif
-}
-
-static INLINE void aom_write_bit(aom_writer *w, int bit) {
-  aom_write(w, bit, 128);  // aom_prob_half
-}
-
-static INLINE void aom_write_bit_record(aom_writer *w, int bit,
-                                        TOKEN_STATS *token_stats) {
-  aom_write_record(w, bit, 128, token_stats);  // aom_prob_half
-}
-
-static INLINE void aom_write_literal(aom_writer *w, int data, int bits) {
-  int bit;
-
-  for (bit = bits - 1; bit >= 0; bit--) aom_write_bit(w, 1 & (data >> bit));
-}
-
-static INLINE void aom_write_tree_bits(aom_writer *w, const aom_tree_index *tr,
-                                       const aom_prob *probs, int bits, int len,
-                                       aom_tree_index i) {
-  do {
-    const int bit = (bits >> --len) & 1;
-    aom_write(w, bit, probs[i >> 1]);
-    i = tr[i + bit];
-  } while (len);
-}
-
-static INLINE void aom_write_tree_bits_record(aom_writer *w,
-                                              const aom_tree_index *tr,
-                                              const aom_prob *probs, int bits,
-                                              int len, aom_tree_index i,
-                                              TOKEN_STATS *token_stats) {
-  do {
-    const int bit = (bits >> --len) & 1;
-    aom_write_record(w, bit, probs[i >> 1], token_stats);
-    i = tr[i + bit];
-  } while (len);
-}
-
-static INLINE void aom_write_tree(aom_writer *w, const aom_tree_index *tree,
-                                  const aom_prob *probs, int bits, int len,
-                                  aom_tree_index i) {
-#if CONFIG_DAALA_EC
-  daala_write_tree_bits(w, tree, probs, bits, len, i);
-#else
-  aom_write_tree_bits(w, tree, probs, bits, len, i);
-#endif
-}
-
-static INLINE void aom_write_tree_record(aom_writer *w,
-                                         const aom_tree_index *tree,
-                                         const aom_prob *probs, int bits,
-                                         int len, aom_tree_index i,
-                                         TOKEN_STATS *token_stats) {
-#if CONFIG_DAALA_EC
-  (void)token_stats;
-  daala_write_tree_bits(w, tree, probs, bits, len, i);
-#else
-  aom_write_tree_bits_record(w, tree, probs, bits, len, i, token_stats);
-#endif
-}
-
-#if CONFIG_EC_MULTISYMBOL
-static INLINE void aom_write_symbol(aom_writer *w, int symb, aom_cdf_prob *cdf,
-                                    int nsymbs) {
-#if CONFIG_RANS
-  struct rans_sym s;
-  (void)nsymbs;
-  assert(cdf);
-  s.cum_prob = symb > 0 ? cdf[symb - 1] : 0;
-  s.prob = cdf[symb] - s.cum_prob;
-  buf_rans_write(w, &s);
-#elif CONFIG_DAALA_EC
-  daala_write_symbol(w, symb, cdf, nsymbs);
-#else
-#error \
-    "CONFIG_EC_MULTISYMBOL is selected without a valid backing entropy " \
-  "coder. Enable daala_ec or ans for a valid configuration."
-#endif
-
-#if CONFIG_EC_ADAPT
-  update_cdf(cdf, symb, nsymbs);
-#endif
-}
-#endif  // CONFIG_EC_MULTISYMBOL
-
-#ifdef __cplusplus
-}  // extern "C"
-#endif
-
-#endif  // AOM_DSP_BITWRITER_H_
--- a/aom_dsp/bitwriter_buffer.c
+++ b/aom_dsp/bitwriter_buffer.c
@@ -1,43 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <limits.h>
-#include <stdlib.h>
-
-#include "./aom_config.h"
-#include "./bitwriter_buffer.h"
-
-size_t aom_wb_bytes_written(const struct aom_write_bit_buffer *wb) {
-  return wb->bit_offset / CHAR_BIT + (wb->bit_offset % CHAR_BIT > 0);
-}
-
-void aom_wb_write_bit(struct aom_write_bit_buffer *wb, int bit) {
-  const int off = (int)wb->bit_offset;
-  const int p = off / CHAR_BIT;
-  const int q = CHAR_BIT - 1 - off % CHAR_BIT;
-  if (q == CHAR_BIT - 1) {
-    wb->bit_buffer[p] = bit << q;
-  } else {
-    wb->bit_buffer[p] &= ~(1 << q);
-    wb->bit_buffer[p] |= bit << q;
-  }
-  wb->bit_offset = off + 1;
-}
-
-void aom_wb_write_literal(struct aom_write_bit_buffer *wb, int data, int bits) {
-  int bit;
-  for (bit = bits - 1; bit >= 0; bit--) aom_wb_write_bit(wb, (data >> bit) & 1);
-}
-
-void aom_wb_write_inv_signed_literal(struct aom_write_bit_buffer *wb, int data,
-                                     int bits) {
-  aom_wb_write_literal(wb, data, bits + 1);
-}
--- a/aom_dsp/bitwriter_buffer.h
+++ b/aom_dsp/bitwriter_buffer.h
@@ -1,39 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_DSP_BITWRITER_BUFFER_H_
-#define AOM_DSP_BITWRITER_BUFFER_H_
-
-#include "aom/aom_integer.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-struct aom_write_bit_buffer {
-  uint8_t *bit_buffer;
-  size_t bit_offset;
-};
-
-size_t aom_wb_bytes_written(const struct aom_write_bit_buffer *wb);
-
-void aom_wb_write_bit(struct aom_write_bit_buffer *wb, int bit);
-
-void aom_wb_write_literal(struct aom_write_bit_buffer *wb, int data, int bits);
-
-void aom_wb_write_inv_signed_literal(struct aom_write_bit_buffer *wb, int data,
-                                     int bits);
-
-#ifdef __cplusplus
-}  // extern "C"
-#endif
-
-#endif  // AOM_DSP_BITWRITER_BUFFER_H_
--- a/aom_dsp/blend.h
+++ b/aom_dsp/blend.h
@@ -1,42 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_DSP_BLEND_H_
-#define AOM_DSP_BLEND_H_
-
-#include "aom_ports/mem.h"
-
-// Various blending functions and macros.
-// See also the aom_blend_* functions in aom_dsp_rtcd.h
-
-// Alpha blending with alpha values from the range [0, 64], where 64
-// means use the first input and 0 means use the second input.
-
-#define AOM_BLEND_A64_ROUND_BITS 6
-#define AOM_BLEND_A64_MAX_ALPHA (1 << AOM_BLEND_A64_ROUND_BITS)  // 64
-
-#define AOM_BLEND_A64(a, v0, v1)                                          \
-  ROUND_POWER_OF_TWO((a) * (v0) + (AOM_BLEND_A64_MAX_ALPHA - (a)) * (v1), \
-                     AOM_BLEND_A64_ROUND_BITS)
-
-// Alpha blending with alpha values from the range [0, 256], where 256
-// means use the first input and 0 means use the second input.
-#define AOM_BLEND_A256_ROUND_BITS 8
-#define AOM_BLEND_A256_MAX_ALPHA (1 << AOM_BLEND_A256_ROUND_BITS)  // 256
-
-#define AOM_BLEND_A256(a, v0, v1)                                          \
-  ROUND_POWER_OF_TWO((a) * (v0) + (AOM_BLEND_A256_MAX_ALPHA - (a)) * (v1), \
-                     AOM_BLEND_A256_ROUND_BITS)
-
-// Blending by averaging.
-#define AOM_BLEND_AVG(v0, v1) ROUND_POWER_OF_TWO((v0) + (v1), 1)
-
-#endif  // AOM_DSP_BLEND_H_
--- a/aom_dsp/blend_a64_hmask.c
+++ b/aom_dsp/blend_a64_hmask.c
@@ -1,71 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <assert.h>
-
-#include "aom/aom_integer.h"
-#include "aom_ports/mem.h"
-#include "aom_dsp/aom_dsp_common.h"
-#include "aom_dsp/blend.h"
-
-#include "./aom_dsp_rtcd.h"
-
-void aom_blend_a64_hmask_c(uint8_t *dst, uint32_t dst_stride,
-                           const uint8_t *src0, uint32_t src0_stride,
-                           const uint8_t *src1, uint32_t src1_stride,
-                           const uint8_t *mask, int h, int w) {
-  int i, j;
-
-  assert(IMPLIES(src0 == dst, src0_stride == dst_stride));
-  assert(IMPLIES(src1 == dst, src1_stride == dst_stride));
-
-  assert(h >= 1);
-  assert(w >= 1);
-  assert(IS_POWER_OF_TWO(h));
-  assert(IS_POWER_OF_TWO(w));
-
-  for (i = 0; i < h; ++i) {
-    for (j = 0; j < w; ++j) {
-      dst[i * dst_stride + j] = AOM_BLEND_A64(
-          mask[j], src0[i * src0_stride + j], src1[i * src1_stride + j]);
-    }
-  }
-}
-
-#if CONFIG_AOM_HIGHBITDEPTH
-void aom_highbd_blend_a64_hmask_c(uint8_t *dst_8, uint32_t dst_stride,
-                                  const uint8_t *src0_8, uint32_t src0_stride,
-                                  const uint8_t *src1_8, uint32_t src1_stride,
-                                  const uint8_t *mask, int h, int w, int bd) {
-  int i, j;
-  uint16_t *dst = CONVERT_TO_SHORTPTR(dst_8);
-  const uint16_t *src0 = CONVERT_TO_SHORTPTR(src0_8);
-  const uint16_t *src1 = CONVERT_TO_SHORTPTR(src1_8);
-  (void)bd;
-
-  assert(IMPLIES(src0 == dst, src0_stride == dst_stride));
-  assert(IMPLIES(src1 == dst, src1_stride == dst_stride));
-
-  assert(h >= 1);
-  assert(w >= 1);
-  assert(IS_POWER_OF_TWO(h));
-  assert(IS_POWER_OF_TWO(w));
-
-  assert(bd == 8 || bd == 10 || bd == 12);
-
-  for (i = 0; i < h; ++i) {
-    for (j = 0; j < w; ++j) {
-      dst[i * dst_stride + j] = AOM_BLEND_A64(
-          mask[j], src0[i * src0_stride + j], src1[i * src1_stride + j]);
-    }
-  }
-}
-#endif  // CONFIG_AOM_HIGHBITDEPTH
--- a/aom_dsp/blend_a64_mask.c
+++ b/aom_dsp/blend_a64_mask.c
@@ -1,145 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <assert.h>
-
-#include "aom/aom_integer.h"
-#include "aom_ports/mem.h"
-#include "aom_dsp/blend.h"
-#include "aom_dsp/aom_dsp_common.h"
-
-#include "./aom_dsp_rtcd.h"
-
-// Blending with alpha mask. Mask values come from the range [0, 64],
-// as described for AOM_BLEND_A64 in aom_dsp/blend.h. src0 or src1 can
-// be the same as dst, or dst can be different from both sources.
-
-void aom_blend_a64_mask_c(uint8_t *dst, uint32_t dst_stride,
-                          const uint8_t *src0, uint32_t src0_stride,
-                          const uint8_t *src1, uint32_t src1_stride,
-                          const uint8_t *mask, uint32_t mask_stride, int h,
-                          int w, int subh, int subw) {
-  int i, j;
-
-  assert(IMPLIES(src0 == dst, src0_stride == dst_stride));
-  assert(IMPLIES(src1 == dst, src1_stride == dst_stride));
-
-  assert(h >= 1);
-  assert(w >= 1);
-  assert(IS_POWER_OF_TWO(h));
-  assert(IS_POWER_OF_TWO(w));
-
-  if (subw == 0 && subh == 0) {
-    for (i = 0; i < h; ++i) {
-      for (j = 0; j < w; ++j) {
-        const int m = mask[i * mask_stride + j];
-        dst[i * dst_stride + j] = AOM_BLEND_A64(m, src0[i * src0_stride + j],
-                                                src1[i * src1_stride + j]);
-      }
-    }
-  } else if (subw == 1 && subh == 1) {
-    for (i = 0; i < h; ++i) {
-      for (j = 0; j < w; ++j) {
-        const int m = ROUND_POWER_OF_TWO(
-            mask[(2 * i) * mask_stride + (2 * j)] +
-                mask[(2 * i + 1) * mask_stride + (2 * j)] +
-                mask[(2 * i) * mask_stride + (2 * j + 1)] +
-                mask[(2 * i + 1) * mask_stride + (2 * j + 1)],
-            2);
-        dst[i * dst_stride + j] = AOM_BLEND_A64(m, src0[i * src0_stride + j],
-                                                src1[i * src1_stride + j]);
-      }
-    }
-  } else if (subw == 1 && subh == 0) {
-    for (i = 0; i < h; ++i) {
-      for (j = 0; j < w; ++j) {
-        const int m = AOM_BLEND_AVG(mask[i * mask_stride + (2 * j)],
-                                    mask[i * mask_stride + (2 * j + 1)]);
-        dst[i * dst_stride + j] = AOM_BLEND_A64(m, src0[i * src0_stride + j],
-                                                src1[i * src1_stride + j]);
-      }
-    }
-  } else {
-    for (i = 0; i < h; ++i) {
-      for (j = 0; j < w; ++j) {
-        const int m = AOM_BLEND_AVG(mask[(2 * i) * mask_stride + j],
-                                    mask[(2 * i + 1) * mask_stride + j]);
-        dst[i * dst_stride + j] = AOM_BLEND_A64(m, src0[i * src0_stride + j],
-                                                src1[i * src1_stride + j]);
-      }
-    }
-  }
-}
-
-#if CONFIG_AOM_HIGHBITDEPTH
-void aom_highbd_blend_a64_mask_c(uint8_t *dst_8, uint32_t dst_stride,
-                                 const uint8_t *src0_8, uint32_t src0_stride,
-                                 const uint8_t *src1_8, uint32_t src1_stride,
-                                 const uint8_t *mask, uint32_t mask_stride,
-                                 int h, int w, int subh, int subw, int bd) {
-  int i, j;
-  uint16_t *dst = CONVERT_TO_SHORTPTR(dst_8);
-  const uint16_t *src0 = CONVERT_TO_SHORTPTR(src0_8);
-  const uint16_t *src1 = CONVERT_TO_SHORTPTR(src1_8);
-  (void)bd;
-
-  assert(IMPLIES(src0 == dst, src0_stride == dst_stride));
-  assert(IMPLIES(src1 == dst, src1_stride == dst_stride));
-
-  assert(h >= 1);
-  assert(w >= 1);
-  assert(IS_POWER_OF_TWO(h));
-  assert(IS_POWER_OF_TWO(w));
-
-  assert(bd == 8 || bd == 10 || bd == 12);
-
-  if (subw == 0 && subh == 0) {
-    for (i = 0; i < h; ++i) {
-      for (j = 0; j < w; ++j) {
-        const int m = mask[i * mask_stride + j];
-        dst[i * dst_stride + j] = AOM_BLEND_A64(m, src0[i * src0_stride + j],
-                                                src1[i * src1_stride + j]);
-      }
-    }
-  } else if (subw == 1 && subh == 1) {
-    for (i = 0; i < h; ++i) {
-      for (j = 0; j < w; ++j) {
-        const int m = ROUND_POWER_OF_TWO(
-            mask[(2 * i) * mask_stride + (2 * j)] +
-                mask[(2 * i + 1) * mask_stride + (2 * j)] +
-                mask[(2 * i) * mask_stride + (2 * j + 1)] +
-                mask[(2 * i + 1) * mask_stride + (2 * j + 1)],
-            2);
-        dst[i * dst_stride + j] = AOM_BLEND_A64(m, src0[i * src0_stride + j],
-                                                src1[i * src1_stride + j]);
-      }
-    }
-  } else if (subw == 1 && subh == 0) {
-    for (i = 0; i < h; ++i) {
-      for (j = 0; j < w; ++j) {
-        const int m = AOM_BLEND_AVG(mask[i * mask_stride + (2 * j)],
-                                    mask[i * mask_stride + (2 * j + 1)]);
-        dst[i * dst_stride + j] = AOM_BLEND_A64(m, src0[i * src0_stride + j],
-                                                src1[i * src1_stride + j]);
-      }
-    }
-  } else {
-    for (i = 0; i < h; ++i) {
-      for (j = 0; j < w; ++j) {
-        const int m = AOM_BLEND_AVG(mask[(2 * i) * mask_stride + j],
-                                    mask[(2 * i + 1) * mask_stride + j]);
-        dst[i * dst_stride + j] = AOM_BLEND_A64(m, src0[i * src0_stride + j],
-                                                src1[i * src1_stride + j]);
-      }
-    }
-  }
-}
-#endif  // CONFIG_AOM_HIGHBITDEPTH
--- a/aom_dsp/blend_a64_vmask.c
+++ b/aom_dsp/blend_a64_vmask.c
@@ -1,73 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <assert.h>
-
-#include "aom/aom_integer.h"
-#include "aom_ports/mem.h"
-#include "aom_dsp/aom_dsp_common.h"
-#include "aom_dsp/blend.h"
-
-#include "./aom_dsp_rtcd.h"
-
-void aom_blend_a64_vmask_c(uint8_t *dst, uint32_t dst_stride,
-                           const uint8_t *src0, uint32_t src0_stride,
-                           const uint8_t *src1, uint32_t src1_stride,
-                           const uint8_t *mask, int h, int w) {
-  int i, j;
-
-  assert(IMPLIES(src0 == dst, src0_stride == dst_stride));
-  assert(IMPLIES(src1 == dst, src1_stride == dst_stride));
-
-  assert(h >= 1);
-  assert(w >= 1);
-  assert(IS_POWER_OF_TWO(h));
-  assert(IS_POWER_OF_TWO(w));
-
-  for (i = 0; i < h; ++i) {
-    const int m = mask[i];
-    for (j = 0; j < w; ++j) {
-      dst[i * dst_stride + j] = AOM_BLEND_A64(m, src0[i * src0_stride + j],
-                                              src1[i * src1_stride + j]);
-    }
-  }
-}
-
-#if CONFIG_AOM_HIGHBITDEPTH
-void aom_highbd_blend_a64_vmask_c(uint8_t *dst_8, uint32_t dst_stride,
-                                  const uint8_t *src0_8, uint32_t src0_stride,
-                                  const uint8_t *src1_8, uint32_t src1_stride,
-                                  const uint8_t *mask, int h, int w, int bd) {
-  int i, j;
-  uint16_t *dst = CONVERT_TO_SHORTPTR(dst_8);
-  const uint16_t *src0 = CONVERT_TO_SHORTPTR(src0_8);
-  const uint16_t *src1 = CONVERT_TO_SHORTPTR(src1_8);
-  (void)bd;
-
-  assert(IMPLIES(src0 == dst, src0_stride == dst_stride));
-  assert(IMPLIES(src1 == dst, src1_stride == dst_stride));
-
-  assert(h >= 1);
-  assert(w >= 1);
-  assert(IS_POWER_OF_TWO(h));
-  assert(IS_POWER_OF_TWO(w));
-
-  assert(bd == 8 || bd == 10 || bd == 12);
-
-  for (i = 0; i < h; ++i) {
-    const int m = mask[i];
-    for (j = 0; j < w; ++j) {
-      dst[i * dst_stride + j] = AOM_BLEND_A64(m, src0[i * src0_stride + j],
-                                              src1[i * src1_stride + j]);
-    }
-  }
-}
-#endif  // CONFIG_AOM_HIGHBITDEPTH
--- a/aom_dsp/buf_ans.c
+++ b/aom_dsp/buf_ans.c
@@ -1,42 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <string.h>
-
-#include "aom_dsp/buf_ans.h"
-#include "aom_mem/aom_mem.h"
-#include "aom/internal/aom_codec_internal.h"
-
-void aom_buf_ans_alloc(struct BufAnsCoder *c,
-                       struct aom_internal_error_info *error, int size_hint) {
-  c->error = error;
-  c->size = size_hint;
-  AOM_CHECK_MEM_ERROR(error, c->buf, aom_malloc(c->size * sizeof(*c->buf)));
-  // Initialize to overfull to trigger the assert in write.
-  c->offset = c->size + 1;
-}
-
-void aom_buf_ans_free(struct BufAnsCoder *c) {
-  aom_free(c->buf);
-  c->buf = NULL;
-  c->size = 0;
-}
-
-void aom_buf_ans_grow(struct BufAnsCoder *c) {
-  struct buffered_ans_symbol *new_buf = NULL;
-  int new_size = c->size * 2;
-  AOM_CHECK_MEM_ERROR(c->error, new_buf,
-                      aom_malloc(new_size * sizeof(*new_buf)));
-  memcpy(new_buf, c->buf, c->size * sizeof(*c->buf));
-  aom_free(c->buf);
-  c->buf = new_buf;
-  c->size = new_size;
-}
--- a/aom_dsp/buf_ans.h
+++ b/aom_dsp/buf_ans.h
@@ -1,112 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_DSP_BUF_ANS_H_
-#define AOM_DSP_BUF_ANS_H_
-// Buffered forward ANS writer.
-// Symbols are written to the writer in forward (decode) order and serialized
-// backwards due to ANS's stack like behavior.
-
-#include <assert.h>
-#include "./aom_config.h"
-#include "aom/aom_integer.h"
-#include "aom_dsp/ans.h"
-#include "aom_dsp/answriter.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif  // __cplusplus
-
-#define ANS_METHOD_UABS 0
-#define ANS_METHOD_RANS 1
-
-struct buffered_ans_symbol {
-  unsigned int method : 1;  // one of ANS_METHOD_UABS or ANS_METHOD_RANS
-  // TODO(aconverse): Should be possible to write this in terms of start for ABS
-  unsigned int val_start : RANS_PROB_BITS;  // Boolean value for ABS
-                                            // start in symbol cycle for Rans
-  unsigned int prob : RANS_PROB_BITS;       // Probability of this symbol
-};
-
-struct BufAnsCoder {
-  struct aom_internal_error_info *error;
-  struct buffered_ans_symbol *buf;
-  int size;
-  int offset;
-};
-
-void aom_buf_ans_alloc(struct BufAnsCoder *c,
-                       struct aom_internal_error_info *error, int size_hint);
-
-void aom_buf_ans_free(struct BufAnsCoder *c);
-
-void aom_buf_ans_grow(struct BufAnsCoder *c);
-
-static INLINE void buf_ans_write_reset(struct BufAnsCoder *const c) {
-  c->offset = 0;
-}
-
-static INLINE void buf_uabs_write(struct BufAnsCoder *const c, uint8_t val,
-                                  AnsP8 prob) {
-  assert(c->offset <= c->size);
-  if (c->offset == c->size) {
-    aom_buf_ans_grow(c);
-  }
-  c->buf[c->offset].method = ANS_METHOD_UABS;
-  c->buf[c->offset].val_start = val;
-  c->buf[c->offset].prob = prob;
-  ++c->offset;
-}
-
-static INLINE void buf_rans_write(struct BufAnsCoder *const c,
-                                  const struct rans_sym *const sym) {
-  assert(c->offset <= c->size);
-  if (c->offset == c->size) {
-    aom_buf_ans_grow(c);
-  }
-  c->buf[c->offset].method = ANS_METHOD_RANS;
-  c->buf[c->offset].val_start = sym->cum_prob;
-  c->buf[c->offset].prob = sym->prob;
-  ++c->offset;
-}
-
-static INLINE void buf_ans_flush(const struct BufAnsCoder *const c,
-                                 struct AnsCoder *ans) {
-  int offset;
-  for (offset = c->offset - 1; offset >= 0; --offset) {
-    if (c->buf[offset].method == ANS_METHOD_RANS) {
-      struct rans_sym sym;
-      sym.prob = c->buf[offset].prob;
-      sym.cum_prob = c->buf[offset].val_start;
-      rans_write(ans, &sym);
-    } else {
-      uabs_write(ans, (uint8_t)c->buf[offset].val_start,
-                 (AnsP8)c->buf[offset].prob);
-    }
-  }
-}
-
-static INLINE void buf_uabs_write_bit(struct BufAnsCoder *c, int bit) {
-  buf_uabs_write(c, bit, 128);
-}
-
-static INLINE void buf_uabs_write_literal(struct BufAnsCoder *c, int literal,
-                                          int bits) {
-  int bit;
-
-  assert(bits < 31);
-  for (bit = bits - 1; bit >= 0; bit--)
-    buf_uabs_write_bit(c, 1 & (literal >> bit));
-}
-#ifdef __cplusplus
-}  // extern "C"
-#endif  // __cplusplus
-#endif  // AOM_DSP_BUF_ANS_H_
--- a/aom_dsp/daalaboolreader.c
+++ b/aom_dsp/daalaboolreader.c
@@ -1,37 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include "aom_dsp/daalaboolreader.h"
-
-int aom_daala_reader_init(daala_reader *r, const uint8_t *buffer, int size) {
-  if (size && !buffer) {
-    return 1;
-  }
-  r->buffer_end = buffer + size;
-  r->buffer = buffer;
-  od_ec_dec_init(&r->ec, buffer, size - 1);
-#if CONFIG_ACCOUNTING
-  r->accounting = NULL;
-#endif
-  return 0;
-}
-
-const uint8_t *aom_daala_reader_find_end(daala_reader *r) {
-  return r->buffer_end;
-}
-
-uint32_t aom_daala_reader_tell(const daala_reader *r) {
-  return od_ec_dec_tell(&r->ec);
-}
-
-uint32_t aom_daala_reader_tell_frac(const daala_reader *r) {
-  return od_ec_dec_tell_frac(&r->ec);
-}
--- a/aom_dsp/daalaboolreader.h
+++ b/aom_dsp/daalaboolreader.h
@@ -1,87 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_DSP_DAALABOOLREADER_H_
-#define AOM_DSP_DAALABOOLREADER_H_
-
-#include "aom/aom_integer.h"
-#include "aom_dsp/entdec.h"
-#include "aom_dsp/prob.h"
-#if CONFIG_ACCOUNTING
-#include "av1/common/accounting.h"
-#endif
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-struct daala_reader {
-  const uint8_t *buffer;
-  const uint8_t *buffer_end;
-  od_ec_dec ec;
-#if CONFIG_ACCOUNTING
-  Accounting *accounting;
-#endif
-};
-
-typedef struct daala_reader daala_reader;
-
-int aom_daala_reader_init(daala_reader *r, const uint8_t *buffer, int size);
-const uint8_t *aom_daala_reader_find_end(daala_reader *r);
-uint32_t aom_daala_reader_tell(const daala_reader *r);
-uint32_t aom_daala_reader_tell_frac(const daala_reader *r);
-
-static INLINE int aom_daala_read(daala_reader *r, int prob) {
-  if (prob == 128) {
-    return od_ec_dec_bits(&r->ec, 1, "aom_bits");
-  } else {
-    int p = ((prob << 15) + (256 - prob)) >> 8;
-    return od_ec_decode_bool_q15(&r->ec, p);
-  }
-}
-
-static INLINE int aom_daala_read_bit(daala_reader *r) {
-  return aom_daala_read(r, 128);
-}
-
-static INLINE int aom_daala_reader_has_error(daala_reader *r) {
-  return r->ec.error;
-}
-
-static INLINE int daala_read_tree_bits(daala_reader *r,
-                                       const aom_tree_index *tree,
-                                       const aom_prob *probs) {
-  aom_tree_index i = 0;
-  do {
-    aom_cdf_prob cdf[16];
-    aom_tree_index index[16];
-    int path[16];
-    int dist[16];
-    int nsymbs;
-    int symb;
-    nsymbs = tree_to_cdf(tree, probs, i, cdf, index, path, dist);
-    symb = od_ec_decode_cdf_q15(&r->ec, cdf, nsymbs);
-    OD_ASSERT(symb >= 0 && symb < nsymbs);
-    i = index[symb];
-  } while (i > 0);
-  return -i;
-}
-
-static INLINE int daala_read_symbol(daala_reader *r, const aom_cdf_prob *cdf,
-                                    int nsymbs) {
-  return od_ec_decode_cdf_q15(&r->ec, cdf, nsymbs);
-}
-
-#ifdef __cplusplus
-}  // extern "C"
-#endif
-
-#endif
--- a/aom_dsp/daalaboolwriter.c
+++ b/aom_dsp/daalaboolwriter.c
@@ -1,32 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <string.h>
-#include "aom_dsp/daalaboolwriter.h"
-
-void aom_daala_start_encode(daala_writer *br, uint8_t *source) {
-  br->buffer = source;
-  br->pos = 0;
-  od_ec_enc_init(&br->ec, 62025);
-}
-
-void aom_daala_stop_encode(daala_writer *br) {
-  uint32_t daala_bytes;
-  unsigned char *daala_data;
-  daala_data = od_ec_enc_done(&br->ec, &daala_bytes);
-  memcpy(br->buffer, daala_data, daala_bytes);
-  br->pos = daala_bytes;
-  /* Prevent ec bitstream from being detected as a superframe marker.
-     Must always be added, so that rawbits knows the exact length of the
-      bitstream. */
-  br->buffer[br->pos++] = 0;
-  od_ec_enc_clear(&br->ec);
-}
--- a/aom_dsp/daalaboolwriter.h
+++ b/aom_dsp/daalaboolwriter.h
@@ -1,90 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_DSP_DAALABOOLWRITER_H_
-#define AOM_DSP_DAALABOOLWRITER_H_
-
-#include "aom_dsp/entenc.h"
-#include "aom_dsp/prob.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-struct daala_writer {
-  unsigned int pos;
-  uint8_t *buffer;
-  od_ec_enc ec;
-};
-
-typedef struct daala_writer daala_writer;
-
-void aom_daala_start_encode(daala_writer *w, uint8_t *buffer);
-void aom_daala_stop_encode(daala_writer *w);
-
-static INLINE void aom_daala_write(daala_writer *w, int bit, int prob) {
-  if (prob == 128) {
-    od_ec_enc_bits(&w->ec, bit, 1);
-  } else {
-    int p = ((prob << 15) + (256 - prob)) >> 8;
-    od_ec_encode_bool_q15(&w->ec, bit, p);
-  }
-}
-
-static INLINE void daala_write_tree_bits(daala_writer *w,
-                                         const aom_tree_index *tree,
-                                         const aom_prob *probs, int bits,
-                                         int len, aom_tree_index i) {
-  aom_tree_index root;
-  root = i;
-  do {
-    aom_cdf_prob cdf[16];
-    aom_tree_index index[16];
-    int path[16];
-    int dist[16];
-    int nsymbs;
-    int symb;
-    int j;
-    /* Compute the CDF of the binary tree using the given probabilities. */
-    nsymbs = tree_to_cdf(tree, probs, root, cdf, index, path, dist);
-    /* Find the symbol to code. */
-    symb = -1;
-    for (j = 0; j < nsymbs; j++) {
-      /* If this symbol codes a leaf node,  */
-      if (index[j] <= 0) {
-        if (len == dist[j] && path[j] == bits) {
-          symb = j;
-          break;
-        }
-      } else {
-        if (len > dist[j] && path[j] == bits >> (len - dist[j])) {
-          symb = j;
-          break;
-        }
-      }
-    }
-    OD_ASSERT(symb != -1);
-    od_ec_encode_cdf_q15(&w->ec, symb, cdf, nsymbs);
-    bits &= (1 << (len - dist[symb])) - 1;
-    len -= dist[symb];
-  } while (len);
-}
-
-static INLINE void daala_write_symbol(daala_writer *w, int symb,
-                                      const aom_cdf_prob *cdf, int nsymbs) {
-  od_ec_encode_cdf_q15(&w->ec, symb, cdf, nsymbs);
-}
-
-#ifdef __cplusplus
-}  // extern "C"
-#endif
-
-#endif
--- a/aom_dsp/deblock.c
+++ b/aom_dsp/deblock.c
@@ -1,195 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- *
- */
-
-#include <stdlib.h>
-#include "aom/aom_integer.h"
-
-const int16_t aom_rv[] = {
-  8,  5,  2,  2,  8,  12, 4,  9,  8,  3,  0,  3,  9,  0,  0,  0,  8,  3,  14,
-  4,  10, 1,  11, 14, 1,  14, 9,  6,  12, 11, 8,  6,  10, 0,  0,  8,  9,  0,
-  3,  14, 8,  11, 13, 4,  2,  9,  0,  3,  9,  6,  1,  2,  3,  14, 13, 1,  8,
-  2,  9,  7,  3,  3,  1,  13, 13, 6,  6,  5,  2,  7,  11, 9,  11, 8,  7,  3,
-  2,  0,  13, 13, 14, 4,  12, 5,  12, 10, 8,  10, 13, 10, 4,  14, 4,  10, 0,
-  8,  11, 1,  13, 7,  7,  14, 6,  14, 13, 2,  13, 5,  4,  4,  0,  10, 0,  5,
-  13, 2,  12, 7,  11, 13, 8,  0,  4,  10, 7,  2,  7,  2,  2,  5,  3,  4,  7,
-  3,  3,  14, 14, 5,  9,  13, 3,  14, 3,  6,  3,  0,  11, 8,  13, 1,  13, 1,
-  12, 0,  10, 9,  7,  6,  2,  8,  5,  2,  13, 7,  1,  13, 14, 7,  6,  7,  9,
-  6,  10, 11, 7,  8,  7,  5,  14, 8,  4,  4,  0,  8,  7,  10, 0,  8,  14, 11,
-  3,  12, 5,  7,  14, 3,  14, 5,  2,  6,  11, 12, 12, 8,  0,  11, 13, 1,  2,
-  0,  5,  10, 14, 7,  8,  0,  4,  11, 0,  8,  0,  3,  10, 5,  8,  0,  11, 6,
-  7,  8,  10, 7,  13, 9,  2,  5,  1,  5,  10, 2,  4,  3,  5,  6,  10, 8,  9,
-  4,  11, 14, 0,  10, 0,  5,  13, 2,  12, 7,  11, 13, 8,  0,  4,  10, 7,  2,
-  7,  2,  2,  5,  3,  4,  7,  3,  3,  14, 14, 5,  9,  13, 3,  14, 3,  6,  3,
-  0,  11, 8,  13, 1,  13, 1,  12, 0,  10, 9,  7,  6,  2,  8,  5,  2,  13, 7,
-  1,  13, 14, 7,  6,  7,  9,  6,  10, 11, 7,  8,  7,  5,  14, 8,  4,  4,  0,
-  8,  7,  10, 0,  8,  14, 11, 3,  12, 5,  7,  14, 3,  14, 5,  2,  6,  11, 12,
-  12, 8,  0,  11, 13, 1,  2,  0,  5,  10, 14, 7,  8,  0,  4,  11, 0,  8,  0,
-  3,  10, 5,  8,  0,  11, 6,  7,  8,  10, 7,  13, 9,  2,  5,  1,  5,  10, 2,
-  4,  3,  5,  6,  10, 8,  9,  4,  11, 14, 3,  8,  3,  7,  8,  5,  11, 4,  12,
-  3,  11, 9,  14, 8,  14, 13, 4,  3,  1,  2,  14, 6,  5,  4,  4,  11, 4,  6,
-  2,  1,  5,  8,  8,  12, 13, 5,  14, 10, 12, 13, 0,  9,  5,  5,  11, 10, 13,
-  9,  10, 13,
-};
-
-void aom_post_proc_down_and_across_mb_row_c(unsigned char *src_ptr,
-                                            unsigned char *dst_ptr,
-                                            int src_pixels_per_line,
-                                            int dst_pixels_per_line, int cols,
-                                            unsigned char *f, int size) {
-  unsigned char *p_src, *p_dst;
-  int row;
-  int col;
-  unsigned char v;
-  unsigned char d[4];
-
-  for (row = 0; row < size; row++) {
-    /* post_proc_down for one row */
-    p_src = src_ptr;
-    p_dst = dst_ptr;
-
-    for (col = 0; col < cols; col++) {
-      unsigned char p_above2 = p_src[col - 2 * src_pixels_per_line];
-      unsigned char p_above1 = p_src[col - src_pixels_per_line];
-      unsigned char p_below1 = p_src[col + src_pixels_per_line];
-      unsigned char p_below2 = p_src[col + 2 * src_pixels_per_line];
-
-      v = p_src[col];
-
-      if ((abs(v - p_above2) < f[col]) && (abs(v - p_above1) < f[col]) &&
-          (abs(v - p_below1) < f[col]) && (abs(v - p_below2) < f[col])) {
-        unsigned char k1, k2, k3;
-        k1 = (p_above2 + p_above1 + 1) >> 1;
-        k2 = (p_below2 + p_below1 + 1) >> 1;
-        k3 = (k1 + k2 + 1) >> 1;
-        v = (k3 + v + 1) >> 1;
-      }
-
-      p_dst[col] = v;
-    }
-
-    /* now post_proc_across */
-    p_src = dst_ptr;
-    p_dst = dst_ptr;
-
-    p_src[-2] = p_src[-1] = p_src[0];
-    p_src[cols] = p_src[cols + 1] = p_src[cols - 1];
-
-    for (col = 0; col < cols; col++) {
-      v = p_src[col];
-
-      if ((abs(v - p_src[col - 2]) < f[col]) &&
-          (abs(v - p_src[col - 1]) < f[col]) &&
-          (abs(v - p_src[col + 1]) < f[col]) &&
-          (abs(v - p_src[col + 2]) < f[col])) {
-        unsigned char k1, k2, k3;
-        k1 = (p_src[col - 2] + p_src[col - 1] + 1) >> 1;
-        k2 = (p_src[col + 2] + p_src[col + 1] + 1) >> 1;
-        k3 = (k1 + k2 + 1) >> 1;
-        v = (k3 + v + 1) >> 1;
-      }
-
-      d[col & 3] = v;
-
-      if (col >= 2) p_dst[col - 2] = d[(col - 2) & 3];
-    }
-
-    /* handle the last two pixels */
-    p_dst[col - 2] = d[(col - 2) & 3];
-    p_dst[col - 1] = d[(col - 1) & 3];
-
-    /* next row */
-    src_ptr += src_pixels_per_line;
-    dst_ptr += dst_pixels_per_line;
-  }
-}
-
-void aom_mbpost_proc_across_ip_c(unsigned char *src, int pitch, int rows,
-                                 int cols, int flimit) {
-  int r, c, i;
-
-  unsigned char *s = src;
-  unsigned char d[16];
-
-  for (r = 0; r < rows; r++) {
-    int sumsq = 0;
-    int sum = 0;
-
-    for (i = -8; i < 0; i++) s[i] = s[0];
-
-    /* 17 avoids valgrind warning - we buffer values in c in d
-     * and only write them when we've read 8 ahead...
-     */
-    for (i = 0; i < 17; i++) s[i + cols] = s[cols - 1];
-
-    for (i = -8; i <= 6; i++) {
-      sumsq += s[i] * s[i];
-      sum += s[i];
-      d[i + 8] = 0;
-    }
-
-    for (c = 0; c < cols + 8; c++) {
-      int x = s[c + 7] - s[c - 8];
-      int y = s[c + 7] + s[c - 8];
-
-      sum += x;
-      sumsq += x * y;
-
-      d[c & 15] = s[c];
-
-      if (sumsq * 15 - sum * sum < flimit) {
-        d[c & 15] = (8 + sum + s[c]) >> 4;
-      }
-
-      s[c - 8] = d[(c - 8) & 15];
-    }
-
-    s += pitch;
-  }
-}
-
-void aom_mbpost_proc_down_c(unsigned char *dst, int pitch, int rows, int cols,
-                            int flimit) {
-  int r, c, i;
-  const int16_t *rv3 = &aom_rv[63 & rand()];
-
-  for (c = 0; c < cols; c++) {
-    unsigned char *s = &dst[c];
-    int sumsq = 0;
-    int sum = 0;
-    unsigned char d[16];
-    const int16_t *rv2 = rv3 + ((c * 17) & 127);
-
-    for (i = -8; i < 0; i++) s[i * pitch] = s[0];
-
-    /* 17 avoids valgrind warning - we buffer values in c in d
-     * and only write them when we've read 8 ahead...
-     */
-    for (i = 0; i < 17; i++) s[(i + rows) * pitch] = s[(rows - 1) * pitch];
-
-    for (i = -8; i <= 6; i++) {
-      sumsq += s[i * pitch] * s[i * pitch];
-      sum += s[i * pitch];
-    }
-
-    for (r = 0; r < rows + 8; r++) {
-      sumsq += s[7 * pitch] * s[7 * pitch] - s[-8 * pitch] * s[-8 * pitch];
-      sum += s[7 * pitch] - s[-8 * pitch];
-      d[r & 15] = s[0];
-
-      if (sumsq * 15 - sum * sum < flimit) {
-        d[r & 15] = (rv2[r & 127] + sum + s[0]) >> 4;
-      }
-      if (r >= 8) s[-8 * pitch] = d[(r - 8) & 15];
-      s += pitch;
-    }
-  }
-}
--- a/aom_dsp/dkboolreader.h
+++ b/aom_dsp/dkboolreader.h
@@ -1,180 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_DSP_DKBOOLREADER_H_
-#define AOM_DSP_DKBOOLREADER_H_
-
-#include <assert.h>
-#include <stddef.h>
-#include <limits.h>
-
-#include "./aom_config.h"
-#if CONFIG_BITSTREAM_DEBUG
-#include <assert.h>
-#include <stdio.h>
-#include "aom_util/debug_util.h"
-#endif  // CONFIG_BITSTREAM_DEBUG
-
-#include "aom_ports/mem.h"
-#include "aom/aomdx.h"
-#include "aom/aom_integer.h"
-#include "aom_dsp/prob.h"
-#if CONFIG_ACCOUNTING
-#include "av1/common/accounting.h"
-#endif
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-typedef size_t BD_VALUE;
-
-#define BD_VALUE_SIZE ((int)sizeof(BD_VALUE) * CHAR_BIT)
-
-// This is meant to be a large, positive constant that can still be efficiently
-// loaded as an immediate (on platforms like ARM, for example).
-// Even relatively modest values like 100 would work fine.
-#define LOTS_OF_BITS 0x40000000
-
-struct aom_dk_reader {
-  // Be careful when reordering this struct, it may impact the cache negatively.
-  BD_VALUE value;
-  unsigned int range;
-  int count;
-  const uint8_t *buffer_start;
-  const uint8_t *buffer_end;
-  const uint8_t *buffer;
-  aom_decrypt_cb decrypt_cb;
-  void *decrypt_state;
-  uint8_t clear_buffer[sizeof(BD_VALUE) + 1];
-#if CONFIG_ACCOUNTING
-  Accounting *accounting;
-#endif
-};
-
-int aom_dk_reader_init(struct aom_dk_reader *r, const uint8_t *buffer,
-                       size_t size, aom_decrypt_cb decrypt_cb,
-                       void *decrypt_state);
-
-void aom_dk_reader_fill(struct aom_dk_reader *r);
-
-const uint8_t *aom_dk_reader_find_end(struct aom_dk_reader *r);
-
-static INLINE uint32_t aom_dk_reader_tell(const struct aom_dk_reader *r) {
-  const uint32_t bits_read = (r->buffer - r->buffer_start) * CHAR_BIT;
-  const int count =
-      (r->count < LOTS_OF_BITS) ? r->count : r->count - LOTS_OF_BITS;
-  assert(r->buffer >= r->buffer_start);
-  return bits_read - (count + CHAR_BIT);
-}
-
-/*The resolution of fractional-precision bit usage measurements, i.e.,
-   3 => 1/8th bits.*/
-#define DK_BITRES (3)
-
-static INLINE uint32_t aom_dk_reader_tell_frac(const struct aom_dk_reader *r) {
-  uint32_t num_bits;
-  uint32_t range;
-  int l;
-  int i;
-  num_bits = aom_dk_reader_tell(r) << DK_BITRES;
-  range = r->range;
-  l = 0;
-  for (i = DK_BITRES; i-- > 0;) {
-    int b;
-    range = range * range >> 7;
-    b = (int)(range >> 8);
-    l = l << 1 | b;
-    range >>= b;
-  }
-  return num_bits - l;
-}
-
-static INLINE int aom_dk_reader_has_error(struct aom_dk_reader *r) {
-  // Check if we have reached the end of the buffer.
-  //
-  // Variable 'count' stores the number of bits in the 'value' buffer, minus
-  // 8. The top byte is part of the algorithm, and the remainder is buffered
-  // to be shifted into it. So if count == 8, the top 16 bits of 'value' are
-  // occupied, 8 for the algorithm and 8 in the buffer.
-  //
-  // When reading a byte from the user's buffer, count is filled with 8 and
-  // one byte is filled into the value buffer. When we reach the end of the
-  // data, count is additionally filled with LOTS_OF_BITS. So when
-  // count == LOTS_OF_BITS - 1, the user's data has been exhausted.
-  //
-  // 1 if we have tried to decode bits after the end of stream was encountered.
-  // 0 No error.
-  return r->count > BD_VALUE_SIZE && r->count < LOTS_OF_BITS;
-}
-
-static INLINE int aom_dk_read(struct aom_dk_reader *r, int prob) {
-  unsigned int bit = 0;
-  BD_VALUE value;
-  BD_VALUE bigsplit;
-  int count;
-  unsigned int range;
-  unsigned int split = (r->range * prob + (256 - prob)) >> CHAR_BIT;
-
-  if (r->count < 0) aom_dk_reader_fill(r);
-
-  value = r->value;
-  count = r->count;
-
-  bigsplit = (BD_VALUE)split << (BD_VALUE_SIZE - CHAR_BIT);
-
-  range = split;
-
-  if (value >= bigsplit) {
-    range = r->range - split;
-    value = value - bigsplit;
-    bit = 1;
-  }
-
-  {
-    register int shift = aom_norm[range];
-    range <<= shift;
-    value <<= shift;
-    count -= shift;
-  }
-  r->value = value;
-  r->count = count;
-  r->range = range;
-
-#if CONFIG_BITSTREAM_DEBUG
-  {
-    int ref_bit, ref_prob;
-    const int queue_r = bitstream_queue_get_read();
-    const int frame_idx = bitstream_queue_get_frame_read();
-    bitstream_queue_pop(&ref_bit, &ref_prob);
-    if (prob != ref_prob) {
-      fprintf(
-          stderr,
-          "\n *** prob error, frame_idx_r %d prob %d ref_prob %d queue_r %d\n",
-          frame_idx, prob, ref_prob, queue_r);
-      assert(0);
-    }
-    if ((int)bit != ref_bit) {
-      fprintf(stderr, "\n *** bit error, frame_idx_r %d bit %d ref_bit %d\n",
-              frame_idx, bit, ref_bit);
-      assert(0);
-    }
-  }
-#endif  // CONFIG_BITSTREAM_DEBUG
-
-  return bit;
-}
-
-#ifdef __cplusplus
-}  // extern "C"
-#endif
-
-#endif  // AOM_DSP_DKBOOLREADER_H_
--- a/aom_dsp/dkboolwriter.c
+++ b/aom_dsp/dkboolwriter.c
@@ -1,44 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <assert.h>
-
-#include "./dkboolwriter.h"
-
-static INLINE void aom_dk_write_bit(aom_dk_writer *w, int bit) {
-  aom_dk_write(w, bit, 128);  // aom_prob_half
-}
-
-void aom_dk_start_encode(aom_dk_writer *br, uint8_t *source) {
-  br->lowvalue = 0;
-  br->range = 255;
-  br->count = -24;
-  br->buffer = source;
-  br->pos = 0;
-  aom_dk_write_bit(br, 0);
-}
-
-void aom_dk_stop_encode(aom_dk_writer *br) {
-  int i;
-
-#if CONFIG_BITSTREAM_DEBUG
-  bitstream_queue_set_skip_write(1);
-#endif  // CONFIG_BITSTREAM_DEBUG
-
-  for (i = 0; i < 32; i++) aom_dk_write_bit(br, 0);
-
-#if CONFIG_BITSTREAM_DEBUG
-  bitstream_queue_set_skip_write(0);
-#endif  // CONFIG_BITSTREAM_DEBUG
-
-  // Ensure there's no ambiguous collision with any index marker bytes
-  if ((br->buffer[br->pos - 1] & 0xe0) == 0xc0) br->buffer[br->pos++] = 0;
-}
--- a/aom_dsp/dkboolwriter.h
+++ b/aom_dsp/dkboolwriter.h
@@ -1,104 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_DSP_DKBOOLWRITER_H_
-#define AOM_DSP_DKBOOLWRITER_H_
-
-#include "./aom_config.h"
-
-#if CONFIG_BITSTREAM_DEBUG
-#include <stdio.h>
-#include "aom_util/debug_util.h"
-#endif  // CONFIG_BITSTREAM_DEBUG
-
-#include "aom_dsp/prob.h"
-#include "aom_ports/mem.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-typedef struct aom_dk_writer {
-  unsigned int lowvalue;
-  unsigned int range;
-  int count;
-  unsigned int pos;
-  uint8_t *buffer;
-} aom_dk_writer;
-
-void aom_dk_start_encode(aom_dk_writer *bc, uint8_t *buffer);
-void aom_dk_stop_encode(aom_dk_writer *bc);
-
-static INLINE void aom_dk_write(aom_dk_writer *br, int bit, int probability) {
-  unsigned int split;
-  int count = br->count;
-  unsigned int range = br->range;
-  unsigned int lowvalue = br->lowvalue;
-  register int shift;
-
-#if CONFIG_BITSTREAM_DEBUG
-  // int queue_r = 0;
-  // int frame_idx_r = 0;
-  // int queue_w = bitstream_queue_get_write();
-  // int frame_idx_w = bitstream_queue_get_frame_write();
-  // if (frame_idx_w == frame_idx_r && queue_w == queue_r) {
-  //   fprintf(stderr, "\n *** bitstream queue at frame_idx_w %d queue_w %d\n",
-  //   frame_idx_w, queue_w);
-  // }
-  bitstream_queue_push(bit, probability);
-#endif  // CONFIG_BITSTREAM_DEBUG
-
-  split = 1 + (((range - 1) * probability) >> 8);
-
-  range = split;
-
-  if (bit) {
-    lowvalue += split;
-    range = br->range - split;
-  }
-
-  shift = aom_norm[range];
-
-  range <<= shift;
-  count += shift;
-
-  if (count >= 0) {
-    int offset = shift - count;
-
-    if ((lowvalue << (offset - 1)) & 0x80000000) {
-      int x = br->pos - 1;
-
-      while (x >= 0 && br->buffer[x] == 0xff) {
-        br->buffer[x] = 0;
-        x--;
-      }
-
-      br->buffer[x] += 1;
-    }
-
-    br->buffer[br->pos++] = (lowvalue >> (24 - offset));
-    lowvalue <<= offset;
-    shift = count;
-    lowvalue &= 0xffffff;
-    count -= 8;
-  }
-
-  lowvalue <<= shift;
-  br->count = count;
-  br->lowvalue = lowvalue;
-  br->range = range;
-}
-
-#ifdef __cplusplus
-}  // extern "C"
-#endif
-
-#endif  // AOM_DSP_DKBOOLWRITER_H_
--- a/aom_dsp/entcode.c
+++ b/aom_dsp/entcode.c
@@ -1,80 +0,0 @@
-/*
- * Copyright (c) 2001-2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifdef HAVE_CONFIG_H
-#include "./config.h"
-#endif
-
-#include "aom_dsp/entcode.h"
-
-/*CDFs for uniform probability distributions of small sizes (2 through 16,
-   inclusive).*/
-// clang-format off
-const uint16_t OD_UNIFORM_CDFS_Q15[135] = {
-  16384, 32768,
-  10923, 21845, 32768,
-  8192,  16384, 24576, 32768,
-  6554,  13107, 19661, 26214, 32768,
-  5461,  10923, 16384, 21845, 27307, 32768,
-  4681,   9362, 14043, 18725, 23406, 28087, 32768,
-  4096,   8192, 12288, 16384, 20480, 24576, 28672, 32768,
-  3641,   7282, 10923, 14564, 18204, 21845, 25486, 29127, 32768,
-  3277,   6554,  9830, 13107, 16384, 19661, 22938, 26214, 29491, 32768,
-  2979,   5958,  8937, 11916, 14895, 17873, 20852, 23831, 26810, 29789, 32768,
-  2731,   5461,  8192, 10923, 13653, 16384, 19115, 21845, 24576, 27307, 30037,
-  32768,
-  2521,   5041,  7562, 10082, 12603, 15124, 17644, 20165, 22686, 25206, 27727,
-  30247, 32768,
-  2341,   4681,  7022,  9362, 11703, 14043, 16384, 18725, 21065, 23406, 25746,
-  28087, 30427, 32768,
-  2185,   4369,  6554,  8738, 10923, 13107, 15292, 17476, 19661, 21845, 24030,
-  26214, 28399, 30583, 32768,
-  2048,   4096,  6144,  8192, 10240, 12288, 14336, 16384, 18432, 20480, 22528,
-  24576, 26624, 28672, 30720, 32768
-};
-// clang-format on
-
-/*Given the current total integer number of bits used and the current value of
-   rng, computes the fractional number of bits used to OD_BITRES precision.
-  This is used by od_ec_enc_tell_frac() and od_ec_dec_tell_frac().
-  nbits_total: The number of whole bits currently used, i.e., the value
-                returned by od_ec_enc_tell() or od_ec_dec_tell().
-  rng: The current value of rng from either the encoder or decoder state.
-  Return: The number of bits scaled by 2**OD_BITRES.
-          This will always be slightly larger than the exact value (e.g., all
-           rounding error is in the positive direction).*/
-uint32_t od_ec_tell_frac(uint32_t nbits_total, uint32_t rng) {
-  uint32_t nbits;
-  int l;
-  int i;
-  /*To handle the non-integral number of bits still left in the encoder/decoder
-     state, we compute the worst-case number of bits of val that must be
-     encoded to ensure that the value is inside the range for any possible
-     subsequent bits.
-    The computation here is independent of val itself (the decoder does not
-     even track that value), even though the real number of bits used after
-     od_ec_enc_done() may be 1 smaller if rng is a power of two and the
-     corresponding trailing bits of val are all zeros.
-    If we did try to track that special case, then coding a value with a
-     probability of 1/(1 << n) might sometimes appear to use more than n bits.
-    This may help explain the surprising result that a newly initialized
-     encoder or decoder claims to have used 1 bit.*/
-  nbits = nbits_total << OD_BITRES;
-  l = 0;
-  for (i = OD_BITRES; i-- > 0;) {
-    int b;
-    rng = rng * rng >> 15;
-    b = (int)(rng >> 16);
-    l = l << 1 | b;
-    rng >>= b;
-  }
-  return nbits - l;
-}
--- a/aom_dsp/entcode.h
+++ b/aom_dsp/entcode.h
@@ -1,105 +0,0 @@
-/*
- * Copyright (c) 2001-2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#if !defined(_entcode_H)
-#define _entcode_H (1)
-#include <limits.h>
-#include <stddef.h>
-#include "av1/common/odintrin.h"
-
-/*Set this flag 1 to enable a "reduced overhead" version of the entropy coder.
-  This uses a partition function that more accurately follows the input
-   probability estimates at the expense of some additional CPU cost (though
-   still an order of magnitude less than a full division).
-
-  In classic arithmetic coding, the partition function maps a value x in the
-   range [0, ft] to a value y in [0, r] with 0 < ft <= r via
-    y = x*r/ft.
-  Any deviation from this value increases coding inefficiency.
-
-  To avoid divisions, we require ft <= r < 2*ft (enforcing it by shifting up
-   ft if necessary), and replace that function with
-    y = x + OD_MINI(x, r - ft).
-  This counts values of x smaller than r - ft double compared to values larger
-   than r - ft, which over-estimates the probability of symbols at the start of
-   the alphabet, and under-estimates the probability of symbols at the end of
-   the alphabet.
-  The overall coding inefficiency assuming accurate probability models and
-   independent symbols is in the 1% range, which is similar to that of CABAC.
-
-  To reduce overhead even further, we split this into two cases:
-  1) r - ft > ft - (r - ft).
-     That is, we have more values of x that are double-counted than
-      single-counted.
-     In this case, we still double-count the first 2*r - 3*ft values of x, but
-      after that we alternate between single-counting and double-counting for
-      the rest.
-  2) r - ft < ft - (r - ft).
-     That is, we have more values of x that are single-counted than
-      double-counted.
-     In this case, we alternate between single-counting and double-counting for
-      the first 2*(r - ft) values of x, and single-count the rest.
-  For two equiprobable symbols in different places in the alphabet, this
-   reduces the maximum ratio of over-estimation to under-estimation from 2:1
-   for the previous partition function to either 4:3 or 3:2 (for each of the
-   two cases above, respectively), assuming symbol probabilities significantly
-   greater than 1/32768.
-  That reduces the worst-case per-symbol overhead from 1 bit to 0.58 bits.
-
-  The resulting function is
-    e = OD_MAXI(2*r - 3*ft, 0);
-    y = x + OD_MINI(x, e) + OD_MINI(OD_MAXI(x - e, 0) >> 1, r - ft).
-  Here, e is a value that is greater than 0 in case 1, and 0 in case 2.
-  This function is about 3 times as expensive to evaluate as the high-overhead
-   version, but still an order of magnitude cheaper than a division, since it
-   is composed only of very simple operations.
-  Because we want to fit in 16-bit registers and must use unsigned values to do
-   so, we use saturating subtraction to enforce the maximums with 0.
-
-  Enabling this reduces the measured overhead in ectest from 0.805% to 0.621%
-   (vs. 0.022% for the division-based partition function with r much greater
-   than ft).
-  It improves performance on ntt-short-1 by about 0.3%.*/
-#define OD_EC_REDUCED_OVERHEAD (1)
-
-/*OPT: od_ec_window must be at least 32 bits, but if you have fast arithmetic
-   on a larger type, you can speed up the decoder by using it here.*/
-typedef uint32_t od_ec_window;
-
-#define OD_EC_WINDOW_SIZE ((int)sizeof(od_ec_window) * CHAR_BIT)
-
-/*Unsigned subtraction with unsigned saturation.
-  This implementation of the macro is intentionally chosen to increase the
-   number of common subexpressions in the reduced-overhead partition function.
-  This matters for C code, but it would not for hardware with a saturating
-   subtraction instruction.*/
-#define OD_SUBSATU(a, b) ((a)-OD_MINI(a, b))
-
-/*The number of bits to use for the range-coded part of unsigned integers.*/
-#define OD_EC_UINT_BITS (4)
-
-/*The resolution of fractional-precision bit usage measurements, i.e.,
-   3 => 1/8th bits.*/
-#define OD_BITRES (3)
-
-extern const uint16_t OD_UNIFORM_CDFS_Q15[135];
-
-/*Returns a Q15 CDF for a uniform probability distribution of the given size.
-  n: The size of the distribution.
-     This must be at least 2, and no more than 16.*/
-#define OD_UNIFORM_CDF_Q15(n) (OD_UNIFORM_CDFS_Q15 + ((n) * ((n)-1) >> 1) - 1)
-
-/*See entcode.c for further documentation.*/
-
-OD_WARN_UNUSED_RESULT uint32_t od_ec_tell_frac(uint32_t nbits_total,
-                                               uint32_t rng);
-
-#endif
--- a/aom_dsp/entdec.c
+++ b/aom_dsp/entdec.c
@@ -1,494 +0,0 @@
-/*
- * Copyright (c) 2001-2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifdef HAVE_CONFIG_H
-#include "./config.h"
-#endif
-
-#include "aom_dsp/entdec.h"
-
-/*A range decoder.
-  This is an entropy decoder based upon \cite{Mar79}, which is itself a
-   rediscovery of the FIFO arithmetic code introduced by \cite{Pas76}.
-  It is very similar to arithmetic encoding, except that encoding is done with
-   digits in any base, instead of with bits, and so it is faster when using
-   larger bases (i.e.: a byte).
-  The author claims an average waste of $\frac{1}{2}\log_b(2b)$ bits, where $b$
-   is the base, longer than the theoretical optimum, but to my knowledge there
-   is no published justification for this claim.
-  This only seems true when using near-infinite precision arithmetic so that
-   the process is carried out with no rounding errors.
-
-  An excellent description of implementation details is available at
-   http://www.arturocampos.com/ac_range.html
-  A recent work \cite{MNW98} which proposes several changes to arithmetic
-   encoding for efficiency actually re-discovers many of the principles
-   behind range encoding, and presents a good theoretical analysis of them.
-
-  End of stream is handled by writing out the smallest number of bits that
-   ensures that the stream will be correctly decoded regardless of the value of
-   any subsequent bits.
-  od_ec_dec_tell() can be used to determine how many bits were needed to decode
-   all the symbols thus far; other data can be packed in the remaining bits of
-   the input buffer.
-  @PHDTHESIS{Pas76,
-    author="Richard Clark Pasco",
-    title="Source coding algorithms for fast data compression",
-    school="Dept. of Electrical Engineering, Stanford University",
-    address="Stanford, CA",
-    month=May,
-    year=1976,
-    URL="http://www.richpasco.org/scaffdc.pdf"
-  }
-  @INPROCEEDINGS{Mar79,
-   author="Martin, G.N.N.",
-   title="Range encoding: an algorithm for removing redundancy from a digitised
-    message",
-   booktitle="Video & Data Recording Conference",
-   year=1979,
-   address="Southampton",
-   month=Jul,
-   URL="http://www.compressconsult.com/rangecoder/rngcod.pdf.gz"
-  }
-  @ARTICLE{MNW98,
-   author="Alistair Moffat and Radford Neal and Ian H. Witten",
-   title="Arithmetic Coding Revisited",
-   journal="{ACM} Transactions on Information Systems",
-   year=1998,
-   volume=16,
-   number=3,
-   pages="256--294",
-   month=Jul,
-   URL="http://researchcommons.waikato.ac.nz/bitstream/handle/10289/78/content.pdf"
-  }*/
-
-/*This is meant to be a large, positive constant that can still be efficiently
-   loaded as an immediate (on platforms like ARM, for example).
-  Even relatively modest values like 100 would work fine.*/
-#define OD_EC_LOTS_OF_BITS (0x4000)
-
-static void od_ec_dec_refill(od_ec_dec *dec) {
-  int s;
-  od_ec_window dif;
-  int16_t cnt;
-  const unsigned char *bptr;
-  const unsigned char *end;
-  dif = dec->dif;
-  cnt = dec->cnt;
-  bptr = dec->bptr;
-  end = dec->end;
-  s = OD_EC_WINDOW_SIZE - 9 - (cnt + 15);
-  for (; s >= 0 && bptr < end; s -= 8, bptr++) {
-    OD_ASSERT(s <= OD_EC_WINDOW_SIZE - 8);
-    dif |= (od_ec_window)bptr[0] << s;
-    cnt += 8;
-  }
-  if (bptr >= end) {
-    dec->tell_offs += OD_EC_LOTS_OF_BITS - cnt;
-    cnt = OD_EC_LOTS_OF_BITS;
-  }
-  dec->dif = dif;
-  dec->cnt = cnt;
-  dec->bptr = bptr;
-}
-
-/*Takes updated dif and range values, renormalizes them so that
-   32768 <= rng < 65536 (reading more bytes from the stream into dif if
-   necessary), and stores them back in the decoder context.
-  dif: The new value of dif.
-  rng: The new value of the range.
-  ret: The value to return.
-  Return: ret.
-          This allows the compiler to jump to this function via a tail-call.*/
-static int od_ec_dec_normalize(od_ec_dec *dec, od_ec_window dif, unsigned rng,
-                               int ret) {
-  int d;
-  OD_ASSERT(rng <= 65535U);
-  d = 16 - OD_ILOG_NZ(rng);
-  dec->cnt -= d;
-  dec->dif = dif << d;
-  dec->rng = rng << d;
-  if (dec->cnt < 0) od_ec_dec_refill(dec);
-  return ret;
-}
-
-/*Initializes the decoder.
-  buf: The input buffer to use.
-  storage: The size of the input buffer, in bytes.*/
-void od_ec_dec_init(od_ec_dec *dec, const unsigned char *buf,
-                    uint32_t storage) {
-  dec->buf = buf;
-  dec->eptr = buf + storage;
-  dec->end_window = 0;
-  dec->nend_bits = 0;
-  dec->tell_offs = 10 - (OD_EC_WINDOW_SIZE - 8);
-  dec->end = buf + storage;
-  dec->bptr = buf;
-  dec->dif = 0;
-  dec->rng = 0x8000;
-  dec->cnt = -15;
-  dec->error = 0;
-  od_ec_dec_refill(dec);
-}
-
-/*Decode a bit that has an fz/ft probability of being a zero.
-  fz: The probability that the bit is zero, scaled by ft.
-  ft: The total probability.
-      This must be at least 16384 and no more than 32768.
-  Return: The value decoded (0 or 1).*/
-int od_ec_decode_bool(od_ec_dec *dec, unsigned fz, unsigned ft) {
-  od_ec_window dif;
-  od_ec_window vw;
-  unsigned r;
-  int s;
-  unsigned v;
-  int ret;
-  OD_ASSERT(0 < fz);
-  OD_ASSERT(fz < ft);
-  OD_ASSERT(16384 <= ft);
-  OD_ASSERT(ft <= 32768U);
-  dif = dec->dif;
-  r = dec->rng;
-  OD_ASSERT(dif >> (OD_EC_WINDOW_SIZE - 16) < r);
-  OD_ASSERT(ft <= r);
-  s = r - ft >= ft;
-  ft <<= s;
-  fz <<= s;
-  OD_ASSERT(r - ft < ft);
-#if OD_EC_REDUCED_OVERHEAD
-  {
-    unsigned d;
-    unsigned e;
-    d = r - ft;
-    e = OD_SUBSATU(2 * d, ft);
-    v = fz + OD_MINI(fz, e) + OD_MINI(OD_SUBSATU(fz, e) >> 1, d);
-  }
-#else
-  v = fz + OD_MINI(fz, r - ft);
-#endif
-  vw = (od_ec_window)v << (OD_EC_WINDOW_SIZE - 16);
-  ret = dif >= vw;
-  if (ret) dif -= vw;
-  r = ret ? r - v : v;
-  return od_ec_dec_normalize(dec, dif, r, ret);
-}
-
-/*Decode a bit that has an fz probability of being a zero in Q15.
-  This is a simpler, lower overhead version of od_ec_decode_bool() for use when
-   ft == 32768.
-  To be decoded properly by this function, symbols cannot have been encoded by
-   od_ec_encode(), but must have been encoded with one of the equivalent _q15()
-   or _dyadic() functions instead.
-  fz: The probability that the bit is zero, scaled by 32768.
-  Return: The value decoded (0 or 1).*/
-int od_ec_decode_bool_q15(od_ec_dec *dec, unsigned fz) {
-  od_ec_window dif;
-  od_ec_window vw;
-  unsigned r;
-  unsigned r_new;
-  unsigned v;
-  int ret;
-  OD_ASSERT(0 < fz);
-  OD_ASSERT(fz < 32768U);
-  dif = dec->dif;
-  r = dec->rng;
-  OD_ASSERT(dif >> (OD_EC_WINDOW_SIZE - 16) < r);
-  OD_ASSERT(32768U <= r);
-  v = fz * (uint32_t)r >> 15;
-  vw = (od_ec_window)v << (OD_EC_WINDOW_SIZE - 16);
-  ret = 0;
-  r_new = v;
-  if (dif >= vw) {
-    r_new = r - v;
-    dif -= vw;
-    ret = 1;
-  }
-  return od_ec_dec_normalize(dec, dif, r_new, ret);
-}
-
-/*Decodes a symbol given a cumulative distribution function (CDF) table.
-  cdf: The CDF, such that symbol s falls in the range
-        [s > 0 ? cdf[s - 1] : 0, cdf[s]).
-       The values must be monotonically non-decreasing, and cdf[nsyms - 1]
-        must be at least 16384, and no more than 32768.
-  nsyms: The number of symbols in the alphabet.
-         This should be at most 16.
-  Return: The decoded symbol s.*/
-int od_ec_decode_cdf(od_ec_dec *dec, const uint16_t *cdf, int nsyms) {
-  od_ec_window dif;
-  unsigned r;
-  unsigned c;
-  unsigned d;
-#if OD_EC_REDUCED_OVERHEAD
-  unsigned e;
-#endif
-  int s;
-  unsigned u;
-  unsigned v;
-  unsigned q;
-  unsigned fl;
-  unsigned fh;
-  unsigned ft;
-  int ret;
-  dif = dec->dif;
-  r = dec->rng;
-  OD_ASSERT(dif >> (OD_EC_WINDOW_SIZE - 16) < r);
-  OD_ASSERT(nsyms > 0);
-  ft = cdf[nsyms - 1];
-  OD_ASSERT(16384 <= ft);
-  OD_ASSERT(ft <= 32768U);
-  OD_ASSERT(ft <= r);
-  s = r - ft >= ft;
-  ft <<= s;
-  d = r - ft;
-  OD_ASSERT(d < ft);
-  c = (unsigned)(dif >> (OD_EC_WINDOW_SIZE - 16));
-  q = OD_MAXI((int)(c >> 1), (int)(c - d));
-#if OD_EC_REDUCED_OVERHEAD
-  e = OD_SUBSATU(2 * d, ft);
-  /*The correctness of this inverse partition function is not obvious, but it
-     was checked exhaustively for all possible values of r, ft, and c.
-    TODO: It should be possible to optimize this better than the compiler,
-     given that we do not care about the accuracy of negative results (as we
-     will not use them).
-    It would also be nice to get rid of the 32-bit dividend, as it requires a
-     32x32->64 bit multiply to invert.*/
-  q = OD_MAXI((int)q, (int)((2 * (int32_t)c + 1 - (int32_t)e) / 3));
-#endif
-  q >>= s;
-  OD_ASSERT(q<ft>> s);
-  fl = 0;
-  ret = 0;
-  for (fh = cdf[ret]; fh <= q; fh = cdf[++ret]) fl = fh;
-  OD_ASSERT(fh <= ft >> s);
-  fl <<= s;
-  fh <<= s;
-#if OD_EC_REDUCED_OVERHEAD
-  u = fl + OD_MINI(fl, e) + OD_MINI(OD_SUBSATU(fl, e) >> 1, d);
-  v = fh + OD_MINI(fh, e) + OD_MINI(OD_SUBSATU(fh, e) >> 1, d);
-#else
-  u = fl + OD_MINI(fl, d);
-  v = fh + OD_MINI(fh, d);
-#endif
-  r = v - u;
-  dif -= (od_ec_window)u << (OD_EC_WINDOW_SIZE - 16);
-  return od_ec_dec_normalize(dec, dif, r, ret);
-}
-
-/*Decodes a symbol given a cumulative distribution function (CDF) table.
-  cdf: The CDF, such that symbol s falls in the range
-        [s > 0 ? cdf[s - 1] : 0, cdf[s]).
-       The values must be monotonically non-decreasing, and cdf[nsyms - 1]
-       must be at least 2, and no more than 32768.
-  nsyms: The number of symbols in the alphabet.
-         This should be at most 16.
-  Return: The decoded symbol s.*/
-int od_ec_decode_cdf_unscaled(od_ec_dec *dec, const uint16_t *cdf, int nsyms) {
-  od_ec_window dif;
-  unsigned r;
-  unsigned c;
-  unsigned d;
-#if OD_EC_REDUCED_OVERHEAD
-  unsigned e;
-#endif
-  int s;
-  unsigned u;
-  unsigned v;
-  unsigned q;
-  unsigned fl;
-  unsigned fh;
-  unsigned ft;
-  int ret;
-  dif = dec->dif;
-  r = dec->rng;
-  OD_ASSERT(dif >> (OD_EC_WINDOW_SIZE - 16) < r);
-  OD_ASSERT(nsyms > 0);
-  ft = cdf[nsyms - 1];
-  OD_ASSERT(2 <= ft);
-  OD_ASSERT(ft <= 32768U);
-  s = 15 - OD_ILOG_NZ(ft - 1);
-  ft <<= s;
-  OD_ASSERT(ft <= r);
-  if (r - ft >= ft) {
-    ft <<= 1;
-    s++;
-  }
-  d = r - ft;
-  OD_ASSERT(d < ft);
-  c = (unsigned)(dif >> (OD_EC_WINDOW_SIZE - 16));
-  q = OD_MAXI((int)(c >> 1), (int)(c - d));
-#if OD_EC_REDUCED_OVERHEAD
-  e = OD_SUBSATU(2 * d, ft);
-  /*TODO: See TODO above.*/
-  q = OD_MAXI((int)q, (int)((2 * (int32_t)c + 1 - (int32_t)e) / 3));
-#endif
-  q >>= s;
-  OD_ASSERT(q<ft>> s);
-  fl = 0;
-  ret = 0;
-  for (fh = cdf[ret]; fh <= q; fh = cdf[++ret]) fl = fh;
-  OD_ASSERT(fh <= ft >> s);
-  fl <<= s;
-  fh <<= s;
-#if OD_EC_REDUCED_OVERHEAD
-  u = fl + OD_MINI(fl, e) + OD_MINI(OD_SUBSATU(fl, e) >> 1, d);
-  v = fh + OD_MINI(fh, e) + OD_MINI(OD_SUBSATU(fh, e) >> 1, d);
-#else
-  u = fl + OD_MINI(fl, d);
-  v = fh + OD_MINI(fh, d);
-#endif
-  r = v - u;
-  dif -= (od_ec_window)u << (OD_EC_WINDOW_SIZE - 16);
-  return od_ec_dec_normalize(dec, dif, r, ret);
-}
-
-/*Decodes a symbol given a cumulative distribution function (CDF) table that
-   sums to a power of two.
-  This is a simpler, lower overhead version of od_ec_decode_cdf() for use when
-   cdf[nsyms - 1] is a power of two.
-  To be decoded properly by this function, symbols cannot have been encoded by
-   od_ec_encode(), but must have been encoded with one of the equivalent _q15()
-   functions instead.
-  cdf: The CDF, such that symbol s falls in the range
-        [s > 0 ? cdf[s - 1] : 0, cdf[s]).
-       The values must be monotonically non-decreasing, and cdf[nsyms - 1]
-       must be exactly 1 << ftb.
-  nsyms: The number of symbols in the alphabet.
-         This should be at most 16.
-  ftb: The number of bits of precision in the cumulative distribution.
-       This must be no more than 15.
-  Return: The decoded symbol s.*/
-int od_ec_decode_cdf_unscaled_dyadic(od_ec_dec *dec, const uint16_t *cdf,
-                                     int nsyms, unsigned ftb) {
-  od_ec_window dif;
-  unsigned r;
-  unsigned c;
-  unsigned u;
-  unsigned v;
-  int ret;
-  (void)nsyms;
-  dif = dec->dif;
-  r = dec->rng;
-  OD_ASSERT(dif >> (OD_EC_WINDOW_SIZE - 16) < r);
-  OD_ASSERT(ftb <= 15);
-  OD_ASSERT(cdf[nsyms - 1] == 1U << ftb);
-  OD_ASSERT(32768U <= r);
-  c = (unsigned)(dif >> (OD_EC_WINDOW_SIZE - 16));
-  v = 0;
-  ret = -1;
-  do {
-    u = v;
-    v = cdf[++ret] * (uint32_t)r >> ftb;
-  } while (v <= c);
-  OD_ASSERT(v <= r);
-  r = v - u;
-  dif -= (od_ec_window)u << (OD_EC_WINDOW_SIZE - 16);
-  return od_ec_dec_normalize(dec, dif, r, ret);
-}
-
-/*Decodes a symbol given a cumulative distribution function (CDF) table in Q15.
-  This is a simpler, lower overhead version of od_ec_decode_cdf() for use when
-   cdf[nsyms - 1] == 32768.
-  To be decoded properly by this function, symbols cannot have been encoded by
-   od_ec_encode(), but must have been encoded with one of the equivalent _q15()
-   or _dyadic() functions instead.
-  cdf: The CDF, such that symbol s falls in the range
-        [s > 0 ? cdf[s - 1] : 0, cdf[s]).
-       The values must be monotonically non-decreasing, and cdf[nsyms - 1]
-        must be 32768.
-  nsyms: The number of symbols in the alphabet.
-         This should be at most 16.
-  Return: The decoded symbol s.*/
-int od_ec_decode_cdf_q15(od_ec_dec *dec, const uint16_t *cdf, int nsyms) {
-  return od_ec_decode_cdf_unscaled_dyadic(dec, cdf, nsyms, 15);
-}
-
-/*Extracts a raw unsigned integer with a non-power-of-2 range from the stream.
-  The integer must have been encoded with od_ec_enc_uint().
-  ft: The number of integers that can be decoded (one more than the max).
-      This must be at least 2, and no more than 2**29.
-  Return: The decoded integer.*/
-uint32_t od_ec_dec_uint(od_ec_dec *dec, uint32_t ft) {
-  OD_ASSERT(ft >= 2);
-  OD_ASSERT(ft <= (uint32_t)1 << (25 + OD_EC_UINT_BITS));
-  if (ft > 1U << OD_EC_UINT_BITS) {
-    uint32_t t;
-    int ft1;
-    int ftb;
-    ft--;
-    ftb = OD_ILOG_NZ(ft) - OD_EC_UINT_BITS;
-    ft1 = (int)(ft >> ftb) + 1;
-    t = od_ec_decode_cdf_q15(dec, OD_UNIFORM_CDF_Q15(ft1), ft1);
-    t = t << ftb | od_ec_dec_bits(dec, ftb, "");
-    if (t <= ft) return t;
-    dec->error = 1;
-    return ft;
-  }
-  return od_ec_decode_cdf_q15(dec, OD_UNIFORM_CDF_Q15(ft), (int)ft);
-}
-
-/*Extracts a sequence of raw bits from the stream.
-  The bits must have been encoded with od_ec_enc_bits().
-  ftb: The number of bits to extract.
-       This must be between 0 and 25, inclusive.
-  Return: The decoded bits.*/
-uint32_t od_ec_dec_bits_(od_ec_dec *dec, unsigned ftb) {
-  od_ec_window window;
-  int available;
-  uint32_t ret;
-  OD_ASSERT(ftb <= 25);
-  window = dec->end_window;
-  available = dec->nend_bits;
-  if ((unsigned)available < ftb) {
-    const unsigned char *buf;
-    const unsigned char *eptr;
-    buf = dec->buf;
-    eptr = dec->eptr;
-    OD_ASSERT(available <= OD_EC_WINDOW_SIZE - 8);
-    do {
-      if (eptr <= buf) {
-        dec->tell_offs += OD_EC_LOTS_OF_BITS - available;
-        available = OD_EC_LOTS_OF_BITS;
-        break;
-      }
-      window |= (od_ec_window) * --eptr << available;
-      available += 8;
-    } while (available <= OD_EC_WINDOW_SIZE - 8);
-    dec->eptr = eptr;
-  }
-  ret = (uint32_t)window & (((uint32_t)1 << ftb) - 1);
-  window >>= ftb;
-  available -= ftb;
-  dec->end_window = window;
-  dec->nend_bits = available;
-  return ret;
-}
-
-/*Returns the number of bits "used" by the decoded symbols so far.
-  This same number can be computed in either the encoder or the decoder, and is
-   suitable for making coding decisions.
-  Return: The number of bits.
-          This will always be slightly larger than the exact value (e.g., all
-           rounding error is in the positive direction).*/
-int od_ec_dec_tell(const od_ec_dec *dec) {
-  return ((dec->end - dec->eptr) + (dec->bptr - dec->buf)) * 8 - dec->cnt -
-         dec->nend_bits + dec->tell_offs;
-}
-
-/*Returns the number of bits "used" by the decoded symbols so far.
-  This same number can be computed in either the encoder or the decoder, and is
-   suitable for making coding decisions.
-  Return: The number of bits scaled by 2**OD_BITRES.
-          This will always be slightly larger than the exact value (e.g., all
-           rounding error is in the positive direction).*/
-uint32_t od_ec_dec_tell_frac(const od_ec_dec *dec) {
-  return od_ec_tell_frac(od_ec_dec_tell(dec), dec->rng);
-}
--- a/aom_dsp/entdec.h
+++ b/aom_dsp/entdec.h
@@ -1,101 +0,0 @@
-/*
- * Copyright (c) 2001-2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#if !defined(_entdec_H)
-#define _entdec_H (1)
-#include <limits.h>
-#include "aom_dsp/entcode.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-typedef struct od_ec_dec od_ec_dec;
-
-#if OD_ACCOUNTING
-#define OD_ACC_STR , char *acc_str
-#define od_ec_dec_bits(dec, ftb, str) od_ec_dec_bits_(dec, ftb, str)
-#else
-#define OD_ACC_STR
-#define od_ec_dec_bits(dec, ftb, str) od_ec_dec_bits_(dec, ftb)
-#endif
-
-/*The entropy decoder context.*/
-struct od_ec_dec {
-  /*The start of the current input buffer.*/
-  const unsigned char *buf;
-  /*The read pointer for the raw bits.*/
-  const unsigned char *eptr;
-  /*Bits that will be read from/written at the end.*/
-  od_ec_window end_window;
-  /*Number of valid bits in end_window.*/
-  int nend_bits;
-  /*An offset used to keep track of tell after reaching the end of the stream.
-    This is constant throughout most of the decoding process, but becomes
-     important once we hit the end of the buffer and stop incrementing pointers
-     (and instead pretend cnt/nend_bits have lots of bits).*/
-  int32_t tell_offs;
-  /*The end of the current input buffer.*/
-  const unsigned char *end;
-  /*The read pointer for the entropy-coded bits.*/
-  const unsigned char *bptr;
-  /*The difference between the coded value and the low end of the current
-     range.*/
-  od_ec_window dif;
-  /*The number of values in the current range.*/
-  uint16_t rng;
-  /*The number of bits of data in the current value.*/
-  int16_t cnt;
-  /*Nonzero if an error occurred.*/
-  int error;
-};
-
-/*See entdec.c for further documentation.*/
-
-void od_ec_dec_init(od_ec_dec *dec, const unsigned char *buf, uint32_t storage)
-    OD_ARG_NONNULL(1) OD_ARG_NONNULL(2);
-
-OD_WARN_UNUSED_RESULT int od_ec_decode_bool(od_ec_dec *dec, unsigned fz,
-                                            unsigned ft) OD_ARG_NONNULL(1);
-OD_WARN_UNUSED_RESULT int od_ec_decode_bool_q15(od_ec_dec *dec, unsigned fz)
-    OD_ARG_NONNULL(1);
-OD_WARN_UNUSED_RESULT int od_ec_decode_cdf(od_ec_dec *dec, const uint16_t *cdf,
-                                           int nsyms) OD_ARG_NONNULL(1)
-    OD_ARG_NONNULL(2);
-OD_WARN_UNUSED_RESULT int od_ec_decode_cdf_q15(od_ec_dec *dec,
-                                               const uint16_t *cdf, int nsyms)
-    OD_ARG_NONNULL(1) OD_ARG_NONNULL(2);
-OD_WARN_UNUSED_RESULT int od_ec_decode_cdf_unscaled(od_ec_dec *dec,
-                                                    const uint16_t *cdf,
-                                                    int nsyms) OD_ARG_NONNULL(1)
-    OD_ARG_NONNULL(2);
-OD_WARN_UNUSED_RESULT int od_ec_decode_cdf_unscaled_dyadic(od_ec_dec *dec,
-                                                           const uint16_t *cdf,
-                                                           int nsyms,
-                                                           unsigned _ftb)
-    OD_ARG_NONNULL(1) OD_ARG_NONNULL(2);
-
-OD_WARN_UNUSED_RESULT uint32_t od_ec_dec_uint(od_ec_dec *dec, uint32_t ft)
-    OD_ARG_NONNULL(1);
-
-OD_WARN_UNUSED_RESULT uint32_t od_ec_dec_bits_(od_ec_dec *dec, unsigned ftb)
-    OD_ARG_NONNULL(1);
-
-OD_WARN_UNUSED_RESULT int od_ec_dec_tell(const od_ec_dec *dec)
-    OD_ARG_NONNULL(1);
-OD_WARN_UNUSED_RESULT uint32_t od_ec_dec_tell_frac(const od_ec_dec *dec)
-    OD_ARG_NONNULL(1);
-
-#ifdef __cplusplus
-}  // extern "C"
-#endif
-
-#endif
--- a/aom_dsp/entenc.c
+++ b/aom_dsp/entenc.c
@@ -1,686 +0,0 @@
-/*
- * Copyright (c) 2001-2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifdef HAVE_CONFIG_H
-#include "./config.h"
-#endif
-
-#include <stdlib.h>
-#include <string.h>
-#include "aom_dsp/entenc.h"
-
-/*A range encoder.
-  See entdec.c and the references for implementation details \cite{Mar79,MNW98}.
-
-  @INPROCEEDINGS{Mar79,
-   author="Martin, G.N.N.",
-   title="Range encoding: an algorithm for removing redundancy from a digitised
-    message",
-   booktitle="Video \& Data Recording Conference",
-   year=1979,
-   address="Southampton",
-   month=Jul,
-   URL="http://www.compressconsult.com/rangecoder/rngcod.pdf.gz"
-  }
-  @ARTICLE{MNW98,
-   author="Alistair Moffat and Radford Neal and Ian H. Witten",
-   title="Arithmetic Coding Revisited",
-   journal="{ACM} Transactions on Information Systems",
-   year=1998,
-   volume=16,
-   number=3,
-   pages="256--294",
-   month=Jul,
-   URL="http://researchcommons.waikato.ac.nz/bitstream/handle/10289/78/content.pdf"
-  }*/
-
-/*Takes updated low and range values, renormalizes them so that
-   32768 <= rng < 65536 (flushing bytes from low to the pre-carry buffer if
-   necessary), and stores them back in the encoder context.
-  low: The new value of low.
-  rng: The new value of the range.*/
-static void od_ec_enc_normalize(od_ec_enc *enc, od_ec_window low,
-                                unsigned rng) {
-  int d;
-  int c;
-  int s;
-  c = enc->cnt;
-  OD_ASSERT(rng <= 65535U);
-  d = 16 - OD_ILOG_NZ(rng);  /*Left shift applied to low and rng below.*/
-  s = c + d;  /*The new cnt if nothing gets flushed.*/
-  /*TODO: Right now we flush every time we have at least one byte available.
-    Instead we should use an od_ec_window and flush right before we're about to
-     shift bits off the end of the window.
-    For a 32-bit window this is about the same amount of work, but for a 64-bit
-     window it should be a fair win.*/
-  if (s >= 0) {
-    uint16_t *buf;
-    uint32_t storage;
-    uint32_t offs;
-    unsigned m;
-    buf = enc->precarry_buf;
-    storage = enc->precarry_storage;
-    offs = enc->offs;
-    if (offs + 2 > storage) {  /*Up to two entries are written below.*/
-      storage = 2 * storage + 2;
-      buf = (uint16_t *)realloc(buf, sizeof(*buf) * storage);
-      if (buf == NULL) {  /*enc->precarry_buf still holds the old block.*/
-        enc->error = -1;
-        enc->offs = 0;
-        return;  /*low, rng and cnt are left as they were.*/
-      }
-      enc->precarry_buf = buf;
-      enc->precarry_storage = storage;
-    }
-    c += 16;
-    m = (1 << c) - 1;  /*Mask for the bits of low below position c.*/
-    if (s >= 8) {  /*Emit an extra entry first.*/
-      OD_ASSERT(offs < storage);
-      buf[offs++] = (uint16_t)(low >> c);
-      low &= m;
-      c -= 8;
-      m >>= 8;
-    }
-    OD_ASSERT(offs < storage);
-    buf[offs++] = (uint16_t)(low >> c);
-    s = c + d - 24;
-    low &= m;  /*Drop the bits that were just written out.*/
-    enc->offs = offs;
-  }
-  enc->low = low << d;
-  enc->rng = rng << d;
-  enc->cnt = s;
-}
-
-/*Initializes the encoder.
-  size: The initial size of the buffer, in bytes.*/
-void od_ec_enc_init(od_ec_enc *enc, uint32_t size) {
-  od_ec_enc_reset(enc);  /*Zeroes enc->error before the allocations below.*/
-  enc->buf = (unsigned char *)malloc(sizeof(*enc->buf) * size);
-  enc->storage = size;
-  if (size > 0 && enc->buf == NULL) {  /*NULL is accepted when size == 0.*/
-    enc->storage = 0;
-    enc->error = -1;
-  }
-  enc->precarry_buf = (uint16_t *)malloc(sizeof(*enc->precarry_buf) * size);
-  enc->precarry_storage = size;  /*Counted in uint16_t entries, not bytes.*/
-  if (size > 0 && enc->precarry_buf == NULL) {
-    enc->precarry_storage = 0;
-    enc->error = -1;
-  }
-}
-
-/*Reinitializes the encoder.*/
-void od_ec_enc_reset(od_ec_enc *enc) {
-  enc->end_offs = 0;  /*The buffers and their sizes are not touched here.*/
-  enc->end_window = 0;
-  enc->nend_bits = 0;
-  enc->offs = 0;
-  enc->low = 0;
-  enc->rng = 0x8000;  /*32768, the bottom of the normalized range.*/
-  /*This is initialized to -9 so that it crosses zero after we've accumulated
-     one byte + one carry bit.*/
-  enc->cnt = -9;
-  enc->error = 0;
-#if OD_MEASURE_EC_OVERHEAD
-  enc->entropy = 0;
-  enc->nb_symbols = 0;
-#endif
-}
-
-/*Frees the buffers used by the encoder.*/
-void od_ec_enc_clear(od_ec_enc *enc) {
-  free(enc->precarry_buf);  /*The pointers are not reset to NULL afterwards.*/
-  free(enc->buf);
-}
-
-/*Encodes a symbol given its scaled frequency information.
-  The frequency information must be discernable by the decoder, assuming it
-   has read only the previous symbols from the stream.
-  You can change the frequency information, or even the entire source alphabet,
-   so long as the decoder can tell from the context of the previously encoded
-   information that it is supposed to do so as well.
-  fl: The cumulative frequency of all symbols that come before the one to be
-       encoded.
-  fh: The cumulative frequency of all symbols up to and including the one to
-       be encoded.
-      Together with fl, this defines the range [fl, fh) in which the decoded
-       value will fall.
-  ft: The sum of the frequencies of all the symbols.
-      This must be at least 16384, and no more than 32768.*/
-static void od_ec_encode(od_ec_enc *enc, unsigned fl, unsigned fh,
-                         unsigned ft) {
-  od_ec_window l;
-  unsigned r;
-  int s;
-  unsigned d;
-  unsigned u;
-  unsigned v;
-  OD_ASSERT(fl < fh);
-  OD_ASSERT(fh <= ft);
-  OD_ASSERT(16384 <= ft);
-  OD_ASSERT(ft <= 32768U);
-  l = enc->low;
-  r = enc->rng;
-  OD_ASSERT(ft <= r);
-  s = r - ft >= ft;  /*1 when r >= 2 * ft, otherwise 0.*/
-  ft <<= s;  /*Doubling in that case still leaves ft <= r.*/
-  fl <<= s;
-  fh <<= s;
-  d = r - ft;  /*The part of the range that ft does not cover.*/
-  OD_ASSERT(d < ft);
-#if OD_EC_REDUCED_OVERHEAD
-  {
-    unsigned e;
-    e = OD_SUBSATU(2 * d, ft);
-    u = fl + OD_MINI(fl, e) + OD_MINI(OD_SUBSATU(fl, e) >> 1, d);
-    v = fh + OD_MINI(fh, e) + OD_MINI(OD_SUBSATU(fh, e) >> 1, d);
-  }
-#else
-  u = fl + OD_MINI(fl, d);
-  v = fh + OD_MINI(fh, d);
-#endif
-  r = v - u;  /*Width of the interval chosen for this symbol.*/
-  l += u;  /*Its lower end, relative to the old low.*/
-  od_ec_enc_normalize(enc, l, r);
-#if OD_MEASURE_EC_OVERHEAD
-  enc->entropy -= OD_LOG2((double)(fh - fl) / ft);
-  enc->nb_symbols++;
-#endif
-}
-
-/*Encodes a symbol given its frequency in Q15.
-  This is like od_ec_encode() when ft == 32768, but is simpler and has lower
-   overhead.
-  Symbols encoded with this function cannot be properly decoded with
-   od_ec_decode(), and must be decoded with one of the equivalent _q15()
-   functions instead.
-  fl: The cumulative frequency of all symbols that come before the one to be
-       encoded.
-  fh: The cumulative frequency of all symbols up to and including the one to
-       be encoded.*/
-static void od_ec_encode_q15(od_ec_enc *enc, unsigned fl, unsigned fh) {
-  od_ec_window l;
-  unsigned r;
-  unsigned u;
-  unsigned v;
-  OD_ASSERT(fl < fh);
-  OD_ASSERT(fh <= 32768U);
-  l = enc->low;
-  r = enc->rng;
-  OD_ASSERT(32768U <= r);
-  u = fl * (uint32_t)r >> 15;  /*Parsed as (fl * r) >> 15.*/
-  v = fh * (uint32_t)r >> 15;
-  r = v - u;  /*Width of the interval chosen for this symbol.*/
-  l += u;
-  od_ec_enc_normalize(enc, l, r);
-#if OD_MEASURE_EC_OVERHEAD
-  enc->entropy -= OD_LOG2((double)(fh - fl) / 32768.);
-  enc->nb_symbols++;
-#endif
-}
-
-/*Encodes a symbol given its frequency information with an arbitrary scale.
-  This operates just like od_ec_encode(), but does not require that ft be at
-   least 16384.
-  fl: The cumulative frequency of all symbols that come before the one to be
-       encoded.
-  fh: The cumulative frequency of all symbols up to and including the one to
-       be encoded.
-  ft: The sum of the frequencies of all the symbols.
-      This must be at least 2 and no more than 32768.*/
-static void od_ec_encode_unscaled(od_ec_enc *enc, unsigned fl, unsigned fh,
-                                  unsigned ft) {
-  int s;
-  OD_ASSERT(fl < fh);
-  OD_ASSERT(fh <= ft);
-  OD_ASSERT(2 <= ft);
-  OD_ASSERT(ft <= 32768U);
-  s = 15 - OD_ILOG_NZ(ft - 1);  /*Presumably lifts ft into [16384, 32768].*/
-  od_ec_encode(enc, fl << s, fh << s, ft << s);
-}
-
-/*Encode a bit that has an fz/ft probability of being a zero.
-  val: The value to encode (0 or 1).
-  fz: The probability that val is zero, scaled by ft.
-  ft: The total probability.
-      This must be at least 16384 and no more than 32768.*/
-void od_ec_encode_bool(od_ec_enc *enc, int val, unsigned fz, unsigned ft) {
-  od_ec_window l;
-  unsigned r;
-  int s;
-  unsigned v;
-  OD_ASSERT(0 < fz);
-  OD_ASSERT(fz < ft);
-  OD_ASSERT(16384 <= ft);
-  OD_ASSERT(ft <= 32768U);
-  l = enc->low;
-  r = enc->rng;
-  OD_ASSERT(ft <= r);
-  s = r - ft >= ft;  /*1 when r >= 2 * ft, otherwise 0.*/
-  ft <<= s;
-  fz <<= s;
-  OD_ASSERT(r - ft < ft);
-#if OD_EC_REDUCED_OVERHEAD
-  {
-    unsigned d;
-    unsigned e;
-    d = r - ft;
-    e = OD_SUBSATU(2 * d, ft);
-    v = fz + OD_MINI(fz, e) + OD_MINI(OD_SUBSATU(fz, e) >> 1, d);
-  }
-#else
-  v = fz + OD_MINI(fz, r - ft);
-#endif
-  if (val) l += v;  /*A one is coded in the part of the range above v.*/
-  r = val ? r - v : v;  /*A zero keeps the first v values instead.*/
-  od_ec_enc_normalize(enc, l, r);
-#if OD_MEASURE_EC_OVERHEAD
-  enc->entropy -= OD_LOG2((double)(val ? ft - fz : fz) / ft);
-  enc->nb_symbols++;
-#endif
-}
-
-/*Encode a bit that has an fz probability of being a zero in Q15.
-  This is a simpler, lower overhead version of od_ec_encode_bool() for use when
-   ft == 32768.
-  Symbols encoded with this function cannot be properly decoded with
-   od_ec_decode(), and must be decoded with one of the equivalent _q15()
-   functions instead.
-  val: The value to encode (0 or 1).
-  fz: The probability that val is zero, scaled by 32768.*/
-void od_ec_encode_bool_q15(od_ec_enc *enc, int val, unsigned fz) {
-  od_ec_window l;
-  unsigned r;
-  unsigned v;
-  OD_ASSERT(0 < fz);
-  OD_ASSERT(fz < 32768U);
-  l = enc->low;
-  r = enc->rng;
-  OD_ASSERT(32768U <= r);
-  v = fz * (uint32_t)r >> 15;  /*Parsed as (fz * r) >> 15.*/
-  if (val) l += v;  /*A one is coded in the part of the range above v.*/
-  r = val ? r - v : v;  /*A zero keeps the first v values instead.*/
-  od_ec_enc_normalize(enc, l, r);
-#if OD_MEASURE_EC_OVERHEAD
-  enc->entropy -= OD_LOG2((double)(val ? 32768 - fz : fz) / 32768.);
-  enc->nb_symbols++;
-#endif
-}
-
-/*Encodes a symbol given a cumulative distribution function (CDF) table.
-  s: The index of the symbol to encode.
-  cdf: The CDF, such that symbol s falls in the range
-        [s > 0 ? cdf[s - 1] : 0, cdf[s]).
-       The values must be monotonically non-decreasing, and the last value
-        must be at least 16384, and no more than 32768.
-  nsyms: The number of symbols in the alphabet.
-         This should be at most 16.*/
-void od_ec_encode_cdf(od_ec_enc *enc, int s, const uint16_t *cdf, int nsyms) {
-  OD_ASSERT(s >= 0);
-  OD_ASSERT(s < nsyms);  /*cdf[nsyms - 1] is passed below as the total, ft.*/
-  od_ec_encode(enc, s > 0 ? cdf[s - 1] : 0, cdf[s], cdf[nsyms - 1]);
-}
-
-/*Encodes a symbol given a cumulative distribution function (CDF) table in Q15.
-  This is a simpler, lower overhead version of od_ec_encode_cdf() for use when
-   cdf[nsyms - 1] == 32768.
-  Symbols encoded with this function cannot be properly decoded with
-   od_ec_decode(), and must be decoded with one of the equivalent _q15()
-   functions instead.
-  s: The index of the symbol to encode.
-  cdf: The CDF, such that symbol s falls in the range
-        [s > 0 ? cdf[s - 1] : 0, cdf[s]).
-       The values must be monotonically non-decreasing, and the last value
-        must be exactly 32768.
-  nsyms: The number of symbols in the alphabet.
-         This should be at most 16.*/
-void od_ec_encode_cdf_q15(od_ec_enc *enc, int s, const uint16_t *cdf,
-                          int nsyms) {
-  (void)nsyms;  /*nsyms is otherwise only read inside the asserts.*/
-  OD_ASSERT(s >= 0);
-  OD_ASSERT(s < nsyms);
-  OD_ASSERT(cdf[nsyms - 1] == 32768U);
-  od_ec_encode_q15(enc, s > 0 ? cdf[s - 1] : 0, cdf[s]);
-}
-
-/*Encodes a symbol given a cumulative distribution function (CDF) table.
-  s: The index of the symbol to encode.
-  cdf: The CDF, such that symbol s falls in the range
-        [s > 0 ? cdf[s - 1] : 0, cdf[s]).
-       The values must be monotonically non-decreasing, and the last value
-        must be at least 2, and no more than 32768.
-  nsyms: The number of symbols in the alphabet.
-         This should be at most 16.*/
-void od_ec_encode_cdf_unscaled(od_ec_enc *enc, int s, const uint16_t *cdf,
-                               int nsyms) {
-  OD_ASSERT(s >= 0);
-  OD_ASSERT(s < nsyms);  /*cdf[nsyms - 1] is passed below as the total, ft.*/
-  od_ec_encode_unscaled(enc, s > 0 ? cdf[s - 1] : 0, cdf[s], cdf[nsyms - 1]);
-}
-
-/*Equivalent to od_ec_encode_cdf_q15() with the cdf scaled by
-   (1 << (15 - ftb)).
-  s: The index of the symbol to encode.
-  cdf: The CDF, such that symbol s falls in the range
-        [s > 0 ? cdf[s - 1] : 0, cdf[s]).
-       The values must be monotonically non-decreasing, and the last value
-        must be exactly 1 << ftb.
-  nsyms: The number of symbols in the alphabet.
-         This should be at most 16.
-  ftb: The number of bits of precision in the cumulative distribution.
-       This must be no more than 15.*/
-void od_ec_encode_cdf_unscaled_dyadic(od_ec_enc *enc, int s,
-                                      const uint16_t *cdf, int nsyms,
-                                      unsigned ftb) {
-  (void)nsyms;  /*nsyms is otherwise only read inside the asserts.*/
-  OD_ASSERT(s >= 0);
-  OD_ASSERT(s < nsyms);
-  OD_ASSERT(ftb <= 15);  /*Keeps the unsigned 15 - ftb from wrapping.*/
-  OD_ASSERT(cdf[nsyms - 1] == 1U << ftb);
-  od_ec_encode_q15(enc, s > 0 ? cdf[s - 1] << (15 - ftb) : 0,
-                   cdf[s] << (15 - ftb));
-}
-
-/*Encodes a raw unsigned integer in the stream.
-  fl: The integer to encode.
-  ft: The number of integers that can be encoded (one more than the max).
-      This must be at least 2, and no more than 2**29.*/
-void od_ec_enc_uint(od_ec_enc *enc, uint32_t fl, uint32_t ft) {
-  OD_ASSERT(ft >= 2);
-  OD_ASSERT(fl < ft);
-  OD_ASSERT(ft <= (uint32_t)1 << (25 + OD_EC_UINT_BITS));
-  if (ft > 1U << OD_EC_UINT_BITS) {
-    int ft1;
-    int ftb;
-    ft--;  /*ft is now the largest encodable value.*/
-    ftb = OD_ILOG_NZ(ft) - OD_EC_UINT_BITS;  /*Number of low bits sent raw.*/
-    ft1 = (int)(ft >> ftb) + 1;  /*Alphabet size for the high part of fl.*/
-    od_ec_encode_cdf_q15(enc, (int)(fl >> ftb), OD_UNIFORM_CDF_Q15(ft1), ft1);
-    od_ec_enc_bits(enc, fl & (((uint32_t)1 << ftb) - 1), ftb);
-  } else {
-    od_ec_encode_cdf_q15(enc, (int)fl, OD_UNIFORM_CDF_Q15(ft), (int)ft);
-  }
-}
-
-/*Encodes a sequence of raw bits in the stream.
-  fl: The bits to encode.
-  ftb: The number of bits to encode.
-       This must be between 0 and 25, inclusive.*/
-void od_ec_enc_bits(od_ec_enc *enc, uint32_t fl, unsigned ftb) {
-  od_ec_window end_window;
-  int nend_bits;
-  OD_ASSERT(ftb <= 25);
-  OD_ASSERT(fl < (uint32_t)1 << ftb);
-#if OD_MEASURE_EC_OVERHEAD
-  enc->entropy += ftb;  /*Raw bits are charged at one bit each.*/
-#endif
-  end_window = enc->end_window;
-  nend_bits = enc->nend_bits;
-  if (nend_bits + ftb > OD_EC_WINDOW_SIZE) {  /*No room left in the window.*/
-    unsigned char *buf;
-    uint32_t storage;
-    uint32_t end_offs;
-    buf = enc->buf;
-    storage = enc->storage;
-    end_offs = enc->end_offs;
-    if (end_offs + (OD_EC_WINDOW_SIZE >> 3) >= storage) {
-      unsigned char *new_buf;
-      uint32_t new_storage;
-      new_storage = 2 * storage + (OD_EC_WINDOW_SIZE >> 3);
-      new_buf = (unsigned char *)malloc(sizeof(*new_buf) * new_storage);
-      if (new_buf == NULL) {
-        enc->error = -1;
-        enc->end_offs = 0;
-        return;  /*enc->buf and enc->storage are left unchanged.*/
-      }
-      OD_COPY(new_buf + new_storage - end_offs, buf + storage - end_offs,
-              end_offs);  /*Raw bytes sit at the tail of the buffer.*/
-      storage = new_storage;
-      free(buf);
-      enc->buf = buf = new_buf;
-      enc->storage = storage;
-    }
-    do {
-      OD_ASSERT(end_offs < storage);
-      buf[storage - ++end_offs] = (unsigned char)end_window;
-      end_window >>= 8;
-      nend_bits -= 8;
-    } while (nend_bits >= 8);  /*Fewer than 8 bits stay buffered.*/
-    enc->end_offs = end_offs;
-  }
-  OD_ASSERT(nend_bits + ftb <= OD_EC_WINDOW_SIZE);
-  end_window |= (od_ec_window)fl << nend_bits;  /*New bits go above the old.*/
-  nend_bits += ftb;
-  enc->end_window = end_window;
-  enc->nend_bits = nend_bits;
-}
-
-/*Overwrites a few bits at the very start of an existing stream, after they
-   have already been encoded.
-  This makes it possible to have a few flags up front, where it is easy for
-   decoders to access them without parsing the whole stream, even if their
-   values are not determined until late in the encoding process, without having
-   to buffer all the intermediate symbols in the encoder.
-  In order for this to work, at least nbits bits must have already been encoded
-   using probabilities that are an exact power of two.
-  The encoder can verify the number of encoded bits is sufficient, but cannot
-   check this latter condition.
-  val: The bits to encode (in the least nbits significant bits).
-       They will be decoded in order from most-significant to least.
-  nbits: The number of bits to overwrite.
-         This must be no more than 8.*/
-void od_ec_enc_patch_initial_bits(od_ec_enc *enc, unsigned val, int nbits) {
-  int shift;
-  unsigned mask;
-  OD_ASSERT(nbits >= 0);
-  OD_ASSERT(nbits <= 8);
-  OD_ASSERT(val < 1U << nbits);
-  shift = 8 - nbits;
-  mask = ((1U << nbits) - 1) << shift;  /*Selects the top nbits of a byte.*/
-  if (enc->offs > 0) {
-    /*The first byte has been finalized.*/
-    enc->precarry_buf[0] =
-        (uint16_t)((enc->precarry_buf[0] & ~mask) | val << shift);
-  } else if (9 + enc->cnt + (enc->rng == 0x8000) > nbits) {
-    /*The first byte has yet to be output.*/
-    enc->low = (enc->low & ~((od_ec_window)mask << (16 + enc->cnt))) |
-               (od_ec_window)val << (16 + enc->cnt + shift);
-  } else {
-    /*The encoder hasn't even encoded nbits of data yet.*/
-    enc->error = -1;
-  }
-}
-
-#if OD_MEASURE_EC_OVERHEAD
-#include <stdio.h>
-#endif
-
-/*Indicates that there are no more symbols to encode.
-  All remaining output bytes are flushed to the output buffer.
-  od_ec_enc_reset() should be called before using the encoder again.
-  nbytes: Returns the size of the encoded data in the returned buffer.
-  Return: A pointer to the start of the final buffer, or NULL if there was an
-           encoding error.*/
-unsigned char *od_ec_enc_done(od_ec_enc *enc, uint32_t *nbytes) {
-  unsigned char *out;
-  uint32_t storage;
-  uint16_t *buf;
-  uint32_t offs;
-  uint32_t end_offs;
-  int nend_bits;
-  od_ec_window m;
-  od_ec_window e;
-  od_ec_window l;
-  unsigned r;
-  int c;
-  int s;
-  if (enc->error) return NULL;  /*An earlier failure is reported here.*/
-#if OD_MEASURE_EC_OVERHEAD
-  {
-    uint32_t tell;
-    /* Don't count the 1 bit we lose to raw bits as overhead. */
-    tell = od_ec_enc_tell(enc) - 1;
-    fprintf(stderr, "overhead: %f%%\n",
-            100 * (tell - enc->entropy) / enc->entropy);
-    fprintf(stderr, "efficiency: %f bits/symbol\n",
-            (double)tell / enc->nb_symbols);
-  }
-#endif
-  /*We output the minimum number of bits that ensures that the symbols encoded
-     thus far will be decoded correctly regardless of the bits that follow.*/
-  l = enc->low;
-  r = enc->rng;
-  c = enc->cnt;
-  s = 9;
-  m = 0x7FFF;
-  e = (l + m) & ~m;  /*l rounded up to a multiple of m + 1.*/
-  while ((e | m) >= l + r) {  /*Shrink m until [e, e | m] fits below l + r.*/
-    s++;
-    m >>= 1;
-    e = (l + m) & ~m;
-  }
-  s += c;
-  offs = enc->offs;
-  buf = enc->precarry_buf;
-  if (s > 0) {
-    unsigned n;
-    storage = enc->precarry_storage;
-    if (offs + ((s + 7) >> 3) > storage) {  /*ceil(s / 8) more entries.*/
-      storage = storage * 2 + ((s + 7) >> 3);
-      buf = (uint16_t *)realloc(buf, sizeof(*buf) * storage);
-      if (buf == NULL) {  /*enc->precarry_buf still holds the old block.*/
-        enc->error = -1;
-        return NULL;
-      }
-      enc->precarry_buf = buf;
-      enc->precarry_storage = storage;
-    }
-    n = (1 << (c + 16)) - 1;
-    do {
-      OD_ASSERT(offs < storage);
-      buf[offs++] = (uint16_t)(e >> (c + 16));
-      e &= n;
-      s -= 8;
-      c -= 8;
-      n >>= 8;
-    } while (s > 0);
-  }
-  /*Make sure there's enough room for the entropy-coded bits and the raw
-     bits.*/
-  out = enc->buf;
-  storage = enc->storage;
-  end_offs = enc->end_offs;
-  e = enc->end_window;  /*e is reused for the buffered raw bits.*/
-  nend_bits = enc->nend_bits;
-  s = -s;  /*Presumably the unused bits in the last coded byte.*/
-  c = OD_MAXI((nend_bits - s + 7) >> 3, 0);  /*Extra bytes for raw bits.*/
-  if (offs + end_offs + c > storage) {
-    storage = offs + end_offs + c;  /*Grows to exactly the size needed.*/
-    out = (unsigned char *)realloc(out, sizeof(*out) * storage);
-    if (out == NULL) {  /*enc->buf still holds the old block.*/
-      enc->error = -1;
-      return NULL;
-    }
-    OD_MOVE(out + storage - end_offs, out + enc->storage - end_offs, end_offs);
-    enc->buf = out;  /*The raw bytes were moved to the new tail above.*/
-    enc->storage = storage;
-  }
-  /*If we have buffered raw bits, flush them as well.*/
-  while (nend_bits > s) {
-    OD_ASSERT(end_offs < storage);
-    out[storage - ++end_offs] = (unsigned char)e;
-    e >>= 8;
-    nend_bits -= 8;
-  }
-  *nbytes = offs + end_offs;
-  /*Perform carry propagation.*/
-  OD_ASSERT(offs + end_offs <= storage);
-  out = out + storage - (offs + end_offs);  /*Start of the packed output.*/
-  c = 0;  /*c is reused as the running carry.*/
-  end_offs = offs;  /*Reused: the number of entropy-coded bytes.*/
-  while (offs-- > 0) {  /*Last byte first, so carries move toward the start.*/
-    c = buf[offs] + c;
-    out[offs] = (unsigned char)c;
-    c >>= 8;
-  }
-  /*Add any remaining raw bits to the last byte.
-    There is guaranteed to be enough room, because nend_bits <= s.*/
-  OD_ASSERT(nend_bits <= 0 || end_offs > 0);
-  if (nend_bits > 0) out[end_offs - 1] |= (unsigned char)e;
-  /*Note: Unless there's an allocation error, if you keep encoding into the
-     current buffer and call this function again later, everything will work
-     just fine (you won't get a new packet out, but you will get a single
-     buffer with the new data appended to the old).
-    However, this function is O(N) where N is the amount of data coded so far,
-     so calling it more than once for a given packet is a bad idea.*/
-  return out;  /*May point into the middle of enc->buf.*/
-}
-
-/*Returns the number of bits "used" by the encoded symbols so far.
-  This same number can be computed in either the encoder or the decoder, and is
-   suitable for making coding decisions.
-  Warning: The value returned by this function can decrease compared to an
-   earlier call, even after encoding more data, if there is an encoding error
-   (i.e., a failure to allocate enough space for the output buffer).
-  Return: The number of bits.
-          This will always be slightly larger than the exact value (e.g., all
-           rounding error is in the positive direction).*/
-int od_ec_enc_tell(const od_ec_enc *enc) {
-  /*Whole bytes written so far (both ends) plus the bits still pending; the 10
-     undoes the -9 bias in cnt and reserves 1 bit to terminate the stream.*/
-  return (enc->offs + enc->end_offs) * 8 + enc->cnt + enc->nend_bits + 10;
-}
-
-/*Returns the number of bits "used" by the encoded symbols so far.
-  This same number can be computed in either the encoder or the decoder, and is
-   suitable for making coding decisions.
-  Warning: The value returned by this function can decrease compared to an
-   earlier call, even after encoding more data, if there is an encoding error
-   (i.e., a failure to allocate enough space for the output buffer).
-  Return: The number of bits scaled by 2**OD_BITRES.
-          This will always be slightly larger than the exact value (e.g., all
-           rounding error is in the positive direction).*/
-uint32_t od_ec_enc_tell_frac(const od_ec_enc *enc) {
-  return od_ec_tell_frac(od_ec_enc_tell(enc), enc->rng);
-}
-
-/*Saves an entropy coder checkpoint to dst.
-  This allows an encoder to reverse a series of entropy coder
-   decisions if it decides that the information would have been
-   better coded some other way.*/
-void od_ec_enc_checkpoint(od_ec_enc *dst, const od_ec_enc *src) {
-  OD_COPY(dst, src, 1);  /*Presumably a shallow copy; buffers are not cloned.*/
-}
-
-/*Restores an entropy coder checkpoint saved by od_ec_enc_checkpoint.
-  This can only be used to restore from checkpoints earlier in the target
-   state's history: you can not switch backwards and forwards or otherwise
-   switch to a state which isn't a causal ancestor of the current state.
-  Restore is also incompatible with patching the initial bits, as the
-   changes will remain in the restored version.*/
-void od_ec_enc_rollback(od_ec_enc *dst, const od_ec_enc *src) {
-  unsigned char *buf;
-  uint32_t storage;
-  uint16_t *precarry_buf;
-  uint32_t precarry_storage;
-  OD_ASSERT(dst->storage >= src->storage);
-  OD_ASSERT(dst->precarry_storage >= src->precarry_storage);
-  buf = dst->buf;  /*Saved so that dst keeps its own buffers and sizes.*/
-  storage = dst->storage;
-  precarry_buf = dst->precarry_buf;
-  precarry_storage = dst->precarry_storage;
-  OD_COPY(dst, src, 1);  /*Presumably replaces every field of *dst.*/
-  dst->buf = buf;  /*Put back the four values saved above.*/
-  dst->storage = storage;
-  dst->precarry_buf = precarry_buf;
-  dst->precarry_storage = precarry_storage;
-}
--- a/aom_dsp/entenc.h
+++ b/aom_dsp/entenc.h
@@ -1,103 +0,0 @@
-/*
- * Copyright (c) 2001-2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#if !defined(_entenc_H)
-#define _entenc_H (1)
-#include <stddef.h>
-#include "aom_dsp/entcode.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-typedef struct od_ec_enc od_ec_enc;
-
-#define OD_MEASURE_EC_OVERHEAD (0)
-
-/*The entropy encoder context.*/
-struct od_ec_enc {
-  /*Buffered output.
-    This contains only the raw bits until the final call to od_ec_enc_done(),
-     where all the arithmetic-coded data gets prepended to it.*/
-  unsigned char *buf;
-  /*The size of the buffer.*/
-  uint32_t storage;
-  /*The offset at which the last byte containing raw bits was written.*/
-  uint32_t end_offs;  /*Counted back from buf + storage.*/
-  /*Bits that will be read from/written at the end.*/
-  od_ec_window end_window;
-  /*Number of valid bits in end_window.*/
-  int nend_bits;
-  /*A buffer for output bytes with their associated carry flags.*/
-  uint16_t *precarry_buf;
-  /*The size of the pre-carry buffer.*/
-  uint32_t precarry_storage;  /*In uint16_t entries, not bytes.*/
-  /*The offset at which the next entropy-coded byte will be written.*/
-  uint32_t offs;
-  /*The low end of the current range.*/
-  od_ec_window low;
-  /*The number of values in the current range.*/
-  uint16_t rng;
-  /*The number of bits of data in the current value.*/
-  int16_t cnt;
-  /*Nonzero if an error occurred.*/
-  int error;
-#if OD_MEASURE_EC_OVERHEAD
-  double entropy;  /*Sum of -log2(p) per coded symbol, plus raw bit counts.*/
-  int nb_symbols;  /*Incremented once per entropy-coded symbol.*/
-#endif
-};
-
-/*See entenc.c for further documentation.*/
-
-void od_ec_enc_init(od_ec_enc *enc, uint32_t size) OD_ARG_NONNULL(1);
-void od_ec_enc_reset(od_ec_enc *enc) OD_ARG_NONNULL(1);
-void od_ec_enc_clear(od_ec_enc *enc) OD_ARG_NONNULL(1);
-
-void od_ec_encode_bool(od_ec_enc *enc, int val, unsigned fz, unsigned _ft)
-    OD_ARG_NONNULL(1);
-void od_ec_encode_bool_q15(od_ec_enc *enc, int val, unsigned fz_q15)
-    OD_ARG_NONNULL(1);
-void od_ec_encode_cdf(od_ec_enc *enc, int s, const uint16_t *cdf, int nsyms)
-    OD_ARG_NONNULL(1) OD_ARG_NONNULL(3);
-void od_ec_encode_cdf_q15(od_ec_enc *enc, int s, const uint16_t *cdf, int nsyms)
-    OD_ARG_NONNULL(1) OD_ARG_NONNULL(3);
-void od_ec_encode_cdf_unscaled(od_ec_enc *enc, int s, const uint16_t *cdf,
-                               int nsyms) OD_ARG_NONNULL(1) OD_ARG_NONNULL(3);
-void od_ec_encode_cdf_unscaled_dyadic(od_ec_enc *enc, int s,
-                                      const uint16_t *cdf, int nsyms,
-                                      unsigned ftb) OD_ARG_NONNULL(1)
-    OD_ARG_NONNULL(3);
-
-void od_ec_enc_uint(od_ec_enc *enc, uint32_t fl, uint32_t ft) OD_ARG_NONNULL(1);
-
-void od_ec_enc_bits(od_ec_enc *enc, uint32_t fl, unsigned ftb)
-    OD_ARG_NONNULL(1);
-
-void od_ec_enc_patch_initial_bits(od_ec_enc *enc, unsigned val, int nbits)
-    OD_ARG_NONNULL(1);
-OD_WARN_UNUSED_RESULT unsigned char *od_ec_enc_done(od_ec_enc *enc,
-                                                    uint32_t *nbytes)
-    OD_ARG_NONNULL(1) OD_ARG_NONNULL(2);
-
-OD_WARN_UNUSED_RESULT int od_ec_enc_tell(const od_ec_enc *enc)
-    OD_ARG_NONNULL(1);
-OD_WARN_UNUSED_RESULT uint32_t od_ec_enc_tell_frac(const od_ec_enc *enc)
-    OD_ARG_NONNULL(1);
-
-void od_ec_enc_checkpoint(od_ec_enc *dst, const od_ec_enc *src);
-void od_ec_enc_rollback(od_ec_enc *dst, const od_ec_enc *src);
-
-#ifdef __cplusplus
-}  // extern "C"
-#endif
-
-#endif
--- a/aom_dsp/fwd_txfm.h
+++ b/aom_dsp/fwd_txfm.h
@@ -1,26 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_DSP_FWD_TXFM_H_
-#define AOM_DSP_FWD_TXFM_H_
-
-#include "aom_dsp/txfm_common.h"
-
-static INLINE tran_high_t fdct_round_shift(tran_high_t input) {
-  tran_high_t rv = ROUND_POWER_OF_TWO(input, DCT_CONST_BITS);
-  // TODO(debargha, peter.derivaz): Find new bounds for this assert
-  // and make the bounds consts.
-  // assert(INT16_MIN <= rv && rv <= INT16_MAX);
-  return rv;  // Returned without any range check (see the TODO above).
-}
-
-void aom_fdct32(const tran_high_t *input, tran_high_t *output, int round);
-#endif  // AOM_DSP_FWD_TXFM_H_
--- a/aom_dsp/mips/aom_convolve_msa.h
+++ b/aom_dsp/mips/aom_convolve_msa.h
@@ -1,124 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_DSP_MIPS_AOM_CONVOLVE_MSA_H_
-#define AOM_DSP_MIPS_AOM_CONVOLVE_MSA_H_
-
-#include "aom_dsp/mips/macros_msa.h"
-#include "aom_dsp/aom_filter.h"
-
-extern const uint8_t mc_filt_mask_arr[16 * 3];
-
-#define FILT_8TAP_DPADD_S_H(vec0, vec1, vec2, vec3, filt0, filt1, filt2,   \
-                            filt3)                                         \
-  ({                                                                       \
-    v8i16 tmp_dpadd_0, tmp_dpadd_1;                                        \
-                                                                           \
-    tmp_dpadd_0 = __msa_dotp_s_h((v16i8)vec0, (v16i8)filt0);               \
-    tmp_dpadd_0 = __msa_dpadd_s_h(tmp_dpadd_0, (v16i8)vec1, (v16i8)filt1); \
-    tmp_dpadd_1 = __msa_dotp_s_h((v16i8)vec2, (v16i8)filt2);               \
-    tmp_dpadd_1 = __msa_dpadd_s_h(tmp_dpadd_1, (v16i8)vec3, (v16i8)filt3); \
-    tmp_dpadd_0 = __msa_adds_s_h(tmp_dpadd_0, tmp_dpadd_1);                \
-                                                                           \
-    tmp_dpadd_0;                                                           \
-  })
-
-#define HORIZ_8TAP_FILT(src0, src1, mask0, mask1, mask2, mask3, filt_h0,       \
-                        filt_h1, filt_h2, filt_h3)                             \
-  ({                                                                           \
-    v16i8 vec0_m, vec1_m, vec2_m, vec3_m;                                      \
-    v8i16 hz_out_m;                                                            \
-                                                                               \
-    VSHF_B4_SB(src0, src1, mask0, mask1, mask2, mask3, vec0_m, vec1_m, vec2_m, \
-               vec3_m);                                                        \
-    hz_out_m = FILT_8TAP_DPADD_S_H(vec0_m, vec1_m, vec2_m, vec3_m, filt_h0,    \
-                                   filt_h1, filt_h2, filt_h3);                 \
-                                                                               \
-    hz_out_m = __msa_srari_h(hz_out_m, FILTER_BITS);                           \
-    hz_out_m = __msa_sat_s_h(hz_out_m, 7);                                     \
-                                                                               \
-    hz_out_m;                                                                  \
-  })
-
-#define HORIZ_8TAP_4WID_4VECS_FILT(src0, src1, src2, src3, mask0, mask1,     \
-                                   mask2, mask3, filt0, filt1, filt2, filt3, \
-                                   out0, out1)                               \
-  {                                                                          \
-    v16i8 vec0_m, vec1_m, vec2_m, vec3_m, vec4_m, vec5_m, vec6_m, vec7_m;    \
-    v8i16 res0_m, res1_m, res2_m, res3_m;                                    \
-                                                                             \
-    VSHF_B2_SB(src0, src1, src2, src3, mask0, mask0, vec0_m, vec1_m);        \
-    DOTP_SB2_SH(vec0_m, vec1_m, filt0, filt0, res0_m, res1_m);               \
-    VSHF_B2_SB(src0, src1, src2, src3, mask1, mask1, vec2_m, vec3_m);        \
-    DPADD_SB2_SH(vec2_m, vec3_m, filt1, filt1, res0_m, res1_m);              \
-    VSHF_B2_SB(src0, src1, src2, src3, mask2, mask2, vec4_m, vec5_m);        \
-    DOTP_SB2_SH(vec4_m, vec5_m, filt2, filt2, res2_m, res3_m);               \
-    VSHF_B2_SB(src0, src1, src2, src3, mask3, mask3, vec6_m, vec7_m);        \
-    DPADD_SB2_SH(vec6_m, vec7_m, filt3, filt3, res2_m, res3_m);              \
-    ADDS_SH2_SH(res0_m, res2_m, res1_m, res3_m, out0, out1);                 \
-  }
-
-#define HORIZ_8TAP_8WID_4VECS_FILT(src0, src1, src2, src3, mask0, mask1,     \
-                                   mask2, mask3, filt0, filt1, filt2, filt3, \
-                                   out0, out1, out2, out3)                   \
-  {                                                                          \
-    v16i8 vec0_m, vec1_m, vec2_m, vec3_m, vec4_m, vec5_m, vec6_m, vec7_m;    \
-    v8i16 res0_m, res1_m, res2_m, res3_m, res4_m, res5_m, res6_m, res7_m;    \
-                                                                             \
-    VSHF_B2_SB(src0, src0, src1, src1, mask0, mask0, vec0_m, vec1_m);        \
-    VSHF_B2_SB(src2, src2, src3, src3, mask0, mask0, vec2_m, vec3_m);        \
-    DOTP_SB4_SH(vec0_m, vec1_m, vec2_m, vec3_m, filt0, filt0, filt0, filt0,  \
-                res0_m, res1_m, res2_m, res3_m);                             \
-    VSHF_B2_SB(src0, src0, src1, src1, mask2, mask2, vec0_m, vec1_m);        \
-    VSHF_B2_SB(src2, src2, src3, src3, mask2, mask2, vec2_m, vec3_m);        \
-    DOTP_SB4_SH(vec0_m, vec1_m, vec2_m, vec3_m, filt2, filt2, filt2, filt2,  \
-                res4_m, res5_m, res6_m, res7_m);                             \
-    VSHF_B2_SB(src0, src0, src1, src1, mask1, mask1, vec4_m, vec5_m);        \
-    VSHF_B2_SB(src2, src2, src3, src3, mask1, mask1, vec6_m, vec7_m);        \
-    DPADD_SB4_SH(vec4_m, vec5_m, vec6_m, vec7_m, filt1, filt1, filt1, filt1, \
-                 res0_m, res1_m, res2_m, res3_m);                            \
-    VSHF_B2_SB(src0, src0, src1, src1, mask3, mask3, vec4_m, vec5_m);        \
-    VSHF_B2_SB(src2, src2, src3, src3, mask3, mask3, vec6_m, vec7_m);        \
-    DPADD_SB4_SH(vec4_m, vec5_m, vec6_m, vec7_m, filt3, filt3, filt3, filt3, \
-                 res4_m, res5_m, res6_m, res7_m);                            \
-    ADDS_SH4_SH(res0_m, res4_m, res1_m, res5_m, res2_m, res6_m, res3_m,      \
-                res7_m, out0, out1, out2, out3);                             \
-  }
-
-#define PCKEV_XORI128_AVG_ST_UB(in0, in1, dst, pdst) \
-  {                                                  \
-    v16u8 tmp_m;                                     \
-                                                     \
-    tmp_m = PCKEV_XORI128_UB(in1, in0);              \
-    tmp_m = __msa_aver_u_b(tmp_m, (v16u8)dst);       \
-    ST_UB(tmp_m, (pdst));                            \
-  }
-
-#define PCKEV_AVG_ST_UB(in0, in1, dst, pdst)              \
-  {                                                       \
-    v16u8 tmp_m;                                          \
-                                                          \
-    tmp_m = (v16u8)__msa_pckev_b((v16i8)in0, (v16i8)in1); \
-    tmp_m = __msa_aver_u_b(tmp_m, (v16u8)dst);            \
-    ST_UB(tmp_m, (pdst));                                 \
-  }
-
-#define PCKEV_AVG_ST8x4_UB(in1, dst0, in2, dst1, in3, dst2, in4, dst3, pdst, \
-                           stride)                                           \
-  {                                                                          \
-    v16u8 tmp0_m, tmp1_m, tmp2_m, tmp3_m;                                    \
-                                                                             \
-    PCKEV_B2_UB(in2, in1, in4, in3, tmp0_m, tmp1_m);                         \
-    PCKEV_D2_UB(dst1, dst0, dst3, dst2, tmp2_m, tmp3_m);                     \
-    AVER_UB2_UB(tmp0_m, tmp2_m, tmp1_m, tmp3_m, tmp0_m, tmp1_m);             \
-    ST8x4_UB(tmp0_m, tmp1_m, pdst, stride);                                  \
-  }
-#endif /* AOM_DSP_MIPS_AOM_CONVOLVE_MSA_H_ */
--- a/aom_dsp/mips/common_dspr2.c
+++ b/aom_dsp/mips/common_dspr2.c
@@ -1,31 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include "aom_dsp/mips/common_dspr2.h"
-
-#if HAVE_DSPR2
-uint8_t aom_ff_cropTbl_a[256 + 2 * CROP_WIDTH];
-uint8_t *aom_ff_cropTbl;
-
-void aom_dsputil_static_init(void) {
-  int i;
-
-  for (i = 0; i < 256; i++) aom_ff_cropTbl_a[i + CROP_WIDTH] = i;
-
-  for (i = 0; i < CROP_WIDTH; i++) {
-    aom_ff_cropTbl_a[i] = 0;
-    aom_ff_cropTbl_a[i + CROP_WIDTH + 256] = 255;
-  }
-
-  aom_ff_cropTbl = &aom_ff_cropTbl_a[CROP_WIDTH];
-}
-
-#endif
--- a/aom_dsp/mips/common_dspr2.h
+++ b/aom_dsp/mips/common_dspr2.h
@@ -1,49 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_COMMON_MIPS_DSPR2_H_
-#define AOM_COMMON_MIPS_DSPR2_H_
-
-#include <assert.h>
-#include "./aom_config.h"
-#include "aom/aom_integer.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-#if HAVE_DSPR2
-#define CROP_WIDTH 512
-
-extern uint8_t *aom_ff_cropTbl;  // From "aom_dsp/mips/intrapred4_dspr2.c"
-
-static INLINE void prefetch_load(const unsigned char *src) {
-  __asm__ __volatile__("pref   0,  0(%[src])   \n\t" : : [src] "r"(src));
-}
-
-/* prefetch data for store */
-static INLINE void prefetch_store(unsigned char *dst) {
-  __asm__ __volatile__("pref   1,  0(%[dst])   \n\t" : : [dst] "r"(dst));
-}
-
-static INLINE void prefetch_load_streamed(const unsigned char *src) {
-  __asm__ __volatile__("pref   4,  0(%[src])   \n\t" : : [src] "r"(src));
-}
-
-/* prefetch data for store */
-static INLINE void prefetch_store_streamed(unsigned char *dst) {
-  __asm__ __volatile__("pref   5,  0(%[dst])   \n\t" : : [dst] "r"(dst));
-}
-#endif  // #if HAVE_DSPR2
-#ifdef __cplusplus
-}  // extern "C"
-#endif
-
-#endif  // AOM_COMMON_MIPS_DSPR2_H_
--- a/aom_dsp/mips/convolve_common_dspr2.h
+++ b/aom_dsp/mips/convolve_common_dspr2.h
@@ -1,59 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_DSP_MIPS_AOM_COMMON_DSPR2_H_
-#define AOM_DSP_MIPS_AOM_COMMON_DSPR2_H_
-
-#include <assert.h>
-
-#include "./aom_config.h"
-#include "aom/aom_integer.h"
-#include "aom_dsp/mips/common_dspr2.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#if HAVE_DSPR2
-void aom_convolve2_horiz_dspr2(const uint8_t *src, ptrdiff_t src_stride,
-                               uint8_t *dst, ptrdiff_t dst_stride,
-                               const int16_t *filter_x, int x_step_q4,
-                               const int16_t *filter_y, int y_step_q4, int w,
-                               int h);
-
-void aom_convolve2_avg_horiz_dspr2(const uint8_t *src, ptrdiff_t src_stride,
-                                   uint8_t *dst, ptrdiff_t dst_stride,
-                                   const int16_t *filter_x, int x_step_q4,
-                                   const int16_t *filter_y, int y_step_q4,
-                                   int w, int h);
-
-void aom_convolve2_avg_vert_dspr2(const uint8_t *src, ptrdiff_t src_stride,
-                                  uint8_t *dst, ptrdiff_t dst_stride,
-                                  const int16_t *filter_x, int x_step_q4,
-                                  const int16_t *filter_y, int y_step_q4, int w,
-                                  int h);
-
-void aom_convolve2_dspr2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst,
-                         ptrdiff_t dst_stride, const int16_t *filter, int w,
-                         int h);
-
-void aom_convolve2_vert_dspr2(const uint8_t *src, ptrdiff_t src_stride,
-                              uint8_t *dst, ptrdiff_t dst_stride,
-                              const int16_t *filter_x, int x_step_q4,
-                              const int16_t *filter_y, int y_step_q4, int w,
-                              int h);
-
-#endif  // #if HAVE_DSPR2
-#ifdef __cplusplus
-}  // extern "C"
-#endif
-
-#endif  // AOM_DSP_MIPS_AOM_COMMON_DSPR2_H_
--- a/aom_dsp/mips/deblock_msa.c
+++ b/aom_dsp/mips/deblock_msa.c
@@ -1,681 +0,0 @@
-/*
- *  Copyright (c) 2016 The WebM project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-#include <stdlib.h>
-#include "./macros_msa.h"
-
-extern const int16_t aom_rv[];
-
-#define AOM_TRANSPOSE8x16_UB_UB(in0, in1, in2, in3, in4, in5, in6, in7, out0,  \
-                                out1, out2, out3, out4, out5, out6, out7,      \
-                                out8, out9, out10, out11, out12, out13, out14, \
-                                out15)                                         \
-  {                                                                            \
-    v8i16 temp0, temp1, temp2, temp3, temp4;                                   \
-    v8i16 temp5, temp6, temp7, temp8, temp9;                                   \
-                                                                               \
-    ILVR_B4_SH(in1, in0, in3, in2, in5, in4, in7, in6, temp0, temp1, temp2,    \
-               temp3);                                                         \
-    ILVR_H2_SH(temp1, temp0, temp3, temp2, temp4, temp5);                      \
-    ILVRL_W2_SH(temp5, temp4, temp6, temp7);                                   \
-    ILVL_H2_SH(temp1, temp0, temp3, temp2, temp4, temp5);                      \
-    ILVRL_W2_SH(temp5, temp4, temp8, temp9);                                   \
-    ILVL_B4_SH(in1, in0, in3, in2, in5, in4, in7, in6, temp0, temp1, temp2,    \
-               temp3);                                                         \
-    ILVR_H2_SH(temp1, temp0, temp3, temp2, temp4, temp5);                      \
-    ILVRL_W2_UB(temp5, temp4, out8, out10);                                    \
-    ILVL_H2_SH(temp1, temp0, temp3, temp2, temp4, temp5);                      \
-    ILVRL_W2_UB(temp5, temp4, out12, out14);                                   \
-    out0 = (v16u8)temp6;                                                       \
-    out2 = (v16u8)temp7;                                                       \
-    out4 = (v16u8)temp8;                                                       \
-    out6 = (v16u8)temp9;                                                       \
-    out9 = (v16u8)__msa_ilvl_d((v2i64)out8, (v2i64)out8);                      \
-    out11 = (v16u8)__msa_ilvl_d((v2i64)out10, (v2i64)out10);                   \
-    out13 = (v16u8)__msa_ilvl_d((v2i64)out12, (v2i64)out12);                   \
-    out15 = (v16u8)__msa_ilvl_d((v2i64)out14, (v2i64)out14);                   \
-    out1 = (v16u8)__msa_ilvl_d((v2i64)out0, (v2i64)out0);                      \
-    out3 = (v16u8)__msa_ilvl_d((v2i64)out2, (v2i64)out2);                      \
-    out5 = (v16u8)__msa_ilvl_d((v2i64)out4, (v2i64)out4);                      \
-    out7 = (v16u8)__msa_ilvl_d((v2i64)out6, (v2i64)out6);                      \
-  }
-
-#define AOM_AVER_IF_RETAIN(above2_in, above1_in, src_in, below1_in, below2_in, \
-                           ref, out)                                           \
-  {                                                                            \
-    v16u8 temp0, temp1;                                                        \
-                                                                               \
-    temp1 = __msa_aver_u_b(above2_in, above1_in);                              \
-    temp0 = __msa_aver_u_b(below2_in, below1_in);                              \
-    temp1 = __msa_aver_u_b(temp1, temp0);                                      \
-    out = __msa_aver_u_b(src_in, temp1);                                       \
-    temp0 = __msa_asub_u_b(src_in, above2_in);                                 \
-    temp1 = __msa_asub_u_b(src_in, above1_in);                                 \
-    temp0 = (temp0 < ref);                                                     \
-    temp1 = (temp1 < ref);                                                     \
-    temp0 = temp0 & temp1;                                                     \
-    temp1 = __msa_asub_u_b(src_in, below1_in);                                 \
-    temp1 = (temp1 < ref);                                                     \
-    temp0 = temp0 & temp1;                                                     \
-    temp1 = __msa_asub_u_b(src_in, below2_in);                                 \
-    temp1 = (temp1 < ref);                                                     \
-    temp0 = temp0 & temp1;                                                     \
-    out = __msa_bmz_v(out, src_in, temp0);                                     \
-  }
-
-#define TRANSPOSE12x16_B(in0, in1, in2, in3, in4, in5, in6, in7, in8, in9,    \
-                         in10, in11, in12, in13, in14, in15)                  \
-  {                                                                           \
-    v8i16 temp0, temp1, temp2, temp3, temp4;                                  \
-    v8i16 temp5, temp6, temp7, temp8, temp9;                                  \
-                                                                              \
-    ILVR_B2_SH(in1, in0, in3, in2, temp0, temp1);                             \
-    ILVRL_H2_SH(temp1, temp0, temp2, temp3);                                  \
-    ILVR_B2_SH(in5, in4, in7, in6, temp0, temp1);                             \
-    ILVRL_H2_SH(temp1, temp0, temp4, temp5);                                  \
-    ILVRL_W2_SH(temp4, temp2, temp0, temp1);                                  \
-    ILVRL_W2_SH(temp5, temp3, temp2, temp3);                                  \
-    ILVR_B2_SH(in9, in8, in11, in10, temp4, temp5);                           \
-    ILVR_B2_SH(in9, in8, in11, in10, temp4, temp5);                           \
-    ILVRL_H2_SH(temp5, temp4, temp6, temp7);                                  \
-    ILVR_B2_SH(in13, in12, in15, in14, temp4, temp5);                         \
-    ILVRL_H2_SH(temp5, temp4, temp8, temp9);                                  \
-    ILVRL_W2_SH(temp8, temp6, temp4, temp5);                                  \
-    ILVRL_W2_SH(temp9, temp7, temp6, temp7);                                  \
-    ILVL_B2_SH(in1, in0, in3, in2, temp8, temp9);                             \
-    ILVR_D2_UB(temp4, temp0, temp5, temp1, in0, in2);                         \
-    in1 = (v16u8)__msa_ilvl_d((v2i64)temp4, (v2i64)temp0);                    \
-    in3 = (v16u8)__msa_ilvl_d((v2i64)temp5, (v2i64)temp1);                    \
-    ILVL_B2_SH(in5, in4, in7, in6, temp0, temp1);                             \
-    ILVR_D2_UB(temp6, temp2, temp7, temp3, in4, in6);                         \
-    in5 = (v16u8)__msa_ilvl_d((v2i64)temp6, (v2i64)temp2);                    \
-    in7 = (v16u8)__msa_ilvl_d((v2i64)temp7, (v2i64)temp3);                    \
-    ILVL_B4_SH(in9, in8, in11, in10, in13, in12, in15, in14, temp2, temp3,    \
-               temp4, temp5);                                                 \
-    ILVR_H4_SH(temp9, temp8, temp1, temp0, temp3, temp2, temp5, temp4, temp6, \
-               temp7, temp8, temp9);                                          \
-    ILVR_W2_SH(temp7, temp6, temp9, temp8, temp0, temp1);                     \
-    in8 = (v16u8)__msa_ilvr_d((v2i64)temp1, (v2i64)temp0);                    \
-    in9 = (v16u8)__msa_ilvl_d((v2i64)temp1, (v2i64)temp0);                    \
-    ILVL_W2_SH(temp7, temp6, temp9, temp8, temp2, temp3);                     \
-    in10 = (v16u8)__msa_ilvr_d((v2i64)temp3, (v2i64)temp2);                   \
-    in11 = (v16u8)__msa_ilvl_d((v2i64)temp3, (v2i64)temp2);                   \
-  }
-
-#define AOM_TRANSPOSE12x8_UB_UB(in0, in1, in2, in3, in4, in5, in6, in7, in8, \
-                                in9, in10, in11)                             \
-  {                                                                          \
-    v8i16 temp0, temp1, temp2, temp3;                                        \
-    v8i16 temp4, temp5, temp6, temp7;                                        \
-                                                                             \
-    ILVR_B2_SH(in1, in0, in3, in2, temp0, temp1);                            \
-    ILVRL_H2_SH(temp1, temp0, temp2, temp3);                                 \
-    ILVR_B2_SH(in5, in4, in7, in6, temp0, temp1);                            \
-    ILVRL_H2_SH(temp1, temp0, temp4, temp5);                                 \
-    ILVRL_W2_SH(temp4, temp2, temp0, temp1);                                 \
-    ILVRL_W2_SH(temp5, temp3, temp2, temp3);                                 \
-    ILVL_B2_SH(in1, in0, in3, in2, temp4, temp5);                            \
-    temp4 = __msa_ilvr_h(temp5, temp4);                                      \
-    ILVL_B2_SH(in5, in4, in7, in6, temp6, temp7);                            \
-    temp5 = __msa_ilvr_h(temp7, temp6);                                      \
-    ILVRL_W2_SH(temp5, temp4, temp6, temp7);                                 \
-    in0 = (v16u8)temp0;                                                      \
-    in2 = (v16u8)temp1;                                                      \
-    in4 = (v16u8)temp2;                                                      \
-    in6 = (v16u8)temp3;                                                      \
-    in8 = (v16u8)temp6;                                                      \
-    in10 = (v16u8)temp7;                                                     \
-    in1 = (v16u8)__msa_ilvl_d((v2i64)temp0, (v2i64)temp0);                   \
-    in3 = (v16u8)__msa_ilvl_d((v2i64)temp1, (v2i64)temp1);                   \
-    in5 = (v16u8)__msa_ilvl_d((v2i64)temp2, (v2i64)temp2);                   \
-    in7 = (v16u8)__msa_ilvl_d((v2i64)temp3, (v2i64)temp3);                   \
-    in9 = (v16u8)__msa_ilvl_d((v2i64)temp6, (v2i64)temp6);                   \
-    in11 = (v16u8)__msa_ilvl_d((v2i64)temp7, (v2i64)temp7);                  \
-  }
-
-static void postproc_down_across_chroma_msa(uint8_t *src_ptr, uint8_t *dst_ptr,
-                                            int32_t src_stride,
-                                            int32_t dst_stride, int32_t cols,
-                                            uint8_t *f) {
-  uint8_t *p_src = src_ptr;
-  uint8_t *p_dst = dst_ptr;
-  uint8_t *f_orig = f;
-  uint8_t *p_dst_st = dst_ptr;
-  uint16_t col;
-  uint64_t out0, out1, out2, out3;
-  v16u8 above2, above1, below2, below1, src, ref, ref_temp;
-  v16u8 inter0, inter1, inter2, inter3, inter4, inter5;
-  v16u8 inter6, inter7, inter8, inter9, inter10, inter11;
-
-  for (col = (cols / 16); col--;) {
-    ref = LD_UB(f);
-    LD_UB2(p_src - 2 * src_stride, src_stride, above2, above1);
-    src = LD_UB(p_src);
-    LD_UB2(p_src + 1 * src_stride, src_stride, below1, below2);
-    AOM_AVER_IF_RETAIN(above2, above1, src, below1, below2, ref, inter0);
-    above2 = LD_UB(p_src + 3 * src_stride);
-    AOM_AVER_IF_RETAIN(above1, src, below1, below2, above2, ref, inter1);
-    above1 = LD_UB(p_src + 4 * src_stride);
-    AOM_AVER_IF_RETAIN(src, below1, below2, above2, above1, ref, inter2);
-    src = LD_UB(p_src + 5 * src_stride);
-    AOM_AVER_IF_RETAIN(below1, below2, above2, above1, src, ref, inter3);
-    below1 = LD_UB(p_src + 6 * src_stride);
-    AOM_AVER_IF_RETAIN(below2, above2, above1, src, below1, ref, inter4);
-    below2 = LD_UB(p_src + 7 * src_stride);
-    AOM_AVER_IF_RETAIN(above2, above1, src, below1, below2, ref, inter5);
-    above2 = LD_UB(p_src + 8 * src_stride);
-    AOM_AVER_IF_RETAIN(above1, src, below1, below2, above2, ref, inter6);
-    above1 = LD_UB(p_src + 9 * src_stride);
-    AOM_AVER_IF_RETAIN(src, below1, below2, above2, above1, ref, inter7);
-    ST_UB8(inter0, inter1, inter2, inter3, inter4, inter5, inter6, inter7,
-           p_dst, dst_stride);
-
-    p_dst += 16;
-    p_src += 16;
-    f += 16;
-  }
-
-  if (0 != (cols / 16)) {
-    ref = LD_UB(f);
-    LD_UB2(p_src - 2 * src_stride, src_stride, above2, above1);
-    src = LD_UB(p_src);
-    LD_UB2(p_src + 1 * src_stride, src_stride, below1, below2);
-    AOM_AVER_IF_RETAIN(above2, above1, src, below1, below2, ref, inter0);
-    above2 = LD_UB(p_src + 3 * src_stride);
-    AOM_AVER_IF_RETAIN(above1, src, below1, below2, above2, ref, inter1);
-    above1 = LD_UB(p_src + 4 * src_stride);
-    AOM_AVER_IF_RETAIN(src, below1, below2, above2, above1, ref, inter2);
-    src = LD_UB(p_src + 5 * src_stride);
-    AOM_AVER_IF_RETAIN(below1, below2, above2, above1, src, ref, inter3);
-    below1 = LD_UB(p_src + 6 * src_stride);
-    AOM_AVER_IF_RETAIN(below2, above2, above1, src, below1, ref, inter4);
-    below2 = LD_UB(p_src + 7 * src_stride);
-    AOM_AVER_IF_RETAIN(above2, above1, src, below1, below2, ref, inter5);
-    above2 = LD_UB(p_src + 8 * src_stride);
-    AOM_AVER_IF_RETAIN(above1, src, below1, below2, above2, ref, inter6);
-    above1 = LD_UB(p_src + 9 * src_stride);
-    AOM_AVER_IF_RETAIN(src, below1, below2, above2, above1, ref, inter7);
-    out0 = __msa_copy_u_d((v2i64)inter0, 0);
-    out1 = __msa_copy_u_d((v2i64)inter1, 0);
-    out2 = __msa_copy_u_d((v2i64)inter2, 0);
-    out3 = __msa_copy_u_d((v2i64)inter3, 0);
-    SD4(out0, out1, out2, out3, p_dst, dst_stride);
-
-    out0 = __msa_copy_u_d((v2i64)inter4, 0);
-    out1 = __msa_copy_u_d((v2i64)inter5, 0);
-    out2 = __msa_copy_u_d((v2i64)inter6, 0);
-    out3 = __msa_copy_u_d((v2i64)inter7, 0);
-    SD4(out0, out1, out2, out3, p_dst + 4 * dst_stride, dst_stride);
-  }
-
-  f = f_orig;
-  p_dst = dst_ptr - 2;
-  LD_UB8(p_dst, dst_stride, inter0, inter1, inter2, inter3, inter4, inter5,
-         inter6, inter7);
-
-  for (col = 0; col < (cols / 8); ++col) {
-    ref = LD_UB(f);
-    f += 8;
-    AOM_TRANSPOSE12x8_UB_UB(inter0, inter1, inter2, inter3, inter4, inter5,
-                            inter6, inter7, inter8, inter9, inter10, inter11);
-    if (0 == col) {
-      above2 = inter2;
-      above1 = inter2;
-    } else {
-      above2 = inter0;
-      above1 = inter1;
-    }
-    src = inter2;
-    below1 = inter3;
-    below2 = inter4;
-    ref_temp = (v16u8)__msa_splati_b((v16i8)ref, 0);
-    AOM_AVER_IF_RETAIN(above2, above1, src, below1, below2, ref_temp, inter2);
-    above2 = inter5;
-    ref_temp = (v16u8)__msa_splati_b((v16i8)ref, 1);
-    AOM_AVER_IF_RETAIN(above1, src, below1, below2, above2, ref_temp, inter3);
-    above1 = inter6;
-    ref_temp = (v16u8)__msa_splati_b((v16i8)ref, 2);
-    AOM_AVER_IF_RETAIN(src, below1, below2, above2, above1, ref_temp, inter4);
-    src = inter7;
-    ref_temp = (v16u8)__msa_splati_b((v16i8)ref, 3);
-    AOM_AVER_IF_RETAIN(below1, below2, above2, above1, src, ref_temp, inter5);
-    below1 = inter8;
-    ref_temp = (v16u8)__msa_splati_b((v16i8)ref, 4);
-    AOM_AVER_IF_RETAIN(below2, above2, above1, src, below1, ref_temp, inter6);
-    below2 = inter9;
-    ref_temp = (v16u8)__msa_splati_b((v16i8)ref, 5);
-    AOM_AVER_IF_RETAIN(above2, above1, src, below1, below2, ref_temp, inter7);
-    if (col == (cols / 8 - 1)) {
-      above2 = inter9;
-    } else {
-      above2 = inter10;
-    }
-    ref_temp = (v16u8)__msa_splati_b((v16i8)ref, 6);
-    AOM_AVER_IF_RETAIN(above1, src, below1, below2, above2, ref_temp, inter8);
-    if (col == (cols / 8 - 1)) {
-      above1 = inter9;
-    } else {
-      above1 = inter11;
-    }
-    ref_temp = (v16u8)__msa_splati_b((v16i8)ref, 7);
-    AOM_AVER_IF_RETAIN(src, below1, below2, above2, above1, ref_temp, inter9);
-    TRANSPOSE8x8_UB_UB(inter2, inter3, inter4, inter5, inter6, inter7, inter8,
-                       inter9, inter2, inter3, inter4, inter5, inter6, inter7,
-                       inter8, inter9);
-    p_dst += 8;
-    LD_UB2(p_dst, dst_stride, inter0, inter1);
-    ST8x1_UB(inter2, p_dst_st);
-    ST8x1_UB(inter3, (p_dst_st + 1 * dst_stride));
-    LD_UB2(p_dst + 2 * dst_stride, dst_stride, inter2, inter3);
-    ST8x1_UB(inter4, (p_dst_st + 2 * dst_stride));
-    ST8x1_UB(inter5, (p_dst_st + 3 * dst_stride));
-    LD_UB2(p_dst + 4 * dst_stride, dst_stride, inter4, inter5);
-    ST8x1_UB(inter6, (p_dst_st + 4 * dst_stride));
-    ST8x1_UB(inter7, (p_dst_st + 5 * dst_stride));
-    LD_UB2(p_dst + 6 * dst_stride, dst_stride, inter6, inter7);
-    ST8x1_UB(inter8, (p_dst_st + 6 * dst_stride));
-    ST8x1_UB(inter9, (p_dst_st + 7 * dst_stride));
-    p_dst_st += 8;
-  }
-}
-
-static void postproc_down_across_luma_msa(uint8_t *src_ptr, uint8_t *dst_ptr,
-                                          int32_t src_stride,
-                                          int32_t dst_stride, int32_t cols,
-                                          uint8_t *f) {
-  uint8_t *p_src = src_ptr;
-  uint8_t *p_dst = dst_ptr;
-  uint8_t *p_dst_st = dst_ptr;
-  uint8_t *f_orig = f;
-  uint16_t col;
-  v16u8 above2, above1, below2, below1;
-  v16u8 src, ref, ref_temp;
-  v16u8 inter0, inter1, inter2, inter3, inter4, inter5, inter6;
-  v16u8 inter7, inter8, inter9, inter10, inter11;
-  v16u8 inter12, inter13, inter14, inter15;
-
-  for (col = (cols / 16); col--;) {
-    ref = LD_UB(f);
-    LD_UB2(p_src - 2 * src_stride, src_stride, above2, above1);
-    src = LD_UB(p_src);
-    LD_UB2(p_src + 1 * src_stride, src_stride, below1, below2);
-    AOM_AVER_IF_RETAIN(above2, above1, src, below1, below2, ref, inter0);
-    above2 = LD_UB(p_src + 3 * src_stride);
-    AOM_AVER_IF_RETAIN(above1, src, below1, below2, above2, ref, inter1);
-    above1 = LD_UB(p_src + 4 * src_stride);
-    AOM_AVER_IF_RETAIN(src, below1, below2, above2, above1, ref, inter2);
-    src = LD_UB(p_src + 5 * src_stride);
-    AOM_AVER_IF_RETAIN(below1, below2, above2, above1, src, ref, inter3);
-    below1 = LD_UB(p_src + 6 * src_stride);
-    AOM_AVER_IF_RETAIN(below2, above2, above1, src, below1, ref, inter4);
-    below2 = LD_UB(p_src + 7 * src_stride);
-    AOM_AVER_IF_RETAIN(above2, above1, src, below1, below2, ref, inter5);
-    above2 = LD_UB(p_src + 8 * src_stride);
-    AOM_AVER_IF_RETAIN(above1, src, below1, below2, above2, ref, inter6);
-    above1 = LD_UB(p_src + 9 * src_stride);
-    AOM_AVER_IF_RETAIN(src, below1, below2, above2, above1, ref, inter7);
-    src = LD_UB(p_src + 10 * src_stride);
-    AOM_AVER_IF_RETAIN(below1, below2, above2, above1, src, ref, inter8);
-    below1 = LD_UB(p_src + 11 * src_stride);
-    AOM_AVER_IF_RETAIN(below2, above2, above1, src, below1, ref, inter9);
-    below2 = LD_UB(p_src + 12 * src_stride);
-    AOM_AVER_IF_RETAIN(above2, above1, src, below1, below2, ref, inter10);
-    above2 = LD_UB(p_src + 13 * src_stride);
-    AOM_AVER_IF_RETAIN(above1, src, below1, below2, above2, ref, inter11);
-    above1 = LD_UB(p_src + 14 * src_stride);
-    AOM_AVER_IF_RETAIN(src, below1, below2, above2, above1, ref, inter12);
-    src = LD_UB(p_src + 15 * src_stride);
-    AOM_AVER_IF_RETAIN(below1, below2, above2, above1, src, ref, inter13);
-    below1 = LD_UB(p_src + 16 * src_stride);
-    AOM_AVER_IF_RETAIN(below2, above2, above1, src, below1, ref, inter14);
-    below2 = LD_UB(p_src + 17 * src_stride);
-    AOM_AVER_IF_RETAIN(above2, above1, src, below1, below2, ref, inter15);
-    ST_UB8(inter0, inter1, inter2, inter3, inter4, inter5, inter6, inter7,
-           p_dst, dst_stride);
-    ST_UB8(inter8, inter9, inter10, inter11, inter12, inter13, inter14, inter15,
-           p_dst + 8 * dst_stride, dst_stride);
-    p_src += 16;
-    p_dst += 16;
-    f += 16;
-  }
-
-  f = f_orig;
-  p_dst = dst_ptr - 2;
-  LD_UB8(p_dst, dst_stride, inter0, inter1, inter2, inter3, inter4, inter5,
-         inter6, inter7);
-  LD_UB8(p_dst + 8 * dst_stride, dst_stride, inter8, inter9, inter10, inter11,
-         inter12, inter13, inter14, inter15);
-
-  for (col = 0; col < cols / 8; ++col) {
-    ref = LD_UB(f);
-    f += 8;
-    TRANSPOSE12x16_B(inter0, inter1, inter2, inter3, inter4, inter5, inter6,
-                     inter7, inter8, inter9, inter10, inter11, inter12, inter13,
-                     inter14, inter15);
-    if (0 == col) {
-      above2 = inter2;
-      above1 = inter2;
-    } else {
-      above2 = inter0;
-      above1 = inter1;
-    }
-
-    src = inter2;
-    below1 = inter3;
-    below2 = inter4;
-    ref_temp = (v16u8)__msa_splati_b((v16i8)ref, 0);
-    AOM_AVER_IF_RETAIN(above2, above1, src, below1, below2, ref_temp, inter2);
-    above2 = inter5;
-    ref_temp = (v16u8)__msa_splati_b((v16i8)ref, 1);
-    AOM_AVER_IF_RETAIN(above1, src, below1, below2, above2, ref_temp, inter3);
-    above1 = inter6;
-    ref_temp = (v16u8)__msa_splati_b((v16i8)ref, 2);
-    AOM_AVER_IF_RETAIN(src, below1, below2, above2, above1, ref_temp, inter4);
-    src = inter7;
-    ref_temp = (v16u8)__msa_splati_b((v16i8)ref, 3);
-    AOM_AVER_IF_RETAIN(below1, below2, above2, above1, src, ref_temp, inter5);
-    below1 = inter8;
-    ref_temp = (v16u8)__msa_splati_b((v16i8)ref, 4);
-    AOM_AVER_IF_RETAIN(below2, above2, above1, src, below1, ref_temp, inter6);
-    below2 = inter9;
-    ref_temp = (v16u8)__msa_splati_b((v16i8)ref, 5);
-    AOM_AVER_IF_RETAIN(above2, above1, src, below1, below2, ref_temp, inter7);
-    if (col == (cols / 8 - 1)) {
-      above2 = inter9;
-    } else {
-      above2 = inter10;
-    }
-    ref_temp = (v16u8)__msa_splati_b((v16i8)ref, 6);
-    AOM_AVER_IF_RETAIN(above1, src, below1, below2, above2, ref_temp, inter8);
-    if (col == (cols / 8 - 1)) {
-      above1 = inter9;
-    } else {
-      above1 = inter11;
-    }
-    ref_temp = (v16u8)__msa_splati_b((v16i8)ref, 7);
-    AOM_AVER_IF_RETAIN(src, below1, below2, above2, above1, ref_temp, inter9);
-    AOM_TRANSPOSE8x16_UB_UB(inter2, inter3, inter4, inter5, inter6, inter7,
-                            inter8, inter9, inter2, inter3, inter4, inter5,
-                            inter6, inter7, inter8, inter9, inter10, inter11,
-                            inter12, inter13, inter14, inter15, above2, above1);
-
-    p_dst += 8;
-    LD_UB2(p_dst, dst_stride, inter0, inter1);
-    ST8x1_UB(inter2, p_dst_st);
-    ST8x1_UB(inter3, (p_dst_st + 1 * dst_stride));
-    LD_UB2(p_dst + 2 * dst_stride, dst_stride, inter2, inter3);
-    ST8x1_UB(inter4, (p_dst_st + 2 * dst_stride));
-    ST8x1_UB(inter5, (p_dst_st + 3 * dst_stride));
-    LD_UB2(p_dst + 4 * dst_stride, dst_stride, inter4, inter5);
-    ST8x1_UB(inter6, (p_dst_st + 4 * dst_stride));
-    ST8x1_UB(inter7, (p_dst_st + 5 * dst_stride));
-    LD_UB2(p_dst + 6 * dst_stride, dst_stride, inter6, inter7);
-    ST8x1_UB(inter8, (p_dst_st + 6 * dst_stride));
-    ST8x1_UB(inter9, (p_dst_st + 7 * dst_stride));
-    LD_UB2(p_dst + 8 * dst_stride, dst_stride, inter8, inter9);
-    ST8x1_UB(inter10, (p_dst_st + 8 * dst_stride));
-    ST8x1_UB(inter11, (p_dst_st + 9 * dst_stride));
-    LD_UB2(p_dst + 10 * dst_stride, dst_stride, inter10, inter11);
-    ST8x1_UB(inter12, (p_dst_st + 10 * dst_stride));
-    ST8x1_UB(inter13, (p_dst_st + 11 * dst_stride));
-    LD_UB2(p_dst + 12 * dst_stride, dst_stride, inter12, inter13);
-    ST8x1_UB(inter14, (p_dst_st + 12 * dst_stride));
-    ST8x1_UB(inter15, (p_dst_st + 13 * dst_stride));
-    LD_UB2(p_dst + 14 * dst_stride, dst_stride, inter14, inter15);
-    ST8x1_UB(above2, (p_dst_st + 14 * dst_stride));
-    ST8x1_UB(above1, (p_dst_st + 15 * dst_stride));
-    p_dst_st += 8;
-  }
-}
-
-void aom_post_proc_down_and_across_mb_row_msa(uint8_t *src, uint8_t *dst,
-                                              int32_t src_stride,
-                                              int32_t dst_stride, int32_t cols,
-                                              uint8_t *f, int32_t size) {
-  if (8 == size) {
-    postproc_down_across_chroma_msa(src, dst, src_stride, dst_stride, cols, f);
-  } else if (16 == size) {
-    postproc_down_across_luma_msa(src, dst, src_stride, dst_stride, cols, f);
-  }
-}
-
-void aom_mbpost_proc_across_ip_msa(uint8_t *src_ptr, int32_t pitch,
-                                   int32_t rows, int32_t cols, int32_t flimit) {
-  int32_t row, col, cnt;
-  uint8_t *src_dup = src_ptr;
-  v16u8 src0, src, tmp_orig;
-  v16u8 tmp = { 0 };
-  v16i8 zero = { 0 };
-  v8u16 sum_h, src_r_h, src_l_h;
-  v4u32 src_r_w, src_l_w;
-  v4i32 flimit_vec;
-
-  flimit_vec = __msa_fill_w(flimit);
-  for (row = rows; row--;) {
-    int32_t sum_sq = 0;
-    int32_t sum = 0;
-    src0 = (v16u8)__msa_fill_b(src_dup[0]);
-    ST8x1_UB(src0, (src_dup - 8));
-
-    src0 = (v16u8)__msa_fill_b(src_dup[cols - 1]);
-    ST_UB(src0, src_dup + cols);
-    src_dup[cols + 16] = src_dup[cols - 1];
-    tmp_orig = (v16u8)__msa_ldi_b(0);
-    tmp_orig[15] = tmp[15];
-    src = LD_UB(src_dup - 8);
-    src[15] = 0;
-    ILVRL_B2_UH(zero, src, src_r_h, src_l_h);
-    src_r_w = __msa_dotp_u_w(src_r_h, src_r_h);
-    src_l_w = __msa_dotp_u_w(src_l_h, src_l_h);
-    sum_sq = HADD_SW_S32(src_r_w);
-    sum_sq += HADD_SW_S32(src_l_w);
-    sum_h = __msa_hadd_u_h(src, src);
-    sum = HADD_UH_U32(sum_h);
-    {
-      v16u8 src7, src8, src_r, src_l;
-      v16i8 mask;
-      v8u16 add_r, add_l;
-      v8i16 sub_r, sub_l, sum_r, sum_l, mask0, mask1;
-      v4i32 sum_sq0, sum_sq1, sum_sq2, sum_sq3;
-      v4i32 sub0, sub1, sub2, sub3;
-      v4i32 sum0_w, sum1_w, sum2_w, sum3_w;
-      v4i32 mul0, mul1, mul2, mul3;
-      v4i32 total0, total1, total2, total3;
-      v8i16 const8 = __msa_fill_h(8);
-
-      src7 = LD_UB(src_dup + 7);
-      src8 = LD_UB(src_dup - 8);
-      for (col = 0; col < (cols >> 4); ++col) {
-        ILVRL_B2_UB(src7, src8, src_r, src_l);
-        HSUB_UB2_SH(src_r, src_l, sub_r, sub_l);
-
-        sum_r[0] = sum + sub_r[0];
-        for (cnt = 0; cnt < 7; ++cnt) {
-          sum_r[cnt + 1] = sum_r[cnt] + sub_r[cnt + 1];
-        }
-        sum_l[0] = sum_r[7] + sub_l[0];
-        for (cnt = 0; cnt < 7; ++cnt) {
-          sum_l[cnt + 1] = sum_l[cnt] + sub_l[cnt + 1];
-        }
-        sum = sum_l[7];
-        src = LD_UB(src_dup + 16 * col);
-        ILVRL_B2_UH(zero, src, src_r_h, src_l_h);
-        src7 = (v16u8)((const8 + sum_r + (v8i16)src_r_h) >> 4);
-        src8 = (v16u8)((const8 + sum_l + (v8i16)src_l_h) >> 4);
-        tmp = (v16u8)__msa_pckev_b((v16i8)src8, (v16i8)src7);
-
-        HADD_UB2_UH(src_r, src_l, add_r, add_l);
-        UNPCK_SH_SW(sub_r, sub0, sub1);
-        UNPCK_SH_SW(sub_l, sub2, sub3);
-        ILVR_H2_SW(zero, add_r, zero, add_l, sum0_w, sum2_w);
-        ILVL_H2_SW(zero, add_r, zero, add_l, sum1_w, sum3_w);
-        MUL4(sum0_w, sub0, sum1_w, sub1, sum2_w, sub2, sum3_w, sub3, mul0, mul1,
-             mul2, mul3);
-        sum_sq0[0] = sum_sq + mul0[0];
-        for (cnt = 0; cnt < 3; ++cnt) {
-          sum_sq0[cnt + 1] = sum_sq0[cnt] + mul0[cnt + 1];
-        }
-        sum_sq1[0] = sum_sq0[3] + mul1[0];
-        for (cnt = 0; cnt < 3; ++cnt) {
-          sum_sq1[cnt + 1] = sum_sq1[cnt] + mul1[cnt + 1];
-        }
-        sum_sq2[0] = sum_sq1[3] + mul2[0];
-        for (cnt = 0; cnt < 3; ++cnt) {
-          sum_sq2[cnt + 1] = sum_sq2[cnt] + mul2[cnt + 1];
-        }
-        sum_sq3[0] = sum_sq2[3] + mul3[0];
-        for (cnt = 0; cnt < 3; ++cnt) {
-          sum_sq3[cnt + 1] = sum_sq3[cnt] + mul3[cnt + 1];
-        }
-        sum_sq = sum_sq3[3];
-
-        UNPCK_SH_SW(sum_r, sum0_w, sum1_w);
-        UNPCK_SH_SW(sum_l, sum2_w, sum3_w);
-        total0 = sum_sq0 * __msa_ldi_w(15);
-        total0 -= sum0_w * sum0_w;
-        total1 = sum_sq1 * __msa_ldi_w(15);
-        total1 -= sum1_w * sum1_w;
-        total2 = sum_sq2 * __msa_ldi_w(15);
-        total2 -= sum2_w * sum2_w;
-        total3 = sum_sq3 * __msa_ldi_w(15);
-        total3 -= sum3_w * sum3_w;
-        total0 = (total0 < flimit_vec);
-        total1 = (total1 < flimit_vec);
-        total2 = (total2 < flimit_vec);
-        total3 = (total3 < flimit_vec);
-        PCKEV_H2_SH(total1, total0, total3, total2, mask0, mask1);
-        mask = __msa_pckev_b((v16i8)mask1, (v16i8)mask0);
-        tmp = __msa_bmz_v(tmp, src, (v16u8)mask);
-
-        if (col == 0) {
-          uint64_t src_d;
-
-          src_d = __msa_copy_u_d((v2i64)tmp_orig, 1);
-          SD(src_d, (src_dup - 8));
-        }
-
-        src7 = LD_UB(src_dup + 16 * (col + 1) + 7);
-        src8 = LD_UB(src_dup + 16 * (col + 1) - 8);
-        ST_UB(tmp, (src_dup + (16 * col)));
-      }
-
-      src_dup += pitch;
-    }
-  }
-}
-
-void aom_mbpost_proc_down_msa(uint8_t *dst_ptr, int32_t pitch, int32_t rows,
-                              int32_t cols, int32_t flimit) {
-  int32_t row, col, cnt, i;
-  const int16_t *rv3 = &aom_rv[63 & rand()];
-  v4i32 flimit_vec;
-  v16u8 dst7, dst8, dst_r_b, dst_l_b;
-  v16i8 mask;
-  v8u16 add_r, add_l;
-  v8i16 dst_r_h, dst_l_h, sub_r, sub_l, mask0, mask1;
-  v4i32 sub0, sub1, sub2, sub3, total0, total1, total2, total3;
-
-  flimit_vec = __msa_fill_w(flimit);
-
-  for (col = 0; col < (cols >> 4); ++col) {
-    uint8_t *dst_tmp = &dst_ptr[col << 4];
-    v16u8 dst;
-    v16i8 zero = { 0 };
-    v16u8 tmp[16];
-    v8i16 mult0, mult1, rv2_0, rv2_1;
-    v8i16 sum0_h = { 0 };
-    v8i16 sum1_h = { 0 };
-    v4i32 mul0 = { 0 };
-    v4i32 mul1 = { 0 };
-    v4i32 mul2 = { 0 };
-    v4i32 mul3 = { 0 };
-    v4i32 sum0_w, sum1_w, sum2_w, sum3_w;
-    v4i32 add0, add1, add2, add3;
-    const int16_t *rv2[16];
-
-    dst = LD_UB(dst_tmp);
-    for (cnt = (col << 4), i = 0; i < 16; ++cnt) {
-      rv2[i] = rv3 + ((cnt * 17) & 127);
-      ++i;
-    }
-    for (cnt = -8; cnt < 0; ++cnt) {
-      ST_UB(dst, dst_tmp + cnt * pitch);
-    }
-
-    dst = LD_UB((dst_tmp + (rows - 1) * pitch));
-    for (cnt = rows; cnt < rows + 17; ++cnt) {
-      ST_UB(dst, dst_tmp + cnt * pitch);
-    }
-    for (cnt = -8; cnt <= 6; ++cnt) {
-      dst = LD_UB(dst_tmp + (cnt * pitch));
-      UNPCK_UB_SH(dst, dst_r_h, dst_l_h);
-      MUL2(dst_r_h, dst_r_h, dst_l_h, dst_l_h, mult0, mult1);
-      mul0 += (v4i32)__msa_ilvr_h((v8i16)zero, (v8i16)mult0);
-      mul1 += (v4i32)__msa_ilvl_h((v8i16)zero, (v8i16)mult0);
-      mul2 += (v4i32)__msa_ilvr_h((v8i16)zero, (v8i16)mult1);
-      mul3 += (v4i32)__msa_ilvl_h((v8i16)zero, (v8i16)mult1);
-      ADD2(sum0_h, dst_r_h, sum1_h, dst_l_h, sum0_h, sum1_h);
-    }
-
-    for (row = 0; row < (rows + 8); ++row) {
-      for (i = 0; i < 8; ++i) {
-        rv2_0[i] = *(rv2[i] + (row & 127));
-        rv2_1[i] = *(rv2[i + 8] + (row & 127));
-      }
-      dst7 = LD_UB(dst_tmp + (7 * pitch));
-      dst8 = LD_UB(dst_tmp - (8 * pitch));
-      ILVRL_B2_UB(dst7, dst8, dst_r_b, dst_l_b);
-
-      HSUB_UB2_SH(dst_r_b, dst_l_b, sub_r, sub_l);
-      UNPCK_SH_SW(sub_r, sub0, sub1);
-      UNPCK_SH_SW(sub_l, sub2, sub3);
-      sum0_h += sub_r;
-      sum1_h += sub_l;
-
-      HADD_UB2_UH(dst_r_b, dst_l_b, add_r, add_l);
-
-      ILVRL_H2_SW(zero, add_r, add0, add1);
-      ILVRL_H2_SW(zero, add_l, add2, add3);
-      mul0 += add0 * sub0;
-      mul1 += add1 * sub1;
-      mul2 += add2 * sub2;
-      mul3 += add3 * sub3;
-      dst = LD_UB(dst_tmp);
-      ILVRL_B2_SH(zero, dst, dst_r_h, dst_l_h);
-      dst7 = (v16u8)((rv2_0 + sum0_h + dst_r_h) >> 4);
-      dst8 = (v16u8)((rv2_1 + sum1_h + dst_l_h) >> 4);
-      tmp[row & 15] = (v16u8)__msa_pckev_b((v16i8)dst8, (v16i8)dst7);
-
-      UNPCK_SH_SW(sum0_h, sum0_w, sum1_w);
-      UNPCK_SH_SW(sum1_h, sum2_w, sum3_w);
-      total0 = mul0 * __msa_ldi_w(15);
-      total0 -= sum0_w * sum0_w;
-      total1 = mul1 * __msa_ldi_w(15);
-      total1 -= sum1_w * sum1_w;
-      total2 = mul2 * __msa_ldi_w(15);
-      total2 -= sum2_w * sum2_w;
-      total3 = mul3 * __msa_ldi_w(15);
-      total3 -= sum3_w * sum3_w;
-      total0 = (total0 < flimit_vec);
-      total1 = (total1 < flimit_vec);
-      total2 = (total2 < flimit_vec);
-      total3 = (total3 < flimit_vec);
-      PCKEV_H2_SH(total1, total0, total3, total2, mask0, mask1);
-      mask = __msa_pckev_b((v16i8)mask1, (v16i8)mask0);
-      tmp[row & 15] = __msa_bmz_v(tmp[row & 15], dst, (v16u8)mask);
-
-      if (row >= 8) {
-        ST_UB(tmp[(row - 8) & 15], (dst_tmp - 8 * pitch));
-      }
-
-      dst_tmp += pitch;
-    }
-  }
-}
--- a/aom_dsp/mips/fwd_txfm_msa.h
+++ b/aom_dsp/mips/fwd_txfm_msa.h
@@ -1,381 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_DSP_MIPS_FWD_TXFM_MSA_H_
-#define AOM_DSP_MIPS_FWD_TXFM_MSA_H_
-
-#include "aom_dsp/mips/txfm_macros_msa.h"
-#include "aom_dsp/txfm_common.h"
-
-#define LD_HADD(psrc, stride)                                                  \
-  ({                                                                           \
-    v8i16 in0_m, in1_m, in2_m, in3_m, in4_m, in5_m, in6_m, in7_m;              \
-    v4i32 vec_w_m;                                                             \
-                                                                               \
-    LD_SH4((psrc), stride, in0_m, in1_m, in2_m, in3_m);                        \
-    ADD2(in0_m, in1_m, in2_m, in3_m, in0_m, in2_m);                            \
-    LD_SH4(((psrc) + 4 * stride), stride, in4_m, in5_m, in6_m, in7_m);         \
-    ADD4(in4_m, in5_m, in6_m, in7_m, in0_m, in2_m, in4_m, in6_m, in4_m, in6_m, \
-         in0_m, in4_m);                                                        \
-    in0_m += in4_m;                                                            \
-                                                                               \
-    vec_w_m = __msa_hadd_s_w(in0_m, in0_m);                                    \
-    HADD_SW_S32(vec_w_m);                                                      \
-  })
-
-#define AOM_FDCT4(in0, in1, in2, in3, out0, out1, out2, out3)                  \
-  {                                                                            \
-    v8i16 cnst0_m, cnst1_m, cnst2_m, cnst3_m;                                  \
-    v8i16 vec0_m, vec1_m, vec2_m, vec3_m;                                      \
-    v4i32 vec4_m, vec5_m, vec6_m, vec7_m;                                      \
-    v8i16 coeff_m = {                                                          \
-      cospi_16_64, -cospi_16_64, cospi_8_64, cospi_24_64, -cospi_8_64, 0, 0, 0 \
-    };                                                                         \
-                                                                               \
-    BUTTERFLY_4(in0, in1, in2, in3, vec0_m, vec1_m, vec2_m, vec3_m);           \
-    ILVR_H2_SH(vec1_m, vec0_m, vec3_m, vec2_m, vec0_m, vec2_m);                \
-    SPLATI_H2_SH(coeff_m, 0, 1, cnst0_m, cnst1_m);                             \
-    cnst1_m = __msa_ilvev_h(cnst1_m, cnst0_m);                                 \
-    vec5_m = __msa_dotp_s_w(vec0_m, cnst1_m);                                  \
-                                                                               \
-    SPLATI_H2_SH(coeff_m, 4, 3, cnst2_m, cnst3_m);                             \
-    cnst2_m = __msa_ilvev_h(cnst3_m, cnst2_m);                                 \
-    vec7_m = __msa_dotp_s_w(vec2_m, cnst2_m);                                  \
-                                                                               \
-    vec4_m = __msa_dotp_s_w(vec0_m, cnst0_m);                                  \
-    cnst2_m = __msa_splati_h(coeff_m, 2);                                      \
-    cnst2_m = __msa_ilvev_h(cnst2_m, cnst3_m);                                 \
-    vec6_m = __msa_dotp_s_w(vec2_m, cnst2_m);                                  \
-                                                                               \
-    SRARI_W4_SW(vec4_m, vec5_m, vec6_m, vec7_m, DCT_CONST_BITS);               \
-    PCKEV_H4_SH(vec4_m, vec4_m, vec5_m, vec5_m, vec6_m, vec6_m, vec7_m,        \
-                vec7_m, out0, out2, out1, out3);                               \
-  }
-
-#define SRLI_AVE_S_4V_H(in0, in1, in2, in3, in4, in5, in6, in7)              \
-  {                                                                          \
-    v8i16 vec0_m, vec1_m, vec2_m, vec3_m, vec4_m, vec5_m, vec6_m, vec7_m;    \
-                                                                             \
-    SRLI_H4_SH(in0, in1, in2, in3, vec0_m, vec1_m, vec2_m, vec3_m, 15);      \
-    SRLI_H4_SH(in4, in5, in6, in7, vec4_m, vec5_m, vec6_m, vec7_m, 15);      \
-    AVE_SH4_SH(vec0_m, in0, vec1_m, in1, vec2_m, in2, vec3_m, in3, in0, in1, \
-               in2, in3);                                                    \
-    AVE_SH4_SH(vec4_m, in4, vec5_m, in5, vec6_m, in6, vec7_m, in7, in4, in5, \
-               in6, in7);                                                    \
-  }
-
-#define AOM_FDCT8(in0, in1, in2, in3, in4, in5, in6, in7, out0, out1, out2,  \
-                  out3, out4, out5, out6, out7)                              \
-  {                                                                          \
-    v8i16 s0_m, s1_m, s2_m, s3_m, s4_m, s5_m, s6_m;                          \
-    v8i16 s7_m, x0_m, x1_m, x2_m, x3_m;                                      \
-    v8i16 coeff_m = { cospi_16_64, -cospi_16_64, cospi_8_64,  cospi_24_64,   \
-                      cospi_4_64,  cospi_28_64,  cospi_12_64, cospi_20_64 }; \
-                                                                             \
-    /* FDCT stage1 */                                                        \
-    BUTTERFLY_8(in0, in1, in2, in3, in4, in5, in6, in7, s0_m, s1_m, s2_m,    \
-                s3_m, s4_m, s5_m, s6_m, s7_m);                               \
-    BUTTERFLY_4(s0_m, s1_m, s2_m, s3_m, x0_m, x1_m, x2_m, x3_m);             \
-    ILVL_H2_SH(x1_m, x0_m, x3_m, x2_m, s0_m, s2_m);                          \
-    ILVR_H2_SH(x1_m, x0_m, x3_m, x2_m, s1_m, s3_m);                          \
-    SPLATI_H2_SH(coeff_m, 0, 1, x0_m, x1_m);                                 \
-    x1_m = __msa_ilvev_h(x1_m, x0_m);                                        \
-    out4 = DOT_SHIFT_RIGHT_PCK_H(s0_m, s1_m, x1_m);                          \
-                                                                             \
-    SPLATI_H2_SH(coeff_m, 2, 3, x2_m, x3_m);                                 \
-    x2_m = -x2_m;                                                            \
-    x2_m = __msa_ilvev_h(x3_m, x2_m);                                        \
-    out6 = DOT_SHIFT_RIGHT_PCK_H(s2_m, s3_m, x2_m);                          \
-                                                                             \
-    out0 = DOT_SHIFT_RIGHT_PCK_H(s0_m, s1_m, x0_m);                          \
-    x2_m = __msa_splati_h(coeff_m, 2);                                       \
-    x2_m = __msa_ilvev_h(x2_m, x3_m);                                        \
-    out2 = DOT_SHIFT_RIGHT_PCK_H(s2_m, s3_m, x2_m);                          \
-                                                                             \
-    /* stage2 */                                                             \
-    ILVRL_H2_SH(s5_m, s6_m, s1_m, s0_m);                                     \
-                                                                             \
-    s6_m = DOT_SHIFT_RIGHT_PCK_H(s0_m, s1_m, x0_m);                          \
-    s5_m = DOT_SHIFT_RIGHT_PCK_H(s0_m, s1_m, x1_m);                          \
-                                                                             \
-    /* stage3 */                                                             \
-    BUTTERFLY_4(s4_m, s7_m, s6_m, s5_m, x0_m, x3_m, x2_m, x1_m);             \
-                                                                             \
-    /* stage4 */                                                             \
-    ILVL_H2_SH(x3_m, x0_m, x2_m, x1_m, s4_m, s6_m);                          \
-    ILVR_H2_SH(x3_m, x0_m, x2_m, x1_m, s5_m, s7_m);                          \
-                                                                             \
-    SPLATI_H2_SH(coeff_m, 4, 5, x0_m, x1_m);                                 \
-    x1_m = __msa_ilvev_h(x0_m, x1_m);                                        \
-    out1 = DOT_SHIFT_RIGHT_PCK_H(s4_m, s5_m, x1_m);                          \
-                                                                             \
-    SPLATI_H2_SH(coeff_m, 6, 7, x2_m, x3_m);                                 \
-    x2_m = __msa_ilvev_h(x3_m, x2_m);                                        \
-    out5 = DOT_SHIFT_RIGHT_PCK_H(s6_m, s7_m, x2_m);                          \
-                                                                             \
-    x1_m = __msa_splati_h(coeff_m, 5);                                       \
-    x0_m = -x0_m;                                                            \
-    x0_m = __msa_ilvev_h(x1_m, x0_m);                                        \
-    out7 = DOT_SHIFT_RIGHT_PCK_H(s4_m, s5_m, x0_m);                          \
-                                                                             \
-    x2_m = __msa_splati_h(coeff_m, 6);                                       \
-    x3_m = -x3_m;                                                            \
-    x2_m = __msa_ilvev_h(x2_m, x3_m);                                        \
-    out3 = DOT_SHIFT_RIGHT_PCK_H(s6_m, s7_m, x2_m);                          \
-  }
-
-#define FDCT8x16_EVEN(in0, in1, in2, in3, in4, in5, in6, in7, out0, out1,    \
-                      out2, out3, out4, out5, out6, out7)                    \
-  {                                                                          \
-    v8i16 s0_m, s1_m, s2_m, s3_m, s4_m, s5_m, s6_m, s7_m;                    \
-    v8i16 x0_m, x1_m, x2_m, x3_m;                                            \
-    v8i16 coeff_m = { cospi_16_64, -cospi_16_64, cospi_8_64,  cospi_24_64,   \
-                      cospi_4_64,  cospi_28_64,  cospi_12_64, cospi_20_64 }; \
-                                                                             \
-    /* FDCT stage1 */                                                        \
-    BUTTERFLY_8(in0, in1, in2, in3, in4, in5, in6, in7, s0_m, s1_m, s2_m,    \
-                s3_m, s4_m, s5_m, s6_m, s7_m);                               \
-    BUTTERFLY_4(s0_m, s1_m, s2_m, s3_m, x0_m, x1_m, x2_m, x3_m);             \
-    ILVL_H2_SH(x1_m, x0_m, x3_m, x2_m, s0_m, s2_m);                          \
-    ILVR_H2_SH(x1_m, x0_m, x3_m, x2_m, s1_m, s3_m);                          \
-    SPLATI_H2_SH(coeff_m, 0, 1, x0_m, x1_m);                                 \
-    x1_m = __msa_ilvev_h(x1_m, x0_m);                                        \
-    out4 = DOT_SHIFT_RIGHT_PCK_H(s0_m, s1_m, x1_m);                          \
-                                                                             \
-    SPLATI_H2_SH(coeff_m, 2, 3, x2_m, x3_m);                                 \
-    x2_m = -x2_m;                                                            \
-    x2_m = __msa_ilvev_h(x3_m, x2_m);                                        \
-    out6 = DOT_SHIFT_RIGHT_PCK_H(s2_m, s3_m, x2_m);                          \
-                                                                             \
-    out0 = DOT_SHIFT_RIGHT_PCK_H(s0_m, s1_m, x0_m);                          \
-    x2_m = __msa_splati_h(coeff_m, 2);                                       \
-    x2_m = __msa_ilvev_h(x2_m, x3_m);                                        \
-    out2 = DOT_SHIFT_RIGHT_PCK_H(s2_m, s3_m, x2_m);                          \
-                                                                             \
-    /* stage2 */                                                             \
-    ILVRL_H2_SH(s5_m, s6_m, s1_m, s0_m);                                     \
-                                                                             \
-    s6_m = DOT_SHIFT_RIGHT_PCK_H(s0_m, s1_m, x0_m);                          \
-    s5_m = DOT_SHIFT_RIGHT_PCK_H(s0_m, s1_m, x1_m);                          \
-                                                                             \
-    /* stage3 */                                                             \
-    BUTTERFLY_4(s4_m, s7_m, s6_m, s5_m, x0_m, x3_m, x2_m, x1_m);             \
-                                                                             \
-    /* stage4 */                                                             \
-    ILVL_H2_SH(x3_m, x0_m, x2_m, x1_m, s4_m, s6_m);                          \
-    ILVR_H2_SH(x3_m, x0_m, x2_m, x1_m, s5_m, s7_m);                          \
-                                                                             \
-    SPLATI_H2_SH(coeff_m, 4, 5, x0_m, x1_m);                                 \
-    x1_m = __msa_ilvev_h(x0_m, x1_m);                                        \
-    out1 = DOT_SHIFT_RIGHT_PCK_H(s4_m, s5_m, x1_m);                          \
-                                                                             \
-    SPLATI_H2_SH(coeff_m, 6, 7, x2_m, x3_m);                                 \
-    x2_m = __msa_ilvev_h(x3_m, x2_m);                                        \
-    out5 = DOT_SHIFT_RIGHT_PCK_H(s6_m, s7_m, x2_m);                          \
-                                                                             \
-    x1_m = __msa_splati_h(coeff_m, 5);                                       \
-    x0_m = -x0_m;                                                            \
-    x0_m = __msa_ilvev_h(x1_m, x0_m);                                        \
-    out7 = DOT_SHIFT_RIGHT_PCK_H(s4_m, s5_m, x0_m);                          \
-                                                                             \
-    x2_m = __msa_splati_h(coeff_m, 6);                                       \
-    x3_m = -x3_m;                                                            \
-    x2_m = __msa_ilvev_h(x2_m, x3_m);                                        \
-    out3 = DOT_SHIFT_RIGHT_PCK_H(s6_m, s7_m, x2_m);                          \
-  }
-
-#define FDCT8x16_ODD(input0, input1, input2, input3, input4, input5, input6,   \
-                     input7, out1, out3, out5, out7, out9, out11, out13,       \
-                     out15)                                                    \
-  {                                                                            \
-    v8i16 stp21_m, stp22_m, stp23_m, stp24_m, stp25_m, stp26_m;                \
-    v8i16 stp30_m, stp31_m, stp32_m, stp33_m, stp34_m, stp35_m;                \
-    v8i16 stp36_m, stp37_m, vec0_m, vec1_m;                                    \
-    v8i16 vec2_m, vec3_m, vec4_m, vec5_m, vec6_m;                              \
-    v8i16 cnst0_m, cnst1_m, cnst4_m, cnst5_m;                                  \
-    v8i16 coeff_m = { cospi_16_64, -cospi_16_64, cospi_8_64,  cospi_24_64,     \
-                      -cospi_8_64, -cospi_24_64, cospi_12_64, cospi_20_64 };   \
-    v8i16 coeff1_m = { cospi_2_64,  cospi_30_64, cospi_14_64, cospi_18_64,     \
-                       cospi_10_64, cospi_22_64, cospi_6_64,  cospi_26_64 };   \
-    v8i16 coeff2_m = {                                                         \
-      -cospi_2_64, -cospi_10_64, -cospi_18_64, -cospi_26_64, 0, 0, 0, 0        \
-    };                                                                         \
-                                                                               \
-    /* stp 1 */                                                                \
-    ILVL_H2_SH(input2, input5, input3, input4, vec2_m, vec4_m);                \
-    ILVR_H2_SH(input2, input5, input3, input4, vec3_m, vec5_m);                \
-                                                                               \
-    cnst4_m = __msa_splati_h(coeff_m, 0);                                      \
-    stp25_m = DOT_SHIFT_RIGHT_PCK_H(vec2_m, vec3_m, cnst4_m);                  \
-                                                                               \
-    cnst5_m = __msa_splati_h(coeff_m, 1);                                      \
-    cnst5_m = __msa_ilvev_h(cnst5_m, cnst4_m);                                 \
-    stp22_m = DOT_SHIFT_RIGHT_PCK_H(vec2_m, vec3_m, cnst5_m);                  \
-    stp24_m = DOT_SHIFT_RIGHT_PCK_H(vec4_m, vec5_m, cnst4_m);                  \
-    stp23_m = DOT_SHIFT_RIGHT_PCK_H(vec4_m, vec5_m, cnst5_m);                  \
-                                                                               \
-    /* stp2 */                                                                 \
-    BUTTERFLY_4(input0, input1, stp22_m, stp23_m, stp30_m, stp31_m, stp32_m,   \
-                stp33_m);                                                      \
-    BUTTERFLY_4(input7, input6, stp25_m, stp24_m, stp37_m, stp36_m, stp35_m,   \
-                stp34_m);                                                      \
-                                                                               \
-    ILVL_H2_SH(stp36_m, stp31_m, stp35_m, stp32_m, vec2_m, vec4_m);            \
-    ILVR_H2_SH(stp36_m, stp31_m, stp35_m, stp32_m, vec3_m, vec5_m);            \
-                                                                               \
-    SPLATI_H2_SH(coeff_m, 2, 3, cnst0_m, cnst1_m);                             \
-    cnst0_m = __msa_ilvev_h(cnst0_m, cnst1_m);                                 \
-    stp26_m = DOT_SHIFT_RIGHT_PCK_H(vec2_m, vec3_m, cnst0_m);                  \
-                                                                               \
-    cnst0_m = __msa_splati_h(coeff_m, 4);                                      \
-    cnst1_m = __msa_ilvev_h(cnst1_m, cnst0_m);                                 \
-    stp21_m = DOT_SHIFT_RIGHT_PCK_H(vec2_m, vec3_m, cnst1_m);                  \
-                                                                               \
-    SPLATI_H2_SH(coeff_m, 5, 2, cnst0_m, cnst1_m);                             \
-    cnst1_m = __msa_ilvev_h(cnst0_m, cnst1_m);                                 \
-    stp25_m = DOT_SHIFT_RIGHT_PCK_H(vec4_m, vec5_m, cnst1_m);                  \
-                                                                               \
-    cnst0_m = __msa_splati_h(coeff_m, 3);                                      \
-    cnst1_m = __msa_ilvev_h(cnst1_m, cnst0_m);                                 \
-    stp22_m = DOT_SHIFT_RIGHT_PCK_H(vec4_m, vec5_m, cnst1_m);                  \
-                                                                               \
-    /* stp4 */                                                                 \
-    BUTTERFLY_4(stp30_m, stp37_m, stp26_m, stp21_m, vec6_m, vec2_m, vec4_m,    \
-                vec5_m);                                                       \
-    BUTTERFLY_4(stp33_m, stp34_m, stp25_m, stp22_m, stp21_m, stp23_m, stp24_m, \
-                stp31_m);                                                      \
-                                                                               \
-    ILVRL_H2_SH(vec2_m, vec6_m, vec1_m, vec0_m);                               \
-    SPLATI_H2_SH(coeff1_m, 0, 1, cnst0_m, cnst1_m);                            \
-    cnst0_m = __msa_ilvev_h(cnst0_m, cnst1_m);                                 \
-                                                                               \
-    out1 = DOT_SHIFT_RIGHT_PCK_H(vec0_m, vec1_m, cnst0_m);                     \
-                                                                               \
-    cnst0_m = __msa_splati_h(coeff2_m, 0);                                     \
-    cnst0_m = __msa_ilvev_h(cnst1_m, cnst0_m);                                 \
-    out15 = DOT_SHIFT_RIGHT_PCK_H(vec0_m, vec1_m, cnst0_m);                    \
-                                                                               \
-    ILVRL_H2_SH(vec4_m, vec5_m, vec1_m, vec0_m);                               \
-    SPLATI_H2_SH(coeff1_m, 2, 3, cnst0_m, cnst1_m);                            \
-    cnst1_m = __msa_ilvev_h(cnst1_m, cnst0_m);                                 \
-                                                                               \
-    out9 = DOT_SHIFT_RIGHT_PCK_H(vec0_m, vec1_m, cnst1_m);                     \
-                                                                               \
-    cnst1_m = __msa_splati_h(coeff2_m, 2);                                     \
-    cnst0_m = __msa_ilvev_h(cnst0_m, cnst1_m);                                 \
-    out7 = DOT_SHIFT_RIGHT_PCK_H(vec0_m, vec1_m, cnst0_m);                     \
-                                                                               \
-    ILVRL_H2_SH(stp23_m, stp21_m, vec1_m, vec0_m);                             \
-    SPLATI_H2_SH(coeff1_m, 4, 5, cnst0_m, cnst1_m);                            \
-    cnst0_m = __msa_ilvev_h(cnst0_m, cnst1_m);                                 \
-    out5 = DOT_SHIFT_RIGHT_PCK_H(vec0_m, vec1_m, cnst0_m);                     \
-                                                                               \
-    cnst0_m = __msa_splati_h(coeff2_m, 1);                                     \
-    cnst0_m = __msa_ilvev_h(cnst1_m, cnst0_m);                                 \
-    out11 = DOT_SHIFT_RIGHT_PCK_H(vec0_m, vec1_m, cnst0_m);                    \
-                                                                               \
-    ILVRL_H2_SH(stp24_m, stp31_m, vec1_m, vec0_m);                             \
-    SPLATI_H2_SH(coeff1_m, 6, 7, cnst0_m, cnst1_m);                            \
-    cnst1_m = __msa_ilvev_h(cnst1_m, cnst0_m);                                 \
-                                                                               \
-    out13 = DOT_SHIFT_RIGHT_PCK_H(vec0_m, vec1_m, cnst1_m);                    \
-                                                                               \
-    cnst1_m = __msa_splati_h(coeff2_m, 3);                                     \
-    cnst0_m = __msa_ilvev_h(cnst0_m, cnst1_m);                                 \
-    out3 = DOT_SHIFT_RIGHT_PCK_H(vec0_m, vec1_m, cnst0_m);                     \
-  }
-
-#define FDCT_POSTPROC_2V_NEG_H(vec0, vec1) \
-  {                                        \
-    v8i16 tp0_m, tp1_m;                    \
-    v8i16 one_m = __msa_ldi_h(1);          \
-                                           \
-    tp0_m = __msa_clti_s_h(vec0, 0);       \
-    tp1_m = __msa_clti_s_h(vec1, 0);       \
-    vec0 += 1;                             \
-    vec1 += 1;                             \
-    tp0_m = one_m & tp0_m;                 \
-    tp1_m = one_m & tp1_m;                 \
-    vec0 += tp0_m;                         \
-    vec1 += tp1_m;                         \
-    vec0 >>= 2;                            \
-    vec1 >>= 2;                            \
-  }
-
-#define FDCT32_POSTPROC_NEG_W(vec)   \
-  {                                  \
-    v4i32 temp_m;                    \
-    v4i32 one_m = __msa_ldi_w(1);    \
-                                     \
-    temp_m = __msa_clti_s_w(vec, 0); \
-    vec += 1;                        \
-    temp_m = one_m & temp_m;         \
-    vec += temp_m;                   \
-    vec >>= 2;                       \
-  }
-
-#define FDCT32_POSTPROC_2V_POS_H(vec0, vec1)        \
-  {                                                 \
-    v8i16 tp0_m, tp1_m;                             \
-    v8i16 one = __msa_ldi_h(1);                     \
-                                                    \
-    tp0_m = __msa_clei_s_h(vec0, 0);                \
-    tp1_m = __msa_clei_s_h(vec1, 0);                \
-    tp0_m = (v8i16)__msa_xori_b((v16u8)tp0_m, 255); \
-    tp1_m = (v8i16)__msa_xori_b((v16u8)tp1_m, 255); \
-    vec0 += 1;                                      \
-    vec1 += 1;                                      \
-    tp0_m = one & tp0_m;                            \
-    tp1_m = one & tp1_m;                            \
-    vec0 += tp0_m;                                  \
-    vec1 += tp1_m;                                  \
-    vec0 >>= 2;                                     \
-    vec1 >>= 2;                                     \
-  }
-
-#define DOTP_CONST_PAIR_W(reg0_left, reg1_left, reg0_right, reg1_right, \
-                          const0, const1, out0, out1, out2, out3)       \
-  {                                                                     \
-    v4i32 s0_m, s1_m, s2_m, s3_m, s4_m, s5_m, s6_m, s7_m;               \
-    v2i64 tp0_m, tp1_m, tp2_m, tp3_m;                                   \
-    v4i32 k0_m = __msa_fill_w((int32_t)const0);                         \
-                                                                        \
-    s0_m = __msa_fill_w((int32_t)const1);                               \
-    k0_m = __msa_ilvev_w(s0_m, k0_m);                                   \
-                                                                        \
-    ILVRL_W2_SW(-reg1_left, reg0_left, s1_m, s0_m);                     \
-    ILVRL_W2_SW(reg0_left, reg1_left, s3_m, s2_m);                      \
-    ILVRL_W2_SW(-reg1_right, reg0_right, s5_m, s4_m);                   \
-    ILVRL_W2_SW(reg0_right, reg1_right, s7_m, s6_m);                    \
-                                                                        \
-    DOTP_SW2_SD(s0_m, s1_m, k0_m, k0_m, tp0_m, tp1_m);                  \
-    DOTP_SW2_SD(s4_m, s5_m, k0_m, k0_m, tp2_m, tp3_m);                  \
-    tp0_m = __msa_srari_d(tp0_m, DCT_CONST_BITS);                       \
-    tp1_m = __msa_srari_d(tp1_m, DCT_CONST_BITS);                       \
-    tp2_m = __msa_srari_d(tp2_m, DCT_CONST_BITS);                       \
-    tp3_m = __msa_srari_d(tp3_m, DCT_CONST_BITS);                       \
-    out0 = __msa_pckev_w((v4i32)tp0_m, (v4i32)tp1_m);                   \
-    out1 = __msa_pckev_w((v4i32)tp2_m, (v4i32)tp3_m);                   \
-                                                                        \
-    DOTP_SW2_SD(s2_m, s3_m, k0_m, k0_m, tp0_m, tp1_m);                  \
-    DOTP_SW2_SD(s6_m, s7_m, k0_m, k0_m, tp2_m, tp3_m);                  \
-    tp0_m = __msa_srari_d(tp0_m, DCT_CONST_BITS);                       \
-    tp1_m = __msa_srari_d(tp1_m, DCT_CONST_BITS);                       \
-    tp2_m = __msa_srari_d(tp2_m, DCT_CONST_BITS);                       \
-    tp3_m = __msa_srari_d(tp3_m, DCT_CONST_BITS);                       \
-    out2 = __msa_pckev_w((v4i32)tp0_m, (v4i32)tp1_m);                   \
-    out3 = __msa_pckev_w((v4i32)tp2_m, (v4i32)tp3_m);                   \
-  }
-
-void fdct8x16_1d_column(const int16_t *input, int16_t *tmp_ptr,
-                        int32_t src_stride);
-void fdct16x8_1d_row(int16_t *input, int16_t *output);
-#endif  // AOM_DSP_MIPS_FWD_TXFM_MSA_H_
--- a/aom_dsp/mips/idct8x8_msa.c
+++ b/aom_dsp/mips/idct8x8_msa.c
@@ -1,117 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include "aom_dsp/mips/inv_txfm_msa.h"
-
-void aom_idct8x8_64_add_msa(const int16_t *input, uint8_t *dst,
-                            int32_t dst_stride) {
-  v8i16 in0, in1, in2, in3, in4, in5, in6, in7;
-
-  /* load vector elements of 8x8 block */
-  LD_SH8(input, 8, in0, in1, in2, in3, in4, in5, in6, in7);
-
-  /* rows transform */
-  TRANSPOSE8x8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3,
-                     in4, in5, in6, in7);
-  /* 1D idct8x8 */
-  AOM_IDCT8x8_1D(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3,
-                 in4, in5, in6, in7);
-  /* columns transform */
-  TRANSPOSE8x8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3,
-                     in4, in5, in6, in7);
-  /* 1D idct8x8 */
-  AOM_IDCT8x8_1D(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3,
-                 in4, in5, in6, in7);
-  /* final rounding (add 2^4, divide by 2^5) and shift */
-  SRARI_H4_SH(in0, in1, in2, in3, 5);
-  SRARI_H4_SH(in4, in5, in6, in7, 5);
-  /* add block and store 8x8 */
-  AOM_ADDBLK_ST8x4_UB(dst, dst_stride, in0, in1, in2, in3);
-  dst += (4 * dst_stride);
-  AOM_ADDBLK_ST8x4_UB(dst, dst_stride, in4, in5, in6, in7);
-}
-
-void aom_idct8x8_12_add_msa(const int16_t *input, uint8_t *dst,
-                            int32_t dst_stride) {
-  v8i16 in0, in1, in2, in3, in4, in5, in6, in7;
-  v8i16 s0, s1, s2, s3, s4, s5, s6, s7, k0, k1, k2, k3, m0, m1, m2, m3;
-  v4i32 tmp0, tmp1, tmp2, tmp3;
-  v8i16 zero = { 0 };
-
-  /* load vector elements of 8x8 block */
-  LD_SH8(input, 8, in0, in1, in2, in3, in4, in5, in6, in7);
-  TRANSPOSE8X4_SH_SH(in0, in1, in2, in3, in0, in1, in2, in3);
-
-  /* stage1 */
-  ILVL_H2_SH(in3, in0, in2, in1, s0, s1);
-  k0 = AOM_SET_COSPI_PAIR(cospi_28_64, -cospi_4_64);
-  k1 = AOM_SET_COSPI_PAIR(cospi_4_64, cospi_28_64);
-  k2 = AOM_SET_COSPI_PAIR(-cospi_20_64, cospi_12_64);
-  k3 = AOM_SET_COSPI_PAIR(cospi_12_64, cospi_20_64);
-  DOTP_SH4_SW(s0, s0, s1, s1, k0, k1, k2, k3, tmp0, tmp1, tmp2, tmp3);
-  SRARI_W4_SW(tmp0, tmp1, tmp2, tmp3, DCT_CONST_BITS);
-  PCKEV_H2_SH(zero, tmp0, zero, tmp1, s0, s1);
-  PCKEV_H2_SH(zero, tmp2, zero, tmp3, s2, s3);
-  BUTTERFLY_4(s0, s1, s3, s2, s4, s7, s6, s5);
-
-  /* stage2 */
-  ILVR_H2_SH(in3, in1, in2, in0, s1, s0);
-  k0 = AOM_SET_COSPI_PAIR(cospi_16_64, cospi_16_64);
-  k1 = AOM_SET_COSPI_PAIR(cospi_16_64, -cospi_16_64);
-  k2 = AOM_SET_COSPI_PAIR(cospi_24_64, -cospi_8_64);
-  k3 = AOM_SET_COSPI_PAIR(cospi_8_64, cospi_24_64);
-  DOTP_SH4_SW(s0, s0, s1, s1, k0, k1, k2, k3, tmp0, tmp1, tmp2, tmp3);
-  SRARI_W4_SW(tmp0, tmp1, tmp2, tmp3, DCT_CONST_BITS);
-  PCKEV_H2_SH(zero, tmp0, zero, tmp1, s0, s1);
-  PCKEV_H2_SH(zero, tmp2, zero, tmp3, s2, s3);
-  BUTTERFLY_4(s0, s1, s2, s3, m0, m1, m2, m3);
-
-  /* stage3 */
-  s0 = __msa_ilvr_h(s6, s5);
-
-  k1 = AOM_SET_COSPI_PAIR(-cospi_16_64, cospi_16_64);
-  DOTP_SH2_SW(s0, s0, k1, k0, tmp0, tmp1);
-  SRARI_W2_SW(tmp0, tmp1, DCT_CONST_BITS);
-  PCKEV_H2_SH(zero, tmp0, zero, tmp1, s2, s3);
-
-  /* stage4 */
-  BUTTERFLY_8(m0, m1, m2, m3, s4, s2, s3, s7, in0, in1, in2, in3, in4, in5, in6,
-              in7);
-  TRANSPOSE4X8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3,
-                     in4, in5, in6, in7);
-  AOM_IDCT8x8_1D(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3,
-                 in4, in5, in6, in7);
-
-  /* final rounding (add 2^4, divide by 2^5) and shift */
-  SRARI_H4_SH(in0, in1, in2, in3, 5);
-  SRARI_H4_SH(in4, in5, in6, in7, 5);
-
-  /* add block and store 8x8 */
-  AOM_ADDBLK_ST8x4_UB(dst, dst_stride, in0, in1, in2, in3);
-  dst += (4 * dst_stride);
-  AOM_ADDBLK_ST8x4_UB(dst, dst_stride, in4, in5, in6, in7);
-}
-
-void aom_idct8x8_1_add_msa(const int16_t *input, uint8_t *dst,
-                           int32_t dst_stride) {
-  int16_t out;
-  int32_t val;
-  v8i16 vec;
-
-  out = ROUND_POWER_OF_TWO((input[0] * cospi_16_64), DCT_CONST_BITS);
-  out = ROUND_POWER_OF_TWO((out * cospi_16_64), DCT_CONST_BITS);
-  val = ROUND_POWER_OF_TWO(out, 5);
-  vec = __msa_fill_h(val);
-
-  AOM_ADDBLK_ST8x4_UB(dst, dst_stride, vec, vec, vec, vec);
-  dst += (4 * dst_stride);
-  AOM_ADDBLK_ST8x4_UB(dst, dst_stride, vec, vec, vec, vec);
-}
--- a/aom_dsp/mips/inv_txfm_dspr2.h
+++ b/aom_dsp/mips/inv_txfm_dspr2.h
@@ -1,80 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_DSP_MIPS_INV_TXFM_DSPR2_H_
-#define AOM_DSP_MIPS_INV_TXFM_DSPR2_H_
-
-#include <assert.h>
-
-#include "./aom_config.h"
-#include "aom/aom_integer.h"
-#include "aom_dsp/inv_txfm.h"
-#include "aom_dsp/mips/common_dspr2.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#if HAVE_DSPR2
-#define DCT_CONST_ROUND_SHIFT_TWICE_COSPI_16_64(input)                         \
-  ({                                                                           \
-                                                                               \
-    int32_t tmp, out;                                                          \
-    int dct_cost_rounding = DCT_CONST_ROUNDING;                                \
-    int in = input;                                                            \
-                                                                               \
-    __asm__ __volatile__(/* out = dct_const_round_shift(dc *  cospi_16_64); */ \
-                         "mtlo     %[dct_cost_rounding],   $ac1              " \
-                         "                \n\t"                                \
-                         "mthi     $zero,                  $ac1              " \
-                         "                \n\t"                                \
-                         "madd     $ac1,                   %[in],            " \
-                         "%[cospi_16_64]  \n\t"                                \
-                         "extp     %[tmp],                 $ac1,             " \
-                         "31              \n\t"                                \
-                                                                               \
-                         /* out = dct_const_round_shift(out * cospi_16_64); */ \
-                         "mtlo     %[dct_cost_rounding],   $ac2              " \
-                         "                \n\t"                                \
-                         "mthi     $zero,                  $ac2              " \
-                         "                \n\t"                                \
-                         "madd     $ac2,                   %[tmp],           " \
-                         "%[cospi_16_64]  \n\t"                                \
-                         "extp     %[out],                 $ac2,             " \
-                         "31              \n\t"                                \
-                                                                               \
-                         : [tmp] "=&r"(tmp), [out] "=r"(out)                   \
-                         : [in] "r"(in),                                       \
-                           [dct_cost_rounding] "r"(dct_cost_rounding),         \
-                           [cospi_16_64] "r"(cospi_16_64));                    \
-    out;                                                                       \
-  })
-
-void aom_idct32_cols_add_blk_dspr2(int16_t *input, uint8_t *dest,
-                                   int dest_stride);
-void aom_idct4_rows_dspr2(const int16_t *input, int16_t *output);
-void aom_idct4_columns_add_blk_dspr2(int16_t *input, uint8_t *dest,
-                                     int dest_stride);
-void iadst4_dspr2(const int16_t *input, int16_t *output);
-void idct8_rows_dspr2(const int16_t *input, int16_t *output, uint32_t no_rows);
-void idct8_columns_add_blk_dspr2(int16_t *input, uint8_t *dest,
-                                 int dest_stride);
-void iadst8_dspr2(const int16_t *input, int16_t *output);
-void idct16_rows_dspr2(const int16_t *input, int16_t *output, uint32_t no_rows);
-void idct16_cols_add_blk_dspr2(int16_t *input, uint8_t *dest, int dest_stride);
-void iadst16_dspr2(const int16_t *input, int16_t *output);
-
-#endif  // #if HAVE_DSPR2
-#ifdef __cplusplus
-}  // extern "C"
-#endif
-
-#endif  // AOM_DSP_MIPS_INV_TXFM_DSPR2_H_
--- a/aom_dsp/mips/inv_txfm_msa.h
+++ b/aom_dsp/mips/inv_txfm_msa.h
@@ -1,412 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_DSP_MIPS_INV_TXFM_MSA_H_
-#define AOM_DSP_MIPS_INV_TXFM_MSA_H_
-
-#include "aom_dsp/mips/macros_msa.h"
-#include "aom_dsp/mips/txfm_macros_msa.h"
-#include "aom_dsp/txfm_common.h"
-
-#define AOM_ADST8(in0, in1, in2, in3, in4, in5, in6, in7, out0, out1, out2,  \
-                  out3, out4, out5, out6, out7)                              \
-  {                                                                          \
-    v8i16 cnst0_m, cnst1_m, cnst2_m, cnst3_m, cnst4_m;                       \
-    v8i16 vec0_m, vec1_m, vec2_m, vec3_m, s0_m, s1_m;                        \
-    v8i16 coeff0_m = { cospi_2_64,  cospi_6_64,  cospi_10_64, cospi_14_64,   \
-                       cospi_18_64, cospi_22_64, cospi_26_64, cospi_30_64 }; \
-    v8i16 coeff1_m = { cospi_8_64,  -cospi_8_64,  cospi_16_64, -cospi_16_64, \
-                       cospi_24_64, -cospi_24_64, 0,           0 };          \
-                                                                             \
-    SPLATI_H2_SH(coeff0_m, 0, 7, cnst0_m, cnst1_m);                          \
-    cnst2_m = -cnst0_m;                                                      \
-    ILVEV_H2_SH(cnst0_m, cnst1_m, cnst1_m, cnst2_m, cnst0_m, cnst1_m);       \
-    SPLATI_H2_SH(coeff0_m, 4, 3, cnst2_m, cnst3_m);                          \
-    cnst4_m = -cnst2_m;                                                      \
-    ILVEV_H2_SH(cnst2_m, cnst3_m, cnst3_m, cnst4_m, cnst2_m, cnst3_m);       \
-                                                                             \
-    ILVRL_H2_SH(in0, in7, vec1_m, vec0_m);                                   \
-    ILVRL_H2_SH(in4, in3, vec3_m, vec2_m);                                   \
-    DOT_ADD_SUB_SRARI_PCK(vec0_m, vec1_m, vec2_m, vec3_m, cnst0_m, cnst1_m,  \
-                          cnst2_m, cnst3_m, in7, in0, in4, in3);             \
-                                                                             \
-    SPLATI_H2_SH(coeff0_m, 2, 5, cnst0_m, cnst1_m);                          \
-    cnst2_m = -cnst0_m;                                                      \
-    ILVEV_H2_SH(cnst0_m, cnst1_m, cnst1_m, cnst2_m, cnst0_m, cnst1_m);       \
-    SPLATI_H2_SH(coeff0_m, 6, 1, cnst2_m, cnst3_m);                          \
-    cnst4_m = -cnst2_m;                                                      \
-    ILVEV_H2_SH(cnst2_m, cnst3_m, cnst3_m, cnst4_m, cnst2_m, cnst3_m);       \
-                                                                             \
-    ILVRL_H2_SH(in2, in5, vec1_m, vec0_m);                                   \
-    ILVRL_H2_SH(in6, in1, vec3_m, vec2_m);                                   \
-                                                                             \
-    DOT_ADD_SUB_SRARI_PCK(vec0_m, vec1_m, vec2_m, vec3_m, cnst0_m, cnst1_m,  \
-                          cnst2_m, cnst3_m, in5, in2, in6, in1);             \
-    BUTTERFLY_4(in7, in0, in2, in5, s1_m, s0_m, in2, in5);                   \
-    out7 = -s0_m;                                                            \
-    out0 = s1_m;                                                             \
-                                                                             \
-    SPLATI_H4_SH(coeff1_m, 0, 4, 1, 5, cnst0_m, cnst1_m, cnst2_m, cnst3_m);  \
-                                                                             \
-    ILVEV_H2_SH(cnst3_m, cnst0_m, cnst1_m, cnst2_m, cnst3_m, cnst2_m);       \
-    cnst0_m = __msa_ilvev_h(cnst1_m, cnst0_m);                               \
-    cnst1_m = cnst0_m;                                                       \
-                                                                             \
-    ILVRL_H2_SH(in4, in3, vec1_m, vec0_m);                                   \
-    ILVRL_H2_SH(in6, in1, vec3_m, vec2_m);                                   \
-    DOT_ADD_SUB_SRARI_PCK(vec0_m, vec1_m, vec2_m, vec3_m, cnst0_m, cnst2_m,  \
-                          cnst3_m, cnst1_m, out1, out6, s0_m, s1_m);         \
-                                                                             \
-    SPLATI_H2_SH(coeff1_m, 2, 3, cnst0_m, cnst1_m);                          \
-    cnst1_m = __msa_ilvev_h(cnst1_m, cnst0_m);                               \
-                                                                             \
-    ILVRL_H2_SH(in2, in5, vec1_m, vec0_m);                                   \
-    ILVRL_H2_SH(s0_m, s1_m, vec3_m, vec2_m);                                 \
-    out3 = DOT_SHIFT_RIGHT_PCK_H(vec0_m, vec1_m, cnst0_m);                   \
-    out4 = DOT_SHIFT_RIGHT_PCK_H(vec0_m, vec1_m, cnst1_m);                   \
-    out2 = DOT_SHIFT_RIGHT_PCK_H(vec2_m, vec3_m, cnst0_m);                   \
-    out5 = DOT_SHIFT_RIGHT_PCK_H(vec2_m, vec3_m, cnst1_m);                   \
-                                                                             \
-    out1 = -out1;                                                            \
-    out3 = -out3;                                                            \
-    out5 = -out5;                                                            \
-  }
-
-#define AOM_SET_COSPI_PAIR(c0_h, c1_h)  \
-  ({                                    \
-    v8i16 out0_m, r0_m, r1_m;           \
-                                        \
-    r0_m = __msa_fill_h(c0_h);          \
-    r1_m = __msa_fill_h(c1_h);          \
-    out0_m = __msa_ilvev_h(r1_m, r0_m); \
-                                        \
-    out0_m;                             \
-  })
-
-#define AOM_ADDBLK_ST8x4_UB(dst, dst_stride, in0, in1, in2, in3)               \
-  {                                                                            \
-    uint8_t *dst_m = (uint8_t *)(dst);                                         \
-    v16u8 dst0_m, dst1_m, dst2_m, dst3_m;                                      \
-    v16i8 tmp0_m, tmp1_m;                                                      \
-    v16i8 zero_m = { 0 };                                                      \
-    v8i16 res0_m, res1_m, res2_m, res3_m;                                      \
-                                                                               \
-    LD_UB4(dst_m, dst_stride, dst0_m, dst1_m, dst2_m, dst3_m);                 \
-    ILVR_B4_SH(zero_m, dst0_m, zero_m, dst1_m, zero_m, dst2_m, zero_m, dst3_m, \
-               res0_m, res1_m, res2_m, res3_m);                                \
-    ADD4(res0_m, in0, res1_m, in1, res2_m, in2, res3_m, in3, res0_m, res1_m,   \
-         res2_m, res3_m);                                                      \
-    CLIP_SH4_0_255(res0_m, res1_m, res2_m, res3_m);                            \
-    PCKEV_B2_SB(res1_m, res0_m, res3_m, res2_m, tmp0_m, tmp1_m);               \
-    ST8x4_UB(tmp0_m, tmp1_m, dst_m, dst_stride);                               \
-  }
-
-#define AOM_IDCT4x4(in0, in1, in2, in3, out0, out1, out2, out3)             \
-  {                                                                         \
-    v8i16 c0_m, c1_m, c2_m, c3_m;                                           \
-    v8i16 step0_m, step1_m;                                                 \
-    v4i32 tmp0_m, tmp1_m, tmp2_m, tmp3_m;                                   \
-                                                                            \
-    c0_m = AOM_SET_COSPI_PAIR(cospi_16_64, cospi_16_64);                    \
-    c1_m = AOM_SET_COSPI_PAIR(cospi_16_64, -cospi_16_64);                   \
-    step0_m = __msa_ilvr_h(in2, in0);                                       \
-    DOTP_SH2_SW(step0_m, step0_m, c0_m, c1_m, tmp0_m, tmp1_m);              \
-                                                                            \
-    c2_m = AOM_SET_COSPI_PAIR(cospi_24_64, -cospi_8_64);                    \
-    c3_m = AOM_SET_COSPI_PAIR(cospi_8_64, cospi_24_64);                     \
-    step1_m = __msa_ilvr_h(in3, in1);                                       \
-    DOTP_SH2_SW(step1_m, step1_m, c2_m, c3_m, tmp2_m, tmp3_m);              \
-    SRARI_W4_SW(tmp0_m, tmp1_m, tmp2_m, tmp3_m, DCT_CONST_BITS);            \
-                                                                            \
-    PCKEV_H2_SW(tmp1_m, tmp0_m, tmp3_m, tmp2_m, tmp0_m, tmp2_m);            \
-    SLDI_B2_0_SW(tmp0_m, tmp2_m, tmp1_m, tmp3_m, 8);                        \
-    BUTTERFLY_4((v8i16)tmp0_m, (v8i16)tmp1_m, (v8i16)tmp2_m, (v8i16)tmp3_m, \
-                out0, out1, out2, out3);                                    \
-  }
-
-#define AOM_IADST4x4(in0, in1, in2, in3, out0, out1, out2, out3)       \
-  {                                                                    \
-    v8i16 res0_m, res1_m, c0_m, c1_m;                                  \
-    v8i16 k1_m, k2_m, k3_m, k4_m;                                      \
-    v8i16 zero_m = { 0 };                                              \
-    v4i32 tmp0_m, tmp1_m, tmp2_m, tmp3_m;                              \
-    v4i32 int0_m, int1_m, int2_m, int3_m;                              \
-    v8i16 mask_m = { sinpi_1_9,  sinpi_2_9,  sinpi_3_9,  sinpi_4_9,    \
-                     -sinpi_1_9, -sinpi_2_9, -sinpi_3_9, -sinpi_4_9 }; \
-                                                                       \
-    SPLATI_H4_SH(mask_m, 3, 0, 1, 2, c0_m, c1_m, k1_m, k2_m);          \
-    ILVEV_H2_SH(c0_m, c1_m, k1_m, k2_m, c0_m, c1_m);                   \
-    ILVR_H2_SH(in0, in2, in1, in3, res0_m, res1_m);                    \
-    DOTP_SH2_SW(res0_m, res1_m, c0_m, c1_m, tmp2_m, tmp1_m);           \
-    int0_m = tmp2_m + tmp1_m;                                          \
-                                                                       \
-    SPLATI_H2_SH(mask_m, 4, 7, k4_m, k3_m);                            \
-    ILVEV_H2_SH(k4_m, k1_m, k3_m, k2_m, c0_m, c1_m);                   \
-    DOTP_SH2_SW(res0_m, res1_m, c0_m, c1_m, tmp0_m, tmp1_m);           \
-    int1_m = tmp0_m + tmp1_m;                                          \
-                                                                       \
-    c0_m = __msa_splati_h(mask_m, 6);                                  \
-    ILVL_H2_SH(k2_m, c0_m, zero_m, k2_m, c0_m, c1_m);                  \
-    ILVR_H2_SH(in0, in2, in1, in3, res0_m, res1_m);                    \
-    DOTP_SH2_SW(res0_m, res1_m, c0_m, c1_m, tmp0_m, tmp1_m);           \
-    int2_m = tmp0_m + tmp1_m;                                          \
-                                                                       \
-    c0_m = __msa_splati_h(mask_m, 6);                                  \
-    c0_m = __msa_ilvev_h(c0_m, k1_m);                                  \
-                                                                       \
-    res0_m = __msa_ilvr_h((in1), (in3));                               \
-    tmp0_m = __msa_dotp_s_w(res0_m, c0_m);                             \
-    int3_m = tmp2_m + tmp0_m;                                          \
-                                                                       \
-    res0_m = __msa_ilvr_h((in2), (in3));                               \
-    c1_m = __msa_ilvev_h(k4_m, k3_m);                                  \
-                                                                       \
-    tmp2_m = __msa_dotp_s_w(res0_m, c1_m);                             \
-    res1_m = __msa_ilvr_h((in0), (in2));                               \
-    c1_m = __msa_ilvev_h(k1_m, zero_m);                                \
-                                                                       \
-    tmp3_m = __msa_dotp_s_w(res1_m, c1_m);                             \
-    int3_m += tmp2_m;                                                  \
-    int3_m += tmp3_m;                                                  \
-                                                                       \
-    SRARI_W4_SW(int0_m, int1_m, int2_m, int3_m, DCT_CONST_BITS);       \
-    PCKEV_H2_SH(int0_m, int0_m, int1_m, int1_m, out0, out1);           \
-    PCKEV_H2_SH(int2_m, int2_m, int3_m, int3_m, out2, out3);           \
-  }
-
-#define AV1_SET_CONST_PAIR(mask_h, idx1_h, idx2_h)    \
-  ({                                                  \
-    v8i16 c0_m, c1_m;                                 \
-                                                      \
-    SPLATI_H2_SH(mask_h, idx1_h, idx2_h, c0_m, c1_m); \
-    c0_m = __msa_ilvev_h(c1_m, c0_m);                 \
-                                                      \
-    c0_m;                                             \
-  })
-
-/* multiply and add macro */
-#define AV1_MADD(inp0, inp1, inp2, inp3, cst0, cst1, cst2, cst3, out0, out1,  \
-                 out2, out3)                                                  \
-  {                                                                           \
-    v8i16 madd_s0_m, madd_s1_m, madd_s2_m, madd_s3_m;                         \
-    v4i32 tmp0_madd, tmp1_madd, tmp2_madd, tmp3_madd;                         \
-                                                                              \
-    ILVRL_H2_SH(inp1, inp0, madd_s1_m, madd_s0_m);                            \
-    ILVRL_H2_SH(inp3, inp2, madd_s3_m, madd_s2_m);                            \
-    DOTP_SH4_SW(madd_s1_m, madd_s0_m, madd_s1_m, madd_s0_m, cst0, cst0, cst1, \
-                cst1, tmp0_madd, tmp1_madd, tmp2_madd, tmp3_madd);            \
-    SRARI_W4_SW(tmp0_madd, tmp1_madd, tmp2_madd, tmp3_madd, DCT_CONST_BITS);  \
-    PCKEV_H2_SH(tmp1_madd, tmp0_madd, tmp3_madd, tmp2_madd, out0, out1);      \
-    DOTP_SH4_SW(madd_s3_m, madd_s2_m, madd_s3_m, madd_s2_m, cst2, cst2, cst3, \
-                cst3, tmp0_madd, tmp1_madd, tmp2_madd, tmp3_madd);            \
-    SRARI_W4_SW(tmp0_madd, tmp1_madd, tmp2_madd, tmp3_madd, DCT_CONST_BITS);  \
-    PCKEV_H2_SH(tmp1_madd, tmp0_madd, tmp3_madd, tmp2_madd, out2, out3);      \
-  }
-
-/* idct 8x8 macro */
-#define AOM_IDCT8x8_1D(in0, in1, in2, in3, in4, in5, in6, in7, out0, out1,    \
-                       out2, out3, out4, out5, out6, out7)                    \
-  {                                                                           \
-    v8i16 tp0_m, tp1_m, tp2_m, tp3_m, tp4_m, tp5_m, tp6_m, tp7_m;             \
-    v8i16 k0_m, k1_m, k2_m, k3_m, res0_m, res1_m, res2_m, res3_m;             \
-    v4i32 tmp0_m, tmp1_m, tmp2_m, tmp3_m;                                     \
-    v8i16 mask_m = { cospi_28_64, cospi_4_64,  cospi_20_64,  cospi_12_64,     \
-                     cospi_16_64, -cospi_4_64, -cospi_20_64, -cospi_16_64 };  \
-                                                                              \
-    k0_m = AV1_SET_CONST_PAIR(mask_m, 0, 5);                                  \
-    k1_m = AV1_SET_CONST_PAIR(mask_m, 1, 0);                                  \
-    k2_m = AV1_SET_CONST_PAIR(mask_m, 6, 3);                                  \
-    k3_m = AV1_SET_CONST_PAIR(mask_m, 3, 2);                                  \
-    AV1_MADD(in1, in7, in3, in5, k0_m, k1_m, k2_m, k3_m, in1, in7, in3, in5); \
-    SUB2(in1, in3, in7, in5, res0_m, res1_m);                                 \
-    k0_m = AV1_SET_CONST_PAIR(mask_m, 4, 7);                                  \
-    k1_m = __msa_splati_h(mask_m, 4);                                         \
-                                                                              \
-    ILVRL_H2_SH(res0_m, res1_m, res2_m, res3_m);                              \
-    DOTP_SH4_SW(res2_m, res3_m, res2_m, res3_m, k0_m, k0_m, k1_m, k1_m,       \
-                tmp0_m, tmp1_m, tmp2_m, tmp3_m);                              \
-    SRARI_W4_SW(tmp0_m, tmp1_m, tmp2_m, tmp3_m, DCT_CONST_BITS);              \
-    tp4_m = in1 + in3;                                                        \
-    PCKEV_H2_SH(tmp1_m, tmp0_m, tmp3_m, tmp2_m, tp5_m, tp6_m);                \
-    tp7_m = in7 + in5;                                                        \
-    k2_m = AOM_SET_COSPI_PAIR(cospi_24_64, -cospi_8_64);                      \
-    k3_m = AOM_SET_COSPI_PAIR(cospi_8_64, cospi_24_64);                       \
-    AV1_MADD(in0, in4, in2, in6, k1_m, k0_m, k2_m, k3_m, in0, in4, in2, in6); \
-    BUTTERFLY_4(in0, in4, in2, in6, tp0_m, tp1_m, tp2_m, tp3_m);              \
-    BUTTERFLY_8(tp0_m, tp1_m, tp2_m, tp3_m, tp4_m, tp5_m, tp6_m, tp7_m, out0, \
-                out1, out2, out3, out4, out5, out6, out7);                    \
-  }
-
-#define AV1_IADST8x8_1D(in0, in1, in2, in3, in4, in5, in6, in7, out0, out1,   \
-                        out2, out3, out4, out5, out6, out7)                   \
-  {                                                                           \
-    v4i32 r0_m, r1_m, r2_m, r3_m, r4_m, r5_m, r6_m, r7_m;                     \
-    v4i32 m0_m, m1_m, m2_m, m3_m, t0_m, t1_m;                                 \
-    v8i16 res0_m, res1_m, res2_m, res3_m, k0_m, k1_m, in_s0, in_s1;           \
-    v8i16 mask1_m = { cospi_2_64,  cospi_30_64,  -cospi_2_64, cospi_10_64,    \
-                      cospi_22_64, -cospi_10_64, cospi_18_64, cospi_14_64 };  \
-    v8i16 mask2_m = { cospi_14_64,  -cospi_18_64, cospi_26_64, cospi_6_64,    \
-                      -cospi_26_64, cospi_8_64,   cospi_24_64, -cospi_8_64 }; \
-    v8i16 mask3_m = {                                                         \
-      -cospi_24_64, cospi_8_64, cospi_16_64, -cospi_16_64, 0, 0, 0, 0         \
-    };                                                                        \
-                                                                              \
-    k0_m = AV1_SET_CONST_PAIR(mask1_m, 0, 1);                                 \
-    k1_m = AV1_SET_CONST_PAIR(mask1_m, 1, 2);                                 \
-    ILVRL_H2_SH(in1, in0, in_s1, in_s0);                                      \
-    DOTP_SH4_SW(in_s1, in_s0, in_s1, in_s0, k0_m, k0_m, k1_m, k1_m, r0_m,     \
-                r1_m, r2_m, r3_m);                                            \
-    k0_m = AV1_SET_CONST_PAIR(mask1_m, 6, 7);                                 \
-    k1_m = AV1_SET_CONST_PAIR(mask2_m, 0, 1);                                 \
-    ILVRL_H2_SH(in5, in4, in_s1, in_s0);                                      \
-    DOTP_SH4_SW(in_s1, in_s0, in_s1, in_s0, k0_m, k0_m, k1_m, k1_m, r4_m,     \
-                r5_m, r6_m, r7_m);                                            \
-    ADD4(r0_m, r4_m, r1_m, r5_m, r2_m, r6_m, r3_m, r7_m, m0_m, m1_m, m2_m,    \
-         m3_m);                                                               \
-    SRARI_W4_SW(m0_m, m1_m, m2_m, m3_m, DCT_CONST_BITS);                      \
-    PCKEV_H2_SH(m1_m, m0_m, m3_m, m2_m, res0_m, res1_m);                      \
-    SUB4(r0_m, r4_m, r1_m, r5_m, r2_m, r6_m, r3_m, r7_m, m0_m, m1_m, m2_m,    \
-         m3_m);                                                               \
-    SRARI_W4_SW(m0_m, m1_m, m2_m, m3_m, DCT_CONST_BITS);                      \
-    PCKEV_H2_SW(m1_m, m0_m, m3_m, m2_m, t0_m, t1_m);                          \
-    k0_m = AV1_SET_CONST_PAIR(mask1_m, 3, 4);                                 \
-    k1_m = AV1_SET_CONST_PAIR(mask1_m, 4, 5);                                 \
-    ILVRL_H2_SH(in3, in2, in_s1, in_s0);                                      \
-    DOTP_SH4_SW(in_s1, in_s0, in_s1, in_s0, k0_m, k0_m, k1_m, k1_m, r0_m,     \
-                r1_m, r2_m, r3_m);                                            \
-    k0_m = AV1_SET_CONST_PAIR(mask2_m, 2, 3);                                 \
-    k1_m = AV1_SET_CONST_PAIR(mask2_m, 3, 4);                                 \
-    ILVRL_H2_SH(in7, in6, in_s1, in_s0);                                      \
-    DOTP_SH4_SW(in_s1, in_s0, in_s1, in_s0, k0_m, k0_m, k1_m, k1_m, r4_m,     \
-                r5_m, r6_m, r7_m);                                            \
-    ADD4(r0_m, r4_m, r1_m, r5_m, r2_m, r6_m, r3_m, r7_m, m0_m, m1_m, m2_m,    \
-         m3_m);                                                               \
-    SRARI_W4_SW(m0_m, m1_m, m2_m, m3_m, DCT_CONST_BITS);                      \
-    PCKEV_H2_SH(m1_m, m0_m, m3_m, m2_m, res2_m, res3_m);                      \
-    SUB4(r0_m, r4_m, r1_m, r5_m, r2_m, r6_m, r3_m, r7_m, m0_m, m1_m, m2_m,    \
-         m3_m);                                                               \
-    SRARI_W4_SW(m0_m, m1_m, m2_m, m3_m, DCT_CONST_BITS);                      \
-    PCKEV_H2_SW(m1_m, m0_m, m3_m, m2_m, r2_m, r3_m);                          \
-    ILVRL_H2_SW(r3_m, r2_m, m2_m, m3_m);                                      \
-    BUTTERFLY_4(res0_m, res1_m, res3_m, res2_m, out0, in7, in4, in3);         \
-    k0_m = AV1_SET_CONST_PAIR(mask2_m, 5, 6);                                 \
-    k1_m = AV1_SET_CONST_PAIR(mask2_m, 6, 7);                                 \
-    ILVRL_H2_SH(t1_m, t0_m, in_s1, in_s0);                                    \
-    DOTP_SH4_SW(in_s1, in_s0, in_s1, in_s0, k0_m, k0_m, k1_m, k1_m, r0_m,     \
-                r1_m, r2_m, r3_m);                                            \
-    k1_m = AV1_SET_CONST_PAIR(mask3_m, 0, 1);                                 \
-    DOTP_SH4_SW(m2_m, m3_m, m2_m, m3_m, k0_m, k0_m, k1_m, k1_m, r4_m, r5_m,   \
-                r6_m, r7_m);                                                  \
-    ADD4(r0_m, r6_m, r1_m, r7_m, r2_m, r4_m, r3_m, r5_m, m0_m, m1_m, m2_m,    \
-         m3_m);                                                               \
-    SRARI_W4_SW(m0_m, m1_m, m2_m, m3_m, DCT_CONST_BITS);                      \
-    PCKEV_H2_SH(m1_m, m0_m, m3_m, m2_m, in1, out6);                           \
-    SUB4(r0_m, r6_m, r1_m, r7_m, r2_m, r4_m, r3_m, r5_m, m0_m, m1_m, m2_m,    \
-         m3_m);                                                               \
-    SRARI_W4_SW(m0_m, m1_m, m2_m, m3_m, DCT_CONST_BITS);                      \
-    PCKEV_H2_SH(m1_m, m0_m, m3_m, m2_m, in2, in5);                            \
-    k0_m = AV1_SET_CONST_PAIR(mask3_m, 2, 2);                                 \
-    k1_m = AV1_SET_CONST_PAIR(mask3_m, 2, 3);                                 \
-    ILVRL_H2_SH(in4, in3, in_s1, in_s0);                                      \
-    DOTP_SH4_SW(in_s1, in_s0, in_s1, in_s0, k0_m, k0_m, k1_m, k1_m, m0_m,     \
-                m1_m, m2_m, m3_m);                                            \
-    SRARI_W4_SW(m0_m, m1_m, m2_m, m3_m, DCT_CONST_BITS);                      \
-    PCKEV_H2_SH(m1_m, m0_m, m3_m, m2_m, in3, out4);                           \
-    ILVRL_H2_SW(in5, in2, m2_m, m3_m);                                        \
-    DOTP_SH4_SW(m2_m, m3_m, m2_m, m3_m, k0_m, k0_m, k1_m, k1_m, m0_m, m1_m,   \
-                m2_m, m3_m);                                                  \
-    SRARI_W4_SW(m0_m, m1_m, m2_m, m3_m, DCT_CONST_BITS);                      \
-    PCKEV_H2_SH(m1_m, m0_m, m3_m, m2_m, out2, in5);                           \
-                                                                              \
-    out1 = -in1;                                                              \
-    out3 = -in3;                                                              \
-    out5 = -in5;                                                              \
-    out7 = -in7;                                                              \
-  }
-
-#define AOM_IADST8x16_1D(r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11,     \
-                         r12, r13, r14, r15, out0, out1, out2, out3, out4,     \
-                         out5, out6, out7, out8, out9, out10, out11, out12,    \
-                         out13, out14, out15)                                  \
-  {                                                                            \
-    v8i16 g0_m, g1_m, g2_m, g3_m, g4_m, g5_m, g6_m, g7_m;                      \
-    v8i16 g8_m, g9_m, g10_m, g11_m, g12_m, g13_m, g14_m, g15_m;                \
-    v8i16 h0_m, h1_m, h2_m, h3_m, h4_m, h5_m, h6_m, h7_m;                      \
-    v8i16 h8_m, h9_m, h10_m, h11_m;                                            \
-    v8i16 k0_m, k1_m, k2_m, k3_m;                                              \
-                                                                               \
-    /* stage 1 */                                                              \
-    k0_m = AOM_SET_COSPI_PAIR(cospi_1_64, cospi_31_64);                        \
-    k1_m = AOM_SET_COSPI_PAIR(cospi_31_64, -cospi_1_64);                       \
-    k2_m = AOM_SET_COSPI_PAIR(cospi_17_64, cospi_15_64);                       \
-    k3_m = AOM_SET_COSPI_PAIR(cospi_15_64, -cospi_17_64);                      \
-    MADD_BF(r15, r0, r7, r8, k0_m, k1_m, k2_m, k3_m, g0_m, g1_m, g2_m, g3_m);  \
-    k0_m = AOM_SET_COSPI_PAIR(cospi_5_64, cospi_27_64);                        \
-    k1_m = AOM_SET_COSPI_PAIR(cospi_27_64, -cospi_5_64);                       \
-    k2_m = AOM_SET_COSPI_PAIR(cospi_21_64, cospi_11_64);                       \
-    k3_m = AOM_SET_COSPI_PAIR(cospi_11_64, -cospi_21_64);                      \
-    MADD_BF(r13, r2, r5, r10, k0_m, k1_m, k2_m, k3_m, g4_m, g5_m, g6_m, g7_m); \
-    k0_m = AOM_SET_COSPI_PAIR(cospi_9_64, cospi_23_64);                        \
-    k1_m = AOM_SET_COSPI_PAIR(cospi_23_64, -cospi_9_64);                       \
-    k2_m = AOM_SET_COSPI_PAIR(cospi_25_64, cospi_7_64);                        \
-    k3_m = AOM_SET_COSPI_PAIR(cospi_7_64, -cospi_25_64);                       \
-    MADD_BF(r11, r4, r3, r12, k0_m, k1_m, k2_m, k3_m, g8_m, g9_m, g10_m,       \
-            g11_m);                                                            \
-    k0_m = AOM_SET_COSPI_PAIR(cospi_13_64, cospi_19_64);                       \
-    k1_m = AOM_SET_COSPI_PAIR(cospi_19_64, -cospi_13_64);                      \
-    k2_m = AOM_SET_COSPI_PAIR(cospi_29_64, cospi_3_64);                        \
-    k3_m = AOM_SET_COSPI_PAIR(cospi_3_64, -cospi_29_64);                       \
-    MADD_BF(r9, r6, r1, r14, k0_m, k1_m, k2_m, k3_m, g12_m, g13_m, g14_m,      \
-            g15_m);                                                            \
-                                                                               \
-    /* stage 2 */                                                              \
-    k0_m = AOM_SET_COSPI_PAIR(cospi_4_64, cospi_28_64);                        \
-    k1_m = AOM_SET_COSPI_PAIR(cospi_28_64, -cospi_4_64);                       \
-    k2_m = AOM_SET_COSPI_PAIR(-cospi_28_64, cospi_4_64);                       \
-    MADD_BF(g1_m, g3_m, g9_m, g11_m, k0_m, k1_m, k2_m, k0_m, h0_m, h1_m, h2_m, \
-            h3_m);                                                             \
-    k0_m = AOM_SET_COSPI_PAIR(cospi_12_64, cospi_20_64);                       \
-    k1_m = AOM_SET_COSPI_PAIR(-cospi_20_64, cospi_12_64);                      \
-    k2_m = AOM_SET_COSPI_PAIR(cospi_20_64, -cospi_12_64);                      \
-    MADD_BF(g7_m, g5_m, g15_m, g13_m, k0_m, k1_m, k2_m, k0_m, h4_m, h5_m,      \
-            h6_m, h7_m);                                                       \
-    BUTTERFLY_4(h0_m, h2_m, h6_m, h4_m, out8, out9, out11, out10);             \
-    BUTTERFLY_8(g0_m, g2_m, g4_m, g6_m, g14_m, g12_m, g10_m, g8_m, h8_m, h9_m, \
-                h10_m, h11_m, h6_m, h4_m, h2_m, h0_m);                         \
-                                                                               \
-    /* stage 3 */                                                              \
-    BUTTERFLY_4(h8_m, h9_m, h11_m, h10_m, out0, out1, h11_m, h10_m);           \
-    k0_m = AOM_SET_COSPI_PAIR(cospi_8_64, cospi_24_64);                        \
-    k1_m = AOM_SET_COSPI_PAIR(cospi_24_64, -cospi_8_64);                       \
-    k2_m = AOM_SET_COSPI_PAIR(-cospi_24_64, cospi_8_64);                       \
-    MADD_BF(h0_m, h2_m, h4_m, h6_m, k0_m, k1_m, k2_m, k0_m, out4, out6, out5,  \
-            out7);                                                             \
-    MADD_BF(h1_m, h3_m, h5_m, h7_m, k0_m, k1_m, k2_m, k0_m, out12, out14,      \
-            out13, out15);                                                     \
-                                                                               \
-    /* stage 4 */                                                              \
-    k0_m = AOM_SET_COSPI_PAIR(cospi_16_64, cospi_16_64);                       \
-    k1_m = AOM_SET_COSPI_PAIR(-cospi_16_64, -cospi_16_64);                     \
-    k2_m = AOM_SET_COSPI_PAIR(cospi_16_64, -cospi_16_64);                      \
-    k3_m = AOM_SET_COSPI_PAIR(-cospi_16_64, cospi_16_64);                      \
-    MADD_SHORT(h10_m, h11_m, k1_m, k2_m, out2, out3);                          \
-    MADD_SHORT(out6, out7, k0_m, k3_m, out6, out7);                            \
-    MADD_SHORT(out10, out11, k0_m, k3_m, out10, out11);                        \
-    MADD_SHORT(out14, out15, k1_m, k2_m, out14, out15);                        \
-  }
-
-void aom_idct16_1d_columns_addblk_msa(int16_t *input, uint8_t *dst,
-                                      int32_t dst_stride);
-void aom_idct16_1d_rows_msa(const int16_t *input, int16_t *output);
-void aom_iadst16_1d_columns_addblk_msa(int16_t *input, uint8_t *dst,
-                                       int32_t dst_stride);
-void aom_iadst16_1d_rows_msa(const int16_t *input, int16_t *output);
-#endif  // AOM_DSP_MIPS_INV_TXFM_MSA_H_
--- a/aom_dsp/mips/loopfilter_filters_dspr2.c
+++ b/aom_dsp/mips/loopfilter_filters_dspr2.c
@@ -1,327 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <stdlib.h>
-
-#include "./aom_dsp_rtcd.h"
-#include "aom/aom_integer.h"
-#include "aom_dsp/mips/common_dspr2.h"
-#include "aom_dsp/mips/loopfilter_filters_dspr2.h"
-#include "aom_dsp/mips/loopfilter_macros_dspr2.h"
-#include "aom_dsp/mips/loopfilter_masks_dspr2.h"
-#include "aom_mem/aom_mem.h"
-
-#if HAVE_DSPR2
-void aom_lpf_horizontal_4_dspr2(unsigned char *s, int pitch,
-                                const uint8_t *blimit, const uint8_t *limit,
-                                const uint8_t *thresh) {
-  uint8_t i;
-  uint32_t mask;
-  uint32_t hev;
-  uint32_t pm1, p0, p1, p2, p3, p4, p5, p6;
-  uint8_t *sm1, *s0, *s1, *s2, *s3, *s4, *s5, *s6;
-  uint32_t thresh_vec, flimit_vec, limit_vec;
-  uint32_t uflimit, ulimit, uthresh;
-
-  uflimit = *blimit;
-  ulimit = *limit;
-  uthresh = *thresh;
-
-  /* create quad-byte */
-  __asm__ __volatile__(
-      "replv.qb       %[thresh_vec],    %[uthresh]    \n\t"
-      "replv.qb       %[flimit_vec],    %[uflimit]    \n\t"
-      "replv.qb       %[limit_vec],     %[ulimit]     \n\t"
-
-      : [thresh_vec] "=&r"(thresh_vec), [flimit_vec] "=&r"(flimit_vec),
-        [limit_vec] "=r"(limit_vec)
-      : [uthresh] "r"(uthresh), [uflimit] "r"(uflimit), [ulimit] "r"(ulimit));
-
-  /* prefetch data for store */
-  prefetch_store(s);
-
-  /* loop filter designed to work using chars so that we can make maximum use
-     of 8 bit simd instructions. */
-  for (i = 0; i < 2; i++) {
-    sm1 = s - (pitch << 2);
-    s0 = sm1 + pitch;
-    s1 = s0 + pitch;
-    s2 = s - pitch;
-    s3 = s;
-    s4 = s + pitch;
-    s5 = s4 + pitch;
-    s6 = s5 + pitch;
-
-    __asm__ __volatile__(
-        "lw     %[p1],  (%[s1])    \n\t"
-        "lw     %[p2],  (%[s2])    \n\t"
-        "lw     %[p3],  (%[s3])    \n\t"
-        "lw     %[p4],  (%[s4])    \n\t"
-
-        : [p1] "=&r"(p1), [p2] "=&r"(p2), [p3] "=&r"(p3), [p4] "=&r"(p4)
-        : [s1] "r"(s1), [s2] "r"(s2), [s3] "r"(s3), [s4] "r"(s4));
-
-    /* if (p1 - p4 == 0) and (p2 - p3 == 0)
-       mask will be zero and filtering is not needed */
-    if (!(((p1 - p4) == 0) && ((p2 - p3) == 0))) {
-      __asm__ __volatile__(
-          "lw       %[pm1], (%[sm1])   \n\t"
-          "lw       %[p0],  (%[s0])    \n\t"
-          "lw       %[p5],  (%[s5])    \n\t"
-          "lw       %[p6],  (%[s6])    \n\t"
-
-          : [pm1] "=&r"(pm1), [p0] "=&r"(p0), [p5] "=&r"(p5), [p6] "=&r"(p6)
-          : [sm1] "r"(sm1), [s0] "r"(s0), [s5] "r"(s5), [s6] "r"(s6));
-
-      filter_hev_mask_dspr2(limit_vec, flimit_vec, p1, p2, pm1, p0, p3, p4, p5,
-                            p6, thresh_vec, &hev, &mask);
-
-      /* if mask == 0 do filtering is not needed */
-      if (mask) {
-        /* filtering */
-        filter_dspr2(mask, hev, &p1, &p2, &p3, &p4);
-
-        __asm__ __volatile__(
-            "sw     %[p1],  (%[s1])    \n\t"
-            "sw     %[p2],  (%[s2])    \n\t"
-            "sw     %[p3],  (%[s3])    \n\t"
-            "sw     %[p4],  (%[s4])    \n\t"
-
-            :
-            : [p1] "r"(p1), [p2] "r"(p2), [p3] "r"(p3), [p4] "r"(p4),
-              [s1] "r"(s1), [s2] "r"(s2), [s3] "r"(s3), [s4] "r"(s4));
-      }
-    }
-
-    s = s + 4;
-  }
-}
-
-void aom_lpf_vertical_4_dspr2(unsigned char *s, int pitch,
-                              const uint8_t *blimit, const uint8_t *limit,
-                              const uint8_t *thresh) {
-  uint8_t i;
-  uint32_t mask, hev;
-  uint32_t pm1, p0, p1, p2, p3, p4, p5, p6;
-  uint8_t *s1, *s2, *s3, *s4;
-  uint32_t prim1, prim2, sec3, sec4, prim3, prim4;
-  uint32_t thresh_vec, flimit_vec, limit_vec;
-  uint32_t uflimit, ulimit, uthresh;
-
-  uflimit = *blimit;
-  ulimit = *limit;
-  uthresh = *thresh;
-
-  /* create quad-byte */
-  __asm__ __volatile__(
-      "replv.qb       %[thresh_vec],    %[uthresh]    \n\t"
-      "replv.qb       %[flimit_vec],    %[uflimit]    \n\t"
-      "replv.qb       %[limit_vec],     %[ulimit]     \n\t"
-
-      : [thresh_vec] "=&r"(thresh_vec), [flimit_vec] "=&r"(flimit_vec),
-        [limit_vec] "=r"(limit_vec)
-      : [uthresh] "r"(uthresh), [uflimit] "r"(uflimit), [ulimit] "r"(ulimit));
-
-  /* prefetch data for store */
-  prefetch_store(s + pitch);
-
-  for (i = 0; i < 2; i++) {
-    s1 = s;
-    s2 = s + pitch;
-    s3 = s2 + pitch;
-    s4 = s3 + pitch;
-    s = s4 + pitch;
-
-    /* load quad-byte vectors
-     * memory is 4 byte aligned
-     */
-    p2 = *((uint32_t *)(s1 - 4));
-    p6 = *((uint32_t *)(s1));
-    p1 = *((uint32_t *)(s2 - 4));
-    p5 = *((uint32_t *)(s2));
-    p0 = *((uint32_t *)(s3 - 4));
-    p4 = *((uint32_t *)(s3));
-    pm1 = *((uint32_t *)(s4 - 4));
-    p3 = *((uint32_t *)(s4));
-
-    /* transpose pm1, p0, p1, p2 */
-    __asm__ __volatile__(
-        "precrq.qb.ph   %[prim1],   %[p2],      %[p1]       \n\t"
-        "precr.qb.ph    %[prim2],   %[p2],      %[p1]       \n\t"
-        "precrq.qb.ph   %[prim3],   %[p0],      %[pm1]      \n\t"
-        "precr.qb.ph    %[prim4],   %[p0],      %[pm1]      \n\t"
-
-        "precrq.qb.ph   %[p1],      %[prim1],   %[prim2]    \n\t"
-        "precr.qb.ph    %[pm1],     %[prim1],   %[prim2]    \n\t"
-        "precrq.qb.ph   %[sec3],    %[prim3],   %[prim4]    \n\t"
-        "precr.qb.ph    %[sec4],    %[prim3],   %[prim4]    \n\t"
-
-        "precrq.ph.w    %[p2],      %[p1],      %[sec3]     \n\t"
-        "precrq.ph.w    %[p0],      %[pm1],     %[sec4]     \n\t"
-        "append         %[p1],      %[sec3],    16          \n\t"
-        "append         %[pm1],     %[sec4],    16          \n\t"
-
-        : [prim1] "=&r"(prim1), [prim2] "=&r"(prim2), [prim3] "=&r"(prim3),
-          [prim4] "=&r"(prim4), [p2] "+r"(p2), [p1] "+r"(p1), [p0] "+r"(p0),
-          [pm1] "+r"(pm1), [sec3] "=&r"(sec3), [sec4] "=&r"(sec4)
-        :);
-
-    /* transpose p3, p4, p5, p6 */
-    __asm__ __volatile__(
-        "precrq.qb.ph   %[prim1],   %[p6],      %[p5]       \n\t"
-        "precr.qb.ph    %[prim2],   %[p6],      %[p5]       \n\t"
-        "precrq.qb.ph   %[prim3],   %[p4],      %[p3]       \n\t"
-        "precr.qb.ph    %[prim4],   %[p4],      %[p3]       \n\t"
-
-        "precrq.qb.ph   %[p5],      %[prim1],   %[prim2]    \n\t"
-        "precr.qb.ph    %[p3],      %[prim1],   %[prim2]    \n\t"
-        "precrq.qb.ph   %[sec3],    %[prim3],   %[prim4]    \n\t"
-        "precr.qb.ph    %[sec4],    %[prim3],   %[prim4]    \n\t"
-
-        "precrq.ph.w    %[p6],      %[p5],      %[sec3]     \n\t"
-        "precrq.ph.w    %[p4],      %[p3],      %[sec4]     \n\t"
-        "append         %[p5],      %[sec3],    16          \n\t"
-        "append         %[p3],      %[sec4],    16          \n\t"
-
-        : [prim1] "=&r"(prim1), [prim2] "=&r"(prim2), [prim3] "=&r"(prim3),
-          [prim4] "=&r"(prim4), [p6] "+r"(p6), [p5] "+r"(p5), [p4] "+r"(p4),
-          [p3] "+r"(p3), [sec3] "=&r"(sec3), [sec4] "=&r"(sec4)
-        :);
-
-    /* if (p1 - p4 == 0) and (p2 - p3 == 0)
-     * mask will be zero and filtering is not needed
-     */
-    if (!(((p1 - p4) == 0) && ((p2 - p3) == 0))) {
-      filter_hev_mask_dspr2(limit_vec, flimit_vec, p1, p2, pm1, p0, p3, p4, p5,
-                            p6, thresh_vec, &hev, &mask);
-
-      /* if mask == 0 do filtering is not needed */
-      if (mask) {
-        /* filtering */
-        filter_dspr2(mask, hev, &p1, &p2, &p3, &p4);
-
-        /* unpack processed 4x4 neighborhood
-         * don't use transpose on output data
-         * because memory isn't aligned
-         */
-        __asm__ __volatile__(
-            "sb     %[p4],   1(%[s4])    \n\t"
-            "sb     %[p3],   0(%[s4])    \n\t"
-            "sb     %[p2],  -1(%[s4])    \n\t"
-            "sb     %[p1],  -2(%[s4])    \n\t"
-
-            :
-            : [p4] "r"(p4), [p3] "r"(p3), [p2] "r"(p2), [p1] "r"(p1),
-              [s4] "r"(s4));
-
-        __asm__ __volatile__(
-            "srl    %[p4],  %[p4],  8     \n\t"
-            "srl    %[p3],  %[p3],  8     \n\t"
-            "srl    %[p2],  %[p2],  8     \n\t"
-            "srl    %[p1],  %[p1],  8     \n\t"
-
-            : [p4] "+r"(p4), [p3] "+r"(p3), [p2] "+r"(p2), [p1] "+r"(p1)
-            :);
-
-        __asm__ __volatile__(
-            "sb     %[p4],   1(%[s3])    \n\t"
-            "sb     %[p3],   0(%[s3])    \n\t"
-            "sb     %[p2],  -1(%[s3])    \n\t"
-            "sb     %[p1],  -2(%[s3])    \n\t"
-
-            : [p1] "+r"(p1)
-            : [p4] "r"(p4), [p3] "r"(p3), [p2] "r"(p2), [s3] "r"(s3));
-
-        __asm__ __volatile__(
-            "srl    %[p4],  %[p4],  8     \n\t"
-            "srl    %[p3],  %[p3],  8     \n\t"
-            "srl    %[p2],  %[p2],  8     \n\t"
-            "srl    %[p1],  %[p1],  8     \n\t"
-
-            : [p4] "+r"(p4), [p3] "+r"(p3), [p2] "+r"(p2), [p1] "+r"(p1)
-            :);
-
-        __asm__ __volatile__(
-            "sb     %[p4],   1(%[s2])    \n\t"
-            "sb     %[p3],   0(%[s2])    \n\t"
-            "sb     %[p2],  -1(%[s2])    \n\t"
-            "sb     %[p1],  -2(%[s2])    \n\t"
-
-            :
-            : [p4] "r"(p4), [p3] "r"(p3), [p2] "r"(p2), [p1] "r"(p1),
-              [s2] "r"(s2));
-
-        __asm__ __volatile__(
-            "srl    %[p4],  %[p4],  8     \n\t"
-            "srl    %[p3],  %[p3],  8     \n\t"
-            "srl    %[p2],  %[p2],  8     \n\t"
-            "srl    %[p1],  %[p1],  8     \n\t"
-
-            : [p4] "+r"(p4), [p3] "+r"(p3), [p2] "+r"(p2), [p1] "+r"(p1)
-            :);
-
-        __asm__ __volatile__(
-            "sb     %[p4],   1(%[s1])    \n\t"
-            "sb     %[p3],   0(%[s1])    \n\t"
-            "sb     %[p2],  -1(%[s1])    \n\t"
-            "sb     %[p1],  -2(%[s1])    \n\t"
-
-            :
-            : [p4] "r"(p4), [p3] "r"(p3), [p2] "r"(p2), [p1] "r"(p1),
-              [s1] "r"(s1));
-      }
-    }
-  }
-}
-
-void aom_lpf_horizontal_4_dual_dspr2(
-    uint8_t *s, int p /* pitch */, const uint8_t *blimit0,
-    const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1,
-    const uint8_t *limit1, const uint8_t *thresh1) {
-  aom_lpf_horizontal_4_dspr2(s, p, blimit0, limit0, thresh0);
-  aom_lpf_horizontal_4_dspr2(s + 8, p, blimit1, limit1, thresh1);
-}
-
-void aom_lpf_horizontal_8_dual_dspr2(
-    uint8_t *s, int p /* pitch */, const uint8_t *blimit0,
-    const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1,
-    const uint8_t *limit1, const uint8_t *thresh1) {
-  aom_lpf_horizontal_8_dspr2(s, p, blimit0, limit0, thresh0);
-  aom_lpf_horizontal_8_dspr2(s + 8, p, blimit1, limit1, thresh1);
-}
-
-void aom_lpf_vertical_4_dual_dspr2(uint8_t *s, int p, const uint8_t *blimit0,
-                                   const uint8_t *limit0,
-                                   const uint8_t *thresh0,
-                                   const uint8_t *blimit1,
-                                   const uint8_t *limit1,
-                                   const uint8_t *thresh1) {
-  aom_lpf_vertical_4_dspr2(s, p, blimit0, limit0, thresh0);
-  aom_lpf_vertical_4_dspr2(s + 8 * p, p, blimit1, limit1, thresh1);
-}
-
-void aom_lpf_vertical_8_dual_dspr2(uint8_t *s, int p, const uint8_t *blimit0,
-                                   const uint8_t *limit0,
-                                   const uint8_t *thresh0,
-                                   const uint8_t *blimit1,
-                                   const uint8_t *limit1,
-                                   const uint8_t *thresh1) {
-  aom_lpf_vertical_8_dspr2(s, p, blimit0, limit0, thresh0);
-  aom_lpf_vertical_8_dspr2(s + 8 * p, p, blimit1, limit1, thresh1);
-}
-
-void aom_lpf_vertical_16_dual_dspr2(uint8_t *s, int p, const uint8_t *blimit,
-                                    const uint8_t *limit,
-                                    const uint8_t *thresh) {
-  aom_lpf_vertical_16_dspr2(s, p, blimit, limit, thresh);
-  aom_lpf_vertical_16_dspr2(s + 8 * p, p, blimit, limit, thresh);
-}
-#endif  // #if HAVE_DSPR2
--- a/aom_dsp/mips/loopfilter_macros_dspr2.h
+++ b/aom_dsp/mips/loopfilter_macros_dspr2.h
@@ -1,436 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_DSP_MIPS_LOOPFILTER_MACROS_DSPR2_H_
-#define AOM_DSP_MIPS_LOOPFILTER_MACROS_DSPR2_H_
-
-#include <stdlib.h>
-
-#include "./aom_dsp_rtcd.h"
-#include "aom/aom_integer.h"
-#include "aom_mem/aom_mem.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#if HAVE_DSPR2
-#define STORE_F0()                                                       \
-  {                                                                      \
-    __asm__ __volatile__(                                                \
-        "sb     %[q1_f0],    1(%[s4])           \n\t"                    \
-        "sb     %[q0_f0],    0(%[s4])           \n\t"                    \
-        "sb     %[p0_f0],   -1(%[s4])           \n\t"                    \
-        "sb     %[p1_f0],   -2(%[s4])           \n\t"                    \
-                                                                         \
-        :                                                                \
-        : [q1_f0] "r"(q1_f0), [q0_f0] "r"(q0_f0), [p0_f0] "r"(p0_f0),    \
-          [p1_f0] "r"(p1_f0), [s4] "r"(s4));                             \
-                                                                         \
-    __asm__ __volatile__(                                                \
-        "srl    %[q1_f0],   %[q1_f0],   8       \n\t"                    \
-        "srl    %[q0_f0],   %[q0_f0],   8       \n\t"                    \
-        "srl    %[p0_f0],   %[p0_f0],   8       \n\t"                    \
-        "srl    %[p1_f0],   %[p1_f0],   8       \n\t"                    \
-                                                                         \
-        : [q1_f0] "+r"(q1_f0), [q0_f0] "+r"(q0_f0), [p0_f0] "+r"(p0_f0), \
-          [p1_f0] "+r"(p1_f0)                                            \
-        :);                                                              \
-                                                                         \
-    __asm__ __volatile__(                                                \
-        "sb     %[q1_f0],    1(%[s3])           \n\t"                    \
-        "sb     %[q0_f0],    0(%[s3])           \n\t"                    \
-        "sb     %[p0_f0],   -1(%[s3])           \n\t"                    \
-        "sb     %[p1_f0],   -2(%[s3])           \n\t"                    \
-                                                                         \
-        : [p1_f0] "+r"(p1_f0)                                            \
-        : [q1_f0] "r"(q1_f0), [q0_f0] "r"(q0_f0), [s3] "r"(s3),          \
-          [p0_f0] "r"(p0_f0));                                           \
-                                                                         \
-    __asm__ __volatile__(                                                \
-        "srl    %[q1_f0],   %[q1_f0],   8       \n\t"                    \
-        "srl    %[q0_f0],   %[q0_f0],   8       \n\t"                    \
-        "srl    %[p0_f0],   %[p0_f0],   8       \n\t"                    \
-        "srl    %[p1_f0],   %[p1_f0],   8       \n\t"                    \
-                                                                         \
-        : [q1_f0] "+r"(q1_f0), [q0_f0] "+r"(q0_f0), [p0_f0] "+r"(p0_f0), \
-          [p1_f0] "+r"(p1_f0)                                            \
-        :);                                                              \
-                                                                         \
-    __asm__ __volatile__(                                                \
-        "sb     %[q1_f0],    1(%[s2])           \n\t"                    \
-        "sb     %[q0_f0],    0(%[s2])           \n\t"                    \
-        "sb     %[p0_f0],   -1(%[s2])           \n\t"                    \
-        "sb     %[p1_f0],   -2(%[s2])           \n\t"                    \
-                                                                         \
-        :                                                                \
-        : [q1_f0] "r"(q1_f0), [q0_f0] "r"(q0_f0), [p0_f0] "r"(p0_f0),    \
-          [p1_f0] "r"(p1_f0), [s2] "r"(s2));                             \
-                                                                         \
-    __asm__ __volatile__(                                                \
-        "srl    %[q1_f0],   %[q1_f0],   8       \n\t"                    \
-        "srl    %[q0_f0],   %[q0_f0],   8       \n\t"                    \
-        "srl    %[p0_f0],   %[p0_f0],   8       \n\t"                    \
-        "srl    %[p1_f0],   %[p1_f0],   8       \n\t"                    \
-                                                                         \
-        : [q1_f0] "+r"(q1_f0), [q0_f0] "+r"(q0_f0), [p0_f0] "+r"(p0_f0), \
-          [p1_f0] "+r"(p1_f0)                                            \
-        :);                                                              \
-                                                                         \
-    __asm__ __volatile__(                                                \
-        "sb     %[q1_f0],    1(%[s1])           \n\t"                    \
-        "sb     %[q0_f0],    0(%[s1])           \n\t"                    \
-        "sb     %[p0_f0],   -1(%[s1])           \n\t"                    \
-        "sb     %[p1_f0],   -2(%[s1])           \n\t"                    \
-                                                                         \
-        :                                                                \
-        : [q1_f0] "r"(q1_f0), [q0_f0] "r"(q0_f0), [p0_f0] "r"(p0_f0),    \
-          [p1_f0] "r"(p1_f0), [s1] "r"(s1));                             \
-  }
-
-#define STORE_F1()                                                             \
-  {                                                                            \
-    __asm__ __volatile__(                                                      \
-        "sb     %[q2_r],     2(%[s4])           \n\t"                          \
-        "sb     %[q1_r],     1(%[s4])           \n\t"                          \
-        "sb     %[q0_r],     0(%[s4])           \n\t"                          \
-        "sb     %[p0_r],    -1(%[s4])           \n\t"                          \
-        "sb     %[p1_r],    -2(%[s4])           \n\t"                          \
-        "sb     %[p2_r],    -3(%[s4])           \n\t"                          \
-                                                                               \
-        :                                                                      \
-        : [q2_r] "r"(q2_r), [q1_r] "r"(q1_r), [q0_r] "r"(q0_r),                \
-          [p0_r] "r"(p0_r), [p1_r] "r"(p1_r), [p2_r] "r"(p2_r), [s4] "r"(s4)); \
-                                                                               \
-    __asm__ __volatile__(                                                      \
-        "srl    %[q2_r],    %[q2_r],    16      \n\t"                          \
-        "srl    %[q1_r],    %[q1_r],    16      \n\t"                          \
-        "srl    %[q0_r],    %[q0_r],    16      \n\t"                          \
-        "srl    %[p0_r],    %[p0_r],    16      \n\t"                          \
-        "srl    %[p1_r],    %[p1_r],    16      \n\t"                          \
-        "srl    %[p2_r],    %[p2_r],    16      \n\t"                          \
-                                                                               \
-        : [q2_r] "+r"(q2_r), [q1_r] "+r"(q1_r), [q0_r] "+r"(q0_r),             \
-          [p0_r] "+r"(p0_r), [p1_r] "+r"(p1_r), [p2_r] "+r"(p2_r)              \
-        :);                                                                    \
-                                                                               \
-    __asm__ __volatile__(                                                      \
-        "sb     %[q2_r],     2(%[s3])           \n\t"                          \
-        "sb     %[q1_r],     1(%[s3])           \n\t"                          \
-        "sb     %[q0_r],     0(%[s3])           \n\t"                          \
-        "sb     %[p0_r],    -1(%[s3])           \n\t"                          \
-        "sb     %[p1_r],    -2(%[s3])           \n\t"                          \
-        "sb     %[p2_r],    -3(%[s3])           \n\t"                          \
-                                                                               \
-        :                                                                      \
-        : [q2_r] "r"(q2_r), [q1_r] "r"(q1_r), [q0_r] "r"(q0_r),                \
-          [p0_r] "r"(p0_r), [p1_r] "r"(p1_r), [p2_r] "r"(p2_r), [s3] "r"(s3)); \
-                                                                               \
-    __asm__ __volatile__(                                                      \
-        "sb     %[q2_l],     2(%[s2])           \n\t"                          \
-        "sb     %[q1_l],     1(%[s2])           \n\t"                          \
-        "sb     %[q0_l],     0(%[s2])           \n\t"                          \
-        "sb     %[p0_l],    -1(%[s2])           \n\t"                          \
-        "sb     %[p1_l],    -2(%[s2])           \n\t"                          \
-        "sb     %[p2_l],    -3(%[s2])           \n\t"                          \
-                                                                               \
-        :                                                                      \
-        : [q2_l] "r"(q2_l), [q1_l] "r"(q1_l), [q0_l] "r"(q0_l),                \
-          [p0_l] "r"(p0_l), [p1_l] "r"(p1_l), [p2_l] "r"(p2_l), [s2] "r"(s2)); \
-                                                                               \
-    __asm__ __volatile__(                                                      \
-        "srl    %[q2_l],    %[q2_l],    16      \n\t"                          \
-        "srl    %[q1_l],    %[q1_l],    16      \n\t"                          \
-        "srl    %[q0_l],    %[q0_l],    16      \n\t"                          \
-        "srl    %[p0_l],    %[p0_l],    16      \n\t"                          \
-        "srl    %[p1_l],    %[p1_l],    16      \n\t"                          \
-        "srl    %[p2_l],    %[p2_l],    16      \n\t"                          \
-                                                                               \
-        : [q2_l] "+r"(q2_l), [q1_l] "+r"(q1_l), [q0_l] "+r"(q0_l),             \
-          [p0_l] "+r"(p0_l), [p1_l] "+r"(p1_l), [p2_l] "+r"(p2_l)              \
-        :);                                                                    \
-                                                                               \
-    __asm__ __volatile__(                                                      \
-        "sb     %[q2_l],     2(%[s1])           \n\t"                          \
-        "sb     %[q1_l],     1(%[s1])           \n\t"                          \
-        "sb     %[q0_l],     0(%[s1])           \n\t"                          \
-        "sb     %[p0_l],    -1(%[s1])           \n\t"                          \
-        "sb     %[p1_l],    -2(%[s1])           \n\t"                          \
-        "sb     %[p2_l],    -3(%[s1])           \n\t"                          \
-                                                                               \
-        :                                                                      \
-        : [q2_l] "r"(q2_l), [q1_l] "r"(q1_l), [q0_l] "r"(q0_l),                \
-          [p0_l] "r"(p0_l), [p1_l] "r"(p1_l), [p2_l] "r"(p2_l), [s1] "r"(s1)); \
-  }
-
-#define STORE_F2()                                                 \
-  {                                                                \
-    __asm__ __volatile__(                                          \
-        "sb     %[q6_r],     6(%[s4])           \n\t"              \
-        "sb     %[q5_r],     5(%[s4])           \n\t"              \
-        "sb     %[q4_r],     4(%[s4])           \n\t"              \
-        "sb     %[q3_r],     3(%[s4])           \n\t"              \
-        "sb     %[q2_r],     2(%[s4])           \n\t"              \
-        "sb     %[q1_r],     1(%[s4])           \n\t"              \
-        "sb     %[q0_r],     0(%[s4])           \n\t"              \
-        "sb     %[p0_r],    -1(%[s4])           \n\t"              \
-        "sb     %[p1_r],    -2(%[s4])           \n\t"              \
-        "sb     %[p2_r],    -3(%[s4])           \n\t"              \
-        "sb     %[p3_r],    -4(%[s4])           \n\t"              \
-        "sb     %[p4_r],    -5(%[s4])           \n\t"              \
-        "sb     %[p5_r],    -6(%[s4])           \n\t"              \
-        "sb     %[p6_r],    -7(%[s4])           \n\t"              \
-                                                                   \
-        :                                                          \
-        : [q6_r] "r"(q6_r), [q5_r] "r"(q5_r), [q4_r] "r"(q4_r),    \
-          [q3_r] "r"(q3_r), [q2_r] "r"(q2_r), [q1_r] "r"(q1_r),    \
-          [q0_r] "r"(q0_r), [p0_r] "r"(p0_r), [p1_r] "r"(p1_r),    \
-          [p2_r] "r"(p2_r), [p3_r] "r"(p3_r), [p4_r] "r"(p4_r),    \
-          [p5_r] "r"(p5_r), [p6_r] "r"(p6_r), [s4] "r"(s4));       \
-                                                                   \
-    __asm__ __volatile__(                                          \
-        "srl    %[q6_r],    %[q6_r],    16      \n\t"              \
-        "srl    %[q5_r],    %[q5_r],    16      \n\t"              \
-        "srl    %[q4_r],    %[q4_r],    16      \n\t"              \
-        "srl    %[q3_r],    %[q3_r],    16      \n\t"              \
-        "srl    %[q2_r],    %[q2_r],    16      \n\t"              \
-        "srl    %[q1_r],    %[q1_r],    16      \n\t"              \
-        "srl    %[q0_r],    %[q0_r],    16      \n\t"              \
-        "srl    %[p0_r],    %[p0_r],    16      \n\t"              \
-        "srl    %[p1_r],    %[p1_r],    16      \n\t"              \
-        "srl    %[p2_r],    %[p2_r],    16      \n\t"              \
-        "srl    %[p3_r],    %[p3_r],    16      \n\t"              \
-        "srl    %[p4_r],    %[p4_r],    16      \n\t"              \
-        "srl    %[p5_r],    %[p5_r],    16      \n\t"              \
-        "srl    %[p6_r],    %[p6_r],    16      \n\t"              \
-                                                                   \
-        : [q6_r] "+r"(q6_r), [q5_r] "+r"(q5_r), [q4_r] "+r"(q4_r), \
-          [q3_r] "+r"(q3_r), [q2_r] "+r"(q2_r), [q1_r] "+r"(q1_r), \
-          [q0_r] "+r"(q0_r), [p0_r] "+r"(p0_r), [p1_r] "+r"(p1_r), \
-          [p2_r] "+r"(p2_r), [p3_r] "+r"(p3_r), [p4_r] "+r"(p4_r), \
-          [p5_r] "+r"(p5_r), [p6_r] "+r"(p6_r)                     \
-        :);                                                        \
-                                                                   \
-    __asm__ __volatile__(                                          \
-        "sb     %[q6_r],     6(%[s3])           \n\t"              \
-        "sb     %[q5_r],     5(%[s3])           \n\t"              \
-        "sb     %[q4_r],     4(%[s3])           \n\t"              \
-        "sb     %[q3_r],     3(%[s3])           \n\t"              \
-        "sb     %[q2_r],     2(%[s3])           \n\t"              \
-        "sb     %[q1_r],     1(%[s3])           \n\t"              \
-        "sb     %[q0_r],     0(%[s3])           \n\t"              \
-        "sb     %[p0_r],    -1(%[s3])           \n\t"              \
-        "sb     %[p1_r],    -2(%[s3])           \n\t"              \
-        "sb     %[p2_r],    -3(%[s3])           \n\t"              \
-        "sb     %[p3_r],    -4(%[s3])           \n\t"              \
-        "sb     %[p4_r],    -5(%[s3])           \n\t"              \
-        "sb     %[p5_r],    -6(%[s3])           \n\t"              \
-        "sb     %[p6_r],    -7(%[s3])           \n\t"              \
-                                                                   \
-        :                                                          \
-        : [q6_r] "r"(q6_r), [q5_r] "r"(q5_r), [q4_r] "r"(q4_r),    \
-          [q3_r] "r"(q3_r), [q2_r] "r"(q2_r), [q1_r] "r"(q1_r),    \
-          [q0_r] "r"(q0_r), [p0_r] "r"(p0_r), [p1_r] "r"(p1_r),    \
-          [p2_r] "r"(p2_r), [p3_r] "r"(p3_r), [p4_r] "r"(p4_r),    \
-          [p5_r] "r"(p5_r), [p6_r] "r"(p6_r), [s3] "r"(s3));       \
-                                                                   \
-    __asm__ __volatile__(                                          \
-        "sb     %[q6_l],     6(%[s2])           \n\t"              \
-        "sb     %[q5_l],     5(%[s2])           \n\t"              \
-        "sb     %[q4_l],     4(%[s2])           \n\t"              \
-        "sb     %[q3_l],     3(%[s2])           \n\t"              \
-        "sb     %[q2_l],     2(%[s2])           \n\t"              \
-        "sb     %[q1_l],     1(%[s2])           \n\t"              \
-        "sb     %[q0_l],     0(%[s2])           \n\t"              \
-        "sb     %[p0_l],    -1(%[s2])           \n\t"              \
-        "sb     %[p1_l],    -2(%[s2])           \n\t"              \
-        "sb     %[p2_l],    -3(%[s2])           \n\t"              \
-        "sb     %[p3_l],    -4(%[s2])           \n\t"              \
-        "sb     %[p4_l],    -5(%[s2])           \n\t"              \
-        "sb     %[p5_l],    -6(%[s2])           \n\t"              \
-        "sb     %[p6_l],    -7(%[s2])           \n\t"              \
-                                                                   \
-        :                                                          \
-        : [q6_l] "r"(q6_l), [q5_l] "r"(q5_l), [q4_l] "r"(q4_l),    \
-          [q3_l] "r"(q3_l), [q2_l] "r"(q2_l), [q1_l] "r"(q1_l),    \
-          [q0_l] "r"(q0_l), [p0_l] "r"(p0_l), [p1_l] "r"(p1_l),    \
-          [p2_l] "r"(p2_l), [p3_l] "r"(p3_l), [p4_l] "r"(p4_l),    \
-          [p5_l] "r"(p5_l), [p6_l] "r"(p6_l), [s2] "r"(s2));       \
-                                                                   \
-    __asm__ __volatile__(                                          \
-        "srl    %[q6_l],    %[q6_l],    16     \n\t"               \
-        "srl    %[q5_l],    %[q5_l],    16     \n\t"               \
-        "srl    %[q4_l],    %[q4_l],    16     \n\t"               \
-        "srl    %[q3_l],    %[q3_l],    16     \n\t"               \
-        "srl    %[q2_l],    %[q2_l],    16     \n\t"               \
-        "srl    %[q1_l],    %[q1_l],    16     \n\t"               \
-        "srl    %[q0_l],    %[q0_l],    16     \n\t"               \
-        "srl    %[p0_l],    %[p0_l],    16     \n\t"               \
-        "srl    %[p1_l],    %[p1_l],    16     \n\t"               \
-        "srl    %[p2_l],    %[p2_l],    16     \n\t"               \
-        "srl    %[p3_l],    %[p3_l],    16     \n\t"               \
-        "srl    %[p4_l],    %[p4_l],    16     \n\t"               \
-        "srl    %[p5_l],    %[p5_l],    16     \n\t"               \
-        "srl    %[p6_l],    %[p6_l],    16     \n\t"               \
-                                                                   \
-        : [q6_l] "+r"(q6_l), [q5_l] "+r"(q5_l), [q4_l] "+r"(q4_l), \
-          [q3_l] "+r"(q3_l), [q2_l] "+r"(q2_l), [q1_l] "+r"(q1_l), \
-          [q0_l] "+r"(q0_l), [p0_l] "+r"(p0_l), [p1_l] "+r"(p1_l), \
-          [p2_l] "+r"(p2_l), [p3_l] "+r"(p3_l), [p4_l] "+r"(p4_l), \
-          [p5_l] "+r"(p5_l), [p6_l] "+r"(p6_l)                     \
-        :);                                                        \
-                                                                   \
-    __asm__ __volatile__(                                          \
-        "sb     %[q6_l],     6(%[s1])           \n\t"              \
-        "sb     %[q5_l],     5(%[s1])           \n\t"              \
-        "sb     %[q4_l],     4(%[s1])           \n\t"              \
-        "sb     %[q3_l],     3(%[s1])           \n\t"              \
-        "sb     %[q2_l],     2(%[s1])           \n\t"              \
-        "sb     %[q1_l],     1(%[s1])           \n\t"              \
-        "sb     %[q0_l],     0(%[s1])           \n\t"              \
-        "sb     %[p0_l],    -1(%[s1])           \n\t"              \
-        "sb     %[p1_l],    -2(%[s1])           \n\t"              \
-        "sb     %[p2_l],    -3(%[s1])           \n\t"              \
-        "sb     %[p3_l],    -4(%[s1])           \n\t"              \
-        "sb     %[p4_l],    -5(%[s1])           \n\t"              \
-        "sb     %[p5_l],    -6(%[s1])           \n\t"              \
-        "sb     %[p6_l],    -7(%[s1])           \n\t"              \
-                                                                   \
-        :                                                          \
-        : [q6_l] "r"(q6_l), [q5_l] "r"(q5_l), [q4_l] "r"(q4_l),    \
-          [q3_l] "r"(q3_l), [q2_l] "r"(q2_l), [q1_l] "r"(q1_l),    \
-          [q0_l] "r"(q0_l), [p0_l] "r"(p0_l), [p1_l] "r"(p1_l),    \
-          [p2_l] "r"(p2_l), [p3_l] "r"(p3_l), [p4_l] "r"(p4_l),    \
-          [p5_l] "r"(p5_l), [p6_l] "r"(p6_l), [s1] "r"(s1));       \
-  }
-
-#define PACK_LEFT_0TO3()                                              \
-  {                                                                   \
-    __asm__ __volatile__(                                             \
-        "preceu.ph.qbl   %[p3_l],   %[p3]   \n\t"                     \
-        "preceu.ph.qbl   %[p2_l],   %[p2]   \n\t"                     \
-        "preceu.ph.qbl   %[p1_l],   %[p1]   \n\t"                     \
-        "preceu.ph.qbl   %[p0_l],   %[p0]   \n\t"                     \
-        "preceu.ph.qbl   %[q0_l],   %[q0]   \n\t"                     \
-        "preceu.ph.qbl   %[q1_l],   %[q1]   \n\t"                     \
-        "preceu.ph.qbl   %[q2_l],   %[q2]   \n\t"                     \
-        "preceu.ph.qbl   %[q3_l],   %[q3]   \n\t"                     \
-                                                                      \
-        : [p3_l] "=&r"(p3_l), [p2_l] "=&r"(p2_l), [p1_l] "=&r"(p1_l), \
-          [p0_l] "=&r"(p0_l), [q0_l] "=&r"(q0_l), [q1_l] "=&r"(q1_l), \
-          [q2_l] "=&r"(q2_l), [q3_l] "=&r"(q3_l)                      \
-        : [p3] "r"(p3), [p2] "r"(p2), [p1] "r"(p1), [p0] "r"(p0),     \
-          [q0] "r"(q0), [q1] "r"(q1), [q2] "r"(q2), [q3] "r"(q3));    \
-  }
-
-#define PACK_LEFT_4TO7()                                              \
-  {                                                                   \
-    __asm__ __volatile__(                                             \
-        "preceu.ph.qbl   %[p7_l],   %[p7]   \n\t"                     \
-        "preceu.ph.qbl   %[p6_l],   %[p6]   \n\t"                     \
-        "preceu.ph.qbl   %[p5_l],   %[p5]   \n\t"                     \
-        "preceu.ph.qbl   %[p4_l],   %[p4]   \n\t"                     \
-        "preceu.ph.qbl   %[q4_l],   %[q4]   \n\t"                     \
-        "preceu.ph.qbl   %[q5_l],   %[q5]   \n\t"                     \
-        "preceu.ph.qbl   %[q6_l],   %[q6]   \n\t"                     \
-        "preceu.ph.qbl   %[q7_l],   %[q7]   \n\t"                     \
-                                                                      \
-        : [p7_l] "=&r"(p7_l), [p6_l] "=&r"(p6_l), [p5_l] "=&r"(p5_l), \
-          [p4_l] "=&r"(p4_l), [q4_l] "=&r"(q4_l), [q5_l] "=&r"(q5_l), \
-          [q6_l] "=&r"(q6_l), [q7_l] "=&r"(q7_l)                      \
-        : [p7] "r"(p7), [p6] "r"(p6), [p5] "r"(p5), [p4] "r"(p4),     \
-          [q4] "r"(q4), [q5] "r"(q5), [q6] "r"(q6), [q7] "r"(q7));    \
-  }
-
-#define PACK_RIGHT_0TO3()                                             \
-  {                                                                   \
-    __asm__ __volatile__(                                             \
-        "preceu.ph.qbr   %[p3_r],   %[p3]  \n\t"                      \
-        "preceu.ph.qbr   %[p2_r],   %[p2]   \n\t"                     \
-        "preceu.ph.qbr   %[p1_r],   %[p1]   \n\t"                     \
-        "preceu.ph.qbr   %[p0_r],   %[p0]   \n\t"                     \
-        "preceu.ph.qbr   %[q0_r],   %[q0]   \n\t"                     \
-        "preceu.ph.qbr   %[q1_r],   %[q1]   \n\t"                     \
-        "preceu.ph.qbr   %[q2_r],   %[q2]   \n\t"                     \
-        "preceu.ph.qbr   %[q3_r],   %[q3]   \n\t"                     \
-                                                                      \
-        : [p3_r] "=&r"(p3_r), [p2_r] "=&r"(p2_r), [p1_r] "=&r"(p1_r), \
-          [p0_r] "=&r"(p0_r), [q0_r] "=&r"(q0_r), [q1_r] "=&r"(q1_r), \
-          [q2_r] "=&r"(q2_r), [q3_r] "=&r"(q3_r)                      \
-        : [p3] "r"(p3), [p2] "r"(p2), [p1] "r"(p1), [p0] "r"(p0),     \
-          [q0] "r"(q0), [q1] "r"(q1), [q2] "r"(q2), [q3] "r"(q3));    \
-  }
-
-#define PACK_RIGHT_4TO7()                                             \
-  {                                                                   \
-    __asm__ __volatile__(                                             \
-        "preceu.ph.qbr   %[p7_r],   %[p7]   \n\t"                     \
-        "preceu.ph.qbr   %[p6_r],   %[p6]   \n\t"                     \
-        "preceu.ph.qbr   %[p5_r],   %[p5]   \n\t"                     \
-        "preceu.ph.qbr   %[p4_r],   %[p4]   \n\t"                     \
-        "preceu.ph.qbr   %[q4_r],   %[q4]   \n\t"                     \
-        "preceu.ph.qbr   %[q5_r],   %[q5]   \n\t"                     \
-        "preceu.ph.qbr   %[q6_r],   %[q6]   \n\t"                     \
-        "preceu.ph.qbr   %[q7_r],   %[q7]   \n\t"                     \
-                                                                      \
-        : [p7_r] "=&r"(p7_r), [p6_r] "=&r"(p6_r), [p5_r] "=&r"(p5_r), \
-          [p4_r] "=&r"(p4_r), [q4_r] "=&r"(q4_r), [q5_r] "=&r"(q5_r), \
-          [q6_r] "=&r"(q6_r), [q7_r] "=&r"(q7_r)                      \
-        : [p7] "r"(p7), [p6] "r"(p6), [p5] "r"(p5), [p4] "r"(p4),     \
-          [q4] "r"(q4), [q5] "r"(q5), [q6] "r"(q6), [q7] "r"(q7));    \
-  }
-
-#define COMBINE_LEFT_RIGHT_0TO2()                                         \
-  {                                                                       \
-    __asm__ __volatile__(                                                 \
-        "precr.qb.ph    %[p2],  %[p2_l],    %[p2_r]    \n\t"              \
-        "precr.qb.ph    %[p1],  %[p1_l],    %[p1_r]    \n\t"              \
-        "precr.qb.ph    %[p0],  %[p0_l],    %[p0_r]    \n\t"              \
-        "precr.qb.ph    %[q0],  %[q0_l],    %[q0_r]    \n\t"              \
-        "precr.qb.ph    %[q1],  %[q1_l],    %[q1_r]    \n\t"              \
-        "precr.qb.ph    %[q2],  %[q2_l],    %[q2_r]    \n\t"              \
-                                                                          \
-        : [p2] "=&r"(p2), [p1] "=&r"(p1), [p0] "=&r"(p0), [q0] "=&r"(q0), \
-          [q1] "=&r"(q1), [q2] "=&r"(q2)                                  \
-        : [p2_l] "r"(p2_l), [p2_r] "r"(p2_r), [p1_l] "r"(p1_l),           \
-          [p1_r] "r"(p1_r), [p0_l] "r"(p0_l), [p0_r] "r"(p0_r),           \
-          [q0_l] "r"(q0_l), [q0_r] "r"(q0_r), [q1_l] "r"(q1_l),           \
-          [q1_r] "r"(q1_r), [q2_l] "r"(q2_l), [q2_r] "r"(q2_r));          \
-  }
-
-#define COMBINE_LEFT_RIGHT_3TO6()                                         \
-  {                                                                       \
-    __asm__ __volatile__(                                                 \
-        "precr.qb.ph    %[p6],  %[p6_l],    %[p6_r]    \n\t"              \
-        "precr.qb.ph    %[p5],  %[p5_l],    %[p5_r]    \n\t"              \
-        "precr.qb.ph    %[p4],  %[p4_l],    %[p4_r]    \n\t"              \
-        "precr.qb.ph    %[p3],  %[p3_l],    %[p3_r]    \n\t"              \
-        "precr.qb.ph    %[q3],  %[q3_l],    %[q3_r]    \n\t"              \
-        "precr.qb.ph    %[q4],  %[q4_l],    %[q4_r]    \n\t"              \
-        "precr.qb.ph    %[q5],  %[q5_l],    %[q5_r]    \n\t"              \
-        "precr.qb.ph    %[q6],  %[q6_l],    %[q6_r]    \n\t"              \
-                                                                          \
-        : [p6] "=&r"(p6), [p5] "=&r"(p5), [p4] "=&r"(p4), [p3] "=&r"(p3), \
-          [q3] "=&r"(q3), [q4] "=&r"(q4), [q5] "=&r"(q5), [q6] "=&r"(q6)  \
-        : [p6_l] "r"(p6_l), [p5_l] "r"(p5_l), [p4_l] "r"(p4_l),           \
-          [p3_l] "r"(p3_l), [p6_r] "r"(p6_r), [p5_r] "r"(p5_r),           \
-          [p4_r] "r"(p4_r), [p3_r] "r"(p3_r), [q3_l] "r"(q3_l),           \
-          [q4_l] "r"(q4_l), [q5_l] "r"(q5_l), [q6_l] "r"(q6_l),           \
-          [q3_r] "r"(q3_r), [q4_r] "r"(q4_r), [q5_r] "r"(q5_r),           \
-          [q6_r] "r"(q6_r));                                              \
-  }
-
-#endif  // #if HAVE_DSPR2
-#ifdef __cplusplus
-}  // extern "C"
-#endif
-
-#endif  // AOM_DSP_MIPS_LOOPFILTER_MACROS_DSPR2_H_
--- a/aom_dsp/mips/loopfilter_msa.h
+++ b/aom_dsp/mips/loopfilter_msa.h
@@ -1,251 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_DSP_LOOPFILTER_MSA_H_
-#define AOM_DSP_LOOPFILTER_MSA_H_
-
-#include "aom_dsp/mips/macros_msa.h"
-
-#define AOM_LPF_FILTER4_8W(p1_in, p0_in, q0_in, q1_in, mask_in, hev_in, \
-                           p1_out, p0_out, q0_out, q1_out)              \
-  {                                                                     \
-    v16i8 p1_m, p0_m, q0_m, q1_m, q0_sub_p0, filt_sign;                 \
-    v16i8 filt, filt1, filt2, cnst4b, cnst3b;                           \
-    v8i16 q0_sub_p0_r, filt_r, cnst3h;                                  \
-                                                                        \
-    p1_m = (v16i8)__msa_xori_b(p1_in, 0x80);                            \
-    p0_m = (v16i8)__msa_xori_b(p0_in, 0x80);                            \
-    q0_m = (v16i8)__msa_xori_b(q0_in, 0x80);                            \
-    q1_m = (v16i8)__msa_xori_b(q1_in, 0x80);                            \
-                                                                        \
-    filt = __msa_subs_s_b(p1_m, q1_m);                                  \
-    filt = filt & (v16i8)hev_in;                                        \
-    q0_sub_p0 = q0_m - p0_m;                                            \
-    filt_sign = __msa_clti_s_b(filt, 0);                                \
-                                                                        \
-    cnst3h = __msa_ldi_h(3);                                            \
-    q0_sub_p0_r = (v8i16)__msa_ilvr_b(q0_sub_p0, q0_sub_p0);            \
-    q0_sub_p0_r = __msa_dotp_s_h((v16i8)q0_sub_p0_r, (v16i8)cnst3h);    \
-    filt_r = (v8i16)__msa_ilvr_b(filt_sign, filt);                      \
-    filt_r += q0_sub_p0_r;                                              \
-    filt_r = __msa_sat_s_h(filt_r, 7);                                  \
-                                                                        \
-    /* combine left and right part */                                   \
-    filt = __msa_pckev_b((v16i8)filt_r, (v16i8)filt_r);                 \
-                                                                        \
-    filt = filt & (v16i8)mask_in;                                       \
-    cnst4b = __msa_ldi_b(4);                                            \
-    filt1 = __msa_adds_s_b(filt, cnst4b);                               \
-    filt1 >>= 3;                                                        \
-                                                                        \
-    cnst3b = __msa_ldi_b(3);                                            \
-    filt2 = __msa_adds_s_b(filt, cnst3b);                               \
-    filt2 >>= 3;                                                        \
-                                                                        \
-    q0_m = __msa_subs_s_b(q0_m, filt1);                                 \
-    q0_out = __msa_xori_b((v16u8)q0_m, 0x80);                           \
-    p0_m = __msa_adds_s_b(p0_m, filt2);                                 \
-    p0_out = __msa_xori_b((v16u8)p0_m, 0x80);                           \
-                                                                        \
-    filt = __msa_srari_b(filt1, 1);                                     \
-    hev_in = __msa_xori_b((v16u8)hev_in, 0xff);                         \
-    filt = filt & (v16i8)hev_in;                                        \
-                                                                        \
-    q1_m = __msa_subs_s_b(q1_m, filt);                                  \
-    q1_out = __msa_xori_b((v16u8)q1_m, 0x80);                           \
-    p1_m = __msa_adds_s_b(p1_m, filt);                                  \
-    p1_out = __msa_xori_b((v16u8)p1_m, 0x80);                           \
-  }
-
-#define AOM_LPF_FILTER4_4W(p1_in, p0_in, q0_in, q1_in, mask_in, hev_in, \
-                           p1_out, p0_out, q0_out, q1_out)              \
-  {                                                                     \
-    v16i8 p1_m, p0_m, q0_m, q1_m, q0_sub_p0, filt_sign;                 \
-    v16i8 filt, filt1, filt2, cnst4b, cnst3b;                           \
-    v8i16 q0_sub_p0_r, q0_sub_p0_l, filt_l, filt_r, cnst3h;             \
-                                                                        \
-    p1_m = (v16i8)__msa_xori_b(p1_in, 0x80);                            \
-    p0_m = (v16i8)__msa_xori_b(p0_in, 0x80);                            \
-    q0_m = (v16i8)__msa_xori_b(q0_in, 0x80);                            \
-    q1_m = (v16i8)__msa_xori_b(q1_in, 0x80);                            \
-                                                                        \
-    filt = __msa_subs_s_b(p1_m, q1_m);                                  \
-                                                                        \
-    filt = filt & (v16i8)hev_in;                                        \
-                                                                        \
-    q0_sub_p0 = q0_m - p0_m;                                            \
-    filt_sign = __msa_clti_s_b(filt, 0);                                \
-                                                                        \
-    cnst3h = __msa_ldi_h(3);                                            \
-    q0_sub_p0_r = (v8i16)__msa_ilvr_b(q0_sub_p0, q0_sub_p0);            \
-    q0_sub_p0_r = __msa_dotp_s_h((v16i8)q0_sub_p0_r, (v16i8)cnst3h);    \
-    filt_r = (v8i16)__msa_ilvr_b(filt_sign, filt);                      \
-    filt_r += q0_sub_p0_r;                                              \
-    filt_r = __msa_sat_s_h(filt_r, 7);                                  \
-                                                                        \
-    q0_sub_p0_l = (v8i16)__msa_ilvl_b(q0_sub_p0, q0_sub_p0);            \
-    q0_sub_p0_l = __msa_dotp_s_h((v16i8)q0_sub_p0_l, (v16i8)cnst3h);    \
-    filt_l = (v8i16)__msa_ilvl_b(filt_sign, filt);                      \
-    filt_l += q0_sub_p0_l;                                              \
-    filt_l = __msa_sat_s_h(filt_l, 7);                                  \
-                                                                        \
-    filt = __msa_pckev_b((v16i8)filt_l, (v16i8)filt_r);                 \
-    filt = filt & (v16i8)mask_in;                                       \
-                                                                        \
-    cnst4b = __msa_ldi_b(4);                                            \
-    filt1 = __msa_adds_s_b(filt, cnst4b);                               \
-    filt1 >>= 3;                                                        \
-                                                                        \
-    cnst3b = __msa_ldi_b(3);                                            \
-    filt2 = __msa_adds_s_b(filt, cnst3b);                               \
-    filt2 >>= 3;                                                        \
-                                                                        \
-    q0_m = __msa_subs_s_b(q0_m, filt1);                                 \
-    q0_out = __msa_xori_b((v16u8)q0_m, 0x80);                           \
-    p0_m = __msa_adds_s_b(p0_m, filt2);                                 \
-    p0_out = __msa_xori_b((v16u8)p0_m, 0x80);                           \
-                                                                        \
-    filt = __msa_srari_b(filt1, 1);                                     \
-    hev_in = __msa_xori_b((v16u8)hev_in, 0xff);                         \
-    filt = filt & (v16i8)hev_in;                                        \
-                                                                        \
-    q1_m = __msa_subs_s_b(q1_m, filt);                                  \
-    q1_out = __msa_xori_b((v16u8)q1_m, 0x80);                           \
-    p1_m = __msa_adds_s_b(p1_m, filt);                                  \
-    p1_out = __msa_xori_b((v16u8)p1_m, 0x80);                           \
-  }
-
-#define AOM_FLAT4(p3_in, p2_in, p0_in, q0_in, q2_in, q3_in, flat_out)    \
-  {                                                                      \
-    v16u8 tmp_flat4, p2_a_sub_p0, q2_a_sub_q0, p3_a_sub_p0, q3_a_sub_q0; \
-    v16u8 zero_in = { 0 };                                               \
-                                                                         \
-    tmp_flat4 = __msa_ori_b(zero_in, 1);                                 \
-    p2_a_sub_p0 = __msa_asub_u_b(p2_in, p0_in);                          \
-    q2_a_sub_q0 = __msa_asub_u_b(q2_in, q0_in);                          \
-    p3_a_sub_p0 = __msa_asub_u_b(p3_in, p0_in);                          \
-    q3_a_sub_q0 = __msa_asub_u_b(q3_in, q0_in);                          \
-                                                                         \
-    p2_a_sub_p0 = __msa_max_u_b(p2_a_sub_p0, q2_a_sub_q0);               \
-    flat_out = __msa_max_u_b(p2_a_sub_p0, flat_out);                     \
-    p3_a_sub_p0 = __msa_max_u_b(p3_a_sub_p0, q3_a_sub_q0);               \
-    flat_out = __msa_max_u_b(p3_a_sub_p0, flat_out);                     \
-                                                                         \
-    flat_out = (tmp_flat4 < (v16u8)flat_out);                            \
-    flat_out = __msa_xori_b(flat_out, 0xff);                             \
-    flat_out = flat_out & (mask);                                        \
-  }
-
-#define AOM_FLAT5(p7_in, p6_in, p5_in, p4_in, p0_in, q0_in, q4_in, q5_in, \
-                  q6_in, q7_in, flat_in, flat2_out)                       \
-  {                                                                       \
-    v16u8 tmp_flat5, zero_in = { 0 };                                     \
-    v16u8 p4_a_sub_p0, q4_a_sub_q0, p5_a_sub_p0, q5_a_sub_q0;             \
-    v16u8 p6_a_sub_p0, q6_a_sub_q0, p7_a_sub_p0, q7_a_sub_q0;             \
-                                                                          \
-    tmp_flat5 = __msa_ori_b(zero_in, 1);                                  \
-    p4_a_sub_p0 = __msa_asub_u_b(p4_in, p0_in);                           \
-    q4_a_sub_q0 = __msa_asub_u_b(q4_in, q0_in);                           \
-    p5_a_sub_p0 = __msa_asub_u_b(p5_in, p0_in);                           \
-    q5_a_sub_q0 = __msa_asub_u_b(q5_in, q0_in);                           \
-    p6_a_sub_p0 = __msa_asub_u_b(p6_in, p0_in);                           \
-    q6_a_sub_q0 = __msa_asub_u_b(q6_in, q0_in);                           \
-    p7_a_sub_p0 = __msa_asub_u_b(p7_in, p0_in);                           \
-    q7_a_sub_q0 = __msa_asub_u_b(q7_in, q0_in);                           \
-                                                                          \
-    p4_a_sub_p0 = __msa_max_u_b(p4_a_sub_p0, q4_a_sub_q0);                \
-    flat2_out = __msa_max_u_b(p5_a_sub_p0, q5_a_sub_q0);                  \
-    flat2_out = __msa_max_u_b(p4_a_sub_p0, flat2_out);                    \
-    p6_a_sub_p0 = __msa_max_u_b(p6_a_sub_p0, q6_a_sub_q0);                \
-    flat2_out = __msa_max_u_b(p6_a_sub_p0, flat2_out);                    \
-    p7_a_sub_p0 = __msa_max_u_b(p7_a_sub_p0, q7_a_sub_q0);                \
-    flat2_out = __msa_max_u_b(p7_a_sub_p0, flat2_out);                    \
-                                                                          \
-    flat2_out = (tmp_flat5 < (v16u8)flat2_out);                           \
-    flat2_out = __msa_xori_b(flat2_out, 0xff);                            \
-    flat2_out = flat2_out & flat_in;                                      \
-  }
-
-#define AOM_FILTER8(p3_in, p2_in, p1_in, p0_in, q0_in, q1_in, q2_in, q3_in, \
-                    p2_filt8_out, p1_filt8_out, p0_filt8_out, q0_filt8_out, \
-                    q1_filt8_out, q2_filt8_out)                             \
-  {                                                                         \
-    v8u16 tmp_filt8_0, tmp_filt8_1, tmp_filt8_2;                            \
-                                                                            \
-    tmp_filt8_2 = p2_in + p1_in + p0_in;                                    \
-    tmp_filt8_0 = p3_in << 1;                                               \
-                                                                            \
-    tmp_filt8_0 = tmp_filt8_0 + tmp_filt8_2 + q0_in;                        \
-    tmp_filt8_1 = tmp_filt8_0 + p3_in + p2_in;                              \
-    p2_filt8_out = (v8i16)__msa_srari_h((v8i16)tmp_filt8_1, 3);             \
-                                                                            \
-    tmp_filt8_1 = tmp_filt8_0 + p1_in + q1_in;                              \
-    p1_filt8_out = (v8i16)__msa_srari_h((v8i16)tmp_filt8_1, 3);             \
-                                                                            \
-    tmp_filt8_1 = q2_in + q1_in + q0_in;                                    \
-    tmp_filt8_2 = tmp_filt8_2 + tmp_filt8_1;                                \
-    tmp_filt8_0 = tmp_filt8_2 + (p0_in);                                    \
-    tmp_filt8_0 = tmp_filt8_0 + (p3_in);                                    \
-    p0_filt8_out = (v8i16)__msa_srari_h((v8i16)tmp_filt8_0, 3);             \
-                                                                            \
-    tmp_filt8_0 = q2_in + q3_in;                                            \
-    tmp_filt8_0 = p0_in + tmp_filt8_1 + tmp_filt8_0;                        \
-    tmp_filt8_1 = q3_in + q3_in;                                            \
-    tmp_filt8_1 = tmp_filt8_1 + tmp_filt8_0;                                \
-    q2_filt8_out = (v8i16)__msa_srari_h((v8i16)tmp_filt8_1, 3);             \
-                                                                            \
-    tmp_filt8_0 = tmp_filt8_2 + q3_in;                                      \
-    tmp_filt8_1 = tmp_filt8_0 + q0_in;                                      \
-    q0_filt8_out = (v8i16)__msa_srari_h((v8i16)tmp_filt8_1, 3);             \
-                                                                            \
-    tmp_filt8_1 = tmp_filt8_0 - p2_in;                                      \
-    tmp_filt8_0 = q1_in + q3_in;                                            \
-    tmp_filt8_1 = tmp_filt8_0 + tmp_filt8_1;                                \
-    q1_filt8_out = (v8i16)__msa_srari_h((v8i16)tmp_filt8_1, 3);             \
-  }
-
-#define LPF_MASK_HEV(p3_in, p2_in, p1_in, p0_in, q0_in, q1_in, q2_in, q3_in, \
-                     limit_in, b_limit_in, thresh_in, hev_out, mask_out,     \
-                     flat_out)                                               \
-  {                                                                          \
-    v16u8 p3_asub_p2_m, p2_asub_p1_m, p1_asub_p0_m, q1_asub_q0_m;            \
-    v16u8 p1_asub_q1_m, p0_asub_q0_m, q3_asub_q2_m, q2_asub_q1_m;            \
-                                                                             \
-    /* absolute subtraction of pixel values */                               \
-    p3_asub_p2_m = __msa_asub_u_b(p3_in, p2_in);                             \
-    p2_asub_p1_m = __msa_asub_u_b(p2_in, p1_in);                             \
-    p1_asub_p0_m = __msa_asub_u_b(p1_in, p0_in);                             \
-    q1_asub_q0_m = __msa_asub_u_b(q1_in, q0_in);                             \
-    q2_asub_q1_m = __msa_asub_u_b(q2_in, q1_in);                             \
-    q3_asub_q2_m = __msa_asub_u_b(q3_in, q2_in);                             \
-    p0_asub_q0_m = __msa_asub_u_b(p0_in, q0_in);                             \
-    p1_asub_q1_m = __msa_asub_u_b(p1_in, q1_in);                             \
-                                                                             \
-    /* calculation of hev */                                                 \
-    flat_out = __msa_max_u_b(p1_asub_p0_m, q1_asub_q0_m);                    \
-    hev_out = thresh_in < (v16u8)flat_out;                                   \
-                                                                             \
-    /* calculation of mask */                                                \
-    p0_asub_q0_m = __msa_adds_u_b(p0_asub_q0_m, p0_asub_q0_m);               \
-    p1_asub_q1_m >>= 1;                                                      \
-    p0_asub_q0_m = __msa_adds_u_b(p0_asub_q0_m, p1_asub_q1_m);               \
-                                                                             \
-    mask_out = b_limit_in < p0_asub_q0_m;                                    \
-    mask_out = __msa_max_u_b(flat_out, mask_out);                            \
-    p3_asub_p2_m = __msa_max_u_b(p3_asub_p2_m, p2_asub_p1_m);                \
-    mask_out = __msa_max_u_b(p3_asub_p2_m, mask_out);                        \
-    q2_asub_q1_m = __msa_max_u_b(q2_asub_q1_m, q3_asub_q2_m);                \
-    mask_out = __msa_max_u_b(q2_asub_q1_m, mask_out);                        \
-                                                                             \
-    mask_out = limit_in < (v16u8)mask_out;                                   \
-    mask_out = __msa_xori_b(mask_out, 0xff);                                 \
-  }
-#endif /* AOM_DSP_LOOPFILTER_MSA_H_ */
--- a/aom_dsp/mips/macros_msa.h
+++ b/aom_dsp/mips/macros_msa.h
--- a/aom_dsp/mips/txfm_macros_msa.h
+++ b/aom_dsp/mips/txfm_macros_msa.h
@@ -1,97 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_DSP_MIPS_TXFM_MACROS_MIPS_MSA_H_
-#define AOM_DSP_MIPS_TXFM_MACROS_MIPS_MSA_H_
-
-#include "aom_dsp/mips/macros_msa.h"
-
-#define DOTP_CONST_PAIR(reg0, reg1, cnst0, cnst1, out0, out1) \
-  {                                                           \
-    v8i16 k0_m = __msa_fill_h(cnst0);                         \
-    v4i32 s0_m, s1_m, s2_m, s3_m;                             \
-                                                              \
-    s0_m = (v4i32)__msa_fill_h(cnst1);                        \
-    k0_m = __msa_ilvev_h((v8i16)s0_m, k0_m);                  \
-                                                              \
-    ILVRL_H2_SW((-reg1), reg0, s1_m, s0_m);                   \
-    ILVRL_H2_SW(reg0, reg1, s3_m, s2_m);                      \
-    DOTP_SH2_SW(s1_m, s0_m, k0_m, k0_m, s1_m, s0_m);          \
-    SRARI_W2_SW(s1_m, s0_m, DCT_CONST_BITS);                  \
-    out0 = __msa_pckev_h((v8i16)s0_m, (v8i16)s1_m);           \
-                                                              \
-    DOTP_SH2_SW(s3_m, s2_m, k0_m, k0_m, s1_m, s0_m);          \
-    SRARI_W2_SW(s1_m, s0_m, DCT_CONST_BITS);                  \
-    out1 = __msa_pckev_h((v8i16)s0_m, (v8i16)s1_m);           \
-  }
-
-#define DOT_ADD_SUB_SRARI_PCK(in0, in1, in2, in3, in4, in5, in6, in7, dst0,   \
-                              dst1, dst2, dst3)                               \
-  {                                                                           \
-    v4i32 tp0_m, tp1_m, tp2_m, tp3_m, tp4_m;                                  \
-    v4i32 tp5_m, tp6_m, tp7_m, tp8_m, tp9_m;                                  \
-                                                                              \
-    DOTP_SH4_SW(in0, in1, in0, in1, in4, in4, in5, in5, tp0_m, tp2_m, tp3_m,  \
-                tp4_m);                                                       \
-    DOTP_SH4_SW(in2, in3, in2, in3, in6, in6, in7, in7, tp5_m, tp6_m, tp7_m,  \
-                tp8_m);                                                       \
-    BUTTERFLY_4(tp0_m, tp3_m, tp7_m, tp5_m, tp1_m, tp9_m, tp7_m, tp5_m);      \
-    BUTTERFLY_4(tp2_m, tp4_m, tp8_m, tp6_m, tp3_m, tp0_m, tp4_m, tp2_m);      \
-    SRARI_W4_SW(tp1_m, tp9_m, tp7_m, tp5_m, DCT_CONST_BITS);                  \
-    SRARI_W4_SW(tp3_m, tp0_m, tp4_m, tp2_m, DCT_CONST_BITS);                  \
-    PCKEV_H4_SH(tp1_m, tp3_m, tp9_m, tp0_m, tp7_m, tp4_m, tp5_m, tp2_m, dst0, \
-                dst1, dst2, dst3);                                            \
-  }
-
-#define DOT_SHIFT_RIGHT_PCK_H(in0, in1, in2)           \
-  ({                                                   \
-    v8i16 dst_m;                                       \
-    v4i32 tp0_m, tp1_m;                                \
-                                                       \
-    DOTP_SH2_SW(in0, in1, in2, in2, tp1_m, tp0_m);     \
-    SRARI_W2_SW(tp1_m, tp0_m, DCT_CONST_BITS);         \
-    dst_m = __msa_pckev_h((v8i16)tp1_m, (v8i16)tp0_m); \
-                                                       \
-    dst_m;                                             \
-  })
-
-#define MADD_SHORT(m0, m1, c0, c1, res0, res1)                              \
-  {                                                                         \
-    v4i32 madd0_m, madd1_m, madd2_m, madd3_m;                               \
-    v8i16 madd_s0_m, madd_s1_m;                                             \
-                                                                            \
-    ILVRL_H2_SH(m1, m0, madd_s0_m, madd_s1_m);                              \
-    DOTP_SH4_SW(madd_s0_m, madd_s1_m, madd_s0_m, madd_s1_m, c0, c0, c1, c1, \
-                madd0_m, madd1_m, madd2_m, madd3_m);                        \
-    SRARI_W4_SW(madd0_m, madd1_m, madd2_m, madd3_m, DCT_CONST_BITS);        \
-    PCKEV_H2_SH(madd1_m, madd0_m, madd3_m, madd2_m, res0, res1);            \
-  }
-
-#define MADD_BF(inp0, inp1, inp2, inp3, cst0, cst1, cst2, cst3, out0, out1,   \
-                out2, out3)                                                   \
-  {                                                                           \
-    v8i16 madd_s0_m, madd_s1_m, madd_s2_m, madd_s3_m;                         \
-    v4i32 tmp0_m, tmp1_m, tmp2_m, tmp3_m, m4_m, m5_m;                         \
-                                                                              \
-    ILVRL_H2_SH(inp1, inp0, madd_s0_m, madd_s1_m);                            \
-    ILVRL_H2_SH(inp3, inp2, madd_s2_m, madd_s3_m);                            \
-    DOTP_SH4_SW(madd_s0_m, madd_s1_m, madd_s2_m, madd_s3_m, cst0, cst0, cst2, \
-                cst2, tmp0_m, tmp1_m, tmp2_m, tmp3_m);                        \
-    BUTTERFLY_4(tmp0_m, tmp1_m, tmp3_m, tmp2_m, m4_m, m5_m, tmp3_m, tmp2_m);  \
-    SRARI_W4_SW(m4_m, m5_m, tmp2_m, tmp3_m, DCT_CONST_BITS);                  \
-    PCKEV_H2_SH(m5_m, m4_m, tmp3_m, tmp2_m, out0, out1);                      \
-    DOTP_SH4_SW(madd_s0_m, madd_s1_m, madd_s2_m, madd_s3_m, cst1, cst1, cst3, \
-                cst3, tmp0_m, tmp1_m, tmp2_m, tmp3_m);                        \
-    BUTTERFLY_4(tmp0_m, tmp1_m, tmp3_m, tmp2_m, m4_m, m5_m, tmp3_m, tmp2_m);  \
-    SRARI_W4_SW(m4_m, m5_m, tmp2_m, tmp3_m, DCT_CONST_BITS);                  \
-    PCKEV_H2_SH(m5_m, m4_m, tmp3_m, tmp2_m, out2, out3);                      \
-  }
-#endif  // AOM_DSP_MIPS_TXFM_MACROS_MIPS_MSA_H_
--- a/aom_dsp/prob.c
+++ b/aom_dsp/prob.c
@@ -1,226 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include "./aom_config.h"
-
-#if CONFIG_EC_MULTISYMBOL
-#include <string.h>
-#endif
-
-#include "aom_dsp/prob.h"
-
-#if CONFIG_DAALA_EC
-#include "aom_dsp/entcode.h"
-#endif
-
-const uint8_t aom_norm[256] = {
-  0, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
-  3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
-  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0,
-  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
-};
-
-static unsigned int tree_merge_probs_impl(unsigned int i,
-                                          const aom_tree_index *tree,
-                                          const aom_prob *pre_probs,
-                                          const unsigned int *counts,
-                                          aom_prob *probs) {
-  const int l = tree[i];
-  const unsigned int left_count =
-      (l <= 0) ? counts[-l]
-               : tree_merge_probs_impl(l, tree, pre_probs, counts, probs);
-  const int r = tree[i + 1];
-  const unsigned int right_count =
-      (r <= 0) ? counts[-r]
-               : tree_merge_probs_impl(r, tree, pre_probs, counts, probs);
-  const unsigned int ct[2] = { left_count, right_count };
-  probs[i >> 1] = mode_mv_merge_probs(pre_probs[i >> 1], ct);
-  return left_count + right_count;
-}
-
-void aom_tree_merge_probs(const aom_tree_index *tree, const aom_prob *pre_probs,
-                          const unsigned int *counts, aom_prob *probs) {
-  tree_merge_probs_impl(0, tree, pre_probs, counts, probs);
-}
-
-#if CONFIG_EC_MULTISYMBOL
-typedef struct tree_node tree_node;
-
-struct tree_node {
-  aom_tree_index index;
-  uint8_t probs[16];
-  uint8_t prob;
-  int path;
-  int len;
-  int l;
-  int r;
-  aom_cdf_prob pdf;
-};
-
-/* Compute the probability of this node in Q23 */
-static uint32_t tree_node_prob(tree_node n, int i) {
-  uint32_t prob;
-  /* 1.0 in Q23 */
-  prob = 16777216;
-  for (; i < n.len; i++) {
-    prob = prob * n.probs[i] >> 8;
-  }
-  return prob;
-}
-
-static int tree_node_cmp(tree_node a, tree_node b) {
-  int i;
-  uint32_t pa;
-  uint32_t pb;
-  for (i = 0; i < AOMMIN(a.len, b.len) && a.probs[i] == b.probs[i]; i++) {
-  }
-  pa = tree_node_prob(a, i);
-  pb = tree_node_prob(b, i);
-  return pa > pb ? 1 : pa < pb ? -1 : 0;
-}
-
-/* Given a Q15 probability for symbol subtree rooted at tree[n], this function
-    computes the probability of each symbol (defined as a node that has no
-    children). */
-static aom_cdf_prob tree_node_compute_probs(tree_node *tree, int n,
-                                            aom_cdf_prob pdf) {
-  if (tree[n].l == 0) {
-    /* This prevents probability computations in Q15 that underflow from
-        producing a symbol that has zero probability. */
-    if (pdf == 0) pdf = 1;
-    tree[n].pdf = pdf;
-    return pdf;
-  } else {
-    /* We process the smaller probability first,  */
-    if (tree[n].prob < 128) {
-      aom_cdf_prob lp;
-      aom_cdf_prob rp;
-      lp = (((uint32_t)pdf) * tree[n].prob + 128) >> 8;
-      lp = tree_node_compute_probs(tree, tree[n].l, lp);
-      rp = tree_node_compute_probs(tree, tree[n].r, lp > pdf ? 0 : pdf - lp);
-      return lp + rp;
-    } else {
-      aom_cdf_prob rp;
-      aom_cdf_prob lp;
-      rp = (((uint32_t)pdf) * (256 - tree[n].prob) + 128) >> 8;
-      rp = tree_node_compute_probs(tree, tree[n].r, rp);
-      lp = tree_node_compute_probs(tree, tree[n].l, rp > pdf ? 0 : pdf - rp);
-      return lp + rp;
-    }
-  }
-}
-
-static int tree_node_extract(tree_node *tree, int n, int symb,
-                             aom_cdf_prob *pdf, aom_tree_index *index,
-                             int *path, int *len) {
-  if (tree[n].l == 0) {
-    pdf[symb] = tree[n].pdf;
-    if (index != NULL) index[symb] = tree[n].index;
-    if (path != NULL) path[symb] = tree[n].path;
-    if (len != NULL) len[symb] = tree[n].len;
-    return symb + 1;
-  } else {
-    symb = tree_node_extract(tree, tree[n].l, symb, pdf, index, path, len);
-    return tree_node_extract(tree, tree[n].r, symb, pdf, index, path, len);
-  }
-}
-
-int tree_to_cdf(const aom_tree_index *tree, const aom_prob *probs,
-                aom_tree_index root, aom_cdf_prob *cdf, aom_tree_index *index,
-                int *path, int *len) {
-  tree_node symb[2 * 16 - 1];
-  int nodes;
-  int next[16];
-  int size;
-  int nsymbs;
-  int i;
-  /* Create the root node with probability 1 in Q15. */
-  symb[0].index = root;
-  symb[0].path = 0;
-  symb[0].len = 0;
-  symb[0].l = symb[0].r = 0;
-  nodes = 1;
-  next[0] = 0;
-  size = 1;
-  nsymbs = 1;
-  while (size > 0 && nsymbs < 16) {
-    int m;
-    tree_node n;
-    aom_tree_index j;
-    uint8_t prob;
-    m = 0;
-    /* Find the internal node with the largest probability. */
-    for (i = 1; i < size; i++) {
-      if (tree_node_cmp(symb[next[i]], symb[next[m]]) > 0) m = i;
-    }
-    i = next[m];
-    memmove(&next[m], &next[m + 1], sizeof(*next) * (size - (m + 1)));
-    size--;
-    /* Split this symbol into two symbols */
-    n = symb[i];
-    j = n.index;
-    prob = probs[j >> 1];
-    /* Left */
-    n.index = tree[j];
-    n.path <<= 1;
-    n.len++;
-    n.probs[n.len - 1] = prob;
-    symb[nodes] = n;
-    if (n.index > 0) {
-      next[size++] = nodes;
-    }
-    /* Right */
-    n.index = tree[j + 1];
-    n.path += 1;
-    n.probs[n.len - 1] = 256 - prob;
-    symb[nodes + 1] = n;
-    if (n.index > 0) {
-      next[size++] = nodes + 1;
-    }
-    symb[i].prob = prob;
-    symb[i].l = nodes;
-    symb[i].r = nodes + 1;
-    nodes += 2;
-    nsymbs++;
-  }
-  /* Compute the probabilities of each symbol in Q15 */
-  tree_node_compute_probs(symb, 0, 32768);
-  /* Extract the cdf, index, path and length */
-  tree_node_extract(symb, 0, 0, cdf, index, path, len);
-  /* Convert to CDF */
-  for (i = 1; i < nsymbs; i++) {
-    cdf[i] = cdf[i - 1] + cdf[i];
-  }
-  return nsymbs;
-}
-
-/* This code assumes that tree contains as unique leaf nodes the integer values
-    0 to len - 1 and produces the forward and inverse mapping tables in ind[]
-    and inv[] respectively. */
-void av1_indices_from_tree(int *ind, int *inv, int len,
-                           const aom_tree_index *tree) {
-  int i;
-  int index;
-  for (i = index = 0; i < TREE_SIZE(len); i++) {
-    const aom_tree_index j = tree[i];
-    if (j <= 0) {
-      inv[index] = -j;
-      ind[-j] = index++;
-    }
-  }
-}
-#endif
--- a/aom_dsp/prob.h
+++ b/aom_dsp/prob.h
@@ -1,158 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_DSP_PROB_H_
-#define AOM_DSP_PROB_H_
-
-#include "./aom_config.h"
-#include "./aom_dsp_common.h"
-
-#include "aom_ports/bitops.h"
-#include "aom_ports/mem.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-typedef uint8_t aom_prob;
-
-// TODO(negge): Rename this aom_prob once we remove vpxbool.
-typedef uint16_t aom_cdf_prob;
-
-#define MAX_PROB 255
-
-#define aom_prob_half ((aom_prob)128)
-
-typedef int8_t aom_tree_index;
-
-#define TREE_SIZE(leaf_count) (-2 + 2 * (leaf_count))
-
-#define aom_complement(x) (255 - x)
-
-#define MODE_MV_COUNT_SAT 20
-
-/* We build coding trees compactly in arrays.
-   Each node of the tree is a pair of aom_tree_indices.
-   Array index often references a corresponding probability table.
-   Index <= 0 means done encoding/decoding and value = -Index,
-   Index > 0 means need another bit, specification at index.
-   Nonnegative indices are always even;  processing begins at node 0. */
-
-typedef const aom_tree_index aom_tree[];
-
-static INLINE aom_prob clip_prob(int p) {
-  return (p > 255) ? 255 : (p < 1) ? 1 : p;
-}
-
-static INLINE aom_prob get_prob(int num, int den) {
-  return (den == 0) ? 128u : clip_prob(((int64_t)num * 256 + (den >> 1)) / den);
-}
-
-static INLINE aom_prob get_binary_prob(int n0, int n1) {
-  return get_prob(n0, n0 + n1);
-}
-
-/* This function assumes prob1 and prob2 are already within [1,255] range. */
-static INLINE aom_prob weighted_prob(int prob1, int prob2, int factor) {
-  return ROUND_POWER_OF_TWO(prob1 * (256 - factor) + prob2 * factor, 8);
-}
-
-static INLINE aom_prob merge_probs(aom_prob pre_prob, const unsigned int ct[2],
-                                   unsigned int count_sat,
-                                   unsigned int max_update_factor) {
-  const aom_prob prob = get_binary_prob(ct[0], ct[1]);
-  const unsigned int count = AOMMIN(ct[0] + ct[1], count_sat);
-  const unsigned int factor = max_update_factor * count / count_sat;
-  return weighted_prob(pre_prob, prob, factor);
-}
-
-// MODE_MV_MAX_UPDATE_FACTOR (128) * count / MODE_MV_COUNT_SAT;
-static const int count_to_update_factor[MODE_MV_COUNT_SAT + 1] = {
-  0,  6,  12, 19, 25, 32,  38,  44,  51,  57, 64,
-  70, 76, 83, 89, 96, 102, 108, 115, 121, 128
-};
-
-static INLINE aom_prob mode_mv_merge_probs(aom_prob pre_prob,
-                                           const unsigned int ct[2]) {
-  const unsigned int den = ct[0] + ct[1];
-  if (den == 0) {
-    return pre_prob;
-  } else {
-    const unsigned int count = AOMMIN(den, MODE_MV_COUNT_SAT);
-    const unsigned int factor = count_to_update_factor[count];
-    const aom_prob prob =
-        clip_prob(((int64_t)(ct[0]) * 256 + (den >> 1)) / den);
-    return weighted_prob(pre_prob, prob, factor);
-  }
-}
-
-void aom_tree_merge_probs(const aom_tree_index *tree, const aom_prob *pre_probs,
-                          const unsigned int *counts, aom_prob *probs);
-
-#if CONFIG_EC_MULTISYMBOL
-int tree_to_cdf(const aom_tree_index *tree, const aom_prob *probs,
-                aom_tree_index root, aom_cdf_prob *cdf, aom_tree_index *ind,
-                int *pth, int *len);
-
-static INLINE void av1_tree_to_cdf(const aom_tree_index *tree,
-                                   const aom_prob *probs, aom_cdf_prob *cdf) {
-  aom_tree_index index[16];
-  int path[16];
-  int dist[16];
-  tree_to_cdf(tree, probs, 0, cdf, index, path, dist);
-}
-
-#define av1_tree_to_cdf_1D(tree, probs, cdf, u) \
-  do {                                          \
-    int i;                                      \
-    for (i = 0; i < u; i++) {                   \
-      av1_tree_to_cdf(tree, probs[i], cdf[i]);  \
-    }                                           \
-  } while (0)
-
-#define av1_tree_to_cdf_2D(tree, probs, cdf, v, u)     \
-  do {                                                 \
-    int j;                                             \
-    int i;                                             \
-    for (j = 0; j < v; j++) {                          \
-      for (i = 0; i < u; i++) {                        \
-        av1_tree_to_cdf(tree, probs[j][i], cdf[j][i]); \
-      }                                                \
-    }                                                  \
-  } while (0)
-
-void av1_indices_from_tree(int *ind, int *inv, int len,
-                           const aom_tree_index *tree);
-#endif
-
-DECLARE_ALIGNED(16, extern const uint8_t, aom_norm[256]);
-
-#if CONFIG_EC_ADAPT
-static INLINE void update_cdf(aom_cdf_prob *cdf, int val, int nsymbs) {
-  const int rate = 4 + get_msb(nsymbs);
-  int i, diff, tmp;
-  for (i = 0; i < nsymbs; ++i) {
-    tmp = (i + 1) << (12 - rate);
-    cdf[i] -= ((cdf[i] - tmp) >> rate);
-  }
-  diff = 32768 - cdf[nsymbs - 1];
-
-  for (i = val; i < nsymbs; ++i) {
-    cdf[i] += diff;
-  }
-}
-#endif
-
-#ifdef __cplusplus
-}  // extern "C"
-#endif
-
-#endif  // AOM_DSP_PROB_H_
--- a/Show More
+++ b/Show More
Author	SHA1	Message	Date
Johann	042572177b	Release v1.6.0 Khaki Campbell Duck Change-Id: I08da365dd889093f9919476a02ee96ae9615f140	2016-07-20 18:15:41 -07:00
Yaowu Xu	297b2a12d6	Fix encoder crashes for odd size input (cherry picked from commit `98431cde07`) Change-Id: Id5c30c419282369cc8c3280d9a70b34a859a71d8	2016-07-20 15:02:13 -07:00
James Zern	1c0a9f36f1	vp9_pickmode: revert rd modeling change for hbd Avoids a segfault in high-bitdepth builds. This restores the condition to its state prior to: `7991241` vp9: Change the scheme for modeling rd for bsize 32x32. BUG=webm:1250 Change-Id: I6183d5b34cb89dfbf27b7bb589812148a72cd7de	2016-06-25 11:40:26 -07:00
James Zern	cfd5e0221c	Revert "Update vpx subpixel 1d filter ssse3 asm" This reverts commit `1517fb74fd`. Fixes a segfault in windows x64 builds. Change-Id: I6a6959cd7e64a28376849a9f2b11fc852a7c1fbe	2016-06-25 11:37:20 -07:00
Jacky Chen	168eea5d60	Merge "vp9: Change the scheme for modeling rd for bsize 32x32."	2016-06-25 00:43:40 +00:00
James Zern	922751e059	Merge "datarate_test,DatarateTestLarge: normalize bits type"	2016-06-25 00:36:05 +00:00
Jacky Chen	723e357ead	Merge "vp9: Code clean, move low temp var logic out of choose_partitioning."	2016-06-24 22:00:49 +00:00
James Zern	b34705f64f	Merge "cosmetics: Beautify whitespaces and line wrapping"	2016-06-24 21:51:01 +00:00
James Zern	efad6feb9a	Merge "cosmetics: Change few types to their posix version"	2016-06-24 21:50:45 +00:00
James Zern	9e5f355daf	Merge "cosmetics: Make few conditions clearer"	2016-06-24 21:50:32 +00:00
jackychen	dd07443f72	vp9: Code clean, move low temp var logic out of choose_partitioning. Change-Id: I7093e74131e0964471c9993c1e972b4617c4731d	2016-06-24 13:38:22 -07:00
jackychen	7991241a50	vp9: Change the scheme for modeling rd for bsize 32x32. For real-time CBR mode, use model_rd_for_sb_y_large instead of model_rd_for_sb_y for 32x32 block. In the former model, transform might be skipped more aggressively in some condtions, which speeds up encoding time with only a little PSNR/SSIM drop on rtc test set. No obvious visual quality regression. PSNR effect on different speed settings: speed 8 rtc: 0.129% overall PSNR drop, 0.137% SSIM drop speed 7 rtc: 0.135% overall PSNR drop, 0.062% SSIM drop speed 5 rtc_derf: 0.105% overall PSNR drop, 0.095% SSIM drop Speed up: gips_motion_WHD, 1mbps: 3.29% faster on speed 7, 2.56% faster on speed8 gips_stat_WHD, 1mbps: 2.17% faster on speed 7, 1.62% faster on speed8 BUG=webm:1250 Change-Id: I818babce5b8549b4b1a7c3978df8591bffde7173	2016-06-24 12:09:13 -07:00
Marco	b582cf0ea9	vp9-svc: Remove some unneeded code/comment. Change-Id: I710707296042d8586109760544ef68e40ae486c3	2016-06-24 11:43:11 -07:00
Yury Gitman	67611119b5	cosmetics: Beautify whitespaces and line wrapping Change-Id: I9afa02cae671bd3527cf344695e53d0cc767f549	2016-06-24 10:18:06 -07:00
Yury Gitman	3b2e2f2f77	cosmetics: Change few types to their posix version Change-Id: I6d7bc9ed7396e7b0d63ee97bfa473fdea002f9ee	2016-06-24 10:18:06 -07:00
Yury Gitman	79436fadfb	cosmetics: Make few conditions clearer Change-Id: Ib024b3e42efc7ce1af56824a4644fdefcd45b215	2016-06-24 10:17:51 -07:00
Yaowu Xu	7ed1d54ab4	Merge "Revert "vp9: Change the scheme for modeling rd for bsize 32x32.""	2016-06-24 16:05:55 +00:00
Yaowu Xu	26daa30da4	Merge "Rationalize type to avoid integer out of range"	2016-06-24 13:58:36 +00:00
Yaowu Xu	7738bcb350	Rationalize type to avoid integer out of range BUG=webm:1250 Change-Id: Id5bb2762ca1bf996ba4f9a60eec977a7994c1d94	2016-06-24 13:58:02 +00:00
James Zern	73b11ec876	datarate_test,DatarateTestLarge: normalize bits type quiets a msvc warning: conversion from 'const int64_t' to 'size_t', possible loss of data Change-Id: I90a2ac6b040454dac7434fc9b63b98c42ea127b1	2016-06-23 23:29:26 -07:00
James Zern	d4596485be	Revert "vp9: Change the scheme for modeling rd for bsize 32x32." This reverts commit `5c29ee726e`. Causes segfaults in VP9/EndToEndTestLarge.EndtoEndPSNRTest. BUG=webm:1250 Change-Id: I8a30e97be30589abdb76820b5c3c37c46cd6cafb	2016-06-23 15:59:25 -07:00
Johann Koenig	57adf3d573	Merge "configure: clean up var style and set_all usage"	2016-06-23 22:59:21 +00:00
Johann	74a61b5ab9	configure: clean up var style and set_all usage Use quotes whenever possible and {} always for variables. Replace multiple set_all calls with *able_feature(). Change-Id: If579d3f718bd4133cf1592b4554a8ed00cf9f2d3	2016-06-23 22:15:13 +00:00
Vignesh Venkatasubramanian	692fe74deb	Merge "vp9: Fix potential SEGV in decoder_peek_si_internal"	2016-06-23 21:33:13 +00:00
Linfeng Zhang	bdeb5febe4	Merge "Update vpx subpixel 1d filter ssse3 asm"	2016-06-23 19:08:04 +00:00
Johann Koenig	9eeb1f2fc3	Merge "Fail early when android target does not include --sdk-path"	2016-06-23 19:04:52 +00:00
Angie Chiang	424982bc41	Merge "set interp_filter to SWITCHABLE_FILTER for intra block"	2016-06-23 18:56:27 +00:00
Johann Koenig	5e9c5dfdf0	Merge changes Ifddff89d,I827dfe59,Idca7ef45 * changes: vp8 machine setup: mark unused variable vp8 realtime encoder: mark unused variable vp8 error concealment: remove unused variables	2016-06-23 17:55:34 +00:00
Vignesh Venkatasubramanian	aa1c813c43	vp9: Fix potential SEGV in decoder_peek_si_internal decoder_peek_si_internal could potentially read more bytes than what actually exists in the input buffer. We check for the buffer size to be at least 8, but we try to read up to 10 bytes in the worst case. A well crafted file could thus cause a segfault. Likely change that introduced this bug was: https://chromium-review.googlesource.com/#/c/70439 (git hash: `7c43fb6`) BUG=chromium:621095 Change-Id: Id74880cfdded44caaa45bbdbaac859c09d3db752	2016-06-23 09:39:26 -07:00
Alex Converse	6e4b73125b	Merge "vpx_lpf_horizontal_4_sse2: Remove dead load."	2016-06-23 16:20:36 +00:00
Johann	310073868e	Fail early when android target does not include --sdk-path Change-Id: I07e7e63476a2e32e3aae123abdee8b7bbbdc6a8c	2016-06-23 13:48:18 +00:00
Johann Koenig	cc1524aa90	Merge "Add default flags for arm64/armv8 builds"	2016-06-23 13:47:28 +00:00
Johann	6c6eb16bb9	vp8 machine setup: mark unused variable When building without multithreading and for a non-arm, non-x86 system, ctx is unused. Cleans up -Wextra warning: unused parameter ‘ctx’ [-Werror=unused-parameter] Change-Id: Ifddff89d2ebd45f7d71e3d415a8f2415dd818957	2016-06-23 13:46:20 +00:00
Johann	3b2c3cb366	vp8 realtime encoder: mark unused variable 'duration' is not used in realtime-only mode: Cleans up -Wextra warning: unused parameter 'duration' [-Wunused-parameter] Change-Id: I827dfe59ebcdc72c5a93fdf7e5aca063433914b1	2016-06-23 13:46:00 +00:00
Johann	55f3740d76	vp8 error concealment: remove unused variables vp8_conceal_corrupt_mb is an empty function. Remove it entirely. Cleans up -Wextra warnings: unused parameter 'mi_stride' [-Wunused-parameter] unused parameter 'xd' [-Wunused-parameter] Change-Id: Idca7ef4508fae2b4b76a40d44507522a72ccc2c8	2016-06-22 18:29:03 -07:00
Alex Converse	83db21b2fd	vpx_lpf_horizontal_4_sse2: Remove dead load. Change-Id: I51026c52baa1f0881fcd5b68e1fdf08a2dc0916e	2016-06-22 18:17:41 -07:00
Angie Chiang	d9c417cb49	set interp_filter to SWITCHABLE_FILTER for intra block In vp9_pick_inter_mode(), instead of using vp9_get_pred_context_switchable_interp(xd) to assign filter_ref, we use a less strict condition on assigning filter_ref. This is to reduce the probabily of entering the flow of not assigning filter_ref and then skipping filter search. Overall PSNR gain 0.074% for rtc dataset Details: Low Mid High 0.185% -0.008% -0.082% Change-Id: Id5c5ab38d3766c213d5681e17b4d1afd1529e676	2016-06-22 17:19:43 -07:00
Alex Converse	b2597527a5	Merge "Repack vp9_token_state."	2016-06-23 00:17:23 +00:00
Jacky Chen	8496390e73	Merge "vp9: Change the scheme for modeling rd for bsize 32x32."	2016-06-22 23:50:46 +00:00
Johann	ac27b062b0	Add default flags for arm64/armv8 builds Allows building simple targets with sane default flags. For example, using the Android arm64 toolchain from the NDK: https://developer.android.com/ndk/guides/standalone_toolchain.html ./build/tools/make-standalone-toolchain.sh --arch=arm64 \ --platform=android-24 --install-dir=/tmp/arm64 CROSS=/tmp/arm64/bin/aarch64-linux-android- \ ~/libvpx/configure --target=arm64-linux-gcc --disable-multithread BUG=webm:1143 Change-Id: I06f5a7564f5382cf1a4bad41aef4308566c53adf	2016-06-22 23:17:17 +00:00
James Zern	527a9fea76	Merge "remove vp10"	2016-06-22 22:35:57 +00:00
Linfeng Zhang	1517fb74fd	Update vpx subpixel 1d filter ssse3 asm Speed test shows the new vertical filters have degradation on Celeron Chromebook. Added "X86_SUBPIX_VFILTER_PREFER_SLOW_CELERON" to control the vertical filters activated code. Now just simply active the code without degradation on Celeron. Later there should be 2 set of vertical filters ssse3 functions, and let jump table to choose based on CPU type. Change-Id: I37e3e9c5694737d9134a6bce6698d3e43f8fc962	2016-06-22 13:15:00 -07:00
jackychen	5c29ee726e	vp9: Change the scheme for modeling rd for bsize 32x32. For real-time CBR mode, use model_rd_for_sb_y_large instead of model_rd_for_sb_y for 32x32 block. In the former model, transform might be skipped more aggressively in some condtions, which speeds up encoding time with only a little PSNR/SSIM drop on rtc test set. No obvious visual quality regression. PSNR effect on different speed setting: speed 8 rtc: 0.129% overall PSNR drop, 0.137% SSIM drop speed 7 rtc: 0.135% overall PSNR drop, 0.062% SSIM drop speed 5 rtc_derf: 0.105% overall PSNR drop, 0.095% SSIM drop Speed up: gips_motion_WHD, 1mbps: 3.29% faster on speed 7, 2.56% faster on speed8 gips_stat_WHD, 1mbps: 2.17% faster on speed 7, 1.62% faster on speed8 Change-Id: I902f62def225ea01c145d7e5a93497398b8f5edf	2016-06-22 11:17:56 -07:00
Alex Converse	50d3629c61	Repack vp9_token_state. Reduces size from 32 bytes to 24 bytes on x86_64. Change-Id: I8a22552343a1fc916117f35267fe6a295250f742	2016-06-20 12:56:32 -07:00
James Zern	67edc5e83b	remove vp10 development has moved to the nextgenv2 branch and a snapshot from here was used to seed aomedia BUG=b/29457125 Change-Id: Iedaca11ec7870fb3a4e50b2c9ea0c2b056a0d3c0	2016-06-17 18:26:08 -07:00