Compare commits
45 Commits
nextgenv2
...
khakicampb
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
042572177b | ||
|
|
297b2a12d6 | ||
|
|
1c0a9f36f1 | ||
|
|
cfd5e0221c | ||
|
|
168eea5d60 | ||
|
|
922751e059 | ||
|
|
723e357ead | ||
|
|
b34705f64f | ||
|
|
efad6feb9a | ||
|
|
9e5f355daf | ||
|
|
dd07443f72 | ||
|
|
7991241a50 | ||
|
|
b582cf0ea9 | ||
|
|
67611119b5 | ||
|
|
3b2e2f2f77 | ||
|
|
79436fadfb | ||
|
|
7ed1d54ab4 | ||
|
|
26daa30da4 | ||
|
|
7738bcb350 | ||
|
|
73b11ec876 | ||
|
|
d4596485be | ||
|
|
57adf3d573 | ||
|
|
74a61b5ab9 | ||
|
|
692fe74deb | ||
|
|
bdeb5febe4 | ||
|
|
9eeb1f2fc3 | ||
|
|
424982bc41 | ||
|
|
5e9c5dfdf0 | ||
|
|
aa1c813c43 | ||
|
|
6e4b73125b | ||
|
|
310073868e | ||
|
|
cc1524aa90 | ||
|
|
6c6eb16bb9 | ||
|
|
3b2c3cb366 | ||
|
|
55f3740d76 | ||
|
|
83db21b2fd | ||
|
|
d9c417cb49 | ||
|
|
b2597527a5 | ||
|
|
8496390e73 | ||
|
|
ac27b062b0 | ||
|
|
527a9fea76 | ||
|
|
1517fb74fd | ||
|
|
5c29ee726e | ||
|
|
50d3629c61 | ||
|
|
67edc5e83b |
@@ -1,91 +0,0 @@
|
||||
---
|
||||
Language: Cpp
|
||||
# BasedOnStyle: Google
|
||||
# Generated with clang-format 3.8.1
|
||||
AccessModifierOffset: -1
|
||||
AlignAfterOpenBracket: Align
|
||||
AlignConsecutiveAssignments: false
|
||||
AlignConsecutiveDeclarations: false
|
||||
AlignEscapedNewlinesLeft: true
|
||||
AlignOperands: true
|
||||
AlignTrailingComments: true
|
||||
AllowAllParametersOfDeclarationOnNextLine: true
|
||||
AllowShortBlocksOnASingleLine: false
|
||||
AllowShortCaseLabelsOnASingleLine: true
|
||||
AllowShortFunctionsOnASingleLine: All
|
||||
AllowShortIfStatementsOnASingleLine: true
|
||||
AllowShortLoopsOnASingleLine: true
|
||||
AlwaysBreakAfterDefinitionReturnType: None
|
||||
AlwaysBreakAfterReturnType: None
|
||||
AlwaysBreakBeforeMultilineStrings: true
|
||||
AlwaysBreakTemplateDeclarations: true
|
||||
BinPackArguments: true
|
||||
BinPackParameters: true
|
||||
BraceWrapping:
|
||||
AfterClass: false
|
||||
AfterControlStatement: false
|
||||
AfterEnum: false
|
||||
AfterFunction: false
|
||||
AfterNamespace: false
|
||||
AfterObjCDeclaration: false
|
||||
AfterStruct: false
|
||||
AfterUnion: false
|
||||
BeforeCatch: false
|
||||
BeforeElse: false
|
||||
IndentBraces: false
|
||||
BreakBeforeBinaryOperators: None
|
||||
BreakBeforeBraces: Attach
|
||||
BreakBeforeTernaryOperators: true
|
||||
BreakConstructorInitializersBeforeComma: false
|
||||
ColumnLimit: 80
|
||||
CommentPragmas: '^ IWYU pragma:'
|
||||
ConstructorInitializerAllOnOneLineOrOnePerLine: false
|
||||
ConstructorInitializerIndentWidth: 4
|
||||
ContinuationIndentWidth: 4
|
||||
Cpp11BracedListStyle: false
|
||||
DerivePointerAlignment: false
|
||||
DisableFormat: false
|
||||
ExperimentalAutoDetectBinPacking: false
|
||||
ForEachMacros: [ foreach, Q_FOREACH, BOOST_FOREACH ]
|
||||
IncludeCategories:
|
||||
- Regex: '^<.*\.h>'
|
||||
Priority: 1
|
||||
- Regex: '^<.*'
|
||||
Priority: 2
|
||||
- Regex: '.*'
|
||||
Priority: 3
|
||||
IndentCaseLabels: true
|
||||
IndentWidth: 2
|
||||
IndentWrappedFunctionNames: false
|
||||
KeepEmptyLinesAtTheStartOfBlocks: false
|
||||
MacroBlockBegin: ''
|
||||
MacroBlockEnd: ''
|
||||
MaxEmptyLinesToKeep: 1
|
||||
NamespaceIndentation: None
|
||||
ObjCBlockIndentWidth: 2
|
||||
ObjCSpaceAfterProperty: false
|
||||
ObjCSpaceBeforeProtocolList: false
|
||||
PenaltyBreakBeforeFirstCallParameter: 1
|
||||
PenaltyBreakComment: 300
|
||||
PenaltyBreakFirstLessLess: 120
|
||||
PenaltyBreakString: 1000
|
||||
PenaltyExcessCharacter: 1000000
|
||||
PenaltyReturnTypeOnItsOwnLine: 200
|
||||
PointerAlignment: Right
|
||||
ReflowComments: true
|
||||
SortIncludes: false
|
||||
SpaceAfterCStyleCast: false
|
||||
SpaceBeforeAssignmentOperators: true
|
||||
SpaceBeforeParens: ControlStatements
|
||||
SpaceInEmptyParentheses: false
|
||||
SpacesBeforeTrailingComments: 2
|
||||
SpacesInAngles: false
|
||||
SpacesInContainerLiterals: true
|
||||
SpacesInCStyleCastParentheses: false
|
||||
SpacesInParentheses: false
|
||||
SpacesInSquareBrackets: false
|
||||
Standard: Auto
|
||||
TabWidth: 8
|
||||
UseTab: Never
|
||||
...
|
||||
|
||||
41
.gitignore
vendored
41
.gitignore
vendored
@@ -29,36 +29,37 @@
|
||||
/examples/decode_with_drops
|
||||
/examples/decode_with_partial_drops
|
||||
/examples/example_xma
|
||||
/examples/lossless_encoder
|
||||
/examples/postproc
|
||||
/examples/resize_util
|
||||
/examples/set_maps
|
||||
/examples/simple_decoder
|
||||
/examples/simple_encoder
|
||||
/examples/twopass_encoder
|
||||
/examples/aom_cx_set_ref
|
||||
/examples/av1_spatial_scalable_encoder
|
||||
/examples/aom_temporal_scalable_patterns
|
||||
/examples/aom_temporal_svc_encoder
|
||||
/examples/vp8_multi_resolution_encoder
|
||||
/examples/vp8cx_set_ref
|
||||
/examples/vp9_lossless_encoder
|
||||
/examples/vp9_spatial_scalable_encoder
|
||||
/examples/vpx_temporal_scalable_patterns
|
||||
/examples/vpx_temporal_svc_encoder
|
||||
/ivfdec
|
||||
/ivfdec.dox
|
||||
/ivfenc
|
||||
/ivfenc.dox
|
||||
/libaom.so*
|
||||
/libaom.ver
|
||||
/libvpx.so*
|
||||
/libvpx.ver
|
||||
/samples.dox
|
||||
/test_intra_pred_speed
|
||||
/test_libaom
|
||||
/aom_api1_migration.dox
|
||||
/av1_rtcd.h
|
||||
/aom.pc
|
||||
/aom_config.c
|
||||
/aom_config.h
|
||||
/aom_dsp_rtcd.h
|
||||
/aom_scale_rtcd.h
|
||||
/aom_version.h
|
||||
/aomdec
|
||||
/aomdec.dox
|
||||
/aomenc
|
||||
/aomenc.dox
|
||||
/test_libvpx
|
||||
/vp8_api1_migration.dox
|
||||
/vp[89x]_rtcd.h
|
||||
/vpx.pc
|
||||
/vpx_config.c
|
||||
/vpx_config.h
|
||||
/vpx_dsp_rtcd.h
|
||||
/vpx_scale_rtcd.h
|
||||
/vpx_version.h
|
||||
/vpxdec
|
||||
/vpxdec.dox
|
||||
/vpxenc
|
||||
/vpxenc.dox
|
||||
TAGS
|
||||
|
||||
7
.mailmap
7
.mailmap
@@ -3,6 +3,7 @@ Aℓex Converse <aconverse@google.com>
|
||||
Aℓex Converse <aconverse@google.com> <alex.converse@gmail.com>
|
||||
Alexis Ballier <aballier@gentoo.org> <alexis.ballier@gmail.com>
|
||||
Alpha Lam <hclam@google.com> <hclam@chromium.org>
|
||||
Daniele Castagna <dcastagna@chromium.org> <dcastagna@google.com>
|
||||
Deb Mukherjee <debargha@google.com>
|
||||
Erik Niemeyer <erik.a.niemeyer@intel.com> <erik.a.niemeyer@gmail.com>
|
||||
Guillaume Martres <gmartres@google.com> <smarter3@gmail.com>
|
||||
@@ -13,12 +14,15 @@ Jim Bankoski <jimbankoski@google.com>
|
||||
Johann Koenig <johannkoenig@google.com>
|
||||
Johann Koenig <johannkoenig@google.com> <johann.koenig@duck.com>
|
||||
Johann Koenig <johannkoenig@google.com> <johann.koenig@gmail.com>
|
||||
Johann Koenig <johannkoenig@google.com> <johannkoenig@chromium.org>
|
||||
John Koleszar <jkoleszar@google.com>
|
||||
Joshua Litt <joshualitt@google.com> <joshualitt@chromium.org>
|
||||
Marco Paniconi <marpan@google.com>
|
||||
Marco Paniconi <marpan@google.com> <marpan@chromium.org>
|
||||
Pascal Massimino <pascal.massimino@gmail.com>
|
||||
Paul Wilkins <paulwilkins@google.com>
|
||||
Peter de Rivaz <peter.derivaz@gmail.com>
|
||||
Peter de Rivaz <peter.derivaz@gmail.com> <peter.derivaz@argondesign.com>
|
||||
Ralph Giles <giles@xiph.org> <giles@entropywave.com>
|
||||
Ralph Giles <giles@xiph.org> <giles@mozilla.com>
|
||||
Ronald S. Bultje <rsbultje@gmail.com> <rbultje@google.com>
|
||||
@@ -26,7 +30,8 @@ Sami Pietilä <samipietila@google.com>
|
||||
Tamar Levy <tamar.levy@intel.com>
|
||||
Tamar Levy <tamar.levy@intel.com> <levytamar82@gmail.com>
|
||||
Tero Rintaluoma <teror@google.com> <tero.rintaluoma@on2.com>
|
||||
Timothy B. Terriberry <tterribe@xiph.org> Tim Terriberry <tterriberry@mozilla.com>
|
||||
Timothy B. Terriberry <tterribe@xiph.org> <tterriberry@mozilla.com>
|
||||
Tom Finegan <tomfinegan@google.com>
|
||||
Tom Finegan <tomfinegan@google.com> <tomfinegan@chromium.org>
|
||||
Yaowu Xu <yaowu@google.com> <yaowu@xuyaowu.com>
|
||||
Yaowu Xu <yaowu@google.com> <Yaowu Xu>
|
||||
|
||||
18
AUTHORS
18
AUTHORS
@@ -24,6 +24,7 @@ changjun.yang <changjun.yang@intel.com>
|
||||
Charles 'Buck' Krasic <ckrasic@google.com>
|
||||
chm <chm@rock-chips.com>
|
||||
Christian Duvivier <cduvivier@google.com>
|
||||
Daniele Castagna <dcastagna@chromium.org>
|
||||
Daniel Kang <ddkang@google.com>
|
||||
Deb Mukherjee <debargha@google.com>
|
||||
Dim Temp <dimtemp0@gmail.com>
|
||||
@@ -56,7 +57,7 @@ James Zern <jzern@google.com>
|
||||
Jan Gerber <j@mailb.org>
|
||||
Jan Kratochvil <jan.kratochvil@redhat.com>
|
||||
Janne Salonen <jsalonen@google.com>
|
||||
Jean-Marc Valin <jmvalin@jmvalin.ca>
|
||||
Jean-Yves Avenard <jyavenard@mozilla.com>
|
||||
Jeff Faust <jfaust@google.com>
|
||||
Jeff Muizelaar <jmuizelaar@mozilla.com>
|
||||
Jeff Petkau <jpet@chromium.org>
|
||||
@@ -65,7 +66,6 @@ Jian Zhou <zhoujian@google.com>
|
||||
Jim Bankoski <jimbankoski@google.com>
|
||||
Jingning Han <jingning@google.com>
|
||||
Joey Parrish <joeyparrish@google.com>
|
||||
Johann Koenig <johannkoenig@chromium.org>
|
||||
Johann Koenig <johannkoenig@google.com>
|
||||
John Koleszar <jkoleszar@google.com>
|
||||
Johnny Klonaris <google@jawknee.com>
|
||||
@@ -77,6 +77,7 @@ Justin Clift <justin@salasaga.org>
|
||||
Justin Lebar <justin.lebar@gmail.com>
|
||||
KO Myung-Hun <komh@chollian.net>
|
||||
Lawrence Velázquez <larryv@macports.org>
|
||||
Linfeng Zhang <linfengz@google.com>
|
||||
Lou Quillio <louquillio@google.com>
|
||||
Luca Barbato <lu_zero@gentoo.org>
|
||||
Makoto Kato <makoto.kt@gmail.com>
|
||||
@@ -92,7 +93,6 @@ Mike Hommey <mhommey@mozilla.com>
|
||||
Mikhal Shemer <mikhal@google.com>
|
||||
Minghai Shang <minghai@google.com>
|
||||
Morton Jonuschat <yabawock@gmail.com>
|
||||
Nathan E. Egge <negge@dgql.org>
|
||||
Nico Weber <thakis@chromium.org>
|
||||
Parag Salasakar <img.mips1@gmail.com>
|
||||
Pascal Massimino <pascal.massimino@gmail.com>
|
||||
@@ -101,7 +101,6 @@ Paul Wilkins <paulwilkins@google.com>
|
||||
Pavol Rusnak <stick@gk2.sk>
|
||||
Paweł Hajdan <phajdan@google.com>
|
||||
Pengchong Jin <pengchong@google.com>
|
||||
Peter de Rivaz <peter.derivaz@argondesign.com>
|
||||
Peter de Rivaz <peter.derivaz@gmail.com>
|
||||
Philip Jägenstedt <philipj@opera.com>
|
||||
Priit Laes <plaes@plaes.org>
|
||||
@@ -121,7 +120,6 @@ Sergey Ulanov <sergeyu@chromium.org>
|
||||
Shimon Doodkin <helpmepro1@gmail.com>
|
||||
Shunyao Li <shunyaoli@google.com>
|
||||
Stefan Holmer <holmer@google.com>
|
||||
Steinar Midtskogen <stemidts@cisco.com>
|
||||
Suman Sunkara <sunkaras@google.com>
|
||||
Taekhyun Kim <takim@nvidia.com>
|
||||
Takanori MATSUURA <t.matsuu@gmail.com>
|
||||
@@ -129,16 +127,16 @@ Tamar Levy <tamar.levy@intel.com>
|
||||
Tao Bai <michaelbai@chromium.org>
|
||||
Tero Rintaluoma <teror@google.com>
|
||||
Thijs Vermeir <thijsvermeir@gmail.com>
|
||||
Thomas Daede <tdaede@mozilla.com>
|
||||
Thomas Davies <thdavies@cisco.com>
|
||||
Thomas <thdavies@cisco.com>
|
||||
Tim Kopp <tkopp@google.com>
|
||||
Timothy B. Terriberry <tterribe@xiph.org>
|
||||
Tom Finegan <tomfinegan@google.com>
|
||||
Tristan Matthews <le.businessman@gmail.com>
|
||||
Tristan Matthews <tmatth@videolan.org>
|
||||
Vignesh Venkatasubramanian <vigneshv@google.com>
|
||||
Yaowu Xu <yaowu@google.com>
|
||||
Yi Luo <luoyi@google.com>
|
||||
Yongzhe Wang <yongzhe@google.com>
|
||||
Yunqing Wang <yunqingwang@google.com>
|
||||
Yury Gitman <yuryg@google.com>
|
||||
Zoe Liu <zoeliu@google.com>
|
||||
Google Inc.
|
||||
The Mozilla Foundation
|
||||
The Xiph.Org Foundation
|
||||
|
||||
34
CHANGELOG
34
CHANGELOG
@@ -1,9 +1,33 @@
|
||||
Next Release
|
||||
- Incompatible changes:
|
||||
The AV1 encoder's default keyframe interval changed to 128 from 9999.
|
||||
2016-07-20 v1.6.0 "Khaki Campbell Duck"
|
||||
This release improves upon the VP9 encoder and speeds up the encoding and
|
||||
decoding processes.
|
||||
|
||||
- Upgrading:
|
||||
This release is ABI incompatible with 1.5.0 due to a new 'color_range' enum
|
||||
in vpx_image and some minor changes to the VP8_COMP structure.
|
||||
|
||||
The default key frame interval for VP9 has changed from 128 to 9999.
|
||||
|
||||
- Enhancement:
|
||||
A core focus has been performance for low end Intel processors. SSSE3
|
||||
instructions such as 'pshufb' have been avoided and instructions have been
|
||||
reordered to better accommodate the more constrained pipelines.
|
||||
|
||||
As a result, devices based on Celeron processors have seen substantial
|
||||
decoding improvements. From Indian Runner Duck to Javan Whistling Duck,
|
||||
decoding speed improved between 10 and 30%. Between Javan Whistling Duck
|
||||
and Khaki Campbell Duck, it improved another 10 to 15%.
|
||||
|
||||
While Celeron benefited most, Core-i5 also improved 5% and 10% between the
|
||||
respective releases.
|
||||
|
||||
Realtime performance for WebRTC for both speed and quality has received a
|
||||
lot of attention.
|
||||
|
||||
- Bug Fixes:
|
||||
A number of fuzzing issues, found variously by Mozilla, Chromium and others,
|
||||
have been fixed and we strongly recommend updating.
|
||||
|
||||
2016-04-07 v0.1.0 "AOMedia Codec 1"
|
||||
This release is the first Alliance for Open Media codec.
|
||||
2015-11-09 v1.5.0 "Javan Whistling Duck"
|
||||
This release improves upon the VP9 encoder and speeds up the encoding and
|
||||
decoding processes.
|
||||
|
||||
270
CMakeLists.txt
270
CMakeLists.txt
@@ -1,270 +0,0 @@
|
||||
##
|
||||
## Copyright (c) 2016, Alliance for Open Media. All rights reserved
|
||||
##
|
||||
## This source code is subject to the terms of the BSD 2 Clause License and
|
||||
## the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
|
||||
## was not distributed with this source code in the LICENSE file, you can
|
||||
## obtain it at www.aomedia.org/license/software. If the Alliance for Open
|
||||
## Media Patent License 1.0 was not distributed with this source code in the
|
||||
## PATENTS file, you can obtain it at www.aomedia.org/license/patent.
|
||||
##
|
||||
cmake_minimum_required(VERSION 3.2)
|
||||
project(AOM C CXX)
|
||||
|
||||
set(AOM_ROOT "${CMAKE_CURRENT_SOURCE_DIR}")
|
||||
set(AOM_CONFIG_DIR "${CMAKE_CURRENT_BINARY_DIR}")
|
||||
include("${AOM_ROOT}/build/cmake/aom_configure.cmake")
|
||||
|
||||
set(AOM_SRCS
|
||||
"${AOM_CONFIG_DIR}/aom_config.c"
|
||||
"${AOM_CONFIG_DIR}/aom_config.h"
|
||||
"${AOM_ROOT}/aom/aom.h"
|
||||
"${AOM_ROOT}/aom/aom_codec.h"
|
||||
"${AOM_ROOT}/aom/aom_decoder.h"
|
||||
"${AOM_ROOT}/aom/aom_encoder.h"
|
||||
"${AOM_ROOT}/aom/aom_frame_buffer.h"
|
||||
"${AOM_ROOT}/aom/aom_image.h"
|
||||
"${AOM_ROOT}/aom/aom_integer.h"
|
||||
"${AOM_ROOT}/aom/aomcx.h"
|
||||
"${AOM_ROOT}/aom/aomdx.h"
|
||||
"${AOM_ROOT}/aom/internal/aom_codec_internal.h"
|
||||
"${AOM_ROOT}/aom/src/aom_codec.c"
|
||||
"${AOM_ROOT}/aom/src/aom_decoder.c"
|
||||
"${AOM_ROOT}/aom/src/aom_encoder.c"
|
||||
"${AOM_ROOT}/aom/src/aom_image.c")
|
||||
|
||||
set(AOM_DSP_SRCS
|
||||
"${AOM_ROOT}/aom_dsp/aom_convolve.c"
|
||||
"${AOM_ROOT}/aom_dsp/aom_convolve.h"
|
||||
"${AOM_ROOT}/aom_dsp/aom_dsp_common.h"
|
||||
"${AOM_ROOT}/aom_dsp/aom_dsp_rtcd.c"
|
||||
"${AOM_ROOT}/aom_dsp/aom_filter.h"
|
||||
"${AOM_ROOT}/aom_dsp/aom_simd.c"
|
||||
"${AOM_ROOT}/aom_dsp/aom_simd.h"
|
||||
"${AOM_ROOT}/aom_dsp/aom_simd_inline.h"
|
||||
"${AOM_ROOT}/aom_dsp/avg.c"
|
||||
"${AOM_ROOT}/aom_dsp/bitreader.h"
|
||||
"${AOM_ROOT}/aom_dsp/bitreader_buffer.c"
|
||||
"${AOM_ROOT}/aom_dsp/bitreader_buffer.h"
|
||||
"${AOM_ROOT}/aom_dsp/bitwriter.h"
|
||||
"${AOM_ROOT}/aom_dsp/bitwriter_buffer.c"
|
||||
"${AOM_ROOT}/aom_dsp/bitwriter_buffer.h"
|
||||
"${AOM_ROOT}/aom_dsp/blend.h"
|
||||
"${AOM_ROOT}/aom_dsp/blend_a64_hmask.c"
|
||||
"${AOM_ROOT}/aom_dsp/blend_a64_mask.c"
|
||||
"${AOM_ROOT}/aom_dsp/blend_a64_vmask.c"
|
||||
"${AOM_ROOT}/aom_dsp/dkboolreader.c"
|
||||
"${AOM_ROOT}/aom_dsp/dkboolreader.h"
|
||||
"${AOM_ROOT}/aom_dsp/dkboolwriter.c"
|
||||
"${AOM_ROOT}/aom_dsp/dkboolwriter.h"
|
||||
"${AOM_ROOT}/aom_dsp/fwd_txfm.c"
|
||||
"${AOM_ROOT}/aom_dsp/fwd_txfm.h"
|
||||
"${AOM_ROOT}/aom_dsp/intrapred.c"
|
||||
"${AOM_ROOT}/aom_dsp/inv_txfm.c"
|
||||
"${AOM_ROOT}/aom_dsp/inv_txfm.h"
|
||||
"${AOM_ROOT}/aom_dsp/loopfilter.c"
|
||||
"${AOM_ROOT}/aom_dsp/prob.c"
|
||||
"${AOM_ROOT}/aom_dsp/prob.h"
|
||||
"${AOM_ROOT}/aom_dsp/psnr.c"
|
||||
"${AOM_ROOT}/aom_dsp/psnr.h"
|
||||
"${AOM_ROOT}/aom_dsp/quantize.c"
|
||||
"${AOM_ROOT}/aom_dsp/quantize.h"
|
||||
"${AOM_ROOT}/aom_dsp/sad.c"
|
||||
"${AOM_ROOT}/aom_dsp/simd/v128_intrinsics.h"
|
||||
"${AOM_ROOT}/aom_dsp/simd/v128_intrinsics_c.h"
|
||||
"${AOM_ROOT}/aom_dsp/simd/v256_intrinsics.h"
|
||||
"${AOM_ROOT}/aom_dsp/simd/v256_intrinsics_c.h"
|
||||
"${AOM_ROOT}/aom_dsp/simd/v64_intrinsics.h"
|
||||
"${AOM_ROOT}/aom_dsp/simd/v64_intrinsics_c.h"
|
||||
"${AOM_ROOT}/aom_dsp/subtract.c"
|
||||
"${AOM_ROOT}/aom_dsp/txfm_common.h"
|
||||
"${AOM_ROOT}/aom_dsp/variance.c"
|
||||
"${AOM_ROOT}/aom_dsp/variance.h")
|
||||
|
||||
set(AOM_MEM_SRCS
|
||||
"${AOM_ROOT}/aom_mem/aom_mem.c"
|
||||
"${AOM_ROOT}/aom_mem/aom_mem.h"
|
||||
"${AOM_ROOT}/aom_mem/include/aom_mem_intrnl.h")
|
||||
|
||||
set(AOM_SCALE_SRCS
|
||||
"${AOM_ROOT}/aom_scale/aom_scale.h"
|
||||
"${AOM_ROOT}/aom_scale/aom_scale_rtcd.c"
|
||||
"${AOM_ROOT}/aom_scale/generic/aom_scale.c"
|
||||
"${AOM_ROOT}/aom_scale/generic/gen_scalers.c"
|
||||
"${AOM_ROOT}/aom_scale/generic/yv12config.c"
|
||||
"${AOM_ROOT}/aom_scale/generic/yv12extend.c"
|
||||
"${AOM_ROOT}/aom_scale/yv12config.h")
|
||||
|
||||
# TODO(tomfinegan): Extract aom_ports from aom_util if possible.
|
||||
set(AOM_UTIL_SRCS
|
||||
"${AOM_ROOT}/aom_ports/aom_once.h"
|
||||
"${AOM_ROOT}/aom_ports/aom_timer.h"
|
||||
"${AOM_ROOT}/aom_ports/bitops.h"
|
||||
"${AOM_ROOT}/aom_ports/emmintrin_compat.h"
|
||||
"${AOM_ROOT}/aom_ports/mem.h"
|
||||
"${AOM_ROOT}/aom_ports/mem_ops.h"
|
||||
"${AOM_ROOT}/aom_ports/mem_ops_aligned.h"
|
||||
"${AOM_ROOT}/aom_ports/msvc.h"
|
||||
"${AOM_ROOT}/aom_ports/system_state.h"
|
||||
"${AOM_ROOT}/aom_util/aom_thread.c"
|
||||
"${AOM_ROOT}/aom_util/aom_thread.h"
|
||||
"${AOM_ROOT}/aom_util/endian_inl.h")
|
||||
|
||||
set(AOM_AV1_COMMON_SRCS
|
||||
"${AOM_ROOT}/av1/av1_iface_common.h"
|
||||
"${AOM_ROOT}/av1/common/alloccommon.c"
|
||||
"${AOM_ROOT}/av1/common/alloccommon.h"
|
||||
"${AOM_ROOT}/av1/common/av1_fwd_txfm.c"
|
||||
"${AOM_ROOT}/av1/common/av1_fwd_txfm.h"
|
||||
"${AOM_ROOT}/av1/common/av1_inv_txfm.c"
|
||||
"${AOM_ROOT}/av1/common/av1_inv_txfm.h"
|
||||
"${AOM_ROOT}/av1/common/av1_rtcd.c"
|
||||
"${AOM_ROOT}/av1/common/blockd.c"
|
||||
"${AOM_ROOT}/av1/common/blockd.h"
|
||||
"${AOM_ROOT}/av1/common/common.h"
|
||||
"${AOM_ROOT}/av1/common/common_data.h"
|
||||
"${AOM_ROOT}/av1/common/convolve.c"
|
||||
"${AOM_ROOT}/av1/common/convolve.h"
|
||||
"${AOM_ROOT}/av1/common/debugmodes.c"
|
||||
"${AOM_ROOT}/av1/common/entropy.c"
|
||||
"${AOM_ROOT}/av1/common/entropy.h"
|
||||
"${AOM_ROOT}/av1/common/entropymode.c"
|
||||
"${AOM_ROOT}/av1/common/entropymode.h"
|
||||
"${AOM_ROOT}/av1/common/entropymv.c"
|
||||
"${AOM_ROOT}/av1/common/entropymv.h"
|
||||
"${AOM_ROOT}/av1/common/enums.h"
|
||||
"${AOM_ROOT}/av1/common/filter.c"
|
||||
"${AOM_ROOT}/av1/common/filter.h"
|
||||
"${AOM_ROOT}/av1/common/frame_buffers.c"
|
||||
"${AOM_ROOT}/av1/common/frame_buffers.h"
|
||||
"${AOM_ROOT}/av1/common/idct.c"
|
||||
"${AOM_ROOT}/av1/common/idct.h"
|
||||
"${AOM_ROOT}/av1/common/loopfilter.c"
|
||||
"${AOM_ROOT}/av1/common/loopfilter.h"
|
||||
"${AOM_ROOT}/av1/common/mv.h"
|
||||
"${AOM_ROOT}/av1/common/mvref_common.c"
|
||||
"${AOM_ROOT}/av1/common/mvref_common.h"
|
||||
"${AOM_ROOT}/av1/common/odintrin.c"
|
||||
"${AOM_ROOT}/av1/common/odintrin.h"
|
||||
"${AOM_ROOT}/av1/common/onyxc_int.h"
|
||||
"${AOM_ROOT}/av1/common/pred_common.c"
|
||||
"${AOM_ROOT}/av1/common/pred_common.h"
|
||||
"${AOM_ROOT}/av1/common/quant_common.c"
|
||||
"${AOM_ROOT}/av1/common/quant_common.h"
|
||||
"${AOM_ROOT}/av1/common/reconinter.c"
|
||||
"${AOM_ROOT}/av1/common/reconinter.h"
|
||||
"${AOM_ROOT}/av1/common/reconintra.c"
|
||||
"${AOM_ROOT}/av1/common/reconintra.h"
|
||||
"${AOM_ROOT}/av1/common/scale.c"
|
||||
"${AOM_ROOT}/av1/common/scale.h"
|
||||
"${AOM_ROOT}/av1/common/scan.c"
|
||||
"${AOM_ROOT}/av1/common/scan.h"
|
||||
"${AOM_ROOT}/av1/common/seg_common.c"
|
||||
"${AOM_ROOT}/av1/common/seg_common.h"
|
||||
"${AOM_ROOT}/av1/common/thread_common.c"
|
||||
"${AOM_ROOT}/av1/common/thread_common.h"
|
||||
"${AOM_ROOT}/av1/common/tile_common.c"
|
||||
"${AOM_ROOT}/av1/common/tile_common.h")
|
||||
|
||||
set(AOM_AV1_DECODER_SRCS
|
||||
"${AOM_ROOT}/av1/av1_dx_iface.c"
|
||||
"${AOM_ROOT}/av1/decoder/decodeframe.c"
|
||||
"${AOM_ROOT}/av1/decoder/decodeframe.h"
|
||||
"${AOM_ROOT}/av1/decoder/decodemv.c"
|
||||
"${AOM_ROOT}/av1/decoder/decodemv.h"
|
||||
"${AOM_ROOT}/av1/decoder/decoder.c"
|
||||
"${AOM_ROOT}/av1/decoder/decoder.h"
|
||||
"${AOM_ROOT}/av1/decoder/detokenize.c"
|
||||
"${AOM_ROOT}/av1/decoder/detokenize.h"
|
||||
"${AOM_ROOT}/av1/decoder/dsubexp.c"
|
||||
"${AOM_ROOT}/av1/decoder/dsubexp.h"
|
||||
"${AOM_ROOT}/av1/decoder/dthread.c"
|
||||
"${AOM_ROOT}/av1/decoder/dthread.h")
|
||||
|
||||
set(AOM_AV1_ENCODER_SRCS
|
||||
"${AOM_ROOT}/av1/av1_cx_iface.c"
|
||||
"${AOM_ROOT}/av1/encoder/aq_complexity.c"
|
||||
"${AOM_ROOT}/av1/encoder/aq_complexity.h"
|
||||
"${AOM_ROOT}/av1/encoder/aq_cyclicrefresh.c"
|
||||
"${AOM_ROOT}/av1/encoder/aq_cyclicrefresh.h"
|
||||
"${AOM_ROOT}/av1/encoder/aq_variance.c"
|
||||
"${AOM_ROOT}/av1/encoder/aq_variance.h"
|
||||
"${AOM_ROOT}/av1/encoder/bitstream.c"
|
||||
"${AOM_ROOT}/av1/encoder/bitstream.h"
|
||||
"${AOM_ROOT}/av1/encoder/block.h"
|
||||
"${AOM_ROOT}/av1/encoder/context_tree.c"
|
||||
"${AOM_ROOT}/av1/encoder/context_tree.h"
|
||||
"${AOM_ROOT}/av1/encoder/cost.c"
|
||||
"${AOM_ROOT}/av1/encoder/cost.h"
|
||||
"${AOM_ROOT}/av1/encoder/dct.c"
|
||||
"${AOM_ROOT}/av1/encoder/encodeframe.c"
|
||||
"${AOM_ROOT}/av1/encoder/encodeframe.h"
|
||||
"${AOM_ROOT}/av1/encoder/encodemb.c"
|
||||
"${AOM_ROOT}/av1/encoder/encodemb.h"
|
||||
"${AOM_ROOT}/av1/encoder/encodemv.c"
|
||||
"${AOM_ROOT}/av1/encoder/encodemv.h"
|
||||
"${AOM_ROOT}/av1/encoder/encoder.c"
|
||||
"${AOM_ROOT}/av1/encoder/encoder.h"
|
||||
"${AOM_ROOT}/av1/encoder/ethread.c"
|
||||
"${AOM_ROOT}/av1/encoder/ethread.h"
|
||||
"${AOM_ROOT}/av1/encoder/extend.c"
|
||||
"${AOM_ROOT}/av1/encoder/extend.h"
|
||||
"${AOM_ROOT}/av1/encoder/firstpass.c"
|
||||
"${AOM_ROOT}/av1/encoder/firstpass.h"
|
||||
"${AOM_ROOT}/av1/encoder/hybrid_fwd_txfm.c"
|
||||
"${AOM_ROOT}/av1/encoder/hybrid_fwd_txfm.h"
|
||||
"${AOM_ROOT}/av1/encoder/lookahead.c"
|
||||
"${AOM_ROOT}/av1/encoder/lookahead.h"
|
||||
"${AOM_ROOT}/av1/encoder/mbgraph.c"
|
||||
"${AOM_ROOT}/av1/encoder/mbgraph.h"
|
||||
"${AOM_ROOT}/av1/encoder/mcomp.c"
|
||||
"${AOM_ROOT}/av1/encoder/mcomp.h"
|
||||
"${AOM_ROOT}/av1/encoder/picklpf.c"
|
||||
"${AOM_ROOT}/av1/encoder/picklpf.h"
|
||||
"${AOM_ROOT}/av1/encoder/quantize.c"
|
||||
"${AOM_ROOT}/av1/encoder/quantize.h"
|
||||
"${AOM_ROOT}/av1/encoder/ratectrl.c"
|
||||
"${AOM_ROOT}/av1/encoder/ratectrl.h"
|
||||
"${AOM_ROOT}/av1/encoder/rd.c"
|
||||
"${AOM_ROOT}/av1/encoder/rd.h"
|
||||
"${AOM_ROOT}/av1/encoder/rdopt.c"
|
||||
"${AOM_ROOT}/av1/encoder/rdopt.h"
|
||||
"${AOM_ROOT}/av1/encoder/resize.c"
|
||||
"${AOM_ROOT}/av1/encoder/resize.h"
|
||||
"${AOM_ROOT}/av1/encoder/segmentation.c"
|
||||
"${AOM_ROOT}/av1/encoder/segmentation.h"
|
||||
"${AOM_ROOT}/av1/encoder/speed_features.c"
|
||||
"${AOM_ROOT}/av1/encoder/speed_features.h"
|
||||
"${AOM_ROOT}/av1/encoder/subexp.c"
|
||||
"${AOM_ROOT}/av1/encoder/subexp.h"
|
||||
"${AOM_ROOT}/av1/encoder/temporal_filter.c"
|
||||
"${AOM_ROOT}/av1/encoder/temporal_filter.h"
|
||||
"${AOM_ROOT}/av1/encoder/tokenize.c"
|
||||
"${AOM_ROOT}/av1/encoder/tokenize.h"
|
||||
"${AOM_ROOT}/av1/encoder/treewriter.c"
|
||||
"${AOM_ROOT}/av1/encoder/treewriter.h")
|
||||
|
||||
# Targets
|
||||
add_library(aom_dsp ${AOM_DSP_SRCS})
|
||||
include_directories(${AOM_ROOT} ${AOM_CONFIG_DIR})
|
||||
add_library(aom_mem ${AOM_MEM_SRCS})
|
||||
add_library(aom_scale ${AOM_SCALE_SRCS})
|
||||
include_directories(${AOM_ROOT} ${AOM_CONFIG_DIR})
|
||||
add_library(aom_util ${AOM_UTIL_SRCS})
|
||||
add_library(aom_av1_decoder ${AOM_AV1_DECODER_SRCS})
|
||||
add_library(aom_av1_encoder ${AOM_AV1_ENCODER_SRCS})
|
||||
add_library(aom ${AOM_SRCS})
|
||||
target_link_libraries(aom LINK_PUBLIC
|
||||
aom_dsp
|
||||
aom_mem
|
||||
aom_scale
|
||||
aom_util
|
||||
aom_av1_decoder
|
||||
aom_av1_encoder)
|
||||
add_executable(simple_decoder examples/simple_decoder.c)
|
||||
include_directories(${AOM_ROOT})
|
||||
target_link_libraries(simple_decoder LINK_PUBLIC aom)
|
||||
add_executable(simple_encoder examples/simple_encoder.c)
|
||||
include_directories(${AOM_ROOT})
|
||||
target_link_libraries(simple_encoder LINK_PUBLIC aom)
|
||||
|
||||
42
LICENSE
42
LICENSE
@@ -1,27 +1,31 @@
|
||||
Copyright (c) 2016, Alliance for Open Media. All rights reserved.
|
||||
Copyright (c) 2010, The WebM Project authors. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
|
||||
* Neither the name of Google, nor the WebM Project, nor the names
|
||||
of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written
|
||||
permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
||||
FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
||||
COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
|
||||
ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
|
||||
127
PATENTS
127
PATENTS
@@ -1,108 +1,23 @@
|
||||
Alliance for Open Media Patent License 1.0
|
||||
Additional IP Rights Grant (Patents)
|
||||
------------------------------------
|
||||
|
||||
1. License Terms.
|
||||
|
||||
1.1. Patent License. Subject to the terms and conditions of this License, each
|
||||
Licensor, on behalf of itself and successors in interest and assigns,
|
||||
grants Licensee a non-sublicensable, perpetual, worldwide, non-exclusive,
|
||||
no-charge, royalty-free, irrevocable (except as expressly stated in this
|
||||
License) patent license to its Necessary Claims to make, use, sell, offer
|
||||
for sale, import or distribute any Implementation.
|
||||
|
||||
1.2. Conditions.
|
||||
|
||||
1.2.1. Availability. As a condition to the grant of rights to Licensee to make,
|
||||
sell, offer for sale, import or distribute an Implementation under
|
||||
Section 1.1, Licensee must make its Necessary Claims available under
|
||||
this License, and must reproduce this License with any Implementation
|
||||
as follows:
|
||||
|
||||
a. For distribution in source code, by including this License in the
|
||||
root directory of the source code with its Implementation.
|
||||
|
||||
b. For distribution in any other form (including binary, object form,
|
||||
and/or hardware description code (e.g., HDL, RTL, Gate Level Netlist,
|
||||
GDSII, etc.)), by including this License in the documentation, legal
|
||||
notices, and/or other written materials provided with the
|
||||
Implementation.
|
||||
|
||||
1.2.2. Additional Conditions. This license is directly from Licensor to
|
||||
Licensee. Licensee acknowledges as a condition of benefiting from it
|
||||
that no rights from Licensor are received from suppliers, distributors,
|
||||
or otherwise in connection with this License.
|
||||
|
||||
1.3. Defensive Termination. If any Licensee, its Affiliates, or its agents
|
||||
initiates patent litigation or files, maintains, or voluntarily
|
||||
participates in a lawsuit against another entity or any person asserting
|
||||
that any Implementation infringes Necessary Claims, any patent licenses
|
||||
granted under this License directly to the Licensee are immediately
|
||||
terminated as of the date of the initiation of action unless 1) that suit
|
||||
was in response to a corresponding suit regarding an Implementation first
|
||||
brought against an initiating entity, or 2) that suit was brought to
|
||||
enforce the terms of this License (including intervention in a third-party
|
||||
action by a Licensee).
|
||||
|
||||
1.4. Disclaimers. The Reference Implementation and Specification are provided
|
||||
"AS IS" and without warranty. The entire risk as to implementing or
|
||||
otherwise using the Reference Implementation or Specification is assumed
|
||||
by the implementer and user. Licensor expressly disclaims any warranties
|
||||
(express, implied, or otherwise), including implied warranties of
|
||||
merchantability, non-infringement, fitness for a particular purpose, or
|
||||
title, related to the material. IN NO EVENT WILL LICENSOR BE LIABLE TO
|
||||
ANY OTHER PARTY FOR LOST PROFITS OR ANY FORM OF INDIRECT, SPECIAL,
|
||||
INCIDENTAL, OR CONSEQUENTIAL DAMAGES OF ANY CHARACTER FROM ANY CAUSES OF
|
||||
ACTION OF ANY KIND WITH RESPECT TO THIS LICENSE, WHETHER BASED ON BREACH
|
||||
OF CONTRACT, TORT (INCLUDING NEGLIGENCE), OR OTHERWISE, AND WHETHER OR
|
||||
NOT THE OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
2. Definitions.
|
||||
|
||||
2.1. Affiliate. "Affiliate" means an entity that directly or indirectly
|
||||
Controls, is Controlled by, or is under common Control of that party.
|
||||
|
||||
2.2. Control. "Control" means direct or indirect control of more than 50% of
|
||||
the voting power to elect directors of that corporation, or for any other
|
||||
entity, the power to direct management of such entity.
|
||||
|
||||
2.3. Decoder. "Decoder" means any decoder that conforms fully with all
|
||||
non-optional portions of the Specification.
|
||||
|
||||
2.4. Encoder. "Encoder" means any encoder that produces a bitstream that can
|
||||
be decoded by a Decoder only to the extent it produces such a bitstream.
|
||||
|
||||
2.5. Final Deliverable. "Final Deliverable" means the final version of a
|
||||
deliverable approved by the Alliance for Open Media as a Final
|
||||
Deliverable.
|
||||
|
||||
2.6. Implementation. "Implementation" means any implementation, including the
|
||||
Reference Implementation, that is an Encoder and/or a Decoder. An
|
||||
Implementation also includes components of an Implementation only to the
|
||||
extent they are used as part of an Implementation.
|
||||
|
||||
2.7. License. "License" means this license.
|
||||
|
||||
2.8. Licensee. "Licensee" means any person or entity who exercises patent
|
||||
rights granted under this License.
|
||||
|
||||
2.9. Licensor. "Licensor" means (i) any Licensee that makes, sells, offers
|
||||
for sale, imports or distributes any Implementation, or (ii) a person
|
||||
or entity that has a licensing obligation to the Implementation as a
|
||||
result of its membership and/or participation in the Alliance for Open
|
||||
Media working group that developed the Specification.
|
||||
|
||||
2.10. Necessary Claims. "Necessary Claims" means all claims of patents or
|
||||
patent applications, (a) that currently or at any time in the future,
|
||||
are owned or controlled by the Licensor, and (b) (i) would be an
|
||||
Essential Claim as defined by the W3C Policy as of February 5, 2004
|
||||
(https://www.w3.org/Consortium/Patent-Policy-20040205/#def-essential)
|
||||
as if the Specification was a W3C Recommendation; or (ii) are infringed
|
||||
by the Reference Implementation.
|
||||
|
||||
2.11. Reference Implementation. "Reference Implementation" means an Encoder
|
||||
and/or Decoder released by the Alliance for Open Media as a Final
|
||||
Deliverable.
|
||||
|
||||
2.12. Specification. "Specification" means the specification designated by
|
||||
the Alliance for Open Media as a Final Deliverable for which this
|
||||
License was issued.
|
||||
"These implementations" means the copyrightable works that implement the WebM
|
||||
codecs distributed by Google as part of the WebM Project.
|
||||
|
||||
Google hereby grants to you a perpetual, worldwide, non-exclusive, no-charge,
|
||||
royalty-free, irrevocable (except as stated in this section) patent license to
|
||||
make, have made, use, offer to sell, sell, import, transfer, and otherwise
|
||||
run, modify and propagate the contents of these implementations of WebM, where
|
||||
such license applies only to those patent claims, both currently owned by
|
||||
Google and acquired in the future, licensable by Google that are necessarily
|
||||
infringed by these implementations of WebM. This grant does not include claims
|
||||
that would be infringed only as a consequence of further modification of these
|
||||
implementations. If you or your agent or exclusive licensee institute or order
|
||||
or agree to the institution of patent litigation or any other patent
|
||||
enforcement activity against any entity (including a cross-claim or
|
||||
counterclaim in a lawsuit) alleging that any of these implementations of WebM
|
||||
or any code incorporated within any of these implementations of WebM
|
||||
constitute direct or contributory patent infringement, or inducement of
|
||||
patent infringement, then any patent rights granted to you under this License
|
||||
for these implementations of WebM shall terminate as of the date such
|
||||
litigation is filed.
|
||||
|
||||
36
README
36
README
@@ -1,6 +1,6 @@
|
||||
README - 23 March 2015
|
||||
README - 20 July 2016
|
||||
|
||||
Welcome to the WebM VP8/AV1 Codec SDK!
|
||||
Welcome to the WebM VP8/VP9 Codec SDK!
|
||||
|
||||
COMPILING THE APPLICATIONS/LIBRARIES:
|
||||
The build system used is similar to autotools. Building generally consists of
|
||||
@@ -33,13 +33,13 @@ COMPILING THE APPLICATIONS/LIBRARIES:
|
||||
|
||||
$ mkdir build
|
||||
$ cd build
|
||||
$ ../libaom/configure <options>
|
||||
$ ../libvpx/configure <options>
|
||||
$ make
|
||||
|
||||
3. Configuration options
|
||||
The 'configure' script supports a number of options. The --help option can be
|
||||
used to get a list of supported options:
|
||||
$ ../libaom/configure --help
|
||||
$ ../libvpx/configure --help
|
||||
|
||||
4. Cross development
|
||||
For cross development, the most notable option is the --target option. The
|
||||
@@ -79,6 +79,9 @@ COMPILING THE APPLICATIONS/LIBRARIES:
|
||||
x86-os2-gcc
|
||||
x86-solaris-gcc
|
||||
x86-win32-gcc
|
||||
x86-win32-vs7
|
||||
x86-win32-vs8
|
||||
x86-win32-vs9
|
||||
x86-win32-vs10
|
||||
x86-win32-vs11
|
||||
x86-win32-vs12
|
||||
@@ -95,6 +98,8 @@ COMPILING THE APPLICATIONS/LIBRARIES:
|
||||
x86_64-linux-icc
|
||||
x86_64-solaris-gcc
|
||||
x86_64-win64-gcc
|
||||
x86_64-win64-vs8
|
||||
x86_64-win64-vs9
|
||||
x86_64-win64-vs10
|
||||
x86_64-win64-vs11
|
||||
x86_64-win64-vs12
|
||||
@@ -108,7 +113,7 @@ COMPILING THE APPLICATIONS/LIBRARIES:
|
||||
toolchain, the following command could be used (note, POSIX SH syntax, adapt
|
||||
to your shell as necessary):
|
||||
|
||||
$ CROSS=mipsel-linux-uclibc- ../libaom/configure
|
||||
$ CROSS=mipsel-linux-uclibc- ../libvpx/configure
|
||||
|
||||
In addition, the executables to be invoked can be overridden by specifying the
|
||||
environment variables: CC, AR, LD, AS, STRIP, NM. Additional flags can be
|
||||
@@ -119,28 +124,13 @@ COMPILING THE APPLICATIONS/LIBRARIES:
|
||||
This defaults to config.log. This should give a good indication of what went
|
||||
wrong. If not, contact us for support.
|
||||
|
||||
VP8/AV1 TEST VECTORS:
|
||||
VP8/VP9 TEST VECTORS:
|
||||
The test vectors can be downloaded and verified using the build system after
|
||||
running configure. To specify an alternate directory the
|
||||
LIBAOM_TEST_DATA_PATH environment variable can be used.
|
||||
LIBVPX_TEST_DATA_PATH environment variable can be used.
|
||||
|
||||
$ ./configure --enable-unit-tests
|
||||
$ LIBAOM_TEST_DATA_PATH=../libaom-test-data make testdata
|
||||
|
||||
CODE STYLE:
|
||||
The coding style used by this project is enforced with clang-format using the
|
||||
configuration contained in the .clang-format file in the root of the
|
||||
repository.
|
||||
|
||||
Before pushing changes for review you can format your code with:
|
||||
# Apply clang-format to modified .c, .h and .cc files
|
||||
$ clang-format -i --style=file \
|
||||
$(git diff --name-only --diff-filter=ACMR '*.[hc]' '*.cc')
|
||||
|
||||
Check the .clang-format file for the version used to generate it if there is
|
||||
any difference between your local formatting and the review system.
|
||||
|
||||
See also: http://clang.llvm.org/docs/ClangFormat.html
|
||||
$ LIBVPX_TEST_DATA_PATH=../libvpx-test-data make testdata
|
||||
|
||||
SUPPORT
|
||||
This library is an open source project supported by its community. Please
|
||||
|
||||
160
aom/aom.h
160
aom/aom.h
@@ -1,160 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2016, Alliance for Open Media. All rights reserved
|
||||
*
|
||||
* This source code is subject to the terms of the BSD 2 Clause License and
|
||||
* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
|
||||
* was not distributed with this source code in the LICENSE file, you can
|
||||
* obtain it at www.aomedia.org/license/software. If the Alliance for Open
|
||||
* Media Patent License 1.0 was not distributed with this source code in the
|
||||
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
|
||||
*/
|
||||
|
||||
/*!\defgroup aom AOM
|
||||
* \ingroup codecs
|
||||
* AOM is aom's newest video compression algorithm that uses motion
|
||||
* compensated prediction, Discrete Cosine Transform (DCT) coding of the
|
||||
* prediction error signal and context dependent entropy coding techniques
|
||||
* based on arithmetic principles. It features:
|
||||
* - YUV 4:2:0 image format
|
||||
* - Macro-block based coding (16x16 luma plus two 8x8 chroma)
|
||||
* - 1/4 (1/8) pixel accuracy motion compensated prediction
|
||||
* - 4x4 DCT transform
|
||||
* - 128 level linear quantizer
|
||||
* - In loop deblocking filter
|
||||
* - Context-based entropy coding
|
||||
*
|
||||
* @{
|
||||
*/
|
||||
/*!\file
|
||||
* \brief Provides controls common to both the AOM encoder and decoder.
|
||||
*/
|
||||
#ifndef AOM_AOM_H_
|
||||
#define AOM_AOM_H_
|
||||
|
||||
#include "./aom_codec.h"
|
||||
#include "./aom_image.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/*!\brief Control functions
|
||||
*
|
||||
* The set of macros define the control functions of AOM interface
|
||||
*/
|
||||
enum aom_com_control_id {
|
||||
/*!\brief pass in an external frame into decoder to be used as reference frame
|
||||
*/
|
||||
AOM_SET_REFERENCE = 1,
|
||||
AOM_COPY_REFERENCE = 2, /**< get a copy of reference frame from the decoder */
|
||||
AOM_SET_POSTPROC = 3, /**< set the decoder's post processing settings */
|
||||
AOM_SET_DBG_COLOR_REF_FRAME =
|
||||
4, /**< set the reference frames to color for each macroblock */
|
||||
AOM_SET_DBG_COLOR_MB_MODES = 5, /**< set which macro block modes to color */
|
||||
AOM_SET_DBG_COLOR_B_MODES = 6, /**< set which blocks modes to color */
|
||||
AOM_SET_DBG_DISPLAY_MV = 7, /**< set which motion vector modes to draw */
|
||||
|
||||
/* TODO(jkoleszar): The encoder incorrectly reuses some of these values (5+)
|
||||
* for its control ids. These should be migrated to something like the
|
||||
* AOM_DECODER_CTRL_ID_START range next time we're ready to break the ABI.
|
||||
*/
|
||||
AV1_GET_REFERENCE = 128, /**< get a pointer to a reference frame */
|
||||
AOM_COMMON_CTRL_ID_MAX,
|
||||
|
||||
AV1_GET_NEW_FRAME_IMAGE = 192, /**< get a pointer to the new frame */
|
||||
|
||||
AOM_DECODER_CTRL_ID_START = 256
|
||||
};
|
||||
|
||||
/*!\brief post process flags
|
||||
*
|
||||
* The set of macros define AOM decoder post processing flags
|
||||
*/
|
||||
enum aom_postproc_level {
|
||||
AOM_NOFILTERING = 0,
|
||||
AOM_DEBLOCK = 1 << 0,
|
||||
AOM_DEMACROBLOCK = 1 << 1,
|
||||
AOM_ADDNOISE = 1 << 2,
|
||||
AOM_DEBUG_TXT_FRAME_INFO = 1 << 3, /**< print frame information */
|
||||
AOM_DEBUG_TXT_MBLK_MODES =
|
||||
1 << 4, /**< print macro block modes over each macro block */
|
||||
AOM_DEBUG_TXT_DC_DIFF = 1 << 5, /**< print dc diff for each macro block */
|
||||
AOM_DEBUG_TXT_RATE_INFO = 1 << 6, /**< print video rate info (encoder only) */
|
||||
AOM_MFQE = 1 << 10
|
||||
};
|
||||
|
||||
/*!\brief post process flags
|
||||
*
|
||||
* This defines a structure that describes the post processing settings. For
|
||||
* the best objective measure (using the PSNR metric) set post_proc_flag
|
||||
* to AOM_DEBLOCK and deblocking_level to 1.
|
||||
*/
|
||||
|
||||
typedef struct aom_postproc_cfg {
|
||||
/*!\brief the types of post processing to be done, should be combination of
|
||||
* "aom_postproc_level" */
|
||||
int post_proc_flag;
|
||||
int deblocking_level; /**< the strength of deblocking, valid range [0, 16] */
|
||||
int noise_level; /**< the strength of additive noise, valid range [0, 16] */
|
||||
} aom_postproc_cfg_t;
|
||||
|
||||
/*!\brief reference frame type
|
||||
*
|
||||
* The set of macros define the type of AOM reference frames
|
||||
*/
|
||||
typedef enum aom_ref_frame_type {
|
||||
AOM_LAST_FRAME = 1,
|
||||
AOM_GOLD_FRAME = 2,
|
||||
AOM_ALTR_FRAME = 4
|
||||
} aom_ref_frame_type_t;
|
||||
|
||||
/*!\brief reference frame data struct
|
||||
*
|
||||
* Define the data struct to access aom reference frames.
|
||||
*/
|
||||
typedef struct aom_ref_frame {
|
||||
aom_ref_frame_type_t frame_type; /**< which reference frame */
|
||||
aom_image_t img; /**< reference frame data in image format */
|
||||
} aom_ref_frame_t;
|
||||
|
||||
/*!\brief AV1 specific reference frame data struct
|
||||
*
|
||||
* Define the data struct to access av1 reference frames.
|
||||
*/
|
||||
typedef struct av1_ref_frame {
|
||||
int idx; /**< frame index to get (input) */
|
||||
aom_image_t img; /**< img structure to populate (output) */
|
||||
} av1_ref_frame_t;
|
||||
|
||||
/*!\cond */
|
||||
/*!\brief aom decoder control function parameter type
|
||||
*
|
||||
* Defines the data type that each AOM decoder control function requires
|
||||
*/
|
||||
AOM_CTRL_USE_TYPE(AOM_SET_REFERENCE, aom_ref_frame_t *)
|
||||
#define AOM_CTRL_AOM_SET_REFERENCE
|
||||
AOM_CTRL_USE_TYPE(AOM_COPY_REFERENCE, aom_ref_frame_t *)
|
||||
#define AOM_CTRL_AOM_COPY_REFERENCE
|
||||
AOM_CTRL_USE_TYPE(AOM_SET_POSTPROC, aom_postproc_cfg_t *)
|
||||
#define AOM_CTRL_AOM_SET_POSTPROC
|
||||
AOM_CTRL_USE_TYPE(AOM_SET_DBG_COLOR_REF_FRAME, int)
|
||||
#define AOM_CTRL_AOM_SET_DBG_COLOR_REF_FRAME
|
||||
AOM_CTRL_USE_TYPE(AOM_SET_DBG_COLOR_MB_MODES, int)
|
||||
#define AOM_CTRL_AOM_SET_DBG_COLOR_MB_MODES
|
||||
AOM_CTRL_USE_TYPE(AOM_SET_DBG_COLOR_B_MODES, int)
|
||||
#define AOM_CTRL_AOM_SET_DBG_COLOR_B_MODES
|
||||
AOM_CTRL_USE_TYPE(AOM_SET_DBG_DISPLAY_MV, int)
|
||||
#define AOM_CTRL_AOM_SET_DBG_DISPLAY_MV
|
||||
AOM_CTRL_USE_TYPE(AV1_GET_REFERENCE, av1_ref_frame_t *)
|
||||
#define AOM_CTRL_AV1_GET_REFERENCE
|
||||
AOM_CTRL_USE_TYPE(AV1_GET_NEW_FRAME_IMAGE, aom_image_t *)
|
||||
#define AOM_CTRL_AV1_GET_NEW_FRAME_IMAGE
|
||||
|
||||
/*!\endcond */
|
||||
/*! @} - end defgroup aom */
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
#endif
|
||||
|
||||
#endif // AOM_AOM_H_
|
||||
487
aom/aom_codec.h
487
aom/aom_codec.h
@@ -1,487 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2016, Alliance for Open Media. All rights reserved
|
||||
*
|
||||
* This source code is subject to the terms of the BSD 2 Clause License and
|
||||
* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
|
||||
* was not distributed with this source code in the LICENSE file, you can
|
||||
* obtain it at www.aomedia.org/license/software. If the Alliance for Open
|
||||
* Media Patent License 1.0 was not distributed with this source code in the
|
||||
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
|
||||
*/
|
||||
|
||||
/*!\defgroup codec Common Algorithm Interface
|
||||
* This abstraction allows applications to easily support multiple video
|
||||
* formats with minimal code duplication. This section describes the interface
|
||||
* common to all codecs (both encoders and decoders).
|
||||
* @{
|
||||
*/
|
||||
|
||||
/*!\file
|
||||
* \brief Describes the codec algorithm interface to applications.
|
||||
*
|
||||
* This file describes the interface between an application and a
|
||||
* video codec algorithm.
|
||||
*
|
||||
* An application instantiates a specific codec instance by using
|
||||
* aom_codec_init() and a pointer to the algorithm's interface structure:
|
||||
* <pre>
|
||||
* my_app.c:
|
||||
* extern aom_codec_iface_t my_codec;
|
||||
* {
|
||||
* aom_codec_ctx_t algo;
|
||||
* res = aom_codec_init(&algo, &my_codec);
|
||||
* }
|
||||
* </pre>
|
||||
*
|
||||
* Once initialized, the instance is managed using other functions from
|
||||
* the aom_codec_* family.
|
||||
*/
|
||||
#ifndef AOM_AOM_CODEC_H_
|
||||
#define AOM_AOM_CODEC_H_
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#include "./aom_integer.h"
|
||||
#include "./aom_image.h"
|
||||
|
||||
/*!\brief Decorator indicating a function is deprecated */
|
||||
#ifndef DEPRECATED
|
||||
#if defined(__GNUC__) && __GNUC__
|
||||
#define DEPRECATED __attribute__((deprecated))
|
||||
#elif defined(_MSC_VER)
|
||||
#define DEPRECATED
|
||||
#else
|
||||
#define DEPRECATED
|
||||
#endif
|
||||
#endif /* DEPRECATED */
|
||||
|
||||
#ifndef DECLSPEC_DEPRECATED
|
||||
#if defined(__GNUC__) && __GNUC__
|
||||
#define DECLSPEC_DEPRECATED /**< \copydoc #DEPRECATED */
|
||||
#elif defined(_MSC_VER)
|
||||
/*!\brief \copydoc #DEPRECATED */
|
||||
#define DECLSPEC_DEPRECATED __declspec(deprecated)
|
||||
#else
|
||||
#define DECLSPEC_DEPRECATED /**< \copydoc #DEPRECATED */
|
||||
#endif
|
||||
#endif /* DECLSPEC_DEPRECATED */
|
||||
|
||||
/*!\brief Decorator indicating a function is potentially unused */
|
||||
#ifdef UNUSED
|
||||
#elif defined(__GNUC__) || defined(__clang__)
|
||||
#define UNUSED __attribute__((unused))
|
||||
#else
|
||||
#define UNUSED
|
||||
#endif
|
||||
|
||||
/*!\brief Decorator indicating that given struct/union/enum is packed */
|
||||
#ifndef ATTRIBUTE_PACKED
|
||||
#if defined(__GNUC__) && __GNUC__
|
||||
#define ATTRIBUTE_PACKED __attribute__((packed))
|
||||
#elif defined(_MSC_VER)
|
||||
#define ATTRIBUTE_PACKED
|
||||
#else
|
||||
#define ATTRIBUTE_PACKED
|
||||
#endif
|
||||
#endif /* ATTRIBUTE_PACKED */
|
||||
|
||||
/*!\brief Current ABI version number
|
||||
*
|
||||
* \internal
|
||||
* If this file is altered in any way that changes the ABI, this value
|
||||
* must be bumped. Examples include, but are not limited to, changing
|
||||
* types, removing or reassigning enums, adding/removing/rearranging
|
||||
* fields to structures
|
||||
*/
|
||||
#define AOM_CODEC_ABI_VERSION (3 + AOM_IMAGE_ABI_VERSION) /**<\hideinitializer*/
|
||||
|
||||
/*!\brief Algorithm return codes */
|
||||
typedef enum {
|
||||
/*!\brief Operation completed without error */
|
||||
AOM_CODEC_OK,
|
||||
|
||||
/*!\brief Unspecified error */
|
||||
AOM_CODEC_ERROR,
|
||||
|
||||
/*!\brief Memory operation failed */
|
||||
AOM_CODEC_MEM_ERROR,
|
||||
|
||||
/*!\brief ABI version mismatch */
|
||||
AOM_CODEC_ABI_MISMATCH,
|
||||
|
||||
/*!\brief Algorithm does not have required capability */
|
||||
AOM_CODEC_INCAPABLE,
|
||||
|
||||
/*!\brief The given bitstream is not supported.
|
||||
*
|
||||
* The bitstream was unable to be parsed at the highest level. The decoder
|
||||
* is unable to proceed. This error \ref SHOULD be treated as fatal to the
|
||||
* stream. */
|
||||
AOM_CODEC_UNSUP_BITSTREAM,
|
||||
|
||||
/*!\brief Encoded bitstream uses an unsupported feature
|
||||
*
|
||||
* The decoder does not implement a feature required by the encoder. This
|
||||
* return code should only be used for features that prevent future
|
||||
* pictures from being properly decoded. This error \ref MAY be treated as
|
||||
* fatal to the stream or \ref MAY be treated as fatal to the current GOP.
|
||||
*/
|
||||
AOM_CODEC_UNSUP_FEATURE,
|
||||
|
||||
/*!\brief The coded data for this stream is corrupt or incomplete
|
||||
*
|
||||
* There was a problem decoding the current frame. This return code
|
||||
* should only be used for failures that prevent future pictures from
|
||||
* being properly decoded. This error \ref MAY be treated as fatal to the
|
||||
* stream or \ref MAY be treated as fatal to the current GOP. If decoding
|
||||
* is continued for the current GOP, artifacts may be present.
|
||||
*/
|
||||
AOM_CODEC_CORRUPT_FRAME,
|
||||
|
||||
/*!\brief An application-supplied parameter is not valid.
|
||||
*
|
||||
*/
|
||||
AOM_CODEC_INVALID_PARAM,
|
||||
|
||||
/*!\brief An iterator reached the end of list.
|
||||
*
|
||||
*/
|
||||
AOM_CODEC_LIST_END
|
||||
|
||||
} aom_codec_err_t;
|
||||
|
||||
/*! \brief Codec capabilities bitfield
|
||||
*
|
||||
* Each codec advertises the capabilities it supports as part of its
|
||||
* ::aom_codec_iface_t interface structure. Capabilities are extra interfaces
|
||||
* or functionality, and are not required to be supported.
|
||||
*
|
||||
* The available flags are specified by AOM_CODEC_CAP_* defines.
|
||||
*/
|
||||
typedef long aom_codec_caps_t;
|
||||
#define AOM_CODEC_CAP_DECODER 0x1 /**< Is a decoder */
|
||||
#define AOM_CODEC_CAP_ENCODER 0x2 /**< Is an encoder */
|
||||
|
||||
/*! \brief Initialization-time Feature Enabling
|
||||
*
|
||||
* Certain codec features must be known at initialization time, to allow for
|
||||
* proper memory allocation.
|
||||
*
|
||||
* The available flags are specified by AOM_CODEC_USE_* defines.
|
||||
*/
|
||||
typedef long aom_codec_flags_t;
|
||||
|
||||
/*!\brief Codec interface structure.
|
||||
*
|
||||
* Contains function pointers and other data private to the codec
|
||||
* implementation. This structure is opaque to the application.
|
||||
*/
|
||||
typedef const struct aom_codec_iface aom_codec_iface_t;
|
||||
|
||||
/*!\brief Codec private data structure.
|
||||
*
|
||||
* Contains data private to the codec implementation. This structure is opaque
|
||||
* to the application.
|
||||
*/
|
||||
typedef struct aom_codec_priv aom_codec_priv_t;
|
||||
|
||||
/*!\brief Iterator
|
||||
*
|
||||
* Opaque storage used for iterating over lists.
|
||||
*/
|
||||
typedef const void *aom_codec_iter_t;
|
||||
|
||||
/*!\brief Codec context structure
|
||||
*
|
||||
* All codecs \ref MUST support this context structure fully. In general,
|
||||
* this data should be considered private to the codec algorithm, and
|
||||
* not be manipulated or examined by the calling application. Applications
|
||||
* may reference the 'name' member to get a printable description of the
|
||||
* algorithm.
|
||||
*/
|
||||
typedef struct aom_codec_ctx {
|
||||
const char *name; /**< Printable interface name */
|
||||
aom_codec_iface_t *iface; /**< Interface pointers */
|
||||
aom_codec_err_t err; /**< Last returned error */
|
||||
const char *err_detail; /**< Detailed info, if available */
|
||||
aom_codec_flags_t init_flags; /**< Flags passed at init time */
|
||||
union {
|
||||
/**< Decoder Configuration Pointer */
|
||||
const struct aom_codec_dec_cfg *dec;
|
||||
/**< Encoder Configuration Pointer */
|
||||
const struct aom_codec_enc_cfg *enc;
|
||||
const void *raw;
|
||||
} config; /**< Configuration pointer aliasing union */
|
||||
aom_codec_priv_t *priv; /**< Algorithm private storage */
|
||||
} aom_codec_ctx_t;
|
||||
|
||||
/*!\brief Bit depth for codec
|
||||
*
|
||||
* This enumeration determines the bit depth of the codec.
|
||||
*/
|
||||
typedef enum aom_bit_depth {
|
||||
AOM_BITS_8 = 8, /**< 8 bits */
|
||||
AOM_BITS_10 = 10, /**< 10 bits */
|
||||
AOM_BITS_12 = 12, /**< 12 bits */
|
||||
} aom_bit_depth_t;
|
||||
|
||||
/*!\brief Superblock size selection.
|
||||
*
|
||||
* Defines the superblock size used for encoding. The superblock size can
|
||||
* either be fixed at 64x64 or 128x128 pixels, or it can be dynamically
|
||||
* selected by the encoder for each frame.
|
||||
*/
|
||||
typedef enum aom_superblock_size {
|
||||
AOM_SUPERBLOCK_SIZE_64X64, /**< Always use 64x64 superblocks. */
|
||||
AOM_SUPERBLOCK_SIZE_128X128, /**< Always use 128x128 superblocks. */
|
||||
AOM_SUPERBLOCK_SIZE_DYNAMIC /**< Select superblock size dynamically. */
|
||||
} aom_superblock_size_t;
|
||||
|
||||
/*
|
||||
* Library Version Number Interface
|
||||
*
|
||||
* For example, see the following sample return values:
|
||||
* aom_codec_version() (1<<16 | 2<<8 | 3)
|
||||
* aom_codec_version_str() "v1.2.3-rc1-16-gec6a1ba"
|
||||
* aom_codec_version_extra_str() "rc1-16-gec6a1ba"
|
||||
*/
|
||||
|
||||
/*!\brief Return the version information (as an integer)
|
||||
*
|
||||
* Returns a packed encoding of the library version number. This will only
|
||||
* include
|
||||
* the major.minor.patch component of the version number. Note that this encoded
|
||||
* value should be accessed through the macros provided, as the encoding may
|
||||
* change
|
||||
* in the future.
|
||||
*
|
||||
*/
|
||||
int aom_codec_version(void);
|
||||
#define AOM_VERSION_MAJOR(v) \
|
||||
((v >> 16) & 0xff) /**< extract major from packed version */
|
||||
#define AOM_VERSION_MINOR(v) \
|
||||
((v >> 8) & 0xff) /**< extract minor from packed version */
|
||||
#define AOM_VERSION_PATCH(v) \
|
||||
((v >> 0) & 0xff) /**< extract patch from packed version */
|
||||
|
||||
/*!\brief Return the version major number */
|
||||
#define aom_codec_version_major() ((aom_codec_version() >> 16) & 0xff)
|
||||
|
||||
/*!\brief Return the version minor number */
|
||||
#define aom_codec_version_minor() ((aom_codec_version() >> 8) & 0xff)
|
||||
|
||||
/*!\brief Return the version patch number */
|
||||
#define aom_codec_version_patch() ((aom_codec_version() >> 0) & 0xff)
|
||||
|
||||
/*!\brief Return the version information (as a string)
|
||||
*
|
||||
* Returns a printable string containing the full library version number. This
|
||||
* may
|
||||
* contain additional text following the three digit version number, as to
|
||||
* indicate
|
||||
* release candidates, prerelease versions, etc.
|
||||
*
|
||||
*/
|
||||
const char *aom_codec_version_str(void);
|
||||
|
||||
/*!\brief Return the version information (as a string)
|
||||
*
|
||||
* Returns a printable "extra string". This is the component of the string
|
||||
* returned
|
||||
* by aom_codec_version_str() following the three digit version number.
|
||||
*
|
||||
*/
|
||||
const char *aom_codec_version_extra_str(void);
|
||||
|
||||
/*!\brief Return the build configuration
|
||||
*
|
||||
* Returns a printable string containing an encoded version of the build
|
||||
* configuration. This may be useful to aom support.
|
||||
*
|
||||
*/
|
||||
const char *aom_codec_build_config(void);
|
||||
|
||||
/*!\brief Return the name for a given interface
|
||||
*
|
||||
* Returns a human readable string for name of the given codec interface.
|
||||
*
|
||||
* \param[in] iface Interface pointer
|
||||
*
|
||||
*/
|
||||
const char *aom_codec_iface_name(aom_codec_iface_t *iface);
|
||||
|
||||
/*!\brief Convert error number to printable string
|
||||
*
|
||||
* Returns a human readable string for the last error returned by the
|
||||
* algorithm. The returned error will be one line and will not contain
|
||||
* any newline characters.
|
||||
*
|
||||
*
|
||||
* \param[in] err Error number.
|
||||
*
|
||||
*/
|
||||
const char *aom_codec_err_to_string(aom_codec_err_t err);
|
||||
|
||||
/*!\brief Retrieve error synopsis for codec context
|
||||
*
|
||||
* Returns a human readable string for the last error returned by the
|
||||
* algorithm. The returned error will be one line and will not contain
|
||||
* any newline characters.
|
||||
*
|
||||
*
|
||||
* \param[in] ctx Pointer to this instance's context.
|
||||
*
|
||||
*/
|
||||
const char *aom_codec_error(aom_codec_ctx_t *ctx);
|
||||
|
||||
/*!\brief Retrieve detailed error information for codec context
|
||||
*
|
||||
* Returns a human readable string providing detailed information about
|
||||
* the last error.
|
||||
*
|
||||
* \param[in] ctx Pointer to this instance's context.
|
||||
*
|
||||
* \retval NULL
|
||||
* No detailed information is available.
|
||||
*/
|
||||
const char *aom_codec_error_detail(aom_codec_ctx_t *ctx);
|
||||
|
||||
/* REQUIRED FUNCTIONS
|
||||
*
|
||||
* The following functions are required to be implemented for all codecs.
|
||||
* They represent the base case functionality expected of all codecs.
|
||||
*/
|
||||
|
||||
/*!\brief Destroy a codec instance
|
||||
*
|
||||
* Destroys a codec context, freeing any associated memory buffers.
|
||||
*
|
||||
* \param[in] ctx Pointer to this instance's context
|
||||
*
|
||||
* \retval #AOM_CODEC_OK
|
||||
* The codec instance was destroyed.
|
||||
* \retval #AOM_CODEC_MEM_ERROR
|
||||
* Memory allocation failed.
|
||||
*/
|
||||
aom_codec_err_t aom_codec_destroy(aom_codec_ctx_t *ctx);
|
||||
|
||||
/*!\brief Get the capabilities of an algorithm.
|
||||
*
|
||||
* Retrieves the capabilities bitfield from the algorithm's interface.
|
||||
*
|
||||
* \param[in] iface Pointer to the algorithm interface
|
||||
*
|
||||
*/
|
||||
aom_codec_caps_t aom_codec_get_caps(aom_codec_iface_t *iface);
|
||||
|
||||
/*!\brief Control algorithm
|
||||
*
|
||||
* This function is used to exchange algorithm specific data with the codec
|
||||
* instance. This can be used to implement features specific to a particular
|
||||
* algorithm.
|
||||
*
|
||||
* This wrapper function dispatches the request to the helper function
|
||||
* associated with the given ctrl_id. It tries to call this function
|
||||
* transparently, but will return #AOM_CODEC_ERROR if the request could not
|
||||
* be dispatched.
|
||||
*
|
||||
* Note that this function should not be used directly. Call the
|
||||
* #aom_codec_control wrapper macro instead.
|
||||
*
|
||||
* \param[in] ctx Pointer to this instance's context
|
||||
* \param[in] ctrl_id Algorithm specific control identifier
|
||||
*
|
||||
* \retval #AOM_CODEC_OK
|
||||
* The control request was processed.
|
||||
* \retval #AOM_CODEC_ERROR
|
||||
* The control request was not processed.
|
||||
* \retval #AOM_CODEC_INVALID_PARAM
|
||||
* The data was not valid.
|
||||
*/
|
||||
aom_codec_err_t aom_codec_control_(aom_codec_ctx_t *ctx, int ctrl_id, ...);
|
||||
#if defined(AOM_DISABLE_CTRL_TYPECHECKS) && AOM_DISABLE_CTRL_TYPECHECKS
|
||||
#define aom_codec_control(ctx, id, data) aom_codec_control_(ctx, id, data)
|
||||
#define AOM_CTRL_USE_TYPE(id, typ)
|
||||
#define AOM_CTRL_USE_TYPE_DEPRECATED(id, typ)
|
||||
#define AOM_CTRL_VOID(id, typ)
|
||||
|
||||
#else
|
||||
/*!\brief aom_codec_control wrapper macro
|
||||
*
|
||||
* This macro allows for type safe conversions across the variadic parameter
|
||||
* to aom_codec_control_().
|
||||
*
|
||||
* \internal
|
||||
* It works by dispatching the call to the control function through a wrapper
|
||||
* function named with the id parameter.
|
||||
*/
|
||||
#define aom_codec_control(ctx, id, data) \
|
||||
aom_codec_control_##id(ctx, id, data) /**<\hideinitializer*/
|
||||
|
||||
/*!\brief aom_codec_control type definition macro
|
||||
*
|
||||
* This macro allows for type safe conversions across the variadic parameter
|
||||
* to aom_codec_control_(). It defines the type of the argument for a given
|
||||
* control identifier.
|
||||
*
|
||||
* \internal
|
||||
* It defines a static function with
|
||||
* the correctly typed arguments as a wrapper to the type-unsafe internal
|
||||
* function.
|
||||
*/
|
||||
#define AOM_CTRL_USE_TYPE(id, typ) \
|
||||
static aom_codec_err_t aom_codec_control_##id(aom_codec_ctx_t *, int, typ) \
|
||||
UNUSED; \
|
||||
\
|
||||
static aom_codec_err_t aom_codec_control_##id(aom_codec_ctx_t *ctx, \
|
||||
int ctrl_id, typ data) { \
|
||||
return aom_codec_control_(ctx, ctrl_id, data); \
|
||||
} /**<\hideinitializer*/
|
||||
|
||||
/*!\brief aom_codec_control deprecated type definition macro
|
||||
*
|
||||
* Like #AOM_CTRL_USE_TYPE, but indicates that the specified control is
|
||||
* deprecated and should not be used. Consult the documentation for your
|
||||
* codec for more information.
|
||||
*
|
||||
* \internal
|
||||
* It defines a static function with the correctly typed arguments as a
|
||||
* wrapper to the type-unsafe internal function.
|
||||
*/
|
||||
#define AOM_CTRL_USE_TYPE_DEPRECATED(id, typ) \
|
||||
DECLSPEC_DEPRECATED static aom_codec_err_t aom_codec_control_##id( \
|
||||
aom_codec_ctx_t *, int, typ) DEPRECATED UNUSED; \
|
||||
\
|
||||
DECLSPEC_DEPRECATED static aom_codec_err_t aom_codec_control_##id( \
|
||||
aom_codec_ctx_t *ctx, int ctrl_id, typ data) { \
|
||||
return aom_codec_control_(ctx, ctrl_id, data); \
|
||||
} /**<\hideinitializer*/
|
||||
|
||||
/*!\brief aom_codec_control void type definition macro
|
||||
*
|
||||
* This macro allows for type safe conversions across the variadic parameter
|
||||
* to aom_codec_control_(). It indicates that a given control identifier takes
|
||||
* no argument.
|
||||
*
|
||||
* \internal
|
||||
* It defines a static function without a data argument as a wrapper to the
|
||||
* type-unsafe internal function.
|
||||
*/
|
||||
#define AOM_CTRL_VOID(id) \
|
||||
static aom_codec_err_t aom_codec_control_##id(aom_codec_ctx_t *, int) \
|
||||
UNUSED; \
|
||||
\
|
||||
static aom_codec_err_t aom_codec_control_##id(aom_codec_ctx_t *ctx, \
|
||||
int ctrl_id) { \
|
||||
return aom_codec_control_(ctx, ctrl_id); \
|
||||
} /**<\hideinitializer*/
|
||||
|
||||
#endif
|
||||
|
||||
/*!@} - end defgroup codec*/
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
#endif // AOM_AOM_CODEC_H_
|
||||
@@ -1,42 +0,0 @@
|
||||
##
|
||||
## Copyright (c) 2016, Alliance for Open Media. All rights reserved
|
||||
##
|
||||
## This source code is subject to the terms of the BSD 2 Clause License and
|
||||
## the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
|
||||
## was not distributed with this source code in the LICENSE file, you can
|
||||
## obtain it at www.aomedia.org/license/software. If the Alliance for Open
|
||||
## Media Patent License 1.0 was not distributed with this source code in the
|
||||
## PATENTS file, you can obtain it at www.aomedia.org/license/patent.
|
||||
##
|
||||
|
||||
|
||||
API_EXPORTS += exports
|
||||
|
||||
API_SRCS-$(CONFIG_AV1_ENCODER) += aom.h
|
||||
API_SRCS-$(CONFIG_AV1_ENCODER) += aomcx.h
|
||||
API_DOC_SRCS-$(CONFIG_AV1_ENCODER) += aom.h
|
||||
API_DOC_SRCS-$(CONFIG_AV1_ENCODER) += aomcx.h
|
||||
|
||||
API_SRCS-$(CONFIG_AV1_DECODER) += aom.h
|
||||
API_SRCS-$(CONFIG_AV1_DECODER) += aomdx.h
|
||||
API_DOC_SRCS-$(CONFIG_AV1_DECODER) += aom.h
|
||||
API_DOC_SRCS-$(CONFIG_AV1_DECODER) += aomdx.h
|
||||
|
||||
API_DOC_SRCS-yes += aom_codec.h
|
||||
API_DOC_SRCS-yes += aom_decoder.h
|
||||
API_DOC_SRCS-yes += aom_encoder.h
|
||||
API_DOC_SRCS-yes += aom_frame_buffer.h
|
||||
API_DOC_SRCS-yes += aom_image.h
|
||||
|
||||
API_SRCS-yes += src/aom_decoder.c
|
||||
API_SRCS-yes += aom_decoder.h
|
||||
API_SRCS-yes += src/aom_encoder.c
|
||||
API_SRCS-yes += aom_encoder.h
|
||||
API_SRCS-yes += internal/aom_codec_internal.h
|
||||
API_SRCS-yes += src/aom_codec.c
|
||||
API_SRCS-yes += src/aom_image.c
|
||||
API_SRCS-yes += aom_codec.h
|
||||
API_SRCS-yes += aom_codec.mk
|
||||
API_SRCS-yes += aom_frame_buffer.h
|
||||
API_SRCS-yes += aom_image.h
|
||||
API_SRCS-yes += aom_integer.h
|
||||
@@ -1,366 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2016, Alliance for Open Media. All rights reserved
|
||||
*
|
||||
* This source code is subject to the terms of the BSD 2 Clause License and
|
||||
* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
|
||||
* was not distributed with this source code in the LICENSE file, you can
|
||||
* obtain it at www.aomedia.org/license/software. If the Alliance for Open
|
||||
* Media Patent License 1.0 was not distributed with this source code in the
|
||||
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
|
||||
*/
|
||||
#ifndef AOM_AOM_DECODER_H_
|
||||
#define AOM_AOM_DECODER_H_
|
||||
|
||||
/*!\defgroup decoder Decoder Algorithm Interface
|
||||
* \ingroup codec
|
||||
* This abstraction allows applications using this decoder to easily support
|
||||
* multiple video formats with minimal code duplication. This section describes
|
||||
* the interface common to all decoders.
|
||||
* @{
|
||||
*/
|
||||
|
||||
/*!\file
|
||||
* \brief Describes the decoder algorithm interface to applications.
|
||||
*
|
||||
* This file describes the interface between an application and a
|
||||
* video decoder algorithm.
|
||||
*
|
||||
*/
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#include "./aom_codec.h"
|
||||
#include "./aom_frame_buffer.h"
|
||||
|
||||
/*!\brief Current ABI version number
|
||||
*
|
||||
* \internal
|
||||
* If this file is altered in any way that changes the ABI, this value
|
||||
* must be bumped. Examples include, but are not limited to, changing
|
||||
* types, removing or reassigning enums, adding/removing/rearranging
|
||||
* fields to structures
|
||||
*/
|
||||
#define AOM_DECODER_ABI_VERSION \
|
||||
(3 + AOM_CODEC_ABI_VERSION) /**<\hideinitializer*/
|
||||
|
||||
/*! \brief Decoder capabilities bitfield
|
||||
*
|
||||
* Each decoder advertises the capabilities it supports as part of its
|
||||
* ::aom_codec_iface_t interface structure. Capabilities are extra interfaces
|
||||
* or functionality, and are not required to be supported by a decoder.
|
||||
*
|
||||
* The available flags are specified by AOM_CODEC_CAP_* defines.
|
||||
*/
|
||||
#define AOM_CODEC_CAP_PUT_SLICE 0x10000 /**< Will issue put_slice callbacks */
|
||||
#define AOM_CODEC_CAP_PUT_FRAME 0x20000 /**< Will issue put_frame callbacks */
|
||||
#define AOM_CODEC_CAP_POSTPROC 0x40000 /**< Can postprocess decoded frame */
|
||||
/*!\brief Can conceal errors due to packet loss */
|
||||
#define AOM_CODEC_CAP_ERROR_CONCEALMENT 0x80000
|
||||
/*!\brief Can receive encoded frames one fragment at a time */
|
||||
#define AOM_CODEC_CAP_INPUT_FRAGMENTS 0x100000
|
||||
|
||||
/*! \brief Initialization-time Feature Enabling
|
||||
*
|
||||
* Certain codec features must be known at initialization time, to allow for
|
||||
* proper memory allocation.
|
||||
*
|
||||
* The available flags are specified by AOM_CODEC_USE_* defines.
|
||||
*/
|
||||
/*!\brief Can support frame-based multi-threading */
|
||||
#define AOM_CODEC_CAP_FRAME_THREADING 0x200000
|
||||
/*!\brief Can support external frame buffers */
|
||||
#define AOM_CODEC_CAP_EXTERNAL_FRAME_BUFFER 0x400000
|
||||
|
||||
#define AOM_CODEC_USE_POSTPROC 0x10000 /**< Postprocess decoded frame */
|
||||
/*!\brief Conceal errors in decoded frames */
|
||||
#define AOM_CODEC_USE_ERROR_CONCEALMENT 0x20000
|
||||
/*!\brief The input frame should be passed to the decoder one fragment at a
|
||||
* time */
|
||||
#define AOM_CODEC_USE_INPUT_FRAGMENTS 0x40000
|
||||
/*!\brief Enable frame-based multi-threading */
|
||||
#define AOM_CODEC_USE_FRAME_THREADING 0x80000
|
||||
|
||||
/*!\brief Stream properties
|
||||
*
|
||||
* This structure is used to query or set properties of the decoded
|
||||
* stream. Algorithms may extend this structure with data specific
|
||||
* to their bitstream by setting the sz member appropriately.
|
||||
*/
|
||||
typedef struct aom_codec_stream_info {
|
||||
unsigned int sz; /**< Size of this structure */
|
||||
unsigned int w; /**< Width (or 0 for unknown/default) */
|
||||
unsigned int h; /**< Height (or 0 for unknown/default) */
|
||||
unsigned int is_kf; /**< Current frame is a keyframe */
|
||||
} aom_codec_stream_info_t; /**< alias for struct aom_codec_stream_info */
|
||||
|
||||
/* REQUIRED FUNCTIONS
|
||||
*
|
||||
* The following functions are required to be implemented for all decoders.
|
||||
* They represent the base case functionality expected of all decoders.
|
||||
*/
|
||||
|
||||
/*!\brief Initialization Configurations
|
||||
*
|
||||
* This structure is used to pass init time configuration options to the
|
||||
* decoder.
|
||||
*/
|
||||
typedef struct aom_codec_dec_cfg {
|
||||
unsigned int threads; /**< Maximum number of threads to use, default 1 */
|
||||
unsigned int w; /**< Width */
|
||||
unsigned int h; /**< Height */
|
||||
} aom_codec_dec_cfg_t; /**< alias for struct aom_codec_dec_cfg */
|
||||
|
||||
/*!\brief Initialize a decoder instance
|
||||
*
|
||||
* Initializes a decoder context using the given interface. Applications
|
||||
* should call the aom_codec_dec_init convenience macro instead of this
|
||||
* function directly, to ensure that the ABI version number parameter
|
||||
* is properly initialized.
|
||||
*
|
||||
* If the library was configured with --disable-multithread, this call
|
||||
* is not thread safe and should be guarded with a lock if being used
|
||||
* in a multithreaded context.
|
||||
*
|
||||
* \param[in] ctx Pointer to this instance's context.
|
||||
* \param[in] iface Pointer to the algorithm interface to use.
|
||||
* \param[in] cfg Configuration to use, if known. May be NULL.
|
||||
* \param[in] flags Bitfield of AOM_CODEC_USE_* flags
|
||||
* \param[in] ver ABI version number. Must be set to
|
||||
* AOM_DECODER_ABI_VERSION
|
||||
* \retval #AOM_CODEC_OK
|
||||
* The decoder algorithm initialized.
|
||||
* \retval #AOM_CODEC_MEM_ERROR
|
||||
* Memory allocation failed.
|
||||
*/
|
||||
aom_codec_err_t aom_codec_dec_init_ver(aom_codec_ctx_t *ctx,
|
||||
aom_codec_iface_t *iface,
|
||||
const aom_codec_dec_cfg_t *cfg,
|
||||
aom_codec_flags_t flags, int ver);
|
||||
|
||||
/*!\brief Convenience macro for aom_codec_dec_init_ver()
|
||||
*
|
||||
* Ensures the ABI version parameter is properly set.
|
||||
*/
|
||||
#define aom_codec_dec_init(ctx, iface, cfg, flags) \
|
||||
aom_codec_dec_init_ver(ctx, iface, cfg, flags, AOM_DECODER_ABI_VERSION)
|
||||
|
||||
/*!\brief Parse stream info from a buffer
|
||||
*
|
||||
* Performs high level parsing of the bitstream. Construction of a decoder
|
||||
* context is not necessary. Can be used to determine if the bitstream is
|
||||
* of the proper format, and to extract information from the stream.
|
||||
*
|
||||
* \param[in] iface Pointer to the algorithm interface
|
||||
* \param[in] data Pointer to a block of data to parse
|
||||
* \param[in] data_sz Size of the data buffer
|
||||
* \param[in,out] si Pointer to stream info to update. The size member
|
||||
* \ref MUST be properly initialized, but \ref MAY be
|
||||
* clobbered by the algorithm. This parameter \ref MAY
|
||||
* be NULL.
|
||||
*
|
||||
* \retval #AOM_CODEC_OK
|
||||
* Bitstream is parsable and stream information updated
|
||||
*/
|
||||
aom_codec_err_t aom_codec_peek_stream_info(aom_codec_iface_t *iface,
|
||||
const uint8_t *data,
|
||||
unsigned int data_sz,
|
||||
aom_codec_stream_info_t *si);
|
||||
|
||||
/*!\brief Return information about the current stream.
|
||||
*
|
||||
* Returns information about the stream that has been parsed during decoding.
|
||||
*
|
||||
* \param[in] ctx Pointer to this instance's context
|
||||
* \param[in,out] si Pointer to stream info to update. The size member
|
||||
* \ref MUST be properly initialized, but \ref MAY be
|
||||
* clobbered by the algorithm. This parameter \ref MAY
|
||||
* be NULL.
|
||||
*
|
||||
* \retval #AOM_CODEC_OK
|
||||
* Bitstream is parsable and stream information updated
|
||||
*/
|
||||
aom_codec_err_t aom_codec_get_stream_info(aom_codec_ctx_t *ctx,
|
||||
aom_codec_stream_info_t *si);
|
||||
|
||||
/*!\brief Decode data
|
||||
*
|
||||
* Processes a buffer of coded data. If the processing results in a new
|
||||
* decoded frame becoming available, PUT_SLICE and PUT_FRAME events may be
|
||||
* generated, as appropriate. Encoded data \ref MUST be passed in DTS (decode
|
||||
* time stamp) order. Frames produced will always be in PTS (presentation
|
||||
* time stamp) order.
|
||||
* If the decoder is configured with AOM_CODEC_USE_INPUT_FRAGMENTS enabled,
|
||||
* data and data_sz can contain a fragment of the encoded frame. Fragment
|
||||
* \#n must contain at least partition \#n, but can also contain subsequent
|
||||
* partitions (\#n+1 - \#n+i), and if so, fragments \#n+1, .., \#n+i must
|
||||
* be empty. When no more data is available, this function should be called
|
||||
* with NULL as data and 0 as data_sz. The memory passed to this function
|
||||
* must be available until the frame has been decoded.
|
||||
*
|
||||
* \param[in] ctx Pointer to this instance's context
|
||||
* \param[in] data Pointer to this block of new coded data. If
|
||||
* NULL, an AOM_CODEC_CB_PUT_FRAME event is posted
|
||||
* for the previously decoded frame.
|
||||
* \param[in] data_sz Size of the coded data, in bytes.
|
||||
* \param[in] user_priv Application specific data to associate with
|
||||
* this frame.
|
||||
* \param[in] deadline Soft deadline the decoder should attempt to meet,
|
||||
* in us. Set to zero for unlimited.
|
||||
*
|
||||
* \return Returns #AOM_CODEC_OK if the coded data was processed completely
|
||||
* and future pictures can be decoded without error. Otherwise,
|
||||
* see the descriptions of the other error codes in ::aom_codec_err_t
|
||||
* for recoverability capabilities.
|
||||
*/
|
||||
aom_codec_err_t aom_codec_decode(aom_codec_ctx_t *ctx, const uint8_t *data,
|
||||
unsigned int data_sz, void *user_priv,
|
||||
long deadline);
|
||||
|
||||
/*!\brief Decoded frames iterator
|
||||
*
|
||||
* Iterates over a list of the frames available for display. The iterator
|
||||
* storage should be initialized to NULL to start the iteration. Iteration is
|
||||
* complete when this function returns NULL.
|
||||
*
|
||||
* The list of available frames becomes valid upon completion of the
|
||||
* aom_codec_decode call, and remains valid until the next call to
|
||||
* aom_codec_decode.
|
||||
*
|
||||
* \param[in] ctx Pointer to this instance's context
|
||||
* \param[in,out] iter Iterator storage, initialized to NULL
|
||||
*
|
||||
* \return Returns a pointer to an image, if one is ready for display. Frames
|
||||
* produced will always be in PTS (presentation time stamp) order.
|
||||
*/
|
||||
aom_image_t *aom_codec_get_frame(aom_codec_ctx_t *ctx, aom_codec_iter_t *iter);
|
||||
|
||||
/*!\defgroup cap_put_frame Frame-Based Decoding Functions
|
||||
*
|
||||
* The following functions are required to be implemented for all decoders
|
||||
* that advertise the AOM_CODEC_CAP_PUT_FRAME capability. Calling these
|
||||
* functions
|
||||
* for codecs that don't advertise this capability will result in an error
|
||||
* code being returned, usually AOM_CODEC_ERROR
|
||||
* @{
|
||||
*/
|
||||
|
||||
/*!\brief put frame callback prototype
|
||||
*
|
||||
* This callback is invoked by the decoder to notify the application of
|
||||
* the availability of decoded image data.
|
||||
*/
|
||||
typedef void (*aom_codec_put_frame_cb_fn_t)(void *user_priv,
|
||||
const aom_image_t *img);
|
||||
|
||||
/*!\brief Register for notification of frame completion.
|
||||
*
|
||||
* Registers a given function to be called when a decoded frame is
|
||||
* available.
|
||||
*
|
||||
* \param[in] ctx Pointer to this instance's context
|
||||
* \param[in] cb Pointer to the callback function
|
||||
* \param[in] user_priv User's private data
|
||||
*
|
||||
* \retval #AOM_CODEC_OK
|
||||
* Callback successfully registered.
|
||||
* \retval #AOM_CODEC_ERROR
|
||||
* Decoder context not initialized, or algorithm not capable of
|
||||
* posting frame completion.
|
||||
*/
|
||||
aom_codec_err_t aom_codec_register_put_frame_cb(aom_codec_ctx_t *ctx,
|
||||
aom_codec_put_frame_cb_fn_t cb,
|
||||
void *user_priv);
|
||||
|
||||
/*!@} - end defgroup cap_put_frame */
|
||||
|
||||
/*!\defgroup cap_put_slice Slice-Based Decoding Functions
|
||||
*
|
||||
* The following functions are required to be implemented for all decoders
|
||||
* that advertise the AOM_CODEC_CAP_PUT_SLICE capability. Calling these
|
||||
* functions
|
||||
* for codecs that don't advertise this capability will result in an error
|
||||
* code being returned, usually AOM_CODEC_ERROR
|
||||
* @{
|
||||
*/
|
||||
|
||||
/*!\brief put slice callback prototype
|
||||
*
|
||||
* This callback is invoked by the decoder to notify the application of
|
||||
* the availability of partially decoded image data.
|
||||
*/
|
||||
typedef void (*aom_codec_put_slice_cb_fn_t)(void *user_priv,
|
||||
const aom_image_t *img,
|
||||
const aom_image_rect_t *valid,
|
||||
const aom_image_rect_t *update);
|
||||
|
||||
/*!\brief Register for notification of slice completion.
|
||||
*
|
||||
* Registers a given function to be called when a decoded slice is
|
||||
* available.
|
||||
*
|
||||
* \param[in] ctx Pointer to this instance's context
|
||||
* \param[in] cb Pointer to the callback function
|
||||
* \param[in] user_priv User's private data
|
||||
*
|
||||
* \retval #AOM_CODEC_OK
|
||||
* Callback successfully registered.
|
||||
* \retval #AOM_CODEC_ERROR
|
||||
* Decoder context not initialized, or algorithm not capable of
|
||||
* posting slice completion.
|
||||
*/
|
||||
aom_codec_err_t aom_codec_register_put_slice_cb(aom_codec_ctx_t *ctx,
|
||||
aom_codec_put_slice_cb_fn_t cb,
|
||||
void *user_priv);
|
||||
|
||||
/*!@} - end defgroup cap_put_slice*/
|
||||
|
||||
/*!\defgroup cap_external_frame_buffer External Frame Buffer Functions
|
||||
*
|
||||
* The following section is required to be implemented for all decoders
|
||||
* that advertise the AOM_CODEC_CAP_EXTERNAL_FRAME_BUFFER capability.
|
||||
* Calling this function for codecs that don't advertise this capability
|
||||
* will result in an error code being returned, usually AOM_CODEC_ERROR.
|
||||
*
|
||||
* \note
|
||||
* Currently this only works with AV1.
|
||||
* @{
|
||||
*/
|
||||
|
||||
/*!\brief Pass in external frame buffers for the decoder to use.
|
||||
*
|
||||
* Registers functions to be called when libaom needs a frame buffer
|
||||
* to decode the current frame and a function to be called when libaom does
|
||||
* not internally reference the frame buffer. This set function must
|
||||
* be called before the first call to decode or libaom will assume the
|
||||
* default behavior of allocating frame buffers internally.
|
||||
*
|
||||
* \param[in] ctx Pointer to this instance's context
|
||||
* \param[in] cb_get Pointer to the get callback function
|
||||
* \param[in] cb_release Pointer to the release callback function
|
||||
* \param[in] cb_priv Callback's private data
|
||||
*
|
||||
* \retval #AOM_CODEC_OK
|
||||
* External frame buffers will be used by libaom.
|
||||
* \retval #AOM_CODEC_INVALID_PARAM
|
||||
* One or more of the callbacks were NULL.
|
||||
* \retval #AOM_CODEC_ERROR
|
||||
* Decoder context not initialized, or algorithm not capable of
|
||||
* using external frame buffers.
|
||||
*
|
||||
* \note
|
||||
* When decoding AV1, the application may be required to pass in at least
|
||||
* #AOM_MAXIMUM_WORK_BUFFERS external frame
|
||||
* buffers.
|
||||
*/
|
||||
aom_codec_err_t aom_codec_set_frame_buffer_functions(
|
||||
aom_codec_ctx_t *ctx, aom_get_frame_buffer_cb_fn_t cb_get,
|
||||
aom_release_frame_buffer_cb_fn_t cb_release, void *cb_priv);
|
||||
|
||||
/*!@} - end defgroup cap_external_frame_buffer */
|
||||
|
||||
/*!@} - end defgroup decoder*/
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
#endif // AOM_AOM_DECODER_H_
|
||||
@@ -1,837 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2016, Alliance for Open Media. All rights reserved
|
||||
*
|
||||
* This source code is subject to the terms of the BSD 2 Clause License and
|
||||
* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
|
||||
* was not distributed with this source code in the LICENSE file, you can
|
||||
* obtain it at www.aomedia.org/license/software. If the Alliance for Open
|
||||
* Media Patent License 1.0 was not distributed with this source code in the
|
||||
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
|
||||
*/
|
||||
#ifndef AOM_AOM_ENCODER_H_
|
||||
#define AOM_AOM_ENCODER_H_
|
||||
|
||||
/*!\defgroup encoder Encoder Algorithm Interface
|
||||
* \ingroup codec
|
||||
* This abstraction allows applications using this encoder to easily support
|
||||
* multiple video formats with minimal code duplication. This section describes
|
||||
* the interface common to all encoders.
|
||||
* @{
|
||||
*/
|
||||
|
||||
/*!\file
|
||||
* \brief Describes the encoder algorithm interface to applications.
|
||||
*
|
||||
* This file describes the interface between an application and a
|
||||
* video encoder algorithm.
|
||||
*
|
||||
*/
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#include "./aom_codec.h"
|
||||
|
||||
/*!\brief Current ABI version number
|
||||
*
|
||||
* \internal
|
||||
* If this file is altered in any way that changes the ABI, this value
|
||||
* must be bumped. Examples include, but are not limited to, changing
|
||||
* types, removing or reassigning enums, adding/removing/rearranging
|
||||
* fields to structures
|
||||
*/
|
||||
#define AOM_ENCODER_ABI_VERSION \
|
||||
(5 + AOM_CODEC_ABI_VERSION) /**<\hideinitializer*/
|
||||
|
||||
/*! \brief Encoder capabilities bitfield
|
||||
*
|
||||
* Each encoder advertises the capabilities it supports as part of its
|
||||
* ::aom_codec_iface_t interface structure. Capabilities are extra
|
||||
* interfaces or functionality, and are not required to be supported
|
||||
* by an encoder.
|
||||
*
|
||||
* The available flags are specified by AOM_CODEC_CAP_* defines.
|
||||
*/
|
||||
#define AOM_CODEC_CAP_PSNR 0x10000 /**< Can issue PSNR packets */
|
||||
|
||||
/*! Can output one partition at a time. Each partition is returned in its
|
||||
* own AOM_CODEC_CX_FRAME_PKT, with the FRAME_IS_FRAGMENT flag set for
|
||||
* every partition but the last. In this mode all frames are always
|
||||
* returned partition by partition.
|
||||
*/
|
||||
#define AOM_CODEC_CAP_OUTPUT_PARTITION 0x20000
|
||||
|
||||
/*! Can support input images at greater than 8 bitdepth.
|
||||
*/
|
||||
#define AOM_CODEC_CAP_HIGHBITDEPTH 0x40000
|
||||
|
||||
/*! \brief Initialization-time Feature Enabling
|
||||
*
|
||||
* Certain codec features must be known at initialization time, to allow
|
||||
* for proper memory allocation.
|
||||
*
|
||||
* The available flags are specified by AOM_CODEC_USE_* defines.
|
||||
*/
|
||||
#define AOM_CODEC_USE_PSNR 0x10000 /**< Calculate PSNR on each frame */
|
||||
/*!\brief Make the encoder output one partition at a time. */
|
||||
#define AOM_CODEC_USE_OUTPUT_PARTITION 0x20000
|
||||
#define AOM_CODEC_USE_HIGHBITDEPTH 0x40000 /**< Use high bitdepth */
|
||||
|
||||
/*!\brief Generic fixed size buffer structure
|
||||
*
|
||||
* This structure is able to hold a reference to any fixed size buffer.
|
||||
*/
|
||||
typedef struct aom_fixed_buf {
|
||||
void *buf; /**< Pointer to the data */
|
||||
size_t sz; /**< Length of the buffer, in chars */
|
||||
} aom_fixed_buf_t; /**< alias for struct aom_fixed_buf */
|
||||
|
||||
/*!\brief Time Stamp Type
|
||||
*
|
||||
* An integer, which when multiplied by the stream's time base, provides
|
||||
* the absolute time of a sample.
|
||||
*/
|
||||
typedef int64_t aom_codec_pts_t;
|
||||
|
||||
/*!\brief Compressed Frame Flags
|
||||
*
|
||||
* This type represents a bitfield containing information about a compressed
|
||||
* frame that may be useful to an application. The most significant 16 bits
|
||||
* can be used by an algorithm to provide additional detail, for example to
|
||||
* support frame types that are codec specific (MPEG-1 D-frames for example)
|
||||
*/
|
||||
typedef uint32_t aom_codec_frame_flags_t;
|
||||
#define AOM_FRAME_IS_KEY 0x1 /**< frame is the start of a GOP */
|
||||
/*!\brief frame can be dropped without affecting the stream (no future frame
|
||||
* depends on this one) */
|
||||
#define AOM_FRAME_IS_DROPPABLE 0x2
|
||||
/*!\brief frame should be decoded but will not be shown */
|
||||
#define AOM_FRAME_IS_INVISIBLE 0x4
|
||||
/*!\brief this is a fragment of the encoded frame */
|
||||
#define AOM_FRAME_IS_FRAGMENT 0x8
|
||||
|
||||
/*!\brief Error Resilient flags
|
||||
*
|
||||
* These flags define which error resilient features to enable in the
|
||||
* encoder. The flags are specified through the
|
||||
* aom_codec_enc_cfg::g_error_resilient variable.
|
||||
*/
|
||||
typedef uint32_t aom_codec_er_flags_t;
|
||||
/*!\brief Improve resiliency against losses of whole frames */
|
||||
#define AOM_ERROR_RESILIENT_DEFAULT 0x1
|
||||
/*!\brief The frame partitions are independently decodable by the bool decoder,
|
||||
* meaning that partitions can be decoded even though earlier partitions have
|
||||
* been lost. Note that intra prediction is still done over the partition
|
||||
* boundary. */
|
||||
#define AOM_ERROR_RESILIENT_PARTITIONS 0x2
|
||||
|
||||
/*!\brief Encoder output packet variants
|
||||
*
|
||||
* This enumeration lists the different kinds of data packets that can be
|
||||
* returned by calls to aom_codec_get_cx_data(). Algorithms \ref MAY
|
||||
* extend this list to provide additional functionality.
|
||||
*/
|
||||
enum aom_codec_cx_pkt_kind {
|
||||
AOM_CODEC_CX_FRAME_PKT, /**< Compressed video frame */
|
||||
AOM_CODEC_STATS_PKT, /**< Two-pass statistics for this frame */
|
||||
AOM_CODEC_FPMB_STATS_PKT, /**< first pass mb statistics for this frame */
|
||||
AOM_CODEC_PSNR_PKT, /**< PSNR statistics for this frame */
|
||||
AOM_CODEC_CUSTOM_PKT = 256 /**< Algorithm extensions */
|
||||
};
|
||||
|
||||
/*!\brief Encoder output packet
|
||||
*
|
||||
* This structure contains the different kinds of output data the encoder
|
||||
* may produce while compressing a frame.
|
||||
*/
|
||||
typedef struct aom_codec_cx_pkt {
|
||||
enum aom_codec_cx_pkt_kind kind; /**< packet variant */
|
||||
union {
|
||||
struct {
|
||||
void *buf; /**< compressed data buffer */
|
||||
size_t sz; /**< length of compressed data */
|
||||
/*!\brief time stamp to show frame (in timebase units) */
|
||||
aom_codec_pts_t pts;
|
||||
/*!\brief duration to show frame (in timebase units) */
|
||||
unsigned long duration;
|
||||
aom_codec_frame_flags_t flags; /**< flags for this frame */
|
||||
/*!\brief the partition id defines the decoding order of the partitions.
|
||||
* Only applicable when "output partition" mode is enabled. First
|
||||
* partition has id 0.*/
|
||||
int partition_id;
|
||||
} frame; /**< data for compressed frame packet */
|
||||
aom_fixed_buf_t twopass_stats; /**< data for two-pass packet */
|
||||
aom_fixed_buf_t firstpass_mb_stats; /**< first pass mb packet */
|
||||
struct aom_psnr_pkt {
|
||||
unsigned int samples[4]; /**< Number of samples, total/y/u/v */
|
||||
uint64_t sse[4]; /**< sum squared error, total/y/u/v */
|
||||
double psnr[4]; /**< PSNR, total/y/u/v */
|
||||
} psnr; /**< data for PSNR packet */
|
||||
aom_fixed_buf_t raw; /**< data for arbitrary packets */
|
||||
|
||||
/* This packet size is fixed to allow codecs to extend this
|
||||
* interface without having to manage storage for raw packets,
|
||||
* i.e., if it's smaller than 128 bytes, you can store in the
|
||||
* packet list directly.
|
||||
*/
|
||||
char pad[128 - sizeof(enum aom_codec_cx_pkt_kind)]; /**< fixed sz */
|
||||
} data; /**< packet data */
|
||||
} aom_codec_cx_pkt_t; /**< alias for struct aom_codec_cx_pkt */
|
||||
|
||||
/*!\brief Rational Number
|
||||
*
|
||||
* This structure holds a fractional value.
|
||||
*/
|
||||
typedef struct aom_rational {
|
||||
int num; /**< fraction numerator */
|
||||
int den; /**< fraction denominator */
|
||||
} aom_rational_t; /**< alias for struct aom_rational */
|
||||
|
||||
/*!\brief Multi-pass Encoding Pass */
|
||||
enum aom_enc_pass {
|
||||
AOM_RC_ONE_PASS, /**< Single pass mode */
|
||||
AOM_RC_FIRST_PASS, /**< First pass of multi-pass mode */
|
||||
AOM_RC_LAST_PASS /**< Final pass of multi-pass mode */
|
||||
};
|
||||
|
||||
/*!\brief Rate control mode */
|
||||
enum aom_rc_mode {
|
||||
AOM_VBR, /**< Variable Bit Rate (VBR) mode */
|
||||
AOM_CBR, /**< Constant Bit Rate (CBR) mode */
|
||||
AOM_CQ, /**< Constrained Quality (CQ) mode */
|
||||
AOM_Q, /**< Constant Quality (Q) mode */
|
||||
};
|
||||
|
||||
/*!\brief Keyframe placement mode.
|
||||
*
|
||||
* This enumeration determines whether keyframes are placed automatically by
|
||||
* the encoder or whether this behavior is disabled. Older releases of this
|
||||
* SDK were implemented such that AOM_KF_FIXED meant keyframes were disabled.
|
||||
* This name is confusing for this behavior, so the new symbols to be used
|
||||
* are AOM_KF_AUTO and AOM_KF_DISABLED.
|
||||
*/
|
||||
enum aom_kf_mode {
|
||||
AOM_KF_FIXED, /**< deprecated, implies AOM_KF_DISABLED */
|
||||
AOM_KF_AUTO, /**< Encoder determines optimal placement automatically */
|
||||
AOM_KF_DISABLED = 0 /**< Encoder does not place keyframes; explicitly 0, the same value as the deprecated AOM_KF_FIXED. */
|
||||
};
|
||||
|
||||
/*!\brief Encoded Frame Flags
|
||||
*
|
||||
* This type indicates a bitfield to be passed to aom_codec_encode(), defining
|
||||
* per-frame boolean values. By convention, bits common to all codecs will be
|
||||
* named AOM_EFLAG_*, and bits specific to an algorithm will be named
|
||||
* /algo/_eflag_*. The lower order 16 bits are reserved for common use.
|
||||
*/
|
||||
typedef long aom_enc_frame_flags_t;
|
||||
#define AOM_EFLAG_FORCE_KF (1 << 0) /**< Force this frame to be a keyframe */
|
||||
|
||||
/*!\brief Encoder configuration structure
|
||||
*
|
||||
* This structure contains the encoder settings that have common representations
|
||||
* across all codecs. This doesn't imply that all codecs support all features,
|
||||
* however.
|
||||
*/
|
||||
typedef struct aom_codec_enc_cfg {
|
||||
/*
|
||||
* generic settings (g)
|
||||
*/
|
||||
|
||||
/*!\brief Algorithm specific "usage" value
|
||||
*
|
||||
* Algorithms may define multiple values for usage, which may convey the
|
||||
* intent of how the application intends to use the stream. If this value
|
||||
* is non-zero, consult the documentation for the codec to determine its
|
||||
* meaning.
|
||||
*/
|
||||
unsigned int g_usage;
|
||||
|
||||
/*!\brief Maximum number of threads to use
|
||||
*
|
||||
* For multi-threaded implementations, use no more than this number of
|
||||
* threads. The codec may use fewer threads than allowed. The value
|
||||
* 0 is equivalent to the value 1.
|
||||
*/
|
||||
unsigned int g_threads;
|
||||
|
||||
/*!\brief Bitstream profile to use
|
||||
*
|
||||
* Some codecs support a notion of multiple bitstream profiles. Typically
|
||||
* this maps to a set of features that are turned on or off. Often the
|
||||
* profile to use is determined by the features of the intended decoder.
|
||||
* Consult the documentation for the codec to determine the valid values
|
||||
* for this parameter, or set to zero for a sane default.
|
||||
*/
|
||||
unsigned int g_profile; /**< profile of bitstream to use */
|
||||
|
||||
/*!\brief Width of the frame
|
||||
*
|
||||
* This value identifies the presentation resolution of the frame,
|
||||
* in pixels. Note that the frames passed as input to the encoder must
|
||||
* have this resolution. Frames will be presented by the decoder in this
|
||||
* resolution, independent of any spatial resampling the encoder may do.
|
||||
*/
|
||||
unsigned int g_w;
|
||||
|
||||
/*!\brief Height of the frame
|
||||
*
|
||||
* This value identifies the presentation resolution of the frame,
|
||||
* in pixels. Note that the frames passed as input to the encoder must
|
||||
* have this resolution. Frames will be presented by the decoder in this
|
||||
* resolution, independent of any spatial resampling the encoder may do.
|
||||
*/
|
||||
unsigned int g_h;
|
||||
|
||||
/*!\brief Bit-depth of the codec
|
||||
*
|
||||
* This value identifies the bit_depth of the codec.
|
||||
* Only certain bit-depths are supported as identified in the
|
||||
* aom_bit_depth_t enum.
|
||||
*/
|
||||
aom_bit_depth_t g_bit_depth;
|
||||
|
||||
/*!\brief Bit-depth of the input frames
|
||||
*
|
||||
* This value identifies the bit_depth of the input frames in bits.
|
||||
* Note that the frames passed as input to the encoder must have
|
||||
* this bit-depth.
|
||||
*/
|
||||
unsigned int g_input_bit_depth;
|
||||
|
||||
/*!\brief Stream timebase units
|
||||
*
|
||||
* Indicates the smallest interval of time, in seconds, used by the stream.
|
||||
* For fixed frame rate material, or variable frame rate material where
|
||||
* frames are timed at a multiple of a given clock (ex: video capture),
|
||||
* the \ref RECOMMENDED method is to set the timebase to the reciprocal
|
||||
* of the frame rate (ex: 1001/30000 for 29.970 Hz NTSC). This allows the
|
||||
* pts to correspond to the frame number, which can be handy. For
|
||||
* re-encoding video from containers with absolute time timestamps, the
|
||||
* \ref RECOMMENDED method is to set the timebase to that of the parent
|
||||
* container or multimedia framework (ex: 1/1000 for ms, as in FLV).
|
||||
*/
|
||||
struct aom_rational g_timebase;
|
||||
|
||||
/*!\brief Enable error resilient modes.
|
||||
*
|
||||
* The error resilient bitfield indicates to the encoder which features
|
||||
* it should enable to take measures for streaming over lossy or noisy
|
||||
* links.
|
||||
*/
|
||||
aom_codec_er_flags_t g_error_resilient;
|
||||
|
||||
/*!\brief Multi-pass Encoding Mode
|
||||
*
|
||||
* This value should be set to the current phase for multi-pass encoding.
|
||||
* For single pass, set to #AOM_RC_ONE_PASS.
|
||||
*/
|
||||
enum aom_enc_pass g_pass;
|
||||
|
||||
/*!\brief Allow lagged encoding
|
||||
*
|
||||
* If set, this value allows the encoder to consume a number of input
|
||||
* frames before producing output frames. This allows the encoder to
|
||||
* base decisions for the current frame on future frames. This does
|
||||
* increase the latency of the encoding pipeline, so it is not appropriate
|
||||
* in all situations (ex: realtime encoding).
|
||||
*
|
||||
* Note that this is a maximum value -- the encoder may produce frames
|
||||
* sooner than the given limit. Set this value to 0 to disable this
|
||||
* feature.
|
||||
*/
|
||||
unsigned int g_lag_in_frames;
|
||||
|
||||
/*
|
||||
* rate control settings (rc)
|
||||
*/
|
||||
|
||||
/*!\brief Temporal resampling configuration, if supported by the codec.
|
||||
*
|
||||
* Temporal resampling allows the codec to "drop" frames as a strategy to
|
||||
* meet its target data rate. This can cause temporal discontinuities in
|
||||
* the encoded video, which may appear as stuttering during playback. This
|
||||
* trade-off is often acceptable, but for many applications is not. It can
|
||||
* be disabled in these cases.
|
||||
*
|
||||
* Note that not all codecs support this feature. All aom AVx codecs do.
|
||||
* For other codecs, consult the documentation for that algorithm.
|
||||
*
|
||||
* This threshold is described as a percentage of the target data buffer.
|
||||
* When the data buffer falls below this percentage of fullness, a
|
||||
* dropped frame is indicated. Set the threshold to zero (0) to disable
|
||||
* this feature.
|
||||
*/
|
||||
unsigned int rc_dropframe_thresh;
|
||||
|
||||
/*!\brief Enable/disable spatial resampling, if supported by the codec.
|
||||
*
|
||||
* Spatial resampling allows the codec to compress a lower resolution
|
||||
* version of the frame, which is then upscaled by the decoder to the
|
||||
* correct presentation resolution. This increases visual quality at
|
||||
* low data rates, at the expense of CPU time on the encoder/decoder.
|
||||
*/
|
||||
unsigned int rc_resize_allowed;
|
||||
|
||||
/*!\brief Internal coded frame width.
|
||||
*
|
||||
* If spatial resampling is enabled this specifies the width of the
|
||||
* encoded frame.
|
||||
*/
|
||||
unsigned int rc_scaled_width;
|
||||
|
||||
/*!\brief Internal coded frame height.
|
||||
*
|
||||
* If spatial resampling is enabled this specifies the height of the
|
||||
* encoded frame.
|
||||
*/
|
||||
unsigned int rc_scaled_height;
|
||||
|
||||
/*!\brief Spatial resampling up watermark.
|
||||
*
|
||||
* This threshold is described as a percentage of the target data buffer.
|
||||
* When the data buffer rises above this percentage of fullness, the
|
||||
* encoder will step up to a higher resolution version of the frame.
|
||||
*/
|
||||
unsigned int rc_resize_up_thresh;
|
||||
|
||||
/*!\brief Spatial resampling down watermark.
|
||||
*
|
||||
* This threshold is described as a percentage of the target data buffer.
|
||||
* When the data buffer falls below this percentage of fullness, the
|
||||
* encoder will step down to a lower resolution version of the frame.
|
||||
*/
|
||||
unsigned int rc_resize_down_thresh;
|
||||
|
||||
/*!\brief Rate control algorithm to use.
|
||||
*
|
||||
* Indicates whether the end usage of this stream is to be streamed over
|
||||
* a bandwidth constrained link, indicating that Constant Bit Rate (CBR)
|
||||
* mode should be used, or whether it will be played back on a high
|
||||
* bandwidth link, as from a local disk, where higher variations in
|
||||
* bitrate are acceptable.
|
||||
*/
|
||||
enum aom_rc_mode rc_end_usage;
|
||||
|
||||
/*!\brief Two-pass stats buffer.
|
||||
*
|
||||
* A buffer containing all of the stats packets produced in the first
|
||||
* pass, concatenated.
|
||||
*/
|
||||
aom_fixed_buf_t rc_twopass_stats_in;
|
||||
|
||||
/*!\brief First pass mb stats buffer.
|
||||
*
|
||||
* A buffer containing all of the first pass mb stats packets produced
|
||||
* in the first pass, concatenated.
|
||||
*/
|
||||
aom_fixed_buf_t rc_firstpass_mb_stats_in;
|
||||
|
||||
/*!\brief Target data rate
|
||||
*
|
||||
* Target bandwidth to use for this stream, in kilobits per second.
|
||||
*/
|
||||
unsigned int rc_target_bitrate;
|
||||
|
||||
/*
|
||||
* quantizer settings
|
||||
*/
|
||||
|
||||
/*!\brief Minimum (Best Quality) Quantizer
|
||||
*
|
||||
* The quantizer is the most direct control over the quality of the
|
||||
* encoded image. The range of valid values for the quantizer is codec
|
||||
* specific. Consult the documentation for the codec to determine the
|
||||
* values to use. To determine the range programmatically, call
|
||||
* aom_codec_enc_config_default() with a usage value of 0.
|
||||
*/
|
||||
unsigned int rc_min_quantizer;
|
||||
|
||||
/*!\brief Maximum (Worst Quality) Quantizer
|
||||
*
|
||||
* The quantizer is the most direct control over the quality of the
|
||||
* encoded image. The range of valid values for the quantizer is codec
|
||||
* specific. Consult the documentation for the codec to determine the
|
||||
* values to use. To determine the range programmatically, call
|
||||
* aom_codec_enc_config_default() with a usage value of 0.
|
||||
*/
|
||||
unsigned int rc_max_quantizer;
|
||||
|
||||
/*
|
||||
* bitrate tolerance
|
||||
*/
|
||||
|
||||
/*!\brief Rate control adaptation undershoot control
|
||||
*
|
||||
* This value, expressed as a percentage of the target bitrate,
|
||||
* controls the maximum allowed adaptation speed of the codec.
|
||||
* This factor controls the maximum amount of bits that can
|
||||
* be subtracted from the target bitrate in order to compensate
|
||||
* for prior overshoot.
|
||||
*
|
||||
* Valid values in the range 0-1000.
|
||||
*/
|
||||
unsigned int rc_undershoot_pct;
|
||||
|
||||
/*!\brief Rate control adaptation overshoot control
|
||||
*
|
||||
* This value, expressed as a percentage of the target bitrate,
|
||||
* controls the maximum allowed adaptation speed of the codec.
|
||||
* This factor controls the maximum amount of bits that can
|
||||
* be added to the target bitrate in order to compensate for
|
||||
* prior undershoot.
|
||||
*
|
||||
* Valid values in the range 0-1000.
|
||||
*/
|
||||
unsigned int rc_overshoot_pct;
|
||||
|
||||
/*
|
||||
* decoder buffer model parameters
|
||||
*/
|
||||
|
||||
/*!\brief Decoder Buffer Size
|
||||
*
|
||||
* This value indicates the amount of data that may be buffered by the
|
||||
* decoding application. Note that this value is expressed in units of
|
||||
* time (milliseconds). For example, a value of 5000 indicates that the
|
||||
* client will buffer (at least) 5000ms worth of encoded data. Use the
|
||||
* target bitrate (#rc_target_bitrate) to convert to bits/bytes, if
|
||||
* necessary.
|
||||
*/
|
||||
unsigned int rc_buf_sz;
|
||||
|
||||
/*!\brief Decoder Buffer Initial Size
|
||||
*
|
||||
* This value indicates the amount of data that will be buffered by the
|
||||
* decoding application prior to beginning playback. This value is
|
||||
* expressed in units of time (milliseconds). Use the target bitrate
|
||||
* (#rc_target_bitrate) to convert to bits/bytes, if necessary.
|
||||
*/
|
||||
unsigned int rc_buf_initial_sz;
|
||||
|
||||
/*!\brief Decoder Buffer Optimal Size
|
||||
*
|
||||
* This value indicates the amount of data that the encoder should try
|
||||
* to maintain in the decoder's buffer. This value is expressed in units
|
||||
* of time (milliseconds). Use the target bitrate (#rc_target_bitrate)
|
||||
* to convert to bits/bytes, if necessary.
|
||||
*/
|
||||
unsigned int rc_buf_optimal_sz;
|
||||
|
||||
/*
|
||||
* 2 pass rate control parameters
|
||||
*/
|
||||
|
||||
/*!\brief Two-pass mode CBR/VBR bias
|
||||
*
|
||||
* Bias, expressed on a scale of 0 to 100, for determining target size
|
||||
* for the current frame. The value 0 indicates the optimal CBR mode
|
||||
* value should be used. The value 100 indicates the optimal VBR mode
|
||||
* value should be used. Values in between indicate which way the
|
||||
* encoder should "lean."
|
||||
*/
|
||||
unsigned int rc_2pass_vbr_bias_pct;
|
||||
|
||||
/*!\brief Two-pass mode per-GOP minimum bitrate
|
||||
*
|
||||
* This value, expressed as a percentage of the target bitrate, indicates
|
||||
* the minimum bitrate to be used for a single GOP (aka "section")
|
||||
*/
|
||||
unsigned int rc_2pass_vbr_minsection_pct;
|
||||
|
||||
/*!\brief Two-pass mode per-GOP maximum bitrate
|
||||
*
|
||||
* This value, expressed as a percentage of the target bitrate, indicates
|
||||
* the maximum bitrate to be used for a single GOP (aka "section")
|
||||
*/
|
||||
unsigned int rc_2pass_vbr_maxsection_pct;
|
||||
|
||||
/*
|
||||
* keyframing settings (kf)
|
||||
*/
|
||||
|
||||
/*!\brief Keyframe placement mode
|
||||
*
|
||||
* This value indicates whether the encoder should place keyframes at a
|
||||
* fixed interval, or determine the optimal placement automatically
|
||||
* (as governed by the #kf_min_dist and #kf_max_dist parameters)
|
||||
*/
|
||||
enum aom_kf_mode kf_mode;
|
||||
|
||||
/*!\brief Keyframe minimum interval
|
||||
*
|
||||
* This value, expressed as a number of frames, prevents the encoder from
|
||||
* placing a keyframe nearer than kf_min_dist to the previous keyframe. At
|
||||
* least kf_min_dist non-keyframes will be coded before the next
|
||||
* keyframe. Set kf_min_dist equal to kf_max_dist for a fixed interval.
|
||||
*/
|
||||
unsigned int kf_min_dist;
|
||||
|
||||
/*!\brief Keyframe maximum interval
|
||||
*
|
||||
* This value, expressed as a number of frames, forces the encoder to code
|
||||
* a keyframe if one has not been coded in the last kf_max_dist frames.
|
||||
* A value of 0 implies all frames will be keyframes. Set kf_min_dist
|
||||
* equal to kf_max_dist for a fixed interval.
|
||||
*/
|
||||
unsigned int kf_max_dist;
|
||||
} aom_codec_enc_cfg_t; /**< alias for struct aom_codec_enc_cfg */
|
||||
|
||||
/*!\brief Initialize an encoder instance
|
||||
*
|
||||
* Initializes an encoder context using the given interface. Applications
|
||||
* should call the aom_codec_enc_init convenience macro instead of this
|
||||
* function directly, to ensure that the ABI version number parameter
|
||||
* is properly initialized.
|
||||
*
|
||||
* If the library was configured with --disable-multithread, this call
|
||||
* is not thread safe and should be guarded with a lock if being used
|
||||
* in a multithreaded context.
|
||||
*
|
||||
* \param[in] ctx Pointer to this instance's context.
|
||||
* \param[in] iface Pointer to the algorithm interface to use.
|
||||
* \param[in] cfg Configuration to use, if known. May be NULL.
|
||||
* \param[in] flags Bitfield of AOM_CODEC_USE_* flags
|
||||
* \param[in] ver ABI version number. Must be set to
|
||||
* AOM_ENCODER_ABI_VERSION
|
||||
* \retval #AOM_CODEC_OK
|
||||
* The encoder algorithm initialized.
|
||||
* \retval #AOM_CODEC_MEM_ERROR
|
||||
* Memory allocation failed.
|
||||
*/
|
||||
aom_codec_err_t aom_codec_enc_init_ver(aom_codec_ctx_t *ctx,
|
||||
aom_codec_iface_t *iface,
|
||||
const aom_codec_enc_cfg_t *cfg,
|
||||
aom_codec_flags_t flags, int ver);
|
||||
|
||||
/*!\brief Convenience macro for aom_codec_enc_init_ver()
|
||||
*
|
||||
* Ensures the ABI version parameter is properly set.
|
||||
*/
|
||||
#define aom_codec_enc_init(ctx, iface, cfg, flags) \
|
||||
aom_codec_enc_init_ver(ctx, iface, cfg, flags, AOM_ENCODER_ABI_VERSION)
|
||||
|
||||
/*!\brief Initialize multi-encoder instance
|
||||
*
|
||||
* Initializes multi-encoder context using the given interface.
|
||||
* Applications should call the aom_codec_enc_init_multi convenience macro
|
||||
* instead of this function directly, to ensure that the ABI version number
|
||||
* parameter is properly initialized.
|
||||
*
|
||||
* \param[in] ctx Pointer to this instance's context.
|
||||
* \param[in] iface Pointer to the algorithm interface to use.
|
||||
* \param[in] cfg Configuration to use, if known. May be NULL.
|
||||
* \param[in] num_enc Total number of encoders.
|
||||
* \param[in] flags Bitfield of AOM_CODEC_USE_* flags
|
||||
* \param[in] dsf Pointer to down-sampling factors.
|
||||
* \param[in] ver ABI version number. Must be set to
|
||||
* AOM_ENCODER_ABI_VERSION
|
||||
* \retval #AOM_CODEC_OK
|
||||
* The encoder algorithm initialized.
|
||||
* \retval #AOM_CODEC_MEM_ERROR
|
||||
* Memory allocation failed.
|
||||
*/
|
||||
aom_codec_err_t aom_codec_enc_init_multi_ver(
|
||||
aom_codec_ctx_t *ctx, aom_codec_iface_t *iface, aom_codec_enc_cfg_t *cfg,
|
||||
int num_enc, aom_codec_flags_t flags, aom_rational_t *dsf, int ver);
|
||||
|
||||
/*!\brief Convenience macro for aom_codec_enc_init_multi_ver()
|
||||
*
|
||||
* Ensures the ABI version parameter is properly set.
|
||||
*/
|
||||
#define aom_codec_enc_init_multi(ctx, iface, cfg, num_enc, flags, dsf) \
|
||||
aom_codec_enc_init_multi_ver(ctx, iface, cfg, num_enc, flags, dsf, \
|
||||
AOM_ENCODER_ABI_VERSION)
|
||||
|
||||
/*!\brief Get a default configuration
|
||||
*
|
||||
* Initializes an encoder configuration structure with default values. Supports
|
||||
* the notion of "usages" so that an algorithm may offer different default
|
||||
* settings depending on the user's intended goal. This function \ref SHOULD
|
||||
* be called by all applications to initialize the configuration structure
|
||||
* before specializing the configuration with application specific values.
|
||||
*
|
||||
* \param[in] iface Pointer to the algorithm interface to use.
|
||||
* \param[out] cfg Configuration buffer to populate.
|
||||
* \param[in] reserved Must be set to 0 for VP8 and AV1.
|
||||
*
|
||||
* \retval #AOM_CODEC_OK
|
||||
* The configuration was populated.
|
||||
* \retval #AOM_CODEC_INCAPABLE
|
||||
* Interface is not an encoder interface.
|
||||
* \retval #AOM_CODEC_INVALID_PARAM
|
||||
* A parameter was NULL, or the usage value was not recognized.
|
||||
*/
|
||||
aom_codec_err_t aom_codec_enc_config_default(aom_codec_iface_t *iface,
|
||||
aom_codec_enc_cfg_t *cfg,
|
||||
unsigned int reserved);
|
||||
|
||||
/*!\brief Set or change configuration
|
||||
*
|
||||
* Reconfigures an encoder instance according to the given configuration.
|
||||
*
|
||||
* \param[in] ctx Pointer to this instance's context
|
||||
* \param[in] cfg Configuration buffer to use
|
||||
*
|
||||
* \retval #AOM_CODEC_OK
|
||||
* The configuration was applied.
|
||||
* \retval #AOM_CODEC_INCAPABLE
|
||||
* Interface is not an encoder interface.
|
||||
* \retval #AOM_CODEC_INVALID_PARAM
|
||||
* A parameter was NULL, or the usage value was not recognized.
|
||||
*/
|
||||
aom_codec_err_t aom_codec_enc_config_set(aom_codec_ctx_t *ctx,
|
||||
const aom_codec_enc_cfg_t *cfg);
|
||||
|
||||
/*!\brief Get global stream headers
|
||||
*
|
||||
* Retrieves a stream level global header packet, if supported by the codec.
|
||||
*
|
||||
* \param[in] ctx Pointer to this instance's context
|
||||
*
|
||||
* \retval NULL
|
||||
* Encoder does not support global header
|
||||
* \retval Non-NULL
|
||||
* Pointer to buffer containing global header packet
|
||||
*/
|
||||
aom_fixed_buf_t *aom_codec_get_global_headers(aom_codec_ctx_t *ctx);
|
||||
|
||||
/*!\brief deadline parameter analogous to AVx REALTIME mode. */
|
||||
#define AOM_DL_REALTIME (1)
|
||||
/*!\brief deadline parameter analogous to AVx GOOD QUALITY mode. */
|
||||
#define AOM_DL_GOOD_QUALITY (1000000)
|
||||
/*!\brief deadline parameter analogous to AVx BEST QUALITY mode. */
|
||||
#define AOM_DL_BEST_QUALITY (0)
|
||||
/*!\brief Encode a frame
|
||||
*
|
||||
* Encodes a video frame at the given "presentation time." The presentation
|
||||
* time stamp (PTS) \ref MUST be strictly increasing.
|
||||
*
|
||||
* The encoder supports the notion of a soft real-time deadline. Given a
|
||||
* non-zero value to the deadline parameter, the encoder will make a "best
|
||||
* effort" guarantee to return before the given time slice expires. It is
|
||||
* implicit that limiting the available time to encode will degrade the
|
||||
* output quality. The encoder can be given an unlimited time to produce the
|
||||
* best possible frame by specifying a deadline of '0'. This deadline
|
||||
* supersedes the AVx notion of "best quality, good quality, realtime".
|
||||
* Applications that wish to map these former settings to the new deadline
|
||||
* based system can use the symbols #AOM_DL_REALTIME, #AOM_DL_GOOD_QUALITY,
|
||||
* and #AOM_DL_BEST_QUALITY.
|
||||
*
|
||||
* When the last frame has been passed to the encoder, this function should
|
||||
* continue to be called, with the img parameter set to NULL. This will
|
||||
* signal the end-of-stream condition to the encoder and allow it to encode
|
||||
* any held buffers. Encoding is complete when aom_codec_encode() is called
|
||||
* and aom_codec_get_cx_data() returns no data.
|
||||
*
|
||||
* \param[in] ctx Pointer to this instance's context
|
||||
* \param[in] img Image data to encode, NULL to flush.
|
||||
* \param[in] pts Presentation time stamp, in timebase units.
|
||||
* \param[in] duration Duration to show frame, in timebase units.
|
||||
* \param[in] flags Flags to use for encoding this frame.
|
||||
* \param[in] deadline Time to spend encoding, in microseconds. (0=infinite)
|
||||
*
|
||||
* \retval #AOM_CODEC_OK
|
||||
* The call completed successfully.
|
||||
* \retval #AOM_CODEC_INCAPABLE
|
||||
* Interface is not an encoder interface.
|
||||
* \retval #AOM_CODEC_INVALID_PARAM
|
||||
* A parameter was NULL, the image format is unsupported, etc.
|
||||
*/
|
||||
aom_codec_err_t aom_codec_encode(aom_codec_ctx_t *ctx, const aom_image_t *img,
|
||||
aom_codec_pts_t pts, unsigned long duration,
|
||||
aom_enc_frame_flags_t flags,
|
||||
unsigned long deadline);
|
||||
|
||||
/*!\brief Set compressed data output buffer
|
||||
*
|
||||
* Sets the buffer that the codec should output the compressed data
|
||||
* into. This call effectively sets the buffer pointer returned in the
|
||||
* next AOM_CODEC_CX_FRAME_PKT packet. Subsequent packets will be
|
||||
* appended into this buffer. The buffer is preserved across frames,
|
||||
* so applications must periodically call this function after flushing
|
||||
* the accumulated compressed data to disk or to the network to reset
|
||||
* the pointer to the buffer's head.
|
||||
*
|
||||
* `pad_before` bytes will be skipped before writing the compressed
|
||||
* data, and `pad_after` bytes will be appended to the packet. The size
|
||||
* of the packet will be the sum of the size of the actual compressed
|
||||
* data, pad_before, and pad_after. The padding bytes will be preserved
|
||||
* (not overwritten).
|
||||
*
|
||||
* Note that calling this function does not guarantee that the returned
|
||||
* compressed data will be placed into the specified buffer. In the
|
||||
* event that the encoded data will not fit into the buffer provided,
|
||||
* the returned packet \ref MAY point to an internal buffer, as it would
|
||||
* if this call were never used. In this event, the output packet will
|
||||
* NOT have any padding, and the application must free space and copy it
|
||||
* to the proper place. This is of particular note in configurations
|
||||
* that may output multiple packets for a single encoded frame (e.g., lagged
|
||||
* encoding) or if the application does not reset the buffer periodically.
|
||||
*
|
||||
* Applications may restore the default behavior of the codec providing
|
||||
* the compressed data buffer by calling this function with a NULL
|
||||
* buffer.
|
||||
*
|
||||
* Applications \ref MUSTNOT call this function during iteration of
|
||||
* aom_codec_get_cx_data().
|
||||
*
|
||||
* \param[in] ctx Pointer to this instance's context
|
||||
* \param[in] buf Buffer to store compressed data into
|
||||
* \param[in] pad_before Bytes to skip before writing compressed data
|
||||
* \param[in] pad_after Bytes to skip after writing compressed data
|
||||
*
|
||||
* \retval #AOM_CODEC_OK
|
||||
* The buffer was set successfully.
|
||||
* \retval #AOM_CODEC_INVALID_PARAM
|
||||
* A parameter was NULL, the image format is unsupported, etc.
|
||||
*/
|
||||
aom_codec_err_t aom_codec_set_cx_data_buf(aom_codec_ctx_t *ctx,
|
||||
const aom_fixed_buf_t *buf,
|
||||
unsigned int pad_before,
|
||||
unsigned int pad_after);
|
||||
|
||||
/*!\brief Encoded data iterator
|
||||
*
|
||||
* Iterates over a list of data packets to be passed from the encoder to the
|
||||
* application. The different kinds of packets available are enumerated in
|
||||
* #aom_codec_cx_pkt_kind.
|
||||
*
|
||||
* #AOM_CODEC_CX_FRAME_PKT packets should be passed to the application's
|
||||
* muxer. Multiple compressed frames may be in the list.
|
||||
* #AOM_CODEC_STATS_PKT packets should be appended to a global buffer.
|
||||
*
|
||||
* The application \ref MUST silently ignore any packet kinds that it does
|
||||
* not recognize or support.
|
||||
*
|
||||
* The data buffers returned from this function are only guaranteed to be
|
||||
* valid until the application makes another call to any aom_codec_* function.
|
||||
*
|
||||
* \param[in] ctx Pointer to this instance's context
|
||||
* \param[in,out] iter Iterator storage, initialized to NULL
|
||||
*
|
||||
* \return Returns a pointer to an output data packet (compressed frame data,
|
||||
* two-pass statistics, etc.) or NULL to signal end-of-list.
|
||||
*
|
||||
*/
|
||||
const aom_codec_cx_pkt_t *aom_codec_get_cx_data(aom_codec_ctx_t *ctx,
|
||||
aom_codec_iter_t *iter);
|
||||
|
||||
/*!\brief Get Preview Frame
|
||||
*
|
||||
* Returns an image that can be used as a preview. Shows the image as it would
|
||||
* exist at the decompressor. The application \ref MUST NOT write into this
|
||||
* image buffer.
|
||||
*
|
||||
* \param[in] ctx Pointer to this instance's context
|
||||
*
|
||||
* \return Returns a pointer to a preview image, or NULL if no image is
|
||||
* available.
|
||||
*
|
||||
*/
|
||||
const aom_image_t *aom_codec_get_preview_frame(aom_codec_ctx_t *ctx);
|
||||
|
||||
/*!@} - end defgroup encoder*/
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
#endif // AOM_AOM_ENCODER_H_
|
||||
225
aom/aom_image.h
225
aom/aom_image.h
@@ -1,225 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2016, Alliance for Open Media. All rights reserved
|
||||
*
|
||||
* This source code is subject to the terms of the BSD 2 Clause License and
|
||||
* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
|
||||
* was not distributed with this source code in the LICENSE file, you can
|
||||
* obtain it at www.aomedia.org/license/software. If the Alliance for Open
|
||||
* Media Patent License 1.0 was not distributed with this source code in the
|
||||
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
|
||||
*/
|
||||
|
||||
/*!\file
|
||||
* \brief Describes the aom image descriptor and associated operations
|
||||
*
|
||||
*/
|
||||
#ifndef AOM_AOM_IMAGE_H_
|
||||
#define AOM_AOM_IMAGE_H_
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/*!\brief Current ABI version number
|
||||
*
|
||||
* \internal
|
||||
* If this file is altered in any way that changes the ABI, this value
|
||||
* must be bumped. Examples include, but are not limited to, changing
|
||||
* types, removing or reassigning enums, adding/removing/rearranging
|
||||
* fields to structures
|
||||
*/
|
||||
#define AOM_IMAGE_ABI_VERSION (4) /**<\hideinitializer*/
|
||||
|
||||
#define AOM_IMG_FMT_PLANAR 0x100 /**< Image is a planar format. */
|
||||
#define AOM_IMG_FMT_UV_FLIP 0x200 /**< V plane precedes U in memory. */
|
||||
#define AOM_IMG_FMT_HAS_ALPHA 0x400 /**< Image has an alpha channel. */
|
||||
#define AOM_IMG_FMT_HIGHBITDEPTH 0x800 /**< Image uses 16bit framebuffer. */
|
||||
|
||||
/*!\brief List of supported image formats */
|
||||
typedef enum aom_img_fmt {
|
||||
AOM_IMG_FMT_NONE,
|
||||
AOM_IMG_FMT_RGB24, /**< 24 bit per pixel packed RGB */
|
||||
AOM_IMG_FMT_RGB32, /**< 32 bit per pixel packed 0RGB */
|
||||
AOM_IMG_FMT_RGB565, /**< 16 bit per pixel, 565 */
|
||||
AOM_IMG_FMT_RGB555, /**< 16 bit per pixel, 555 */
|
||||
AOM_IMG_FMT_UYVY, /**< UYVY packed YUV */
|
||||
AOM_IMG_FMT_YUY2, /**< YUYV packed YUV */
|
||||
AOM_IMG_FMT_YVYU, /**< YVYU packed YUV */
|
||||
AOM_IMG_FMT_BGR24, /**< 24 bit per pixel packed BGR */
|
||||
AOM_IMG_FMT_RGB32_LE, /**< 32 bit packed BGR0 */
|
||||
AOM_IMG_FMT_ARGB, /**< 32 bit packed ARGB, alpha=255 */
|
||||
AOM_IMG_FMT_ARGB_LE, /**< 32 bit packed BGRA, alpha=255 */
|
||||
AOM_IMG_FMT_RGB565_LE, /**< 16 bit per pixel, gggbbbbb rrrrrggg */
|
||||
AOM_IMG_FMT_RGB555_LE, /**< 16 bit per pixel, gggbbbbb 0rrrrrgg */
|
||||
AOM_IMG_FMT_YV12 =
|
||||
AOM_IMG_FMT_PLANAR | AOM_IMG_FMT_UV_FLIP | 1, /**< planar YVU */
|
||||
AOM_IMG_FMT_I420 = AOM_IMG_FMT_PLANAR | 2,
|
||||
AOM_IMG_FMT_AOMYV12 = AOM_IMG_FMT_PLANAR | AOM_IMG_FMT_UV_FLIP |
|
||||
3, /**< planar 4:2:0 format with aom color space */
|
||||
AOM_IMG_FMT_AOMI420 = AOM_IMG_FMT_PLANAR | 4,
|
||||
AOM_IMG_FMT_I422 = AOM_IMG_FMT_PLANAR | 5,
|
||||
AOM_IMG_FMT_I444 = AOM_IMG_FMT_PLANAR | 6,
|
||||
AOM_IMG_FMT_I440 = AOM_IMG_FMT_PLANAR | 7,
|
||||
AOM_IMG_FMT_444A = AOM_IMG_FMT_PLANAR | AOM_IMG_FMT_HAS_ALPHA | 6,
|
||||
AOM_IMG_FMT_I42016 = AOM_IMG_FMT_I420 | AOM_IMG_FMT_HIGHBITDEPTH,
|
||||
AOM_IMG_FMT_I42216 = AOM_IMG_FMT_I422 | AOM_IMG_FMT_HIGHBITDEPTH,
|
||||
AOM_IMG_FMT_I44416 = AOM_IMG_FMT_I444 | AOM_IMG_FMT_HIGHBITDEPTH,
|
||||
AOM_IMG_FMT_I44016 = AOM_IMG_FMT_I440 | AOM_IMG_FMT_HIGHBITDEPTH
|
||||
} aom_img_fmt_t; /**< alias for enum aom_img_fmt */
|
||||
|
||||
/*!\brief List of supported color spaces */
|
||||
typedef enum aom_color_space {
|
||||
AOM_CS_UNKNOWN = 0, /**< Unknown */
|
||||
AOM_CS_BT_601 = 1, /**< BT.601 */
|
||||
AOM_CS_BT_709 = 2, /**< BT.709 */
|
||||
AOM_CS_SMPTE_170 = 3, /**< SMPTE.170 */
|
||||
AOM_CS_SMPTE_240 = 4, /**< SMPTE.240 */
|
||||
AOM_CS_BT_2020 = 5, /**< BT.2020 */
|
||||
AOM_CS_RESERVED = 6, /**< Reserved */
|
||||
AOM_CS_SRGB = 7 /**< sRGB */
|
||||
} aom_color_space_t; /**< alias for enum aom_color_space */
|
||||
|
||||
/*!\brief List of supported color ranges */
|
||||
typedef enum aom_color_range {
|
||||
AOM_CR_STUDIO_RANGE = 0, /**< Y [16..235], UV [16..240] */
|
||||
AOM_CR_FULL_RANGE = 1 /**< YUV/RGB [0..255] */
|
||||
} aom_color_range_t; /**< alias for enum aom_color_range */
|
||||
|
||||
/**\brief Image Descriptor */
|
||||
typedef struct aom_image {
|
||||
aom_img_fmt_t fmt; /**< Image Format */
|
||||
aom_color_space_t cs; /**< Color Space */
|
||||
aom_color_range_t range; /**< Color Range */
|
||||
|
||||
/* Image storage dimensions */
|
||||
unsigned int w; /**< Stored image width */
|
||||
unsigned int h; /**< Stored image height */
|
||||
unsigned int bit_depth; /**< Stored image bit-depth */
|
||||
|
||||
/* Image display dimensions */
|
||||
unsigned int d_w; /**< Displayed image width */
|
||||
unsigned int d_h; /**< Displayed image height */
|
||||
|
||||
/* Image intended rendering dimensions */
|
||||
unsigned int r_w; /**< Intended rendering image width */
|
||||
unsigned int r_h; /**< Intended rendering image height */
|
||||
|
||||
/* Chroma subsampling info */
|
||||
unsigned int x_chroma_shift; /**< subsampling order, X */
|
||||
unsigned int y_chroma_shift; /**< subsampling order, Y */
|
||||
|
||||
/* Image data pointers. */
|
||||
#define AOM_PLANE_PACKED 0 /**< To be used for all packed formats */
|
||||
#define AOM_PLANE_Y 0 /**< Y (Luminance) plane */
|
||||
#define AOM_PLANE_U 1 /**< U (Chroma) plane */
|
||||
#define AOM_PLANE_V 2 /**< V (Chroma) plane */
|
||||
#define AOM_PLANE_ALPHA 3 /**< A (Transparency) plane */
|
||||
unsigned char *planes[4]; /**< pointer to the top left pixel for each plane */
|
||||
int stride[4]; /**< stride between rows for each plane */
|
||||
|
||||
int bps; /**< bits per sample (for packed formats) */
|
||||
|
||||
/*!\brief The following member may be set by the application to associate
|
||||
* data with this image.
|
||||
*/
|
||||
void *user_priv;
|
||||
|
||||
/* The following members should be treated as private. */
|
||||
unsigned char *img_data; /**< private */
|
||||
int img_data_owner; /**< private */
|
||||
int self_allocd; /**< private */
|
||||
|
||||
void *fb_priv; /**< Frame buffer data associated with the image. */
|
||||
} aom_image_t; /**< alias for struct aom_image */
|
||||
|
||||
/**\brief Representation of a rectangle on a surface */
|
||||
typedef struct aom_image_rect {
|
||||
unsigned int x; /**< leftmost column */
|
||||
unsigned int y; /**< topmost row */
|
||||
unsigned int w; /**< width */
|
||||
unsigned int h; /**< height */
|
||||
} aom_image_rect_t; /**< alias for struct aom_image_rect */
|
||||
|
||||
/*!\brief Open a descriptor, allocating storage for the underlying image
|
||||
*
|
||||
* Returns a descriptor for storing an image of the given format. The
|
||||
* storage for the descriptor is allocated on the heap.
|
||||
*
|
||||
* \param[in] img Pointer to storage for descriptor. If this parameter
|
||||
* is NULL, the storage for the descriptor will be
|
||||
* allocated on the heap.
|
||||
* \param[in] fmt Format for the image
|
||||
* \param[in] d_w Width of the image
|
||||
* \param[in] d_h Height of the image
|
||||
* \param[in] align Alignment, in bytes, of the image buffer and
|
||||
* each row in the image(stride).
|
||||
*
|
||||
* \return Returns a pointer to the initialized image descriptor. If the img
|
||||
* parameter is non-null, the value of the img parameter will be
|
||||
* returned.
|
||||
*/
|
||||
aom_image_t *aom_img_alloc(aom_image_t *img, aom_img_fmt_t fmt,
|
||||
unsigned int d_w, unsigned int d_h,
|
||||
unsigned int align);
|
||||
|
||||
/*!\brief Open a descriptor, using existing storage for the underlying image
|
||||
*
|
||||
* Returns a descriptor for storing an image of the given format. The
|
||||
* storage for descriptor has been allocated elsewhere, and a descriptor is
|
||||
* desired to "wrap" that storage.
|
||||
*
|
||||
* \param[in] img Pointer to storage for descriptor. If this parameter
|
||||
* is NULL, the storage for the descriptor will be
|
||||
* allocated on the heap.
|
||||
* \param[in] fmt Format for the image
|
||||
* \param[in] d_w Width of the image
|
||||
* \param[in] d_h Height of the image
|
||||
* \param[in] align Alignment, in bytes, of each row in the image.
|
||||
* \param[in] img_data Storage to use for the image
|
||||
*
|
||||
* \return Returns a pointer to the initialized image descriptor. If the img
|
||||
* parameter is non-null, the value of the img parameter will be
|
||||
* returned.
|
||||
*/
|
||||
aom_image_t *aom_img_wrap(aom_image_t *img, aom_img_fmt_t fmt, unsigned int d_w,
|
||||
unsigned int d_h, unsigned int align,
|
||||
unsigned char *img_data);
|
||||
|
||||
/*!\brief Set the rectangle identifying the displayed portion of the image
|
||||
*
|
||||
* Updates the displayed rectangle (aka viewport) on the image surface to
|
||||
* match the specified coordinates and size.
|
||||
*
|
||||
* \param[in] img Image descriptor
|
||||
* \param[in] x leftmost column
|
||||
* \param[in] y topmost row
|
||||
* \param[in] w width
|
||||
* \param[in] h height
|
||||
*
|
||||
* \return 0 if the requested rectangle is valid, nonzero otherwise.
|
||||
*/
|
||||
int aom_img_set_rect(aom_image_t *img, unsigned int x, unsigned int y,
|
||||
unsigned int w, unsigned int h);
|
||||
|
||||
/*!\brief Flip the image vertically (top for bottom)
|
||||
*
|
||||
* Adjusts the image descriptor's pointers and strides to make the image
|
||||
* be referenced upside-down.
|
||||
*
|
||||
* \param[in] img Image descriptor
|
||||
*/
|
||||
void aom_img_flip(aom_image_t *img);
|
||||
|
||||
/*!\brief Close an image descriptor
|
||||
*
|
||||
* Frees all allocated storage associated with an image descriptor.
|
||||
*
|
||||
* \param[in] img Image descriptor
|
||||
*/
|
||||
void aom_img_free(aom_image_t *img);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
#endif
|
||||
|
||||
#endif // AOM_AOM_IMAGE_H_
|
||||
@@ -1,64 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2016, Alliance for Open Media. All rights reserved
|
||||
*
|
||||
* This source code is subject to the terms of the BSD 2 Clause License and
|
||||
* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
|
||||
* was not distributed with this source code in the LICENSE file, you can
|
||||
* obtain it at www.aomedia.org/license/software. If the Alliance for Open
|
||||
* Media Patent License 1.0 was not distributed with this source code in the
|
||||
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
|
||||
*/
|
||||
|
||||
#ifndef AOM_AOM_INTEGER_H_
|
||||
#define AOM_AOM_INTEGER_H_
|
||||
|
||||
/* get ptrdiff_t, size_t, wchar_t, NULL */
|
||||
#include <stddef.h>
|
||||
|
||||
#if defined(_MSC_VER)
|
||||
#define AOM_FORCE_INLINE __forceinline
|
||||
#define AOM_INLINE __inline
|
||||
#else
|
||||
#define AOM_FORCE_INLINE __inline__ __attribute__((always_inline))
|
||||
// TODO(jbb): Allow a way to force inline off for older compilers.
|
||||
#define AOM_INLINE inline
|
||||
#endif
|
||||
|
||||
#if defined(AOM_EMULATE_INTTYPES)
|
||||
typedef signed char int8_t;
|
||||
typedef signed short int16_t;
|
||||
typedef signed int int32_t;
|
||||
|
||||
typedef unsigned char uint8_t;
|
||||
typedef unsigned short uint16_t;
|
||||
typedef unsigned int uint32_t;
|
||||
|
||||
#ifndef _UINTPTR_T_DEFINED
|
||||
typedef size_t uintptr_t;
|
||||
#endif
|
||||
|
||||
#else
|
||||
|
||||
/* Most platforms have the C99 standard integer types. */
|
||||
|
||||
#if defined(__cplusplus)
|
||||
#if !defined(__STDC_FORMAT_MACROS)
|
||||
#define __STDC_FORMAT_MACROS
|
||||
#endif
|
||||
#if !defined(__STDC_LIMIT_MACROS)
|
||||
#define __STDC_LIMIT_MACROS
|
||||
#endif
|
||||
#endif // __cplusplus
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#endif
|
||||
|
||||
/* VS2010 defines stdint.h, but not inttypes.h */
|
||||
#if defined(_MSC_VER) && _MSC_VER < 1800
|
||||
#define PRId64 "I64d"
|
||||
#else
|
||||
#include <inttypes.h>
|
||||
#endif
|
||||
|
||||
#endif // AOM_AOM_INTEGER_H_
|
||||
759
aom/aomcx.h
759
aom/aomcx.h
@@ -1,759 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2016, Alliance for Open Media. All rights reserved
|
||||
*
|
||||
* This source code is subject to the terms of the BSD 2 Clause License and
|
||||
* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
|
||||
* was not distributed with this source code in the LICENSE file, you can
|
||||
* obtain it at www.aomedia.org/license/software. If the Alliance for Open
|
||||
* Media Patent License 1.0 was not distributed with this source code in the
|
||||
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
|
||||
*/
|
||||
#ifndef AOM_AOMCX_H_
|
||||
#define AOM_AOMCX_H_
|
||||
|
||||
/*!\defgroup aom_encoder AOMedia AOM/AV1 Encoder
|
||||
* \ingroup aom
|
||||
*
|
||||
* @{
|
||||
*/
|
||||
#include "./aom.h"
|
||||
#include "./aom_encoder.h"
|
||||
|
||||
/*!\file
|
||||
* \brief Provides definitions for using AOM or AV1 encoder algorithm within the
|
||||
* aom Codec Interface.
|
||||
*/
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/*!\name Algorithm interface for AV1
|
||||
*
|
||||
* This interface provides the capability to encode raw AV1 streams.
|
||||
* @{
|
||||
*/
|
||||
extern aom_codec_iface_t aom_codec_av1_cx_algo;
|
||||
extern aom_codec_iface_t *aom_codec_av1_cx(void);
|
||||
/*!@} - end algorithm interface member group*/
|
||||
|
||||
/*
|
||||
* Algorithm Flags
|
||||
*/
|
||||
|
||||
/*!\brief Don't reference the last frame
|
||||
*
|
||||
* When this flag is set, the encoder will not use the last frame as a
|
||||
* predictor. When not set, the encoder will choose whether to use the
|
||||
* last frame or not automatically.
|
||||
*/
|
||||
#define AOM_EFLAG_NO_REF_LAST (1 << 16)
|
||||
|
||||
/*!\brief Don't reference the golden frame
|
||||
*
|
||||
* When this flag is set, the encoder will not use the golden frame as a
|
||||
* predictor. When not set, the encoder will choose whether to use the
|
||||
* golden frame or not automatically.
|
||||
*/
|
||||
#define AOM_EFLAG_NO_REF_GF (1 << 17)
|
||||
|
||||
/*!\brief Don't reference the alternate reference frame
|
||||
*
|
||||
* When this flag is set, the encoder will not use the alt ref frame as a
|
||||
* predictor. When not set, the encoder will choose whether to use the
|
||||
* alt ref frame or not automatically.
|
||||
*/
|
||||
#define AOM_EFLAG_NO_REF_ARF (1 << 21)
|
||||
|
||||
/*!\brief Don't update the last frame
|
||||
*
|
||||
* When this flag is set, the encoder will not update the last frame with
|
||||
* the contents of the current frame.
|
||||
*/
|
||||
#define AOM_EFLAG_NO_UPD_LAST (1 << 18)
|
||||
|
||||
/*!\brief Don't update the golden frame
|
||||
*
|
||||
* When this flag is set, the encoder will not update the golden frame with
|
||||
* the contents of the current frame.
|
||||
*/
|
||||
#define AOM_EFLAG_NO_UPD_GF (1 << 22)
|
||||
|
||||
/*!\brief Don't update the alternate reference frame
|
||||
*
|
||||
* When this flag is set, the encoder will not update the alt ref frame with
|
||||
* the contents of the current frame.
|
||||
*/
|
||||
#define AOM_EFLAG_NO_UPD_ARF (1 << 23)
|
||||
|
||||
/*!\brief Force golden frame update
|
||||
*
|
||||
* When this flag is set, the encoder copies the contents of the current frame
|
||||
* to the golden frame buffer.
|
||||
*/
|
||||
#define AOM_EFLAG_FORCE_GF (1 << 19)
|
||||
|
||||
/*!\brief Force alternate reference frame update
|
||||
*
|
||||
* When this flag is set, the encoder copies the contents of the current frame
|
||||
* to the alternate reference frame buffer.
|
||||
*/
|
||||
#define AOM_EFLAG_FORCE_ARF (1 << 24)
|
||||
|
||||
/*!\brief Disable entropy update
|
||||
*
|
||||
* When this flag is set, the encoder will not update its internal entropy
|
||||
* model based on the entropy of this frame.
|
||||
*/
|
||||
#define AOM_EFLAG_NO_UPD_ENTROPY (1 << 20)
|
||||
|
||||
/*!\brief AVx encoder control functions
|
||||
*
|
||||
* This set of macros define the control functions available for AVx
|
||||
* encoder interface.
|
||||
*
|
||||
* \sa #aom_codec_control
|
||||
*/
|
||||
enum aome_enc_control_id {
|
||||
/*!\brief Codec control function to set which reference frame encoder can use.
|
||||
*
|
||||
* Supported in codecs: VP8, AV1
|
||||
*/
|
||||
AOME_USE_REFERENCE = 7,
|
||||
|
||||
/*!\brief Codec control function to pass an ROI map to encoder.
|
||||
*
|
||||
* Supported in codecs: VP8, AV1
|
||||
*/
|
||||
AOME_SET_ROI_MAP = 8,
|
||||
|
||||
/*!\brief Codec control function to pass an Active map to encoder.
|
||||
*
|
||||
* Supported in codecs: VP8, AV1
|
||||
*/
|
||||
AOME_SET_ACTIVEMAP,
|
||||
|
||||
/*!\brief Codec control function to set encoder scaling mode.
|
||||
*
|
||||
* Supported in codecs: VP8, AV1
|
||||
*/
|
||||
AOME_SET_SCALEMODE = 11,
|
||||
|
||||
/*!\brief Codec control function to set encoder internal speed settings.
|
||||
*
|
||||
* Changes in this value influence, among others, the encoder's selection
|
||||
* of motion estimation methods. Values greater than 0 will increase encoder
|
||||
* speed at the expense of quality.
|
||||
*
|
||||
* \note Valid range for VP8: -16..16
|
||||
* \note Valid range for AV1: -8..8
|
||||
*
|
||||
* Supported in codecs: VP8, AV1
|
||||
*/
|
||||
AOME_SET_CPUUSED = 13,
|
||||
|
||||
/*!\brief Codec control function to enable automatic set and use alt-ref frames.
|
||||
*
|
||||
* Supported in codecs: VP8, AV1
|
||||
*/
|
||||
AOME_SET_ENABLEAUTOALTREF,
|
||||
|
||||
#if CONFIG_EXT_REFS
|
||||
/*!\brief Codec control function to enable automatic set and use
|
||||
* bwd-pred frames.
|
||||
*
|
||||
* Supported in codecs: AV1
|
||||
*/
|
||||
AOME_SET_ENABLEAUTOBWDREF,
|
||||
#endif // CONFIG_EXT_REFS
|
||||
|
||||
/*!\brief control function to set noise sensitivity
|
||||
*
|
||||
* 0: off, 1: OnYOnly, 2: OnYUV,
|
||||
* 3: OnYUVAggressive, 4: Adaptive
|
||||
*
|
||||
* Supported in codecs: VP8
|
||||
*/
|
||||
AOME_SET_NOISE_SENSITIVITY,
|
||||
|
||||
/*!\brief Codec control function to set sharpness.
|
||||
*
|
||||
* Supported in codecs: VP8, AV1
|
||||
*/
|
||||
AOME_SET_SHARPNESS,
|
||||
|
||||
/*!\brief Codec control function to set the threshold for MBs treated static.
|
||||
*
|
||||
* Supported in codecs: VP8, AV1
|
||||
*/
|
||||
AOME_SET_STATIC_THRESHOLD,
|
||||
|
||||
/*!\brief Codec control function to set the number of token partitions.
|
||||
*
|
||||
* Supported in codecs: VP8
|
||||
*/
|
||||
AOME_SET_TOKEN_PARTITIONS,
|
||||
|
||||
/*!\brief Codec control function to get last quantizer chosen by the encoder.
|
||||
*
|
||||
* Return value uses internal quantizer scale defined by the codec.
|
||||
*
|
||||
* Supported in codecs: VP8, AV1
|
||||
*/
|
||||
AOME_GET_LAST_QUANTIZER,
|
||||
|
||||
/*!\brief Codec control function to get last quantizer chosen by the encoder.
|
||||
*
|
||||
* Return value uses the 0..63 scale as used by the rc_*_quantizer config
|
||||
* parameters.
|
||||
*
|
||||
* Supported in codecs: VP8, AV1
|
||||
*/
|
||||
AOME_GET_LAST_QUANTIZER_64,
|
||||
|
||||
/*!\brief Codec control function to set the max no of frames to create arf.
|
||||
*
|
||||
* Supported in codecs: VP8, AV1
|
||||
*/
|
||||
AOME_SET_ARNR_MAXFRAMES,
|
||||
|
||||
/*!\brief Codec control function to set the filter strength for the arf.
|
||||
*
|
||||
* Supported in codecs: VP8, AV1
|
||||
*/
|
||||
AOME_SET_ARNR_STRENGTH,
|
||||
|
||||
/*!\deprecated control function to set the filter type to use for the arf. */
|
||||
AOME_SET_ARNR_TYPE,
|
||||
|
||||
/*!\brief Codec control function to set visual tuning.
|
||||
*
|
||||
* Supported in codecs: VP8, AV1
|
||||
*/
|
||||
AOME_SET_TUNING,
|
||||
|
||||
/*!\brief Codec control function to set constrained quality level.
|
||||
*
|
||||
* \attention For this value to be used aom_codec_enc_cfg_t::g_usage must be
|
||||
* set to #AOM_CQ.
|
||||
* \note Valid range: 0..63
|
||||
*
|
||||
* Supported in codecs: VP8, AV1
|
||||
*/
|
||||
AOME_SET_CQ_LEVEL,
|
||||
|
||||
/*!\brief Codec control function to set Max data rate for Intra frames.
|
||||
*
|
||||
* This value controls additional clamping on the maximum size of a
|
||||
* keyframe. It is expressed as a percentage of the average
|
||||
* per-frame bitrate, with the special (and default) value 0 meaning
|
||||
* unlimited, or no additional clamping beyond the codec's built-in
|
||||
* algorithm.
|
||||
*
|
||||
* For example, to allocate no more than 4.5 frames worth of bitrate
|
||||
* to a keyframe, set this to 450.
|
||||
*
|
||||
* Supported in codecs: VP8, AV1
|
||||
*/
|
||||
AOME_SET_MAX_INTRA_BITRATE_PCT,
|
||||
|
||||
/*!\brief Codec control function to set reference and update frame flags.
|
||||
*
|
||||
* Supported in codecs: VP8
|
||||
*/
|
||||
AOME_SET_FRAME_FLAGS,
|
||||
|
||||
/*!\brief Codec control function to set max data rate for Inter frames.
|
||||
*
|
||||
* This value controls additional clamping on the maximum size of an
|
||||
* inter frame. It is expressed as a percentage of the average
|
||||
* per-frame bitrate, with the special (and default) value 0 meaning
|
||||
* unlimited, or no additional clamping beyond the codec's built-in
|
||||
* algorithm.
|
||||
*
|
||||
* For example, to allow no more than 4.5 frames worth of bitrate
|
||||
* to an inter frame, set this to 450.
|
||||
*
|
||||
* Supported in codecs: AV1
|
||||
*/
|
||||
AV1E_SET_MAX_INTER_BITRATE_PCT,
|
||||
|
||||
/*!\brief Boost percentage for Golden Frame in CBR mode.
|
||||
*
|
||||
* This value controls the amount of boost given to Golden Frame in
|
||||
* CBR mode. It is expressed as a percentage of the average
|
||||
* per-frame bitrate, with the special (and default) value 0 meaning
|
||||
* the feature is off, i.e., no golden frame boost in CBR mode and
|
||||
* average bitrate target is used.
|
||||
*
|
||||
* For example, to allow 100% more bits, i.e, 2X, in a golden frame
|
||||
* than average frame, set this to 100.
|
||||
*
|
||||
* Supported in codecs: AV1
|
||||
*/
|
||||
AV1E_SET_GF_CBR_BOOST_PCT,
|
||||
|
||||
/*!\brief Codec control function to set encoder screen content mode.
|
||||
*
|
||||
* 0: off, 1: On, 2: On with more aggressive rate control.
|
||||
*
|
||||
* Supported in codecs: VP8
|
||||
*/
|
||||
AOME_SET_SCREEN_CONTENT_MODE,
|
||||
|
||||
/*!\brief Codec control function to set lossless encoding mode.
|
||||
*
|
||||
* AV1 can operate in lossless encoding mode, in which the bitstream
|
||||
* produced will be able to decode and reconstruct a perfect copy of
|
||||
* input source. This control function provides a means to switch encoder
|
||||
* into lossless coding mode(1) or normal coding mode(0) that may be lossy.
|
||||
* 0 = lossy coding mode
|
||||
* 1 = lossless coding mode
|
||||
*
|
||||
* By default, encoder operates in normal coding mode (maybe lossy).
|
||||
*
|
||||
* Supported in codecs: AV1
|
||||
*/
|
||||
AV1E_SET_LOSSLESS,
|
||||
#if CONFIG_AOM_QM
|
||||
/*!\brief Codec control function to encode with quantisation matrices.
|
||||
*
|
||||
* AOM can operate with default quantisation matrices dependent on
|
||||
* quantisation level and block type.
|
||||
* 0 = do not use quantisation matrices
|
||||
* 1 = use quantisation matrices
|
||||
*
|
||||
* By default, the encoder operates without quantisation matrices.
|
||||
*
|
||||
* Supported in codecs: AOM
|
||||
*/
|
||||
|
||||
AV1E_SET_ENABLE_QM,
|
||||
|
||||
/*!\brief Codec control function to set the min quant matrix flatness.
|
||||
*
|
||||
* AOM can operate with different ranges of quantisation matrices.
|
||||
* As quantisation levels increase, the matrices get flatter. This
|
||||
* control sets the minimum level of flatness from which the matrices
|
||||
* are determined.
|
||||
*
|
||||
* By default, the encoder sets this minimum at half the available
|
||||
* range.
|
||||
*
|
||||
* Supported in codecs: AOM
|
||||
*/
|
||||
AV1E_SET_QM_MIN,
|
||||
|
||||
/*!\brief Codec control function to set the max quant matrix flatness.
|
||||
*
|
||||
* AOM can operate with different ranges of quantisation matrices.
|
||||
* As quantisation levels increase, the matrices get flatter. This
|
||||
* control sets the maximum level of flatness possible.
|
||||
*
|
||||
* By default, the encoder sets this maximum at the top of the
|
||||
* available range.
|
||||
*
|
||||
* Supported in codecs: AOM
|
||||
*/
|
||||
AV1E_SET_QM_MAX,
|
||||
#endif
|
||||
|
||||
/*!\brief Codec control function to set number of tile columns.
|
||||
*
|
||||
* In encoding and decoding, AV1 allows an input image frame be partitioned
|
||||
* into separated vertical tile columns, which can be encoded or decoded
|
||||
* independently. This enables easy implementation of parallel encoding and
|
||||
* decoding. This control requests the encoder to use column tiles in
|
||||
* encoding an input frame, with number of tile columns (in Log2 unit) as
|
||||
* the parameter:
|
||||
* 0 = 1 tile column
|
||||
* 1 = 2 tile columns
|
||||
* 2 = 4 tile columns
|
||||
* .....
|
||||
* n = 2**n tile columns
|
||||
* The requested tile columns will be capped by encoder based on image size
|
||||
* limitation (The minimum width of a tile column is 256 pixel, the maximum
|
||||
* is 4096).
|
||||
*
|
||||
* By default, the value is 0, i.e. one single column tile for entire image.
|
||||
*
|
||||
* Supported in codecs: AV1
|
||||
*/
|
||||
AV1E_SET_TILE_COLUMNS,
|
||||
|
||||
/*!\brief Codec control function to set number of tile rows.
|
||||
*
|
||||
* In encoding and decoding, AV1 allows an input image frame be partitioned
|
||||
* into separated horizontal tile rows. Tile rows are encoded or decoded
|
||||
* sequentially. Even though encoding/decoding of later tile rows depends on
|
||||
* earlier ones, this allows the encoder to output data packets for tile rows
|
||||
* prior to completely processing all tile rows in a frame, thereby reducing
|
||||
* the latency in processing between input and output. The parameter
|
||||
* for this control describes the number of tile rows, which has a valid
|
||||
* range [0, 2]:
|
||||
* 0 = 1 tile row
|
||||
* 1 = 2 tile rows
|
||||
* 2 = 4 tile rows
|
||||
*
|
||||
* By default, the value is 0, i.e. one single row tile for entire image.
|
||||
*
|
||||
* Supported in codecs: AV1
|
||||
*/
|
||||
AV1E_SET_TILE_ROWS,
|
||||
|
||||
/*!\brief Codec control function to enable frame parallel decoding feature.
|
||||
*
|
||||
* AV1 has a bitstream feature to reduce decoding dependency between frames
|
||||
* by turning off backward update of probability context used in encoding
|
||||
* and decoding. This allows staged parallel processing of more than one
|
||||
* video frames in the decoder. This control function provides a means to
|
||||
* turn this feature on or off for bitstreams produced by encoder.
|
||||
*
|
||||
* By default, this feature is off.
|
||||
*
|
||||
* Supported in codecs: AV1
|
||||
*/
|
||||
AV1E_SET_FRAME_PARALLEL_DECODING,
|
||||
|
||||
/*!\brief Codec control function to set adaptive quantization mode.
|
||||
*
|
||||
* AV1 has a segment based feature that allows encoder to adaptively change
|
||||
* quantization parameter for each segment within a frame to improve the
|
||||
* subjective quality. This control makes encoder operate in one of the
|
||||
* several AQ_modes supported.
|
||||
*
|
||||
* By default, encoder operates with AQ_Mode 0(adaptive quantization off).
|
||||
*
|
||||
* Supported in codecs: AV1
|
||||
*/
|
||||
AV1E_SET_AQ_MODE,
|
||||
|
||||
/*!\brief Codec control function to enable/disable periodic Q boost.
|
||||
*
|
||||
* One AV1 encoder speed feature is to enable quality boost by lowering
|
||||
* frame level Q periodically. This control function provides a means to
|
||||
* turn on/off this feature.
|
||||
* 0 = off
|
||||
* 1 = on
|
||||
*
|
||||
* By default, the encoder is allowed to use this feature for appropriate
|
||||
* encoding modes.
|
||||
*
|
||||
* Supported in codecs: AV1
|
||||
*/
|
||||
AV1E_SET_FRAME_PERIODIC_BOOST,
|
||||
|
||||
/*!\brief Codec control function to set noise sensitivity.
|
||||
*
|
||||
* 0: off, 1: On(YOnly)
|
||||
*
|
||||
* Supported in codecs: AV1
|
||||
*/
|
||||
AV1E_SET_NOISE_SENSITIVITY,
|
||||
|
||||
/*!\brief Codec control function to set content type.
|
||||
* \note Valid parameter range:
|
||||
* AOM_CONTENT_DEFAULT = Regular video content (Default)
|
||||
* AOM_CONTENT_SCREEN = Screen capture content
|
||||
*
|
||||
* Supported in codecs: AV1
|
||||
*/
|
||||
AV1E_SET_TUNE_CONTENT,
|
||||
|
||||
/*!\brief Codec control function to set color space info.
|
||||
* \note Valid ranges: 0..7, default is "UNKNOWN".
|
||||
* 0 = UNKNOWN,
|
||||
* 1 = BT_601
|
||||
* 2 = BT_709
|
||||
* 3 = SMPTE_170
|
||||
* 4 = SMPTE_240
|
||||
* 5 = BT_2020
|
||||
* 6 = RESERVED
|
||||
* 7 = SRGB
|
||||
*
|
||||
* Supported in codecs: AV1
|
||||
*/
|
||||
AV1E_SET_COLOR_SPACE,
|
||||
|
||||
/*!\brief Codec control function to set minimum interval between GF/ARF frames
|
||||
*
|
||||
* By default the value is set as 4.
|
||||
*
|
||||
* Supported in codecs: AV1
|
||||
*/
|
||||
AV1E_SET_MIN_GF_INTERVAL,
|
||||
|
||||
/*!\brief Codec control function to set maximum interval between GF/ARF frames
|
||||
*
|
||||
* By default the value is set as 16.
|
||||
*
|
||||
* Supported in codecs: AV1
|
||||
*/
|
||||
AV1E_SET_MAX_GF_INTERVAL,
|
||||
|
||||
/*!\brief Codec control function to get an Active map back from the encoder.
|
||||
*
|
||||
* Supported in codecs: AV1
|
||||
*/
|
||||
AV1E_GET_ACTIVEMAP,
|
||||
|
||||
/*!\brief Codec control function to set color range bit.
|
||||
* \note Valid ranges: 0..1, default is 0
|
||||
* 0 = Limited range (16..235 or HBD equivalent)
|
||||
* 1 = Full range (0..255 or HBD equivalent)
|
||||
*
|
||||
* Supported in codecs: AV1
|
||||
*/
|
||||
AV1E_SET_COLOR_RANGE,
|
||||
|
||||
/*!\brief Codec control function to set intended rendering image size.
|
||||
*
|
||||
* By default, this is identical to the image size in pixels.
|
||||
*
|
||||
* Supported in codecs: AV1
|
||||
*/
|
||||
AV1E_SET_RENDER_SIZE,
|
||||
|
||||
/*!\brief Codec control function to set target level.
|
||||
*
|
||||
* 255: off (default); 0: only keep level stats; 10: target for level 1.0;
|
||||
* 11: target for level 1.1; ... 62: target for level 6.2
|
||||
*
|
||||
* Supported in codecs: AV1
|
||||
*/
|
||||
AV1E_SET_TARGET_LEVEL,
|
||||
|
||||
/*!\brief Codec control function to get bitstream level.
|
||||
*
|
||||
* Supported in codecs: AV1
|
||||
*/
|
||||
AV1E_GET_LEVEL,
|
||||
|
||||
/*!\brief Codec control function to set intended superblock size.
|
||||
*
|
||||
* By default, the superblock size is determined separately for each
|
||||
* frame by the encoder.
|
||||
*
|
||||
* Supported in codecs: AV1
|
||||
*/
|
||||
AV1E_SET_SUPERBLOCK_SIZE,
|
||||
};
|
||||
|
||||
/*!\brief aom 1-D scaling mode
|
||||
*
|
||||
* This set of constants define 1-D aom scaling modes
|
||||
*/
|
||||
typedef enum aom_scaling_mode_1d {
|
||||
AOME_NORMAL = 0,
|
||||
AOME_FOURFIVE = 1,
|
||||
AOME_THREEFIVE = 2,
|
||||
AOME_ONETWO = 3
|
||||
} AOM_SCALING_MODE;
|
||||
|
||||
/*!\brief aom region of interest map
|
||||
*
|
||||
* This defines the data structures for the region of interest map
|
||||
*
|
||||
*/
|
||||
|
||||
typedef struct aom_roi_map {
|
||||
/*! An id between 0 and 3 for each 16x16 region within a frame. */
|
||||
unsigned char *roi_map;
|
||||
unsigned int rows; /**< Number of rows. */
|
||||
unsigned int cols; /**< Number of columns. */
|
||||
// TODO(paulwilkins): broken for AV1 which has 8 segments
|
||||
// q and loop filter deltas for each segment
|
||||
// (see MAX_MB_SEGMENTS)
|
||||
int delta_q[4]; /**< Quantizer deltas. */
|
||||
int delta_lf[4]; /**< Loop filter deltas. */
|
||||
/*! Static breakout threshold for each segment. */
|
||||
unsigned int static_threshold[4];
|
||||
} aom_roi_map_t;
|
||||
|
||||
/*!\brief aom active region map
|
||||
*
|
||||
* This defines the data structures for active region map
|
||||
*
|
||||
*/
|
||||
|
||||
typedef struct aom_active_map {
|
||||
/*!\brief specify an on (1) or off (0) for each 16x16 region within a frame */
|
||||
unsigned char *active_map;
|
||||
unsigned int rows; /**< number of rows */
|
||||
unsigned int cols; /**< number of cols */
|
||||
} aom_active_map_t;
|
||||
|
||||
/*!\brief aom image scaling mode
|
||||
*
|
||||
* This defines the data structure for image scaling mode
|
||||
*
|
||||
*/
|
||||
typedef struct aom_scaling_mode {
|
||||
AOM_SCALING_MODE h_scaling_mode; /**< horizontal scaling mode */
|
||||
AOM_SCALING_MODE v_scaling_mode; /**< vertical scaling mode */
|
||||
} aom_scaling_mode_t;
|
||||
|
||||
/*!\brief VP8 token partition mode
|
||||
*
|
||||
* This defines VP8 partitioning mode for compressed data, i.e., the number of
|
||||
* sub-streams in the bitstream. Used for parallelized decoding.
|
||||
*
|
||||
*/
|
||||
|
||||
typedef enum {
|
||||
AOM_ONE_TOKENPARTITION = 0, /**< One partition; each value is log2 of the count in its name. */
|
||||
AOM_TWO_TOKENPARTITION = 1, /**< Two partitions. */
|
||||
AOM_FOUR_TOKENPARTITION = 2, /**< Four partitions. */
|
||||
AOM_EIGHT_TOKENPARTITION = 3 /**< Eight partitions. */
|
||||
} aome_token_partitions;
|
||||
|
||||
/*!\brief AV1 encoder content type */
|
||||
typedef enum {
|
||||
AOM_CONTENT_DEFAULT, /**< Regular video content (default). */
|
||||
AOM_CONTENT_SCREEN, /**< Screen capture content. */
|
||||
AOM_CONTENT_INVALID /**< NOTE(review): looks like an end-of-range marker rather than a selectable type -- confirm. */
|
||||
} aom_tune_content;
|
||||
|
||||
/*!\brief VP8 model tuning parameters
|
||||
*
|
||||
* Changes the encoder to tune for certain types of input material.
|
||||
*
|
||||
*/
|
||||
typedef enum { AOM_TUNE_PSNR, AOM_TUNE_SSIM } aom_tune_metric;
|
||||
|
||||
/*!\cond */
|
||||
/*!\brief AOM encoder control function parameter type
|
||||
*
|
||||
* Defines the data types that AOME/AV1E control functions take. Note that
|
||||
* additional common controls are defined in aom.h
|
||||
*
|
||||
*/
|
||||
|
||||
AOM_CTRL_USE_TYPE_DEPRECATED(AOME_USE_REFERENCE, int)
|
||||
#define AOM_CTRL_AOME_USE_REFERENCE
|
||||
AOM_CTRL_USE_TYPE(AOME_SET_FRAME_FLAGS, int)
|
||||
#define AOM_CTRL_AOME_SET_FRAME_FLAGS
|
||||
AOM_CTRL_USE_TYPE(AOME_SET_ROI_MAP, aom_roi_map_t *)
|
||||
#define AOM_CTRL_AOME_SET_ROI_MAP
|
||||
AOM_CTRL_USE_TYPE(AOME_SET_ACTIVEMAP, aom_active_map_t *)
|
||||
#define AOM_CTRL_AOME_SET_ACTIVEMAP
|
||||
AOM_CTRL_USE_TYPE(AOME_SET_SCALEMODE, aom_scaling_mode_t *)
|
||||
#define AOM_CTRL_AOME_SET_SCALEMODE
|
||||
|
||||
AOM_CTRL_USE_TYPE(AOME_SET_CPUUSED, int)
|
||||
#define AOM_CTRL_AOME_SET_CPUUSED
|
||||
AOM_CTRL_USE_TYPE(AOME_SET_ENABLEAUTOALTREF, unsigned int)
|
||||
#define AOM_CTRL_AOME_SET_ENABLEAUTOALTREF
|
||||
|
||||
#if CONFIG_EXT_REFS
|
||||
AOM_CTRL_USE_TYPE(AOME_SET_ENABLEAUTOBWDREF, unsigned int)
|
||||
#define AOM_CTRL_AOME_SET_ENABLEAUTOBWDREF
|
||||
#endif // CONFIG_EXT_REFS
|
||||
|
||||
AOM_CTRL_USE_TYPE(AOME_SET_NOISE_SENSITIVITY, unsigned int)
|
||||
#define AOM_CTRL_AOME_SET_NOISE_SENSITIVITY
|
||||
AOM_CTRL_USE_TYPE(AOME_SET_SHARPNESS, unsigned int)
|
||||
#define AOM_CTRL_AOME_SET_SHARPNESS
|
||||
AOM_CTRL_USE_TYPE(AOME_SET_STATIC_THRESHOLD, unsigned int)
|
||||
#define AOM_CTRL_AOME_SET_STATIC_THRESHOLD
|
||||
AOM_CTRL_USE_TYPE(AOME_SET_TOKEN_PARTITIONS, int) /* aome_token_partitions */
|
||||
#define AOM_CTRL_AOME_SET_TOKEN_PARTITIONS
|
||||
|
||||
AOM_CTRL_USE_TYPE(AOME_SET_ARNR_MAXFRAMES, unsigned int)
|
||||
#define AOM_CTRL_AOME_SET_ARNR_MAXFRAMES
|
||||
AOM_CTRL_USE_TYPE(AOME_SET_ARNR_STRENGTH, unsigned int)
|
||||
#define AOM_CTRL_AOME_SET_ARNR_STRENGTH
|
||||
AOM_CTRL_USE_TYPE_DEPRECATED(AOME_SET_ARNR_TYPE, unsigned int)
|
||||
#define AOM_CTRL_AOME_SET_ARNR_TYPE
|
||||
AOM_CTRL_USE_TYPE(AOME_SET_TUNING, int) /* aom_tune_metric */
|
||||
#define AOM_CTRL_AOME_SET_TUNING
|
||||
AOM_CTRL_USE_TYPE(AOME_SET_CQ_LEVEL, unsigned int)
|
||||
#define AOM_CTRL_AOME_SET_CQ_LEVEL
|
||||
|
||||
AOM_CTRL_USE_TYPE(AV1E_SET_TILE_COLUMNS, int)
|
||||
#define AOM_CTRL_AV1E_SET_TILE_COLUMNS
|
||||
AOM_CTRL_USE_TYPE(AV1E_SET_TILE_ROWS, int)
|
||||
#define AOM_CTRL_AV1E_SET_TILE_ROWS
|
||||
|
||||
AOM_CTRL_USE_TYPE(AOME_GET_LAST_QUANTIZER, int *)
|
||||
#define AOM_CTRL_AOME_GET_LAST_QUANTIZER
|
||||
AOM_CTRL_USE_TYPE(AOME_GET_LAST_QUANTIZER_64, int *)
|
||||
#define AOM_CTRL_AOME_GET_LAST_QUANTIZER_64
|
||||
|
||||
AOM_CTRL_USE_TYPE(AOME_SET_MAX_INTRA_BITRATE_PCT, unsigned int)
|
||||
#define AOM_CTRL_AOME_SET_MAX_INTRA_BITRATE_PCT
|
||||
AOM_CTRL_USE_TYPE(AV1E_SET_MAX_INTER_BITRATE_PCT, unsigned int) /* id must match the enumerator in aome_enc_control_id */
|
||||
#define AOM_CTRL_AV1E_SET_MAX_INTER_BITRATE_PCT
|
||||
|
||||
AOM_CTRL_USE_TYPE(AOME_SET_SCREEN_CONTENT_MODE, unsigned int)
|
||||
#define AOM_CTRL_AOME_SET_SCREEN_CONTENT_MODE
|
||||
|
||||
AOM_CTRL_USE_TYPE(AV1E_SET_GF_CBR_BOOST_PCT, unsigned int)
|
||||
#define AOM_CTRL_AV1E_SET_GF_CBR_BOOST_PCT
|
||||
|
||||
AOM_CTRL_USE_TYPE(AV1E_SET_LOSSLESS, unsigned int)
|
||||
#define AOM_CTRL_AV1E_SET_LOSSLESS
|
||||
|
||||
#if CONFIG_AOM_QM
|
||||
AOM_CTRL_USE_TYPE(AV1E_SET_ENABLE_QM, unsigned int)
|
||||
#define AOM_CTRL_AV1E_SET_ENABLE_QM
|
||||
|
||||
AOM_CTRL_USE_TYPE(AV1E_SET_QM_MIN, unsigned int)
|
||||
#define AOM_CTRL_AV1E_SET_QM_MIN
|
||||
|
||||
AOM_CTRL_USE_TYPE(AV1E_SET_QM_MAX, unsigned int)
|
||||
#define AOM_CTRL_AV1E_SET_QM_MAX
|
||||
#endif
|
||||
|
||||
AOM_CTRL_USE_TYPE(AV1E_SET_FRAME_PARALLEL_DECODING, unsigned int)
|
||||
#define AOM_CTRL_AV1E_SET_FRAME_PARALLEL_DECODING
|
||||
|
||||
AOM_CTRL_USE_TYPE(AV1E_SET_AQ_MODE, unsigned int)
|
||||
#define AOM_CTRL_AV1E_SET_AQ_MODE
|
||||
|
||||
AOM_CTRL_USE_TYPE(AV1E_SET_FRAME_PERIODIC_BOOST, unsigned int)
|
||||
#define AOM_CTRL_AV1E_SET_FRAME_PERIODIC_BOOST
|
||||
|
||||
AOM_CTRL_USE_TYPE(AV1E_SET_NOISE_SENSITIVITY, unsigned int)
|
||||
#define AOM_CTRL_AV1E_SET_NOISE_SENSITIVITY
|
||||
|
||||
AOM_CTRL_USE_TYPE(AV1E_SET_TUNE_CONTENT, int) /* aom_tune_content */
|
||||
#define AOM_CTRL_AV1E_SET_TUNE_CONTENT
|
||||
|
||||
AOM_CTRL_USE_TYPE(AV1E_SET_COLOR_SPACE, int)
|
||||
#define AOM_CTRL_AV1E_SET_COLOR_SPACE
|
||||
|
||||
AOM_CTRL_USE_TYPE(AV1E_SET_MIN_GF_INTERVAL, unsigned int)
|
||||
#define AOM_CTRL_AV1E_SET_MIN_GF_INTERVAL
|
||||
|
||||
AOM_CTRL_USE_TYPE(AV1E_SET_MAX_GF_INTERVAL, unsigned int)
|
||||
#define AOM_CTRL_AV1E_SET_MAX_GF_INTERVAL
|
||||
|
||||
AOM_CTRL_USE_TYPE(AV1E_GET_ACTIVEMAP, aom_active_map_t *)
|
||||
#define AOM_CTRL_AV1E_GET_ACTIVEMAP
|
||||
|
||||
AOM_CTRL_USE_TYPE(AV1E_SET_COLOR_RANGE, int)
|
||||
#define AOM_CTRL_AV1E_SET_COLOR_RANGE
|
||||
|
||||
/*!\brief
|
||||
*
|
||||
* TODO(rbultje) : add support of the control in ffmpeg
|
||||
*/
|
||||
#define AOM_CTRL_AV1E_SET_RENDER_SIZE
|
||||
AOM_CTRL_USE_TYPE(AV1E_SET_RENDER_SIZE, int *)
|
||||
|
||||
AOM_CTRL_USE_TYPE(AV1E_SET_SUPERBLOCK_SIZE, unsigned int)
|
||||
#define AOM_CTRL_AV1E_SET_SUPERBLOCK_SIZE
|
||||
|
||||
AOM_CTRL_USE_TYPE(AV1E_SET_TARGET_LEVEL, unsigned int)
|
||||
#define AOM_CTRL_AV1E_SET_TARGET_LEVEL
|
||||
|
||||
AOM_CTRL_USE_TYPE(AV1E_GET_LEVEL, int *)
|
||||
#define AOM_CTRL_AV1E_GET_LEVEL
|
||||
/*!\endcond */
|
||||
/*! @} - end defgroup aom_encoder */
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
#endif
|
||||
|
||||
#endif // AOM_AOMCX_H_
|
||||
191
aom/aomdx.h
191
aom/aomdx.h
@@ -1,191 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2016, Alliance for Open Media. All rights reserved
|
||||
*
|
||||
* This source code is subject to the terms of the BSD 2 Clause License and
|
||||
* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
|
||||
* was not distributed with this source code in the LICENSE file, you can
|
||||
* obtain it at www.aomedia.org/license/software. If the Alliance for Open
|
||||
* Media Patent License 1.0 was not distributed with this source code in the
|
||||
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
|
||||
*/
|
||||
|
||||
/*!\defgroup aom_decoder AOMedia AOM/AV1 Decoder
|
||||
* \ingroup aom
|
||||
*
|
||||
* @{
|
||||
*/
|
||||
/*!\file
|
||||
* \brief Provides definitions for using AOM or AV1 within the aom Decoder
|
||||
* interface.
|
||||
*/
|
||||
#ifndef AOM_AOMDX_H_
|
||||
#define AOM_AOMDX_H_
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/* Include controls common to both the encoder and decoder */
|
||||
#include "./aom.h"
|
||||
|
||||
/*!\name Algorithm interface for AV1
|
||||
*
|
||||
* This interface provides the capability to decode AV1 streams.
|
||||
* @{
|
||||
*/
|
||||
extern aom_codec_iface_t aom_codec_av1_dx_algo;
|
||||
extern aom_codec_iface_t *aom_codec_av1_dx(void);
|
||||
/*!@} - end algorithm interface member group*/
|
||||
|
||||
/** Data structure that stores bit accounting for debug
|
||||
*/
|
||||
typedef struct Accounting Accounting;
|
||||
|
||||
/*!\enum aom_dec_control_id
|
||||
* \brief AOM decoder control functions
|
||||
*
|
||||
* This set of macros define the control functions available for the AOM
|
||||
* decoder interface.
|
||||
*
|
||||
* \sa #aom_codec_control
|
||||
*/
|
||||
enum aom_dec_control_id {
|
||||
/** control function to get info on which reference frames were updated
|
||||
* by the last decode
|
||||
*/
|
||||
AOMD_GET_LAST_REF_UPDATES = AOM_DECODER_CTRL_ID_START,
|
||||
|
||||
/** check if the indicated frame is corrupted */
|
||||
AOMD_GET_FRAME_CORRUPTED,
|
||||
|
||||
/** control function to get info on which reference frames were used
|
||||
* by the last decode
|
||||
*/
|
||||
AOMD_GET_LAST_REF_USED,
|
||||
|
||||
/** decryption function to decrypt encoded buffer data immediately
|
||||
* before decoding. Takes a aom_decrypt_init, which contains
|
||||
* a callback function and opaque context pointer.
|
||||
*/
|
||||
AOMD_SET_DECRYPTOR,
|
||||
// AOMD_SET_DECRYPTOR = AOMD_SET_DECRYPTOR,
|
||||
|
||||
/** control function to get the dimensions that the current frame is decoded
|
||||
* at. This may be different to the intended display size for the frame as
|
||||
* specified in the wrapper or frame header (see AV1D_GET_DISPLAY_SIZE). */
|
||||
AV1D_GET_FRAME_SIZE,
|
||||
|
||||
/** control function to get the current frame's intended display dimensions
|
||||
* (as specified in the wrapper or frame header). This may be different to
|
||||
* the decoded dimensions of this frame (see AV1D_GET_FRAME_SIZE). */
|
||||
AV1D_GET_DISPLAY_SIZE,
|
||||
|
||||
/** control function to get the bit depth of the stream. */
|
||||
AV1D_GET_BIT_DEPTH,
|
||||
|
||||
/** control function to set the byte alignment of the planes in the reference
|
||||
* buffers. Valid values are power of 2, from 32 to 1024. A value of 0 sets
|
||||
* legacy alignment. I.e. Y plane is aligned to 32 bytes, U plane directly
|
||||
* follows Y plane, and V plane directly follows U plane. Default value is 0.
|
||||
*/
|
||||
AV1_SET_BYTE_ALIGNMENT,
|
||||
|
||||
/** control function to invert the decoding order to be from right to left. The
|
||||
* function is used in a test to confirm the decoding independence of tile
|
||||
* columns. The function may be used in application where this order
|
||||
* of decoding is desired.
|
||||
*
|
||||
* TODO(yaowu): Rework the unit test that uses this control, and in a future
|
||||
* release, this test-only control shall be removed.
|
||||
*/
|
||||
AV1_INVERT_TILE_DECODE_ORDER,
|
||||
|
||||
/** control function to set the skip loop filter flag. Valid values are
|
||||
* integers. The decoder will skip the loop filter when its value is set to
|
||||
* nonzero. If the loop filter is skipped the decoder may accumulate decode
|
||||
* artifacts. The default value is 0.
|
||||
*/
|
||||
AV1_SET_SKIP_LOOP_FILTER,
|
||||
|
||||
/** control function to retrieve a pointer to the Accounting struct. When
|
||||
* compiled without --enable-accounting, this returns AOM_CODEC_INCAPABLE.
|
||||
* If called before a frame has been decoded, this returns AOM_CODEC_ERROR.
|
||||
* The caller should ensure that AOM_CODEC_OK is returned before attempting
|
||||
* to dereference the Accounting pointer.
|
||||
*/
|
||||
AV1_GET_ACCOUNTING,
|
||||
|
||||
AOM_DECODER_CTRL_ID_MAX,
|
||||
|
||||
/** control function to set the range of tile decoding. A value that is
|
||||
* greater than or equal to zero indicates only the specific row/column is
|
||||
* decoded. A value that is -1 indicates the whole row/column is decoded.
|
||||
* A special case is both values are -1 that means the whole frame is
|
||||
* decoded.
|
||||
*/
|
||||
AV1_SET_DECODE_TILE_ROW,
|
||||
AV1_SET_DECODE_TILE_COL
|
||||
};
|
||||
|
||||
/** Decrypt count bytes of data from input -> output, using the decrypt_state
 * passed in AOMD_SET_DECRYPTOR.
 */
typedef void (*aom_decrypt_cb)(void *decrypt_state, const unsigned char *input,
                               unsigned char *output, int count);

/*!\brief Structure to hold decryption state
 *
 * Defines a structure to hold the decryption state and access function.
 *
 * Note: the former "deprecated alias" typedef that followed this definition
 * redeclared aom_decrypt_init as itself. It added no new name and is a
 * constraint violation in pre-C11 C, so it has been dropped; the type name
 * aom_decrypt_init is unchanged.
 */
typedef struct aom_decrypt_init {
  /*! Decrypt callback. */
  aom_decrypt_cb decrypt_cb;

  /*! Decryption state. */
  void *decrypt_state;
} aom_decrypt_init;
|
||||
|
||||
/*!\cond */
|
||||
/*!\brief AOM decoder control function parameter type
|
||||
*
|
||||
* Defines the data types that AOMD control functions take. Note that
|
||||
* additional common controls are defined in aom.h
|
||||
*
|
||||
*/
|
||||
|
||||
AOM_CTRL_USE_TYPE(AOMD_GET_LAST_REF_UPDATES, int *)
|
||||
#define AOM_CTRL_AOMD_GET_LAST_REF_UPDATES
|
||||
AOM_CTRL_USE_TYPE(AOMD_GET_FRAME_CORRUPTED, int *)
|
||||
#define AOM_CTRL_AOMD_GET_FRAME_CORRUPTED
|
||||
AOM_CTRL_USE_TYPE(AOMD_GET_LAST_REF_USED, int *)
|
||||
#define AOM_CTRL_AOMD_GET_LAST_REF_USED
|
||||
AOM_CTRL_USE_TYPE(AOMD_SET_DECRYPTOR, aom_decrypt_init *)
|
||||
#define AOM_CTRL_AOMD_SET_DECRYPTOR
|
||||
// AOM_CTRL_USE_TYPE(AOMD_SET_DECRYPTOR, aom_decrypt_init *)
|
||||
//#define AOM_CTRL_AOMD_SET_DECRYPTOR
|
||||
AOM_CTRL_USE_TYPE(AV1D_GET_DISPLAY_SIZE, int *)
|
||||
#define AOM_CTRL_AV1D_GET_DISPLAY_SIZE
|
||||
AOM_CTRL_USE_TYPE(AV1D_GET_BIT_DEPTH, unsigned int *)
|
||||
#define AOM_CTRL_AV1D_GET_BIT_DEPTH
|
||||
AOM_CTRL_USE_TYPE(AV1D_GET_FRAME_SIZE, int *)
|
||||
#define AOM_CTRL_AV1D_GET_FRAME_SIZE
|
||||
AOM_CTRL_USE_TYPE(AV1_INVERT_TILE_DECODE_ORDER, int)
|
||||
#define AOM_CTRL_AV1_INVERT_TILE_DECODE_ORDER
|
||||
AOM_CTRL_USE_TYPE(AV1_GET_ACCOUNTING, Accounting **)
|
||||
#define AOM_CTRL_AV1_GET_ACCOUNTING
|
||||
AOM_CTRL_USE_TYPE(AV1_SET_DECODE_TILE_ROW, int)
|
||||
#define AOM_CTRL_AV1_SET_DECODE_TILE_ROW
|
||||
AOM_CTRL_USE_TYPE(AV1_SET_DECODE_TILE_COL, int)
|
||||
#define AOM_CTRL_AV1_SET_DECODE_TILE_COL
|
||||
/*!\endcond */
|
||||
/*! @} - end defgroup aom_decoder */
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
#endif
|
||||
|
||||
#endif // AOM_AOMDX_H_
|
||||
@@ -1,16 +0,0 @@
|
||||
text aom_codec_build_config
|
||||
text aom_codec_control_
|
||||
text aom_codec_destroy
|
||||
text aom_codec_err_to_string
|
||||
text aom_codec_error
|
||||
text aom_codec_error_detail
|
||||
text aom_codec_get_caps
|
||||
text aom_codec_iface_name
|
||||
text aom_codec_version
|
||||
text aom_codec_version_extra_str
|
||||
text aom_codec_version_str
|
||||
text aom_img_alloc
|
||||
text aom_img_flip
|
||||
text aom_img_free
|
||||
text aom_img_set_rect
|
||||
text aom_img_wrap
|
||||
@@ -1,8 +0,0 @@
|
||||
text aom_codec_dec_init_ver
|
||||
text aom_codec_decode
|
||||
text aom_codec_get_frame
|
||||
text aom_codec_get_stream_info
|
||||
text aom_codec_peek_stream_info
|
||||
text aom_codec_register_put_frame_cb
|
||||
text aom_codec_register_put_slice_cb
|
||||
text aom_codec_set_frame_buffer_functions
|
||||
@@ -1,9 +0,0 @@
|
||||
text aom_codec_enc_config_default
|
||||
text aom_codec_enc_config_set
|
||||
text aom_codec_enc_init_multi_ver
|
||||
text aom_codec_enc_init_ver
|
||||
text aom_codec_encode
|
||||
text aom_codec_get_cx_data
|
||||
text aom_codec_get_global_headers
|
||||
text aom_codec_get_preview_frame
|
||||
text aom_codec_set_cx_data_buf
|
||||
@@ -1,465 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2016, Alliance for Open Media. All rights reserved
|
||||
*
|
||||
* This source code is subject to the terms of the BSD 2 Clause License and
|
||||
* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
|
||||
* was not distributed with this source code in the LICENSE file, you can
|
||||
* obtain it at www.aomedia.org/license/software. If the Alliance for Open
|
||||
* Media Patent License 1.0 was not distributed with this source code in the
|
||||
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
|
||||
*/
|
||||
|
||||
/*!\file
|
||||
* \brief Describes the decoder algorithm interface for algorithm
|
||||
* implementations.
|
||||
*
|
||||
* This file defines the private structures and data types that are only
|
||||
* relevant to implementing an algorithm, as opposed to using it.
|
||||
*
|
||||
* To create a decoder algorithm class, an interface structure is put
|
||||
* into the global namespace:
|
||||
* <pre>
|
||||
* my_codec.c:
|
||||
* aom_codec_iface_t my_codec = {
|
||||
* "My Codec v1.0",
|
||||
* AOM_CODEC_ALG_ABI_VERSION,
|
||||
* ...
|
||||
* };
|
||||
* </pre>
|
||||
*
|
||||
* An application instantiates a specific decoder instance by using
|
||||
* aom_codec_init() and a pointer to the algorithm's interface structure:
|
||||
* <pre>
|
||||
* my_app.c:
|
||||
* extern aom_codec_iface_t my_codec;
|
||||
* {
|
||||
* aom_codec_ctx_t algo;
|
||||
* res = aom_codec_init(&algo, &my_codec);
|
||||
* }
|
||||
* </pre>
|
||||
*
|
||||
* Once initialized, the instance is managed using other functions from
|
||||
* the aom_codec_* family.
|
||||
*/
|
||||
#ifndef AOM_INTERNAL_AOM_CODEC_INTERNAL_H_
|
||||
#define AOM_INTERNAL_AOM_CODEC_INTERNAL_H_
|
||||
#include "./aom_config.h"
|
||||
#include "../aom_decoder.h"
|
||||
#include "../aom_encoder.h"
|
||||
#include <stdarg.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/*!\brief Current ABI version number
|
||||
*
|
||||
* \internal
|
||||
* If this file is altered in any way that changes the ABI, this value
|
||||
* must be bumped. Examples include, but are not limited to, changing
|
||||
* types, removing or reassigning enums, adding/removing/rearranging
|
||||
* fields to structures
|
||||
*/
|
||||
#define AOM_CODEC_INTERNAL_ABI_VERSION (5) /**<\hideinitializer*/
|
||||
|
||||
typedef struct aom_codec_alg_priv aom_codec_alg_priv_t;
|
||||
typedef struct aom_codec_priv_enc_mr_cfg aom_codec_priv_enc_mr_cfg_t;
|
||||
|
||||
/*!\brief init function pointer prototype
|
||||
*
|
||||
* Performs algorithm-specific initialization of the decoder context. This
|
||||
* function is called by the generic aom_codec_init() wrapper function, so
|
||||
* plugins implementing this interface may trust the input parameters to be
|
||||
* properly initialized.
|
||||
*
|
||||
* \param[in] ctx Pointer to this instance's context
|
||||
* \retval #AOM_CODEC_OK
|
||||
* The input stream was recognized and decoder initialized.
|
||||
* \retval #AOM_CODEC_MEM_ERROR
|
||||
* Memory operation failed.
|
||||
*/
|
||||
typedef aom_codec_err_t (*aom_codec_init_fn_t)(
|
||||
aom_codec_ctx_t *ctx, aom_codec_priv_enc_mr_cfg_t *data);
|
||||
|
||||
/*!\brief destroy function pointer prototype
|
||||
*
|
||||
* Performs algorithm-specific destruction of the decoder context. This
|
||||
* function is called by the generic aom_codec_destroy() wrapper function,
|
||||
* so plugins implementing this interface may trust the input parameters
|
||||
* to be properly initialized.
|
||||
*
|
||||
* \param[in] ctx Pointer to this instance's context
|
||||
* \retval #AOM_CODEC_OK
|
||||
* The decoder context was destroyed.
|
||||
* \retval #AOM_CODEC_MEM_ERROR
|
||||
* Memory operation failed.
|
||||
*/
|
||||
typedef aom_codec_err_t (*aom_codec_destroy_fn_t)(aom_codec_alg_priv_t *ctx);
|
||||
|
||||
/*!\brief parse stream info function pointer prototype
|
||||
*
|
||||
* Performs high level parsing of the bitstream. This function is called by the
|
||||
* generic aom_codec_peek_stream_info() wrapper function, so plugins
|
||||
* implementing this interface may trust the input parameters to be properly
|
||||
* initialized.
|
||||
*
|
||||
* \param[in] data Pointer to a block of data to parse
|
||||
* \param[in] data_sz Size of the data buffer
|
||||
* \param[in,out] si Pointer to stream info to update. The size member
|
||||
* \ref MUST be properly initialized, but \ref MAY be
|
||||
* clobbered by the algorithm. This parameter \ref MAY
|
||||
* be NULL.
|
||||
*
|
||||
* \retval #AOM_CODEC_OK
|
||||
* Bitstream is parsable and stream information updated
|
||||
*/
|
||||
typedef aom_codec_err_t (*aom_codec_peek_si_fn_t)(const uint8_t *data,
|
||||
unsigned int data_sz,
|
||||
aom_codec_stream_info_t *si);
|
||||
|
||||
/*!\brief Return information about the current stream.
|
||||
*
|
||||
* Returns information about the stream that has been parsed during decoding.
|
||||
*
|
||||
* \param[in] ctx Pointer to this instance's context
|
||||
* \param[in,out] si Pointer to stream info to update. The size member
|
||||
* \ref MUST be properly initialized, but \ref MAY be
|
||||
* clobbered by the algorithm. This parameter \ref MAY
|
||||
* be NULL.
|
||||
*
|
||||
* \retval #AOM_CODEC_OK
|
||||
* Bitstream is parsable and stream information updated
|
||||
*/
|
||||
typedef aom_codec_err_t (*aom_codec_get_si_fn_t)(aom_codec_alg_priv_t *ctx,
|
||||
aom_codec_stream_info_t *si);
|
||||
|
||||
/*!\brief control function pointer prototype
|
||||
*
|
||||
* This function is used to exchange algorithm specific data with the decoder
|
||||
* instance. This can be used to implement features specific to a particular
|
||||
* algorithm.
|
||||
*
|
||||
* This function is called by the generic aom_codec_control() wrapper
|
||||
* function, so plugins implementing this interface may trust the input
|
||||
* parameters to be properly initialized. However, this interface does not
|
||||
* provide type safety for the exchanged data or assign meanings to the
|
||||
* control codes. Those details should be specified in the algorithm's
|
||||
* header file. In particular, the ctrl_id parameter is guaranteed to exist
|
||||
* in the algorithm's control mapping table, and the data parameter may be NULL.
|
||||
*
|
||||
*
|
||||
* \param[in] ctx Pointer to this instance's context
|
||||
* \param[in] ctrl_id Algorithm specific control identifier
|
||||
* \param[in,out] data Data to exchange with algorithm instance.
|
||||
*
|
||||
* \retval #AOM_CODEC_OK
|
||||
* The internal state data was deserialized.
|
||||
*/
|
||||
typedef aom_codec_err_t (*aom_codec_control_fn_t)(aom_codec_alg_priv_t *ctx,
|
||||
va_list ap);
|
||||
|
||||
/*!\brief control function pointer mapping
|
||||
*
|
||||
* This structure stores the mapping between control identifiers and
|
||||
* implementing functions. Each algorithm provides a list of these
|
||||
* mappings. This list is searched by the aom_codec_control() wrapper
|
||||
* function to determine which function to invoke. The special
|
||||
* value {0, NULL} is used to indicate end-of-list, and must be
|
||||
* present. The special value {0, <non-null>} can be used as a catch-all
|
||||
* mapping. This implies that ctrl_id values chosen by the algorithm
|
||||
* \ref MUST be non-zero.
|
||||
*/
|
||||
typedef const struct aom_codec_ctrl_fn_map {
|
||||
int ctrl_id;
|
||||
aom_codec_control_fn_t fn;
|
||||
} aom_codec_ctrl_fn_map_t;
|
||||
|
||||
/*!\brief decode data function pointer prototype
|
||||
*
|
||||
* Processes a buffer of coded data. If the processing results in a new
|
||||
* decoded frame becoming available, #AOM_CODEC_CB_PUT_SLICE and
|
||||
* #AOM_CODEC_CB_PUT_FRAME events are generated as appropriate. This
|
||||
* function is called by the generic aom_codec_decode() wrapper function,
|
||||
* so plugins implementing this interface may trust the input parameters
|
||||
* to be properly initialized.
|
||||
*
|
||||
* \param[in] ctx Pointer to this instance's context
|
||||
* \param[in] data Pointer to this block of new coded data. If
|
||||
* NULL, a #AOM_CODEC_CB_PUT_FRAME event is posted
|
||||
* for the previously decoded frame.
|
||||
* \param[in] data_sz Size of the coded data, in bytes.
|
||||
*
|
||||
* \return Returns #AOM_CODEC_OK if the coded data was processed completely
|
||||
* and future pictures can be decoded without error. Otherwise,
|
||||
* see the descriptions of the other error codes in ::aom_codec_err_t
|
||||
* for recoverability capabilities.
|
||||
*/
|
||||
typedef aom_codec_err_t (*aom_codec_decode_fn_t)(aom_codec_alg_priv_t *ctx,
|
||||
const uint8_t *data,
|
||||
unsigned int data_sz,
|
||||
void *user_priv,
|
||||
long deadline);
|
||||
|
||||
/*!\brief Decoded frames iterator
|
||||
*
|
||||
* Iterates over a list of the frames available for display. The iterator
|
||||
* storage should be initialized to NULL to start the iteration. Iteration is
|
||||
* complete when this function returns NULL.
|
||||
*
|
||||
* The list of available frames becomes valid upon completion of the
|
||||
* aom_codec_decode call, and remains valid until the next call to
|
||||
* aom_codec_decode.
|
||||
*
|
||||
* \param[in] ctx Pointer to this instance's context
|
||||
* \param[in,out] iter Iterator storage, initialized to NULL
|
||||
*
|
||||
* \return Returns a pointer to an image, if one is ready for display. Frames
|
||||
* produced will always be in PTS (presentation time stamp) order.
|
||||
*/
|
||||
typedef aom_image_t *(*aom_codec_get_frame_fn_t)(aom_codec_alg_priv_t *ctx,
|
||||
aom_codec_iter_t *iter);
|
||||
|
||||
/*!\brief Pass in external frame buffers for the decoder to use.
|
||||
*
|
||||
* Registers functions to be called when libaom needs a frame buffer
|
||||
* to decode the current frame and a function to be called when libaom does
|
||||
* not internally reference the frame buffer. This set function must
|
||||
* be called before the first call to decode or libaom will assume the
|
||||
* default behavior of allocating frame buffers internally.
|
||||
*
|
||||
* \param[in] ctx Pointer to this instance's context
|
||||
* \param[in] cb_get Pointer to the get callback function
|
||||
* \param[in] cb_release Pointer to the release callback function
|
||||
* \param[in] cb_priv Callback's private data
|
||||
*
|
||||
* \retval #AOM_CODEC_OK
|
||||
* External frame buffers will be used by libaom.
|
||||
* \retval #AOM_CODEC_INVALID_PARAM
|
||||
* One or more of the callbacks were NULL.
|
||||
* \retval #AOM_CODEC_ERROR
|
||||
* Decoder context not initialized, or algorithm not capable of
|
||||
* using external frame buffers.
|
||||
*
|
||||
* \note
|
||||
* When decoding AV1, the application may be required to pass in at least
|
||||
* #AOM_MAXIMUM_WORK_BUFFERS external frame
|
||||
* buffers.
|
||||
*/
|
||||
typedef aom_codec_err_t (*aom_codec_set_fb_fn_t)(
|
||||
aom_codec_alg_priv_t *ctx, aom_get_frame_buffer_cb_fn_t cb_get,
|
||||
aom_release_frame_buffer_cb_fn_t cb_release, void *cb_priv);
|
||||
|
||||
typedef aom_codec_err_t (*aom_codec_encode_fn_t)(aom_codec_alg_priv_t *ctx,
|
||||
const aom_image_t *img,
|
||||
aom_codec_pts_t pts,
|
||||
unsigned long duration,
|
||||
aom_enc_frame_flags_t flags,
|
||||
unsigned long deadline);
|
||||
typedef const aom_codec_cx_pkt_t *(*aom_codec_get_cx_data_fn_t)(
|
||||
aom_codec_alg_priv_t *ctx, aom_codec_iter_t *iter);
|
||||
|
||||
typedef aom_codec_err_t (*aom_codec_enc_config_set_fn_t)(
|
||||
aom_codec_alg_priv_t *ctx, const aom_codec_enc_cfg_t *cfg);
|
||||
typedef aom_fixed_buf_t *(*aom_codec_get_global_headers_fn_t)(
|
||||
aom_codec_alg_priv_t *ctx);
|
||||
|
||||
typedef aom_image_t *(*aom_codec_get_preview_frame_fn_t)(
|
||||
aom_codec_alg_priv_t *ctx);
|
||||
|
||||
typedef aom_codec_err_t (*aom_codec_enc_mr_get_mem_loc_fn_t)(
|
||||
const aom_codec_enc_cfg_t *cfg, void **mem_loc);
|
||||
|
||||
/*!\brief usage configuration mapping
|
||||
*
|
||||
* This structure stores the mapping between usage identifiers and
|
||||
* configuration structures. Each algorithm provides a list of these
|
||||
* mappings. This list is searched by the aom_codec_enc_config_default()
|
||||
* wrapper function to determine which config to return. The special value
|
||||
* {-1, {0}} is used to indicate end-of-list, and must be present. At least
|
||||
* one mapping must be present, in addition to the end-of-list.
|
||||
*
|
||||
*/
|
||||
typedef const struct aom_codec_enc_cfg_map {
|
||||
int usage;
|
||||
aom_codec_enc_cfg_t cfg;
|
||||
} aom_codec_enc_cfg_map_t;
|
||||
|
||||
/*!\brief Decoder algorithm interface
|
||||
*
|
||||
* All decoders \ref MUST expose a variable of this type.
|
||||
*/
|
||||
struct aom_codec_iface {
|
||||
const char *name; /**< Identification String */
|
||||
int abi_version; /**< Implemented ABI version */
|
||||
aom_codec_caps_t caps; /**< Decoder capabilities */
|
||||
aom_codec_init_fn_t init; /**< \copydoc ::aom_codec_init_fn_t */
|
||||
aom_codec_destroy_fn_t destroy; /**< \copydoc ::aom_codec_destroy_fn_t */
|
||||
aom_codec_ctrl_fn_map_t *ctrl_maps; /**< \copydoc ::aom_codec_ctrl_fn_map_t */
|
||||
struct aom_codec_dec_iface {
|
||||
aom_codec_peek_si_fn_t peek_si; /**< \copydoc ::aom_codec_peek_si_fn_t */
|
||||
aom_codec_get_si_fn_t get_si; /**< \copydoc ::aom_codec_get_si_fn_t */
|
||||
aom_codec_decode_fn_t decode; /**< \copydoc ::aom_codec_decode_fn_t */
|
||||
aom_codec_get_frame_fn_t
|
||||
get_frame; /**< \copydoc ::aom_codec_get_frame_fn_t */
|
||||
aom_codec_set_fb_fn_t set_fb_fn; /**< \copydoc ::aom_codec_set_fb_fn_t */
|
||||
} dec;
|
||||
struct aom_codec_enc_iface {
|
||||
int cfg_map_count;
|
||||
aom_codec_enc_cfg_map_t
|
||||
*cfg_maps; /**< \copydoc ::aom_codec_enc_cfg_map_t */
|
||||
aom_codec_encode_fn_t encode; /**< \copydoc ::aom_codec_encode_fn_t */
|
||||
aom_codec_get_cx_data_fn_t
|
||||
get_cx_data; /**< \copydoc ::aom_codec_get_cx_data_fn_t */
|
||||
aom_codec_enc_config_set_fn_t
|
||||
cfg_set; /**< \copydoc ::aom_codec_enc_config_set_fn_t */
|
||||
aom_codec_get_global_headers_fn_t
|
||||
get_glob_hdrs; /**< \copydoc ::aom_codec_get_global_headers_fn_t */
|
||||
aom_codec_get_preview_frame_fn_t
|
||||
get_preview; /**< \copydoc ::aom_codec_get_preview_frame_fn_t */
|
||||
aom_codec_enc_mr_get_mem_loc_fn_t
|
||||
mr_get_mem_loc; /**< \copydoc ::aom_codec_enc_mr_get_mem_loc_fn_t */
|
||||
} enc;
|
||||
};
|
||||
|
||||
/*!\brief Callback function pointer / user data pair storage */
|
||||
typedef struct aom_codec_priv_cb_pair {
|
||||
union {
|
||||
aom_codec_put_frame_cb_fn_t put_frame;
|
||||
aom_codec_put_slice_cb_fn_t put_slice;
|
||||
} u;
|
||||
void *user_priv;
|
||||
} aom_codec_priv_cb_pair_t;
|
||||
|
||||
/*!\brief Instance private storage
|
||||
*
|
||||
* This structure is allocated by the algorithm's init function. It can be
|
||||
* extended in one of two ways. First, a second, algorithm specific structure
|
||||
* can be allocated and the priv member pointed to it. Alternatively, this
|
||||
* structure can be made the first member of the algorithm specific structure,
|
||||
* and the pointer cast to the proper type.
|
||||
*/
|
||||
struct aom_codec_priv {
|
||||
const char *err_detail;
|
||||
aom_codec_flags_t init_flags;
|
||||
struct {
|
||||
aom_codec_priv_cb_pair_t put_frame_cb;
|
||||
aom_codec_priv_cb_pair_t put_slice_cb;
|
||||
} dec;
|
||||
struct {
|
||||
aom_fixed_buf_t cx_data_dst_buf;
|
||||
unsigned int cx_data_pad_before;
|
||||
unsigned int cx_data_pad_after;
|
||||
aom_codec_cx_pkt_t cx_data_pkt;
|
||||
unsigned int total_encoders;
|
||||
} enc;
|
||||
};
|
||||
|
||||
/*
|
||||
* Multi-resolution encoding internal configuration
|
||||
*/
|
||||
struct aom_codec_priv_enc_mr_cfg {
|
||||
unsigned int mr_total_resolutions;
|
||||
unsigned int mr_encoder_id;
|
||||
struct aom_rational mr_down_sampling_factor;
|
||||
void *mr_low_res_mode_info;
|
||||
};
|
||||
|
||||
#undef AOM_CTRL_USE_TYPE
|
||||
#define AOM_CTRL_USE_TYPE(id, typ) \
|
||||
static AOM_INLINE typ id##__value(va_list args) { return va_arg(args, typ); }
|
||||
|
||||
#undef AOM_CTRL_USE_TYPE_DEPRECATED
|
||||
#define AOM_CTRL_USE_TYPE_DEPRECATED(id, typ) \
|
||||
static AOM_INLINE typ id##__value(va_list args) { return va_arg(args, typ); }
|
||||
|
||||
#define CAST(id, arg) id##__value(arg)
|
||||
|
||||
/* CODEC_INTERFACE convenience macro
|
||||
*
|
||||
* By convention, each codec interface is a struct with extern linkage, where
|
||||
* the symbol is suffixed with _algo. A getter function is also defined to
|
||||
* return a pointer to the struct, since in some cases it's easier to work
|
||||
* with text symbols than data symbols (see issue #169). This function has
|
||||
* the same name as the struct, less the _algo suffix. The CODEC_INTERFACE
|
||||
* macro is provided to define this getter function automatically.
|
||||
*/
|
||||
#define CODEC_INTERFACE(id) \
|
||||
aom_codec_iface_t *id(void) { return &id##_algo; } \
|
||||
aom_codec_iface_t id##_algo
|
||||
|
||||
/* Internal Utility Functions
|
||||
*
|
||||
* The following functions are intended to be used inside algorithms as
|
||||
* utilities for manipulating aom_codec_* data structures.
|
||||
*/
|
||||
struct aom_codec_pkt_list {
|
||||
unsigned int cnt;
|
||||
unsigned int max;
|
||||
struct aom_codec_cx_pkt pkts[1];
|
||||
};
|
||||
|
||||
#define aom_codec_pkt_list_decl(n) \
|
||||
union { \
|
||||
struct aom_codec_pkt_list head; \
|
||||
struct { \
|
||||
struct aom_codec_pkt_list head; \
|
||||
struct aom_codec_cx_pkt pkts[n]; \
|
||||
} alloc; \
|
||||
}
|
||||
|
||||
/* Resets the packet count of the list declared with aom_codec_pkt_list_decl()
 * to zero and sets its capacity to the number of entries in alloc.pkts.
 *
 * The whole expansion is parenthesized so the macro behaves as a single
 * expression wherever it is used (initializer, function argument, operand);
 * its value is the capacity that was stored in head.max.
 */
#define aom_codec_pkt_list_init(m) \
  ((m)->alloc.head.cnt = 0,        \
   (m)->alloc.head.max = sizeof((m)->alloc.pkts) / sizeof((m)->alloc.pkts[0]))
|
||||
|
||||
int aom_codec_pkt_list_add(struct aom_codec_pkt_list *,
|
||||
const struct aom_codec_cx_pkt *);
|
||||
|
||||
const aom_codec_cx_pkt_t *aom_codec_pkt_list_get(
|
||||
struct aom_codec_pkt_list *list, aom_codec_iter_t *iter);
|
||||
|
||||
#include <stdio.h>
|
||||
#include <setjmp.h>
|
||||
|
||||
struct aom_internal_error_info {
|
||||
aom_codec_err_t error_code;
|
||||
int has_detail;
|
||||
char detail[80];
|
||||
int setjmp;
|
||||
jmp_buf jmp;
|
||||
};
|
||||
|
||||
#define CLANG_ANALYZER_NORETURN
|
||||
#if defined(__has_feature)
|
||||
#if __has_feature(attribute_analyzer_noreturn)
|
||||
#undef CLANG_ANALYZER_NORETURN
|
||||
#define CLANG_ANALYZER_NORETURN __attribute__((analyzer_noreturn))
|
||||
#endif
|
||||
#endif
|
||||
|
||||
void aom_internal_error(struct aom_internal_error_info *info,
|
||||
aom_codec_err_t error, const char *fmt,
|
||||
...) CLANG_ANALYZER_NORETURN;
|
||||
|
||||
#if CONFIG_DEBUG
|
||||
#define AOM_CHECK_MEM_ERROR(error_info, lval, expr) \
|
||||
do { \
|
||||
lval = (expr); \
|
||||
if (!lval) \
|
||||
aom_internal_error(error_info, AOM_CODEC_MEM_ERROR, \
|
||||
"Failed to allocate " #lval " at %s:%d", __FILE__, \
|
||||
__LINE__); \
|
||||
} while (0)
|
||||
#else
|
||||
#define AOM_CHECK_MEM_ERROR(error_info, lval, expr) \
|
||||
do { \
|
||||
lval = (expr); \
|
||||
if (!lval) \
|
||||
aom_internal_error(error_info, AOM_CODEC_MEM_ERROR, \
|
||||
"Failed to allocate " #lval); \
|
||||
} while (0)
|
||||
#endif
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
#endif
|
||||
|
||||
#endif // AOM_INTERNAL_AOM_CODEC_INTERNAL_H_
|
||||
@@ -1,134 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2016, Alliance for Open Media. All rights reserved
|
||||
*
|
||||
* This source code is subject to the terms of the BSD 2 Clause License and
|
||||
* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
|
||||
* was not distributed with this source code in the LICENSE file, you can
|
||||
* obtain it at www.aomedia.org/license/software. If the Alliance for Open
|
||||
* Media Patent License 1.0 was not distributed with this source code in the
|
||||
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
|
||||
*/
|
||||
|
||||
/*!\file
|
||||
* \brief Provides the high level interface to wrap decoder algorithms.
|
||||
*
|
||||
*/
|
||||
#include <stdarg.h>
|
||||
#include <stdlib.h>
|
||||
#include "aom/aom_integer.h"
|
||||
#include "aom/internal/aom_codec_internal.h"
|
||||
#include "aom_version.h"
|
||||
|
||||
/* Stores `var` in ctx->err when ctx is non-NULL; the expression yields the
 * stored value, or `var` itself when ctx is NULL. Both arguments are
 * parenthesized so that expression arguments (e.g. `&obj`) expand correctly.
 * Note that ctx is evaluated more than once. */
#define SAVE_STATUS(ctx, var) ((ctx) ? ((ctx)->err = (var)) : (var))
|
||||
|
||||
/* Returns the build's VERSION_PACKED constant. */
int aom_codec_version(void) {
  return VERSION_PACKED;
}
|
||||
|
||||
/* Returns the build's VERSION_STRING_NOSP constant. */
const char *aom_codec_version_str(void) {
  return VERSION_STRING_NOSP;
}
|
||||
|
||||
/* Returns the build's VERSION_EXTRA constant. */
const char *aom_codec_version_extra_str(void) {
  return VERSION_EXTRA;
}
|
||||
|
||||
/* Returns the name stored in the interface, or a fixed placeholder string
 * when iface is NULL. */
const char *aom_codec_iface_name(aom_codec_iface_t *iface) {
  if (!iface) {
    return "<invalid interface>";
  }
  return iface->name;
}
|
||||
|
||||
/* Maps an error code to a constant, human-readable message. Values that
 * match none of the cases yield "Unrecognized error code". The switch has
 * no default label, as in the original. */
const char *aom_codec_err_to_string(aom_codec_err_t err) {
  const char *text = "Unrecognized error code";

  switch (err) {
    case AOM_CODEC_OK: text = "Success"; break;
    case AOM_CODEC_ERROR: text = "Unspecified internal error"; break;
    case AOM_CODEC_MEM_ERROR: text = "Memory allocation error"; break;
    case AOM_CODEC_ABI_MISMATCH: text = "ABI version mismatch"; break;
    case AOM_CODEC_INCAPABLE:
      text = "Codec does not implement requested capability";
      break;
    case AOM_CODEC_UNSUP_BITSTREAM:
      text = "Bitstream not supported by this decoder";
      break;
    case AOM_CODEC_UNSUP_FEATURE:
      text = "Bitstream required feature not supported by this decoder";
      break;
    case AOM_CODEC_CORRUPT_FRAME: text = "Corrupt frame detected"; break;
    case AOM_CODEC_INVALID_PARAM: text = "Invalid parameter"; break;
    case AOM_CODEC_LIST_END: text = "End of iterated list"; break;
  }

  return text;
}
|
||||
|
||||
/* Returns the message for the context's stored error code; a NULL context
 * is reported as AOM_CODEC_INVALID_PARAM. */
const char *aom_codec_error(aom_codec_ctx_t *ctx) {
  if (!ctx) {
    return aom_codec_err_to_string(AOM_CODEC_INVALID_PARAM);
  }
  return aom_codec_err_to_string(ctx->err);
}
|
||||
|
||||
/* Returns the detail string for the context's current error, or NULL when
 * ctx is NULL or no error is recorded. The private data's detail is used
 * when private data exists; otherwise the context's own detail is used. */
const char *aom_codec_error_detail(aom_codec_ctx_t *ctx) {
  if (!ctx || !ctx->err) {
    return NULL;
  }

  if (ctx->priv) {
    return ctx->priv->err_detail;
  }
  return ctx->err_detail;
}
|
||||
|
||||
/* Invokes the algorithm's destroy callback and clears the context's iface,
 * name and priv pointers.
 *
 * Returns AOM_CODEC_INVALID_PARAM for a NULL ctx, AOM_CODEC_ERROR when the
 * context has no interface or private data, AOM_CODEC_OK otherwise. The
 * result is also stored in ctx->err whenever ctx is non-NULL. The destroy
 * callback's own return value is not inspected.
 */
aom_codec_err_t aom_codec_destroy(aom_codec_ctx_t *ctx) {
  if (!ctx) {
    return AOM_CODEC_INVALID_PARAM;
  }

  if (!ctx->iface || !ctx->priv) {
    return SAVE_STATUS(ctx, AOM_CODEC_ERROR);
  }

  ctx->iface->destroy((aom_codec_alg_priv_t *)ctx->priv);

  ctx->iface = NULL;
  ctx->name = NULL;
  ctx->priv = NULL;

  return SAVE_STATUS(ctx, AOM_CODEC_OK);
}
|
||||
|
||||
/* Returns the interface's capability bits, or 0 when iface is NULL. */
aom_codec_caps_t aom_codec_get_caps(aom_codec_iface_t *iface) {
  if (!iface) {
    return 0;
  }
  return iface->caps;
}
|
||||
|
||||
/* Dispatches a control request to the handler registered for ctrl_id.
 *
 * The interface's control map is walked until an entry with a NULL handler
 * is reached. The first entry whose ctrl_id equals the requested id, or
 * whose ctrl_id is 0 (catch-all), receives the variadic arguments and its
 * result becomes the return value.
 *
 * Returns AOM_CODEC_INVALID_PARAM for a NULL ctx or a zero ctrl_id, and
 * AOM_CODEC_ERROR when the context is not initialized, has no control map,
 * or no entry matches. The result is stored in ctx->err when ctx is
 * non-NULL.
 */
aom_codec_err_t aom_codec_control_(aom_codec_ctx_t *ctx, int ctrl_id, ...) {
  aom_codec_ctrl_fn_map_t *map;
  aom_codec_err_t status = AOM_CODEC_ERROR;

  if (!ctx || !ctrl_id) {
    return SAVE_STATUS(ctx, AOM_CODEC_INVALID_PARAM);
  }
  if (!ctx->iface || !ctx->priv || !ctx->iface->ctrl_maps) {
    return SAVE_STATUS(ctx, AOM_CODEC_ERROR);
  }

  for (map = ctx->iface->ctrl_maps; map && map->fn; map++) {
    va_list args;

    /* Skip entries that are neither the catch-all nor the requested id. */
    if (map->ctrl_id && map->ctrl_id != ctrl_id) continue;

    va_start(args, ctrl_id);
    status = map->fn((aom_codec_alg_priv_t *)ctx->priv, args);
    va_end(args);
    break;
  }

  return SAVE_STATUS(ctx, status);
}
|
||||
|
||||
void aom_internal_error(struct aom_internal_error_info *info,
|
||||
aom_codec_err_t error, const char *fmt, ...) {
|
||||
va_list ap;
|
||||
|
||||
info->error_code = error;
|
||||
info->has_detail = 0;
|
||||
|
||||
if (fmt) {
|
||||
size_t sz = sizeof(info->detail);
|
||||
|
||||
info->has_detail = 1;
|
||||
va_start(ap, fmt);
|
||||
vsnprintf(info->detail, sz - 1, fmt, ap);
|
||||
va_end(ap);
|
||||
info->detail[sz - 1] = '\0';
|
||||
}
|
||||
|
||||
if (info->setjmp) longjmp(info->jmp, info->error_code);
|
||||
}
|
||||
@@ -1,189 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2016, Alliance for Open Media. All rights reserved
|
||||
*
|
||||
* This source code is subject to the terms of the BSD 2 Clause License and
|
||||
* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
|
||||
* was not distributed with this source code in the LICENSE file, you can
|
||||
* obtain it at www.aomedia.org/license/software. If the Alliance for Open
|
||||
* Media Patent License 1.0 was not distributed with this source code in the
|
||||
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
|
||||
*/
|
||||
|
||||
/*!\file
|
||||
* \brief Provides the high level interface to wrap decoder algorithms.
|
||||
*
|
||||
*/
|
||||
#include <string.h>
|
||||
#include "aom/internal/aom_codec_internal.h"
|
||||
|
||||
/* Stores `var` in ctx->err when ctx is non-NULL; the expression yields the
 * stored value, or `var` itself when ctx is NULL. Both arguments are
 * parenthesized so that expression arguments (e.g. `&obj`) expand correctly.
 * Note that ctx is evaluated more than once. */
#define SAVE_STATUS(ctx, var) ((ctx) ? ((ctx)->err = (var)) : (var))
|
||||
|
||||
/* Reinterprets the context's private-data pointer as the algorithm's
 * private type. ctx must be non-NULL. */
static aom_codec_alg_priv_t *get_alg_priv(aom_codec_ctx_t *ctx) {
  aom_codec_alg_priv_t *const alg_priv = (aom_codec_alg_priv_t *)ctx->priv;
  return alg_priv;
}
|
||||
|
||||
aom_codec_err_t aom_codec_dec_init_ver(aom_codec_ctx_t *ctx,
|
||||
aom_codec_iface_t *iface,
|
||||
const aom_codec_dec_cfg_t *cfg,
|
||||
aom_codec_flags_t flags, int ver) {
|
||||
aom_codec_err_t res;
|
||||
|
||||
if (ver != AOM_DECODER_ABI_VERSION)
|
||||
res = AOM_CODEC_ABI_MISMATCH;
|
||||
else if (!ctx || !iface)
|
||||
res = AOM_CODEC_INVALID_PARAM;
|
||||
else if (iface->abi_version != AOM_CODEC_INTERNAL_ABI_VERSION)
|
||||
res = AOM_CODEC_ABI_MISMATCH;
|
||||
else if ((flags & AOM_CODEC_USE_POSTPROC) &&
|
||||
!(iface->caps & AOM_CODEC_CAP_POSTPROC))
|
||||
res = AOM_CODEC_INCAPABLE;
|
||||
else if ((flags & AOM_CODEC_USE_ERROR_CONCEALMENT) &&
|
||||
!(iface->caps & AOM_CODEC_CAP_ERROR_CONCEALMENT))
|
||||
res = AOM_CODEC_INCAPABLE;
|
||||
else if ((flags & AOM_CODEC_USE_INPUT_FRAGMENTS) &&
|
||||
!(iface->caps & AOM_CODEC_CAP_INPUT_FRAGMENTS))
|
||||
res = AOM_CODEC_INCAPABLE;
|
||||
else if (!(iface->caps & AOM_CODEC_CAP_DECODER))
|
||||
res = AOM_CODEC_INCAPABLE;
|
||||
else {
|
||||
memset(ctx, 0, sizeof(*ctx));
|
||||
ctx->iface = iface;
|
||||
ctx->name = iface->name;
|
||||
ctx->priv = NULL;
|
||||
ctx->init_flags = flags;
|
||||
ctx->config.dec = cfg;
|
||||
|
||||
res = ctx->iface->init(ctx, NULL);
|
||||
if (res) {
|
||||
ctx->err_detail = ctx->priv ? ctx->priv->err_detail : NULL;
|
||||
aom_codec_destroy(ctx);
|
||||
}
|
||||
}
|
||||
|
||||
return SAVE_STATUS(ctx, res);
|
||||
}
|
||||
|
||||
aom_codec_err_t aom_codec_peek_stream_info(aom_codec_iface_t *iface,
|
||||
const uint8_t *data,
|
||||
unsigned int data_sz,
|
||||
aom_codec_stream_info_t *si) {
|
||||
aom_codec_err_t res;
|
||||
|
||||
if (!iface || !data || !data_sz || !si ||
|
||||
si->sz < sizeof(aom_codec_stream_info_t))
|
||||
res = AOM_CODEC_INVALID_PARAM;
|
||||
else {
|
||||
/* Set default/unknown values */
|
||||
si->w = 0;
|
||||
si->h = 0;
|
||||
|
||||
res = iface->dec.peek_si(data, data_sz, si);
|
||||
}
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
aom_codec_err_t aom_codec_get_stream_info(aom_codec_ctx_t *ctx,
|
||||
aom_codec_stream_info_t *si) {
|
||||
aom_codec_err_t res;
|
||||
|
||||
if (!ctx || !si || si->sz < sizeof(aom_codec_stream_info_t))
|
||||
res = AOM_CODEC_INVALID_PARAM;
|
||||
else if (!ctx->iface || !ctx->priv)
|
||||
res = AOM_CODEC_ERROR;
|
||||
else {
|
||||
/* Set default/unknown values */
|
||||
si->w = 0;
|
||||
si->h = 0;
|
||||
|
||||
res = ctx->iface->dec.get_si(get_alg_priv(ctx), si);
|
||||
}
|
||||
|
||||
return SAVE_STATUS(ctx, res);
|
||||
}
|
||||
|
||||
/* Passes a buffer of coded data to the decoder instance.
 *
 * data and data_sz must agree: either both are set, or data is NULL and
 * data_sz is 0. A mismatch, or a NULL ctx, yields AOM_CODEC_INVALID_PARAM.
 * An uninitialized context yields AOM_CODEC_ERROR. Otherwise the
 * interface's decode result is returned. The result is stored in ctx->err
 * when ctx is non-NULL.
 */
aom_codec_err_t aom_codec_decode(aom_codec_ctx_t *ctx, const uint8_t *data,
                                 unsigned int data_sz, void *user_priv,
                                 long deadline) {
  aom_codec_err_t status;
  /* NULL data ptr allowed if data_sz is 0 too */
  const int has_data = (data != NULL);
  const int has_size = (data_sz != 0);

  if (!ctx || has_data != has_size) {
    return SAVE_STATUS(ctx, AOM_CODEC_INVALID_PARAM);
  }
  if (!ctx->iface || !ctx->priv) {
    return SAVE_STATUS(ctx, AOM_CODEC_ERROR);
  }

  status = ctx->iface->dec.decode(get_alg_priv(ctx), data, data_sz, user_priv,
                                  deadline);
  return SAVE_STATUS(ctx, status);
}
|
||||
|
||||
/* Returns the next frame from the interface's get_frame callback, or NULL
 * when ctx or iter is NULL or the context is not initialized. */
aom_image_t *aom_codec_get_frame(aom_codec_ctx_t *ctx, aom_codec_iter_t *iter) {
  if (!ctx || !iter || !ctx->iface || !ctx->priv) {
    return NULL;
  }
  return ctx->iface->dec.get_frame(get_alg_priv(ctx), iter);
}
|
||||
|
||||
aom_codec_err_t aom_codec_register_put_frame_cb(aom_codec_ctx_t *ctx,
|
||||
aom_codec_put_frame_cb_fn_t cb,
|
||||
void *user_priv) {
|
||||
aom_codec_err_t res;
|
||||
|
||||
if (!ctx || !cb)
|
||||
res = AOM_CODEC_INVALID_PARAM;
|
||||
else if (!ctx->iface || !ctx->priv ||
|
||||
!(ctx->iface->caps & AOM_CODEC_CAP_PUT_FRAME))
|
||||
res = AOM_CODEC_ERROR;
|
||||
else {
|
||||
ctx->priv->dec.put_frame_cb.u.put_frame = cb;
|
||||
ctx->priv->dec.put_frame_cb.user_priv = user_priv;
|
||||
res = AOM_CODEC_OK;
|
||||
}
|
||||
|
||||
return SAVE_STATUS(ctx, res);
|
||||
}
|
||||
|
||||
aom_codec_err_t aom_codec_register_put_slice_cb(aom_codec_ctx_t *ctx,
|
||||
aom_codec_put_slice_cb_fn_t cb,
|
||||
void *user_priv) {
|
||||
aom_codec_err_t res;
|
||||
|
||||
if (!ctx || !cb)
|
||||
res = AOM_CODEC_INVALID_PARAM;
|
||||
else if (!ctx->iface || !ctx->priv ||
|
||||
!(ctx->iface->caps & AOM_CODEC_CAP_PUT_SLICE))
|
||||
res = AOM_CODEC_ERROR;
|
||||
else {
|
||||
ctx->priv->dec.put_slice_cb.u.put_slice = cb;
|
||||
ctx->priv->dec.put_slice_cb.user_priv = user_priv;
|
||||
res = AOM_CODEC_OK;
|
||||
}
|
||||
|
||||
return SAVE_STATUS(ctx, res);
|
||||
}
|
||||
|
||||
aom_codec_err_t aom_codec_set_frame_buffer_functions(
|
||||
aom_codec_ctx_t *ctx, aom_get_frame_buffer_cb_fn_t cb_get,
|
||||
aom_release_frame_buffer_cb_fn_t cb_release, void *cb_priv) {
|
||||
aom_codec_err_t res;
|
||||
|
||||
if (!ctx || !cb_get || !cb_release) {
|
||||
res = AOM_CODEC_INVALID_PARAM;
|
||||
} else if (!ctx->iface || !ctx->priv ||
|
||||
!(ctx->iface->caps & AOM_CODEC_CAP_EXTERNAL_FRAME_BUFFER)) {
|
||||
res = AOM_CODEC_ERROR;
|
||||
} else {
|
||||
res = ctx->iface->dec.set_fb_fn(get_alg_priv(ctx), cb_get, cb_release,
|
||||
cb_priv);
|
||||
}
|
||||
|
||||
return SAVE_STATUS(ctx, res);
|
||||
}
|
||||
@@ -1,380 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2016, Alliance for Open Media. All rights reserved
|
||||
*
|
||||
* This source code is subject to the terms of the BSD 2 Clause License and
|
||||
* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
|
||||
* was not distributed with this source code in the LICENSE file, you can
|
||||
* obtain it at www.aomedia.org/license/software. If the Alliance for Open
|
||||
* Media Patent License 1.0 was not distributed with this source code in the
|
||||
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
|
||||
*/
|
||||
|
||||
/*!\file
|
||||
* \brief Provides the high level interface to wrap encoder algorithms.
|
||||
*
|
||||
*/
|
||||
#include <limits.h>
|
||||
#include <string.h>
|
||||
#include "aom_config.h"
|
||||
#include "aom/internal/aom_codec_internal.h"
|
||||
|
||||
#define SAVE_STATUS(ctx, var) (ctx ? (ctx->err = var) : var)
|
||||
|
||||
/* View the context's private pointer as the algorithm-private type expected
 * by the codec hooks. ctx must be non-NULL. */
static aom_codec_alg_priv_t *get_alg_priv(aom_codec_ctx_t *ctx) {
  aom_codec_alg_priv_t *const alg_priv = (aom_codec_alg_priv_t *)ctx->priv;
  return alg_priv;
}
|
||||
|
||||
aom_codec_err_t aom_codec_enc_init_ver(aom_codec_ctx_t *ctx,
|
||||
aom_codec_iface_t *iface,
|
||||
const aom_codec_enc_cfg_t *cfg,
|
||||
aom_codec_flags_t flags, int ver) {
|
||||
aom_codec_err_t res;
|
||||
|
||||
if (ver != AOM_ENCODER_ABI_VERSION)
|
||||
res = AOM_CODEC_ABI_MISMATCH;
|
||||
else if (!ctx || !iface || !cfg)
|
||||
res = AOM_CODEC_INVALID_PARAM;
|
||||
else if (iface->abi_version != AOM_CODEC_INTERNAL_ABI_VERSION)
|
||||
res = AOM_CODEC_ABI_MISMATCH;
|
||||
else if (!(iface->caps & AOM_CODEC_CAP_ENCODER))
|
||||
res = AOM_CODEC_INCAPABLE;
|
||||
else if ((flags & AOM_CODEC_USE_PSNR) && !(iface->caps & AOM_CODEC_CAP_PSNR))
|
||||
res = AOM_CODEC_INCAPABLE;
|
||||
else if ((flags & AOM_CODEC_USE_OUTPUT_PARTITION) &&
|
||||
!(iface->caps & AOM_CODEC_CAP_OUTPUT_PARTITION))
|
||||
res = AOM_CODEC_INCAPABLE;
|
||||
else {
|
||||
ctx->iface = iface;
|
||||
ctx->name = iface->name;
|
||||
ctx->priv = NULL;
|
||||
ctx->init_flags = flags;
|
||||
ctx->config.enc = cfg;
|
||||
res = ctx->iface->init(ctx, NULL);
|
||||
|
||||
if (res) {
|
||||
ctx->err_detail = ctx->priv ? ctx->priv->err_detail : NULL;
|
||||
aom_codec_destroy(ctx);
|
||||
}
|
||||
}
|
||||
|
||||
return SAVE_STATUS(ctx, res);
|
||||
}
|
||||
|
||||
aom_codec_err_t aom_codec_enc_init_multi_ver(
|
||||
aom_codec_ctx_t *ctx, aom_codec_iface_t *iface, aom_codec_enc_cfg_t *cfg,
|
||||
int num_enc, aom_codec_flags_t flags, aom_rational_t *dsf, int ver) {
|
||||
aom_codec_err_t res = AOM_CODEC_OK;
|
||||
|
||||
if (ver != AOM_ENCODER_ABI_VERSION)
|
||||
res = AOM_CODEC_ABI_MISMATCH;
|
||||
else if (!ctx || !iface || !cfg || (num_enc > 16 || num_enc < 1))
|
||||
res = AOM_CODEC_INVALID_PARAM;
|
||||
else if (iface->abi_version != AOM_CODEC_INTERNAL_ABI_VERSION)
|
||||
res = AOM_CODEC_ABI_MISMATCH;
|
||||
else if (!(iface->caps & AOM_CODEC_CAP_ENCODER))
|
||||
res = AOM_CODEC_INCAPABLE;
|
||||
else if ((flags & AOM_CODEC_USE_PSNR) && !(iface->caps & AOM_CODEC_CAP_PSNR))
|
||||
res = AOM_CODEC_INCAPABLE;
|
||||
else if ((flags & AOM_CODEC_USE_OUTPUT_PARTITION) &&
|
||||
!(iface->caps & AOM_CODEC_CAP_OUTPUT_PARTITION))
|
||||
res = AOM_CODEC_INCAPABLE;
|
||||
else {
|
||||
int i;
|
||||
void *mem_loc = NULL;
|
||||
|
||||
if (!(res = iface->enc.mr_get_mem_loc(cfg, &mem_loc))) {
|
||||
for (i = 0; i < num_enc; i++) {
|
||||
aom_codec_priv_enc_mr_cfg_t mr_cfg;
|
||||
|
||||
/* Validate down-sampling factor. */
|
||||
if (dsf->num < 1 || dsf->num > 4096 || dsf->den < 1 ||
|
||||
dsf->den > dsf->num) {
|
||||
res = AOM_CODEC_INVALID_PARAM;
|
||||
break;
|
||||
}
|
||||
|
||||
mr_cfg.mr_low_res_mode_info = mem_loc;
|
||||
mr_cfg.mr_total_resolutions = num_enc;
|
||||
mr_cfg.mr_encoder_id = num_enc - 1 - i;
|
||||
mr_cfg.mr_down_sampling_factor.num = dsf->num;
|
||||
mr_cfg.mr_down_sampling_factor.den = dsf->den;
|
||||
|
||||
/* Force Key-frame synchronization. Namely, encoder at higher
|
||||
* resolution always use the same frame_type chosen by the
|
||||
* lowest-resolution encoder.
|
||||
*/
|
||||
if (mr_cfg.mr_encoder_id) cfg->kf_mode = AOM_KF_DISABLED;
|
||||
|
||||
ctx->iface = iface;
|
||||
ctx->name = iface->name;
|
||||
ctx->priv = NULL;
|
||||
ctx->init_flags = flags;
|
||||
ctx->config.enc = cfg;
|
||||
res = ctx->iface->init(ctx, &mr_cfg);
|
||||
|
||||
if (res) {
|
||||
const char *error_detail = ctx->priv ? ctx->priv->err_detail : NULL;
|
||||
/* Destroy current ctx */
|
||||
ctx->err_detail = error_detail;
|
||||
aom_codec_destroy(ctx);
|
||||
|
||||
/* Destroy already allocated high-level ctx */
|
||||
while (i) {
|
||||
ctx--;
|
||||
ctx->err_detail = error_detail;
|
||||
aom_codec_destroy(ctx);
|
||||
i--;
|
||||
}
|
||||
}
|
||||
|
||||
if (res) break;
|
||||
|
||||
ctx++;
|
||||
cfg++;
|
||||
dsf++;
|
||||
}
|
||||
ctx--;
|
||||
}
|
||||
}
|
||||
|
||||
return SAVE_STATUS(ctx, res);
|
||||
}
|
||||
|
||||
aom_codec_err_t aom_codec_enc_config_default(aom_codec_iface_t *iface,
|
||||
aom_codec_enc_cfg_t *cfg,
|
||||
unsigned int usage) {
|
||||
aom_codec_err_t res;
|
||||
aom_codec_enc_cfg_map_t *map;
|
||||
int i;
|
||||
|
||||
if (!iface || !cfg || usage > INT_MAX)
|
||||
res = AOM_CODEC_INVALID_PARAM;
|
||||
else if (!(iface->caps & AOM_CODEC_CAP_ENCODER))
|
||||
res = AOM_CODEC_INCAPABLE;
|
||||
else {
|
||||
res = AOM_CODEC_INVALID_PARAM;
|
||||
|
||||
for (i = 0; i < iface->enc.cfg_map_count; ++i) {
|
||||
map = iface->enc.cfg_maps + i;
|
||||
if (map->usage == (int)usage) {
|
||||
*cfg = map->cfg;
|
||||
cfg->g_usage = usage;
|
||||
res = AOM_CODEC_OK;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
#if ARCH_X86 || ARCH_X86_64
/* On X86, disable the x87 unit's internal 80 bit precision for better
 * consistency with the SSE unit's 64 bit precision.
 *
 * FLOATING_POINT_INIT() opens a do { ... block that declares x87_orig_mode;
 * FLOATING_POINT_RESTORE() restores the control word and closes that block,
 * so the two must always be used as a pair within the same scope.
 */
#include "aom_ports/x86.h"
#define FLOATING_POINT_INIT() \
  do {                        \
    unsigned short x87_orig_mode = x87_set_double_precision();
#define FLOATING_POINT_RESTORE()       \
  x87_set_control_word(x87_orig_mode); \
  }                                    \
  while (0)

#else
/* Other targets need no adjustment; (void) makes these proper prototypes
 * instead of old-style unprototyped declarations. */
static void FLOATING_POINT_INIT(void) {}
static void FLOATING_POINT_RESTORE(void) {}
#endif
|
||||
|
||||
aom_codec_err_t aom_codec_encode(aom_codec_ctx_t *ctx, const aom_image_t *img,
|
||||
aom_codec_pts_t pts, unsigned long duration,
|
||||
aom_enc_frame_flags_t flags,
|
||||
unsigned long deadline) {
|
||||
aom_codec_err_t res = AOM_CODEC_OK;
|
||||
|
||||
if (!ctx || (img && !duration))
|
||||
res = AOM_CODEC_INVALID_PARAM;
|
||||
else if (!ctx->iface || !ctx->priv)
|
||||
res = AOM_CODEC_ERROR;
|
||||
else if (!(ctx->iface->caps & AOM_CODEC_CAP_ENCODER))
|
||||
res = AOM_CODEC_INCAPABLE;
|
||||
else {
|
||||
unsigned int num_enc = ctx->priv->enc.total_encoders;
|
||||
|
||||
/* Execute in a normalized floating point environment, if the platform
|
||||
* requires it.
|
||||
*/
|
||||
FLOATING_POINT_INIT();
|
||||
|
||||
if (num_enc == 1)
|
||||
res = ctx->iface->enc.encode(get_alg_priv(ctx), img, pts, duration, flags,
|
||||
deadline);
|
||||
else {
|
||||
/* Multi-resolution encoding:
|
||||
* Encode multi-levels in reverse order. For example,
|
||||
* if mr_total_resolutions = 3, first encode level 2,
|
||||
* then encode level 1, and finally encode level 0.
|
||||
*/
|
||||
int i;
|
||||
|
||||
ctx += num_enc - 1;
|
||||
if (img) img += num_enc - 1;
|
||||
|
||||
for (i = num_enc - 1; i >= 0; i--) {
|
||||
if ((res = ctx->iface->enc.encode(get_alg_priv(ctx), img, pts, duration,
|
||||
flags, deadline)))
|
||||
break;
|
||||
|
||||
ctx--;
|
||||
if (img) img--;
|
||||
}
|
||||
ctx++;
|
||||
}
|
||||
|
||||
FLOATING_POINT_RESTORE();
|
||||
}
|
||||
|
||||
return SAVE_STATUS(ctx, res);
|
||||
}
|
||||
|
||||
const aom_codec_cx_pkt_t *aom_codec_get_cx_data(aom_codec_ctx_t *ctx,
|
||||
aom_codec_iter_t *iter) {
|
||||
const aom_codec_cx_pkt_t *pkt = NULL;
|
||||
|
||||
if (ctx) {
|
||||
if (!iter)
|
||||
ctx->err = AOM_CODEC_INVALID_PARAM;
|
||||
else if (!ctx->iface || !ctx->priv)
|
||||
ctx->err = AOM_CODEC_ERROR;
|
||||
else if (!(ctx->iface->caps & AOM_CODEC_CAP_ENCODER))
|
||||
ctx->err = AOM_CODEC_INCAPABLE;
|
||||
else
|
||||
pkt = ctx->iface->enc.get_cx_data(get_alg_priv(ctx), iter);
|
||||
}
|
||||
|
||||
if (pkt && pkt->kind == AOM_CODEC_CX_FRAME_PKT) {
|
||||
// If the application has specified a destination area for the
|
||||
// compressed data, and the codec has not placed the data there,
|
||||
// and it fits, copy it.
|
||||
aom_codec_priv_t *const priv = ctx->priv;
|
||||
char *const dst_buf = (char *)priv->enc.cx_data_dst_buf.buf;
|
||||
|
||||
if (dst_buf && pkt->data.raw.buf != dst_buf &&
|
||||
pkt->data.raw.sz + priv->enc.cx_data_pad_before +
|
||||
priv->enc.cx_data_pad_after <=
|
||||
priv->enc.cx_data_dst_buf.sz) {
|
||||
aom_codec_cx_pkt_t *modified_pkt = &priv->enc.cx_data_pkt;
|
||||
|
||||
memcpy(dst_buf + priv->enc.cx_data_pad_before, pkt->data.raw.buf,
|
||||
pkt->data.raw.sz);
|
||||
*modified_pkt = *pkt;
|
||||
modified_pkt->data.raw.buf = dst_buf;
|
||||
modified_pkt->data.raw.sz +=
|
||||
priv->enc.cx_data_pad_before + priv->enc.cx_data_pad_after;
|
||||
pkt = modified_pkt;
|
||||
}
|
||||
|
||||
if (dst_buf == pkt->data.raw.buf) {
|
||||
priv->enc.cx_data_dst_buf.buf = dst_buf + pkt->data.raw.sz;
|
||||
priv->enc.cx_data_dst_buf.sz -= pkt->data.raw.sz;
|
||||
}
|
||||
}
|
||||
|
||||
return pkt;
|
||||
}
|
||||
|
||||
aom_codec_err_t aom_codec_set_cx_data_buf(aom_codec_ctx_t *ctx,
|
||||
const aom_fixed_buf_t *buf,
|
||||
unsigned int pad_before,
|
||||
unsigned int pad_after) {
|
||||
if (!ctx || !ctx->priv) return AOM_CODEC_INVALID_PARAM;
|
||||
|
||||
if (buf) {
|
||||
ctx->priv->enc.cx_data_dst_buf = *buf;
|
||||
ctx->priv->enc.cx_data_pad_before = pad_before;
|
||||
ctx->priv->enc.cx_data_pad_after = pad_after;
|
||||
} else {
|
||||
ctx->priv->enc.cx_data_dst_buf.buf = NULL;
|
||||
ctx->priv->enc.cx_data_dst_buf.sz = 0;
|
||||
ctx->priv->enc.cx_data_pad_before = 0;
|
||||
ctx->priv->enc.cx_data_pad_after = 0;
|
||||
}
|
||||
|
||||
return AOM_CODEC_OK;
|
||||
}
|
||||
|
||||
/* Return the encoder's preview image through the get_preview hook.
 *
 * Returns NULL when ctx is NULL. Also returns NULL, with ctx->err set, when
 * the context is uninitialized (AOM_CODEC_ERROR) or the interface is not an
 * encoder or has no get_preview hook (AOM_CODEC_INCAPABLE).
 */
const aom_image_t *aom_codec_get_preview_frame(aom_codec_ctx_t *ctx) {
  if (!ctx) return NULL;

  if (!ctx->iface || !ctx->priv) {
    ctx->err = AOM_CODEC_ERROR;
    return NULL;
  }

  if (!(ctx->iface->caps & AOM_CODEC_CAP_ENCODER) ||
      !ctx->iface->enc.get_preview) {
    ctx->err = AOM_CODEC_INCAPABLE;
    return NULL;
  }

  return ctx->iface->enc.get_preview(get_alg_priv(ctx));
}
|
||||
|
||||
/* Return the encoder's global headers through the get_glob_hdrs hook.
 *
 * Returns NULL when ctx is NULL. Also returns NULL, with ctx->err set, when
 * the context is uninitialized (AOM_CODEC_ERROR) or the interface is not an
 * encoder or has no get_glob_hdrs hook (AOM_CODEC_INCAPABLE).
 */
aom_fixed_buf_t *aom_codec_get_global_headers(aom_codec_ctx_t *ctx) {
  if (!ctx) return NULL;

  if (!ctx->iface || !ctx->priv) {
    ctx->err = AOM_CODEC_ERROR;
    return NULL;
  }

  if (!(ctx->iface->caps & AOM_CODEC_CAP_ENCODER) ||
      !ctx->iface->enc.get_glob_hdrs) {
    ctx->err = AOM_CODEC_INCAPABLE;
    return NULL;
  }

  return ctx->iface->enc.get_glob_hdrs(get_alg_priv(ctx));
}
|
||||
|
||||
aom_codec_err_t aom_codec_enc_config_set(aom_codec_ctx_t *ctx,
|
||||
const aom_codec_enc_cfg_t *cfg) {
|
||||
aom_codec_err_t res;
|
||||
|
||||
if (!ctx || !ctx->iface || !ctx->priv || !cfg)
|
||||
res = AOM_CODEC_INVALID_PARAM;
|
||||
else if (!(ctx->iface->caps & AOM_CODEC_CAP_ENCODER))
|
||||
res = AOM_CODEC_INCAPABLE;
|
||||
else
|
||||
res = ctx->iface->enc.cfg_set(get_alg_priv(ctx), cfg);
|
||||
|
||||
return SAVE_STATUS(ctx, res);
|
||||
}
|
||||
|
||||
int aom_codec_pkt_list_add(struct aom_codec_pkt_list *list,
|
||||
const struct aom_codec_cx_pkt *pkt) {
|
||||
if (list->cnt < list->max) {
|
||||
list->pkts[list->cnt++] = *pkt;
|
||||
return 0;
|
||||
}
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* Iterate over the packets stored in the list.
 *
 * *iter holds the iteration position; a null *iter starts from the first
 * packet. Returns the packet at the current position and advances *iter, or
 * returns NULL (leaving *iter untouched) once the position is at or past
 * list->cnt.
 */
const aom_codec_cx_pkt_t *aom_codec_pkt_list_get(
    struct aom_codec_pkt_list *list, aom_codec_iter_t *iter) {
  const aom_codec_cx_pkt_t *current;

  if (!(*iter)) *iter = list->pkts;

  current = (const aom_codec_cx_pkt_t *)*iter;

  if ((size_t)(current - list->pkts) >= list->cnt) return NULL;

  *iter = current + 1;
  return current;
}
|
||||
@@ -1,240 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2016, Alliance for Open Media. All rights reserved
|
||||
*
|
||||
* This source code is subject to the terms of the BSD 2 Clause License and
|
||||
* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
|
||||
* was not distributed with this source code in the LICENSE file, you can
|
||||
* obtain it at www.aomedia.org/license/software. If the Alliance for Open
|
||||
* Media Patent License 1.0 was not distributed with this source code in the
|
||||
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
|
||||
*/
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "aom/aom_image.h"
|
||||
#include "aom/aom_integer.h"
|
||||
#include "aom_mem/aom_mem.h"
|
||||
|
||||
/* Shared implementation behind aom_img_alloc() and aom_img_wrap().
 *
 * img          Descriptor to fill in, or NULL to allocate one with calloc().
 * fmt          Pixel format; selects bits per sample and chroma shifts.
 * d_w, d_h     Display dimensions; storage dimensions are rounded up to the
 *              chroma subsampling.
 * buf_align    Alignment passed to aom_memalign() (0 is treated as 1; must
 *              be a power of two).
 * stride_align Row stride alignment (0 is treated as 1; must be a power of
 *              two).
 * img_data     Existing pixel storage to wrap, or NULL to allocate storage
 *              owned by the image.
 *
 * Returns the initialized descriptor, or NULL on failure after releasing
 * whatever this call allocated.
 *
 * A caller-provided descriptor is zeroed before any validation runs. The
 * failure path calls aom_img_free(), which reads the descriptor's ownership
 * flags; without the early clear a bad alignment argument would make it act
 * on uninitialized caller memory.
 */
static aom_image_t *img_alloc_helper(aom_image_t *img, aom_img_fmt_t fmt,
                                     unsigned int d_w, unsigned int d_h,
                                     unsigned int buf_align,
                                     unsigned int stride_align,
                                     unsigned char *img_data) {
  unsigned int h, w, s, xcs, ycs, bps;
  unsigned int stride_in_bytes;
  int align;

  /* Make the descriptor safe to hand to aom_img_free() on every exit. */
  if (img) memset(img, 0, sizeof(aom_image_t));

  /* Treat align==0 like align==1 */
  if (!buf_align) buf_align = 1;

  /* Validate alignment (must be power of 2) */
  if (buf_align & (buf_align - 1)) goto fail;

  /* Treat align==0 like align==1 */
  if (!stride_align) stride_align = 1;

  /* Validate alignment (must be power of 2) */
  if (stride_align & (stride_align - 1)) goto fail;

  /* Get sample size for this format */
  switch (fmt) {
    case AOM_IMG_FMT_RGB32:
    case AOM_IMG_FMT_RGB32_LE:
    case AOM_IMG_FMT_ARGB:
    case AOM_IMG_FMT_ARGB_LE: bps = 32; break;
    case AOM_IMG_FMT_RGB24:
    case AOM_IMG_FMT_BGR24: bps = 24; break;
    case AOM_IMG_FMT_RGB565:
    case AOM_IMG_FMT_RGB565_LE:
    case AOM_IMG_FMT_RGB555:
    case AOM_IMG_FMT_RGB555_LE:
    case AOM_IMG_FMT_UYVY:
    case AOM_IMG_FMT_YUY2:
    case AOM_IMG_FMT_YVYU: bps = 16; break;
    case AOM_IMG_FMT_I420:
    case AOM_IMG_FMT_YV12:
    case AOM_IMG_FMT_AOMI420:
    case AOM_IMG_FMT_AOMYV12: bps = 12; break;
    case AOM_IMG_FMT_I422:
    case AOM_IMG_FMT_I440: bps = 16; break;
    case AOM_IMG_FMT_I444: bps = 24; break;
    case AOM_IMG_FMT_I42016: bps = 24; break;
    case AOM_IMG_FMT_I42216:
    case AOM_IMG_FMT_I44016: bps = 32; break;
    case AOM_IMG_FMT_I44416: bps = 48; break;
    default: bps = 16; break;
  }

  /* Get chroma shift values for this format */
  switch (fmt) {
    case AOM_IMG_FMT_I420:
    case AOM_IMG_FMT_YV12:
    case AOM_IMG_FMT_AOMI420:
    case AOM_IMG_FMT_AOMYV12:
    case AOM_IMG_FMT_I422:
    case AOM_IMG_FMT_I42016:
    case AOM_IMG_FMT_I42216: xcs = 1; break;
    default: xcs = 0; break;
  }

  switch (fmt) {
    case AOM_IMG_FMT_I420:
    case AOM_IMG_FMT_I440:
    case AOM_IMG_FMT_YV12:
    case AOM_IMG_FMT_AOMI420:
    case AOM_IMG_FMT_AOMYV12:
    case AOM_IMG_FMT_I42016:
    case AOM_IMG_FMT_I44016: ycs = 1; break;
    default: ycs = 0; break;
  }

  /* Calculate storage sizes given the chroma subsampling */
  align = (1 << xcs) - 1;
  w = (d_w + align) & ~align;
  align = (1 << ycs) - 1;
  h = (d_h + align) & ~align;
  s = (fmt & AOM_IMG_FMT_PLANAR) ? w : bps * w / 8;
  s = (s + stride_align - 1) & ~(stride_align - 1);
  stride_in_bytes = (fmt & AOM_IMG_FMT_HIGHBITDEPTH) ? s * 2 : s;

  /* Allocate the new image */
  if (!img) {
    img = (aom_image_t *)calloc(1, sizeof(aom_image_t));

    if (!img) goto fail;

    img->self_allocd = 1;
  }

  img->img_data = img_data;

  if (!img_data) {
    /* Size is computed in 64 bits and rejected if it does not fit size_t. */
    const uint64_t alloc_size = (fmt & AOM_IMG_FMT_PLANAR)
                                    ? (uint64_t)h * s * bps / 8
                                    : (uint64_t)h * s;

    if (alloc_size != (size_t)alloc_size) goto fail;

    img->img_data = (uint8_t *)aom_memalign(buf_align, (size_t)alloc_size);
    img->img_data_owner = 1;
  }

  if (!img->img_data) goto fail;

  img->fmt = fmt;
  img->bit_depth = (fmt & AOM_IMG_FMT_HIGHBITDEPTH) ? 16 : 8;
  img->w = w;
  img->h = h;
  img->x_chroma_shift = xcs;
  img->y_chroma_shift = ycs;
  img->bps = bps;

  /* Calculate strides */
  img->stride[AOM_PLANE_Y] = img->stride[AOM_PLANE_ALPHA] = stride_in_bytes;
  img->stride[AOM_PLANE_U] = img->stride[AOM_PLANE_V] = stride_in_bytes >> xcs;

  /* Default viewport to entire image */
  if (!aom_img_set_rect(img, 0, 0, d_w, d_h)) return img;

fail:
  aom_img_free(img);
  return NULL;
}
|
||||
|
||||
/* Create an image whose pixel storage is allocated by this call. The single
 * `align` value is used for both the buffer and the row stride. */
aom_image_t *aom_img_alloc(aom_image_t *img, aom_img_fmt_t fmt,
                           unsigned int d_w, unsigned int d_h,
                           unsigned int align) {
  const unsigned int buf_align = align;
  const unsigned int stride_align = align;

  return img_alloc_helper(img, fmt, d_w, d_h, buf_align, stride_align, NULL);
}
|
||||
|
||||
/* Describe existing pixel storage (img_data) as an image without copying. */
aom_image_t *aom_img_wrap(aom_image_t *img, aom_img_fmt_t fmt, unsigned int d_w,
                          unsigned int d_h, unsigned int stride_align,
                          unsigned char *img_data) {
  /* A buffer alignment of 1 means the buffer alignment is left unchanged by
   * this function. */
  const unsigned int buf_align = 1;

  return img_alloc_helper(img, fmt, d_w, d_h, buf_align, stride_align,
                          img_data);
}
|
||||
|
||||
/* Set the displayed rectangle of an image and recompute its plane pointers.
 *
 * (x, y) is the top-left corner and (w, h) the size of the viewport, in
 * pixels of the stored image. Returns 0 on success, or -1 (leaving the image
 * untouched) when the rectangle does not lie inside img->w x img->h.
 *
 * The bounds test is written with subtractions so that huge x/w or y/h
 * values cannot wrap around in `x + w` / `y + h` and slip through the check.
 */
int aom_img_set_rect(aom_image_t *img, unsigned int x, unsigned int y,
                     unsigned int w, unsigned int h) {
  unsigned char *data;

  /* Equivalent to x + w <= img->w && y + h <= img->h, minus the overflow. */
  if (w > img->w || x > img->w - w || h > img->h || y > img->h - h) return -1;

  img->d_w = w;
  img->d_h = h;

  /* Calculate plane pointers */
  if (!(img->fmt & AOM_IMG_FMT_PLANAR)) {
    img->planes[AOM_PLANE_PACKED] =
        img->img_data + x * img->bps / 8 + y * img->stride[AOM_PLANE_PACKED];
  } else {
    const int bytes_per_sample =
        (img->fmt & AOM_IMG_FMT_HIGHBITDEPTH) ? 2 : 1;
    data = img->img_data;

    /* Planes are laid out back to back: optional alpha, luma, then the two
     * chroma planes; `data` advances past each full plane in turn. */
    if (img->fmt & AOM_IMG_FMT_HAS_ALPHA) {
      img->planes[AOM_PLANE_ALPHA] =
          data + x * bytes_per_sample + y * img->stride[AOM_PLANE_ALPHA];
      data += img->h * img->stride[AOM_PLANE_ALPHA];
    }

    img->planes[AOM_PLANE_Y] =
        data + x * bytes_per_sample + y * img->stride[AOM_PLANE_Y];
    data += img->h * img->stride[AOM_PLANE_Y];

    if (!(img->fmt & AOM_IMG_FMT_UV_FLIP)) {
      img->planes[AOM_PLANE_U] =
          data + (x >> img->x_chroma_shift) * bytes_per_sample +
          (y >> img->y_chroma_shift) * img->stride[AOM_PLANE_U];
      data += (img->h >> img->y_chroma_shift) * img->stride[AOM_PLANE_U];
      img->planes[AOM_PLANE_V] =
          data + (x >> img->x_chroma_shift) * bytes_per_sample +
          (y >> img->y_chroma_shift) * img->stride[AOM_PLANE_V];
    } else {
      /* UV_FLIP formats store the V plane before the U plane. */
      img->planes[AOM_PLANE_V] =
          data + (x >> img->x_chroma_shift) * bytes_per_sample +
          (y >> img->y_chroma_shift) * img->stride[AOM_PLANE_V];
      data += (img->h >> img->y_chroma_shift) * img->stride[AOM_PLANE_V];
      img->planes[AOM_PLANE_U] =
          data + (x >> img->x_chroma_shift) * bytes_per_sample +
          (y >> img->y_chroma_shift) * img->stride[AOM_PLANE_U];
    }
  }

  return 0;
}
|
||||
|
||||
/* Flip the image vertically in place by pointing each plane at its last
 * displayed row and negating that plane's stride. No pixel data is moved. */
void aom_img_flip(aom_image_t *img) {
  /* The row offsets are converted to a signed type before multiplying.
   * Under the usual arithmetic conversions (ISO C99 6.3.1.8) an unsigned
   * row count would drag the stride to unsigned as well, which gives wrong
   * pointer adjustments when the pointer is wider than the product.
   */
  const int last_luma_row = (signed)(img->d_h - 1);
  const int last_chroma_row =
      (signed)((img->d_h >> img->y_chroma_shift) - 1);

  img->planes[AOM_PLANE_Y] += last_luma_row * img->stride[AOM_PLANE_Y];
  img->stride[AOM_PLANE_Y] = -img->stride[AOM_PLANE_Y];

  img->planes[AOM_PLANE_U] += last_chroma_row * img->stride[AOM_PLANE_U];
  img->stride[AOM_PLANE_U] = -img->stride[AOM_PLANE_U];

  img->planes[AOM_PLANE_V] += last_chroma_row * img->stride[AOM_PLANE_V];
  img->stride[AOM_PLANE_V] = -img->stride[AOM_PLANE_V];

  img->planes[AOM_PLANE_ALPHA] += last_luma_row * img->stride[AOM_PLANE_ALPHA];
  img->stride[AOM_PLANE_ALPHA] = -img->stride[AOM_PLANE_ALPHA];
}
|
||||
|
||||
/* Release an image: the pixel storage is freed only if the image owns it,
 * the descriptor only if it was allocated by the library. NULL is a no-op. */
void aom_img_free(aom_image_t *img) {
  if (!img) return;

  if (img->img_data && img->img_data_owner) {
    aom_free(img->img_data);
  }

  if (img->self_allocd) {
    free(img);
  }
}
|
||||
@@ -1,72 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2015 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include <math.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "./aom_config.h"
|
||||
#include "./aom_dsp_rtcd.h"
|
||||
|
||||
#include "aom/aom_integer.h"
|
||||
#include "aom_ports/mem.h"
|
||||
|
||||
/* Add noise to a width x height plane of 8-bit samples, row by row.
 *
 * Each row takes its noise from `noise` starting at a random offset in
 * 0..255. Each sample is first clamped to 0..255 after subtracting
 * blackclamp[0], adding bothclamp[0] and subtracting whiteclamp[0]; then the
 * row's noise value for that column is added and the result stored back.
 * Only element 0 of each clamp array is read here.
 */
void aom_plane_add_noise_c(uint8_t *start, char *noise, char blackclamp[16],
                           char whiteclamp[16], char bothclamp[16],
                           unsigned int width, unsigned int height, int pitch) {
  unsigned int row;

  for (row = 0; row < height; ++row) {
    uint8_t *const line = start + row * pitch;
    /* Random starting point in the noise table for this row. */
    char *const row_noise = (char *)(noise + (rand() & 0xff));  // NOLINT
    unsigned int col = 0;

    while (col < width) {
      int sample = line[col];

      sample = clamp(sample - blackclamp[0], 0, 255);
      sample = clamp(sample + bothclamp[0], 0, 255);
      sample = clamp(sample - whiteclamp[0], 0, 255);

      line[col] = sample + row_noise[col];
      ++col;
    }
  }
}
|
||||
|
||||
static double gaussian(double sigma, double mu, double x) {
|
||||
return 1 / (sigma * sqrt(2.0 * 3.14159265)) *
|
||||
(exp(-(x - mu) * (x - mu) / (2 * sigma * sigma)));
|
||||
}
|
||||
|
||||
int aom_setup_noise(double sigma, int size, char *noise) {
|
||||
char char_dist[256];
|
||||
int next = 0, i, j;
|
||||
|
||||
// set up a 256 entry lookup that matches gaussian distribution
|
||||
for (i = -32; i < 32; ++i) {
|
||||
const int a_i = (int)(0.5 + 256 * gaussian(sigma, 0, i));
|
||||
if (a_i) {
|
||||
for (j = 0; j < a_i; ++j) {
|
||||
char_dist[next + j] = (char)i;
|
||||
}
|
||||
next = next + j;
|
||||
}
|
||||
}
|
||||
|
||||
// Rounding error - might mean we have less than 256.
|
||||
for (; next < 256; ++next) {
|
||||
char_dist[next] = 0;
|
||||
}
|
||||
|
||||
for (i = 0; i < size; ++i) {
|
||||
noise[i] = char_dist[rand() & 0xff]; // NOLINT
|
||||
}
|
||||
|
||||
// Returns the highest non 0 value used in distribution.
|
||||
return -char_dist[0];
|
||||
}
|
||||
@@ -1,64 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2016, Alliance for Open Media. All rights reserved
|
||||
*
|
||||
* This source code is subject to the terms of the BSD 2 Clause License and
|
||||
* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
|
||||
* was not distributed with this source code in the LICENSE file, you can
|
||||
* obtain it at www.aomedia.org/license/software. If the Alliance for Open
|
||||
* Media Patent License 1.0 was not distributed with this source code in the
|
||||
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
|
||||
*/
|
||||
|
||||
#include <assert.h>
|
||||
#include "./aom_config.h"
|
||||
#include "aom/aom_integer.h"
|
||||
#include "aom_dsp/ans.h"
|
||||
#include "aom_dsp/prob.h"
|
||||
|
||||
/* Return the index of the largest entry among the first num_syms entries of
 * pdf_tab (the earliest one on ties), or -1 when num_syms <= 0. */
static int find_largest(const aom_cdf_prob *const pdf_tab, int num_syms) {
  int best_idx = -1;
  int best_prob = -1;
  int idx = 0;

  while (idx < num_syms) {
    const int candidate = pdf_tab[idx];

    /* Strict comparison keeps the first of several equal maxima. */
    if (candidate > best_prob) {
      best_prob = candidate;
      best_idx = idx;
    }
    ++idx;
  }

  return best_idx;
}
|
||||
|
||||
void aom_rans_merge_prob8_pdf(aom_cdf_prob *const out_pdf,
|
||||
const AnsP8 node_prob,
|
||||
const aom_cdf_prob *const src_pdf, int in_syms) {
|
||||
int i;
|
||||
int adjustment = RANS_PRECISION;
|
||||
const int round_fact = ANS_P8_PRECISION >> 1;
|
||||
const AnsP8 p1 = ANS_P8_PRECISION - node_prob;
|
||||
const int out_syms = in_syms + 1;
|
||||
assert(src_pdf != out_pdf);
|
||||
|
||||
out_pdf[0] = node_prob << (RANS_PROB_BITS - ANS_P8_SHIFT);
|
||||
adjustment -= out_pdf[0];
|
||||
for (i = 0; i < in_syms; ++i) {
|
||||
int p = (p1 * src_pdf[i] + round_fact) >> ANS_P8_SHIFT;
|
||||
p = AOMMIN(p, (int)RANS_PRECISION - in_syms);
|
||||
p = AOMMAX(p, 1);
|
||||
out_pdf[i + 1] = p;
|
||||
adjustment -= p;
|
||||
}
|
||||
|
||||
// Adjust probabilities so they sum to the total probability
|
||||
if (adjustment > 0) {
|
||||
i = find_largest(out_pdf, out_syms);
|
||||
out_pdf[i] += adjustment;
|
||||
} else {
|
||||
while (adjustment < 0) {
|
||||
i = find_largest(out_pdf, out_syms);
|
||||
--out_pdf[i];
|
||||
assert(out_pdf[i] > 0);
|
||||
adjustment++;
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,44 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2016, Alliance for Open Media. All rights reserved
|
||||
*
|
||||
* This source code is subject to the terms of the BSD 2 Clause License and
|
||||
* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
|
||||
* was not distributed with this source code in the LICENSE file, you can
|
||||
* obtain it at www.aomedia.org/license/software. If the Alliance for Open
|
||||
* Media Patent License 1.0 was not distributed with this source code in the
|
||||
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
|
||||
*/
|
||||
|
||||
#ifndef AOM_DSP_ANS_H_
|
||||
#define AOM_DSP_ANS_H_
|
||||
// Constants, types and utilities for Asymmetric Numeral Systems
|
||||
// http://arxiv.org/abs/1311.2540v2
|
||||
|
||||
#include <assert.h>
|
||||
#include "./aom_config.h"
|
||||
#include "aom/aom_integer.h"
|
||||
#include "aom_dsp/prob.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif // __cplusplus
|
||||
|
||||
/* 8-bit probability as used by the ANS coders. */
typedef uint8_t AnsP8;
/* An AnsP8 is a fraction of 256, i.e. it carries ANS_P8_SHIFT bits. */
#define ANS_P8_PRECISION 256u
#define ANS_P8_SHIFT 8
/* rANS probabilities are fractions of 2^RANS_PROB_BITS. */
#define RANS_PROB_BITS 15
#define RANS_PRECISION (1u << RANS_PROB_BITS)

// L_BASE % PRECISION must be 0. Increasing L_BASE beyond 2**15 will cause uabs
// to overflow.
#define L_BASE (RANS_PRECISION)
#define IO_BASE 256
|
||||
// Range I = { L_BASE, L_BASE + 1, ..., L_BASE * IO_BASE - 1 }
|
||||
|
||||
void aom_rans_merge_prob8_pdf(aom_cdf_prob *const out_pdf,
|
||||
const AnsP8 node_prob,
|
||||
const aom_cdf_prob *const src_pdf, int in_syms);
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
#endif // __cplusplus
|
||||
#endif // AOM_DSP_ANS_H_
|
||||
@@ -1,146 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2016, Alliance for Open Media. All rights reserved
|
||||
*
|
||||
* This source code is subject to the terms of the BSD 2 Clause License and
|
||||
* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
|
||||
* was not distributed with this source code in the LICENSE file, you can
|
||||
* obtain it at www.aomedia.org/license/software. If the Alliance for Open
|
||||
* Media Patent License 1.0 was not distributed with this source code in the
|
||||
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
|
||||
*/
|
||||
|
||||
#ifndef AOM_DSP_ANSREADER_H_
|
||||
#define AOM_DSP_ANSREADER_H_
|
||||
// A uABS and rANS decoder implementation of Asymmetric Numeral Systems
|
||||
// http://arxiv.org/abs/1311.2540v2
|
||||
|
||||
#include <assert.h>
|
||||
#include "./aom_config.h"
|
||||
#include "aom/aom_integer.h"
|
||||
#include "aom_dsp/prob.h"
|
||||
#include "aom_dsp/ans.h"
|
||||
#include "aom_ports/mem_ops.h"
|
||||
#if CONFIG_ACCOUNTING
|
||||
#include "av1/common/accounting.h"
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif // __cplusplus
|
||||
|
||||
/* State of an ANS decoder reading a byte buffer from its end backwards. */
struct AnsDecoder {
  const uint8_t *buf; /* Start of the coded data. */
  int buf_offset;     /* Number of bytes of buf not yet consumed; the next
                         byte read is buf[buf_offset - 1]. */
  uint32_t state;     /* Current coder state. */
#if CONFIG_ACCOUNTING
  Accounting *accounting;
#endif
};
|
||||
|
||||
/* Decode one binary symbol with the uABS coder, where p0 is the 8-bit
 * probability of the symbol being 0. Returns the decoded symbol (0 or 1)
 * and updates ans->state. */
static INLINE int uabs_read(struct AnsDecoder *ans, AnsP8 p0) {
  const AnsP8 p = ANS_P8_PRECISION - p0;
  unsigned state = ans->state;
  unsigned scaled, quotient;
  int symbol;

  /* Refill: pull bytes from the end of the buffer until the state is back
   * in range or the input is exhausted. */
  for (; state < L_BASE && ans->buf_offset > 0;) {
    state = state * IO_BASE + ans->buf[--ans->buf_offset];
  }

  scaled = state * p;
  quotient = scaled / ANS_P8_PRECISION;
  symbol = (scaled & 0xFF) >= p0;

  ans->state = symbol ? quotient : state - quotient;
  return symbol;
}
|
||||
|
||||
static INLINE int uabs_read_bit(struct AnsDecoder *ans) {
|
||||
int s;
|
||||
unsigned state = ans->state;
|
||||
while (state < L_BASE && ans->buf_offset > 0) {
|
||||
state = state * IO_BASE + ans->buf[--ans->buf_offset];
|
||||
}
|
||||
s = (int)(state & 1);
|
||||
ans->state = state >> 1;
|
||||
return s;
|
||||
}
|
||||
|
||||
struct rans_dec_sym {
|
||||
uint8_t val;
|
||||
aom_cdf_prob prob;
|
||||
aom_cdf_prob cum_prob; // not-inclusive
|
||||
};
|
||||
|
||||
/* Find the symbol whose CDF interval contains rem.
 *
 * cdf holds the upper bound of each symbol's interval. The search stops at
 * the first entry greater than rem, so the table must contain such an entry.
 * On return out->val is that entry's index, out->cum_prob the previous bound
 * (0 for the first symbol) and out->prob the interval width.
 */
static INLINE void fetch_sym(struct rans_dec_sym *out, const aom_cdf_prob *cdf,
                             aom_cdf_prob rem) {
  int idx = 0;
  aom_cdf_prob lower = 0;
  aom_cdf_prob upper = cdf[0];

  // TODO(skal): if critical, could be a binary search.
  // Or, better, an O(1) alias-table.
  while (rem >= upper) {
    lower = upper;
    upper = cdf[++idx];
  }

  out->val = idx;
  out->prob = upper - lower;
  out->cum_prob = lower;
}
|
||||
|
||||
static INLINE int rans_read(struct AnsDecoder *ans, const aom_cdf_prob *tab) {
|
||||
unsigned rem;
|
||||
unsigned quo;
|
||||
struct rans_dec_sym sym;
|
||||
while (ans->state < L_BASE && ans->buf_offset > 0) {
|
||||
ans->state = ans->state * IO_BASE + ans->buf[--ans->buf_offset];
|
||||
}
|
||||
quo = ans->state / RANS_PRECISION;
|
||||
rem = ans->state % RANS_PRECISION;
|
||||
fetch_sym(&sym, tab, rem);
|
||||
ans->state = quo * sym.prob + rem - sym.cum_prob;
|
||||
return sym.val;
|
||||
}
|
||||
|
||||
static INLINE int ans_read_init(struct AnsDecoder *const ans,
|
||||
const uint8_t *const buf, int offset) {
|
||||
unsigned x;
|
||||
if (offset < 1) return 1;
|
||||
ans->buf = buf;
|
||||
x = buf[offset - 1] >> 6;
|
||||
if (x == 0) {
|
||||
ans->buf_offset = offset - 1;
|
||||
ans->state = buf[offset - 1] & 0x3F;
|
||||
} else if (x == 1) {
|
||||
if (offset < 2) return 1;
|
||||
ans->buf_offset = offset - 2;
|
||||
ans->state = mem_get_le16(buf + offset - 2) & 0x3FFF;
|
||||
} else if (x == 2) {
|
||||
if (offset < 3) return 1;
|
||||
ans->buf_offset = offset - 3;
|
||||
ans->state = mem_get_le24(buf + offset - 3) & 0x3FFFFF;
|
||||
} else if ((buf[offset - 1] & 0xE0) == 0xE0) {
|
||||
if (offset < 4) return 1;
|
||||
ans->buf_offset = offset - 4;
|
||||
ans->state = mem_get_le32(buf + offset - 4) & 0x1FFFFFFF;
|
||||
} else {
|
||||
// 110xxxxx implies this byte is a superframe marker
|
||||
return 1;
|
||||
}
|
||||
#if CONFIG_ACCOUNTING
|
||||
ans->accounting = NULL;
|
||||
#endif
|
||||
ans->state += L_BASE;
|
||||
if (ans->state >= L_BASE * IO_BASE) return 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static INLINE int ans_read_end(struct AnsDecoder *const ans) {
|
||||
return ans->state == L_BASE;
|
||||
}
|
||||
|
||||
static INLINE int ans_reader_has_error(const struct AnsDecoder *const ans) {
|
||||
return ans->state < L_BASE && ans->buf_offset == 0;
|
||||
}
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
#endif // __cplusplus
|
||||
#endif // AOM_DSP_ANSREADER_H_
|
||||
@@ -1,120 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2016, Alliance for Open Media. All rights reserved
|
||||
*
|
||||
* This source code is subject to the terms of the BSD 2 Clause License and
|
||||
* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
|
||||
* was not distributed with this source code in the LICENSE file, you can
|
||||
* obtain it at www.aomedia.org/license/software. If the Alliance for Open
|
||||
* Media Patent License 1.0 was not distributed with this source code in the
|
||||
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
|
||||
*/
|
||||
|
||||
#ifndef AOM_DSP_ANSWRITER_H_
|
||||
#define AOM_DSP_ANSWRITER_H_
|
||||
// A uABS and rANS encoder implementation of Asymmetric Numeral Systems
|
||||
// http://arxiv.org/abs/1311.2540v2
|
||||
|
||||
#include <assert.h>
|
||||
#include "./aom_config.h"
|
||||
#include "aom/aom_integer.h"
|
||||
#include "aom_dsp/ans.h"
|
||||
#include "aom_dsp/prob.h"
|
||||
#include "aom_ports/mem_ops.h"
|
||||
#include "av1/common/odintrin.h"
|
||||
|
||||
/*
 * ANS_DIVREM(quotient, remainder, dividend, divisor)
 *   Stores dividend / divisor in `quotient` and dividend % divisor in
 *   `remainder`. When RANS_PRECISION does not exceed OD_DIVU_DMAX the
 *   division goes through OD_DIVU_SMALL; otherwise plain / and % are used.
 */
#if RANS_PRECISION > OD_DIVU_DMAX
#define ANS_DIVREM(quotient, remainder, dividend, divisor) \
  do {                                                     \
    quotient = (dividend) / (divisor);                     \
    remainder = (dividend) % (divisor);                    \
  } while (0)
#else
#define ANS_DIVREM(quotient, remainder, dividend, divisor) \
  do {                                                     \
    quotient = OD_DIVU_SMALL((dividend), (divisor));       \
    remainder = (dividend) - (quotient) * (divisor);       \
  } while (0)
#endif

/* Division helper used by the uABS encoder; always uses OD_DIVU_SMALL. */
#define ANS_DIV8(dividend, divisor) OD_DIVU_SMALL((dividend), (divisor))
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif // __cplusplus
|
||||
|
||||
/* Writer-side state shared by the uABS and rANS encoding routines. */
struct AnsCoder {
  /* Output buffer that receives the coded bytes. */
  uint8_t *buf;
  /* Index of the next byte of buf to be written. */
  int buf_offset;
  /* Current ANS state value. */
  uint32_t state;
};
|
||||
|
||||
static INLINE void ans_write_init(struct AnsCoder *const ans,
|
||||
uint8_t *const buf) {
|
||||
ans->buf = buf;
|
||||
ans->buf_offset = 0;
|
||||
ans->state = L_BASE;
|
||||
}
|
||||
|
||||
static INLINE int ans_write_end(struct AnsCoder *const ans) {
|
||||
uint32_t state;
|
||||
assert(ans->state >= L_BASE);
|
||||
assert(ans->state < L_BASE * IO_BASE);
|
||||
state = ans->state - L_BASE;
|
||||
if (state < (1 << 6)) {
|
||||
ans->buf[ans->buf_offset] = (0x00 << 6) + state;
|
||||
return ans->buf_offset + 1;
|
||||
} else if (state < (1 << 14)) {
|
||||
mem_put_le16(ans->buf + ans->buf_offset, (0x01 << 14) + state);
|
||||
return ans->buf_offset + 2;
|
||||
} else if (state < (1 << 22)) {
|
||||
mem_put_le24(ans->buf + ans->buf_offset, (0x02 << 22) + state);
|
||||
return ans->buf_offset + 3;
|
||||
} else if (state < (1 << 29)) {
|
||||
mem_put_le32(ans->buf + ans->buf_offset, (0x07 << 29) + state);
|
||||
return ans->buf_offset + 4;
|
||||
} else {
|
||||
assert(0 && "State is too large to be serialized");
|
||||
return ans->buf_offset;
|
||||
}
|
||||
}
|
||||
|
||||
// uABS with normalization
|
||||
static INLINE void uabs_write(struct AnsCoder *ans, int val, AnsP8 p0) {
|
||||
AnsP8 p = ANS_P8_PRECISION - p0;
|
||||
const unsigned l_s = val ? p : p0;
|
||||
while (ans->state >= L_BASE / ANS_P8_PRECISION * IO_BASE * l_s) {
|
||||
ans->buf[ans->buf_offset++] = ans->state % IO_BASE;
|
||||
ans->state /= IO_BASE;
|
||||
}
|
||||
if (!val)
|
||||
ans->state = ANS_DIV8(ans->state * ANS_P8_PRECISION, p0);
|
||||
else
|
||||
ans->state = ANS_DIV8((ans->state + 1) * ANS_P8_PRECISION + p - 1, p) - 1;
|
||||
}
|
||||
|
||||
struct rans_sym {
|
||||
aom_cdf_prob prob;
|
||||
aom_cdf_prob cum_prob; // not-inclusive
|
||||
};
|
||||
|
||||
// rANS with normalization
|
||||
// sym->prob takes the place of l_s from the paper
|
||||
// ANS_P10_PRECISION is m
|
||||
static INLINE void rans_write(struct AnsCoder *ans,
|
||||
const struct rans_sym *const sym) {
|
||||
const aom_cdf_prob p = sym->prob;
|
||||
unsigned quot, rem;
|
||||
while (ans->state >= L_BASE / RANS_PRECISION * IO_BASE * p) {
|
||||
ans->buf[ans->buf_offset++] = ans->state % IO_BASE;
|
||||
ans->state /= IO_BASE;
|
||||
}
|
||||
ANS_DIVREM(quot, rem, ans->state, p);
|
||||
ans->state = quot * RANS_PRECISION + rem + sym->cum_prob;
|
||||
}
|
||||
|
||||
#undef ANS_DIV8
|
||||
#undef ANS_DIVREM
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
#endif // __cplusplus
|
||||
#endif // AOM_DSP_ANSWRITER_H_
|
||||
@@ -1,57 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2016, Alliance for Open Media. All rights reserved
|
||||
*
|
||||
* This source code is subject to the terms of the BSD 2 Clause License and
|
||||
* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
|
||||
* was not distributed with this source code in the LICENSE file, you can
|
||||
* obtain it at www.aomedia.org/license/software. If the Alliance for Open
|
||||
* Media Patent License 1.0 was not distributed with this source code in the
|
||||
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
|
||||
*/
|
||||
#ifndef AOM_DSP_AOM_CONVOLVE_H_
|
||||
#define AOM_DSP_AOM_CONVOLVE_H_
|
||||
|
||||
#include "./aom_config.h"
|
||||
#include "aom/aom_integer.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
// Note: Fixed size intermediate buffers, place limits on parameters
|
||||
// of some functions. 2d filtering proceeds in 2 steps:
|
||||
// (1) Interpolate horizontally into an intermediate buffer, temp.
|
||||
// (2) Interpolate temp vertically to derive the sub-pixel result.
|
||||
// Deriving the maximum number of rows in the temp buffer (135):
|
||||
// --Smallest scaling factor is x1/2 ==> y_step_q4 = 32 (Normative).
|
||||
// --Largest block size is 64x64 pixels.
|
||||
// --64 rows in the downscaled frame span a distance of (64 - 1) * 32 in the
|
||||
// original frame (in 1/16th pixel units).
|
||||
// --Must round-up because block may be located at sub-pixel position.
|
||||
// --Require an additional SUBPEL_TAPS rows for the 8-tap filter tails.
|
||||
// --(((64 - 1) * 32 + 15) >> 4) + 8 = 134 rows are required; the buffer is sized at 135.
|
||||
/* Row count of the intermediate (temp) buffer used by 2-D filtering. */
#if CONFIG_AV1 && CONFIG_EXT_PARTITION
#define MAX_EXT_SIZE 263 /* value used when CONFIG_EXT_PARTITION is on */
#else
#define MAX_EXT_SIZE 135
#endif  // CONFIG_AV1 && CONFIG_EXT_PARTITION
|
||||
|
||||
/*
 * Signature shared by the 8-bit convolution routines: source and destination
 * planes with their strides, a horizontal and a vertical filter each with a
 * step in 1/16th pixel units, and the block width and height.
 */
typedef void (*convolve_fn_t)(const uint8_t *src, ptrdiff_t src_stride,
                              uint8_t *dst, ptrdiff_t dst_stride,
                              const int16_t *filter_x, int x_step_q4,
                              const int16_t *filter_y, int y_step_q4,
                              int w, int h);
|
||||
|
||||
#if CONFIG_AOM_HIGHBITDEPTH
|
||||
/*
 * High bit-depth variant of the convolution signature; identical to the
 * 8-bit form plus a trailing bit-depth argument `bd`.
 */
typedef void (*highbd_convolve_fn_t)(const uint8_t *src,
                                     ptrdiff_t src_stride, uint8_t *dst,
                                     ptrdiff_t dst_stride,
                                     const int16_t *filter_x, int x_step_q4,
                                     const int16_t *filter_y, int y_step_q4,
                                     int w, int h, int bd);
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
#endif
|
||||
|
||||
#endif // AOM_DSP_AOM_CONVOLVE_H_
|
||||
@@ -1,102 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2016, Alliance for Open Media. All rights reserved
|
||||
*
|
||||
* This source code is subject to the terms of the BSD 2 Clause License and
|
||||
* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
|
||||
* was not distributed with this source code in the LICENSE file, you can
|
||||
* obtain it at www.aomedia.org/license/software. If the Alliance for Open
|
||||
* Media Patent License 1.0 was not distributed with this source code in the
|
||||
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
|
||||
*/
|
||||
|
||||
#ifndef AOM_DSP_AOM_DSP_COMMON_H_
|
||||
#define AOM_DSP_AOM_DSP_COMMON_H_
|
||||
|
||||
#include "./aom_config.h"
|
||||
#include "aom/aom_integer.h"
|
||||
#include "aom_ports/mem.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/* MAX_SB_SIZE may be supplied by an earlier definition; default it here. */
#if !defined(MAX_SB_SIZE)
#if CONFIG_AV1 && CONFIG_EXT_PARTITION
#define MAX_SB_SIZE 128
#else
#define MAX_SB_SIZE 64
#endif  // CONFIG_AV1 && CONFIG_EXT_PARTITION
#endif  // !defined(MAX_SB_SIZE)
|
||||
|
||||
/* Minimum / maximum of two values. Each argument may be evaluated twice. */
#define AOMMIN(x, y) ((x) < (y) ? (x) : (y))
#define AOMMAX(x, y) ((x) > (y) ? (x) : (y))

/* Logical 'a implies b' (or 'a -> b'). */
#define IMPLIES(a, b) (!(a) || (b))

/* True when x has at most one bit set; note that this includes x == 0. */
#define IS_POWER_OF_TWO(x) (((x) & ((x) - 1)) == 0)
|
||||
|
||||
/*
 * Branch-outcome hints. Even on processors with a good branch predictor
 * these can matter, because they can affect how the compiler orders basic
 * blocks. Without __GNUC__ they expand to the bare expression.
 */
#if defined(__GNUC__)
#define LIKELY(v) __builtin_expect(v, 1)
#define UNLIKELY(v) __builtin_expect(v, 0)
#else
#define LIKELY(v) (v)
#define UNLIKELY(v) (v)
#endif
|
||||
|
||||
/*
 * Swaps the values of lvalues `a` and `b`, both of type `type`.
 *
 * The temporary has a reserved-looking name so that an argument called `c`
 * (the previous temporary name) is not shadowed, and the arguments are
 * parenthesized so that expressions such as *p or arr[i] expand safely.
 */
#define AOM_SWAP(type, a, b)  \
  do {                        \
    type aom_swap_tmp_ = (b); \
    (b) = (a);                \
    (a) = aom_swap_tmp_;      \
  } while (0)
|
||||
|
||||
#if CONFIG_AOM_QM
/* Element type and bit count used when CONFIG_AOM_QM is enabled. */
#define AOM_QM_BITS 6
typedef uint16_t qm_val_t;
#endif
|
||||
// Note:
// tran_low_t is the datatype used for final transform coefficients.
// tran_high_t is the datatype used for intermediate transform stages.
// Both are twice as wide when CONFIG_AOM_HIGHBITDEPTH is enabled.
#if CONFIG_AOM_HIGHBITDEPTH
typedef int32_t tran_low_t;
typedef int64_t tran_high_t;
#else
typedef int16_t tran_low_t;
typedef int32_t tran_high_t;
#endif  // CONFIG_AOM_HIGHBITDEPTH
|
||||
|
||||
/* Saturates `val` to the 8-bit pixel range [0, 255]. */
static INLINE uint8_t clip_pixel(int val) {
  if (val > 255) return 255;
  if (val < 0) return 0;
  return (uint8_t)val;
}
|
||||
|
||||
/*
 * Limits `value` to the range [low, high]. The lower bound is tested first,
 * so `low` is returned whenever value < low.
 */
static INLINE int clamp(int value, int low, int high) {
  if (value < low) return low;
  if (value > high) return high;
  return value;
}
|
||||
|
||||
/*
 * Floating-point counterpart of clamp(): limits `value` to [low, high],
 * testing the lower bound first.
 */
static INLINE double fclamp(double value, double low, double high) {
  if (value < low) return low;
  if (value > high) return high;
  return value;
}
|
||||
|
||||
#if CONFIG_AOM_HIGHBITDEPTH
|
||||
/*
 * Saturates `val` to the pixel range of bit depth `bd`: [0, 1023] for 10,
 * [0, 4095] for 12, and [0, 255] for 8 or any other value.
 */
static INLINE uint16_t clip_pixel_highbd(int val, int bd) {
  int max_val;
  switch (bd) {
    case 10: max_val = 1023; break;
    case 12: max_val = 4095; break;
    case 8:
    default: max_val = 255; break;
  }
  if (val < 0) return 0;
  return (uint16_t)(val > max_val ? max_val : val);
}
|
||||
#endif // CONFIG_AOM_HIGHBITDEPTH
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
#endif
|
||||
|
||||
#endif // AOM_DSP_AOM_DSP_COMMON_H_
|
||||
@@ -1,16 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2016, Alliance for Open Media. All rights reserved
|
||||
*
|
||||
* This source code is subject to the terms of the BSD 2 Clause License and
|
||||
* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
|
||||
* was not distributed with this source code in the LICENSE file, you can
|
||||
* obtain it at www.aomedia.org/license/software. If the Alliance for Open
|
||||
* Media Patent License 1.0 was not distributed with this source code in the
|
||||
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
|
||||
*/
|
||||
#include "./aom_config.h"
|
||||
#define RTCD_C
|
||||
#include "./aom_dsp_rtcd.h"
|
||||
#include "aom_ports/aom_once.h"
|
||||
|
||||
/*
 * Runs setup_rtcd_internal through once(). The parameter list is spelled
 * (void) so the definition is a proper prototype; an empty () in C leaves
 * the arguments unchecked.
 */
void aom_dsp_rtcd(void) { once(setup_rtcd_internal); }
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,43 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2016, Alliance for Open Media. All rights reserved
|
||||
*
|
||||
* This source code is subject to the terms of the BSD 2 Clause License and
|
||||
* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
|
||||
* was not distributed with this source code in the LICENSE file, you can
|
||||
* obtain it at www.aomedia.org/license/software. If the Alliance for Open
|
||||
* Media Patent License 1.0 was not distributed with this source code in the
|
||||
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
|
||||
*/
|
||||
|
||||
#ifndef AOM_DSP_AOM_FILTER_H_
|
||||
#define AOM_DSP_AOM_FILTER_H_
|
||||
|
||||
#include "aom/aom_integer.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/* Bit count associated with the filter coefficients. */
#define FILTER_BITS 7

/* Sub-pixel positions: SUBPEL_BITS fractional bits give 16 shifts. */
#define SUBPEL_BITS 4
#define SUBPEL_SHIFTS (1 << SUBPEL_BITS)
#define SUBPEL_MASK ((1 << SUBPEL_BITS) - 1)

/* Number of taps in one interpolation kernel. */
#define SUBPEL_TAPS 8

/* One interpolation kernel: SUBPEL_TAPS 16-bit coefficients. */
typedef int16_t InterpKernel[SUBPEL_TAPS];
|
||||
|
||||
/* The bilinear filters use BIL_SUBPEL_BITS fractional bits (8 positions). */
#define BIL_SUBPEL_BITS 3
#define BIL_SUBPEL_SHIFTS (1 << BIL_SUBPEL_BITS)

/* 2 tap bilinear filters, one pair per position; each pair sums to 128. */
static const uint8_t bilinear_filters_2t[BIL_SUBPEL_SHIFTS][2] = {
  { 128, 0 },
  { 112, 16 },
  { 96, 32 },
  { 80, 48 },
  { 64, 64 },
  { 48, 80 },
  { 32, 96 },
  { 16, 112 },
};
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
#endif
|
||||
|
||||
#endif // AOM_DSP_AOM_FILTER_H_
|
||||
@@ -1,13 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2016, Alliance for Open Media. All rights reserved
|
||||
*
|
||||
* This source code is subject to the terms of the BSD 2 Clause License and
|
||||
* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
|
||||
* was not distributed with this source code in the LICENSE file, you can
|
||||
* obtain it at www.aomedia.org/license/software. If the Alliance for Open
|
||||
* Media Patent License 1.0 was not distributed with this source code in the
|
||||
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
|
||||
*/
|
||||
|
||||
/* Set to 1 to add some sanity checks in the fallback C code. */
const int simd_check = 1;
|
||||
@@ -1,32 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2016, Alliance for Open Media. All rights reserved
|
||||
*
|
||||
* This source code is subject to the terms of the BSD 2 Clause License and
|
||||
* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
|
||||
* was not distributed with this source code in the LICENSE file, you can
|
||||
* obtain it at www.aomedia.org/license/software. If the Alliance for Open
|
||||
* Media Patent License 1.0 was not distributed with this source code in the
|
||||
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
|
||||
*/
|
||||
|
||||
#ifndef AOM_DSP_AOM_AOM_SIMD_H_
|
||||
#define AOM_DSP_AOM_AOM_SIMD_H_
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#if defined(_WIN32)
|
||||
#include <intrin.h>
|
||||
#endif
|
||||
|
||||
#include "./aom_config.h"
|
||||
#include "./aom_simd_inline.h"
|
||||
|
||||
#if HAVE_NEON
|
||||
#include "simd/v256_intrinsics_arm.h"
|
||||
#elif HAVE_SSE2
|
||||
#include "simd/v256_intrinsics_x86.h"
|
||||
#else
|
||||
#include "simd/v256_intrinsics.h"
|
||||
#endif
|
||||
|
||||
#endif // AOM_DSP_AOM_AOM_SIMD_H_
|
||||
@@ -1,21 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2016, Alliance for Open Media. All rights reserved
|
||||
*
|
||||
* This source code is subject to the terms of the BSD 2 Clause License and
|
||||
* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
|
||||
* was not distributed with this source code in the LICENSE file, you can
|
||||
* obtain it at www.aomedia.org/license/software. If the Alliance for Open
|
||||
* Media Patent License 1.0 was not distributed with this source code in the
|
||||
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
|
||||
*/
|
||||
|
||||
#ifndef AOM_DSP_AOM_SIMD_INLINE_H_
|
||||
#define AOM_DSP_AOM_SIMD_INLINE_H_
|
||||
|
||||
#include "aom/aom_integer.h"
|
||||
|
||||
/* Qualifier for the SIMD helper functions; an earlier definition wins. */
#if !defined(SIMD_INLINE)
#define SIMD_INLINE static AOM_FORCE_INLINE
#endif
|
||||
|
||||
#endif // AOM_DSP_AOM_SIMD_INLINE_H_
|
||||
@@ -1,66 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2016, Alliance for Open Media. All rights reserved
|
||||
*
|
||||
* This source code is subject to the terms of the BSD 2 Clause License and
|
||||
* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
|
||||
* was not distributed with this source code in the LICENSE file, you can
|
||||
* obtain it at www.aomedia.org/license/software. If the Alliance for Open
|
||||
* Media Patent License 1.0 was not distributed with this source code in the
|
||||
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
|
||||
*/
|
||||
|
||||
#include <assert.h>
|
||||
|
||||
#include "./aom_dsp_rtcd.h"
|
||||
#include "aom_dsp/aom_dsp_common.h"
|
||||
#include "aom_ports/mem.h"
|
||||
|
||||
void aom_convolve8_neon(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst,
|
||||
ptrdiff_t dst_stride, const int16_t *filter_x,
|
||||
int x_step_q4, const int16_t *filter_y, int y_step_q4,
|
||||
int w, int h) {
|
||||
/* Given our constraints: w <= 64, h <= 64, taps == 8 we can reduce the
|
||||
* maximum buffer size to 64 * 64 + 7 (+ 1 to make it divisible by 4).
|
||||
*/
|
||||
DECLARE_ALIGNED(8, uint8_t, temp[64 * 72]);
|
||||
|
||||
// Account for the vertical phase needing 3 lines prior and 4 lines post
|
||||
int intermediate_height = h + 7;
|
||||
|
||||
assert(y_step_q4 == 16);
|
||||
assert(x_step_q4 == 16);
|
||||
|
||||
/* Filter starting 3 lines back. The neon implementation will ignore the
|
||||
* given height and filter a multiple of 4 lines. Since this goes in to
|
||||
* the temp buffer which has lots of extra room and is subsequently discarded
|
||||
* this is safe if somewhat less than ideal.
|
||||
*/
|
||||
aom_convolve8_horiz_neon(src - src_stride * 3, src_stride, temp, 64, filter_x,
|
||||
x_step_q4, filter_y, y_step_q4, w,
|
||||
intermediate_height);
|
||||
|
||||
/* Step into the temp buffer 3 lines to get the actual frame data */
|
||||
aom_convolve8_vert_neon(temp + 64 * 3, 64, dst, dst_stride, filter_x,
|
||||
x_step_q4, filter_y, y_step_q4, w, h);
|
||||
}
|
||||
|
||||
void aom_convolve8_avg_neon(const uint8_t *src, ptrdiff_t src_stride,
|
||||
uint8_t *dst, ptrdiff_t dst_stride,
|
||||
const int16_t *filter_x, int x_step_q4,
|
||||
const int16_t *filter_y, int y_step_q4, int w,
|
||||
int h) {
|
||||
DECLARE_ALIGNED(8, uint8_t, temp[64 * 72]);
|
||||
int intermediate_height = h + 7;
|
||||
|
||||
assert(y_step_q4 == 16);
|
||||
assert(x_step_q4 == 16);
|
||||
|
||||
/* This implementation has the same issues as above. In addition, we only want
|
||||
* to average the values after both passes.
|
||||
*/
|
||||
aom_convolve8_horiz_neon(src - src_stride * 3, src_stride, temp, 64, filter_x,
|
||||
x_step_q4, filter_y, y_step_q4, w,
|
||||
intermediate_height);
|
||||
aom_convolve8_avg_vert_neon(temp + 64 * 3, 64, dst, dst_stride, filter_x,
|
||||
x_step_q4, filter_y, y_step_q4, w, h);
|
||||
}
|
||||
@@ -1,59 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2016, Alliance for Open Media. All rights reserved
|
||||
*
|
||||
* This source code is subject to the terms of the BSD 2 Clause License and
|
||||
* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
|
||||
* was not distributed with this source code in the LICENSE file, you can
|
||||
* obtain it at www.aomedia.org/license/software. If the Alliance for Open
|
||||
* Media Patent License 1.0 was not distributed with this source code in the
|
||||
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
|
||||
*/
|
||||
|
||||
#include <arm_neon.h>
|
||||
|
||||
#include "aom_dsp/inv_txfm.h"
|
||||
#include "aom_ports/mem.h"
|
||||
|
||||
void aom_idct16x16_1_add_neon(int16_t *input, uint8_t *dest, int dest_stride) {
|
||||
uint8x8_t d2u8, d3u8, d30u8, d31u8;
|
||||
uint64x1_t d2u64, d3u64, d4u64, d5u64;
|
||||
uint16x8_t q0u16, q9u16, q10u16, q11u16, q12u16;
|
||||
int16x8_t q0s16;
|
||||
uint8_t *d1, *d2;
|
||||
int16_t i, j, a1;
|
||||
int16_t out = dct_const_round_shift(input[0] * cospi_16_64);
|
||||
out = dct_const_round_shift(out * cospi_16_64);
|
||||
a1 = ROUND_POWER_OF_TWO(out, 6);
|
||||
|
||||
q0s16 = vdupq_n_s16(a1);
|
||||
q0u16 = vreinterpretq_u16_s16(q0s16);
|
||||
|
||||
for (d1 = d2 = dest, i = 0; i < 4; i++) {
|
||||
for (j = 0; j < 2; j++) {
|
||||
d2u64 = vld1_u64((const uint64_t *)d1);
|
||||
d3u64 = vld1_u64((const uint64_t *)(d1 + 8));
|
||||
d1 += dest_stride;
|
||||
d4u64 = vld1_u64((const uint64_t *)d1);
|
||||
d5u64 = vld1_u64((const uint64_t *)(d1 + 8));
|
||||
d1 += dest_stride;
|
||||
|
||||
q9u16 = vaddw_u8(q0u16, vreinterpret_u8_u64(d2u64));
|
||||
q10u16 = vaddw_u8(q0u16, vreinterpret_u8_u64(d3u64));
|
||||
q11u16 = vaddw_u8(q0u16, vreinterpret_u8_u64(d4u64));
|
||||
q12u16 = vaddw_u8(q0u16, vreinterpret_u8_u64(d5u64));
|
||||
|
||||
d2u8 = vqmovun_s16(vreinterpretq_s16_u16(q9u16));
|
||||
d3u8 = vqmovun_s16(vreinterpretq_s16_u16(q10u16));
|
||||
d30u8 = vqmovun_s16(vreinterpretq_s16_u16(q11u16));
|
||||
d31u8 = vqmovun_s16(vreinterpretq_s16_u16(q12u16));
|
||||
|
||||
vst1_u64((uint64_t *)d2, vreinterpret_u64_u8(d2u8));
|
||||
vst1_u64((uint64_t *)(d2 + 8), vreinterpret_u64_u8(d3u8));
|
||||
d2 += dest_stride;
|
||||
vst1_u64((uint64_t *)d2, vreinterpret_u64_u8(d30u8));
|
||||
vst1_u64((uint64_t *)(d2 + 8), vreinterpret_u64_u8(d31u8));
|
||||
d2 += dest_stride;
|
||||
}
|
||||
}
|
||||
return;
|
||||
}
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,152 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2016, Alliance for Open Media. All rights reserved
|
||||
*
|
||||
* This source code is subject to the terms of the BSD 2 Clause License and
|
||||
* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
|
||||
* was not distributed with this source code in the LICENSE file, you can
|
||||
* obtain it at www.aomedia.org/license/software. If the Alliance for Open
|
||||
* Media Patent License 1.0 was not distributed with this source code in the
|
||||
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
|
||||
*/
|
||||
|
||||
#include "aom_dsp/aom_dsp_common.h"
|
||||
|
||||
/*
 * Helper passes of the 16x16 inverse DCT, defined outside this file.
 * pass1 handles the even-indexed elements and pass2 the odd-indexed ones,
 * combining them with the pass1 result; a nonzero skip_adding makes pass2
 * add the result to dest.
 */
void aom_idct16x16_256_add_neon_pass1(const int16_t *input, int16_t *output,
                                      int output_stride);
void aom_idct16x16_256_add_neon_pass2(const int16_t *src, int16_t *output,
                                      int16_t *pass1Output,
                                      int16_t skip_adding, uint8_t *dest,
                                      int dest_stride);

/* Same two passes for the reduced "_10" variant. */
void aom_idct16x16_10_add_neon_pass1(const int16_t *input, int16_t *output,
                                     int output_stride);
void aom_idct16x16_10_add_neon_pass2(const int16_t *src, int16_t *output,
                                     int16_t *pass1Output,
                                     int16_t skip_adding, uint8_t *dest,
                                     int dest_stride);

#if HAVE_NEON_ASM
/* For ARM NEON, d8-d15 are callee-saved registers, and need to be saved. */
extern void aom_push_neon(int64_t *store);
extern void aom_pop_neon(int64_t *store);
#endif  // HAVE_NEON_ASM
|
||||
|
||||
/*
 * 16x16 inverse DCT added to dest.
 *
 * The transform runs as a row stage (upper 8 rows, then lower 8 rows)
 * followed by a column stage (left 8 columns, then right 8 columns). Each
 * part is a pass1/pass2 pair: pass1 processes even elements 0, 2, ..., 14
 * and leaves its stage 6 result in the scratch buffer; pass2 processes odd
 * elements 1, 3, ..., 15 and combines them with that result for stage 7.
 * Row-stage results are kept in rows_out; column-stage results are added to
 * the destination.
 */
void aom_idct16x16_256_add_neon(const int16_t *input, uint8_t *dest,
                                int dest_stride) {
#if HAVE_NEON_ASM
  int64_t saved_regs[8];
#endif
  int16_t stage6_buf[16 * 16] = { 0 };
  int16_t rows_out[16 * 16] = { 0 };

#if HAVE_NEON_ASM
  /* Preserve d8-d15 for the duration of the transform. */
  aom_push_neon(saved_regs);
#endif

  /* Row stage, upper 8 rows. */
  aom_idct16x16_256_add_neon_pass1(input, stage6_buf, 8);
  aom_idct16x16_256_add_neon_pass2(input + 1, rows_out, stage6_buf, 0, dest,
                                   dest_stride);

  /* Row stage, lower 8 rows. */
  aom_idct16x16_256_add_neon_pass1(input + 8 * 16, stage6_buf, 8);
  aom_idct16x16_256_add_neon_pass2(input + 8 * 16 + 1, rows_out + 8,
                                   stage6_buf, 0, dest, dest_stride);

  /* Column stage, left 8 columns; the result is added to dest. */
  aom_idct16x16_256_add_neon_pass1(rows_out, stage6_buf, 8);
  aom_idct16x16_256_add_neon_pass2(rows_out + 1, rows_out, stage6_buf, 1, dest,
                                   dest_stride);

  /* Column stage, right 8 columns; the result is added at dest + 8. */
  aom_idct16x16_256_add_neon_pass1(rows_out + 8 * 16, stage6_buf, 8);
  aom_idct16x16_256_add_neon_pass2(rows_out + 8 * 16 + 1, rows_out + 8,
                                   stage6_buf, 1, dest + 8, dest_stride);

#if HAVE_NEON_ASM
  /* Restore d8-d15. */
  aom_pop_neon(saved_regs);
#endif
}
|
||||
|
||||
/*
 * Reduced 16x16 inverse DCT added to dest.
 *
 * Only the upper 8 rows go through the row stage (using the _10 passes);
 * the lower 8 rows are skipped because they are all 0s. The column stage
 * then uses the full _256 passes on the left and the right 8 columns.
 * pass1 handles even elements and leaves its stage 6 result in the scratch
 * buffer; pass2 handles odd elements and combines them for stage 7.
 */
void aom_idct16x16_10_add_neon(const int16_t *input, uint8_t *dest,
                               int dest_stride) {
#if HAVE_NEON_ASM
  int64_t saved_regs[8];
#endif
  int16_t stage6_buf[16 * 16] = { 0 };
  int16_t rows_out[16 * 16] = { 0 };

#if HAVE_NEON_ASM
  /* Preserve d8-d15 for the duration of the transform. */
  aom_push_neon(saved_regs);
#endif

  /* Row stage, upper 8 rows only. */
  aom_idct16x16_10_add_neon_pass1(input, stage6_buf, 8);
  aom_idct16x16_10_add_neon_pass2(input + 1, rows_out, stage6_buf, 0, dest,
                                  dest_stride);

  /* Column stage, left 8 columns; the result is added to dest. */
  aom_idct16x16_256_add_neon_pass1(rows_out, stage6_buf, 8);
  aom_idct16x16_256_add_neon_pass2(rows_out + 1, rows_out, stage6_buf, 1, dest,
                                   dest_stride);

  /* Column stage, right 8 columns; the result is added at dest + 8. */
  aom_idct16x16_256_add_neon_pass1(rows_out + 8 * 16, stage6_buf, 8);
  aom_idct16x16_256_add_neon_pass2(rows_out + 8 * 16 + 1, rows_out + 8,
                                   stage6_buf, 1, dest + 8, dest_stride);

#if HAVE_NEON_ASM
  /* Restore d8-d15. */
  aom_pop_neon(saved_regs);
#endif
}
|
||||
@@ -1,141 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2016, Alliance for Open Media. All rights reserved
|
||||
*
|
||||
* This source code is subject to the terms of the BSD 2 Clause License and
|
||||
* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
|
||||
* was not distributed with this source code in the LICENSE file, you can
|
||||
* obtain it at www.aomedia.org/license/software. If the Alliance for Open
|
||||
* Media Patent License 1.0 was not distributed with this source code in the
|
||||
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
|
||||
*/
|
||||
|
||||
#include <arm_neon.h>
|
||||
|
||||
#include "./aom_config.h"
|
||||
|
||||
#include "aom_dsp/inv_txfm.h"
|
||||
#include "aom_ports/mem.h"
|
||||
|
||||
/*
 * Loads 8 consecutive rows of 16 bytes, starting at `d` and stepping by
 * `d_stride`, into the eight output vectors (q8u8 receives the first row).
 */
static INLINE void LD_16x8(uint8_t *d, int d_stride, uint8x16_t *q8u8,
                           uint8x16_t *q9u8, uint8x16_t *q10u8,
                           uint8x16_t *q11u8, uint8x16_t *q12u8,
                           uint8x16_t *q13u8, uint8x16_t *q14u8,
                           uint8x16_t *q15u8) {
  uint8x16_t *const rows[8] = { q8u8,  q9u8,  q10u8, q11u8,
                                q12u8, q13u8, q14u8, q15u8 };
  int r;
  for (r = 0; r < 8; ++r) {
    *rows[r] = vld1q_u8(d + r * d_stride);
  }
}
|
||||
|
||||
/*
 * Adds `qdiffu8` to each of the eight row vectors in place, using the
 * saturating unsigned add vqaddq_u8.
 */
static INLINE void ADD_DIFF_16x8(uint8x16_t qdiffu8, uint8x16_t *q8u8,
                                 uint8x16_t *q9u8, uint8x16_t *q10u8,
                                 uint8x16_t *q11u8, uint8x16_t *q12u8,
                                 uint8x16_t *q13u8, uint8x16_t *q14u8,
                                 uint8x16_t *q15u8) {
  uint8x16_t *const rows[8] = { q8u8,  q9u8,  q10u8, q11u8,
                                q12u8, q13u8, q14u8, q15u8 };
  int r;
  for (r = 0; r < 8; ++r) {
    *rows[r] = vqaddq_u8(*rows[r], qdiffu8);
  }
}
|
||||
|
||||
/* Subtracts qdiffu8 from each of the eight rows in place, using the unsigned
 * saturating subtract vqsubq_u8. */
static INLINE void SUB_DIFF_16x8(uint8x16_t qdiffu8, uint8x16_t *q8u8,
                                 uint8x16_t *q9u8, uint8x16_t *q10u8,
                                 uint8x16_t *q11u8, uint8x16_t *q12u8,
                                 uint8x16_t *q13u8, uint8x16_t *q14u8,
                                 uint8x16_t *q15u8) {
  uint8x16_t *rows[8];
  int row;

  rows[0] = q8u8;
  rows[1] = q9u8;
  rows[2] = q10u8;
  rows[3] = q11u8;
  rows[4] = q12u8;
  rows[5] = q13u8;
  rows[6] = q14u8;
  rows[7] = q15u8;

  for (row = 0; row < 8; ++row) {
    *rows[row] = vqsubq_u8(*rows[row], qdiffu8);
  }
}
|
||||
|
||||
/* Stores *q8u8 (first row) through *q15u8 (eighth row) as eight 16-byte rows,
 * d_stride bytes apart and starting at d. */
static INLINE void ST_16x8(uint8_t *d, int d_stride, uint8x16_t *q8u8,
                           uint8x16_t *q9u8, uint8x16_t *q10u8,
                           uint8x16_t *q11u8, uint8x16_t *q12u8,
                           uint8x16_t *q13u8, uint8x16_t *q14u8,
                           uint8x16_t *q15u8) {
  const uint8x16_t *row_in[8];
  int row;

  row_in[0] = q8u8;
  row_in[1] = q9u8;
  row_in[2] = q10u8;
  row_in[3] = q11u8;
  row_in[4] = q12u8;
  row_in[5] = q13u8;
  row_in[6] = q14u8;
  row_in[7] = q15u8;

  /* Rows are written top to bottom. */
  for (row = 0; row < 8; ++row) {
    vst1q_u8(d + row * d_stride, *row_in[row]);
  }
}
|
||||
|
||||
/* DC-only 32x32 inverse transform: derives a single value from input[0] and
 * adds it (with unsigned saturation) to every pixel of the 32x32 block at
 * dest, whose rows are dest_stride bytes apart.
 *
 * The block is processed as two 16-pixel-wide column halves, each in four
 * bands of eight rows. A negative DC is applied as a saturating subtract of
 * its magnitude. */
void aom_idct32x32_1_add_neon(int16_t *input, uint8_t *dest, int dest_stride) {
  uint8x16_t delta, r0, r1, r2, r3, r4, r5, r6, r7;
  uint8_t *band_ptr;
  int half, band, is_negative;
  const int band_step = dest_stride * 8;
  int16_t a1;
  int16_t out = dct_const_round_shift(input[0] * cospi_16_64);

  out = dct_const_round_shift(out * cospi_16_64);
  a1 = ROUND_POWER_OF_TWO(out, 6);

  /* Work with the magnitude and remember the sign. */
  is_negative = !(a1 >= 0);
  if (is_negative) {
    a1 = -a1;
  }
  a1 = a1 < 0 ? 0 : a1 > 255 ? 255 : a1;
  delta = vdupq_n_u8(a1);

  for (half = 0; half < 2; half++) {
    band_ptr = dest + 16 * half;
    for (band = 0; band < 4; band++) {
      LD_16x8(band_ptr, dest_stride, &r0, &r1, &r2, &r3, &r4, &r5, &r6, &r7);
      if (is_negative) {
        SUB_DIFF_16x8(delta, &r0, &r1, &r2, &r3, &r4, &r5, &r6, &r7);
      } else {
        ADD_DIFF_16x8(delta, &r0, &r1, &r2, &r3, &r4, &r5, &r6, &r7);
      }
      ST_16x8(band_ptr, dest_stride, &r0, &r1, &r2, &r3, &r4, &r5, &r6, &r7);
      band_ptr += band_step;
    }
  }
}
|
||||
@@ -1,686 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2016, Alliance for Open Media. All rights reserved
|
||||
*
|
||||
* This source code is subject to the terms of the BSD 2 Clause License and
|
||||
* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
|
||||
* was not distributed with this source code in the LICENSE file, you can
|
||||
* obtain it at www.aomedia.org/license/software. If the Alliance for Open
|
||||
* Media Patent License 1.0 was not distributed with this source code in the
|
||||
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
|
||||
*/
|
||||
|
||||
#include <arm_neon.h>
|
||||
|
||||
#include "./aom_config.h"
|
||||
#include "aom_dsp/txfm_common.h"
|
||||
|
||||
/*
 * Row-access helpers for the 32-point inverse DCT.
 *
 * All three macros rely on names from the expansion site: LOAD_FROM_TRANSPOSED
 * reads from `trans_buf` into `q14s16`/`q13s16`; LOAD_FROM_OUTPUT and
 * STORE_IN_OUTPUT read/write rows of `out`. `first` and `second` are row
 * indices (8 int16_t per row in trans_buf, 32 int16_t per row in out).
 * The `prev` argument is unused and kept only for call-site compatibility.
 *
 * Index arguments are parenthesized so expressions such as `n + 1` scale
 * correctly. Each macro deliberately ends with ';' because existing call
 * sites invoke them without a trailing semicolon.
 */
#define LOAD_FROM_TRANSPOSED(prev, first, second) \
  q14s16 = vld1q_s16(trans_buf + (first) * 8);    \
  q13s16 = vld1q_s16(trans_buf + (second) * 8);

#define LOAD_FROM_OUTPUT(prev, first, second, qA, qB) \
  (qA) = vld1q_s16(out + (first) * 32);               \
  (qB) = vld1q_s16(out + (second) * 32);

#define STORE_IN_OUTPUT(prev, first, second, qA, qB) \
  vst1q_s16(out + (first) * 32, (qA));               \
  vst1q_s16(out + (second) * 32, (qB));
|
||||
|
||||
/*
 * STORE_COMBINE_CENTER_RESULTS(r10, r9) forwards to the helper below together
 * with `stride`, `q6s16`, `q7s16`, `q8s16` and `q9s16` taken from the
 * expansion site. The macro intentionally has no trailing ';' (call sites
 * supply their own).
 */
#define STORE_COMBINE_CENTER_RESULTS(r10, r9) \
  store_combine_center_results(r10, r9, stride, q6s16, q7s16, q8s16, q9s16)

/*
 * Rounds four rows of residuals (>> 6 with rounding), adds them to the
 * existing 8-pixel rows in the destination and stores the results clamped to
 * 0..255:
 *   q6s16 -> row at p1            q7s16 -> row at p1 + stride
 *   q8s16 -> row at p2 - stride   q9s16 -> row at p2
 *
 * Pixels are accessed with the byte-wise vld1_u8/vst1_u8 intrinsics, so no
 * uint8_t* -> int16_t* pointer cast (and no alignment assumption) is needed.
 * All four rows are loaded before any row is stored.
 */
static INLINE void store_combine_center_results(uint8_t *p1, uint8_t *p2,
                                                int stride, int16x8_t q6s16,
                                                int16x8_t q7s16,
                                                int16x8_t q8s16,
                                                int16x8_t q9s16) {
  uint8x8_t d8u8, d9u8, d10u8, d11u8;

  d8u8 = vld1_u8(p1);
  p1 += stride;
  d11u8 = vld1_u8(p2);
  p2 -= stride;
  d9u8 = vld1_u8(p1);
  d10u8 = vld1_u8(p2);

  q7s16 = vrshrq_n_s16(q7s16, 6);
  q8s16 = vrshrq_n_s16(q8s16, 6);
  q9s16 = vrshrq_n_s16(q9s16, 6);
  q6s16 = vrshrq_n_s16(q6s16, 6);

  /* Widening add of the pixels; done on the u16 view, the bit pattern is the
   * same as a signed 16-bit add. */
  q7s16 = vreinterpretq_s16_u16(vaddw_u8(vreinterpretq_u16_s16(q7s16), d9u8));
  q8s16 = vreinterpretq_s16_u16(vaddw_u8(vreinterpretq_u16_s16(q8s16), d10u8));
  q9s16 = vreinterpretq_s16_u16(vaddw_u8(vreinterpretq_u16_s16(q9s16), d11u8));
  q6s16 = vreinterpretq_s16_u16(vaddw_u8(vreinterpretq_u16_s16(q6s16), d8u8));

  /* vqmovun_s16 saturates each signed sum to the unsigned 8-bit range. */
  vst1_u8(p1, vqmovun_s16(q7s16));
  p1 -= stride;
  vst1_u8(p2, vqmovun_s16(q8s16));
  p2 += stride;
  vst1_u8(p1, vqmovun_s16(q6s16));
  vst1_u8(p2, vqmovun_s16(q9s16));
}
|
||||
|
||||
/*
 * STORE_COMBINE_EXTREME_RESULTS(r7, r6) forwards to the helper below together
 * with `stride`, `q4s16`, `q5s16`, `q6s16` and `q7s16` taken from the
 * expansion site. The macro intentionally has no leading or trailing ';'
 * (call sites supply their own).
 */
#define STORE_COMBINE_EXTREME_RESULTS(r7, r6) \
  store_combine_extreme_results(r7, r6, stride, q4s16, q5s16, q6s16, q7s16)

/*
 * Rounds four rows of residuals (>> 6 with rounding), adds them to the
 * existing 8-pixel rows in the destination and stores the results clamped to
 * 0..255:
 *   q4s16 -> row at p1            q5s16 -> row at p1 + stride
 *   q6s16 -> row at p2 - stride   q7s16 -> row at p2
 *
 * Pixels are accessed with the byte-wise vld1_u8/vst1_u8 intrinsics, so no
 * uint8_t* -> int16_t* pointer cast (and no alignment assumption) is needed.
 * All four rows are loaded before any row is stored.
 */
static INLINE void store_combine_extreme_results(uint8_t *p1, uint8_t *p2,
                                                 int stride, int16x8_t q4s16,
                                                 int16x8_t q5s16,
                                                 int16x8_t q6s16,
                                                 int16x8_t q7s16) {
  uint8x8_t d4u8, d5u8, d6u8, d7u8;

  d4u8 = vld1_u8(p1);
  p1 += stride;
  d7u8 = vld1_u8(p2);
  p2 -= stride;
  d5u8 = vld1_u8(p1);
  d6u8 = vld1_u8(p2);

  q5s16 = vrshrq_n_s16(q5s16, 6);
  q6s16 = vrshrq_n_s16(q6s16, 6);
  q7s16 = vrshrq_n_s16(q7s16, 6);
  q4s16 = vrshrq_n_s16(q4s16, 6);

  /* Widening add of the pixels; done on the u16 view, the bit pattern is the
   * same as a signed 16-bit add. */
  q5s16 = vreinterpretq_s16_u16(vaddw_u8(vreinterpretq_u16_s16(q5s16), d5u8));
  q6s16 = vreinterpretq_s16_u16(vaddw_u8(vreinterpretq_u16_s16(q6s16), d6u8));
  q7s16 = vreinterpretq_s16_u16(vaddw_u8(vreinterpretq_u16_s16(q7s16), d7u8));
  q4s16 = vreinterpretq_s16_u16(vaddw_u8(vreinterpretq_u16_s16(q4s16), d4u8));

  /* vqmovun_s16 saturates each signed sum to the unsigned 8-bit range. */
  vst1_u8(p1, vqmovun_s16(q5s16));
  p1 -= stride;
  vst1_u8(p2, vqmovun_s16(q6s16));
  p2 += stride;
  vst1_u8(p2, vqmovun_s16(q7s16));
  vst1_u8(p1, vqmovun_s16(q4s16));
}
|
||||
|
||||
/* Shorthand that feeds the expansion site's q14s16/q13s16 into DO_BUTTERFLY.
 * The expansion ends with ';' because many call sites omit their own. */
#define DO_BUTTERFLY_STD(const_1, const_2, qA, qB) \
  DO_BUTTERFLY(q14s16, q13s16, const_1, const_2, qA, qB);

/* Butterfly rotation on eight lanes, with x = q14s16 and y = q13s16:
 *   *qAs16 = (x * first_const  - y * second_const) >> 14
 *   *qBs16 = (x * second_const + y * first_const ) >> 14
 * Products are formed in 32 bits; the shift rounds and the narrowing back to
 * 16 bits saturates (vqrshrn_n_s32). Both outputs are written only after all
 * inputs have been consumed. */
static INLINE void DO_BUTTERFLY(int16x8_t q14s16, int16x8_t q13s16,
                                int16_t first_const, int16_t second_const,
                                int16x8_t *qAs16, int16x8_t *qBs16) {
  const int16x4_t k1 = vdup_n_s16(first_const);
  const int16x4_t k2 = vdup_n_s16(second_const);
  const int16x4_t x_lo = vget_low_s16(q14s16);
  const int16x4_t x_hi = vget_high_s16(q14s16);
  const int16x4_t y_lo = vget_low_s16(q13s16);
  const int16x4_t y_hi = vget_high_s16(q13s16);

  /* x * k1 - y * k2 */
  const int32x4_t diff_lo = vsubq_s32(vmull_s16(x_lo, k1), vmull_s16(y_lo, k2));
  const int32x4_t diff_hi = vsubq_s32(vmull_s16(x_hi, k1), vmull_s16(y_hi, k2));

  /* x * k2 + y * k1 */
  const int32x4_t sum_lo = vaddq_s32(vmull_s16(x_lo, k2), vmull_s16(y_lo, k1));
  const int32x4_t sum_hi = vaddq_s32(vmull_s16(x_hi, k2), vmull_s16(y_hi, k1));

  *qAs16 = vcombine_s16(vqrshrn_n_s32(diff_lo, 14), vqrshrn_n_s32(diff_hi, 14));
  *qBs16 = vcombine_s16(vqrshrn_n_s32(sum_lo, 14), vqrshrn_n_s32(sum_hi, 14));
}
|
||||
|
||||
/* Transposes an 8-row by 32-column strip of int16_t coefficients.
 *
 * input points at 8 rows that are 32 elements apart. The strip is handled as
 * four 8x8 tiles (columns 0-7, 8-15, 16-23, 24-31); each tile is transposed
 * and written to t_buf as 8 consecutive rows of 8 elements, so t_buf receives
 * 32 rows of 8 elements in total. */
static INLINE void idct32_transpose_pair(int16_t *input, int16_t *t_buf) {
  const int stride = 32;
  int16x8_t row[8];
  int16x8_t mixed[8];
  int32x4x2_t w0, w1;
  int16x8x2_t h0, h1;
  int tile, r, half;

  for (tile = 0; tile < 4; tile++) {
    const int16_t *src = input + tile * 8;
    int16_t *dst = t_buf + tile * 64;

    for (r = 0; r < 8; r++) {
      row[r] = vld1q_s16(src + r * stride);
    }

    /* Pair row r with row r + 4: low halves go to mixed[0..3], high halves
     * to mixed[4..7]. */
    for (r = 0; r < 4; r++) {
      mixed[r] =
          vcombine_s16(vget_low_s16(row[r]), vget_low_s16(row[r + 4]));
      mixed[r + 4] =
          vcombine_s16(vget_high_s16(row[r]), vget_high_s16(row[r + 4]));
    }

    /* Each group of four vectors yields four transposed output rows via a
     * 32-bit transpose step followed by a 16-bit transpose step. */
    for (half = 0; half < 2; half++) {
      const int16x8_t *m = mixed + 4 * half;
      int16_t *o = dst + 32 * half;

      w0 = vtrnq_s32(vreinterpretq_s32_s16(m[0]), vreinterpretq_s32_s16(m[2]));
      w1 = vtrnq_s32(vreinterpretq_s32_s16(m[1]), vreinterpretq_s32_s16(m[3]));

      h0 = vtrnq_s16(vreinterpretq_s16_s32(w0.val[0]),
                     vreinterpretq_s16_s32(w1.val[0]));
      h1 = vtrnq_s16(vreinterpretq_s16_s32(w0.val[1]),
                     vreinterpretq_s16_s32(w1.val[1]));

      vst1q_s16(o, h0.val[0]);
      vst1q_s16(o + 8, h0.val[1]);
      vst1q_s16(o + 16, h1.val[0]);
      vst1q_s16(o + 24, h1.val[1]);
    }
  }
}
|
||||
|
||||
/* Final add/subtract stage of the first pass for one band of 8 columns.
 *
 * `out` is addressed in rows of 32 int16_t; only the first 8 elements of each
 * row are touched. q6s16..q9s16 are stored directly to rows 16, 17, 14, 15.
 * The remaining inputs are combined with partial results already held in
 * `out`:
 *   - an "outer" step combines the running pair with rows (30-2k, 31-2k),
 *     writing the differences back there and the sums to rows (2k, 2k+1);
 *   - an "inner" step combines the next input pair with rows (12-2k, 13-2k)
 *     and rows (18+2k, 19+2k), producing the running pair for the next
 *     outer step. */
static INLINE void idct32_bands_end_1st_pass(int16_t *out, int16x8_t q2s16,
                                             int16x8_t q3s16, int16x8_t q6s16,
                                             int16x8_t q7s16, int16x8_t q8s16,
                                             int16x8_t q9s16, int16x8_t q10s16,
                                             int16x8_t q11s16, int16x8_t q12s16,
                                             int16x8_t q13s16, int16x8_t q14s16,
                                             int16x8_t q15s16) {
  int16x8_t next_a[3], next_b[3];
  int16x8_t run_a = q2s16;
  int16x8_t run_b = q3s16;
  int16x8_t ld0, ld1, dif_a, dif_b;
  int16_t *top, *bot, *mid, *far;
  int k;

  next_a[0] = q10s16;
  next_b[0] = q11s16;
  next_a[1] = q12s16;
  next_b[1] = q13s16;
  next_a[2] = q14s16;
  next_b[2] = q15s16;

  vst1q_s16(out + 16 * 32, q6s16);
  vst1q_s16(out + 17 * 32, q7s16);
  vst1q_s16(out + 14 * 32, q8s16);
  vst1q_s16(out + 15 * 32, q9s16);

  for (k = 0;; k++) {
    /* Outer step: rows (30-2k, 31-2k) and (2k, 2k+1). */
    top = out + (2 * k) * 32;
    bot = out + (30 - 2 * k) * 32;
    ld0 = vld1q_s16(bot);
    ld1 = vld1q_s16(bot + 32);
    vst1q_s16(bot, vsubq_s16(run_b, ld0));
    vst1q_s16(bot + 32, vsubq_s16(run_a, ld1));
    vst1q_s16(top, vaddq_s16(run_a, ld1));
    vst1q_s16(top + 32, vaddq_s16(run_b, ld0));

    if (k == 3) break;

    /* Inner step: rows (12-2k, 13-2k) and (18+2k, 19+2k). */
    mid = out + (12 - 2 * k) * 32;
    far = out + (18 + 2 * k) * 32;
    ld0 = vld1q_s16(mid);
    ld1 = vld1q_s16(mid + 32);
    run_a = vaddq_s16(next_a[k], ld1);
    run_b = vaddq_s16(next_b[k], ld0);
    dif_a = vsubq_s16(next_b[k], ld0);
    dif_b = vsubq_s16(next_a[k], ld1);

    ld0 = vld1q_s16(far);
    ld1 = vld1q_s16(far + 32);
    vst1q_s16(far, vsubq_s16(dif_b, ld0));
    vst1q_s16(far + 32, vsubq_s16(dif_a, ld1));
    vst1q_s16(mid, vaddq_s16(dif_a, ld1));
    vst1q_s16(mid + 32, vaddq_s16(dif_b, ld0));
  }
}
|
||||
|
||||
/* Final add/subtract stage of the second pass for one band of 8 columns.
 *
 * Same combination pattern as the first-pass variant, but the finished rows
 * are handed to STORE_COMBINE_CENTER_RESULTS / STORE_COMBINE_EXTREME_RESULTS
 * (which receive destination row pointers plus the local stride and q-vectors)
 * instead of being stored back into `out`. `out` is only read here, in rows
 * of 32 int16_t.
 *
 * Iteration k hands over:
 *   center:  rows 16+2k, 17+2k (from r10) and 15-2k, 14-2k (from r9)
 *   extreme: rows 2k, 2k+1 (from r7) and 31-2k, 30-2k (from r6)
 *
 * NOTE: the local names stride and q4s16..q9s16 must keep these exact
 * spellings because the STORE_COMBINE_* macros reference them. */
static INLINE void idct32_bands_end_2nd_pass(
    int16_t *out, uint8_t *dest, int stride, int16x8_t q2s16, int16x8_t q3s16,
    int16x8_t q6s16, int16x8_t q7s16, int16x8_t q8s16, int16x8_t q9s16,
    int16x8_t q10s16, int16x8_t q11s16, int16x8_t q12s16, int16x8_t q13s16,
    int16x8_t q14s16, int16x8_t q15s16) {
  int16x8_t next_a[3], next_b[3];
  int16x8_t q0s16, q1s16, q4s16, q5s16;
  uint8_t *r6, *r7, *r9, *r10;
  int k;

  next_a[0] = q10s16;
  next_b[0] = q11s16;
  next_a[1] = q12s16;
  next_b[1] = q13s16;
  next_a[2] = q14s16;
  next_b[2] = q15s16;

  for (k = 0; k < 4; k++) {
    if (k > 0) {
      /* Fold the next input pair into rows (14-2k, 15-2k) ... */
      q0s16 = vld1q_s16(out + (14 - 2 * k) * 32);
      q1s16 = vld1q_s16(out + (15 - 2 * k) * 32);
      q2s16 = vaddq_s16(next_a[k - 1], q1s16);
      q3s16 = vaddq_s16(next_b[k - 1], q0s16);
      q4s16 = vsubq_s16(next_b[k - 1], q0s16);
      q5s16 = vsubq_s16(next_a[k - 1], q1s16);

      /* ... and rows (16+2k, 17+2k) to get the center results. */
      q0s16 = vld1q_s16(out + (16 + 2 * k) * 32);
      q1s16 = vld1q_s16(out + (17 + 2 * k) * 32);
      q8s16 = vaddq_s16(q4s16, q1s16);
      q9s16 = vaddq_s16(q5s16, q0s16);
      q6s16 = vsubq_s16(q5s16, q0s16);
      q7s16 = vsubq_s16(q4s16, q1s16);
    }

    r10 = dest + (16 + 2 * k) * stride;
    r9 = dest + (15 - 2 * k) * stride;
    STORE_COMBINE_CENTER_RESULTS(r10, r9);

    /* Combine the running pair with rows (30-2k, 31-2k). */
    q0s16 = vld1q_s16(out + (30 - 2 * k) * 32);
    q1s16 = vld1q_s16(out + (31 - 2 * k) * 32);
    q4s16 = vaddq_s16(q2s16, q1s16);
    q5s16 = vaddq_s16(q3s16, q0s16);
    q6s16 = vsubq_s16(q3s16, q0s16);
    q7s16 = vsubq_s16(q2s16, q1s16);

    r7 = dest + (2 * k) * stride;
    r6 = dest + (31 - 2 * k) * stride;
    STORE_COMBINE_EXTREME_RESULTS(r7, r6);
  }
}
|
||||
|
||||
// Full 32x32 inverse DCT: transforms the coefficients at `input` and hands
// the second-pass results to idct32_bands_end_2nd_pass together with
// `dest`/`stride`.
//
// Two passes are run by the outer loop. Pass 0 reads `input` and writes its
// rows into pass1; pass 1 reads pass1 and uses pass2 as its row buffer. Each
// pass handles four bands (`i` loop); a band is first transposed into
// trans_buf, then processed in four blocks (A-D) whose intermediate rows are
// kept in `out`.
//
// NOTE: the LOAD_FROM_TRANSPOSED / DO_BUTTERFLY_STD / LOAD_FROM_OUTPUT /
// STORE_IN_OUTPUT macros implicitly use the locals q14s16, q13s16, trans_buf
// and out, so the statement order below is significant.
void aom_idct32x32_1024_add_neon(int16_t *input, uint8_t *dest, int stride) {
|
||||
int i, idct32_pass_loop;
|
||||
int16_t trans_buf[32 * 8];
|
||||
int16_t pass1[32 * 32];
|
||||
int16_t pass2[32 * 32];
|
||||
int16_t *out;
|
||||
int16x8_t q0s16, q1s16, q2s16, q3s16, q4s16, q5s16, q6s16, q7s16;
|
||||
int16x8_t q8s16, q9s16, q10s16, q11s16, q12s16, q13s16, q14s16, q15s16;
|
||||
|
||||
// The increment clause switches input/out over to the second-pass buffers.
for (idct32_pass_loop = 0, out = pass1; idct32_pass_loop < 2;
|
||||
idct32_pass_loop++,
|
||||
input = pass1, // the input of pass2 is the result of pass1
|
||||
out = pass2) {
|
||||
// Each band consumes 8 input rows of 32 and advances out by 8 columns.
for (i = 0; i < 4; i++, input += 32 * 8, out += 8) { // idct32_bands_loop
|
||||
idct32_transpose_pair(input, trans_buf);
|
||||
|
||||
// -----------------------------------------
|
||||
// BLOCK A: 16-19,28-31
|
||||
// -----------------------------------------
|
||||
// generate 16,17,30,31
|
||||
// part of stage 1
|
||||
LOAD_FROM_TRANSPOSED(0, 1, 31)
|
||||
DO_BUTTERFLY_STD(cospi_31_64, cospi_1_64, &q0s16, &q2s16)
|
||||
LOAD_FROM_TRANSPOSED(31, 17, 15)
|
||||
DO_BUTTERFLY_STD(cospi_15_64, cospi_17_64, &q1s16, &q3s16)
|
||||
// part of stage 2
|
||||
q4s16 = vaddq_s16(q0s16, q1s16);
|
||||
q13s16 = vsubq_s16(q0s16, q1s16);
|
||||
q6s16 = vaddq_s16(q2s16, q3s16);
|
||||
q14s16 = vsubq_s16(q2s16, q3s16);
|
||||
// part of stage 3
|
||||
DO_BUTTERFLY_STD(cospi_28_64, cospi_4_64, &q5s16, &q7s16)
|
||||
|
||||
// generate 18,19,28,29
|
||||
// part of stage 1
|
||||
LOAD_FROM_TRANSPOSED(15, 9, 23)
|
||||
DO_BUTTERFLY_STD(cospi_23_64, cospi_9_64, &q0s16, &q2s16)
|
||||
LOAD_FROM_TRANSPOSED(23, 25, 7)
|
||||
DO_BUTTERFLY_STD(cospi_7_64, cospi_25_64, &q1s16, &q3s16)
|
||||
// part of stage 2
|
||||
q13s16 = vsubq_s16(q3s16, q2s16);
|
||||
q3s16 = vaddq_s16(q3s16, q2s16);
|
||||
q14s16 = vsubq_s16(q1s16, q0s16);
|
||||
q2s16 = vaddq_s16(q1s16, q0s16);
|
||||
// part of stage 3
|
||||
DO_BUTTERFLY_STD(-cospi_4_64, -cospi_28_64, &q1s16, &q0s16)
|
||||
// part of stage 4
|
||||
q8s16 = vaddq_s16(q4s16, q2s16);
|
||||
q9s16 = vaddq_s16(q5s16, q0s16);
|
||||
q10s16 = vaddq_s16(q7s16, q1s16);
|
||||
q15s16 = vaddq_s16(q6s16, q3s16);
|
||||
q13s16 = vsubq_s16(q5s16, q0s16);
|
||||
q14s16 = vsubq_s16(q7s16, q1s16);
|
||||
STORE_IN_OUTPUT(0, 16, 31, q8s16, q15s16)
|
||||
STORE_IN_OUTPUT(31, 17, 30, q9s16, q10s16)
|
||||
// part of stage 5
|
||||
DO_BUTTERFLY_STD(cospi_24_64, cospi_8_64, &q0s16, &q1s16)
|
||||
STORE_IN_OUTPUT(30, 29, 18, q1s16, q0s16)
|
||||
// part of stage 4
|
||||
q13s16 = vsubq_s16(q4s16, q2s16);
|
||||
q14s16 = vsubq_s16(q6s16, q3s16);
|
||||
// part of stage 5
|
||||
DO_BUTTERFLY_STD(cospi_24_64, cospi_8_64, &q4s16, &q6s16)
|
||||
STORE_IN_OUTPUT(18, 19, 28, q4s16, q6s16)
|
||||
|
||||
// -----------------------------------------
|
||||
// BLOCK B: 20-23,24-27
|
||||
// -----------------------------------------
|
||||
// generate 20,21,26,27
|
||||
// part of stage 1
|
||||
LOAD_FROM_TRANSPOSED(7, 5, 27)
|
||||
DO_BUTTERFLY_STD(cospi_27_64, cospi_5_64, &q0s16, &q2s16)
|
||||
LOAD_FROM_TRANSPOSED(27, 21, 11)
|
||||
DO_BUTTERFLY_STD(cospi_11_64, cospi_21_64, &q1s16, &q3s16)
|
||||
// part of stage 2
|
||||
q13s16 = vsubq_s16(q0s16, q1s16);
|
||||
q0s16 = vaddq_s16(q0s16, q1s16);
|
||||
q14s16 = vsubq_s16(q2s16, q3s16);
|
||||
q2s16 = vaddq_s16(q2s16, q3s16);
|
||||
// part of stage 3
|
||||
DO_BUTTERFLY_STD(cospi_12_64, cospi_20_64, &q1s16, &q3s16)
|
||||
|
||||
// generate 22,23,24,25
|
||||
// part of stage 1
|
||||
LOAD_FROM_TRANSPOSED(11, 13, 19)
|
||||
DO_BUTTERFLY_STD(cospi_19_64, cospi_13_64, &q5s16, &q7s16)
|
||||
LOAD_FROM_TRANSPOSED(19, 29, 3)
|
||||
DO_BUTTERFLY_STD(cospi_3_64, cospi_29_64, &q4s16, &q6s16)
|
||||
// part of stage 2
|
||||
q14s16 = vsubq_s16(q4s16, q5s16);
|
||||
q5s16 = vaddq_s16(q4s16, q5s16);
|
||||
q13s16 = vsubq_s16(q6s16, q7s16);
|
||||
q6s16 = vaddq_s16(q6s16, q7s16);
|
||||
// part of stage 3
|
||||
DO_BUTTERFLY_STD(-cospi_20_64, -cospi_12_64, &q4s16, &q7s16)
|
||||
// part of stage 4
|
||||
q10s16 = vaddq_s16(q7s16, q1s16);
|
||||
q11s16 = vaddq_s16(q5s16, q0s16);
|
||||
q12s16 = vaddq_s16(q6s16, q2s16);
|
||||
q15s16 = vaddq_s16(q4s16, q3s16);
|
||||
// part of stage 6
|
||||
LOAD_FROM_OUTPUT(28, 16, 17, q14s16, q13s16)
|
||||
q8s16 = vaddq_s16(q14s16, q11s16);
|
||||
q9s16 = vaddq_s16(q13s16, q10s16);
|
||||
q13s16 = vsubq_s16(q13s16, q10s16);
|
||||
q11s16 = vsubq_s16(q14s16, q11s16);
|
||||
STORE_IN_OUTPUT(17, 17, 16, q9s16, q8s16)
|
||||
LOAD_FROM_OUTPUT(16, 30, 31, q14s16, q9s16)
|
||||
q8s16 = vsubq_s16(q9s16, q12s16);
|
||||
q10s16 = vaddq_s16(q14s16, q15s16);
|
||||
q14s16 = vsubq_s16(q14s16, q15s16);
|
||||
q12s16 = vaddq_s16(q9s16, q12s16);
|
||||
STORE_IN_OUTPUT(31, 30, 31, q10s16, q12s16)
|
||||
// part of stage 7
|
||||
DO_BUTTERFLY_STD(cospi_16_64, cospi_16_64, &q13s16, &q14s16)
|
||||
STORE_IN_OUTPUT(31, 25, 22, q14s16, q13s16)
|
||||
q13s16 = q11s16;
|
||||
q14s16 = q8s16;
|
||||
DO_BUTTERFLY_STD(cospi_16_64, cospi_16_64, &q13s16, &q14s16)
|
||||
STORE_IN_OUTPUT(22, 24, 23, q14s16, q13s16)
|
||||
// part of stage 4
|
||||
q14s16 = vsubq_s16(q5s16, q0s16);
|
||||
q13s16 = vsubq_s16(q6s16, q2s16);
|
||||
DO_BUTTERFLY_STD(-cospi_8_64, -cospi_24_64, &q5s16, &q6s16);
|
||||
q14s16 = vsubq_s16(q7s16, q1s16);
|
||||
q13s16 = vsubq_s16(q4s16, q3s16);
|
||||
DO_BUTTERFLY_STD(-cospi_8_64, -cospi_24_64, &q0s16, &q1s16);
|
||||
// part of stage 6
|
||||
LOAD_FROM_OUTPUT(23, 18, 19, q14s16, q13s16)
|
||||
q8s16 = vaddq_s16(q14s16, q1s16);
|
||||
q9s16 = vaddq_s16(q13s16, q6s16);
|
||||
q13s16 = vsubq_s16(q13s16, q6s16);
|
||||
q1s16 = vsubq_s16(q14s16, q1s16);
|
||||
STORE_IN_OUTPUT(19, 18, 19, q8s16, q9s16)
|
||||
LOAD_FROM_OUTPUT(19, 28, 29, q8s16, q9s16)
|
||||
q14s16 = vsubq_s16(q8s16, q5s16);
|
||||
q10s16 = vaddq_s16(q8s16, q5s16);
|
||||
q11s16 = vaddq_s16(q9s16, q0s16);
|
||||
q0s16 = vsubq_s16(q9s16, q0s16);
|
||||
STORE_IN_OUTPUT(29, 28, 29, q10s16, q11s16)
|
||||
// part of stage 7
|
||||
DO_BUTTERFLY_STD(cospi_16_64, cospi_16_64, &q13s16, &q14s16)
|
||||
STORE_IN_OUTPUT(29, 20, 27, q13s16, q14s16)
|
||||
// Direct call (not the _STD macro): the inputs here are q0s16/q1s16.
DO_BUTTERFLY(q0s16, q1s16, cospi_16_64, cospi_16_64, &q1s16, &q0s16);
|
||||
STORE_IN_OUTPUT(27, 21, 26, q1s16, q0s16)
|
||||
|
||||
// -----------------------------------------
|
||||
// BLOCK C: 8-11,12-15
|
||||
// -----------------------------------------
|
||||
// generate 8,9,14,15
|
||||
// part of stage 2
|
||||
LOAD_FROM_TRANSPOSED(3, 2, 30)
|
||||
DO_BUTTERFLY_STD(cospi_30_64, cospi_2_64, &q0s16, &q2s16)
|
||||
LOAD_FROM_TRANSPOSED(30, 18, 14)
|
||||
DO_BUTTERFLY_STD(cospi_14_64, cospi_18_64, &q1s16, &q3s16)
|
||||
// part of stage 3
|
||||
q13s16 = vsubq_s16(q0s16, q1s16);
|
||||
q0s16 = vaddq_s16(q0s16, q1s16);
|
||||
q14s16 = vsubq_s16(q2s16, q3s16);
|
||||
q2s16 = vaddq_s16(q2s16, q3s16);
|
||||
// part of stage 4
|
||||
DO_BUTTERFLY_STD(cospi_24_64, cospi_8_64, &q1s16, &q3s16)
|
||||
|
||||
// generate 10,11,12,13
|
||||
// part of stage 2
|
||||
LOAD_FROM_TRANSPOSED(14, 10, 22)
|
||||
DO_BUTTERFLY_STD(cospi_22_64, cospi_10_64, &q5s16, &q7s16)
|
||||
LOAD_FROM_TRANSPOSED(22, 26, 6)
|
||||
DO_BUTTERFLY_STD(cospi_6_64, cospi_26_64, &q4s16, &q6s16)
|
||||
// part of stage 3
|
||||
q14s16 = vsubq_s16(q4s16, q5s16);
|
||||
q5s16 = vaddq_s16(q4s16, q5s16);
|
||||
q13s16 = vsubq_s16(q6s16, q7s16);
|
||||
q6s16 = vaddq_s16(q6s16, q7s16);
|
||||
// part of stage 4
|
||||
DO_BUTTERFLY_STD(-cospi_8_64, -cospi_24_64, &q4s16, &q7s16)
|
||||
// part of stage 5
|
||||
q8s16 = vaddq_s16(q0s16, q5s16);
|
||||
q9s16 = vaddq_s16(q1s16, q7s16);
|
||||
q13s16 = vsubq_s16(q1s16, q7s16);
|
||||
q14s16 = vsubq_s16(q3s16, q4s16);
|
||||
q10s16 = vaddq_s16(q3s16, q4s16);
|
||||
q15s16 = vaddq_s16(q2s16, q6s16);
|
||||
STORE_IN_OUTPUT(26, 8, 15, q8s16, q15s16)
|
||||
STORE_IN_OUTPUT(15, 9, 14, q9s16, q10s16)
|
||||
// part of stage 6
|
||||
DO_BUTTERFLY_STD(cospi_16_64, cospi_16_64, &q1s16, &q3s16)
|
||||
STORE_IN_OUTPUT(14, 13, 10, q3s16, q1s16)
|
||||
q13s16 = vsubq_s16(q0s16, q5s16);
|
||||
q14s16 = vsubq_s16(q2s16, q6s16);
|
||||
DO_BUTTERFLY_STD(cospi_16_64, cospi_16_64, &q1s16, &q3s16)
|
||||
STORE_IN_OUTPUT(10, 11, 12, q1s16, q3s16)
|
||||
|
||||
// -----------------------------------------
|
||||
// BLOCK D: 0-3,4-7
|
||||
// -----------------------------------------
|
||||
// generate 4,5,6,7
|
||||
// part of stage 3
|
||||
LOAD_FROM_TRANSPOSED(6, 4, 28)
|
||||
DO_BUTTERFLY_STD(cospi_28_64, cospi_4_64, &q0s16, &q2s16)
|
||||
LOAD_FROM_TRANSPOSED(28, 20, 12)
|
||||
DO_BUTTERFLY_STD(cospi_12_64, cospi_20_64, &q1s16, &q3s16)
|
||||
// part of stage 4
|
||||
q13s16 = vsubq_s16(q0s16, q1s16);
|
||||
q0s16 = vaddq_s16(q0s16, q1s16);
|
||||
q14s16 = vsubq_s16(q2s16, q3s16);
|
||||
q2s16 = vaddq_s16(q2s16, q3s16);
|
||||
// part of stage 5
|
||||
DO_BUTTERFLY_STD(cospi_16_64, cospi_16_64, &q1s16, &q3s16)
|
||||
|
||||
// generate 0,1,2,3
|
||||
// part of stage 4
|
||||
LOAD_FROM_TRANSPOSED(12, 0, 16)
|
||||
DO_BUTTERFLY_STD(cospi_16_64, cospi_16_64, &q5s16, &q7s16)
|
||||
LOAD_FROM_TRANSPOSED(16, 8, 24)
|
||||
DO_BUTTERFLY_STD(cospi_24_64, cospi_8_64, &q14s16, &q6s16)
|
||||
// part of stage 5
|
||||
q4s16 = vaddq_s16(q7s16, q6s16);
|
||||
q7s16 = vsubq_s16(q7s16, q6s16);
|
||||
q6s16 = vsubq_s16(q5s16, q14s16);
|
||||
q5s16 = vaddq_s16(q5s16, q14s16);
|
||||
// part of stage 6
|
||||
q8s16 = vaddq_s16(q4s16, q2s16);
|
||||
q9s16 = vaddq_s16(q5s16, q3s16);
|
||||
q10s16 = vaddq_s16(q6s16, q1s16);
|
||||
q11s16 = vaddq_s16(q7s16, q0s16);
|
||||
q12s16 = vsubq_s16(q7s16, q0s16);
|
||||
q13s16 = vsubq_s16(q6s16, q1s16);
|
||||
q14s16 = vsubq_s16(q5s16, q3s16);
|
||||
q15s16 = vsubq_s16(q4s16, q2s16);
|
||||
// part of stage 7
|
||||
LOAD_FROM_OUTPUT(12, 14, 15, q0s16, q1s16)
|
||||
q2s16 = vaddq_s16(q8s16, q1s16);
|
||||
q3s16 = vaddq_s16(q9s16, q0s16);
|
||||
q4s16 = vsubq_s16(q9s16, q0s16);
|
||||
q5s16 = vsubq_s16(q8s16, q1s16);
|
||||
LOAD_FROM_OUTPUT(15, 16, 17, q0s16, q1s16)
|
||||
q8s16 = vaddq_s16(q4s16, q1s16);
|
||||
q9s16 = vaddq_s16(q5s16, q0s16);
|
||||
q6s16 = vsubq_s16(q5s16, q0s16);
|
||||
q7s16 = vsubq_s16(q4s16, q1s16);
|
||||
|
||||
// The first pass finishes the band into out; the second pass also gets
// dest/stride, and dest then moves on by 8 for the next band.
if (idct32_pass_loop == 0) {
|
||||
idct32_bands_end_1st_pass(out, q2s16, q3s16, q6s16, q7s16, q8s16, q9s16,
|
||||
q10s16, q11s16, q12s16, q13s16, q14s16,
|
||||
q15s16);
|
||||
} else {
|
||||
idct32_bands_end_2nd_pass(out, dest, stride, q2s16, q3s16, q6s16, q7s16,
|
||||
q8s16, q9s16, q10s16, q11s16, q12s16, q13s16,
|
||||
q14s16, q15s16);
|
||||
dest += 8;
|
||||
}
|
||||
}
|
||||
}
|
||||
return;
|
||||
}
|
||||
@@ -1,47 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2016, Alliance for Open Media. All rights reserved
|
||||
*
|
||||
* This source code is subject to the terms of the BSD 2 Clause License and
|
||||
* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
|
||||
* was not distributed with this source code in the LICENSE file, you can
|
||||
* obtain it at www.aomedia.org/license/software. If the Alliance for Open
|
||||
* Media Patent License 1.0 was not distributed with this source code in the
|
||||
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
|
||||
*/
|
||||
|
||||
#include <arm_neon.h>
|
||||
|
||||
#include "aom_dsp/inv_txfm.h"
|
||||
#include "aom_ports/mem.h"
|
||||
|
||||
/* DC-only 4x4 inverse transform: derives a single value from input[0] and
 * adds it to each pixel of the 4x4 block at dest (rows dest_stride bytes
 * apart), with the result saturated to the unsigned 8-bit range.
 *
 * Rows are processed two at a time: each 4-pixel row is moved as one 32-bit
 * lane, so a row pair fills one 8-byte vector. */
void aom_idct4x4_1_add_neon(int16_t *input, uint8_t *dest, int dest_stride) {
  uint8_t *load_ptr = dest;
  uint8_t *store_ptr = dest;
  uint32x2_t pixels = vdup_n_u32(0);
  uint16x8_t sum;
  uint8x8_t clamped;
  int16x8_t dc;
  int16_t pair, a1;
  int16_t out = dct_const_round_shift(input[0] * cospi_16_64);

  out = dct_const_round_shift(out * cospi_16_64);
  a1 = ROUND_POWER_OF_TWO(out, 4);
  dc = vdupq_n_s16(a1);

  for (pair = 0; pair < 2; pair++) {
    /* Gather two rows into the two 32-bit lanes. */
    pixels = vld1_lane_u32((const uint32_t *)load_ptr, pixels, 0);
    load_ptr += dest_stride;
    pixels = vld1_lane_u32((const uint32_t *)load_ptr, pixels, 1);
    load_ptr += dest_stride;

    /* Widening add of the pixels to the DC value, then saturate to 8 bits. */
    sum = vaddw_u8(vreinterpretq_u16_s16(dc), vreinterpret_u8_u32(pixels));
    clamped = vqmovun_s16(vreinterpretq_s16_u16(sum));

    /* Scatter the two rows back. */
    vst1_lane_u32((uint32_t *)store_ptr, vreinterpret_u32_u8(clamped), 0);
    store_ptr += dest_stride;
    vst1_lane_u32((uint32_t *)store_ptr, vreinterpret_u32_u8(clamped), 1);
    store_ptr += dest_stride;
  }
}
|
||||
@@ -1,146 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2016, Alliance for Open Media. All rights reserved
|
||||
*
|
||||
* This source code is subject to the terms of the BSD 2 Clause License and
|
||||
* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
|
||||
* was not distributed with this source code in the LICENSE file, you can
|
||||
* obtain it at www.aomedia.org/license/software. If the Alliance for Open
|
||||
* Media Patent License 1.0 was not distributed with this source code in the
|
||||
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
|
||||
*/
|
||||
|
||||
#include <arm_neon.h>
|
||||
|
||||
#include "aom_dsp/txfm_common.h"
|
||||
|
||||
// 4x4 inverse DCT of the 16 coefficients at input, added to the 4x4 pixel
// block at dest. Two identical passes are run (transpose followed by a 4-point
// butterfly); the result is rounded by 4 bits, added to the destination pixels
// and narrowed with unsigned saturation.
//   input       - 16 coefficients, read as two 8-lane vectors.
//   dest        - top-left pixel of the 4x4 destination block.
//   dest_stride - distance in bytes between consecutive destination rows.
void aom_idct4x4_16_add_neon(int16_t *input, uint8_t *dest, int dest_stride) {
  const int16x4_t k_cospi_8 = vdup_n_s16((int16_t)cospi_8_64);
  const int16x4_t k_cospi_16 = vdup_n_s16((int16_t)cospi_16_64);
  const int16x4_t k_cospi_24 = vdup_n_s16((int16_t)cospi_24_64);
  // After each pass: out_a = {out0, out1} and out_b = {out3, out2}.
  int16x8_t out_a = vld1q_s16(input);
  int16x8_t out_b = vld1q_s16(input + 8);
  int16x4_t v0 = vget_low_s16(out_a);
  int16x4_t v1 = vget_high_s16(out_a);
  int16x4_t v2 = vget_low_s16(out_b);
  int16x4_t v3 = vget_high_s16(out_b);
  uint32x2_t pix_a = vdup_n_u32(0);
  uint32x2_t pix_b = vdup_n_u32(0);
  uint8x8_t res_a, res_b;
  uint8_t *const row0 = dest;
  uint8_t *const row1 = row0 + dest_stride;
  uint8_t *const row2 = row1 + dest_stride;
  uint8_t *const row3 = row2 + dest_stride;
  int pass;

  for (pass = 0; pass < 2; ++pass) {
    int16x4x2_t t01, t23;
    int32x4x2_t t;
    int32x4_t even_sum, even_diff, odd_a, odd_b;
    int16x8_t evens, odds;

    // Transpose the 4x4 block held in v0..v3: 16-bit interleave within each
    // row pair, then 32-bit interleave across the two pairs.
    t01 = vtrn_s16(v0, v1);
    t23 = vtrn_s16(v2, v3);
    t = vtrnq_s32(
        vreinterpretq_s32_s16(vcombine_s16(t01.val[0], t01.val[1])),
        vreinterpretq_s32_s16(vcombine_s16(t23.val[0], t23.val[1])));
    v0 = vget_low_s16(vreinterpretq_s16_s32(t.val[0]));
    v1 = vget_high_s16(vreinterpretq_s16_s32(t.val[0]));
    v2 = vget_low_s16(vreinterpretq_s16_s32(t.val[1]));
    v3 = vget_high_s16(vreinterpretq_s16_s32(t.val[1]));

    // stage 1
    even_sum = vmull_s16(vadd_s16(v0, v2), k_cospi_16);
    even_diff = vmull_s16(vsub_s16(v0, v2), k_cospi_16);
    odd_a = vmlsl_s16(vmull_s16(v1, k_cospi_24), v3, k_cospi_8);
    odd_b = vmlal_s16(vmull_s16(v1, k_cospi_8), v3, k_cospi_24);

    evens = vcombine_s16(vqrshrn_n_s32(even_sum, 14),
                         vqrshrn_n_s32(even_diff, 14));
    odds = vcombine_s16(vqrshrn_n_s32(odd_b, 14), vqrshrn_n_s32(odd_a, 14));

    // stage 2
    out_a = vaddq_s16(evens, odds);
    out_b = vsubq_s16(evens, odds);

    // out_b carries its two rows in swapped order, so its halves are
    // exchanged when feeding the next pass.
    v0 = vget_low_s16(out_a);
    v1 = vget_high_s16(out_a);
    v2 = vget_high_s16(out_b);
    v3 = vget_low_s16(out_b);
  }

  out_a = vrshrq_n_s16(out_a, 4);
  out_b = vrshrq_n_s16(out_b, 4);

  // Rows 2 and 3 go into lanes 1 and 0 respectively to match out_b's layout.
  pix_a = vld1_lane_u32((const uint32_t *)row0, pix_a, 0);
  pix_a = vld1_lane_u32((const uint32_t *)row1, pix_a, 1);
  pix_b = vld1_lane_u32((const uint32_t *)row2, pix_b, 1);
  pix_b = vld1_lane_u32((const uint32_t *)row3, pix_b, 0);

  res_a = vqmovun_s16(vreinterpretq_s16_u16(
      vaddw_u8(vreinterpretq_u16_s16(out_a), vreinterpret_u8_u32(pix_a))));
  res_b = vqmovun_s16(vreinterpretq_s16_u16(
      vaddw_u8(vreinterpretq_u16_s16(out_b), vreinterpret_u8_u32(pix_b))));

  vst1_lane_u32((uint32_t *)row0, vreinterpret_u32_u8(res_a), 0);
  vst1_lane_u32((uint32_t *)row1, vreinterpret_u32_u8(res_a), 1);
  vst1_lane_u32((uint32_t *)row2, vreinterpret_u32_u8(res_b), 1);
  vst1_lane_u32((uint32_t *)row3, vreinterpret_u32_u8(res_b), 0);
}
|
||||
@@ -1,62 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2016, Alliance for Open Media. All rights reserved
|
||||
*
|
||||
* This source code is subject to the terms of the BSD 2 Clause License and
|
||||
* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
|
||||
* was not distributed with this source code in the LICENSE file, you can
|
||||
* obtain it at www.aomedia.org/license/software. If the Alliance for Open
|
||||
* Media Patent License 1.0 was not distributed with this source code in the
|
||||
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
|
||||
*/
|
||||
|
||||
#include <arm_neon.h>
|
||||
|
||||
#include "aom_dsp/inv_txfm.h"
|
||||
#include "aom_ports/mem.h"
|
||||
|
||||
// DC-only 8x8 inverse transform: derives a single rounded value from
// input[0] and adds it to each of the 8x8 pixels at dest. The sums are
// narrowed with unsigned saturation before being written back.
//   input       - coefficient block; only input[0] is read.
//   dest        - top-left pixel of the 8x8 destination block.
//   dest_stride - distance in bytes between consecutive destination rows.
void aom_idct8x8_1_add_neon(int16_t *input, uint8_t *dest, int dest_stride) {
  uint8x8_t clamped[4];
  uint16x8_t dc_vec;
  uint8_t *src = dest;
  uint8_t *dst = dest;
  int16_t dc, dc_rounded;
  int half, r;

  // cospi_16_64 is applied twice, followed by the final rounding shift by 5.
  dc = dct_const_round_shift(input[0] * cospi_16_64);
  dc = dct_const_round_shift(dc * cospi_16_64);
  dc_rounded = ROUND_POWER_OF_TWO(dc, 5);
  dc_vec = vreinterpretq_u16_s16(vdupq_n_s16(dc_rounded));

  // The block is processed as two groups of four rows; within a group all
  // rows are loaded before any row is stored.
  for (half = 0; half < 2; ++half) {
    for (r = 0; r < 4; ++r) {
      const uint64x1_t px = vld1_u64((const uint64_t *)src);
      const uint16x8_t sum = vaddw_u8(dc_vec, vreinterpret_u8_u64(px));
      clamped[r] = vqmovun_s16(vreinterpretq_s16_u16(sum));
      src += dest_stride;
    }
    for (r = 0; r < 4; ++r) {
      vst1_u64((uint64_t *)dst, vreinterpret_u64_u8(clamped[r]));
      dst += dest_stride;
    }
  }
}
|
||||
@@ -1,509 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2016, Alliance for Open Media. All rights reserved
|
||||
*
|
||||
* This source code is subject to the terms of the BSD 2 Clause License and
|
||||
* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
|
||||
* was not distributed with this source code in the LICENSE file, you can
|
||||
* obtain it at www.aomedia.org/license/software. If the Alliance for Open
|
||||
* Media Patent License 1.0 was not distributed with this source code in the
|
||||
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
|
||||
*/
|
||||
|
||||
#include <arm_neon.h>
|
||||
|
||||
#include "./aom_config.h"
|
||||
#include "aom_dsp/txfm_common.h"
|
||||
|
||||
// In-place transpose of an 8x8 block of int16 values held as eight 8-lane
// vectors (*q8s16 is row 0 ... *q15s16 is row 7). All eight inputs are read
// before any output is written.
static INLINE void TRANSPOSE8X8(int16x8_t *q8s16, int16x8_t *q9s16,
                                int16x8_t *q10s16, int16x8_t *q11s16,
                                int16x8_t *q12s16, int16x8_t *q13s16,
                                int16x8_t *q14s16, int16x8_t *q15s16) {
  // Step 1: exchange 64-bit halves between row i and row i + 4.
  // left_i  = { low(row i),  low(row i + 4)  }
  // right_i = { high(row i), high(row i + 4) }
  const int16x8_t left0 =
      vcombine_s16(vget_low_s16(*q8s16), vget_low_s16(*q12s16));
  const int16x8_t left1 =
      vcombine_s16(vget_low_s16(*q9s16), vget_low_s16(*q13s16));
  const int16x8_t left2 =
      vcombine_s16(vget_low_s16(*q10s16), vget_low_s16(*q14s16));
  const int16x8_t left3 =
      vcombine_s16(vget_low_s16(*q11s16), vget_low_s16(*q15s16));
  const int16x8_t right0 =
      vcombine_s16(vget_high_s16(*q8s16), vget_high_s16(*q12s16));
  const int16x8_t right1 =
      vcombine_s16(vget_high_s16(*q9s16), vget_high_s16(*q13s16));
  const int16x8_t right2 =
      vcombine_s16(vget_high_s16(*q10s16), vget_high_s16(*q14s16));
  const int16x8_t right3 =
      vcombine_s16(vget_high_s16(*q11s16), vget_high_s16(*q15s16));

  // Step 2: 32-bit interleave between vectors two apart.
  const int32x4x2_t left02 =
      vtrnq_s32(vreinterpretq_s32_s16(left0), vreinterpretq_s32_s16(left2));
  const int32x4x2_t left13 =
      vtrnq_s32(vreinterpretq_s32_s16(left1), vreinterpretq_s32_s16(left3));
  const int32x4x2_t right02 =
      vtrnq_s32(vreinterpretq_s32_s16(right0), vreinterpretq_s32_s16(right2));
  const int32x4x2_t right13 =
      vtrnq_s32(vreinterpretq_s32_s16(right1), vreinterpretq_s32_s16(right3));

  // Step 3: 16-bit interleave between neighbouring vectors.
  const int16x8x2_t rows01 = vtrnq_s16(vreinterpretq_s16_s32(left02.val[0]),
                                       vreinterpretq_s16_s32(left13.val[0]));
  const int16x8x2_t rows23 = vtrnq_s16(vreinterpretq_s16_s32(left02.val[1]),
                                       vreinterpretq_s16_s32(left13.val[1]));
  const int16x8x2_t rows45 = vtrnq_s16(vreinterpretq_s16_s32(right02.val[0]),
                                       vreinterpretq_s16_s32(right13.val[0]));
  const int16x8x2_t rows67 = vtrnq_s16(vreinterpretq_s16_s32(right02.val[1]),
                                       vreinterpretq_s16_s32(right13.val[1]));

  *q8s16 = rows01.val[0];
  *q9s16 = rows01.val[1];
  *q10s16 = rows23.val[0];
  *q11s16 = rows23.val[1];
  *q12s16 = rows45.val[0];
  *q13s16 = rows45.val[1];
  *q14s16 = rows67.val[0];
  *q15s16 = rows67.val[1];
}
|
||||
|
||||
// Lane-wise a * ca + b * cb, computed in 32 bits and narrowed back to 16 bits
// with a rounding, saturating right shift by 14.
static INLINE int16x8_t idct8_madd_round_neon(int16x8_t a, int16x4_t ca,
                                              int16x8_t b, int16x4_t cb) {
  const int32x4_t lo =
      vmlal_s16(vmull_s16(vget_low_s16(a), ca), vget_low_s16(b), cb);
  const int32x4_t hi =
      vmlal_s16(vmull_s16(vget_high_s16(a), ca), vget_high_s16(b), cb);
  return vcombine_s16(vqrshrn_n_s32(lo, 14), vqrshrn_n_s32(hi, 14));
}

// Lane-wise a * ca - b * cb, computed in 32 bits and narrowed back to 16 bits
// with a rounding, saturating right shift by 14.
static INLINE int16x8_t idct8_msub_round_neon(int16x8_t a, int16x4_t ca,
                                              int16x8_t b, int16x4_t cb) {
  const int32x4_t lo =
      vmlsl_s16(vmull_s16(vget_low_s16(a), ca), vget_low_s16(b), cb);
  const int32x4_t hi =
      vmlsl_s16(vmull_s16(vget_high_s16(a), ca), vget_high_s16(b), cb);
  return vcombine_s16(vqrshrn_n_s32(lo, 14), vqrshrn_n_s32(hi, 14));
}

// One 8-point inverse DCT pass applied lane-wise to eight vectors, in place.
// *q8s16 ... *q15s16 hold inputs 0 ... 7 on entry and outputs 0 ... 7 on exit.
static INLINE void IDCT8x8_1D(int16x8_t *q8s16, int16x8_t *q9s16,
                              int16x8_t *q10s16, int16x8_t *q11s16,
                              int16x8_t *q12s16, int16x8_t *q13s16,
                              int16x8_t *q14s16, int16x8_t *q15s16) {
  const int16x4_t c4 = vdup_n_s16((int16_t)cospi_4_64);
  const int16x4_t c8 = vdup_n_s16((int16_t)cospi_8_64);
  const int16x4_t c12 = vdup_n_s16((int16_t)cospi_12_64);
  const int16x4_t c16 = vdup_n_s16((int16_t)cospi_16_64);
  const int16x4_t c20 = vdup_n_s16((int16_t)cospi_20_64);
  const int16x4_t c24 = vdup_n_s16((int16_t)cospi_24_64);
  const int16x4_t c28 = vdup_n_s16((int16_t)cospi_28_64);

  const int16x8_t in0 = *q8s16;
  const int16x8_t in1 = *q9s16;
  const int16x8_t in2 = *q10s16;
  const int16x8_t in3 = *q11s16;
  const int16x8_t in4 = *q12s16;
  const int16x8_t in5 = *q13s16;
  const int16x8_t in6 = *q14s16;
  const int16x8_t in7 = *q15s16;

  // Odd half, first rotation stage (inputs 1, 3, 5, 7).
  const int16x8_t odd4 = idct8_msub_round_neon(in1, c28, in7, c4);
  const int16x8_t odd5 = idct8_msub_round_neon(in5, c12, in3, c20);
  const int16x8_t odd6 = idct8_madd_round_neon(in5, c20, in3, c12);
  const int16x8_t odd7 = idct8_madd_round_neon(in1, c4, in7, c28);

  // Even half (inputs 0, 2, 4, 6).
  const int16x8_t even0 = idct8_madd_round_neon(in0, c16, in4, c16);
  const int16x8_t even1 = idct8_msub_round_neon(in0, c16, in4, c16);
  const int16x8_t even2 = idct8_msub_round_neon(in2, c24, in6, c8);
  const int16x8_t even3 = idct8_madd_round_neon(in2, c8, in6, c24);

  const int16x8_t a0 = vaddq_s16(even0, even3);
  const int16x8_t a1 = vaddq_s16(even1, even2);
  const int16x8_t a2 = vsubq_s16(even1, even2);
  const int16x8_t a3 = vsubq_s16(even0, even3);

  // Odd half, butterfly followed by the cospi_16_64 rotation of the two
  // difference terms.
  const int16x8_t diff45 = vsubq_s16(odd4, odd5);
  const int16x8_t b4 = vaddq_s16(odd4, odd5);
  const int16x8_t diff76 = vsubq_s16(odd7, odd6);
  const int16x8_t b7 = vaddq_s16(odd7, odd6);
  const int16x8_t b5 = idct8_msub_round_neon(diff76, c16, diff45, c16);
  const int16x8_t b6 = idct8_madd_round_neon(diff76, c16, diff45, c16);

  // Final butterfly combining the even and odd halves.
  *q8s16 = vaddq_s16(a0, b7);
  *q9s16 = vaddq_s16(a1, b6);
  *q10s16 = vaddq_s16(a2, b5);
  *q11s16 = vaddq_s16(a3, b4);
  *q12s16 = vsubq_s16(a3, b4);
  *q13s16 = vsubq_s16(a2, b5);
  *q14s16 = vsubq_s16(a1, b6);
  *q15s16 = vsubq_s16(a0, b7);
}
|
||||
|
||||
// Full 8x8 inverse DCT of the 64 coefficients at input, added to the 8x8
// pixel block at dest. Runs transpose + 1-D transform twice, rounds by 5 bits,
// adds the destination pixels and narrows with unsigned saturation.
//   input       - 64 coefficients, read as eight 8-lane vectors.
//   dest        - top-left pixel of the 8x8 destination block.
//   dest_stride - distance in bytes between consecutive destination rows.
void aom_idct8x8_64_add_neon(int16_t *input, uint8_t *dest, int dest_stride) {
  int16x8_t row[8];
  uint8x8_t clamped[4];
  uint8_t *src = dest;
  uint8_t *dst = dest;
  int i, pass, half;

  for (i = 0; i < 8; ++i) {
    row[i] = vld1q_s16(input + 8 * i);
  }

  for (pass = 0; pass < 2; ++pass) {
    TRANSPOSE8X8(&row[0], &row[1], &row[2], &row[3], &row[4], &row[5], &row[6],
                 &row[7]);
    IDCT8x8_1D(&row[0], &row[1], &row[2], &row[3], &row[4], &row[5], &row[6],
               &row[7]);
  }

  for (i = 0; i < 8; ++i) {
    row[i] = vrshrq_n_s16(row[i], 5);
  }

  // Reconstruct four rows at a time; within a group every row is loaded
  // before any row is stored.
  for (half = 0; half < 2; ++half) {
    const int16x8_t *const residual = row + 4 * half;

    for (i = 0; i < 4; ++i) {
      const uint64x1_t px = vld1_u64((uint64_t *)src);
      const uint16x8_t sum = vaddw_u8(vreinterpretq_u16_s16(residual[i]),
                                      vreinterpret_u8_u64(px));
      clamped[i] = vqmovun_s16(vreinterpretq_s16_u16(sum));
      src += dest_stride;
    }
    for (i = 0; i < 4; ++i) {
      vst1_u64((uint64_t *)dst, vreinterpret_u64_u8(clamped[i]));
      dst += dest_stride;
    }
  }
}
|
||||
|
||||
// Reduced 8x8 inverse DCT added to the 8x8 pixel block at dest. The first
// pass uses only rows 0-3 of the transposed input and single multiplies
// (vqrdmulh by doubled constants); the second pass is the general
// IDCT8x8_1D. The result is rounded by 5 bits, added to the destination
// pixels and narrowed with unsigned saturation.
//   input       - 64 coefficients, read as eight 8-lane vectors.
//   dest        - top-left pixel of the 8x8 destination block.
//   dest_stride - distance in bytes between consecutive destination rows.
void aom_idct8x8_12_add_neon(int16_t *input, uint8_t *dest, int dest_stride) {
  int16x8_t row[8];
  int16x8_t odd4, odd5, odd6, odd7;
  int16x8_t even0, even2, even3;
  int16x8_t a0, a1, a2, a3;
  int16x8_t diff45, diff76, b4, b5, b6, b7;
  int32x4_t sub_lo, sub_hi, add_lo, add_hi;
  int16x4_t k_cospi_16;
  uint8x8_t clamped[4];
  uint8_t *src = dest;
  uint8_t *dst = dest;
  int i, half;

  for (i = 0; i < 8; ++i) {
    row[i] = vld1q_s16(input + 8 * i);
  }

  TRANSPOSE8X8(&row[0], &row[1], &row[2], &row[3], &row[4], &row[5], &row[6],
               &row[7]);

  // First transform rows.
  // stage 1: odd-half products from rows 1 and 3.
  odd4 = vqrdmulhq_s16(row[1], vdupq_n_s16((int16_t)cospi_28_64 * 2));
  odd7 = vqrdmulhq_s16(row[1], vdupq_n_s16((int16_t)cospi_4_64 * 2));
  odd5 = vqrdmulhq_s16(row[3], vdupq_n_s16(-(int16_t)cospi_20_64 * 2));
  odd6 = vqrdmulhq_s16(row[3], vdupq_n_s16((int16_t)cospi_12_64 * 2));

  // stage 2 & stage 3: even-half products from rows 0 and 2.
  even0 = vqrdmulhq_s16(row[0], vdupq_n_s16((int16_t)cospi_16_64 * 2));
  even2 = vqrdmulhq_s16(row[2], vdupq_n_s16((int16_t)cospi_24_64 * 2));
  even3 = vqrdmulhq_s16(row[2], vdupq_n_s16((int16_t)cospi_8_64 * 2));

  a0 = vaddq_s16(even0, even3);
  a1 = vaddq_s16(even0, even2);
  a2 = vsubq_s16(even0, even2);
  a3 = vsubq_s16(even0, even3);

  // Odd-half butterfly.
  diff45 = vsubq_s16(odd4, odd5);
  b4 = vaddq_s16(odd4, odd5);
  diff76 = vsubq_s16(odd7, odd6);
  b7 = vaddq_s16(odd7, odd6);

  // Rotate the two difference terms by cospi_16_64 in 32-bit precision, then
  // narrow with a rounding, saturating shift by 14.
  k_cospi_16 = vdup_n_s16((int16_t)cospi_16_64);
  sub_lo = vmlsl_s16(vmull_s16(vget_low_s16(diff76), k_cospi_16),
                     vget_low_s16(diff45), k_cospi_16);
  sub_hi = vmlsl_s16(vmull_s16(vget_high_s16(diff76), k_cospi_16),
                     vget_high_s16(diff45), k_cospi_16);
  add_lo = vmlal_s16(vmull_s16(vget_low_s16(diff76), k_cospi_16),
                     vget_low_s16(diff45), k_cospi_16);
  add_hi = vmlal_s16(vmull_s16(vget_high_s16(diff76), k_cospi_16),
                     vget_high_s16(diff45), k_cospi_16);
  b5 = vcombine_s16(vqrshrn_n_s32(sub_lo, 14), vqrshrn_n_s32(sub_hi, 14));
  b6 = vcombine_s16(vqrshrn_n_s32(add_lo, 14), vqrshrn_n_s32(add_hi, 14));

  // stage 4
  row[0] = vaddq_s16(a0, b7);
  row[1] = vaddq_s16(a1, b6);
  row[2] = vaddq_s16(a2, b5);
  row[3] = vaddq_s16(a3, b4);
  row[4] = vsubq_s16(a3, b4);
  row[5] = vsubq_s16(a2, b5);
  row[6] = vsubq_s16(a1, b6);
  row[7] = vsubq_s16(a0, b7);

  // Second pass over the other dimension.
  TRANSPOSE8X8(&row[0], &row[1], &row[2], &row[3], &row[4], &row[5], &row[6],
               &row[7]);
  IDCT8x8_1D(&row[0], &row[1], &row[2], &row[3], &row[4], &row[5], &row[6],
             &row[7]);

  for (i = 0; i < 8; ++i) {
    row[i] = vrshrq_n_s16(row[i], 5);
  }

  // Reconstruct four rows at a time; within a group every row is loaded
  // before any row is stored.
  for (half = 0; half < 2; ++half) {
    const int16x8_t *const residual = row + 4 * half;

    for (i = 0; i < 4; ++i) {
      const uint64x1_t px = vld1_u64((uint64_t *)src);
      const uint16x8_t sum = vaddw_u8(vreinterpretq_u16_s16(residual[i]),
                                      vreinterpret_u8_u64(px));
      clamped[i] = vqmovun_s16(vreinterpretq_s16_u16(sum));
      src += dest_stride;
    }
    for (i = 0; i < 4; ++i) {
      vst1_u64((uint64_t *)dst, vreinterpret_u64_u8(clamped[i]));
      dst += dest_stride;
    }
  }
}
|
||||
@@ -1,174 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2016, Alliance for Open Media. All rights reserved
|
||||
*
|
||||
* This source code is subject to the terms of the BSD 2 Clause License and
|
||||
* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
|
||||
* was not distributed with this source code in the LICENSE file, you can
|
||||
* obtain it at www.aomedia.org/license/software. If the Alliance for Open
|
||||
* Media Patent License 1.0 was not distributed with this source code in the
|
||||
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
|
||||
*/
|
||||
|
||||
#include <arm_neon.h>
|
||||
|
||||
#include "./aom_dsp_rtcd.h"
|
||||
#include "./aom_config.h"
|
||||
#include "aom/aom_integer.h"
|
||||
|
||||
// Applies the 4-tap loop filter to 16 pixel positions at once. Inputs are the
// eight samples across the edge (p3..p0, q0..q3) plus the three threshold
// vectors; the filtered p1, p0, q0 and q1 are written through q5r..q8r.
static INLINE void loop_filter_neon_16(uint8x16_t qblimit,  // blimit
                                       uint8x16_t qlimit,   // limit
                                       uint8x16_t qthresh,  // thresh
                                       uint8x16_t q3,       // p3
                                       uint8x16_t q4,       // p2
                                       uint8x16_t q5,       // p1
                                       uint8x16_t q6,       // p0
                                       uint8x16_t q7,       // q0
                                       uint8x16_t q8,       // q1
                                       uint8x16_t q9,       // q2
                                       uint8x16_t q10,      // q3
                                       uint8x16_t *q5r,     // p1
                                       uint8x16_t *q6r,     // p0
                                       uint8x16_t *q7r,     // q0
                                       uint8x16_t *q8r) {   // q1
  const uint8x16_t sign_bit = vdupq_n_u8(0x80);

  // Absolute differences between neighbouring samples.
  const uint8x16_t abs_p3p2 = vabdq_u8(q3, q4);
  const uint8x16_t abs_p2p1 = vabdq_u8(q4, q5);
  const uint8x16_t abs_p1p0 = vabdq_u8(q5, q6);
  const uint8x16_t abs_q1q0 = vabdq_u8(q8, q7);
  const uint8x16_t abs_q2q1 = vabdq_u8(q9, q8);
  const uint8x16_t abs_q3q2 = vabdq_u8(q10, q9);
  const uint8x16_t max_step = vmaxq_u8(
      vmaxq_u8(vmaxq_u8(abs_p3p2, abs_p2p1), vmaxq_u8(abs_p1p0, abs_q1q0)),
      vmaxq_u8(abs_q2q1, abs_q3q2));

  // High edge variance: |p1 - p0| or |q1 - q0| exceeds thresh.
  const uint8x16_t hev =
      vorrq_u8(vcgtq_u8(abs_p1p0, qthresh), vcgtq_u8(abs_q1q0, qthresh));

  // Edge strength: 2 * |p0 - q0| + |p1 - q1| / 2, with saturating adds.
  const uint8x16_t abs_p0q0 = vabdq_u8(q6, q7);
  const uint8x16_t edge = vqaddq_u8(vqaddq_u8(abs_p0q0, abs_p0q0),
                                    vshrq_n_u8(vabdq_u8(q5, q8), 1));

  // Filter mask: every step within limit and edge strength within blimit.
  const uint8x16_t mask =
      vandq_u8(vcgeq_u8(qlimit, max_step), vcgeq_u8(qblimit, edge));

  // Flip the top bit to move the four filtered samples to signed range.
  const int8x16_t ps1 = vreinterpretq_s8_u8(veorq_u8(q5, sign_bit));
  const int8x16_t ps0 = vreinterpretq_s8_u8(veorq_u8(q6, sign_bit));
  const int8x16_t qs0 = vreinterpretq_s8_u8(veorq_u8(q7, sign_bit));
  const int8x16_t qs1 = vreinterpretq_s8_u8(veorq_u8(q8, sign_bit));

  // (ps1 - qs1), kept only where hev is set.
  const int8x16_t hev_term = vreinterpretq_s8_u8(
      vandq_u8(vreinterpretq_u8_s8(vqsubq_s8(ps1, qs1)), hev));

  // filter = clamp(hev_term + 3 * (qs0 - ps0)), evaluated in 16 bits.
  const int16x8_t three = vdupq_n_s16(3);
  const int16x8_t sum_lo = vaddw_s8(
      vmulq_s16(vsubl_s8(vget_low_s8(qs0), vget_low_s8(ps0)), three),
      vget_low_s8(hev_term));
  const int16x8_t sum_hi = vaddw_s8(
      vmulq_s16(vsubl_s8(vget_high_s8(qs0), vget_high_s8(ps0)), three),
      vget_high_s8(hev_term));
  const int8x16_t filter = vreinterpretq_s8_u8(vandq_u8(
      vreinterpretq_u8_s8(vcombine_s8(vqmovn_s16(sum_lo), vqmovn_s16(sum_hi))),
      mask));

  // The +4 variant adjusts q0, the +3 variant adjusts p0.
  const int8x16_t filter_q0 = vshrq_n_s8(vqaddq_s8(filter, vdupq_n_s8(4)), 3);
  const int8x16_t filter_p0 = vshrq_n_s8(vqaddq_s8(filter, vdupq_n_s8(3)), 3);

  // Outer taps: rounded half of filter_q0, cleared where hev is set.
  const int8x16_t filter_outer =
      vbicq_s8(vrshrq_n_s8(filter_q0, 1), vreinterpretq_s8_u8(hev));

  const int8x16_t out_p1 = vqaddq_s8(ps1, filter_outer);
  const int8x16_t out_p0 = vqaddq_s8(ps0, filter_p0);
  const int8x16_t out_q0 = vqsubq_s8(qs0, filter_q0);
  const int8x16_t out_q1 = vqsubq_s8(qs1, filter_outer);

  // Convert back to unsigned and hand the results to the caller.
  *q8r = veorq_u8(vreinterpretq_u8_s8(out_q1), sign_bit);
  *q7r = veorq_u8(vreinterpretq_u8_s8(out_q0), sign_bit);
  *q6r = veorq_u8(vreinterpretq_u8_s8(out_p0), sign_bit);
  *q5r = veorq_u8(vreinterpretq_u8_s8(out_p1), sign_bit);
}
|
||||
|
||||
// Filters a 16-pixel-wide horizontal edge at s. The first threshold set
// (blimit0/limit0/thresh0) fills the low 8 lanes and the second set fills the
// high 8 lanes. Eight rows are read (four above s, four from s downwards);
// the two rows on each side of the edge are rewritten.
//   s - first pixel of the row just below the edge.
//   p - pitch: distance in bytes between consecutive rows.
//   blimit0/limit0/thresh0, blimit1/limit1/thresh1 - 8 bytes are read from
//       each pointer.
void aom_lpf_horizontal_4_dual_neon(
    uint8_t *s, int p /* pitch */, const uint8_t *blimit0,
    const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1,
    const uint8_t *limit1, const uint8_t *thresh1) {
  const uint8x16_t qblimit = vcombine_u8(vld1_u8(blimit0), vld1_u8(blimit1));
  const uint8x16_t qlimit = vcombine_u8(vld1_u8(limit0), vld1_u8(limit1));
  const uint8x16_t qthresh = vcombine_u8(vld1_u8(thresh0), vld1_u8(thresh1));
  uint8x16_t row[8];  // p3, p2, p1, p0, q0, q1, q2, q3
  int i;

  // Step back to the p3 row.
  s -= (p << 2);

  for (i = 0; i < 8; ++i) {
    row[i] = vld1q_u8(s + i * p);
  }

  loop_filter_neon_16(qblimit, qlimit, qthresh, row[0], row[1], row[2], row[3],
                      row[4], row[5], row[6], row[7], &row[2], &row[3],
                      &row[4], &row[5]);

  // Only p1, p0, q0 and q1 are modified by the filter.
  for (i = 2; i < 6; ++i) {
    vst1q_u8(s + i * p, row[i]);
  }
}
|
||||
@@ -1,250 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2016, Alliance for Open Media. All rights reserved
|
||||
*
|
||||
* This source code is subject to the terms of the BSD 2 Clause License and
|
||||
* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
|
||||
* was not distributed with this source code in the LICENSE file, you can
|
||||
* obtain it at www.aomedia.org/license/software. If the Alliance for Open
|
||||
* Media Patent License 1.0 was not distributed with this source code in the
|
||||
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
|
||||
*/
|
||||
|
||||
#include <arm_neon.h>
|
||||
|
||||
#include "./aom_dsp_rtcd.h"
|
||||
|
||||
// 4-tap loop filter kernel operating on eight pixel positions at once.
// Each input vector holds one tap (p3..q3) for the eight positions; the four
// filtered taps closest to the edge (p1, p0, q0, q1) are written through the
// output pointers.
static INLINE void loop_filter_neon(uint8x8_t dblimit,  // flimit
                                    uint8x8_t dlimit,   // limit
                                    uint8x8_t dthresh,  // thresh
                                    uint8x8_t d3u8,     // p3
                                    uint8x8_t d4u8,     // p2
                                    uint8x8_t d5u8,     // p1
                                    uint8x8_t d6u8,     // p0
                                    uint8x8_t d7u8,     // q0
                                    uint8x8_t d16u8,    // q1
                                    uint8x8_t d17u8,    // q2
                                    uint8x8_t d18u8,    // q3
                                    uint8x8_t *d4ru8,   // p1
                                    uint8x8_t *d5ru8,   // p0
                                    uint8x8_t *d6ru8,   // q0
                                    uint8x8_t *d7ru8) { // q1
  const uint8x8_t sign_bit = vdup_n_u8(0x80);
  const int8x8_t three = vdup_n_s8(3);
  const int8x8_t four = vdup_n_s8(4);

  // Absolute differences between neighbouring taps.
  const uint8x8_t abs_p1p0 = vabd_u8(d5u8, d6u8);
  const uint8x8_t abs_q1q0 = vabd_u8(d16u8, d7u8);
  const uint8x8_t max_p = vmax_u8(vabd_u8(d3u8, d4u8), vabd_u8(d4u8, d5u8));
  const uint8x8_t max_inner = vmax_u8(abs_p1p0, abs_q1q0);
  const uint8x8_t max_q =
      vmax_u8(vabd_u8(d17u8, d16u8), vabd_u8(d18u8, d17u8));
  const uint8x8_t max_step = vmax_u8(vmax_u8(max_p, max_inner), max_q);

  // Edge strength: 2 * |p0 - q0| + |p1 - q1| / 2, with saturating adds.
  const uint8x8_t abs_p0q0 = vabd_u8(d6u8, d7u8);
  const uint8x8_t edge = vqadd_u8(vqadd_u8(abs_p0q0, abs_p0q0),
                                  vshr_n_u8(vabd_u8(d5u8, d16u8), 1));

  // mask: all-ones where the position passes both the limit and blimit
  // tests. hev: all-ones where |p1 - p0| or |q1 - q0| exceeds thresh.
  const uint8x8_t mask =
      vand_u8(vcge_u8(dlimit, max_step), vcge_u8(dblimit, edge));
  const uint8x8_t hev =
      vorr_u8(vcgt_u8(abs_p1p0, dthresh), vcgt_u8(abs_q1q0, dthresh));
  const int8x8_t hev_s = vreinterpret_s8_u8(hev);

  // Flip the top bit so the pixels can be handled as signed values.
  const int8x8_t ps1 = vreinterpret_s8_u8(veor_u8(d5u8, sign_bit));
  const int8x8_t ps0 = vreinterpret_s8_u8(veor_u8(d6u8, sign_bit));
  const int8x8_t qs0 = vreinterpret_s8_u8(veor_u8(d7u8, sign_bit));
  const int8x8_t qs1 = vreinterpret_s8_u8(veor_u8(d16u8, sign_bit));

  // clamp(3 * (qs0 - ps0) + (hev ? clamp(ps1 - qs1) : 0)), zeroed where the
  // mask is clear.
  const int16x8_t wide = vaddw_s8(vmull_s8(vsub_s8(qs0, ps0), three),
                                  vand_s8(vqsub_s8(ps1, qs1), hev_s));
  const int8x8_t base =
      vand_s8(vqmovn_s16(wide), vreinterpret_s8_u8(mask));

  const int8x8_t filter1 = vshr_n_s8(vqadd_s8(base, four), 3);   // for q0
  const int8x8_t filter2 = vshr_n_s8(vqadd_s8(base, three), 3);  // for p0
  // Outer taps get the rounded half of filter1, and only where hev is clear.
  const int8x8_t outer = vbic_s8(vrshr_n_s8(filter1, 1), hev_s);

  *d4ru8 = veor_u8(vreinterpret_u8_s8(vqadd_s8(ps1, outer)), sign_bit);
  *d5ru8 = veor_u8(vreinterpret_u8_s8(vqadd_s8(ps0, filter2)), sign_bit);
  *d6ru8 = veor_u8(vreinterpret_u8_s8(vqsub_s8(qs0, filter1)), sign_bit);
  *d7ru8 = veor_u8(vreinterpret_u8_s8(vqsub_s8(qs1, outer)), sign_bit);
}
|
||||
|
||||
// Applies the 4-tap loop filter across an 8-pixel-wide horizontal edge.
// `src` points at the first row below the edge and `pitch` is the row pitch.
// blimit/limit/thresh must each reference at least 8 readable bytes.
// Rows src - 4 * pitch .. src + 3 * pitch are read; the four rows starting at
// src - 2 * pitch are rewritten.
void aom_lpf_horizontal_4_neon(uint8_t *src, int pitch, const uint8_t *blimit,
                               const uint8_t *limit, const uint8_t *thresh) {
  const uint8x8_t dblimit = vld1_u8(blimit);
  const uint8x8_t dlimit = vld1_u8(limit);
  const uint8x8_t dthresh = vld1_u8(thresh);
  const uint8_t *const first_in = src - 4 * pitch;
  uint8_t *const first_out = src - 2 * pitch;
  uint8x8_t row[8];
  uint8x8_t filtered[4];
  int r;

  for (r = 0; r < 8; ++r) row[r] = vld1_u8(first_in + r * pitch);

  loop_filter_neon(dblimit, dlimit, dthresh, row[0], row[1], row[2], row[3],
                   row[4], row[5], row[6], row[7], &filtered[0], &filtered[1],
                   &filtered[2], &filtered[3]);

  for (r = 0; r < 4; ++r) vst1_u8(first_out + r * pitch, filtered[r]);
}
|
||||
|
||||
// Applies the 4-tap loop filter across an 8-row-tall vertical edge.
// `src` points at the first pixel to the right of the edge in the top row.
// Eight bytes starting at src - 4 are read from each of eight rows, the 8x8
// tile is transposed so that every vector holds one tap, the filter runs, and
// the four filtered taps are written back to src - 2 .. src + 1 of each row.
void aom_lpf_vertical_4_neon(uint8_t *src, int pitch, const uint8_t *blimit,
                             const uint8_t *limit, const uint8_t *thresh) {
  const uint8x8_t dblimit = vld1_u8(blimit);
  const uint8x8_t dlimit = vld1_u8(limit);
  const uint8x8_t dthresh = vld1_u8(thresh);
  const uint8_t *const in = src - 4;
  uint8_t *const out = src - 2;
  uint8x8_t row[8];
  uint32x2x2_t w04, w15, w26, w37;
  uint16x4x2_t h_lo_even, h_lo_odd, h_hi_even, h_hi_odd;
  uint8x8x2_t col01, col23, col45, col67;
  uint8x8x4_t filtered;
  int r;

  for (r = 0; r < 8; ++r) row[r] = vld1_u8(in + r * pitch);

  // 8x8 byte transpose in three stages: 32-bit, 16-bit, then 8-bit lanes.
  w04 = vtrn_u32(vreinterpret_u32_u8(row[0]), vreinterpret_u32_u8(row[4]));
  w15 = vtrn_u32(vreinterpret_u32_u8(row[1]), vreinterpret_u32_u8(row[5]));
  w26 = vtrn_u32(vreinterpret_u32_u8(row[2]), vreinterpret_u32_u8(row[6]));
  w37 = vtrn_u32(vreinterpret_u32_u8(row[3]), vreinterpret_u32_u8(row[7]));

  h_lo_even = vtrn_u16(vreinterpret_u16_u32(w04.val[0]),
                       vreinterpret_u16_u32(w26.val[0]));
  h_lo_odd = vtrn_u16(vreinterpret_u16_u32(w15.val[0]),
                      vreinterpret_u16_u32(w37.val[0]));
  h_hi_even = vtrn_u16(vreinterpret_u16_u32(w04.val[1]),
                       vreinterpret_u16_u32(w26.val[1]));
  h_hi_odd = vtrn_u16(vreinterpret_u16_u32(w15.val[1]),
                      vreinterpret_u16_u32(w37.val[1]));

  col01 = vtrn_u8(vreinterpret_u8_u16(h_lo_even.val[0]),
                  vreinterpret_u8_u16(h_lo_odd.val[0]));  // p3, p2
  col23 = vtrn_u8(vreinterpret_u8_u16(h_lo_even.val[1]),
                  vreinterpret_u8_u16(h_lo_odd.val[1]));  // p1, p0
  col45 = vtrn_u8(vreinterpret_u8_u16(h_hi_even.val[0]),
                  vreinterpret_u8_u16(h_hi_odd.val[0]));  // q0, q1
  col67 = vtrn_u8(vreinterpret_u8_u16(h_hi_even.val[1]),
                  vreinterpret_u8_u16(h_hi_odd.val[1]));  // q2, q3

  loop_filter_neon(dblimit, dlimit, dthresh, col01.val[0], col01.val[1],
                   col23.val[0], col23.val[1], col45.val[0], col45.val[1],
                   col67.val[0], col67.val[1], &filtered.val[0],
                   &filtered.val[1], &filtered.val[2], &filtered.val[3]);

  // Lane k of the four result vectors is the 4-byte output for row k. The
  // lane index must be a compile-time constant, hence the unrolled stores.
  vst4_lane_u8(out + 0 * pitch, filtered, 0);
  vst4_lane_u8(out + 1 * pitch, filtered, 1);
  vst4_lane_u8(out + 2 * pitch, filtered, 2);
  vst4_lane_u8(out + 3 * pitch, filtered, 3);
  vst4_lane_u8(out + 4 * pitch, filtered, 4);
  vst4_lane_u8(out + 5 * pitch, filtered, 5);
  vst4_lane_u8(out + 6 * pitch, filtered, 6);
  vst4_lane_u8(out + 7 * pitch, filtered, 7);
}
|
||||
@@ -1,430 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2016, Alliance for Open Media. All rights reserved
|
||||
*
|
||||
* This source code is subject to the terms of the BSD 2 Clause License and
|
||||
* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
|
||||
* was not distributed with this source code in the LICENSE file, you can
|
||||
* obtain it at www.aomedia.org/license/software. If the Alliance for Open
|
||||
* Media Patent License 1.0 was not distributed with this source code in the
|
||||
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
|
||||
*/
|
||||
|
||||
#include <arm_neon.h>
|
||||
|
||||
#include "./aom_dsp_rtcd.h"
|
||||
|
||||
// Computes the six outputs of the 8-tap smoothing filter for eight pixel
// positions. A running 16-bit sum starts as 3*p3 + 2*p2 + p1 + p0 + q0 and is
// slid one tap at a time; every output is the rounded sum shifted right by 3.
static INLINE void mbloop_flat_taps_neon(
    uint8x8_t p3, uint8x8_t p2, uint8x8_t p1, uint8x8_t p0, uint8x8_t q0,
    uint8x8_t q1, uint8x8_t q2, uint8x8_t q3, uint8x8_t *op2, uint8x8_t *op1,
    uint8x8_t *op0, uint8x8_t *oq0, uint8x8_t *oq1, uint8x8_t *oq2) {
  uint16x8_t sum = vaddl_u8(p0, q0);
  sum = vmlal_u8(sum, p3, vdup_n_u8(3));
  sum = vmlal_u8(sum, p2, vdup_n_u8(2));
  sum = vaddw_u8(sum, p1);
  *op2 = vqrshrn_n_u16(sum, 3);

  sum = vsubw_u8(sum, p3);
  sum = vsubw_u8(sum, p2);
  sum = vaddw_u8(sum, p1);
  sum = vaddw_u8(sum, q1);
  *op1 = vqrshrn_n_u16(sum, 3);

  sum = vsubw_u8(sum, p3);
  sum = vsubw_u8(sum, p1);
  sum = vaddw_u8(sum, p0);
  sum = vaddw_u8(sum, q2);
  *op0 = vqrshrn_n_u16(sum, 3);

  sum = vsubw_u8(sum, p3);
  sum = vsubw_u8(sum, p0);
  sum = vaddw_u8(sum, q0);
  sum = vaddw_u8(sum, q3);
  *oq0 = vqrshrn_n_u16(sum, 3);

  sum = vsubw_u8(sum, p2);
  sum = vsubw_u8(sum, q0);
  sum = vaddw_u8(sum, q1);
  sum = vaddw_u8(sum, q3);
  *oq1 = vqrshrn_n_u16(sum, 3);

  sum = vsubw_u8(sum, p1);
  sum = vsubw_u8(sum, q1);
  sum = vaddw_u8(sum, q2);
  sum = vaddw_u8(sum, q3);
  *oq2 = vqrshrn_n_u16(sum, 3);
}

// 8-tap ("mb") loop filter kernel operating on eight pixel positions at once.
// Each input vector holds one tap (p3..q3); six filtered taps (p2..q2) are
// written through the output pointers. Per position the result is either the
// 8-tap smoothing output (where the position is both "flat" and passes the
// edge mask) or the 4-tap filter output (p2 and q2 are then passed through).
static INLINE void mbloop_filter_neon(uint8x8_t dblimit,   // mblimit
                                      uint8x8_t dlimit,    // limit
                                      uint8x8_t dthresh,   // thresh
                                      uint8x8_t d3u8,      // p3
                                      uint8x8_t d4u8,      // p2
                                      uint8x8_t d5u8,      // p1
                                      uint8x8_t d6u8,      // p0
                                      uint8x8_t d7u8,      // q0
                                      uint8x8_t d16u8,     // q1
                                      uint8x8_t d17u8,     // q2
                                      uint8x8_t d18u8,     // q3
                                      uint8x8_t *d0ru8,    // p2
                                      uint8x8_t *d1ru8,    // p1
                                      uint8x8_t *d2ru8,    // p0
                                      uint8x8_t *d3ru8,    // q0
                                      uint8x8_t *d4ru8,    // q1
                                      uint8x8_t *d5ru8) {  // q2
  const uint8x8_t sign_bit = vdup_n_u8(0x80);

  // Absolute differences between neighbouring taps.
  const uint8x8_t abs_p1p0 = vabd_u8(d5u8, d6u8);
  const uint8x8_t abs_q1q0 = vabd_u8(d16u8, d7u8);
  const uint8x8_t max_p = vmax_u8(vabd_u8(d3u8, d4u8), vabd_u8(d4u8, d5u8));
  const uint8x8_t max_inner = vmax_u8(abs_p1p0, abs_q1q0);
  const uint8x8_t max_q =
      vmax_u8(vabd_u8(d17u8, d16u8), vabd_u8(d18u8, d17u8));
  const uint8x8_t max_step = vmax_u8(vmax_u8(max_p, max_inner), max_q);

  // Edge strength: 2 * |p0 - q0| + |p1 - q1| / 2, with saturating adds.
  const uint8x8_t abs_p0q0 = vabd_u8(d6u8, d7u8);
  const uint8x8_t edge = vqadd_u8(vqadd_u8(abs_p0q0, abs_p0q0),
                                  vshr_n_u8(vabd_u8(d5u8, d16u8), 1));
  const uint8x8_t mask =
      vand_u8(vcge_u8(dlimit, max_step), vcge_u8(dblimit, edge));

  // Flatness: every tap within 1 of p0 / q0, and the edge mask is set.
  const uint8x8_t max_near =
      vmax_u8(vabd_u8(d6u8, d4u8), vabd_u8(d7u8, d17u8));
  const uint8x8_t max_far =
      vmax_u8(vabd_u8(d3u8, d6u8), vabd_u8(d18u8, d7u8));
  const uint8x8_t flat_measure =
      vmax_u8(max_inner, vmax_u8(max_near, max_far));
  const uint8x8_t flat_mask =
      vand_u8(vcge_u8(vdup_n_u8(1), flat_measure), mask);

  // High edge variance: |p1 - p0| or |q1 - q0| above thresh.
  const uint8x8_t hev_p = vcgt_u8(abs_p1p0, dthresh);
  const uint8x8_t hev = vorr_u8(hev_p, vcgt_u8(abs_q1q0, dthresh));

  // Narrow the eight flat_mask bytes to four bits each so they fit in one
  // 32-bit scalar. Only lane 0 is read, so the upper half of the combined
  // vector does not influence the result.
  const uint8x8_t packed = vshrn_n_u16(
      vcombine_u16(vreinterpret_u16_u8(flat_mask), vreinterpret_u16_u8(hev_p)),
      4);
  const uint32_t flat = vget_lane_u32(vreinterpret_u32_u8(packed), 0);

  if (flat == 0xffffffff) {
    // Every position takes the 8-tap output.
    mbloop_flat_taps_neon(d3u8, d4u8, d5u8, d6u8, d7u8, d16u8, d17u8, d18u8,
                          d0ru8, d1ru8, d2ru8, d3ru8, d4ru8, d5ru8);
  } else {
    const int8x8_t three = vdup_n_s8(3);
    const int8x8_t four = vdup_n_s8(4);
    const int8x8_t hev_s = vreinterpret_s8_u8(hev);

    // Flip the top bit so the pixels can be handled as signed values.
    const int8x8_t ps1 = vreinterpret_s8_u8(veor_u8(d5u8, sign_bit));
    const int8x8_t ps0 = vreinterpret_s8_u8(veor_u8(d6u8, sign_bit));
    const int8x8_t qs0 = vreinterpret_s8_u8(veor_u8(d7u8, sign_bit));
    const int8x8_t qs1 = vreinterpret_s8_u8(veor_u8(d16u8, sign_bit));

    // clamp(3 * (qs0 - ps0) + (hev ? clamp(ps1 - qs1) : 0)), zeroed where
    // the mask is clear.
    const int16x8_t wide = vaddw_s8(vmull_s8(vsub_s8(qs0, ps0), three),
                                    vand_s8(vqsub_s8(ps1, qs1), hev_s));
    const int8x8_t base =
        vand_s8(vqmovn_s16(wide), vreinterpret_s8_u8(mask));
    const int8x8_t filter1 = vshr_n_s8(vqadd_s8(base, four), 3);   // for q0
    const int8x8_t filter2 = vshr_n_s8(vqadd_s8(base, three), 3);  // for p0
    // Outer taps get the rounded half of filter1, only where hev is clear.
    const int8x8_t outer = vbic_s8(vrshr_n_s8(filter1, 1), hev_s);

    // 4-tap results converted back to unsigned pixels.
    const uint8x8_t f_p1 =
        veor_u8(vreinterpret_u8_s8(vqadd_s8(ps1, outer)), sign_bit);
    const uint8x8_t f_p0 =
        veor_u8(vreinterpret_u8_s8(vqadd_s8(ps0, filter2)), sign_bit);
    const uint8x8_t f_q0 =
        veor_u8(vreinterpret_u8_s8(vqsub_s8(qs0, filter1)), sign_bit);
    const uint8x8_t f_q1 =
        veor_u8(vreinterpret_u8_s8(vqsub_s8(qs1, outer)), sign_bit);

    uint8x8_t op2, op1, op0, oq0, oq1, oq2;

    if (flat == 0) {
      // No position is flat: 4-tap output only, p2 and q2 pass through.
      *d0ru8 = d4u8;
      *d1ru8 = f_p1;
      *d2ru8 = f_p0;
      *d3ru8 = f_q0;
      *d4ru8 = f_q1;
      *d5ru8 = d17u8;
    } else {
      // Mixed: choose per position between the 8-tap and 4-tap outputs.
      mbloop_flat_taps_neon(d3u8, d4u8, d5u8, d6u8, d7u8, d16u8, d17u8, d18u8,
                            &op2, &op1, &op0, &oq0, &oq1, &oq2);
      *d0ru8 = vbsl_u8(flat_mask, op2, d4u8);
      *d1ru8 = vbsl_u8(flat_mask, op1, f_p1);
      *d2ru8 = vbsl_u8(flat_mask, op0, f_p0);
      *d3ru8 = vbsl_u8(flat_mask, oq0, f_q0);
      *d4ru8 = vbsl_u8(flat_mask, oq1, f_q1);
      *d5ru8 = vbsl_u8(flat_mask, oq2, d17u8);
    }
  }
}
|
||||
|
||||
// Applies the 8-tap loop filter across an 8-pixel-wide horizontal edge.
// `src` points at the first row below the edge and `pitch` is the row pitch.
// blimit/limit/thresh must each reference at least 8 readable bytes.
// Rows src - 4 * pitch .. src + 3 * pitch are read; the six rows starting at
// src - 3 * pitch are rewritten.
void aom_lpf_horizontal_8_neon(uint8_t *src, int pitch, const uint8_t *blimit,
                               const uint8_t *limit, const uint8_t *thresh) {
  const uint8x8_t dblimit = vld1_u8(blimit);
  const uint8x8_t dlimit = vld1_u8(limit);
  const uint8x8_t dthresh = vld1_u8(thresh);
  const uint8_t *const first_in = src - 4 * pitch;
  uint8_t *const first_out = src - 3 * pitch;
  uint8x8_t row[8];
  uint8x8_t filtered[6];
  int r;

  for (r = 0; r < 8; ++r) row[r] = vld1_u8(first_in + r * pitch);

  mbloop_filter_neon(dblimit, dlimit, dthresh, row[0], row[1], row[2], row[3],
                     row[4], row[5], row[6], row[7], &filtered[0],
                     &filtered[1], &filtered[2], &filtered[3], &filtered[4],
                     &filtered[5]);

  for (r = 0; r < 6; ++r) vst1_u8(first_out + r * pitch, filtered[r]);
}
|
||||
|
||||
// Applies the 8-tap loop filter across an 8-row-tall vertical edge.
// `src` points at the first pixel to the right of the edge in the top row.
// Eight bytes starting at src - 4 are read from each of eight rows and
// transposed so that every vector holds one tap. After filtering, six bytes
// per row are written back: four at src - 3 and two at src + 1.
void aom_lpf_vertical_8_neon(uint8_t *src, int pitch, const uint8_t *blimit,
                             const uint8_t *limit, const uint8_t *thresh) {
  const uint8x8_t dblimit = vld1_u8(blimit);
  const uint8x8_t dlimit = vld1_u8(limit);
  const uint8x8_t dthresh = vld1_u8(thresh);
  const uint8_t *const in = src - 4;
  uint8_t *const out_left = src - 3;   // p2, p1, p0, q0
  uint8_t *const out_right = src + 1;  // q1, q2
  uint8x8_t row[8];
  uint32x2x2_t w04, w15, w26, w37;
  uint16x4x2_t h_lo_even, h_lo_odd, h_hi_even, h_hi_odd;
  uint8x8x2_t col01, col23, col45, col67;
  uint8x8x4_t left;
  uint8x8x2_t right;
  int r;

  for (r = 0; r < 8; ++r) row[r] = vld1_u8(in + r * pitch);

  // 8x8 byte transpose in three stages: 32-bit, 16-bit, then 8-bit lanes.
  w04 = vtrn_u32(vreinterpret_u32_u8(row[0]), vreinterpret_u32_u8(row[4]));
  w15 = vtrn_u32(vreinterpret_u32_u8(row[1]), vreinterpret_u32_u8(row[5]));
  w26 = vtrn_u32(vreinterpret_u32_u8(row[2]), vreinterpret_u32_u8(row[6]));
  w37 = vtrn_u32(vreinterpret_u32_u8(row[3]), vreinterpret_u32_u8(row[7]));

  h_lo_even = vtrn_u16(vreinterpret_u16_u32(w04.val[0]),
                       vreinterpret_u16_u32(w26.val[0]));
  h_lo_odd = vtrn_u16(vreinterpret_u16_u32(w15.val[0]),
                      vreinterpret_u16_u32(w37.val[0]));
  h_hi_even = vtrn_u16(vreinterpret_u16_u32(w04.val[1]),
                       vreinterpret_u16_u32(w26.val[1]));
  h_hi_odd = vtrn_u16(vreinterpret_u16_u32(w15.val[1]),
                      vreinterpret_u16_u32(w37.val[1]));

  col01 = vtrn_u8(vreinterpret_u8_u16(h_lo_even.val[0]),
                  vreinterpret_u8_u16(h_lo_odd.val[0]));  // p3, p2
  col23 = vtrn_u8(vreinterpret_u8_u16(h_lo_even.val[1]),
                  vreinterpret_u8_u16(h_lo_odd.val[1]));  // p1, p0
  col45 = vtrn_u8(vreinterpret_u8_u16(h_hi_even.val[0]),
                  vreinterpret_u8_u16(h_hi_odd.val[0]));  // q0, q1
  col67 = vtrn_u8(vreinterpret_u8_u16(h_hi_even.val[1]),
                  vreinterpret_u8_u16(h_hi_odd.val[1]));  // q2, q3

  mbloop_filter_neon(dblimit, dlimit, dthresh, col01.val[0], col01.val[1],
                     col23.val[0], col23.val[1], col45.val[0], col45.val[1],
                     col67.val[0], col67.val[1], &left.val[0], &left.val[1],
                     &left.val[2], &left.val[3], &right.val[0], &right.val[1]);

  // Lane k of the result vectors belongs to row k. The lane index must be a
  // compile-time constant, hence the unrolled stores.
  vst4_lane_u8(out_left + 0 * pitch, left, 0);
  vst4_lane_u8(out_left + 1 * pitch, left, 1);
  vst4_lane_u8(out_left + 2 * pitch, left, 2);
  vst4_lane_u8(out_left + 3 * pitch, left, 3);
  vst4_lane_u8(out_left + 4 * pitch, left, 4);
  vst4_lane_u8(out_left + 5 * pitch, left, 5);
  vst4_lane_u8(out_left + 6 * pitch, left, 6);
  vst4_lane_u8(out_left + 7 * pitch, left, 7);

  vst2_lane_u8(out_right + 0 * pitch, right, 0);
  vst2_lane_u8(out_right + 1 * pitch, right, 1);
  vst2_lane_u8(out_right + 2 * pitch, right, 2);
  vst2_lane_u8(out_right + 3 * pitch, right, 3);
  vst2_lane_u8(out_right + 4 * pitch, right, 4);
  vst2_lane_u8(out_right + 5 * pitch, right, 5);
  vst2_lane_u8(out_right + 6 * pitch, right, 6);
  vst2_lane_u8(out_right + 7 * pitch, right, 7);
}
|
||||
@@ -1,49 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2016, Alliance for Open Media. All rights reserved
|
||||
*
|
||||
* This source code is subject to the terms of the BSD 2 Clause License and
|
||||
* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
|
||||
* was not distributed with this source code in the LICENSE file, you can
|
||||
* obtain it at www.aomedia.org/license/software. If the Alliance for Open
|
||||
* Media Patent License 1.0 was not distributed with this source code in the
|
||||
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
|
||||
*/
|
||||
|
||||
#include <arm_neon.h>
|
||||
|
||||
#include "./aom_dsp_rtcd.h"
|
||||
#include "./aom_config.h"
|
||||
#include "aom/aom_integer.h"
|
||||
|
||||
// Filters a 16-row vertical edge with the 4-tap filter as two independent
// 8-row halves: rows 0-7 use the "0" parameter set, rows 8-15 the "1" set.
void aom_lpf_vertical_4_dual_neon(uint8_t *s, int p, const uint8_t *blimit0,
                                  const uint8_t *limit0, const uint8_t *thresh0,
                                  const uint8_t *blimit1, const uint8_t *limit1,
                                  const uint8_t *thresh1) {
  uint8_t *const lower_half = s + 8 * p;

  aom_lpf_vertical_4_neon(s, p, blimit0, limit0, thresh0);
  aom_lpf_vertical_4_neon(lower_half, p, blimit1, limit1, thresh1);
}
|
||||
|
||||
#if HAVE_NEON_ASM
|
||||
// Filters a 16-pixel-wide horizontal edge with the 8-tap filter as two
// independent 8-column halves: columns 0-7 use the "0" parameter set,
// columns 8-15 the "1" set.
void aom_lpf_horizontal_8_dual_neon(
    uint8_t *s, int p /* pitch */, const uint8_t *blimit0,
    const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1,
    const uint8_t *limit1, const uint8_t *thresh1) {
  uint8_t *const right_half = s + 8;

  aom_lpf_horizontal_8_neon(s, p, blimit0, limit0, thresh0);
  aom_lpf_horizontal_8_neon(right_half, p, blimit1, limit1, thresh1);
}
|
||||
|
||||
// Filters a 16-row vertical edge with the 8-tap filter as two independent
// 8-row halves: rows 0-7 use the "0" parameter set, rows 8-15 the "1" set.
void aom_lpf_vertical_8_dual_neon(uint8_t *s, int p, const uint8_t *blimit0,
                                  const uint8_t *limit0, const uint8_t *thresh0,
                                  const uint8_t *blimit1, const uint8_t *limit1,
                                  const uint8_t *thresh1) {
  uint8_t *const lower_half = s + 8 * p;

  aom_lpf_vertical_8_neon(s, p, blimit0, limit0, thresh0);
  aom_lpf_vertical_8_neon(lower_half, p, blimit1, limit1, thresh1);
}
|
||||
|
||||
// Filters a 16-row vertical edge by calling aom_lpf_vertical_16_neon on rows
// 0-7 and again on rows 8-15, with the same parameters for both halves.
void aom_lpf_vertical_16_dual_neon(uint8_t *s, int p, const uint8_t *blimit,
                                   const uint8_t *limit,
                                   const uint8_t *thresh) {
  uint8_t *const lower_half = s + 8 * p;

  aom_lpf_vertical_16_neon(s, p, blimit, limit, thresh);
  aom_lpf_vertical_16_neon(lower_half, p, blimit, limit, thresh);
}
|
||||
#endif // HAVE_NEON_ASM
|
||||
@@ -1,39 +0,0 @@
|
||||
;
|
||||
; Copyright (c) 2016, Alliance for Open Media. All rights reserved
|
||||
;
|
||||
; This source code is subject to the terms of the BSD 2 Clause License and
|
||||
; the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
|
||||
; was not distributed with this source code in the LICENSE file, you can
|
||||
; obtain it at www.aomedia.org/license/software. If the Alliance for Open
|
||||
; Media Patent License 1.0 was not distributed with this source code in the
|
||||
; PATENTS file, you can obtain it at www.aomedia.org/license/patent.
|
||||
;
|
||||
|
||||
;
|
||||
|
||||
|
||||
EXPORT |aom_push_neon|
|
||||
EXPORT |aom_pop_neon|
|
||||
|
||||
ARM
|
||||
REQUIRE8
|
||||
PRESERVE8
|
||||
|
||||
AREA ||.text||, CODE, READONLY, ALIGN=2
|
||||
|
||||
; aom_push_neon: stores NEON registers d8-d15 (64 bytes in total) to the
; buffer whose address is passed in r0, then returns to the caller.
; Each store uses post-increment writeback ("!"), so r0 has advanced by
; 64 bytes on return.
; NOTE(review): d8-d15 are the callee-saved VFP/NEON registers under the
; AAPCS, which is presumably why exactly these are saved - confirm with callers.
|aom_push_neon| PROC
|
||||
vst1.i64 {d8, d9, d10, d11}, [r0]!
|
||||
vst1.i64 {d12, d13, d14, d15}, [r0]!
|
||||
bx lr
|
||||
|
||||
ENDP
|
||||
|
||||
; aom_pop_neon: reloads NEON registers d8-d15 (64 bytes in total) from the
; buffer whose address is passed in r0, then returns to the caller.
; Counterpart of aom_push_neon: the registers are read back in the same
; order they were stored. Each load uses post-increment writeback ("!"),
; so r0 has advanced by 64 bytes on return.
|aom_pop_neon| PROC
|
||||
vld1.i64 {d8, d9, d10, d11}, [r0]!
|
||||
vld1.i64 {d12, d13, d14, d15}, [r0]!
|
||||
bx lr
|
||||
|
||||
ENDP
|
||||
|
||||
END
|
||||
|
||||
@@ -1,81 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2016, Alliance for Open Media. All rights reserved
|
||||
*
|
||||
* This source code is subject to the terms of the BSD 2 Clause License and
|
||||
* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
|
||||
* was not distributed with this source code in the LICENSE file, you can
|
||||
* obtain it at www.aomedia.org/license/software. If the Alliance for Open
|
||||
* Media Patent License 1.0 was not distributed with this source code in the
|
||||
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
|
||||
*/
|
||||
|
||||
#include "./aom_config.h"
|
||||
#include "./aom_dsp_rtcd.h"
|
||||
#include "aom/aom_integer.h"
|
||||
#include "aom_ports/mem.h"
|
||||
|
||||
#if HAVE_MEDIA
|
||||
// Two-tap bilinear filter coefficients, indexed by sub-pixel offset (0..7).
// Row k is { 128 - 16 * k, 16 * k }, so each pair sums to 128.
static const int16_t bilinear_filters_media[8][2] = {
  { 128, 0 },
  { 112, 16 },
  { 96, 32 },
  { 80, 48 },
  { 64, 64 },
  { 48, 80 },
  { 32, 96 },
  { 16, 112 }
};
|
||||
|
||||
extern void aom_filter_block2d_bil_first_pass_media(
|
||||
const uint8_t *src_ptr, uint16_t *dst_ptr, uint32_t src_pitch,
|
||||
uint32_t height, uint32_t width, const int16_t *filter);
|
||||
|
||||
extern void aom_filter_block2d_bil_second_pass_media(
|
||||
const uint16_t *src_ptr, uint8_t *dst_ptr, int32_t src_pitch,
|
||||
uint32_t height, uint32_t width, const int16_t *filter);
|
||||
|
||||
unsigned int aom_sub_pixel_variance8x8_media(
|
||||
const uint8_t *src_ptr, int src_pixels_per_line, int xoffset, int yoffset,
|
||||
const uint8_t *dst_ptr, int dst_pixels_per_line, unsigned int *sse) {
|
||||
uint16_t first_pass[10 * 8];
|
||||
uint8_t second_pass[8 * 8];
|
||||
const int16_t *HFilter, *VFilter;
|
||||
|
||||
HFilter = bilinear_filters_media[xoffset];
|
||||
VFilter = bilinear_filters_media[yoffset];
|
||||
|
||||
aom_filter_block2d_bil_first_pass_media(src_ptr, first_pass,
|
||||
src_pixels_per_line, 9, 8, HFilter);
|
||||
aom_filter_block2d_bil_second_pass_media(first_pass, second_pass, 8, 8, 8,
|
||||
VFilter);
|
||||
|
||||
return aom_variance8x8_media(second_pass, 8, dst_ptr, dst_pixels_per_line,
|
||||
sse);
|
||||
}
|
||||
|
||||
unsigned int aom_sub_pixel_variance16x16_media(
|
||||
const uint8_t *src_ptr, int src_pixels_per_line, int xoffset, int yoffset,
|
||||
const uint8_t *dst_ptr, int dst_pixels_per_line, unsigned int *sse) {
|
||||
uint16_t first_pass[36 * 16];
|
||||
uint8_t second_pass[20 * 16];
|
||||
const int16_t *HFilter, *VFilter;
|
||||
unsigned int var;
|
||||
|
||||
if (xoffset == 4 && yoffset == 0) {
|
||||
var = aom_variance_halfpixvar16x16_h_media(
|
||||
src_ptr, src_pixels_per_line, dst_ptr, dst_pixels_per_line, sse);
|
||||
} else if (xoffset == 0 && yoffset == 4) {
|
||||
var = aom_variance_halfpixvar16x16_v_media(
|
||||
src_ptr, src_pixels_per_line, dst_ptr, dst_pixels_per_line, sse);
|
||||
} else if (xoffset == 4 && yoffset == 4) {
|
||||
var = aom_variance_halfpixvar16x16_hv_media(
|
||||
src_ptr, src_pixels_per_line, dst_ptr, dst_pixels_per_line, sse);
|
||||
} else {
|
||||
HFilter = bilinear_filters_media[xoffset];
|
||||
VFilter = bilinear_filters_media[yoffset];
|
||||
|
||||
aom_filter_block2d_bil_first_pass_media(
|
||||
src_ptr, first_pass, src_pixels_per_line, 17, 16, HFilter);
|
||||
aom_filter_block2d_bil_second_pass_media(first_pass, second_pass, 16, 16,
|
||||
16, VFilter);
|
||||
|
||||
var = aom_variance16x16_media(second_pass, 16, dst_ptr, dst_pixels_per_line,
|
||||
sse);
|
||||
}
|
||||
return var;
|
||||
}
|
||||
#endif // HAVE_MEDIA
|
||||
@@ -1,80 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2016, Alliance for Open Media. All rights reserved
|
||||
*
|
||||
* This source code is subject to the terms of the BSD 2 Clause License and
|
||||
* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
|
||||
* was not distributed with this source code in the LICENSE file, you can
|
||||
* obtain it at www.aomedia.org/license/software. If the Alliance for Open
|
||||
* Media Patent License 1.0 was not distributed with this source code in the
|
||||
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
|
||||
*/
|
||||
|
||||
#include <arm_neon.h>
|
||||
|
||||
#include "./aom_config.h"
|
||||
#include "aom/aom_integer.h"
|
||||
|
||||
// Writes diff = src - pred for a rows x cols block, widening each 8-bit
// sample to a signed 16-bit difference. The width selects the code path:
//   cols > 16 : 32 pixels per step (the row is covered in steps of 32)
//   cols > 8  : one 16-pixel vector per row
//   cols > 4  : one 8-pixel vector per row
//   otherwise : scalar loop over `cols` pixels
// The vector paths always process whole vectors, so the buffers must be
// readable and writable for the full vector width.
void aom_subtract_block_neon(int rows, int cols, int16_t *diff,
                             ptrdiff_t diff_stride, const uint8_t *src,
                             ptrdiff_t src_stride, const uint8_t *pred,
                             ptrdiff_t pred_stride) {
  int r, c, half;

  for (r = 0; r < rows; ++r) {
    if (cols > 16) {
      for (c = 0; c < cols; c += 32) {
        // Two 16-pixel halves per 32-pixel step.
        for (half = 0; half < 32; half += 16) {
          const uint8x16_t s16 = vld1q_u8(src + c + half);
          const uint8x16_t p16 = vld1q_u8(pred + c + half);
          vst1q_s16(diff + c + half,
                    vreinterpretq_s16_u16(
                        vsubl_u8(vget_low_u8(s16), vget_low_u8(p16))));
          vst1q_s16(diff + c + half + 8,
                    vreinterpretq_s16_u16(
                        vsubl_u8(vget_high_u8(s16), vget_high_u8(p16))));
        }
      }
    } else if (cols > 8) {
      const uint8x16_t s16 = vld1q_u8(src);
      const uint8x16_t p16 = vld1q_u8(pred);
      vst1q_s16(diff, vreinterpretq_s16_u16(
                          vsubl_u8(vget_low_u8(s16), vget_low_u8(p16))));
      vst1q_s16(diff + 8, vreinterpretq_s16_u16(
                              vsubl_u8(vget_high_u8(s16), vget_high_u8(p16))));
    } else if (cols > 4) {
      const uint8x8_t s8 = vld1_u8(src);
      const uint8x8_t p8 = vld1_u8(pred);
      vst1q_s16(diff, vreinterpretq_s16_u16(vsubl_u8(s8, p8)));
    } else {
      for (c = 0; c < cols; ++c) diff[c] = (int16_t)(src[c] - pred[c]);
    }

    diff += diff_stride;
    pred += pred_stride;
    src += src_stride;
  }
}
|
||||
@@ -1,400 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2016, Alliance for Open Media. All rights reserved
|
||||
*
|
||||
* This source code is subject to the terms of the BSD 2 Clause License and
|
||||
* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
|
||||
* was not distributed with this source code in the LICENSE file, you can
|
||||
* obtain it at www.aomedia.org/license/software. If the Alliance for Open
|
||||
* Media Patent License 1.0 was not distributed with this source code in the
|
||||
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
|
||||
*/
|
||||
|
||||
#include <arm_neon.h>
|
||||
|
||||
#include "./aom_dsp_rtcd.h"
|
||||
#include "./aom_config.h"
|
||||
|
||||
#include "aom/aom_integer.h"
|
||||
#include "aom_ports/mem.h"
|
||||
|
||||
static INLINE int horizontal_add_s16x8(const int16x8_t v_16x8) {
|
||||
const int32x4_t a = vpaddlq_s16(v_16x8);
|
||||
const int64x2_t b = vpaddlq_s32(a);
|
||||
const int32x2_t c = vadd_s32(vreinterpret_s32_s64(vget_low_s64(b)),
|
||||
vreinterpret_s32_s64(vget_high_s64(b)));
|
||||
return vget_lane_s32(c, 0);
|
||||
}
|
||||
|
||||
static INLINE int horizontal_add_s32x4(const int32x4_t v_32x4) {
|
||||
const int64x2_t b = vpaddlq_s32(v_32x4);
|
||||
const int32x2_t c = vadd_s32(vreinterpret_s32_s64(vget_low_s64(b)),
|
||||
vreinterpret_s32_s64(vget_high_s64(b)));
|
||||
return vget_lane_s32(c, 0);
|
||||
}
|
||||
|
||||
// w * h must be less than 2048 or local variable v_sum may overflow.
|
||||
static void variance_neon_w8(const uint8_t *a, int a_stride, const uint8_t *b,
|
||||
int b_stride, int w, int h, uint32_t *sse,
|
||||
int *sum) {
|
||||
int i, j;
|
||||
int16x8_t v_sum = vdupq_n_s16(0);
|
||||
int32x4_t v_sse_lo = vdupq_n_s32(0);
|
||||
int32x4_t v_sse_hi = vdupq_n_s32(0);
|
||||
|
||||
for (i = 0; i < h; ++i) {
|
||||
for (j = 0; j < w; j += 8) {
|
||||
const uint8x8_t v_a = vld1_u8(&a[j]);
|
||||
const uint8x8_t v_b = vld1_u8(&b[j]);
|
||||
const uint16x8_t v_diff = vsubl_u8(v_a, v_b);
|
||||
const int16x8_t sv_diff = vreinterpretq_s16_u16(v_diff);
|
||||
v_sum = vaddq_s16(v_sum, sv_diff);
|
||||
v_sse_lo =
|
||||
vmlal_s16(v_sse_lo, vget_low_s16(sv_diff), vget_low_s16(sv_diff));
|
||||
v_sse_hi =
|
||||
vmlal_s16(v_sse_hi, vget_high_s16(sv_diff), vget_high_s16(sv_diff));
|
||||
}
|
||||
a += a_stride;
|
||||
b += b_stride;
|
||||
}
|
||||
|
||||
*sum = horizontal_add_s16x8(v_sum);
|
||||
*sse = (unsigned int)horizontal_add_s32x4(vaddq_s32(v_sse_lo, v_sse_hi));
|
||||
}
|
||||
|
||||
// Stores the sum of differences and the sum of squared differences of two
// 8x8 blocks in *sum and *sse.
void aom_get8x8var_neon(const uint8_t *a, int a_stride, const uint8_t *b,
                        int b_stride, unsigned int *sse, int *sum) {
  const int block_w = 8;
  const int block_h = 8;
  variance_neon_w8(a, a_stride, b, b_stride, block_w, block_h, sse, sum);
}
|
||||
|
||||
// Stores the sum of differences and the sum of squared differences of two
// 16x16 blocks in *sum and *sse.
void aom_get16x16var_neon(const uint8_t *a, int a_stride, const uint8_t *b,
                          int b_stride, unsigned int *sse, int *sum) {
  const int block_w = 16;
  const int block_h = 16;
  variance_neon_w8(a, a_stride, b, b_stride, block_w, block_h, sse, sum);
}
|
||||
|
||||
// Returns the variance of the difference between two 8x8 blocks and stores
// the sum of squared differences in *sse.
unsigned int aom_variance8x8_neon(const uint8_t *a, int a_stride,
                                  const uint8_t *b, int b_stride,
                                  unsigned int *sse) {
  int diff_sum;
  int64_t mean_sq;

  variance_neon_w8(a, a_stride, b, b_stride, 8, 8, sse, &diff_sum);
  // sum^2 / (8 * 8): the block holds 2^6 pixels.
  mean_sq = ((int64_t)diff_sum * diff_sum) >> 6;
  return *sse - mean_sq;
}
|
||||
|
||||
// Returns the variance of the difference between two 16x16 blocks and stores
// the sum of squared differences in *sse.
unsigned int aom_variance16x16_neon(const uint8_t *a, int a_stride,
                                    const uint8_t *b, int b_stride,
                                    unsigned int *sse) {
  int diff_sum;
  int64_t mean_sq;

  variance_neon_w8(a, a_stride, b, b_stride, 16, 16, sse, &diff_sum);
  // sum^2 / (16 * 16): the block holds 2^8 pixels.
  mean_sq = ((int64_t)diff_sum * diff_sum) >> 8;
  return *sse - mean_sq;
}
|
||||
|
||||
// Returns the variance of the difference between two 32x32 blocks and stores
// the sum of squared differences in *sse.
unsigned int aom_variance32x32_neon(const uint8_t *a, int a_stride,
                                    const uint8_t *b, int b_stride,
                                    unsigned int *sse) {
  int diff_sum;
  int64_t mean_sq;

  variance_neon_w8(a, a_stride, b, b_stride, 32, 32, sse, &diff_sum);
  // sum^2 / (32 * 32): the block holds 2^10 pixels.
  mean_sq = ((int64_t)diff_sum * diff_sum) >> 10;
  return *sse - mean_sq;
}
|
||||
|
||||
// Returns the variance of the difference between two 32x64 blocks and stores
// the sum of squared differences in *sse. The block is processed as two
// 32x32 halves, each a separate variance_neon_w8() call.
unsigned int aom_variance32x64_neon(const uint8_t *a, int a_stride,
                                    const uint8_t *b, int b_stride,
                                    unsigned int *sse) {
  uint32_t total_sse = 0;
  int total_sum = 0;
  int half;

  for (half = 0; half < 2; ++half) {
    uint32_t half_sse;
    int half_sum;
    variance_neon_w8(a + (half * 32 * a_stride), a_stride,
                     b + (half * 32 * b_stride), b_stride, 32, 32, &half_sse,
                     &half_sum);
    total_sse += half_sse;
    total_sum += half_sum;
  }

  *sse = total_sse;
  // sum^2 / (32 * 64): the block holds 2^11 pixels.
  return *sse - (((int64_t)total_sum * total_sum) >> 11);
}
|
||||
|
||||
// Returns the variance of the difference between two 64x32 blocks and stores
// the sum of squared differences in *sse. The block is processed as two
// 64x16 strips, each a separate variance_neon_w8() call.
unsigned int aom_variance64x32_neon(const uint8_t *a, int a_stride,
                                    const uint8_t *b, int b_stride,
                                    unsigned int *sse) {
  uint32_t total_sse = 0;
  int total_sum = 0;
  int strip;

  for (strip = 0; strip < 2; ++strip) {
    uint32_t strip_sse;
    int strip_sum;
    variance_neon_w8(a + (strip * 16 * a_stride), a_stride,
                     b + (strip * 16 * b_stride), b_stride, 64, 16, &strip_sse,
                     &strip_sum);
    total_sse += strip_sse;
    total_sum += strip_sum;
  }

  *sse = total_sse;
  // sum^2 / (64 * 32): the block holds 2^11 pixels.
  return *sse - (((int64_t)total_sum * total_sum) >> 11);
}
|
||||
|
||||
// Returns the variance of the difference between two 64x64 blocks and stores
// the sum of squared differences in *sse. The block is processed as four
// 64x16 strips, each a separate variance_neon_w8() call.
unsigned int aom_variance64x64_neon(const uint8_t *a, int a_stride,
                                    const uint8_t *b, int b_stride,
                                    unsigned int *sse) {
  uint32_t total_sse = 0;
  int total_sum = 0;
  int strip;

  for (strip = 0; strip < 4; ++strip) {
    uint32_t strip_sse;
    int strip_sum;
    variance_neon_w8(a + (16 * strip * a_stride), a_stride,
                     b + (16 * strip * b_stride), b_stride, 64, 16, &strip_sse,
                     &strip_sum);
    total_sse += strip_sse;
    total_sum += strip_sum;
  }

  *sse = total_sse;
  // sum^2 / (64 * 64): the block holds 2^12 pixels.
  return *sse - (((int64_t)total_sum * total_sum) >> 12);
}
|
||||
|
||||
unsigned int aom_variance16x8_neon(const unsigned char *src_ptr,
|
||||
int source_stride,
|
||||
const unsigned char *ref_ptr,
|
||||
int recon_stride, unsigned int *sse) {
|
||||
int i;
|
||||
int16x4_t d22s16, d23s16, d24s16, d25s16, d26s16, d27s16, d28s16, d29s16;
|
||||
uint32x2_t d0u32, d10u32;
|
||||
int64x1_t d0s64, d1s64;
|
||||
uint8x16_t q0u8, q1u8, q2u8, q3u8;
|
||||
uint16x8_t q11u16, q12u16, q13u16, q14u16;
|
||||
int32x4_t q8s32, q9s32, q10s32;
|
||||
int64x2_t q0s64, q1s64, q5s64;
|
||||
|
||||
q8s32 = vdupq_n_s32(0);
|
||||
q9s32 = vdupq_n_s32(0);
|
||||
q10s32 = vdupq_n_s32(0);
|
||||
|
||||
for (i = 0; i < 4; i++) {
|
||||
q0u8 = vld1q_u8(src_ptr);
|
||||
src_ptr += source_stride;
|
||||
q1u8 = vld1q_u8(src_ptr);
|
||||
src_ptr += source_stride;
|
||||
__builtin_prefetch(src_ptr);
|
||||
|
||||
q2u8 = vld1q_u8(ref_ptr);
|
||||
ref_ptr += recon_stride;
|
||||
q3u8 = vld1q_u8(ref_ptr);
|
||||
ref_ptr += recon_stride;
|
||||
__builtin_prefetch(ref_ptr);
|
||||
|
||||
q11u16 = vsubl_u8(vget_low_u8(q0u8), vget_low_u8(q2u8));
|
||||
q12u16 = vsubl_u8(vget_high_u8(q0u8), vget_high_u8(q2u8));
|
||||
q13u16 = vsubl_u8(vget_low_u8(q1u8), vget_low_u8(q3u8));
|
||||
q14u16 = vsubl_u8(vget_high_u8(q1u8), vget_high_u8(q3u8));
|
||||
|
||||
d22s16 = vreinterpret_s16_u16(vget_low_u16(q11u16));
|
||||
d23s16 = vreinterpret_s16_u16(vget_high_u16(q11u16));
|
||||
q8s32 = vpadalq_s16(q8s32, vreinterpretq_s16_u16(q11u16));
|
||||
q9s32 = vmlal_s16(q9s32, d22s16, d22s16);
|
||||
q10s32 = vmlal_s16(q10s32, d23s16, d23s16);
|
||||
|
||||
d24s16 = vreinterpret_s16_u16(vget_low_u16(q12u16));
|
||||
d25s16 = vreinterpret_s16_u16(vget_high_u16(q12u16));
|
||||
q8s32 = vpadalq_s16(q8s32, vreinterpretq_s16_u16(q12u16));
|
||||
q9s32 = vmlal_s16(q9s32, d24s16, d24s16);
|
||||
q10s32 = vmlal_s16(q10s32, d25s16, d25s16);
|
||||
|
||||
d26s16 = vreinterpret_s16_u16(vget_low_u16(q13u16));
|
||||
d27s16 = vreinterpret_s16_u16(vget_high_u16(q13u16));
|
||||
q8s32 = vpadalq_s16(q8s32, vreinterpretq_s16_u16(q13u16));
|
||||
q9s32 = vmlal_s16(q9s32, d26s16, d26s16);
|
||||
q10s32 = vmlal_s16(q10s32, d27s16, d27s16);
|
||||
|
||||
d28s16 = vreinterpret_s16_u16(vget_low_u16(q14u16));
|
||||
d29s16 = vreinterpret_s16_u16(vget_high_u16(q14u16));
|
||||
q8s32 = vpadalq_s16(q8s32, vreinterpretq_s16_u16(q14u16));
|
||||
q9s32 = vmlal_s16(q9s32, d28s16, d28s16);
|
||||
q10s32 = vmlal_s16(q10s32, d29s16, d29s16);
|
||||
}
|
||||
|
||||
q10s32 = vaddq_s32(q10s32, q9s32);
|
||||
q0s64 = vpaddlq_s32(q8s32);
|
||||
q1s64 = vpaddlq_s32(q10s32);
|
||||
|
||||
d0s64 = vadd_s64(vget_low_s64(q0s64), vget_high_s64(q0s64));
|
||||
d1s64 = vadd_s64(vget_low_s64(q1s64), vget_high_s64(q1s64));
|
||||
|
||||
q5s64 = vmull_s32(vreinterpret_s32_s64(d0s64), vreinterpret_s32_s64(d0s64));
|
||||
vst1_lane_u32((uint32_t *)sse, vreinterpret_u32_s64(d1s64), 0);
|
||||
|
||||
d10u32 = vshr_n_u32(vreinterpret_u32_s64(vget_low_s64(q5s64)), 7);
|
||||
d0u32 = vsub_u32(vreinterpret_u32_s64(d1s64), d10u32);
|
||||
|
||||
return vget_lane_u32(d0u32, 0);
|
||||
}
|
||||
|
||||
unsigned int aom_variance8x16_neon(const unsigned char *src_ptr,
|
||||
int source_stride,
|
||||
const unsigned char *ref_ptr,
|
||||
int recon_stride, unsigned int *sse) {
|
||||
int i;
|
||||
uint8x8_t d0u8, d2u8, d4u8, d6u8;
|
||||
int16x4_t d22s16, d23s16, d24s16, d25s16;
|
||||
uint32x2_t d0u32, d10u32;
|
||||
int64x1_t d0s64, d1s64;
|
||||
uint16x8_t q11u16, q12u16;
|
||||
int32x4_t q8s32, q9s32, q10s32;
|
||||
int64x2_t q0s64, q1s64, q5s64;
|
||||
|
||||
q8s32 = vdupq_n_s32(0);
|
||||
q9s32 = vdupq_n_s32(0);
|
||||
q10s32 = vdupq_n_s32(0);
|
||||
|
||||
for (i = 0; i < 8; i++) {
|
||||
d0u8 = vld1_u8(src_ptr);
|
||||
src_ptr += source_stride;
|
||||
d2u8 = vld1_u8(src_ptr);
|
||||
src_ptr += source_stride;
|
||||
__builtin_prefetch(src_ptr);
|
||||
|
||||
d4u8 = vld1_u8(ref_ptr);
|
||||
ref_ptr += recon_stride;
|
||||
d6u8 = vld1_u8(ref_ptr);
|
||||
ref_ptr += recon_stride;
|
||||
__builtin_prefetch(ref_ptr);
|
||||
|
||||
q11u16 = vsubl_u8(d0u8, d4u8);
|
||||
q12u16 = vsubl_u8(d2u8, d6u8);
|
||||
|
||||
d22s16 = vreinterpret_s16_u16(vget_low_u16(q11u16));
|
||||
d23s16 = vreinterpret_s16_u16(vget_high_u16(q11u16));
|
||||
q8s32 = vpadalq_s16(q8s32, vreinterpretq_s16_u16(q11u16));
|
||||
q9s32 = vmlal_s16(q9s32, d22s16, d22s16);
|
||||
q10s32 = vmlal_s16(q10s32, d23s16, d23s16);
|
||||
|
||||
d24s16 = vreinterpret_s16_u16(vget_low_u16(q12u16));
|
||||
d25s16 = vreinterpret_s16_u16(vget_high_u16(q12u16));
|
||||
q8s32 = vpadalq_s16(q8s32, vreinterpretq_s16_u16(q12u16));
|
||||
q9s32 = vmlal_s16(q9s32, d24s16, d24s16);
|
||||
q10s32 = vmlal_s16(q10s32, d25s16, d25s16);
|
||||
}
|
||||
|
||||
q10s32 = vaddq_s32(q10s32, q9s32);
|
||||
q0s64 = vpaddlq_s32(q8s32);
|
||||
q1s64 = vpaddlq_s32(q10s32);
|
||||
|
||||
d0s64 = vadd_s64(vget_low_s64(q0s64), vget_high_s64(q0s64));
|
||||
d1s64 = vadd_s64(vget_low_s64(q1s64), vget_high_s64(q1s64));
|
||||
|
||||
q5s64 = vmull_s32(vreinterpret_s32_s64(d0s64), vreinterpret_s32_s64(d0s64));
|
||||
vst1_lane_u32((uint32_t *)sse, vreinterpret_u32_s64(d1s64), 0);
|
||||
|
||||
d10u32 = vshr_n_u32(vreinterpret_u32_s64(vget_low_s64(q5s64)), 7);
|
||||
d0u32 = vsub_u32(vreinterpret_u32_s64(d1s64), d10u32);
|
||||
|
||||
return vget_lane_u32(d0u32, 0);
|
||||
}
|
||||
|
||||
unsigned int aom_mse16x16_neon(const unsigned char *src_ptr, int source_stride,
|
||||
const unsigned char *ref_ptr, int recon_stride,
|
||||
unsigned int *sse) {
|
||||
int i;
|
||||
int16x4_t d22s16, d23s16, d24s16, d25s16, d26s16, d27s16, d28s16, d29s16;
|
||||
int64x1_t d0s64;
|
||||
uint8x16_t q0u8, q1u8, q2u8, q3u8;
|
||||
int32x4_t q7s32, q8s32, q9s32, q10s32;
|
||||
uint16x8_t q11u16, q12u16, q13u16, q14u16;
|
||||
int64x2_t q1s64;
|
||||
|
||||
q7s32 = vdupq_n_s32(0);
|
||||
q8s32 = vdupq_n_s32(0);
|
||||
q9s32 = vdupq_n_s32(0);
|
||||
q10s32 = vdupq_n_s32(0);
|
||||
|
||||
for (i = 0; i < 8; i++) { // mse16x16_neon_loop
|
||||
q0u8 = vld1q_u8(src_ptr);
|
||||
src_ptr += source_stride;
|
||||
q1u8 = vld1q_u8(src_ptr);
|
||||
src_ptr += source_stride;
|
||||
q2u8 = vld1q_u8(ref_ptr);
|
||||
ref_ptr += recon_stride;
|
||||
q3u8 = vld1q_u8(ref_ptr);
|
||||
ref_ptr += recon_stride;
|
||||
|
||||
q11u16 = vsubl_u8(vget_low_u8(q0u8), vget_low_u8(q2u8));
|
||||
q12u16 = vsubl_u8(vget_high_u8(q0u8), vget_high_u8(q2u8));
|
||||
q13u16 = vsubl_u8(vget_low_u8(q1u8), vget_low_u8(q3u8));
|
||||
q14u16 = vsubl_u8(vget_high_u8(q1u8), vget_high_u8(q3u8));
|
||||
|
||||
d22s16 = vreinterpret_s16_u16(vget_low_u16(q11u16));
|
||||
d23s16 = vreinterpret_s16_u16(vget_high_u16(q11u16));
|
||||
q7s32 = vmlal_s16(q7s32, d22s16, d22s16);
|
||||
q8s32 = vmlal_s16(q8s32, d23s16, d23s16);
|
||||
|
||||
d24s16 = vreinterpret_s16_u16(vget_low_u16(q12u16));
|
||||
d25s16 = vreinterpret_s16_u16(vget_high_u16(q12u16));
|
||||
q9s32 = vmlal_s16(q9s32, d24s16, d24s16);
|
||||
q10s32 = vmlal_s16(q10s32, d25s16, d25s16);
|
||||
|
||||
d26s16 = vreinterpret_s16_u16(vget_low_u16(q13u16));
|
||||
d27s16 = vreinterpret_s16_u16(vget_high_u16(q13u16));
|
||||
q7s32 = vmlal_s16(q7s32, d26s16, d26s16);
|
||||
q8s32 = vmlal_s16(q8s32, d27s16, d27s16);
|
||||
|
||||
d28s16 = vreinterpret_s16_u16(vget_low_u16(q14u16));
|
||||
d29s16 = vreinterpret_s16_u16(vget_high_u16(q14u16));
|
||||
q9s32 = vmlal_s16(q9s32, d28s16, d28s16);
|
||||
q10s32 = vmlal_s16(q10s32, d29s16, d29s16);
|
||||
}
|
||||
|
||||
q7s32 = vaddq_s32(q7s32, q8s32);
|
||||
q9s32 = vaddq_s32(q9s32, q10s32);
|
||||
q10s32 = vaddq_s32(q7s32, q9s32);
|
||||
|
||||
q1s64 = vpaddlq_s32(q10s32);
|
||||
d0s64 = vadd_s64(vget_low_s64(q1s64), vget_high_s64(q1s64));
|
||||
|
||||
vst1_lane_u32((uint32_t *)sse, vreinterpret_u32_s64(d0s64), 0);
|
||||
return vget_lane_u32(vreinterpret_u32_s64(d0s64), 0);
|
||||
}
|
||||
|
||||
unsigned int aom_get4x4sse_cs_neon(const unsigned char *src_ptr,
|
||||
int source_stride,
|
||||
const unsigned char *ref_ptr,
|
||||
int recon_stride) {
|
||||
int16x4_t d22s16, d24s16, d26s16, d28s16;
|
||||
int64x1_t d0s64;
|
||||
uint8x8_t d0u8, d1u8, d2u8, d3u8, d4u8, d5u8, d6u8, d7u8;
|
||||
int32x4_t q7s32, q8s32, q9s32, q10s32;
|
||||
uint16x8_t q11u16, q12u16, q13u16, q14u16;
|
||||
int64x2_t q1s64;
|
||||
|
||||
d0u8 = vld1_u8(src_ptr);
|
||||
src_ptr += source_stride;
|
||||
d4u8 = vld1_u8(ref_ptr);
|
||||
ref_ptr += recon_stride;
|
||||
d1u8 = vld1_u8(src_ptr);
|
||||
src_ptr += source_stride;
|
||||
d5u8 = vld1_u8(ref_ptr);
|
||||
ref_ptr += recon_stride;
|
||||
d2u8 = vld1_u8(src_ptr);
|
||||
src_ptr += source_stride;
|
||||
d6u8 = vld1_u8(ref_ptr);
|
||||
ref_ptr += recon_stride;
|
||||
d3u8 = vld1_u8(src_ptr);
|
||||
src_ptr += source_stride;
|
||||
d7u8 = vld1_u8(ref_ptr);
|
||||
ref_ptr += recon_stride;
|
||||
|
||||
q11u16 = vsubl_u8(d0u8, d4u8);
|
||||
q12u16 = vsubl_u8(d1u8, d5u8);
|
||||
q13u16 = vsubl_u8(d2u8, d6u8);
|
||||
q14u16 = vsubl_u8(d3u8, d7u8);
|
||||
|
||||
d22s16 = vget_low_s16(vreinterpretq_s16_u16(q11u16));
|
||||
d24s16 = vget_low_s16(vreinterpretq_s16_u16(q12u16));
|
||||
d26s16 = vget_low_s16(vreinterpretq_s16_u16(q13u16));
|
||||
d28s16 = vget_low_s16(vreinterpretq_s16_u16(q14u16));
|
||||
|
||||
q7s32 = vmull_s16(d22s16, d22s16);
|
||||
q8s32 = vmull_s16(d24s16, d24s16);
|
||||
q9s32 = vmull_s16(d26s16, d26s16);
|
||||
q10s32 = vmull_s16(d28s16, d28s16);
|
||||
|
||||
q7s32 = vaddq_s32(q7s32, q8s32);
|
||||
q9s32 = vaddq_s32(q9s32, q10s32);
|
||||
q9s32 = vaddq_s32(q7s32, q9s32);
|
||||
|
||||
q1s64 = vpaddlq_s32(q9s32);
|
||||
d0s64 = vadd_s64(vget_low_s64(q1s64), vget_high_s64(q1s64));
|
||||
|
||||
return vget_lane_u32(vreinterpret_u32_s64(d0s64), 0);
|
||||
}
|
||||
@@ -1,240 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2016, Alliance for Open Media. All rights reserved
|
||||
*
|
||||
* This source code is subject to the terms of the BSD 2 Clause License and
|
||||
* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
|
||||
* was not distributed with this source code in the LICENSE file, you can
|
||||
* obtain it at www.aomedia.org/license/software. If the Alliance for Open
|
||||
* Media Patent License 1.0 was not distributed with this source code in the
|
||||
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
|
||||
*/
|
||||
|
||||
#ifndef AOM_DSP_BITREADER_H_
|
||||
#define AOM_DSP_BITREADER_H_
|
||||
|
||||
#include <assert.h>
|
||||
#include <limits.h>
|
||||
|
||||
#include "./aom_config.h"
|
||||
#if CONFIG_EC_ADAPT && !CONFIG_EC_MULTISYMBOL
|
||||
#error "CONFIG_EC_ADAPT is enabled without enabling CONFIG_EC_MULTISYMBOL."
|
||||
#endif
|
||||
|
||||
#include "aom/aomdx.h"
|
||||
#include "aom/aom_integer.h"
|
||||
#if CONFIG_ANS
|
||||
#include "aom_dsp/ansreader.h"
|
||||
#elif CONFIG_DAALA_EC
|
||||
#include "aom_dsp/daalaboolreader.h"
|
||||
#else
|
||||
#include "aom_dsp/dkboolreader.h"
|
||||
#endif
|
||||
#include "aom_dsp/prob.h"
|
||||
#include "av1/common/odintrin.h"
|
||||
|
||||
#if CONFIG_ACCOUNTING
|
||||
#include "av1/common/accounting.h"
|
||||
#define ACCT_STR_NAME acct_str
|
||||
#define ACCT_STR_PARAM , const char *ACCT_STR_NAME
|
||||
#define ACCT_STR_ARG(s) , s
|
||||
#else
|
||||
#define ACCT_STR_PARAM
|
||||
#define ACCT_STR_ARG(s)
|
||||
#endif
|
||||
|
||||
// Reader entry points. Each one forwards to the matching underscore-suffixed
// inline function and routes its last argument (the accounting label) through
// ACCT_STR_ARG(), so the label reaches the callee only when ACCT_STR_ARG()
// expands to an argument.
#define aom_read(r, prob, label) aom_read_(r, prob ACCT_STR_ARG(label))
#define aom_read_bit(r, label) aom_read_bit_(r ACCT_STR_ARG(label))
#define aom_read_tree(r, tree, probs, label) \
  aom_read_tree_(r, tree, probs ACCT_STR_ARG(label))
#define aom_read_literal(r, bits, label) \
  aom_read_literal_(r, bits ACCT_STR_ARG(label))
#define aom_read_tree_bits(r, tree, probs, label) \
  aom_read_tree_bits_(r, tree, probs ACCT_STR_ARG(label))
#define aom_read_symbol(r, cdf, nsymbs, label) \
  aom_read_symbol_(r, cdf, nsymbs ACCT_STR_ARG(label))
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#if CONFIG_ANS
|
||||
typedef struct AnsDecoder aom_reader;
|
||||
#elif CONFIG_DAALA_EC
|
||||
typedef struct daala_reader aom_reader;
|
||||
#else
|
||||
typedef struct aom_dk_reader aom_reader;
|
||||
#endif
|
||||
|
||||
// Initializes the configured entropy decoder over `size` bytes at `buffer`
// and returns the backend's status code. The decrypt callback and its state
// are forwarded only to the default (dk) bool reader; the ANS and Daala
// backends do not receive them.
static INLINE int aom_reader_init(aom_reader *r, const uint8_t *buffer,
                                  size_t size, aom_decrypt_cb decrypt_cb,
                                  void *decrypt_state) {
#if CONFIG_ANS || CONFIG_DAALA_EC
  (void)decrypt_cb;
  (void)decrypt_state;
#endif
#if CONFIG_ANS
  assert(size <= INT_MAX);
  return ans_read_init(r, buffer, size);
#elif CONFIG_DAALA_EC
  return aom_daala_reader_init(r, buffer, size);
#else
  return aom_dk_reader_init(r, buffer, size, decrypt_cb, decrypt_state);
#endif
}
|
||||
|
||||
// Returns the backend's end-of-data pointer for the reader. Not available
// with ANS: that build asserts and yields NULL.
static INLINE const uint8_t *aom_reader_find_end(aom_reader *r) {
#if CONFIG_ANS
  const uint8_t *const end = NULL;
  (void)r;
  assert(0 && "Use the raw buffer size with ANS");
#elif CONFIG_DAALA_EC
  const uint8_t *const end = aom_daala_reader_find_end(r);
#else
  const uint8_t *const end = aom_dk_reader_find_end(r);
#endif
  return end;
}
|
||||
|
||||
// Returns the error indication reported by the configured entropy decoder.
static INLINE int aom_reader_has_error(aom_reader *r) {
#if CONFIG_ANS
  const int failed = ans_reader_has_error(r);
#elif CONFIG_DAALA_EC
  const int failed = aom_daala_reader_has_error(r);
#else
  const int failed = aom_dk_reader_has_error(r);
#endif
  return failed;
}
|
||||
|
||||
// Returns the position in the bit reader in bits.
|
||||
static INLINE uint32_t aom_reader_tell(const aom_reader *r) {
|
||||
#if CONFIG_ANS
|
||||
(void)r;
|
||||
assert(0 && "aom_reader_tell() is unimplemented for ANS");
|
||||
return 0;
|
||||
#elif CONFIG_DAALA_EC
|
||||
return aom_daala_reader_tell(r);
|
||||
#else
|
||||
return aom_dk_reader_tell(r);
|
||||
#endif
|
||||
}
|
||||
|
||||
// Returns the position in the bit reader in 1/8th bits.
|
||||
static INLINE uint32_t aom_reader_tell_frac(const aom_reader *r) {
|
||||
#if CONFIG_ANS
|
||||
(void)r;
|
||||
assert(0 && "aom_reader_tell_frac() is unimplemented for ANS");
|
||||
return 0;
|
||||
#elif CONFIG_DAALA_EC
|
||||
return aom_daala_reader_tell_frac(r);
|
||||
#else
|
||||
return aom_dk_reader_tell_frac(r);
|
||||
#endif
|
||||
}
|
||||
|
||||
#if CONFIG_ACCOUNTING
// Records, under the given label, how far the reader has advanced (in 1/8th
// bits) since the previous record, then remembers the new position. Does
// nothing when the reader has no accounting object attached.
static INLINE void aom_process_accounting(const aom_reader *r ACCT_STR_PARAM) {
  uint32_t now_frac;

  if (r->accounting == NULL) return;

  now_frac = aom_reader_tell_frac(r);
  aom_accounting_record(r->accounting, ACCT_STR_NAME,
                        now_frac - r->accounting->last_tell_frac);
  r->accounting->last_tell_frac = now_frac;
}
#endif
|
||||
|
||||
// Decodes one binary symbol with probability `prob` using the configured
// entropy decoder. With accounting enabled, a non-null label records the
// bits consumed.
static INLINE int aom_read_(aom_reader *r, int prob ACCT_STR_PARAM) {
#if CONFIG_ANS
  const int bit = uabs_read(r, prob);
#elif CONFIG_DAALA_EC
  const int bit = aom_daala_read(r, prob);
#else
  const int bit = aom_dk_read(r, prob);
#endif
#if CONFIG_ACCOUNTING
  if (ACCT_STR_NAME) aom_process_accounting(r, ACCT_STR_NAME);
#endif
  return bit;
}
|
||||
|
||||
// Decodes one equiprobable bit. ANS has a dedicated routine for this case;
// the other coders read with probability 128 (one half).
static INLINE int aom_read_bit_(aom_reader *r ACCT_STR_PARAM) {
#if CONFIG_ANS
  const int bit = uabs_read_bit(r);  // Non trivial optimization at half probability
#else
  const int bit = aom_read(r, 128, NULL);  // aom_prob_half
#endif
#if CONFIG_ACCOUNTING
  if (ACCT_STR_NAME) aom_process_accounting(r, ACCT_STR_NAME);
#endif
  return bit;
}
|
||||
|
||||
// Decodes a `bits`-wide unsigned value, most significant bit first, as a
// sequence of equiprobable bits.
static INLINE int aom_read_literal_(aom_reader *r, int bits ACCT_STR_PARAM) {
  int literal = 0;
  int remaining;

  // Shift the bits read so far up by one and append the new bit.
  for (remaining = bits; remaining > 0; --remaining)
    literal = (literal << 1) | aom_read_bit(r, NULL);
#if CONFIG_ACCOUNTING
  if (ACCT_STR_NAME) aom_process_accounting(r, ACCT_STR_NAME);
#endif
  return literal;
}
|
||||
|
||||
static INLINE int aom_read_tree_bits_(aom_reader *r, const aom_tree_index *tree,
|
||||
const aom_prob *probs ACCT_STR_PARAM) {
|
||||
aom_tree_index i = 0;
|
||||
|
||||
while ((i = tree[i + aom_read(r, probs[i >> 1], NULL)]) > 0) continue;
|
||||
#if CONFIG_ACCOUNTING
|
||||
if (ACCT_STR_NAME) aom_process_accounting(r, ACCT_STR_NAME);
|
||||
#endif
|
||||
return -i;
|
||||
}
|
||||
|
||||
static INLINE int aom_read_tree_(aom_reader *r, const aom_tree_index *tree,
|
||||
const aom_prob *probs ACCT_STR_PARAM) {
|
||||
int ret;
|
||||
#if CONFIG_DAALA_EC
|
||||
ret = daala_read_tree_bits(r, tree, probs);
|
||||
#else
|
||||
ret = aom_read_tree_bits(r, tree, probs, NULL);
|
||||
#endif
|
||||
#if CONFIG_ACCOUNTING
|
||||
if (ACCT_STR_NAME) aom_process_accounting(r, ACCT_STR_NAME);
|
||||
#endif
|
||||
return ret;
|
||||
}
|
||||
|
||||
#if CONFIG_EC_MULTISYMBOL
// Decodes one multi-symbol value against `cdf` with the rANS or Daala
// backend. With CONFIG_EC_ADAPT the CDF is updated with the decoded symbol
// before returning.
static INLINE int aom_read_symbol_(aom_reader *r, aom_cdf_prob *cdf,
                                   int nsymbs ACCT_STR_PARAM) {
  int symbol;
#if CONFIG_RANS
  (void)nsymbs;
  symbol = rans_read(r, cdf);
#elif CONFIG_DAALA_EC
  symbol = daala_read_symbol(r, cdf, nsymbs);
#else
#error \
    "CONFIG_EC_MULTISYMBOL is selected without a valid backing entropy " \
    "coder. Enable daala_ec or ans for a valid configuration."
#endif

#if CONFIG_EC_ADAPT
  update_cdf(cdf, symbol, nsymbs);
#endif

#if CONFIG_ACCOUNTING
  if (ACCT_STR_NAME) aom_process_accounting(r, ACCT_STR_NAME);
#endif
  return symbol;
}
#endif  // CONFIG_EC_MULTISYMBOL
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
#endif
|
||||
|
||||
#endif // AOM_DSP_BITREADER_H_
|
||||
@@ -1,47 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2016, Alliance for Open Media. All rights reserved
|
||||
*
|
||||
* This source code is subject to the terms of the BSD 2 Clause License and
|
||||
* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
|
||||
* was not distributed with this source code in the LICENSE file, you can
|
||||
* obtain it at www.aomedia.org/license/software. If the Alliance for Open
|
||||
* Media Patent License 1.0 was not distributed with this source code in the
|
||||
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
|
||||
*/
|
||||
#include "./aom_config.h"
|
||||
#include "./bitreader_buffer.h"
|
||||
|
||||
size_t aom_rb_bytes_read(struct aom_read_bit_buffer *rb) {
|
||||
return (rb->bit_offset + 7) >> 3;
|
||||
}
|
||||
|
||||
int aom_rb_read_bit(struct aom_read_bit_buffer *rb) {
|
||||
const size_t off = rb->bit_offset;
|
||||
const size_t p = off >> 3;
|
||||
const int q = 7 - (int)(off & 0x7);
|
||||
if (rb->bit_buffer + p < rb->bit_buffer_end) {
|
||||
const int bit = (rb->bit_buffer[p] >> q) & 1;
|
||||
rb->bit_offset = off + 1;
|
||||
return bit;
|
||||
} else {
|
||||
rb->error_handler(rb->error_handler_data);
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
// Reads a `bits`-wide value, most significant bit first, and returns it.
//
// The value is assembled in an unsigned accumulator so that placing a 1 in
// bit position 31 (bits == 32) is a well-defined shift rather than a signed
// overflow. Results for bits <= 31 are unchanged; `bits` must not exceed the
// width of unsigned int.
int aom_rb_read_literal(struct aom_read_bit_buffer *rb, int bits) {
  unsigned int value = 0;
  int bit;
  for (bit = bits - 1; bit >= 0; bit--)
    value |= (unsigned int)aom_rb_read_bit(rb) << bit;
  return (int)value;
}
|
||||
|
||||
// Reads a `bits`-wide magnitude followed by one sign bit; a set sign bit
// negates the magnitude.
int aom_rb_read_signed_literal(struct aom_read_bit_buffer *rb, int bits) {
  const int magnitude = aom_rb_read_literal(rb, bits);
  if (aom_rb_read_bit(rb)) return -magnitude;
  return magnitude;
}
|
||||
|
||||
// Reads bits + 1 bits and sign-extends them: the field is shifted up to the
// top of an unsigned word and then shifted back down as a signed int.
int aom_rb_read_inv_signed_literal(struct aom_read_bit_buffer *rb, int bits) {
  const int pad = sizeof(unsigned) * 8 - bits - 1;
  const unsigned raw = (unsigned)aom_rb_read_literal(rb, bits + 1);
  const unsigned aligned = raw << pad;
  return ((int)aligned) >> pad;
}
|
||||
@@ -1,48 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2016, Alliance for Open Media. All rights reserved
|
||||
*
|
||||
* This source code is subject to the terms of the BSD 2 Clause License and
|
||||
* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
|
||||
* was not distributed with this source code in the LICENSE file, you can
|
||||
* obtain it at www.aomedia.org/license/software. If the Alliance for Open
|
||||
* Media Patent License 1.0 was not distributed with this source code in the
|
||||
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
|
||||
*/
|
||||
|
||||
#ifndef AOM_DSP_BITREADER_BUFFER_H_
|
||||
#define AOM_DSP_BITREADER_BUFFER_H_
|
||||
|
||||
#include <limits.h>
|
||||
|
||||
#include "aom/aom_integer.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
typedef void (*aom_rb_error_handler)(void *data);
|
||||
|
||||
// Cursor over a read-only byte range that is consumed one bit at a time.
struct aom_read_bit_buffer {
|
||||
// First byte of the data; aom_rb_read_bit() indexes it by bit_offset / 8.
const uint8_t *bit_buffer;
|
||||
// End of the readable range; reads at or beyond this address are refused.
const uint8_t *bit_buffer_end;
|
||||
// Index of the next bit to read, counted from the start of bit_buffer.
size_t bit_offset;
|
||||
|
||||
// Argument handed to error_handler when it is invoked.
void *error_handler_data;
|
||||
// Invoked by aom_rb_read_bit() when a read would pass bit_buffer_end.
aom_rb_error_handler error_handler;
|
||||
};
|
||||
|
||||
size_t aom_rb_bytes_read(struct aom_read_bit_buffer *rb);
|
||||
|
||||
int aom_rb_read_bit(struct aom_read_bit_buffer *rb);
|
||||
|
||||
int aom_rb_read_literal(struct aom_read_bit_buffer *rb, int bits);
|
||||
|
||||
int aom_rb_read_signed_literal(struct aom_read_bit_buffer *rb, int bits);
|
||||
|
||||
int aom_rb_read_inv_signed_literal(struct aom_read_bit_buffer *rb, int bits);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
#endif
|
||||
|
||||
#endif // AOM_DSP_BITREADER_BUFFER_H_
|
||||
@@ -1,179 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2016, Alliance for Open Media. All rights reserved
|
||||
*
|
||||
* This source code is subject to the terms of the BSD 2 Clause License and
|
||||
* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
|
||||
* was not distributed with this source code in the LICENSE file, you can
|
||||
* obtain it at www.aomedia.org/license/software. If the Alliance for Open
|
||||
* Media Patent License 1.0 was not distributed with this source code in the
|
||||
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
|
||||
*/
|
||||
|
||||
#ifndef AOM_DSP_BITWRITER_H_
|
||||
#define AOM_DSP_BITWRITER_H_
|
||||
|
||||
#include <assert.h>
|
||||
#include "./aom_config.h"
|
||||
#if CONFIG_EC_ADAPT && !CONFIG_EC_MULTISYMBOL
|
||||
#error "CONFIG_EC_ADAPT is enabled without enabling CONFIG_EC_MULTISYMBOL"
|
||||
#endif
|
||||
|
||||
#if CONFIG_ANS
|
||||
#include "aom_dsp/buf_ans.h"
|
||||
#elif CONFIG_DAALA_EC
|
||||
#include "aom_dsp/daalaboolwriter.h"
|
||||
#else
|
||||
#include "aom_dsp/dkboolwriter.h"
|
||||
#endif
|
||||
#include "aom_dsp/prob.h"
|
||||
|
||||
#if CONFIG_RD_DEBUG
|
||||
#include "av1/encoder/cost.h"
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#if CONFIG_ANS
|
||||
typedef struct BufAnsCoder aom_writer;
|
||||
#elif CONFIG_DAALA_EC
|
||||
typedef struct daala_writer aom_writer;
|
||||
#else
|
||||
typedef struct aom_dk_writer aom_writer;
|
||||
#endif
|
||||
|
||||
// Running cost total that the *_record() writers add to.
typedef struct TOKEN_STATS {
  int64_t cost;
} TOKEN_STATS;
|
||||
|
||||
// Starts encoding into `buffer` with the configured entropy coder. ANS
// cannot be started through this entry point; that build only asserts.
static INLINE void aom_start_encode(aom_writer *bc, uint8_t *buffer) {
#if !CONFIG_ANS && !CONFIG_DAALA_EC
  aom_dk_start_encode(bc, buffer);
#elif CONFIG_ANS
  (void)bc;
  (void)buffer;
  assert(0 && "buf_ans requires a more complicated startup procedure");
#else
  aom_daala_start_encode(bc, buffer);
#endif
}
|
||||
|
||||
// Finishes encoding with the configured entropy coder. ANS cannot be stopped
// through this entry point; that build only asserts.
static INLINE void aom_stop_encode(aom_writer *bc) {
#if !CONFIG_ANS && !CONFIG_DAALA_EC
  aom_dk_stop_encode(bc);
#elif CONFIG_ANS
  (void)bc;
  assert(0 && "buf_ans requires a more complicated shutdown procedure");
#else
  aom_daala_stop_encode(bc);
#endif
}
|
||||
|
||||
// Encodes one binary symbol `bit` with the given probability using the
// configured entropy coder.
static INLINE void aom_write(aom_writer *br, int bit, int probability) {
#if !CONFIG_ANS && !CONFIG_DAALA_EC
  aom_dk_write(br, bit, probability);
#elif CONFIG_ANS
  buf_uabs_write(br, bit, probability);
#else
  aom_daala_write(br, bit, probability);
#endif
}
|
||||
|
||||
static INLINE void aom_write_record(aom_writer *br, int bit, int probability,
|
||||
TOKEN_STATS *token_stats) {
|
||||
aom_write(br, bit, probability);
|
||||
#if CONFIG_RD_DEBUG
|
||||
token_stats->cost += av1_cost_bit(probability, bit);
|
||||
#else
|
||||
(void)token_stats;
|
||||
#endif
|
||||
}
|
||||
|
||||
// Encodes one equiprobable bit.
static INLINE void aom_write_bit(aom_writer *w, int bit) {
  const int half_prob = 128;  // aom_prob_half
  aom_write(w, bit, half_prob);
}
|
||||
|
||||
static INLINE void aom_write_bit_record(aom_writer *w, int bit,
|
||||
TOKEN_STATS *token_stats) {
|
||||
aom_write_record(w, bit, 128, token_stats); // aom_prob_half
|
||||
}
|
||||
|
||||
// Writes the low `bits` bits of `data`, most significant bit first, each as
// an individual aom_write_bit() call. Nothing is written when bits <= 0.
static INLINE void aom_write_literal(aom_writer *w, int data, int bits) {
  int shift = bits - 1;

  while (shift >= 0) {
    aom_write_bit(w, (data >> shift) & 1);
    shift--;
  }
}
|
||||
|
||||
static INLINE void aom_write_tree_bits(aom_writer *w, const aom_tree_index *tr,
|
||||
const aom_prob *probs, int bits, int len,
|
||||
aom_tree_index i) {
|
||||
do {
|
||||
const int bit = (bits >> --len) & 1;
|
||||
aom_write(w, bit, probs[i >> 1]);
|
||||
i = tr[i + bit];
|
||||
} while (len);
|
||||
}
|
||||
|
||||
static INLINE void aom_write_tree_bits_record(aom_writer *w,
|
||||
const aom_tree_index *tr,
|
||||
const aom_prob *probs, int bits,
|
||||
int len, aom_tree_index i,
|
||||
TOKEN_STATS *token_stats) {
|
||||
do {
|
||||
const int bit = (bits >> --len) & 1;
|
||||
aom_write_record(w, bit, probs[i >> 1], token_stats);
|
||||
i = tr[i + bit];
|
||||
} while (len);
|
||||
}
|
||||
|
||||
static INLINE void aom_write_tree(aom_writer *w, const aom_tree_index *tree,
|
||||
const aom_prob *probs, int bits, int len,
|
||||
aom_tree_index i) {
|
||||
#if CONFIG_DAALA_EC
|
||||
daala_write_tree_bits(w, tree, probs, bits, len, i);
|
||||
#else
|
||||
aom_write_tree_bits(w, tree, probs, bits, len, i);
|
||||
#endif
|
||||
}
|
||||
|
||||
static INLINE void aom_write_tree_record(aom_writer *w,
|
||||
const aom_tree_index *tree,
|
||||
const aom_prob *probs, int bits,
|
||||
int len, aom_tree_index i,
|
||||
TOKEN_STATS *token_stats) {
|
||||
#if CONFIG_DAALA_EC
|
||||
(void)token_stats;
|
||||
daala_write_tree_bits(w, tree, probs, bits, len, i);
|
||||
#else
|
||||
aom_write_tree_bits_record(w, tree, probs, bits, len, i, token_stats);
|
||||
#endif
|
||||
}
|
||||
|
||||
#if CONFIG_EC_MULTISYMBOL
|
||||
static INLINE void aom_write_symbol(aom_writer *w, int symb, aom_cdf_prob *cdf,
|
||||
int nsymbs) {
|
||||
#if CONFIG_RANS
|
||||
struct rans_sym s;
|
||||
(void)nsymbs;
|
||||
assert(cdf);
|
||||
s.cum_prob = symb > 0 ? cdf[symb - 1] : 0;
|
||||
s.prob = cdf[symb] - s.cum_prob;
|
||||
buf_rans_write(w, &s);
|
||||
#elif CONFIG_DAALA_EC
|
||||
daala_write_symbol(w, symb, cdf, nsymbs);
|
||||
#else
|
||||
#error \
|
||||
"CONFIG_EC_MULTISYMBOL is selected without a valid backing entropy " \
|
||||
"coder. Enable daala_ec or ans for a valid configuration."
|
||||
#endif
|
||||
|
||||
#if CONFIG_EC_ADAPT
|
||||
update_cdf(cdf, symb, nsymbs);
|
||||
#endif
|
||||
}
|
||||
#endif // CONFIG_EC_MULTISYMBOL
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
#endif
|
||||
|
||||
#endif // AOM_DSP_BITWRITER_H_
|
||||
@@ -1,43 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2016, Alliance for Open Media. All rights reserved
|
||||
*
|
||||
* This source code is subject to the terms of the BSD 2 Clause License and
|
||||
* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
|
||||
* was not distributed with this source code in the LICENSE file, you can
|
||||
* obtain it at www.aomedia.org/license/software. If the Alliance for Open
|
||||
* Media Patent License 1.0 was not distributed with this source code in the
|
||||
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
|
||||
*/
|
||||
|
||||
#include <limits.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "./aom_config.h"
|
||||
#include "./bitwriter_buffer.h"
|
||||
|
||||
size_t aom_wb_bytes_written(const struct aom_write_bit_buffer *wb) {
|
||||
return wb->bit_offset / CHAR_BIT + (wb->bit_offset % CHAR_BIT > 0);
|
||||
}
|
||||
|
||||
void aom_wb_write_bit(struct aom_write_bit_buffer *wb, int bit) {
|
||||
const int off = (int)wb->bit_offset;
|
||||
const int p = off / CHAR_BIT;
|
||||
const int q = CHAR_BIT - 1 - off % CHAR_BIT;
|
||||
if (q == CHAR_BIT - 1) {
|
||||
wb->bit_buffer[p] = bit << q;
|
||||
} else {
|
||||
wb->bit_buffer[p] &= ~(1 << q);
|
||||
wb->bit_buffer[p] |= bit << q;
|
||||
}
|
||||
wb->bit_offset = off + 1;
|
||||
}
|
||||
|
||||
// Writes the low `bits` bits of `data`, most significant bit first.
// Nothing is written when bits <= 0.
void aom_wb_write_literal(struct aom_write_bit_buffer *wb, int data, int bits) {
  int shift = bits - 1;
  while (shift >= 0) {
    aom_wb_write_bit(wb, (data >> shift) & 1);
    shift--;
  }
}
|
||||
|
||||
// Writes `data` as a literal that is one bit wider than `bits`.
void aom_wb_write_inv_signed_literal(struct aom_write_bit_buffer *wb, int data,
                                     int bits) {
  const int total_bits = bits + 1;
  aom_wb_write_literal(wb, data, total_bits);
}
|
||||
@@ -1,39 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2016, Alliance for Open Media. All rights reserved
|
||||
*
|
||||
* This source code is subject to the terms of the BSD 2 Clause License and
|
||||
* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
|
||||
* was not distributed with this source code in the LICENSE file, you can
|
||||
* obtain it at www.aomedia.org/license/software. If the Alliance for Open
|
||||
* Media Patent License 1.0 was not distributed with this source code in the
|
||||
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
|
||||
*/
|
||||
|
||||
#ifndef AOM_DSP_BITWRITER_BUFFER_H_
|
||||
#define AOM_DSP_BITWRITER_BUFFER_H_
|
||||
|
||||
#include "aom/aom_integer.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
// Write cursor over a byte buffer that is addressed in bits.
struct aom_write_bit_buffer {
|
||||
uint8_t *bit_buffer;  // bytes that receive the written bits
|
||||
size_t bit_offset;  // index, in bits, of the next bit to be written
|
||||
};
|
||||
|
||||
size_t aom_wb_bytes_written(const struct aom_write_bit_buffer *wb);
|
||||
|
||||
void aom_wb_write_bit(struct aom_write_bit_buffer *wb, int bit);
|
||||
|
||||
void aom_wb_write_literal(struct aom_write_bit_buffer *wb, int data, int bits);
|
||||
|
||||
void aom_wb_write_inv_signed_literal(struct aom_write_bit_buffer *wb, int data,
|
||||
int bits);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
#endif
|
||||
|
||||
#endif // AOM_DSP_BITWRITER_BUFFER_H_
|
||||
@@ -1,42 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2016, Alliance for Open Media. All rights reserved
|
||||
*
|
||||
* This source code is subject to the terms of the BSD 2 Clause License and
|
||||
* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
|
||||
* was not distributed with this source code in the LICENSE file, you can
|
||||
* obtain it at www.aomedia.org/license/software. If the Alliance for Open
|
||||
* Media Patent License 1.0 was not distributed with this source code in the
|
||||
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
|
||||
*/
|
||||
|
||||
#ifndef AOM_DSP_BLEND_H_
|
||||
#define AOM_DSP_BLEND_H_
|
||||
|
||||
#include "aom_ports/mem.h"
|
||||
|
||||
// Various blending functions and macros.
|
||||
// See also the aom_blend_* functions in aom_dsp_rtcd.h
|
||||
|
||||
// Alpha blending with alpha values from the range [0, 64], where 64
|
||||
// means use the first input and 0 means use the second input.
|
||||
|
||||
#define AOM_BLEND_A64_ROUND_BITS 6
|
||||
#define AOM_BLEND_A64_MAX_ALPHA (1 << AOM_BLEND_A64_ROUND_BITS) // 64
|
||||
|
||||
// Weights v0 by `a` and v1 by (AOM_BLEND_A64_MAX_ALPHA - a), then passes the
// sum to ROUND_POWER_OF_TWO with AOM_BLEND_A64_ROUND_BITS.
// `a` appears twice in the expansion, so it must have no side effects.
#define AOM_BLEND_A64(a, v0, v1) \
|
||||
ROUND_POWER_OF_TWO((a) * (v0) + (AOM_BLEND_A64_MAX_ALPHA - (a)) * (v1), \
|
||||
AOM_BLEND_A64_ROUND_BITS)
|
||||
|
||||
// Alpha blending with alpha values from the range [0, 256], where 256
|
||||
// means use the first input and 0 means use the second input.
|
||||
#define AOM_BLEND_A256_ROUND_BITS 8
|
||||
#define AOM_BLEND_A256_MAX_ALPHA (1 << AOM_BLEND_A256_ROUND_BITS) // 256
|
||||
|
||||
// Weights v0 by `a` and v1 by (AOM_BLEND_A256_MAX_ALPHA - a), then passes the
// sum to ROUND_POWER_OF_TWO with AOM_BLEND_A256_ROUND_BITS.
// `a` appears twice in the expansion, so it must have no side effects.
#define AOM_BLEND_A256(a, v0, v1) \
|
||||
ROUND_POWER_OF_TWO((a) * (v0) + (AOM_BLEND_A256_MAX_ALPHA - (a)) * (v1), \
|
||||
AOM_BLEND_A256_ROUND_BITS)
|
||||
|
||||
// Blending by averaging.
|
||||
// Adds the two inputs and passes the sum to ROUND_POWER_OF_TWO with a
// one-bit shift.
#define AOM_BLEND_AVG(v0, v1) ROUND_POWER_OF_TWO((v0) + (v1), 1)
|
||||
|
||||
#endif // AOM_DSP_BLEND_H_
|
||||
@@ -1,71 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2016, Alliance for Open Media. All rights reserved
|
||||
*
|
||||
* This source code is subject to the terms of the BSD 2 Clause License and
|
||||
* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
|
||||
* was not distributed with this source code in the LICENSE file, you can
|
||||
* obtain it at www.aomedia.org/license/software. If the Alliance for Open
|
||||
* Media Patent License 1.0 was not distributed with this source code in the
|
||||
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
|
||||
*/
|
||||
|
||||
#include <assert.h>
|
||||
|
||||
#include "aom/aom_integer.h"
|
||||
#include "aom_ports/mem.h"
|
||||
#include "aom_dsp/aom_dsp_common.h"
|
||||
#include "aom_dsp/blend.h"
|
||||
|
||||
#include "./aom_dsp_rtcd.h"
|
||||
|
||||
// Blends src0 and src1 into dst with AOM_BLEND_A64 using a horizontal mask:
// mask[] is indexed by column only, so every row uses the same alphas.
// h and w must be powers of two and at least 1. A source may alias dst only
// when its stride equals dst_stride.
void aom_blend_a64_hmask_c(uint8_t *dst, uint32_t dst_stride,
                           const uint8_t *src0, uint32_t src0_stride,
                           const uint8_t *src1, uint32_t src1_stride,
                           const uint8_t *mask, int h, int w) {
  int row, col;

  assert(IMPLIES(src0 == dst, src0_stride == dst_stride));
  assert(IMPLIES(src1 == dst, src1_stride == dst_stride));

  assert(h >= 1);
  assert(w >= 1);
  assert(IS_POWER_OF_TWO(h));
  assert(IS_POWER_OF_TWO(w));

  for (row = 0; row < h; ++row) {
    for (col = 0; col < w; ++col) {
      const int alpha = mask[col];
      const int p0 = src0[row * src0_stride + col];
      const int p1 = src1[row * src1_stride + col];
      dst[row * dst_stride + col] = AOM_BLEND_A64(alpha, p0, p1);
    }
  }
}
|
||||
|
||||
#if CONFIG_AOM_HIGHBITDEPTH
|
||||
// High-bitdepth variant of the horizontal-mask blend. The three pixel
// pointers are converted with CONVERT_TO_SHORTPTR and processed as uint16_t.
// mask[] is indexed by column only. bd must be 8, 10 or 12 and is otherwise
// unused.
void aom_highbd_blend_a64_hmask_c(uint8_t *dst_8, uint32_t dst_stride,
                                  const uint8_t *src0_8, uint32_t src0_stride,
                                  const uint8_t *src1_8, uint32_t src1_stride,
                                  const uint8_t *mask, int h, int w, int bd) {
  int row, col;
  uint16_t *dst = CONVERT_TO_SHORTPTR(dst_8);
  const uint16_t *src0 = CONVERT_TO_SHORTPTR(src0_8);
  const uint16_t *src1 = CONVERT_TO_SHORTPTR(src1_8);
  (void)bd;

  assert(IMPLIES(src0 == dst, src0_stride == dst_stride));
  assert(IMPLIES(src1 == dst, src1_stride == dst_stride));

  assert(h >= 1);
  assert(w >= 1);
  assert(IS_POWER_OF_TWO(h));
  assert(IS_POWER_OF_TWO(w));

  assert(bd == 8 || bd == 10 || bd == 12);

  for (row = 0; row < h; ++row) {
    for (col = 0; col < w; ++col) {
      const int alpha = mask[col];
      const int p0 = src0[row * src0_stride + col];
      const int p1 = src1[row * src1_stride + col];
      dst[row * dst_stride + col] = AOM_BLEND_A64(alpha, p0, p1);
    }
  }
}
|
||||
#endif // CONFIG_AOM_HIGHBITDEPTH
|
||||
@@ -1,145 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2016, Alliance for Open Media. All rights reserved
|
||||
*
|
||||
* This source code is subject to the terms of the BSD 2 Clause License and
|
||||
* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
|
||||
* was not distributed with this source code in the LICENSE file, you can
|
||||
* obtain it at www.aomedia.org/license/software. If the Alliance for Open
|
||||
* Media Patent License 1.0 was not distributed with this source code in the
|
||||
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
|
||||
*/
|
||||
|
||||
#include <assert.h>
|
||||
|
||||
#include "aom/aom_integer.h"
|
||||
#include "aom_ports/mem.h"
|
||||
#include "aom_dsp/blend.h"
|
||||
#include "aom_dsp/aom_dsp_common.h"
|
||||
|
||||
#include "./aom_dsp_rtcd.h"
|
||||
|
||||
// Blending with alpha mask. Mask values come from the range [0, 64],
|
||||
// as described for AOM_BLEND_A64 in aom_dsp/blend.h. src0 or src1 can
|
||||
// be the same as dst, or dst can be different from both sources.
|
||||
|
||||
// Blends src0 and src1 into dst with a 2-D alpha mask. subw/subh select how
// the mask is sampled for each output pixel:
//   subw == 0, subh == 0: one mask value per pixel;
//   subw == 1, subh == 1: the 2x2 mask neighbourhood combined via
//                         ROUND_POWER_OF_TWO(sum, 2);
//   subw == 1, subh == 0: two horizontally adjacent values via AOM_BLEND_AVG;
//   anything else:        two vertically adjacent values via AOM_BLEND_AVG.
// h and w must be powers of two and at least 1.
void aom_blend_a64_mask_c(uint8_t *dst, uint32_t dst_stride,
                          const uint8_t *src0, uint32_t src0_stride,
                          const uint8_t *src1, uint32_t src1_stride,
                          const uint8_t *mask, uint32_t mask_stride, int h,
                          int w, int subh, int subw) {
  int row, col;

  assert(IMPLIES(src0 == dst, src0_stride == dst_stride));
  assert(IMPLIES(src1 == dst, src1_stride == dst_stride));

  assert(h >= 1);
  assert(w >= 1);
  assert(IS_POWER_OF_TWO(h));
  assert(IS_POWER_OF_TWO(w));

  for (row = 0; row < h; ++row) {
    for (col = 0; col < w; ++col) {
      int alpha;
      // The sampling mode does not change inside the loops; only the mask
      // addresses do.
      if (subw == 0 && subh == 0) {
        alpha = mask[row * mask_stride + col];
      } else if (subw == 1 && subh == 1) {
        alpha = ROUND_POWER_OF_TWO(
            mask[(2 * row) * mask_stride + (2 * col)] +
                mask[(2 * row + 1) * mask_stride + (2 * col)] +
                mask[(2 * row) * mask_stride + (2 * col + 1)] +
                mask[(2 * row + 1) * mask_stride + (2 * col + 1)],
            2);
      } else if (subw == 1 && subh == 0) {
        alpha = AOM_BLEND_AVG(mask[row * mask_stride + (2 * col)],
                              mask[row * mask_stride + (2 * col + 1)]);
      } else {
        alpha = AOM_BLEND_AVG(mask[(2 * row) * mask_stride + col],
                              mask[(2 * row + 1) * mask_stride + col]);
      }
      dst[row * dst_stride + col] =
          AOM_BLEND_A64(alpha, src0[row * src0_stride + col],
                        src1[row * src1_stride + col]);
    }
  }
}
|
||||
|
||||
#if CONFIG_AOM_HIGHBITDEPTH
|
||||
// High-bitdepth variant of the 2-D mask blend. Pixel pointers are converted
// with CONVERT_TO_SHORTPTR and processed as uint16_t; the mask stays 8-bit.
// subw/subh select the mask sampling exactly as in aom_blend_a64_mask_c:
// per-pixel, 2x2 combined, horizontal pair, or (all other values) vertical
// pair. bd must be 8, 10 or 12 and is otherwise unused.
void aom_highbd_blend_a64_mask_c(uint8_t *dst_8, uint32_t dst_stride,
                                 const uint8_t *src0_8, uint32_t src0_stride,
                                 const uint8_t *src1_8, uint32_t src1_stride,
                                 const uint8_t *mask, uint32_t mask_stride,
                                 int h, int w, int subh, int subw, int bd) {
  int row, col;
  uint16_t *dst = CONVERT_TO_SHORTPTR(dst_8);
  const uint16_t *src0 = CONVERT_TO_SHORTPTR(src0_8);
  const uint16_t *src1 = CONVERT_TO_SHORTPTR(src1_8);
  (void)bd;

  assert(IMPLIES(src0 == dst, src0_stride == dst_stride));
  assert(IMPLIES(src1 == dst, src1_stride == dst_stride));

  assert(h >= 1);
  assert(w >= 1);
  assert(IS_POWER_OF_TWO(h));
  assert(IS_POWER_OF_TWO(w));

  assert(bd == 8 || bd == 10 || bd == 12);

  for (row = 0; row < h; ++row) {
    for (col = 0; col < w; ++col) {
      int alpha;
      if (subw == 0 && subh == 0) {
        alpha = mask[row * mask_stride + col];
      } else if (subw == 1 && subh == 1) {
        alpha = ROUND_POWER_OF_TWO(
            mask[(2 * row) * mask_stride + (2 * col)] +
                mask[(2 * row + 1) * mask_stride + (2 * col)] +
                mask[(2 * row) * mask_stride + (2 * col + 1)] +
                mask[(2 * row + 1) * mask_stride + (2 * col + 1)],
            2);
      } else if (subw == 1 && subh == 0) {
        alpha = AOM_BLEND_AVG(mask[row * mask_stride + (2 * col)],
                              mask[row * mask_stride + (2 * col + 1)]);
      } else {
        alpha = AOM_BLEND_AVG(mask[(2 * row) * mask_stride + col],
                              mask[(2 * row + 1) * mask_stride + col]);
      }
      dst[row * dst_stride + col] =
          AOM_BLEND_A64(alpha, src0[row * src0_stride + col],
                        src1[row * src1_stride + col]);
    }
  }
}
|
||||
#endif // CONFIG_AOM_HIGHBITDEPTH
|
||||
@@ -1,73 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2016, Alliance for Open Media. All rights reserved
|
||||
*
|
||||
* This source code is subject to the terms of the BSD 2 Clause License and
|
||||
* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
|
||||
* was not distributed with this source code in the LICENSE file, you can
|
||||
* obtain it at www.aomedia.org/license/software. If the Alliance for Open
|
||||
* Media Patent License 1.0 was not distributed with this source code in the
|
||||
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
|
||||
*/
|
||||
|
||||
#include <assert.h>
|
||||
|
||||
#include "aom/aom_integer.h"
|
||||
#include "aom_ports/mem.h"
|
||||
#include "aom_dsp/aom_dsp_common.h"
|
||||
#include "aom_dsp/blend.h"
|
||||
|
||||
#include "./aom_dsp_rtcd.h"
|
||||
|
||||
// Blends src0 and src1 into dst with AOM_BLEND_A64 using a vertical mask:
// mask[] is indexed by row only, so every pixel in a row shares one alpha.
// h and w must be powers of two and at least 1.
void aom_blend_a64_vmask_c(uint8_t *dst, uint32_t dst_stride,
                           const uint8_t *src0, uint32_t src0_stride,
                           const uint8_t *src1, uint32_t src1_stride,
                           const uint8_t *mask, int h, int w) {
  int row, col;

  assert(IMPLIES(src0 == dst, src0_stride == dst_stride));
  assert(IMPLIES(src1 == dst, src1_stride == dst_stride));

  assert(h >= 1);
  assert(w >= 1);
  assert(IS_POWER_OF_TWO(h));
  assert(IS_POWER_OF_TWO(w));

  for (row = 0; row < h; ++row) {
    const int row_alpha = mask[row];
    col = 0;
    while (col < w) {
      dst[row * dst_stride + col] =
          AOM_BLEND_A64(row_alpha, src0[row * src0_stride + col],
                        src1[row * src1_stride + col]);
      ++col;
    }
  }
}
|
||||
|
||||
#if CONFIG_AOM_HIGHBITDEPTH
|
||||
// High-bitdepth variant of the vertical-mask blend. Pixel pointers are
// converted with CONVERT_TO_SHORTPTR and processed as uint16_t. mask[] is
// indexed by row only. bd must be 8, 10 or 12 and is otherwise unused.
void aom_highbd_blend_a64_vmask_c(uint8_t *dst_8, uint32_t dst_stride,
                                  const uint8_t *src0_8, uint32_t src0_stride,
                                  const uint8_t *src1_8, uint32_t src1_stride,
                                  const uint8_t *mask, int h, int w, int bd) {
  int row, col;
  uint16_t *dst = CONVERT_TO_SHORTPTR(dst_8);
  const uint16_t *src0 = CONVERT_TO_SHORTPTR(src0_8);
  const uint16_t *src1 = CONVERT_TO_SHORTPTR(src1_8);
  (void)bd;

  assert(IMPLIES(src0 == dst, src0_stride == dst_stride));
  assert(IMPLIES(src1 == dst, src1_stride == dst_stride));

  assert(h >= 1);
  assert(w >= 1);
  assert(IS_POWER_OF_TWO(h));
  assert(IS_POWER_OF_TWO(w));

  assert(bd == 8 || bd == 10 || bd == 12);

  for (row = 0; row < h; ++row) {
    const int row_alpha = mask[row];
    col = 0;
    while (col < w) {
      dst[row * dst_stride + col] =
          AOM_BLEND_A64(row_alpha, src0[row * src0_stride + col],
                        src1[row * src1_stride + col]);
      ++col;
    }
  }
}
|
||||
#endif // CONFIG_AOM_HIGHBITDEPTH
|
||||
@@ -1,42 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2016, Alliance for Open Media. All rights reserved
|
||||
*
|
||||
* This source code is subject to the terms of the BSD 2 Clause License and
|
||||
* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
|
||||
* was not distributed with this source code in the LICENSE file, you can
|
||||
* obtain it at www.aomedia.org/license/software. If the Alliance for Open
|
||||
* Media Patent License 1.0 was not distributed with this source code in the
|
||||
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
|
||||
*/
|
||||
|
||||
#include <string.h>
|
||||
|
||||
#include "aom_dsp/buf_ans.h"
|
||||
#include "aom_mem/aom_mem.h"
|
||||
#include "aom/internal/aom_codec_internal.h"
|
||||
|
||||
void aom_buf_ans_alloc(struct BufAnsCoder *c,
|
||||
struct aom_internal_error_info *error, int size_hint) {
|
||||
c->error = error;
|
||||
c->size = size_hint;
|
||||
AOM_CHECK_MEM_ERROR(error, c->buf, aom_malloc(c->size * sizeof(*c->buf)));
|
||||
// Initialize to overfull to trigger the assert in write.
|
||||
c->offset = c->size + 1;
|
||||
}
|
||||
|
||||
void aom_buf_ans_free(struct BufAnsCoder *c) {
|
||||
aom_free(c->buf);
|
||||
c->buf = NULL;
|
||||
c->size = 0;
|
||||
}
|
||||
|
||||
void aom_buf_ans_grow(struct BufAnsCoder *c) {
|
||||
struct buffered_ans_symbol *new_buf = NULL;
|
||||
int new_size = c->size * 2;
|
||||
AOM_CHECK_MEM_ERROR(c->error, new_buf,
|
||||
aom_malloc(new_size * sizeof(*new_buf)));
|
||||
memcpy(new_buf, c->buf, c->size * sizeof(*c->buf));
|
||||
aom_free(c->buf);
|
||||
c->buf = new_buf;
|
||||
c->size = new_size;
|
||||
}
|
||||
@@ -1,112 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2016, Alliance for Open Media. All rights reserved
|
||||
*
|
||||
* This source code is subject to the terms of the BSD 2 Clause License and
|
||||
* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
|
||||
* was not distributed with this source code in the LICENSE file, you can
|
||||
* obtain it at www.aomedia.org/license/software. If the Alliance for Open
|
||||
* Media Patent License 1.0 was not distributed with this source code in the
|
||||
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
|
||||
*/
|
||||
|
||||
#ifndef AOM_DSP_BUF_ANS_H_
|
||||
#define AOM_DSP_BUF_ANS_H_
|
||||
// Buffered forward ANS writer.
|
||||
// Symbols are written to the writer in forward (decode) order and serialized
|
||||
// backwards due to ANS's stack like behavior.
|
||||
|
||||
#include <assert.h>
|
||||
#include "./aom_config.h"
|
||||
#include "aom/aom_integer.h"
|
||||
#include "aom_dsp/ans.h"
|
||||
#include "aom_dsp/answriter.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif // __cplusplus
|
||||
|
||||
#define ANS_METHOD_UABS 0
|
||||
#define ANS_METHOD_RANS 1
|
||||
|
||||
// One queued symbol. Entries are filled by buf_uabs_write()/buf_rans_write()
// and consumed, last to first, by buf_ans_flush().
struct buffered_ans_symbol {
|
||||
unsigned int method : 1; // one of ANS_METHOD_UABS or ANS_METHOD_RANS
|
||||
// TODO(aconverse): Should be possible to write this in terms of start for ABS
|
||||
unsigned int val_start : RANS_PROB_BITS; // Boolean value for ABS
|
||||
// start in symbol cycle for Rans
|
||||
unsigned int prob : RANS_PROB_BITS; // Probability of this symbol
|
||||
};
|
||||
|
||||
// Growable queue of symbols that is later flushed into an AnsCoder.
struct BufAnsCoder {
|
||||
struct aom_internal_error_info *error;  // passed to AOM_CHECK_MEM_ERROR on allocation
|
||||
struct buffered_ans_symbol *buf;  // symbol storage, `size` entries
|
||||
int size;  // capacity of buf, in symbols
|
||||
int offset;  // index of the next entry to fill; size + 1 right after alloc
|
||||
};
|
||||
|
||||
void aom_buf_ans_alloc(struct BufAnsCoder *c,
|
||||
struct aom_internal_error_info *error, int size_hint);
|
||||
|
||||
void aom_buf_ans_free(struct BufAnsCoder *c);
|
||||
|
||||
void aom_buf_ans_grow(struct BufAnsCoder *c);
|
||||
|
||||
static INLINE void buf_ans_write_reset(struct BufAnsCoder *const c) {
|
||||
c->offset = 0;
|
||||
}
|
||||
|
||||
static INLINE void buf_uabs_write(struct BufAnsCoder *const c, uint8_t val,
|
||||
AnsP8 prob) {
|
||||
assert(c->offset <= c->size);
|
||||
if (c->offset == c->size) {
|
||||
aom_buf_ans_grow(c);
|
||||
}
|
||||
c->buf[c->offset].method = ANS_METHOD_UABS;
|
||||
c->buf[c->offset].val_start = val;
|
||||
c->buf[c->offset].prob = prob;
|
||||
++c->offset;
|
||||
}
|
||||
|
||||
static INLINE void buf_rans_write(struct BufAnsCoder *const c,
|
||||
const struct rans_sym *const sym) {
|
||||
assert(c->offset <= c->size);
|
||||
if (c->offset == c->size) {
|
||||
aom_buf_ans_grow(c);
|
||||
}
|
||||
c->buf[c->offset].method = ANS_METHOD_RANS;
|
||||
c->buf[c->offset].val_start = sym->cum_prob;
|
||||
c->buf[c->offset].prob = sym->prob;
|
||||
++c->offset;
|
||||
}
|
||||
|
||||
static INLINE void buf_ans_flush(const struct BufAnsCoder *const c,
|
||||
struct AnsCoder *ans) {
|
||||
int offset;
|
||||
for (offset = c->offset - 1; offset >= 0; --offset) {
|
||||
if (c->buf[offset].method == ANS_METHOD_RANS) {
|
||||
struct rans_sym sym;
|
||||
sym.prob = c->buf[offset].prob;
|
||||
sym.cum_prob = c->buf[offset].val_start;
|
||||
rans_write(ans, &sym);
|
||||
} else {
|
||||
uabs_write(ans, (uint8_t)c->buf[offset].val_start,
|
||||
(AnsP8)c->buf[offset].prob);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static INLINE void buf_uabs_write_bit(struct BufAnsCoder *c, int bit) {
|
||||
buf_uabs_write(c, bit, 128);
|
||||
}
|
||||
|
||||
// Queues the low `bits` bits of `literal`, most significant bit first.
// `bits` must be below 31.
static INLINE void buf_uabs_write_literal(struct BufAnsCoder *c, int literal,
                                          int bits) {
  int shift;

  assert(bits < 31);
  shift = bits - 1;
  while (shift >= 0) {
    buf_uabs_write_bit(c, 1 & (literal >> shift));
    shift--;
  }
}
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
#endif // __cplusplus
|
||||
#endif // AOM_DSP_BUF_ANS_H_
|
||||
@@ -1,37 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2016, Alliance for Open Media. All rights reserved
|
||||
*
|
||||
* This source code is subject to the terms of the BSD 2 Clause License and
|
||||
* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
|
||||
* was not distributed with this source code in the LICENSE file, you can
|
||||
* obtain it at www.aomedia.org/license/software. If the Alliance for Open
|
||||
* Media Patent License 1.0 was not distributed with this source code in the
|
||||
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
|
||||
*/
|
||||
|
||||
#include "aom_dsp/daalaboolreader.h"
|
||||
|
||||
// Prepares `r` to decode `size` bytes starting at `buffer`.
// Returns 1 when a non-zero size is paired with a NULL buffer, 0 otherwise.
//
// The entropy decoder is given one byte less than `size`.
// NOTE(review): presumably this drops the zero byte that
// aom_daala_stop_encode() appends after the coded data - confirm.
// A size of zero or less is clamped to 0. Without the clamp, size == 0
// would hand the decoder a length of -1.
int aom_daala_reader_init(daala_reader *r, const uint8_t *buffer, int size) {
  if (size && !buffer) {
    return 1;
  }
  r->buffer_end = buffer + size;
  r->buffer = buffer;
  od_ec_dec_init(&r->ec, buffer, size > 0 ? size - 1 : 0);
#if CONFIG_ACCOUNTING
  r->accounting = NULL;
#endif
  return 0;
}
|
||||
|
||||
// Returns the end pointer recorded by aom_daala_reader_init()
// (buffer + size).
const uint8_t *aom_daala_reader_find_end(daala_reader *r) {
  const uint8_t *const end = r->buffer_end;
  return end;
}
|
||||
|
||||
uint32_t aom_daala_reader_tell(const daala_reader *r) {
|
||||
return od_ec_dec_tell(&r->ec);
|
||||
}
|
||||
|
||||
uint32_t aom_daala_reader_tell_frac(const daala_reader *r) {
|
||||
return od_ec_dec_tell_frac(&r->ec);
|
||||
}
|
||||
@@ -1,87 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2016, Alliance for Open Media. All rights reserved
|
||||
*
|
||||
* This source code is subject to the terms of the BSD 2 Clause License and
|
||||
* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
|
||||
* was not distributed with this source code in the LICENSE file, you can
|
||||
* obtain it at www.aomedia.org/license/software. If the Alliance for Open
|
||||
* Media Patent License 1.0 was not distributed with this source code in the
|
||||
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
|
||||
*/
|
||||
|
||||
#ifndef AOM_DSP_DAALABOOLREADER_H_
|
||||
#define AOM_DSP_DAALABOOLREADER_H_
|
||||
|
||||
#include "aom/aom_integer.h"
|
||||
#include "aom_dsp/entdec.h"
|
||||
#include "aom_dsp/prob.h"
|
||||
#if CONFIG_ACCOUNTING
|
||||
#include "av1/common/accounting.h"
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
// Reader state wrapping a Daala entropy decoder over a byte buffer.
struct daala_reader {
|
||||
const uint8_t *buffer;  // start of the input, as passed to aom_daala_reader_init()
|
||||
const uint8_t *buffer_end;  // buffer + size, as computed by aom_daala_reader_init()
|
||||
od_ec_dec ec;  // decoder state handed to the od_ec_* calls
|
||||
#if CONFIG_ACCOUNTING
|
||||
Accounting *accounting;  // set to NULL by aom_daala_reader_init()
|
||||
#endif
|
||||
};
|
||||
|
||||
typedef struct daala_reader daala_reader;
|
||||
|
||||
int aom_daala_reader_init(daala_reader *r, const uint8_t *buffer, int size);
|
||||
const uint8_t *aom_daala_reader_find_end(daala_reader *r);
|
||||
uint32_t aom_daala_reader_tell(const daala_reader *r);
|
||||
uint32_t aom_daala_reader_tell_frac(const daala_reader *r);
|
||||
|
||||
// Reads one binary decision. Probability 128 is read through
// od_ec_dec_bits(); any other probability is rescaled to a 15-bit value and
// decoded with od_ec_decode_bool_q15().
static INLINE int aom_daala_read(daala_reader *r, int prob) {
  int p;

  if (prob == 128) return od_ec_dec_bits(&r->ec, 1, "aom_bits");

  p = ((prob << 15) + (256 - prob)) >> 8;
  return od_ec_decode_bool_q15(&r->ec, p);
}
|
||||
|
||||
// Reads a single bit using probability 128.
static INLINE int aom_daala_read_bit(daala_reader *r) {
  const int half_probability = 128;
  return aom_daala_read(r, half_probability);
}
|
||||
|
||||
// Returns the `error` field of the underlying entropy decoder.
static INLINE int aom_daala_reader_has_error(daala_reader *r) {
  const od_ec_dec *const dec = &r->ec;
  return dec->error;
}
|
||||
|
||||
static INLINE int daala_read_tree_bits(daala_reader *r,
|
||||
const aom_tree_index *tree,
|
||||
const aom_prob *probs) {
|
||||
aom_tree_index i = 0;
|
||||
do {
|
||||
aom_cdf_prob cdf[16];
|
||||
aom_tree_index index[16];
|
||||
int path[16];
|
||||
int dist[16];
|
||||
int nsymbs;
|
||||
int symb;
|
||||
nsymbs = tree_to_cdf(tree, probs, i, cdf, index, path, dist);
|
||||
symb = od_ec_decode_cdf_q15(&r->ec, cdf, nsymbs);
|
||||
OD_ASSERT(symb >= 0 && symb < nsymbs);
|
||||
i = index[symb];
|
||||
} while (i > 0);
|
||||
return -i;
|
||||
}
|
||||
|
||||
// Decodes one symbol from the `nsymbs`-entry table `cdf` by forwarding to
// od_ec_decode_cdf_q15().
static INLINE int daala_read_symbol(daala_reader *r, const aom_cdf_prob *cdf,
                                    int nsymbs) {
  const int symb = od_ec_decode_cdf_q15(&r->ec, cdf, nsymbs);
  return symb;
}
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
#endif
|
||||
|
||||
#endif
|
||||
@@ -1,32 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2016, Alliance for Open Media. All rights reserved
|
||||
*
|
||||
* This source code is subject to the terms of the BSD 2 Clause License and
|
||||
* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
|
||||
* was not distributed with this source code in the LICENSE file, you can
|
||||
* obtain it at www.aomedia.org/license/software. If the Alliance for Open
|
||||
* Media Patent License 1.0 was not distributed with this source code in the
|
||||
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
|
||||
*/
|
||||
|
||||
#include <string.h>
|
||||
#include "aom_dsp/daalaboolwriter.h"
|
||||
|
||||
// Starts an encode: remembers `source` as the output buffer, resets the
// byte count and initialises the entropy encoder.
void aom_daala_start_encode(daala_writer *br, uint8_t *source) {
  br->pos = 0;
  br->buffer = source;
  // NOTE(review): 62025 is the value handed to od_ec_enc_init(); its meaning
  // is defined by that function - confirm before changing.
  od_ec_enc_init(&br->ec, 62025);
}
|
||||
|
||||
// Finishes the encode: copies the encoder's output into br->buffer, appends
// one zero byte, stores the total length in br->pos and releases the
// encoder.
void aom_daala_stop_encode(daala_writer *br) {
  uint32_t coded_bytes;
  unsigned char *coded_data;

  coded_data = od_ec_enc_done(&br->ec, &coded_bytes);
  memcpy(br->buffer, coded_data, coded_bytes);
  /* Prevent ec bitstream from being detected as a superframe marker.
     Must always be added, so that rawbits knows the exact length of the
     bitstream. */
  br->buffer[coded_bytes] = 0;
  br->pos = coded_bytes + 1;
  od_ec_enc_clear(&br->ec);
}
|
||||
@@ -1,90 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2016, Alliance for Open Media. All rights reserved
|
||||
*
|
||||
* This source code is subject to the terms of the BSD 2 Clause License and
|
||||
* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
|
||||
* was not distributed with this source code in the LICENSE file, you can
|
||||
* obtain it at www.aomedia.org/license/software. If the Alliance for Open
|
||||
* Media Patent License 1.0 was not distributed with this source code in the
|
||||
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
|
||||
*/
|
||||
|
||||
#ifndef AOM_DSP_DAALABOOLWRITER_H_
|
||||
#define AOM_DSP_DAALABOOLWRITER_H_
|
||||
|
||||
#include "aom_dsp/entenc.h"
|
||||
#include "aom_dsp/prob.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
// Writer state wrapping a Daala entropy encoder and its output buffer.
struct daala_writer {
|
||||
unsigned int pos;  // 0 after start; bytes stored in buffer after aom_daala_stop_encode()
|
||||
uint8_t *buffer;  // output buffer passed to aom_daala_start_encode()
|
||||
od_ec_enc ec;  // encoder state handed to the od_ec_* calls
|
||||
};
|
||||
|
||||
typedef struct daala_writer daala_writer;
|
||||
|
||||
void aom_daala_start_encode(daala_writer *w, uint8_t *buffer);
|
||||
void aom_daala_stop_encode(daala_writer *w);
|
||||
|
||||
// Writes one binary decision. Probability 128 is written through
// od_ec_enc_bits(); any other probability is rescaled to a 15-bit value and
// coded with od_ec_encode_bool_q15().
static INLINE void aom_daala_write(daala_writer *w, int bit, int prob) {
  int p;

  if (prob == 128) {
    od_ec_enc_bits(&w->ec, bit, 1);
    return;
  }
  p = ((prob << 15) + (256 - prob)) >> 8;
  od_ec_encode_bool_q15(&w->ec, bit, p);
}
|
||||
|
||||
static INLINE void daala_write_tree_bits(daala_writer *w,
|
||||
const aom_tree_index *tree,
|
||||
const aom_prob *probs, int bits,
|
||||
int len, aom_tree_index i) {
|
||||
aom_tree_index root;
|
||||
root = i;
|
||||
do {
|
||||
aom_cdf_prob cdf[16];
|
||||
aom_tree_index index[16];
|
||||
int path[16];
|
||||
int dist[16];
|
||||
int nsymbs;
|
||||
int symb;
|
||||
int j;
|
||||
/* Compute the CDF of the binary tree using the given probabilities. */
|
||||
nsymbs = tree_to_cdf(tree, probs, root, cdf, index, path, dist);
|
||||
/* Find the symbol to code. */
|
||||
symb = -1;
|
||||
for (j = 0; j < nsymbs; j++) {
|
||||
/* If this symbol codes a leaf node, */
|
||||
if (index[j] <= 0) {
|
||||
if (len == dist[j] && path[j] == bits) {
|
||||
symb = j;
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
if (len > dist[j] && path[j] == bits >> (len - dist[j])) {
|
||||
symb = j;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
OD_ASSERT(symb != -1);
|
||||
od_ec_encode_cdf_q15(&w->ec, symb, cdf, nsymbs);
|
||||
bits &= (1 << (len - dist[symb])) - 1;
|
||||
len -= dist[symb];
|
||||
} while (len);
|
||||
}
|
||||
|
||||
static INLINE void daala_write_symbol(daala_writer *w, int symb,
|
||||
const aom_cdf_prob *cdf, int nsymbs) {
|
||||
od_ec_encode_cdf_q15(&w->ec, symb, cdf, nsymbs);
|
||||
}
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
#endif
|
||||
|
||||
#endif
|
||||
@@ -1,195 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2016, Alliance for Open Media. All rights reserved
|
||||
*
|
||||
* This source code is subject to the terms of the BSD 2 Clause License and
|
||||
* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
|
||||
* was not distributed with this source code in the LICENSE file, you can
|
||||
* obtain it at www.aomedia.org/license/software. If the Alliance for Open
|
||||
* Media Patent License 1.0 was not distributed with this source code in the
|
||||
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
|
||||
*
|
||||
*/
|
||||
|
||||
#include <stdlib.h>
|
||||
#include "aom/aom_integer.h"
|
||||
|
||||
const int16_t aom_rv[] = {
|
||||
8, 5, 2, 2, 8, 12, 4, 9, 8, 3, 0, 3, 9, 0, 0, 0, 8, 3, 14,
|
||||
4, 10, 1, 11, 14, 1, 14, 9, 6, 12, 11, 8, 6, 10, 0, 0, 8, 9, 0,
|
||||
3, 14, 8, 11, 13, 4, 2, 9, 0, 3, 9, 6, 1, 2, 3, 14, 13, 1, 8,
|
||||
2, 9, 7, 3, 3, 1, 13, 13, 6, 6, 5, 2, 7, 11, 9, 11, 8, 7, 3,
|
||||
2, 0, 13, 13, 14, 4, 12, 5, 12, 10, 8, 10, 13, 10, 4, 14, 4, 10, 0,
|
||||
8, 11, 1, 13, 7, 7, 14, 6, 14, 13, 2, 13, 5, 4, 4, 0, 10, 0, 5,
|
||||
13, 2, 12, 7, 11, 13, 8, 0, 4, 10, 7, 2, 7, 2, 2, 5, 3, 4, 7,
|
||||
3, 3, 14, 14, 5, 9, 13, 3, 14, 3, 6, 3, 0, 11, 8, 13, 1, 13, 1,
|
||||
12, 0, 10, 9, 7, 6, 2, 8, 5, 2, 13, 7, 1, 13, 14, 7, 6, 7, 9,
|
||||
6, 10, 11, 7, 8, 7, 5, 14, 8, 4, 4, 0, 8, 7, 10, 0, 8, 14, 11,
|
||||
3, 12, 5, 7, 14, 3, 14, 5, 2, 6, 11, 12, 12, 8, 0, 11, 13, 1, 2,
|
||||
0, 5, 10, 14, 7, 8, 0, 4, 11, 0, 8, 0, 3, 10, 5, 8, 0, 11, 6,
|
||||
7, 8, 10, 7, 13, 9, 2, 5, 1, 5, 10, 2, 4, 3, 5, 6, 10, 8, 9,
|
||||
4, 11, 14, 0, 10, 0, 5, 13, 2, 12, 7, 11, 13, 8, 0, 4, 10, 7, 2,
|
||||
7, 2, 2, 5, 3, 4, 7, 3, 3, 14, 14, 5, 9, 13, 3, 14, 3, 6, 3,
|
||||
0, 11, 8, 13, 1, 13, 1, 12, 0, 10, 9, 7, 6, 2, 8, 5, 2, 13, 7,
|
||||
1, 13, 14, 7, 6, 7, 9, 6, 10, 11, 7, 8, 7, 5, 14, 8, 4, 4, 0,
|
||||
8, 7, 10, 0, 8, 14, 11, 3, 12, 5, 7, 14, 3, 14, 5, 2, 6, 11, 12,
|
||||
12, 8, 0, 11, 13, 1, 2, 0, 5, 10, 14, 7, 8, 0, 4, 11, 0, 8, 0,
|
||||
3, 10, 5, 8, 0, 11, 6, 7, 8, 10, 7, 13, 9, 2, 5, 1, 5, 10, 2,
|
||||
4, 3, 5, 6, 10, 8, 9, 4, 11, 14, 3, 8, 3, 7, 8, 5, 11, 4, 12,
|
||||
3, 11, 9, 14, 8, 14, 13, 4, 3, 1, 2, 14, 6, 5, 4, 4, 11, 4, 6,
|
||||
2, 1, 5, 8, 8, 12, 13, 5, 14, 10, 12, 13, 0, 9, 5, 5, 11, 10, 13,
|
||||
9, 10, 13,
|
||||
};
|
||||
|
||||
/* Applies a vertical and then a horizontal 5-tap smoothing filter to `size`
 * rows of pixels.
 *
 * src_ptr:             First pixel of the first source row. Two rows above
 *                      and two rows below every processed row are read.
 * dst_ptr:             First pixel of the first destination row. Two bytes
 *                      before and two bytes after each row are overwritten
 *                      with replicated edge pixels.
 * src_pixels_per_line: Distance between consecutive source rows.
 * dst_pixels_per_line: Distance between consecutive destination rows.
 * cols:                Number of pixels per row. Nothing is done when this
 *                      is not positive.
 * f:                   Per-column thresholds; a pixel is only smoothed when
 *                      it differs from all four neighbours by less than
 *                      f[col].
 * size:                Number of rows to process. */
void aom_post_proc_down_and_across_mb_row_c(unsigned char *src_ptr,
                                            unsigned char *dst_ptr,
                                            int src_pixels_per_line,
                                            int dst_pixels_per_line, int cols,
                                            unsigned char *f, int size) {
  unsigned char *p_src, *p_dst;
  int row;
  int col;
  unsigned char v;
  unsigned char d[4];

  /* With no columns there is nothing to filter, and the border replication
     and final flush below would touch bytes outside the row. */
  if (cols <= 0) return;

  for (row = 0; row < size; row++) {
    /* post_proc_down for one row */
    p_src = src_ptr;
    p_dst = dst_ptr;

    for (col = 0; col < cols; col++) {
      unsigned char p_above2 = p_src[col - 2 * src_pixels_per_line];
      unsigned char p_above1 = p_src[col - src_pixels_per_line];
      unsigned char p_below1 = p_src[col + src_pixels_per_line];
      unsigned char p_below2 = p_src[col + 2 * src_pixels_per_line];

      v = p_src[col];

      if ((abs(v - p_above2) < f[col]) && (abs(v - p_above1) < f[col]) &&
          (abs(v - p_below1) < f[col]) && (abs(v - p_below2) < f[col])) {
        unsigned char k1, k2, k3;
        k1 = (p_above2 + p_above1 + 1) >> 1;
        k2 = (p_below2 + p_below1 + 1) >> 1;
        k3 = (k1 + k2 + 1) >> 1;
        v = (k3 + v + 1) >> 1;
      }

      p_dst[col] = v;
    }

    /* now post_proc_across, in place on the destination row */
    p_src = dst_ptr;
    p_dst = dst_ptr;

    /* Replicate the edge pixels into the two-byte borders. */
    p_src[-2] = p_src[-1] = p_src[0];
    p_src[cols] = p_src[cols + 1] = p_src[cols - 1];

    for (col = 0; col < cols; col++) {
      v = p_src[col];

      if ((abs(v - p_src[col - 2]) < f[col]) &&
          (abs(v - p_src[col - 1]) < f[col]) &&
          (abs(v - p_src[col + 1]) < f[col]) &&
          (abs(v - p_src[col + 2]) < f[col])) {
        unsigned char k1, k2, k3;
        k1 = (p_src[col - 2] + p_src[col - 1] + 1) >> 1;
        k2 = (p_src[col + 2] + p_src[col + 1] + 1) >> 1;
        k3 = (k1 + k2 + 1) >> 1;
        v = (k3 + v + 1) >> 1;
      }

      /* Results are held back by two pixels so that the taps to the left
         still see unfiltered values. */
      d[col & 3] = v;

      if (col >= 2) p_dst[col - 2] = d[(col - 2) & 3];
    }

    /* Flush the last two pixels. A one-pixel row has only one pending
       result; flushing two would copy an unset d[] slot in front of the
       row. */
    if (cols >= 2) p_dst[cols - 2] = d[(cols - 2) & 3];
    p_dst[cols - 1] = d[(cols - 1) & 3];

    /* next row */
    src_ptr += src_pixels_per_line;
    dst_ptr += dst_pixels_per_line;
  }
}
|
||||
|
||||
/* Horizontal in-place smoothing of `rows` rows of `cols` pixels.
 *
 * src:    First pixel of the first row. The 8 bytes before and the 17 bytes
 *         after each row are overwritten and must be writable.
 * pitch:  Distance between consecutive rows.
 * rows:   Number of rows to process.
 * cols:   Number of pixels per row.
 * flimit: A pixel is replaced by the rounded mean of its 15-pixel
 *         neighbourhood (plus itself) only when
 *         15 * sum_of_squares - sum^2 over that neighbourhood is below this
 *         limit. */
void aom_mbpost_proc_across_ip_c(unsigned char *src, int pitch, int rows,
                                 int cols, int flimit) {
  int r, c, i;

  unsigned char *s = src;
  /* Zero-initialised so that d[15], which the set-up loop below never
     touches, holds a defined value when the first row copies it into s[-1]. */
  unsigned char d[16] = { 0 };

  for (r = 0; r < rows; r++) {
    int sumsq = 0;
    int sum = 0;

    /* Replicate the first pixel into the left border. */
    for (i = -8; i < 0; i++) s[i] = s[0];

    /* Replicate the last pixel into the right border. 17 bytes are filled
     * (more than the running window reads) to keep valgrind quiet: results
     * are buffered in d and only written back once the loop has read 8
     * pixels ahead.
     */
    for (i = 0; i < 17; i++) s[i + cols] = s[cols - 1];

    /* Prime the running sums with the 15 pixels left of the first window
       update and clear the slots that are flushed into the left border. */
    for (i = -8; i <= 6; i++) {
      sumsq += s[i] * s[i];
      sum += s[i];
      d[i + 8] = 0;
    }

    for (c = 0; c < cols + 8; c++) {
      /* Slide the window: s[c + 7] enters and s[c - 8] leaves.
         x * y == s[c + 7]^2 - s[c - 8]^2. */
      int x = s[c + 7] - s[c - 8];
      int y = s[c + 7] + s[c - 8];

      sum += x;
      sumsq += x * y;

      d[c & 15] = s[c];

      if (sumsq * 15 - sum * sum < flimit) {
        d[c & 15] = (8 + sum + s[c]) >> 4;
      }

      /* Write back the result computed 8 iterations ago; its source pixel
         has just left the window. */
      s[c - 8] = d[(c - 8) & 15];
    }

    s += pitch;
  }
}
|
||||
|
||||
void aom_mbpost_proc_down_c(unsigned char *dst, int pitch, int rows, int cols,
|
||||
int flimit) {
|
||||
int r, c, i;
|
||||
const int16_t *rv3 = &aom_rv[63 & rand()];
|
||||
|
||||
for (c = 0; c < cols; c++) {
|
||||
unsigned char *s = &dst[c];
|
||||
int sumsq = 0;
|
||||
int sum = 0;
|
||||
unsigned char d[16];
|
||||
const int16_t *rv2 = rv3 + ((c * 17) & 127);
|
||||
|
||||
for (i = -8; i < 0; i++) s[i * pitch] = s[0];
|
||||
|
||||
/* 17 avoids valgrind warning - we buffer values in c in d
|
||||
* and only write them when we've read 8 ahead...
|
||||
*/
|
||||
for (i = 0; i < 17; i++) s[(i + rows) * pitch] = s[(rows - 1) * pitch];
|
||||
|
||||
for (i = -8; i <= 6; i++) {
|
||||
sumsq += s[i * pitch] * s[i * pitch];
|
||||
sum += s[i * pitch];
|
||||
}
|
||||
|
||||
for (r = 0; r < rows + 8; r++) {
|
||||
sumsq += s[7 * pitch] * s[7 * pitch] - s[-8 * pitch] * s[-8 * pitch];
|
||||
sum += s[7 * pitch] - s[-8 * pitch];
|
||||
d[r & 15] = s[0];
|
||||
|
||||
if (sumsq * 15 - sum * sum < flimit) {
|
||||
d[r & 15] = (rv2[r & 127] + sum + s[0]) >> 4;
|
||||
}
|
||||
if (r >= 8) s[-8 * pitch] = d[(r - 8) & 15];
|
||||
s += pitch;
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,180 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2016, Alliance for Open Media. All rights reserved
|
||||
*
|
||||
* This source code is subject to the terms of the BSD 2 Clause License and
|
||||
* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
|
||||
* was not distributed with this source code in the LICENSE file, you can
|
||||
* obtain it at www.aomedia.org/license/software. If the Alliance for Open
|
||||
* Media Patent License 1.0 was not distributed with this source code in the
|
||||
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
|
||||
*/
|
||||
|
||||
#ifndef AOM_DSP_DKBOOLREADER_H_
|
||||
#define AOM_DSP_DKBOOLREADER_H_
|
||||
|
||||
#include <assert.h>
|
||||
#include <stddef.h>
|
||||
#include <limits.h>
|
||||
|
||||
#include "./aom_config.h"
|
||||
#if CONFIG_BITSTREAM_DEBUG
|
||||
#include <assert.h>
|
||||
#include <stdio.h>
|
||||
#include "aom_util/debug_util.h"
|
||||
#endif // CONFIG_BITSTREAM_DEBUG
|
||||
|
||||
#include "aom_ports/mem.h"
|
||||
#include "aom/aomdx.h"
|
||||
#include "aom/aom_integer.h"
|
||||
#include "aom_dsp/prob.h"
|
||||
#if CONFIG_ACCOUNTING
|
||||
#include "av1/common/accounting.h"
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
typedef size_t BD_VALUE;
|
||||
|
||||
#define BD_VALUE_SIZE ((int)sizeof(BD_VALUE) * CHAR_BIT)
|
||||
|
||||
// This is meant to be a large, positive constant that can still be efficiently
|
||||
// loaded as an immediate (on platforms like ARM, for example).
|
||||
// Even relatively modest values like 100 would work fine.
|
||||
#define LOTS_OF_BITS 0x40000000
|
||||
|
||||
struct aom_dk_reader {
|
||||
// Be careful when reordering this struct, it may impact the cache negatively.
|
||||
BD_VALUE value;
|
||||
unsigned int range;
|
||||
int count;
|
||||
const uint8_t *buffer_start;
|
||||
const uint8_t *buffer_end;
|
||||
const uint8_t *buffer;
|
||||
aom_decrypt_cb decrypt_cb;
|
||||
void *decrypt_state;
|
||||
uint8_t clear_buffer[sizeof(BD_VALUE) + 1];
|
||||
#if CONFIG_ACCOUNTING
|
||||
Accounting *accounting;
|
||||
#endif
|
||||
};
|
||||
|
||||
int aom_dk_reader_init(struct aom_dk_reader *r, const uint8_t *buffer,
|
||||
size_t size, aom_decrypt_cb decrypt_cb,
|
||||
void *decrypt_state);
|
||||
|
||||
void aom_dk_reader_fill(struct aom_dk_reader *r);
|
||||
|
||||
const uint8_t *aom_dk_reader_find_end(struct aom_dk_reader *r);
|
||||
|
||||
static INLINE uint32_t aom_dk_reader_tell(const struct aom_dk_reader *r) {
|
||||
const uint32_t bits_read = (r->buffer - r->buffer_start) * CHAR_BIT;
|
||||
const int count =
|
||||
(r->count < LOTS_OF_BITS) ? r->count : r->count - LOTS_OF_BITS;
|
||||
assert(r->buffer >= r->buffer_start);
|
||||
return bits_read - (count + CHAR_BIT);
|
||||
}
|
||||
|
||||
/*The resolution of fractional-precision bit usage measurements, i.e.,
|
||||
3 => 1/8th bits.*/
|
||||
#define DK_BITRES (3)
|
||||
|
||||
static INLINE uint32_t aom_dk_reader_tell_frac(const struct aom_dk_reader *r) {
|
||||
uint32_t num_bits;
|
||||
uint32_t range;
|
||||
int l;
|
||||
int i;
|
||||
num_bits = aom_dk_reader_tell(r) << DK_BITRES;
|
||||
range = r->range;
|
||||
l = 0;
|
||||
for (i = DK_BITRES; i-- > 0;) {
|
||||
int b;
|
||||
range = range * range >> 7;
|
||||
b = (int)(range >> 8);
|
||||
l = l << 1 | b;
|
||||
range >>= b;
|
||||
}
|
||||
return num_bits - l;
|
||||
}
|
||||
|
||||
static INLINE int aom_dk_reader_has_error(struct aom_dk_reader *r) {
|
||||
// Check if we have reached the end of the buffer.
|
||||
//
|
||||
// Variable 'count' stores the number of bits in the 'value' buffer, minus
|
||||
// 8. The top byte is part of the algorithm, and the remainder is buffered
|
||||
// to be shifted into it. So if count == 8, the top 16 bits of 'value' are
|
||||
// occupied, 8 for the algorithm and 8 in the buffer.
|
||||
//
|
||||
// When reading a byte from the user's buffer, count is filled with 8 and
|
||||
// one byte is filled into the value buffer. When we reach the end of the
|
||||
// data, count is additionally filled with LOTS_OF_BITS. So when
|
||||
// count == LOTS_OF_BITS - 1, the user's data has been exhausted.
|
||||
//
|
||||
// 1 if we have tried to decode bits after the end of stream was encountered.
|
||||
// 0 No error.
|
||||
return r->count > BD_VALUE_SIZE && r->count < LOTS_OF_BITS;
|
||||
}
|
||||
|
||||
static INLINE int aom_dk_read(struct aom_dk_reader *r, int prob) {
|
||||
unsigned int bit = 0;
|
||||
BD_VALUE value;
|
||||
BD_VALUE bigsplit;
|
||||
int count;
|
||||
unsigned int range;
|
||||
unsigned int split = (r->range * prob + (256 - prob)) >> CHAR_BIT;
|
||||
|
||||
if (r->count < 0) aom_dk_reader_fill(r);
|
||||
|
||||
value = r->value;
|
||||
count = r->count;
|
||||
|
||||
bigsplit = (BD_VALUE)split << (BD_VALUE_SIZE - CHAR_BIT);
|
||||
|
||||
range = split;
|
||||
|
||||
if (value >= bigsplit) {
|
||||
range = r->range - split;
|
||||
value = value - bigsplit;
|
||||
bit = 1;
|
||||
}
|
||||
|
||||
{
|
||||
register int shift = aom_norm[range];
|
||||
range <<= shift;
|
||||
value <<= shift;
|
||||
count -= shift;
|
||||
}
|
||||
r->value = value;
|
||||
r->count = count;
|
||||
r->range = range;
|
||||
|
||||
#if CONFIG_BITSTREAM_DEBUG
|
||||
{
|
||||
int ref_bit, ref_prob;
|
||||
const int queue_r = bitstream_queue_get_read();
|
||||
const int frame_idx = bitstream_queue_get_frame_read();
|
||||
bitstream_queue_pop(&ref_bit, &ref_prob);
|
||||
if (prob != ref_prob) {
|
||||
fprintf(
|
||||
stderr,
|
||||
"\n *** prob error, frame_idx_r %d prob %d ref_prob %d queue_r %d\n",
|
||||
frame_idx, prob, ref_prob, queue_r);
|
||||
assert(0);
|
||||
}
|
||||
if ((int)bit != ref_bit) {
|
||||
fprintf(stderr, "\n *** bit error, frame_idx_r %d bit %d ref_bit %d\n",
|
||||
frame_idx, bit, ref_bit);
|
||||
assert(0);
|
||||
}
|
||||
}
|
||||
#endif // CONFIG_BITSTREAM_DEBUG
|
||||
|
||||
return bit;
|
||||
}
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
#endif
|
||||
|
||||
#endif // AOM_DSP_DKBOOLREADER_H_
|
||||
@@ -1,44 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2016, Alliance for Open Media. All rights reserved
|
||||
*
|
||||
* This source code is subject to the terms of the BSD 2 Clause License and
|
||||
* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
|
||||
* was not distributed with this source code in the LICENSE file, you can
|
||||
* obtain it at www.aomedia.org/license/software. If the Alliance for Open
|
||||
* Media Patent License 1.0 was not distributed with this source code in the
|
||||
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
|
||||
*/
|
||||
|
||||
#include <assert.h>
|
||||
|
||||
#include "./dkboolwriter.h"
|
||||
|
||||
static INLINE void aom_dk_write_bit(aom_dk_writer *w, int bit) {
|
||||
aom_dk_write(w, bit, 128); // aom_prob_half
|
||||
}
|
||||
|
||||
void aom_dk_start_encode(aom_dk_writer *br, uint8_t *source) {
|
||||
br->lowvalue = 0;
|
||||
br->range = 255;
|
||||
br->count = -24;
|
||||
br->buffer = source;
|
||||
br->pos = 0;
|
||||
aom_dk_write_bit(br, 0);
|
||||
}
|
||||
|
||||
void aom_dk_stop_encode(aom_dk_writer *br) {
|
||||
int i;
|
||||
|
||||
#if CONFIG_BITSTREAM_DEBUG
|
||||
bitstream_queue_set_skip_write(1);
|
||||
#endif // CONFIG_BITSTREAM_DEBUG
|
||||
|
||||
for (i = 0; i < 32; i++) aom_dk_write_bit(br, 0);
|
||||
|
||||
#if CONFIG_BITSTREAM_DEBUG
|
||||
bitstream_queue_set_skip_write(0);
|
||||
#endif // CONFIG_BITSTREAM_DEBUG
|
||||
|
||||
// Ensure there's no ambiguous collision with any index marker bytes
|
||||
if ((br->buffer[br->pos - 1] & 0xe0) == 0xc0) br->buffer[br->pos++] = 0;
|
||||
}
|
||||
@@ -1,104 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2016, Alliance for Open Media. All rights reserved
|
||||
*
|
||||
* This source code is subject to the terms of the BSD 2 Clause License and
|
||||
* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
|
||||
* was not distributed with this source code in the LICENSE file, you can
|
||||
* obtain it at www.aomedia.org/license/software. If the Alliance for Open
|
||||
* Media Patent License 1.0 was not distributed with this source code in the
|
||||
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
|
||||
*/
|
||||
|
||||
#ifndef AOM_DSP_DKBOOLWRITER_H_
|
||||
#define AOM_DSP_DKBOOLWRITER_H_
|
||||
|
||||
#include "./aom_config.h"
|
||||
|
||||
#if CONFIG_BITSTREAM_DEBUG
|
||||
#include <stdio.h>
|
||||
#include "aom_util/debug_util.h"
|
||||
#endif // CONFIG_BITSTREAM_DEBUG
|
||||
|
||||
#include "aom_dsp/prob.h"
|
||||
#include "aom_ports/mem.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
typedef struct aom_dk_writer {
|
||||
unsigned int lowvalue;
|
||||
unsigned int range;
|
||||
int count;
|
||||
unsigned int pos;
|
||||
uint8_t *buffer;
|
||||
} aom_dk_writer;
|
||||
|
||||
void aom_dk_start_encode(aom_dk_writer *bc, uint8_t *buffer);
|
||||
void aom_dk_stop_encode(aom_dk_writer *bc);
|
||||
|
||||
static INLINE void aom_dk_write(aom_dk_writer *br, int bit, int probability) {
|
||||
unsigned int split;
|
||||
int count = br->count;
|
||||
unsigned int range = br->range;
|
||||
unsigned int lowvalue = br->lowvalue;
|
||||
register int shift;
|
||||
|
||||
#if CONFIG_BITSTREAM_DEBUG
|
||||
// int queue_r = 0;
|
||||
// int frame_idx_r = 0;
|
||||
// int queue_w = bitstream_queue_get_write();
|
||||
// int frame_idx_w = bitstream_queue_get_frame_write();
|
||||
// if (frame_idx_w == frame_idx_r && queue_w == queue_r) {
|
||||
// fprintf(stderr, "\n *** bitstream queue at frame_idx_w %d queue_w %d\n",
|
||||
// frame_idx_w, queue_w);
|
||||
// }
|
||||
bitstream_queue_push(bit, probability);
|
||||
#endif // CONFIG_BITSTREAM_DEBUG
|
||||
|
||||
split = 1 + (((range - 1) * probability) >> 8);
|
||||
|
||||
range = split;
|
||||
|
||||
if (bit) {
|
||||
lowvalue += split;
|
||||
range = br->range - split;
|
||||
}
|
||||
|
||||
shift = aom_norm[range];
|
||||
|
||||
range <<= shift;
|
||||
count += shift;
|
||||
|
||||
if (count >= 0) {
|
||||
int offset = shift - count;
|
||||
|
||||
if ((lowvalue << (offset - 1)) & 0x80000000) {
|
||||
int x = br->pos - 1;
|
||||
|
||||
while (x >= 0 && br->buffer[x] == 0xff) {
|
||||
br->buffer[x] = 0;
|
||||
x--;
|
||||
}
|
||||
|
||||
br->buffer[x] += 1;
|
||||
}
|
||||
|
||||
br->buffer[br->pos++] = (lowvalue >> (24 - offset));
|
||||
lowvalue <<= offset;
|
||||
shift = count;
|
||||
lowvalue &= 0xffffff;
|
||||
count -= 8;
|
||||
}
|
||||
|
||||
lowvalue <<= shift;
|
||||
br->count = count;
|
||||
br->lowvalue = lowvalue;
|
||||
br->range = range;
|
||||
}
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
#endif
|
||||
|
||||
#endif // AOM_DSP_DKBOOLWRITER_H_
|
||||
@@ -1,80 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2001-2016, Alliance for Open Media. All rights reserved
|
||||
*
|
||||
* This source code is subject to the terms of the BSD 2 Clause License and
|
||||
* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
|
||||
* was not distributed with this source code in the LICENSE file, you can
|
||||
* obtain it at www.aomedia.org/license/software. If the Alliance for Open
|
||||
* Media Patent License 1.0 was not distributed with this source code in the
|
||||
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
|
||||
*/
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "./config.h"
|
||||
#endif
|
||||
|
||||
#include "aom_dsp/entcode.h"
|
||||
|
||||
/*CDFs for uniform probability distributions of small sizes (2 through 16,
|
||||
inclusive).*/
|
||||
// clang-format off
|
||||
const uint16_t OD_UNIFORM_CDFS_Q15[135] = {
|
||||
16384, 32768,
|
||||
10923, 21845, 32768,
|
||||
8192, 16384, 24576, 32768,
|
||||
6554, 13107, 19661, 26214, 32768,
|
||||
5461, 10923, 16384, 21845, 27307, 32768,
|
||||
4681, 9362, 14043, 18725, 23406, 28087, 32768,
|
||||
4096, 8192, 12288, 16384, 20480, 24576, 28672, 32768,
|
||||
3641, 7282, 10923, 14564, 18204, 21845, 25486, 29127, 32768,
|
||||
3277, 6554, 9830, 13107, 16384, 19661, 22938, 26214, 29491, 32768,
|
||||
2979, 5958, 8937, 11916, 14895, 17873, 20852, 23831, 26810, 29789, 32768,
|
||||
2731, 5461, 8192, 10923, 13653, 16384, 19115, 21845, 24576, 27307, 30037,
|
||||
32768,
|
||||
2521, 5041, 7562, 10082, 12603, 15124, 17644, 20165, 22686, 25206, 27727,
|
||||
30247, 32768,
|
||||
2341, 4681, 7022, 9362, 11703, 14043, 16384, 18725, 21065, 23406, 25746,
|
||||
28087, 30427, 32768,
|
||||
2185, 4369, 6554, 8738, 10923, 13107, 15292, 17476, 19661, 21845, 24030,
|
||||
26214, 28399, 30583, 32768,
|
||||
2048, 4096, 6144, 8192, 10240, 12288, 14336, 16384, 18432, 20480, 22528,
|
||||
24576, 26624, 28672, 30720, 32768
|
||||
};
|
||||
// clang-format on
|
||||
|
||||
/*Given the current total integer number of bits used and the current value of
|
||||
rng, computes the fractional number of bits used to OD_BITRES precision.
|
||||
This is used by od_ec_enc_tell_frac() and od_ec_dec_tell_frac().
|
||||
nbits_total: The number of whole bits currently used, i.e., the value
|
||||
returned by od_ec_enc_tell() or od_ec_dec_tell().
|
||||
rng: The current value of rng from either the encoder or decoder state.
|
||||
Return: The number of bits scaled by 2**OD_BITRES.
|
||||
This will always be slightly larger than the exact value (e.g., all
|
||||
rounding error is in the positive direction).*/
|
||||
uint32_t od_ec_tell_frac(uint32_t nbits_total, uint32_t rng) {
|
||||
uint32_t nbits;
|
||||
int l;
|
||||
int i;
|
||||
/*To handle the non-integral number of bits still left in the encoder/decoder
|
||||
state, we compute the worst-case number of bits of val that must be
|
||||
encoded to ensure that the value is inside the range for any possible
|
||||
subsequent bits.
|
||||
The computation here is independent of val itself (the decoder does not
|
||||
even track that value), even though the real number of bits used after
|
||||
od_ec_enc_done() may be 1 smaller if rng is a power of two and the
|
||||
corresponding trailing bits of val are all zeros.
|
||||
If we did try to track that special case, then coding a value with a
|
||||
probability of 1/(1 << n) might sometimes appear to use more than n bits.
|
||||
This may help explain the surprising result that a newly initialized
|
||||
encoder or decoder claims to have used 1 bit.*/
|
||||
nbits = nbits_total << OD_BITRES;
|
||||
l = 0;
|
||||
for (i = OD_BITRES; i-- > 0;) {
|
||||
int b;
|
||||
rng = rng * rng >> 15;
|
||||
b = (int)(rng >> 16);
|
||||
l = l << 1 | b;
|
||||
rng >>= b;
|
||||
}
|
||||
return nbits - l;
|
||||
}
|
||||
@@ -1,105 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2001-2016, Alliance for Open Media. All rights reserved
|
||||
*
|
||||
* This source code is subject to the terms of the BSD 2 Clause License and
|
||||
* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
|
||||
* was not distributed with this source code in the LICENSE file, you can
|
||||
* obtain it at www.aomedia.org/license/software. If the Alliance for Open
|
||||
* Media Patent License 1.0 was not distributed with this source code in the
|
||||
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
|
||||
*/
|
||||
|
||||
#if !defined(_entcode_H)
|
||||
#define _entcode_H (1)
|
||||
#include <limits.h>
|
||||
#include <stddef.h>
|
||||
#include "av1/common/odintrin.h"
|
||||
|
||||
/*Set this flag 1 to enable a "reduced overhead" version of the entropy coder.
|
||||
This uses a partition function that more accurately follows the input
|
||||
probability estimates at the expense of some additional CPU cost (though
|
||||
still an order of magnitude less than a full division).
|
||||
|
||||
In classic arithmetic coding, the partition function maps a value x in the
|
||||
range [0, ft] to a value in y in [0, r] with 0 < ft <= r via
|
||||
y = x*r/ft.
|
||||
Any deviation from this value increases coding inefficiency.
|
||||
|
||||
To avoid divisions, we require ft <= r < 2*ft (enforcing it by shifting up
|
||||
ft if necessary), and replace that function with
|
||||
y = x + OD_MINI(x, r - ft).
|
||||
This counts values of x smaller than r - ft double compared to values larger
|
||||
than r - ft, which over-estimates the probability of symbols at the start of
|
||||
the alphabet, and under-estimates the probability of symbols at the end of
|
||||
the alphabet.
|
||||
The overall coding inefficiency assuming accurate probability models and
|
||||
independent symbols is in the 1% range, which is similar to that of CABAC.
|
||||
|
||||
To reduce overhead even further, we split this into two cases:
|
||||
1) r - ft > ft - (r - ft).
|
||||
That is, we have more values of x that are double-counted than
|
||||
single-counted.
|
||||
In this case, we still double-count the first 2*r - 3*ft values of x, but
|
||||
after that we alternate between single-counting and double-counting for
|
||||
the rest.
|
||||
2) r - ft < ft - (r - ft).
|
||||
That is, we have more values of x that are single-counted than
|
||||
double-counted.
|
||||
In this case, we alternate between single-counting and double-counting for
|
||||
the first 2*(r - ft) values of x, and single-count the rest.
|
||||
For two equiprobable symbols in different places in the alphabet, this
|
||||
reduces the maximum ratio of over-estimation to under-estimation from 2:1
|
||||
for the previous partition function to either 4:3 or 3:2 (for each of the
|
||||
two cases above, respectively), assuming symbol probabilities significantly
|
||||
greater than 1/32768.
|
||||
That reduces the worst-case per-symbol overhead from 1 bit to 0.58 bits.
|
||||
|
||||
The resulting function is
|
||||
e = OD_MAXI(2*r - 3*ft, 0);
|
||||
y = x + OD_MINI(x, e) + OD_MINI(OD_MAXI(x - e, 0) >> 1, r - ft).
|
||||
Here, e is a value that is greater than 0 in case 1, and 0 in case 2.
|
||||
This function is about 3 times as expensive to evaluate as the high-overhead
|
||||
version, but still an order of magnitude cheaper than a division, since it
|
||||
is composed only of very simple operations.
|
||||
Because we want to fit in 16-bit registers and must use unsigned values to do
|
||||
so, we use saturating subtraction to enforce the maximums with 0.
|
||||
|
||||
Enabling this reduces the measured overhead in ectest from 0.805% to 0.621%
|
||||
(vs. 0.022% for the division-based partition function with r much greater
|
||||
than ft).
|
||||
It improves performance on ntt-short-1 by about 0.3%.*/
|
||||
#define OD_EC_REDUCED_OVERHEAD (1)
|
||||
|
||||
/*OPT: od_ec_window must be at least 32 bits, but if you have fast arithmetic
|
||||
on a larger type, you can speed up the decoder by using it here.*/
|
||||
typedef uint32_t od_ec_window;
|
||||
|
||||
#define OD_EC_WINDOW_SIZE ((int)sizeof(od_ec_window) * CHAR_BIT)
|
||||
|
||||
/*Unsigned subtraction with unsigned saturation.
|
||||
This implementation of the macro is intentionally chosen to increase the
|
||||
number of common subexpressions in the reduced-overhead partition function.
|
||||
This matters for C code, but it would not for hardware with a saturating
|
||||
subtraction instruction.*/
|
||||
#define OD_SUBSATU(a, b) ((a)-OD_MINI(a, b))
|
||||
|
||||
/*The number of bits to use for the range-coded part of unsigned integers.*/
|
||||
#define OD_EC_UINT_BITS (4)
|
||||
|
||||
/*The resolution of fractional-precision bit usage measurements, i.e.,
|
||||
3 => 1/8th bits.*/
|
||||
#define OD_BITRES (3)
|
||||
|
||||
extern const uint16_t OD_UNIFORM_CDFS_Q15[135];
|
||||
|
||||
/*Returns a Q15 CDF for a uniform probability distribution of the given size.
|
||||
n: The size of the distribution.
|
||||
This must be at least 2, and no more than 16.*/
|
||||
#define OD_UNIFORM_CDF_Q15(n) (OD_UNIFORM_CDFS_Q15 + ((n) * ((n)-1) >> 1) - 1)
|
||||
|
||||
/*See entcode.c for further documentation.*/
|
||||
|
||||
OD_WARN_UNUSED_RESULT uint32_t od_ec_tell_frac(uint32_t nbits_total,
|
||||
uint32_t rng);
|
||||
|
||||
#endif
|
||||
494
aom_dsp/entdec.c
494
aom_dsp/entdec.c
@@ -1,494 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2001-2016, Alliance for Open Media. All rights reserved
|
||||
*
|
||||
* This source code is subject to the terms of the BSD 2 Clause License and
|
||||
* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
|
||||
* was not distributed with this source code in the LICENSE file, you can
|
||||
* obtain it at www.aomedia.org/license/software. If the Alliance for Open
|
||||
* Media Patent License 1.0 was not distributed with this source code in the
|
||||
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
|
||||
*/
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "./config.h"
|
||||
#endif
|
||||
|
||||
#include "aom_dsp/entdec.h"
|
||||
|
||||
/*A range decoder.
|
||||
This is an entropy decoder based upon \cite{Mar79}, which is itself a
|
||||
rediscovery of the FIFO arithmetic code introduced by \cite{Pas76}.
|
||||
It is very similar to arithmetic encoding, except that encoding is done with
|
||||
digits in any base, instead of with bits, and so it is faster when using
|
||||
larger bases (i.e.: a byte).
|
||||
The author claims an average waste of $\frac{1}{2}\log_b(2b)$ bits, where $b$
|
||||
is the base, longer than the theoretical optimum, but to my knowledge there
|
||||
is no published justification for this claim.
|
||||
This only seems true when using near-infinite precision arithmetic so that
|
||||
the process is carried out with no rounding errors.
|
||||
|
||||
An excellent description of implementation details is available at
|
||||
http://www.arturocampos.com/ac_range.html
|
||||
A recent work \cite{MNW98} which proposes several changes to arithmetic
|
||||
encoding for efficiency actually re-discovers many of the principles
|
||||
behind range encoding, and presents a good theoretical analysis of them.
|
||||
|
||||
End of stream is handled by writing out the smallest number of bits that
|
||||
ensures that the stream will be correctly decoded regardless of the value of
|
||||
any subsequent bits.
|
||||
od_ec_dec_tell() can be used to determine how many bits were needed to decode
|
||||
all the symbols thus far; other data can be packed in the remaining bits of
|
||||
the input buffer.
|
||||
@PHDTHESIS{Pas76,
|
||||
author="Richard Clark Pasco",
|
||||
title="Source coding algorithms for fast data compression",
|
||||
school="Dept. of Electrical Engineering, Stanford University",
|
||||
address="Stanford, CA",
|
||||
month=May,
|
||||
year=1976,
|
||||
URL="http://www.richpasco.org/scaffdc.pdf"
|
||||
}
|
||||
@INPROCEEDINGS{Mar79,
|
||||
author="Martin, G.N.N.",
|
||||
title="Range encoding: an algorithm for removing redundancy from a digitised
|
||||
message",
|
||||
booktitle="Video & Data Recording Conference",
|
||||
year=1979,
|
||||
address="Southampton",
|
||||
month=Jul,
|
||||
URL="http://www.compressconsult.com/rangecoder/rngcod.pdf.gz"
|
||||
}
|
||||
@ARTICLE{MNW98,
|
||||
author="Alistair Moffat and Radford Neal and Ian H. Witten",
|
||||
title="Arithmetic Coding Revisited",
|
||||
journal="{ACM} Transactions on Information Systems",
|
||||
year=1998,
|
||||
volume=16,
|
||||
number=3,
|
||||
pages="256--294",
|
||||
month=Jul,
|
||||
URL="http://researchcommons.waikato.ac.nz/bitstream/handle/10289/78/content.pdf"
|
||||
}*/
|
||||
|
||||
/*This is meant to be a large, positive constant that can still be efficiently
|
||||
loaded as an immediate (on platforms like ARM, for example).
|
||||
Even relatively modest values like 100 would work fine.*/
|
||||
#define OD_EC_LOTS_OF_BITS (0x4000)
|
||||
|
||||
static void od_ec_dec_refill(od_ec_dec *dec) {
|
||||
int s;
|
||||
od_ec_window dif;
|
||||
int16_t cnt;
|
||||
const unsigned char *bptr;
|
||||
const unsigned char *end;
|
||||
dif = dec->dif;
|
||||
cnt = dec->cnt;
|
||||
bptr = dec->bptr;
|
||||
end = dec->end;
|
||||
s = OD_EC_WINDOW_SIZE - 9 - (cnt + 15);
|
||||
for (; s >= 0 && bptr < end; s -= 8, bptr++) {
|
||||
OD_ASSERT(s <= OD_EC_WINDOW_SIZE - 8);
|
||||
dif |= (od_ec_window)bptr[0] << s;
|
||||
cnt += 8;
|
||||
}
|
||||
if (bptr >= end) {
|
||||
dec->tell_offs += OD_EC_LOTS_OF_BITS - cnt;
|
||||
cnt = OD_EC_LOTS_OF_BITS;
|
||||
}
|
||||
dec->dif = dif;
|
||||
dec->cnt = cnt;
|
||||
dec->bptr = bptr;
|
||||
}
|
||||
|
||||
/*Takes updated dif and range values, renormalizes them so that
|
||||
32768 <= rng < 65536 (reading more bytes from the stream into dif if
|
||||
necessary), and stores them back in the decoder context.
|
||||
dif: The new value of dif.
|
||||
rng: The new value of the range.
|
||||
ret: The value to return.
|
||||
Return: ret.
|
||||
This allows the compiler to jump to this function via a tail-call.*/
|
||||
static int od_ec_dec_normalize(od_ec_dec *dec, od_ec_window dif, unsigned rng,
                               int ret) {
  int shift;
  OD_ASSERT(rng <= 65535U);
  /*Number of bit positions needed to bring rng back up to 16 bits.*/
  shift = 16 - OD_ILOG_NZ(rng);
  dec->cnt -= shift;
  dec->dif = dif << shift;
  dec->rng = rng << shift;
  /*A negative count means the window needs more input.*/
  if (dec->cnt < 0) {
    od_ec_dec_refill(dec);
  }
  return ret;
}
|
||||
|
||||
/*Initializes the decoder.
|
||||
buf: The input buffer to use.
|
||||
storage: The size of the input buffer, in bytes.*/
|
||||
void od_ec_dec_init(od_ec_dec *dec, const unsigned char *buf,
                    uint32_t storage) {
  const unsigned char *const tail = buf + storage;
  /*Buffer bounds and the two read pointers: bptr starts at the front, eptr
     starts one past the last byte.*/
  dec->buf = buf;
  dec->end = tail;
  dec->bptr = buf;
  dec->eptr = tail;
  /*Raw-bit window state.*/
  dec->end_window = 0;
  dec->nend_bits = 0;
  /*Range decoder state.*/
  dec->dif = 0;
  dec->rng = 0x8000;
  dec->cnt = -15;
  dec->tell_offs = 10 - (OD_EC_WINDOW_SIZE - 8);
  dec->error = 0;
  /*Load the first bytes of input into the window.*/
  od_ec_dec_refill(dec);
}
|
||||
|
||||
/*Decode a bit that has an fz/ft probability of being a zero.
|
||||
fz: The probability that the bit is zero, scaled by ft.
|
||||
ft: The total probability.
|
||||
This must be at least 16384 and no more than 32768.
|
||||
Return: The value decoded (0 or 1).*/
|
||||
int od_ec_decode_bool(od_ec_dec *dec, unsigned fz, unsigned ft) {
  od_ec_window value;
  od_ec_window split_w;
  unsigned range;
  unsigned slack;
  unsigned split;
  int dbl;
  OD_ASSERT(0 < fz);
  OD_ASSERT(fz < ft);
  OD_ASSERT(16384 <= ft);
  OD_ASSERT(ft <= 32768U);
  value = dec->dif;
  range = dec->rng;
  OD_ASSERT(value >> (OD_EC_WINDOW_SIZE - 16) < range);
  OD_ASSERT(ft <= range);
  /*Double the frequencies when the doubled total still fits in the range.*/
  dbl = range - ft >= ft;
  ft <<= dbl;
  fz <<= dbl;
  slack = range - ft;
  OD_ASSERT(slack < ft);
#if OD_EC_REDUCED_OVERHEAD
  {
    unsigned excess;
    excess = OD_SUBSATU(2 * slack, ft);
    split = fz + OD_MINI(fz, excess) +
            OD_MINI(OD_SUBSATU(fz, excess) >> 1, slack);
  }
#else
  split = fz + OD_MINI(fz, slack);
#endif
  /*Align the split point with the top 16 bits of the window.*/
  split_w = (od_ec_window)split << (OD_EC_WINDOW_SIZE - 16);
  if (value >= split_w) {
    /*Upper partition: the bit is one.*/
    return od_ec_dec_normalize(dec, value - split_w, range - split, 1);
  }
  /*Lower partition: the bit is zero.*/
  return od_ec_dec_normalize(dec, value, split, 0);
}
|
||||
|
||||
/*Decode a bit that has an fz probability of being a zero in Q15.
|
||||
This is a simpler, lower overhead version of od_ec_decode_bool() for use when
|
||||
ft == 32768.
|
||||
To be decoded properly by this function, symbols cannot have been encoded by
|
||||
od_ec_encode(), but must have been encoded with one of the equivalent _q15()
|
||||
or _dyadic() functions instead.
|
||||
fz: The probability that the bit is zero, scaled by 32768.
|
||||
Return: The value decoded (0 or 1).*/
|
||||
int od_ec_decode_bool_q15(od_ec_dec *dec, unsigned fz) {
  od_ec_window value;
  od_ec_window split_w;
  unsigned range;
  unsigned split;
  int bit;
  OD_ASSERT(0 < fz);
  OD_ASSERT(fz < 32768U);
  value = dec->dif;
  range = dec->rng;
  OD_ASSERT(value >> (OD_EC_WINDOW_SIZE - 16) < range);
  OD_ASSERT(32768U <= range);
  /*Scale the Q15 probability by the current range.*/
  split = fz * (uint32_t)range >> 15;
  split_w = (od_ec_window)split << (OD_EC_WINDOW_SIZE - 16);
  bit = value >= split_w;
  if (bit) {
    value -= split_w;
  }
  /*The new range is the size of whichever partition the value fell in.*/
  return od_ec_dec_normalize(dec, value, bit ? range - split : split, bit);
}
|
||||
|
||||
/*Decodes a symbol given a cumulative distribution function (CDF) table.
|
||||
cdf: The CDF, such that symbol s falls in the range
|
||||
[s > 0 ? cdf[s - 1] : 0, cdf[s]).
|
||||
The values must be monotonically non-decreasing, and cdf[nsyms - 1]
|
||||
must be at least 16384, and no more than 32768.
|
||||
nsyms: The number of symbols in the alphabet.
|
||||
This should be at most 16.
|
||||
Return: The decoded symbol s.*/
|
||||
int od_ec_decode_cdf(od_ec_dec *dec, const uint16_t *cdf, int nsyms) {
  od_ec_window value;
  unsigned range;
  unsigned total;
  unsigned slack;
  unsigned top;
  unsigned target;
  unsigned fl;
  unsigned fh;
  unsigned lo;
  unsigned hi;
#if OD_EC_REDUCED_OVERHEAD
  unsigned excess;
#endif
  int dbl;
  int sym;
  value = dec->dif;
  range = dec->rng;
  OD_ASSERT(value >> (OD_EC_WINDOW_SIZE - 16) < range);
  OD_ASSERT(nsyms > 0);
  total = cdf[nsyms - 1];
  OD_ASSERT(16384 <= total);
  OD_ASSERT(total <= 32768U);
  OD_ASSERT(total <= range);
  /*Double the total when the doubled value still fits in the range.*/
  dbl = range - total >= total;
  total <<= dbl;
  slack = range - total;
  OD_ASSERT(slack < total);
  /*The top 16 bits of the window select the symbol.*/
  top = (unsigned)(value >> (OD_EC_WINDOW_SIZE - 16));
  /*Invert the partition function to get a cumulative frequency to look up.*/
  target = OD_MAXI((int)(top >> 1), (int)(top - slack));
#if OD_EC_REDUCED_OVERHEAD
  excess = OD_SUBSATU(2 * slack, total);
  /*The correctness of this inverse partition function is not obvious, but it
     was checked exhaustively for all possible values of the range, the total,
     and the top bits.
    TODO: It should be possible to optimize this better than the compiler,
     given that we do not care about the accuracy of negative results (as we
     will not use them).
    It would also be nice to get rid of the 32-bit dividend, as it requires a
     32x32->64 bit multiply to invert.*/
  target = OD_MAXI((int)target,
                   (int)((2 * (int32_t)top + 1 - (int32_t)excess) / 3));
#endif
  target >>= dbl;
  OD_ASSERT(target < total >> dbl);
  /*Linear search for the first CDF entry strictly above the target.*/
  fl = 0;
  sym = 0;
  fh = cdf[0];
  while (fh <= target) {
    fl = fh;
    fh = cdf[++sym];
  }
  OD_ASSERT(fh <= total >> dbl);
  fl <<= dbl;
  fh <<= dbl;
  /*Map the symbol's frequency interval back into the range.*/
#if OD_EC_REDUCED_OVERHEAD
  lo = fl + OD_MINI(fl, excess) + OD_MINI(OD_SUBSATU(fl, excess) >> 1, slack);
  hi = fh + OD_MINI(fh, excess) + OD_MINI(OD_SUBSATU(fh, excess) >> 1, slack);
#else
  lo = fl + OD_MINI(fl, slack);
  hi = fh + OD_MINI(fh, slack);
#endif
  value -= (od_ec_window)lo << (OD_EC_WINDOW_SIZE - 16);
  return od_ec_dec_normalize(dec, value, hi - lo, sym);
}
|
||||
|
||||
/*Decodes a symbol given a cumulative distribution function (CDF) table.
|
||||
cdf: The CDF, such that symbol s falls in the range
|
||||
[s > 0 ? cdf[s - 1] : 0, cdf[s]).
|
||||
The values must be monotonically non-decreasing, and cdf[nsyms - 1]
|
||||
must be at least 2, and no more than 32768.
|
||||
nsyms: The number of symbols in the alphabet.
|
||||
This should be at most 16.
|
||||
Return: The decoded symbol s.*/
|
||||
int od_ec_decode_cdf_unscaled(od_ec_dec *dec, const uint16_t *cdf, int nsyms) {
  od_ec_window value;
  unsigned range;
  unsigned total;
  unsigned slack;
  unsigned top;
  unsigned target;
  unsigned fl;
  unsigned fh;
  unsigned lo;
  unsigned hi;
#if OD_EC_REDUCED_OVERHEAD
  unsigned excess;
#endif
  int scale;
  int sym;
  value = dec->dif;
  range = dec->rng;
  OD_ASSERT(value >> (OD_EC_WINDOW_SIZE - 16) < range);
  OD_ASSERT(nsyms > 0);
  total = cdf[nsyms - 1];
  OD_ASSERT(2 <= total);
  OD_ASSERT(total <= 32768U);
  /*Scale the total up by a power of two first.*/
  scale = 15 - OD_ILOG_NZ(total - 1);
  total <<= scale;
  OD_ASSERT(total <= range);
  /*Apply one more doubling when the doubled total still fits in the range.*/
  if (range - total >= total) {
    total <<= 1;
    scale++;
  }
  slack = range - total;
  OD_ASSERT(slack < total);
  /*The top 16 bits of the window select the symbol.*/
  top = (unsigned)(value >> (OD_EC_WINDOW_SIZE - 16));
  target = OD_MAXI((int)(top >> 1), (int)(top - slack));
#if OD_EC_REDUCED_OVERHEAD
  excess = OD_SUBSATU(2 * slack, total);
  /*TODO: See the TODO in od_ec_decode_cdf().*/
  target = OD_MAXI((int)target,
                   (int)((2 * (int32_t)top + 1 - (int32_t)excess) / 3));
#endif
  target >>= scale;
  OD_ASSERT(target < total >> scale);
  /*Linear search for the first CDF entry strictly above the target.*/
  fl = 0;
  sym = 0;
  fh = cdf[0];
  while (fh <= target) {
    fl = fh;
    fh = cdf[++sym];
  }
  OD_ASSERT(fh <= total >> scale);
  fl <<= scale;
  fh <<= scale;
  /*Map the symbol's frequency interval back into the range.*/
#if OD_EC_REDUCED_OVERHEAD
  lo = fl + OD_MINI(fl, excess) + OD_MINI(OD_SUBSATU(fl, excess) >> 1, slack);
  hi = fh + OD_MINI(fh, excess) + OD_MINI(OD_SUBSATU(fh, excess) >> 1, slack);
#else
  lo = fl + OD_MINI(fl, slack);
  hi = fh + OD_MINI(fh, slack);
#endif
  value -= (od_ec_window)lo << (OD_EC_WINDOW_SIZE - 16);
  return od_ec_dec_normalize(dec, value, hi - lo, sym);
}
|
||||
|
||||
/*Decodes a symbol given a cumulative distribution function (CDF) table that
|
||||
sums to a power of two.
|
||||
This is a simpler, lower overhead version of od_ec_decode_cdf() for use when
|
||||
cdf[nsyms - 1] is a power of two.
|
||||
To be decoded properly by this function, symbols cannot have been encoded by
|
||||
od_ec_encode(), but must have been encoded with one of the equivalent _q15()
|
||||
functions instead.
|
||||
cdf: The CDF, such that symbol s falls in the range
|
||||
[s > 0 ? cdf[s - 1] : 0, cdf[s]).
|
||||
The values must be monotonically non-decreasing, and cdf[nsyms - 1]
|
||||
must be exactly 1 << ftb.
|
||||
nsyms: The number of symbols in the alphabet.
|
||||
This should be at most 16.
|
||||
ftb: The number of bits of precision in the cumulative distribution.
|
||||
This must be no more than 15.
|
||||
Return: The decoded symbol s.*/
|
||||
int od_ec_decode_cdf_unscaled_dyadic(od_ec_dec *dec, const uint16_t *cdf,
                                     int nsyms, unsigned ftb) {
  od_ec_window value;
  unsigned range;
  unsigned top;
  unsigned lo;
  unsigned hi;
  int sym;
  /*nsyms is only referenced by the assertion below.*/
  (void)nsyms;
  value = dec->dif;
  range = dec->rng;
  OD_ASSERT(value >> (OD_EC_WINDOW_SIZE - 16) < range);
  OD_ASSERT(ftb <= 15);
  OD_ASSERT(cdf[nsyms - 1] == 1U << ftb);
  OD_ASSERT(32768U <= range);
  top = (unsigned)(value >> (OD_EC_WINDOW_SIZE - 16));
  /*Walk the CDF, scaling each entry by the range, until the scaled upper
     bound exceeds the top bits of the window.*/
  sym = 0;
  lo = 0;
  hi = cdf[0] * (uint32_t)range >> ftb;
  while (hi <= top) {
    lo = hi;
    sym++;
    hi = cdf[sym] * (uint32_t)range >> ftb;
  }
  OD_ASSERT(hi <= range);
  value -= (od_ec_window)lo << (OD_EC_WINDOW_SIZE - 16);
  return od_ec_dec_normalize(dec, value, hi - lo, sym);
}
|
||||
|
||||
/*Decodes a symbol given a cumulative distribution function (CDF) table in Q15.
|
||||
This is a simpler, lower overhead version of od_ec_decode_cdf() for use when
|
||||
cdf[nsyms - 1] == 32768.
|
||||
To be decoded properly by this function, symbols cannot have been encoded by
|
||||
od_ec_encode(), but must have been encoded with one of the equivalent _q15()
|
||||
or _dyadic() functions instead.
|
||||
cdf: The CDF, such that symbol s falls in the range
|
||||
[s > 0 ? cdf[s - 1] : 0, cdf[s]).
|
||||
The values must be monotonically non-decreasing, and cdf[nsyms - 1]
|
||||
must be 32768.
|
||||
nsyms: The number of symbols in the alphabet.
|
||||
This should be at most 16.
|
||||
Return: The decoded symbol s.*/
|
||||
int od_ec_decode_cdf_q15(od_ec_dec *dec, const uint16_t *cdf, int nsyms) {
  /*Q15 is the dyadic case with 15 bits of precision.*/
  const unsigned q15_bits = 15;
  return od_ec_decode_cdf_unscaled_dyadic(dec, cdf, nsyms, q15_bits);
}
|
||||
|
||||
/*Extracts a raw unsigned integer with a non-power-of-2 range from the stream.
|
||||
The integer must have been encoded with od_ec_enc_uint().
|
||||
ft: The number of integers that can be decoded (one more than the max).
|
||||
This must be at least 2, and no more than 2**29.
|
||||
Return: The decoded bits.*/
|
||||
uint32_t od_ec_dec_uint(od_ec_dec *dec, uint32_t ft) {
  uint32_t val;
  uint32_t low_bits;
  int nhigh;
  int nlow;
  OD_ASSERT(ft >= 2);
  OD_ASSERT(ft <= (uint32_t)1 << (25 + OD_EC_UINT_BITS));
  /*Small ranges are decoded as a single uniformly distributed symbol.*/
  if (ft <= 1U << OD_EC_UINT_BITS) {
    return od_ec_decode_cdf_q15(dec, OD_UNIFORM_CDF_Q15(ft), (int)ft);
  }
  /*Larger ranges: the high part is one uniform symbol, the rest raw bits.*/
  ft--;
  nlow = OD_ILOG_NZ(ft) - OD_EC_UINT_BITS;
  nhigh = (int)(ft >> nlow) + 1;
  val = od_ec_decode_cdf_q15(dec, OD_UNIFORM_CDF_Q15(nhigh), nhigh);
  low_bits = od_ec_dec_bits(dec, nlow, "");
  val = val << nlow | low_bits;
  if (val <= ft) {
    return val;
  }
  /*The decoded value is out of range: flag the error and return the
     maximum.*/
  dec->error = 1;
  return ft;
}
|
||||
|
||||
/*Extracts a sequence of raw bits from the stream.
|
||||
The bits must have been encoded with od_ec_enc_bits().
|
||||
ftb: The number of bits to extract.
|
||||
This must be between 0 and 25, inclusive.
|
||||
Return: The decoded bits.*/
|
||||
/*NOTE(review): when OD_ACCOUNTING is enabled, the od_ec_dec_bits() macro in
   entdec.h forwards a third argument that this definition does not accept;
   confirm whether an OD_ACC_STR parameter is missing here.*/
uint32_t od_ec_dec_bits_(od_ec_dec *dec, unsigned ftb) {
  od_ec_window window;
  int available;
  uint32_t bits;
  OD_ASSERT(ftb <= 25);
  window = dec->end_window;
  available = dec->nend_bits;
  if ((unsigned)available < ftb) {
    /*Not enough bits buffered: pull bytes backwards from the end of the
       buffer.*/
    const unsigned char *const head = dec->buf;
    const unsigned char *tail = dec->eptr;
    OD_ASSERT(available <= OD_EC_WINDOW_SIZE - 8);
    for (;;) {
      if (tail <= head) {
        /*Out of input: pretend a large number of bits is available and record
           the difference in tell_offs.*/
        dec->tell_offs += OD_EC_LOTS_OF_BITS - available;
        available = OD_EC_LOTS_OF_BITS;
        break;
      }
      tail--;
      window |= (od_ec_window)*tail << available;
      available += 8;
      if (available > OD_EC_WINDOW_SIZE - 8) {
        break;
      }
    }
    dec->eptr = tail;
  }
  /*Take the low ftb bits and drop them from the window.*/
  bits = (uint32_t)window & (((uint32_t)1 << ftb) - 1);
  window >>= ftb;
  available -= ftb;
  dec->end_window = window;
  dec->nend_bits = available;
  return bits;
}
|
||||
|
||||
/*Returns the number of bits "used" by the decoded symbols so far.
|
||||
This same number can be computed in either the encoder or the decoder, and is
|
||||
suitable for making coding decisions.
|
||||
Return: The number of bits.
|
||||
This will always be slightly larger than the exact value (e.g., all
|
||||
rounding error is in the positive direction).*/
|
||||
int od_ec_dec_tell(const od_ec_dec *dec) {
|
||||
return ((dec->end - dec->eptr) + (dec->bptr - dec->buf)) * 8 - dec->cnt -
|
||||
dec->nend_bits + dec->tell_offs;
|
||||
}
|
||||
|
||||
/*Returns the number of bits "used" by the decoded symbols so far.
|
||||
This same number can be computed in either the encoder or the decoder, and is
|
||||
suitable for making coding decisions.
|
||||
Return: The number of bits scaled by 2**OD_BITRES.
|
||||
This will always be slightly larger than the exact value (e.g., all
|
||||
rounding error is in the positive direction).*/
|
||||
uint32_t od_ec_dec_tell_frac(const od_ec_dec *dec) {
|
||||
return od_ec_tell_frac(od_ec_dec_tell(dec), dec->rng);
|
||||
}
|
||||
101
aom_dsp/entdec.h
101
aom_dsp/entdec.h
@@ -1,101 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2001-2016, Alliance for Open Media. All rights reserved
|
||||
*
|
||||
* This source code is subject to the terms of the BSD 2 Clause License and
|
||||
* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
|
||||
* was not distributed with this source code in the LICENSE file, you can
|
||||
* obtain it at www.aomedia.org/license/software. If the Alliance for Open
|
||||
* Media Patent License 1.0 was not distributed with this source code in the
|
||||
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
|
||||
*/
|
||||
|
||||
#if !defined(_entdec_H)
|
||||
#define _entdec_H (1)
|
||||
#include <limits.h>
|
||||
#include "aom_dsp/entcode.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
typedef struct od_ec_dec od_ec_dec;
|
||||
|
||||
#if OD_ACCOUNTING
|
||||
#define OD_ACC_STR , char *acc_str
|
||||
#define od_ec_dec_bits(dec, ftb, str) od_ec_dec_bits_(dec, ftb, str)
|
||||
#else
|
||||
#define OD_ACC_STR
|
||||
#define od_ec_dec_bits(dec, ftb, str) od_ec_dec_bits_(dec, ftb)
|
||||
#endif
|
||||
|
||||
/*The entropy decoder context.*/
|
||||
struct od_ec_dec {
|
||||
/*The start of the current input buffer.*/
|
||||
const unsigned char *buf;
|
||||
/*The read pointer for the raw bits.*/
|
||||
const unsigned char *eptr;
|
||||
/*Bits that will be read from/written at the end.*/
|
||||
od_ec_window end_window;
|
||||
/*Number of valid bits in end_window.*/
|
||||
int nend_bits;
|
||||
/*An offset used to keep track of tell after reaching the end of the stream.
|
||||
This is constant throughout most of the decoding process, but becomes
|
||||
important once we hit the end of the buffer and stop incrementing pointers
|
||||
(and instead pretend cnt/nend_bits have lots of bits).*/
|
||||
int32_t tell_offs;
|
||||
/*The end of the current input buffer.*/
|
||||
const unsigned char *end;
|
||||
/*The read pointer for the entropy-coded bits.*/
|
||||
const unsigned char *bptr;
|
||||
/*The difference between the coded value and the low end of the current
|
||||
range.*/
|
||||
od_ec_window dif;
|
||||
/*The number of values in the current range.*/
|
||||
uint16_t rng;
|
||||
/*The number of bits of data in the current value.*/
|
||||
int16_t cnt;
|
||||
/*Nonzero if an error occurred.*/
|
||||
int error;
|
||||
};
|
||||
|
||||
/*See entdec.c for further documentation.*/
|
||||
|
||||
void od_ec_dec_init(od_ec_dec *dec, const unsigned char *buf, uint32_t storage)
|
||||
OD_ARG_NONNULL(1) OD_ARG_NONNULL(2);
|
||||
|
||||
OD_WARN_UNUSED_RESULT int od_ec_decode_bool(od_ec_dec *dec, unsigned fz,
|
||||
unsigned ft) OD_ARG_NONNULL(1);
|
||||
OD_WARN_UNUSED_RESULT int od_ec_decode_bool_q15(od_ec_dec *dec, unsigned fz)
|
||||
OD_ARG_NONNULL(1);
|
||||
OD_WARN_UNUSED_RESULT int od_ec_decode_cdf(od_ec_dec *dec, const uint16_t *cdf,
|
||||
int nsyms) OD_ARG_NONNULL(1)
|
||||
OD_ARG_NONNULL(2);
|
||||
OD_WARN_UNUSED_RESULT int od_ec_decode_cdf_q15(od_ec_dec *dec,
|
||||
const uint16_t *cdf, int nsyms)
|
||||
OD_ARG_NONNULL(1) OD_ARG_NONNULL(2);
|
||||
OD_WARN_UNUSED_RESULT int od_ec_decode_cdf_unscaled(od_ec_dec *dec,
|
||||
const uint16_t *cdf,
|
||||
int nsyms) OD_ARG_NONNULL(1)
|
||||
OD_ARG_NONNULL(2);
|
||||
OD_WARN_UNUSED_RESULT int od_ec_decode_cdf_unscaled_dyadic(od_ec_dec *dec,
|
||||
const uint16_t *cdf,
|
||||
int nsyms,
|
||||
unsigned _ftb)
|
||||
OD_ARG_NONNULL(1) OD_ARG_NONNULL(2);
|
||||
|
||||
OD_WARN_UNUSED_RESULT uint32_t od_ec_dec_uint(od_ec_dec *dec, uint32_t ft)
|
||||
OD_ARG_NONNULL(1);
|
||||
|
||||
OD_WARN_UNUSED_RESULT uint32_t od_ec_dec_bits_(od_ec_dec *dec, unsigned ftb)
|
||||
OD_ARG_NONNULL(1);
|
||||
|
||||
OD_WARN_UNUSED_RESULT int od_ec_dec_tell(const od_ec_dec *dec)
|
||||
OD_ARG_NONNULL(1);
|
||||
OD_WARN_UNUSED_RESULT uint32_t od_ec_dec_tell_frac(const od_ec_dec *dec)
|
||||
OD_ARG_NONNULL(1);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
#endif
|
||||
|
||||
#endif
|
||||
686
aom_dsp/entenc.c
686
aom_dsp/entenc.c
@@ -1,686 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2001-2016, Alliance for Open Media. All rights reserved
|
||||
*
|
||||
* This source code is subject to the terms of the BSD 2 Clause License and
|
||||
* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
|
||||
* was not distributed with this source code in the LICENSE file, you can
|
||||
* obtain it at www.aomedia.org/license/software. If the Alliance for Open
|
||||
* Media Patent License 1.0 was not distributed with this source code in the
|
||||
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
|
||||
*/
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "./config.h"
|
||||
#endif
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include "aom_dsp/entenc.h"
|
||||
|
||||
/*A range encoder.
|
||||
See entdec.c and the references for implementation details \cite{Mar79,MNW98}.
|
||||
|
||||
@INPROCEEDINGS{Mar79,
|
||||
author="Martin, G.N.N.",
|
||||
title="Range encoding: an algorithm for removing redundancy from a digitised
|
||||
message",
|
||||
booktitle="Video \& Data Recording Conference",
|
||||
year=1979,
|
||||
address="Southampton",
|
||||
month=Jul,
|
||||
URL="http://www.compressconsult.com/rangecoder/rngcod.pdf.gz"
|
||||
}
|
||||
@ARTICLE{MNW98,
|
||||
author="Alistair Moffat and Radford Neal and Ian H. Witten",
|
||||
title="Arithmetic Coding Revisited",
|
||||
journal="{ACM} Transactions on Information Systems",
|
||||
year=1998,
|
||||
volume=16,
|
||||
number=3,
|
||||
pages="256--294",
|
||||
month=Jul,
|
||||
URL="http://researchcommons.waikato.ac.nz/bitstream/handle/10289/78/content.pdf"
|
||||
}*/
|
||||
|
||||
/*Takes updated low and range values, renormalizes them so that
|
||||
32768 <= rng < 65536 (flushing bytes from low to the pre-carry buffer if
|
||||
necessary), and stores them back in the encoder context.
|
||||
low: The new value of low.
|
||||
rng: The new value of the range.*/
|
||||
static void od_ec_enc_normalize(od_ec_enc *enc, od_ec_window low,
                                unsigned rng) {
  int shift;
  int bits;
  int pending;
  bits = enc->cnt;
  OD_ASSERT(rng <= 65535U);
  /*Number of bit positions needed to bring rng back up to 16 bits.*/
  shift = 16 - OD_ILOG_NZ(rng);
  pending = bits + shift;
  /*TODO: Right now we flush every time we have at least one byte available.
    Instead we should use an od_ec_window and flush right before we're about to
     shift bits off the end of the window.
    For a 32-bit window this is about the same amount of work, but for a 64-bit
     window it should be a fair win.*/
  if (pending >= 0) {
    uint16_t *out;
    uint32_t cap;
    uint32_t pos;
    unsigned mask;
    out = enc->precarry_buf;
    cap = enc->precarry_storage;
    pos = enc->offs;
    /*Up to two entries are written below; grow the buffer if they would not
       fit.*/
    if (pos + 2 > cap) {
      uint16_t *grown;
      cap = 2 * cap + 2;
      grown = (uint16_t *)realloc(out, sizeof(*out) * cap);
      if (grown == NULL) {
        /*Allocation failed: flag the error and leave the existing buffer in
           place.*/
        enc->error = -1;
        enc->offs = 0;
        return;
      }
      out = grown;
      enc->precarry_buf = out;
      enc->precarry_storage = cap;
    }
    bits += 16;
    mask = (1 << bits) - 1;
    if (pending >= 8) {
      /*Two entries are ready; emit the first one here.*/
      OD_ASSERT(pos < cap);
      out[pos] = (uint16_t)(low >> bits);
      pos++;
      low &= mask;
      bits -= 8;
      mask >>= 8;
    }
    OD_ASSERT(pos < cap);
    out[pos] = (uint16_t)(low >> bits);
    pos++;
    pending = bits + shift - 24;
    low &= mask;
    enc->offs = pos;
  }
  enc->low = low << shift;
  enc->rng = rng << shift;
  enc->cnt = pending;
}
|
||||
|
||||
/*Initializes the encoder.
|
||||
size: The initial size of the buffer, in bytes.*/
|
||||
void od_ec_enc_init(od_ec_enc *enc, uint32_t size) {
  unsigned char *bytes;
  uint16_t *words;
  od_ec_enc_reset(enc);
  /*Output byte buffer.*/
  bytes = (unsigned char *)malloc(sizeof(*bytes) * size);
  enc->buf = bytes;
  if (size > 0 && bytes == NULL) {
    enc->storage = 0;
    enc->error = -1;
  } else {
    enc->storage = size;
  }
  /*Pre-carry buffer, with the same number of entries.*/
  words = (uint16_t *)malloc(sizeof(*words) * size);
  enc->precarry_buf = words;
  if (size > 0 && words == NULL) {
    enc->precarry_storage = 0;
    enc->error = -1;
  } else {
    enc->precarry_storage = size;
  }
}
|
||||
|
||||
/*Reinitializes the encoder.*/
|
||||
void od_ec_enc_reset(od_ec_enc *enc) {
  /*Range coder state.*/
  enc->low = 0;
  enc->rng = 0x8000;
  /*This is initialized to -9 so that it crosses zero after we've accumulated
     one byte + one carry bit.*/
  enc->cnt = -9;
  enc->offs = 0;
  /*Raw bits state.*/
  enc->end_window = 0;
  enc->nend_bits = 0;
  enc->end_offs = 0;
  enc->error = 0;
#if OD_MEASURE_EC_OVERHEAD
  enc->entropy = 0;
  enc->nb_symbols = 0;
#endif
}
|
||||
|
||||
/*Frees the buffers used by the encoder.*/
|
||||
void od_ec_enc_clear(od_ec_enc *enc) {
  free(enc->precarry_buf);
  free(enc->buf);
  /*Drop the stale pointers and capacities so that a repeated clear is a
     harmless free(NULL) rather than a double free.*/
  enc->precarry_buf = NULL;
  enc->precarry_storage = 0;
  enc->buf = NULL;
  enc->storage = 0;
}
|
||||
|
||||
/*Encodes a symbol given its scaled frequency information.
|
||||
The frequency information must be discernable by the decoder, assuming it
|
||||
has read only the previous symbols from the stream.
|
||||
You can change the frequency information, or even the entire source alphabet,
|
||||
so long as the decoder can tell from the context of the previously encoded
|
||||
information that it is supposed to do so as well.
|
||||
fl: The cumulative frequency of all symbols that come before the one to be
|
||||
encoded.
|
||||
fh: The cumulative frequency of all symbols up to and including the one to
|
||||
be encoded.
|
||||
Together with fl, this defines the range [fl, fh) in which the decoded
|
||||
value will fall.
|
||||
ft: The sum of the frequencies of all the symbols.
|
||||
This must be at least 16384, and no more than 32768.*/
|
||||
static void od_ec_encode(od_ec_enc *enc, unsigned fl, unsigned fh,
                         unsigned ft) {
  od_ec_window low;
  unsigned range;
  unsigned slack;
  unsigned lo;
  unsigned hi;
  int dbl;
  OD_ASSERT(fl < fh);
  OD_ASSERT(fh <= ft);
  OD_ASSERT(16384 <= ft);
  OD_ASSERT(ft <= 32768U);
  low = enc->low;
  range = enc->rng;
  OD_ASSERT(ft <= range);
  /*Double the frequencies when the doubled total still fits in the range.*/
  dbl = range - ft >= ft;
  ft <<= dbl;
  fl <<= dbl;
  fh <<= dbl;
  slack = range - ft;
  OD_ASSERT(slack < ft);
  /*Map the symbol's frequency interval into the range.*/
#if OD_EC_REDUCED_OVERHEAD
  {
    unsigned excess;
    excess = OD_SUBSATU(2 * slack, ft);
    lo = fl + OD_MINI(fl, excess) +
         OD_MINI(OD_SUBSATU(fl, excess) >> 1, slack);
    hi = fh + OD_MINI(fh, excess) +
         OD_MINI(OD_SUBSATU(fh, excess) >> 1, slack);
  }
#else
  lo = fl + OD_MINI(fl, slack);
  hi = fh + OD_MINI(fh, slack);
#endif
  od_ec_enc_normalize(enc, low + lo, hi - lo);
#if OD_MEASURE_EC_OVERHEAD
  enc->entropy -= OD_LOG2((double)(fh - fl) / ft);
  enc->nb_symbols++;
#endif
}
|
||||
|
||||
/*Encodes a symbol given its frequency in Q15.
|
||||
This is like od_ec_encode() when ft == 32768, but is simpler and has lower
|
||||
overhead.
|
||||
Symbols encoded with this function cannot be properly decoded with
|
||||
od_ec_decode(), and must be decoded with one of the equivalent _q15()
|
||||
functions instead.
|
||||
fl: The cumulative frequency of all symbols that come before the one to be
|
||||
encoded.
|
||||
fh: The cumulative frequency of all symbols up to and including the one to
|
||||
be encoded.*/
|
||||
static void od_ec_encode_q15(od_ec_enc *enc, unsigned fl, unsigned fh) {
  od_ec_window low;
  unsigned range;
  unsigned lo;
  unsigned hi;
  OD_ASSERT(fl < fh);
  OD_ASSERT(fh <= 32768U);
  low = enc->low;
  range = enc->rng;
  OD_ASSERT(32768U <= range);
  /*Scale both Q15 bounds by the current range.*/
  lo = fl * (uint32_t)range >> 15;
  hi = fh * (uint32_t)range >> 15;
  od_ec_enc_normalize(enc, low + lo, hi - lo);
#if OD_MEASURE_EC_OVERHEAD
  enc->entropy -= OD_LOG2((double)(fh - fl) / 32768.);
  enc->nb_symbols++;
#endif
}
|
||||
|
||||
/*Encodes a symbol given its frequency information with an arbitrary scale.
|
||||
This operates just like od_ec_encode(), but does not require that ft be at
|
||||
least 16384.
|
||||
fl: The cumulative frequency of all symbols that come before the one to be
|
||||
encoded.
|
||||
fh: The cumulative frequency of all symbols up to and including the one to
|
||||
be encoded.
|
||||
ft: The sum of the frequencies of all the symbols.
|
||||
This must be at least 2 and no more than 32768.*/
|
||||
static void od_ec_encode_unscaled(od_ec_enc *enc, unsigned fl, unsigned fh,
                                  unsigned ft) {
  int scale;
  OD_ASSERT(fl < fh);
  OD_ASSERT(fh <= ft);
  OD_ASSERT(2 <= ft);
  OD_ASSERT(ft <= 32768U);
  /*Power-of-two scale factor applied to all three frequencies before handing
     them to od_ec_encode().*/
  scale = 15 - OD_ILOG_NZ(ft - 1);
  fl <<= scale;
  fh <<= scale;
  ft <<= scale;
  od_ec_encode(enc, fl, fh, ft);
}
|
||||
|
||||
/*Encode a bit that has an fz/ft probability of being a zero.
|
||||
val: The value to encode (0 or 1).
|
||||
fz: The probability that val is zero, scaled by ft.
|
||||
ft: The total probability.
|
||||
This must be at least 16384 and no more than 32768.*/
|
||||
void od_ec_encode_bool(od_ec_enc *enc, int val, unsigned fz, unsigned ft) {
  od_ec_window low;
  unsigned range;
  unsigned slack;
  unsigned split;
  int dbl;
  OD_ASSERT(0 < fz);
  OD_ASSERT(fz < ft);
  OD_ASSERT(16384 <= ft);
  OD_ASSERT(ft <= 32768U);
  low = enc->low;
  range = enc->rng;
  OD_ASSERT(ft <= range);
  /*Double the frequencies when the doubled total still fits in the range.*/
  dbl = range - ft >= ft;
  ft <<= dbl;
  fz <<= dbl;
  slack = range - ft;
  OD_ASSERT(slack < ft);
#if OD_EC_REDUCED_OVERHEAD
  {
    unsigned excess;
    excess = OD_SUBSATU(2 * slack, ft);
    split = fz + OD_MINI(fz, excess) +
            OD_MINI(OD_SUBSATU(fz, excess) >> 1, slack);
  }
#else
  split = fz + OD_MINI(fz, slack);
#endif
  if (val) {
    /*A one takes the upper partition.*/
    low += split;
    range -= split;
  } else {
    /*A zero takes the lower partition.*/
    range = split;
  }
  od_ec_enc_normalize(enc, low, range);
#if OD_MEASURE_EC_OVERHEAD
  enc->entropy -= OD_LOG2((double)(val ? ft - fz : fz) / ft);
  enc->nb_symbols++;
#endif
}
|
||||
|
||||
/*Encode a bit that has an fz probability of being a zero in Q15.
|
||||
This is a simpler, lower overhead version of od_ec_encode_bool() for use when
|
||||
ft == 32768.
|
||||
Symbols encoded with this function cannot be properly decoded with
|
||||
od_ec_decode(), and must be decoded with one of the equivalent _q15()
|
||||
functions instead.
|
||||
val: The value to encode (0 or 1).
|
||||
fz: The probability that val is zero, scaled by 32768.*/
|
||||
void od_ec_encode_bool_q15(od_ec_enc *enc, int val, unsigned fz) {
  od_ec_window low;
  unsigned range;
  unsigned split;
  OD_ASSERT(0 < fz);
  OD_ASSERT(fz < 32768U);
  low = enc->low;
  range = enc->rng;
  OD_ASSERT(32768U <= range);
  /*Scale the Q15 probability by the current range.*/
  split = fz * (uint32_t)range >> 15;
  if (val) {
    /*A one takes the upper partition.*/
    low += split;
    range -= split;
  } else {
    /*A zero takes the lower partition.*/
    range = split;
  }
  od_ec_enc_normalize(enc, low, range);
#if OD_MEASURE_EC_OVERHEAD
  enc->entropy -= OD_LOG2((double)(val ? 32768 - fz : fz) / 32768.);
  enc->nb_symbols++;
#endif
}
|
||||
|
||||
/*Encodes a symbol given a cumulative distribution function (CDF) table.
|
||||
s: The index of the symbol to encode.
|
||||
cdf: The CDF, such that symbol s falls in the range
|
||||
[s > 0 ? cdf[s - 1] : 0, cdf[s]).
|
||||
The values must be monotonically non-decreasing, and the last value
|
||||
must be at least 16384, and no more than 32768.
|
||||
nsyms: The number of symbols in the alphabet.
|
||||
This should be at most 16.*/
|
||||
void od_ec_encode_cdf(od_ec_enc *enc, int s, const uint16_t *cdf, int nsyms) {
  unsigned fl;
  unsigned fh;
  OD_ASSERT(s >= 0);
  OD_ASSERT(s < nsyms);
  /*The first symbol starts at zero; every other symbol starts where the
     previous one ends.*/
  fl = 0;
  if (s > 0) {
    fl = cdf[s - 1];
  }
  fh = cdf[s];
  od_ec_encode(enc, fl, fh, cdf[nsyms - 1]);
}
|
||||
|
||||
/*Encodes a symbol given a cumulative distribution function (CDF) table in Q15.
|
||||
This is a simpler, lower overhead version of od_ec_encode_cdf() for use when
|
||||
cdf[nsyms - 1] == 32768.
|
||||
Symbols encoded with this function cannot be properly decoded with
|
||||
od_ec_decode(), and must be decoded with one of the equivalent _q15()
|
||||
functions instead.
|
||||
s: The index of the symbol to encode.
|
||||
cdf: The CDF, such that symbol s falls in the range
|
||||
[s > 0 ? cdf[s - 1] : 0, cdf[s]).
|
||||
The values must be monotonically non-decreasing, and the last value
|
||||
must be exactly 32768.
|
||||
nsyms: The number of symbols in the alphabet.
|
||||
This should be at most 16.*/
|
||||
void od_ec_encode_cdf_q15(od_ec_enc *enc, int s, const uint16_t *cdf,
                          int nsyms) {
  unsigned fl;
  unsigned fh;
  /*nsyms is only referenced by the assertions below.*/
  (void)nsyms;
  OD_ASSERT(s >= 0);
  OD_ASSERT(s < nsyms);
  OD_ASSERT(cdf[nsyms - 1] == 32768U);
  /*The first symbol starts at zero; every other symbol starts where the
     previous one ends.*/
  fl = 0;
  if (s > 0) {
    fl = cdf[s - 1];
  }
  fh = cdf[s];
  od_ec_encode_q15(enc, fl, fh);
}
|
||||
|
||||
/*Encodes a symbol given a cumulative distribution function (CDF) table.
|
||||
s: The index of the symbol to encode.
|
||||
cdf: The CDF, such that symbol s falls in the range
|
||||
[s > 0 ? cdf[s - 1] : 0, cdf[s]).
|
||||
The values must be monotonically non-decreasing, and the last value
|
||||
must be at least 2, and no more than 32768.
|
||||
nsyms: The number of symbols in the alphabet.
|
||||
This should be at most 16.*/
|
||||
void od_ec_encode_cdf_unscaled(od_ec_enc *enc, int s, const uint16_t *cdf,
                               int nsyms) {
  unsigned fl;
  unsigned fh;
  OD_ASSERT(s >= 0);
  OD_ASSERT(s < nsyms);
  /*The first symbol starts at zero; every other symbol starts where the
     previous one ends.*/
  fl = 0;
  if (s > 0) {
    fl = cdf[s - 1];
  }
  fh = cdf[s];
  od_ec_encode_unscaled(enc, fl, fh, cdf[nsyms - 1]);
}
|
||||
|
||||
/*Equivalent to od_ec_encode_cdf_q15() with the cdf scaled by
|
||||
(1 << (15 - ftb)).
|
||||
s: The index of the symbol to encode.
|
||||
cdf: The CDF, such that symbol s falls in the range
|
||||
[s > 0 ? cdf[s - 1] : 0, cdf[s]).
|
||||
The values must be monotonically non-decreasing, and the last value
|
||||
must be exactly 1 << ftb.
|
||||
nsyms: The number of symbols in the alphabet.
|
||||
This should be at most 16.
|
||||
ftb: The number of bits of precision in the cumulative distribution.
|
||||
This must be no more than 15.*/
|
||||
void od_ec_encode_cdf_unscaled_dyadic(od_ec_enc *enc, int s,
                                      const uint16_t *cdf, int nsyms,
                                      unsigned ftb) {
  unsigned fl;
  unsigned fh;
  /*nsyms is only referenced by the assertions below.*/
  (void)nsyms;
  OD_ASSERT(s >= 0);
  OD_ASSERT(s < nsyms);
  OD_ASSERT(ftb <= 15);
  OD_ASSERT(cdf[nsyms - 1] == 1U << ftb);
  /*Scale the bounds up to Q15 before handing them to the Q15 encoder.*/
  fl = 0;
  if (s > 0) {
    fl = cdf[s - 1] << (15 - ftb);
  }
  fh = cdf[s] << (15 - ftb);
  od_ec_encode_q15(enc, fl, fh);
}
|
||||
|
||||
/*Encodes a raw unsigned integer in the stream.
|
||||
fl: The integer to encode.
|
||||
ft: The number of integers that can be encoded (one more than the max).
|
||||
This must be at least 2, and no more than 2**29.*/
|
||||
void od_ec_enc_uint(od_ec_enc *enc, uint32_t fl, uint32_t ft) {
|
||||
OD_ASSERT(ft >= 2);
|
||||
OD_ASSERT(fl < ft);
|
||||
OD_ASSERT(ft <= (uint32_t)1 << (25 + OD_EC_UINT_BITS));
|
||||
if (ft > 1U << OD_EC_UINT_BITS) {
|
||||
int ft1;
|
||||
int ftb;
|
||||
ft--;
|
||||
ftb = OD_ILOG_NZ(ft) - OD_EC_UINT_BITS;
|
||||
ft1 = (int)(ft >> ftb) + 1;
|
||||
od_ec_encode_cdf_q15(enc, (int)(fl >> ftb), OD_UNIFORM_CDF_Q15(ft1), ft1);
|
||||
od_ec_enc_bits(enc, fl & (((uint32_t)1 << ftb) - 1), ftb);
|
||||
} else {
|
||||
od_ec_encode_cdf_q15(enc, (int)fl, OD_UNIFORM_CDF_Q15(ft), (int)ft);
|
||||
}
|
||||
}
|
||||
|
||||
/*Encodes a sequence of raw bits in the stream.
|
||||
fl: The bits to encode.
|
||||
ftb: The number of bits to encode.
|
||||
This must be between 0 and 25, inclusive.*/
|
||||
void od_ec_enc_bits(od_ec_enc *enc, uint32_t fl, unsigned ftb) {
|
||||
od_ec_window end_window;
|
||||
int nend_bits;
|
||||
OD_ASSERT(ftb <= 25);
|
||||
OD_ASSERT(fl < (uint32_t)1 << ftb);
|
||||
#if OD_MEASURE_EC_OVERHEAD
|
||||
enc->entropy += ftb;
|
||||
#endif
|
||||
end_window = enc->end_window;
|
||||
nend_bits = enc->nend_bits;
|
||||
if (nend_bits + ftb > OD_EC_WINDOW_SIZE) {
|
||||
unsigned char *buf;
|
||||
uint32_t storage;
|
||||
uint32_t end_offs;
|
||||
buf = enc->buf;
|
||||
storage = enc->storage;
|
||||
end_offs = enc->end_offs;
|
||||
if (end_offs + (OD_EC_WINDOW_SIZE >> 3) >= storage) {
|
||||
unsigned char *new_buf;
|
||||
uint32_t new_storage;
|
||||
new_storage = 2 * storage + (OD_EC_WINDOW_SIZE >> 3);
|
||||
new_buf = (unsigned char *)malloc(sizeof(*new_buf) * new_storage);
|
||||
if (new_buf == NULL) {
|
||||
enc->error = -1;
|
||||
enc->end_offs = 0;
|
||||
return;
|
||||
}
|
||||
OD_COPY(new_buf + new_storage - end_offs, buf + storage - end_offs,
|
||||
end_offs);
|
||||
storage = new_storage;
|
||||
free(buf);
|
||||
enc->buf = buf = new_buf;
|
||||
enc->storage = storage;
|
||||
}
|
||||
do {
|
||||
OD_ASSERT(end_offs < storage);
|
||||
buf[storage - ++end_offs] = (unsigned char)end_window;
|
||||
end_window >>= 8;
|
||||
nend_bits -= 8;
|
||||
} while (nend_bits >= 8);
|
||||
enc->end_offs = end_offs;
|
||||
}
|
||||
OD_ASSERT(nend_bits + ftb <= OD_EC_WINDOW_SIZE);
|
||||
end_window |= (od_ec_window)fl << nend_bits;
|
||||
nend_bits += ftb;
|
||||
enc->end_window = end_window;
|
||||
enc->nend_bits = nend_bits;
|
||||
}
|
||||
|
||||
/*Overwrites a few bits at the very start of an existing stream, after they
|
||||
have already been encoded.
|
||||
This makes it possible to have a few flags up front, where it is easy for
|
||||
decoders to access them without parsing the whole stream, even if their
|
||||
values are not determined until late in the encoding process, without having
|
||||
to buffer all the intermediate symbols in the encoder.
|
||||
In order for this to work, at least nbits bits must have already been encoded
|
||||
using probabilities that are an exact power of two.
|
||||
The encoder can verify the number of encoded bits is sufficient, but cannot
|
||||
check this latter condition.
|
||||
val: The bits to encode (in the least nbits significant bits).
|
||||
They will be decoded in order from most-significant to least.
|
||||
nbits: The number of bits to overwrite.
|
||||
This must be no more than 8.*/
|
||||
void od_ec_enc_patch_initial_bits(od_ec_enc *enc, unsigned val, int nbits) {
|
||||
int shift;
|
||||
unsigned mask;
|
||||
OD_ASSERT(nbits >= 0);
|
||||
OD_ASSERT(nbits <= 8);
|
||||
OD_ASSERT(val < 1U << nbits);
|
||||
shift = 8 - nbits;
|
||||
mask = ((1U << nbits) - 1) << shift;
|
||||
if (enc->offs > 0) {
|
||||
/*The first byte has been finalized.*/
|
||||
enc->precarry_buf[0] =
|
||||
(uint16_t)((enc->precarry_buf[0] & ~mask) | val << shift);
|
||||
} else if (9 + enc->cnt + (enc->rng == 0x8000) > nbits) {
|
||||
/*The first byte has yet to be output.*/
|
||||
enc->low = (enc->low & ~((od_ec_window)mask << (16 + enc->cnt))) |
|
||||
(od_ec_window)val << (16 + enc->cnt + shift);
|
||||
} else {
|
||||
/*The encoder hasn't even encoded _nbits of data yet.*/
|
||||
enc->error = -1;
|
||||
}
|
||||
}
|
||||
|
||||
#if OD_MEASURE_EC_OVERHEAD
|
||||
#include <stdio.h>
|
||||
#endif
|
||||
|
||||
/*Indicates that there are no more symbols to encode.
|
||||
All remaining output bytes are flushed to the output buffer.
|
||||
od_ec_enc_reset() should be called before using the encoder again.
|
||||
bytes: Returns the size of the encoded data in the returned buffer.
|
||||
Return: A pointer to the start of the final buffer, or NULL if there was an
|
||||
encoding error.*/
|
||||
unsigned char *od_ec_enc_done(od_ec_enc *enc, uint32_t *nbytes) {
|
||||
unsigned char *out;
|
||||
uint32_t storage;
|
||||
uint16_t *buf;
|
||||
uint32_t offs;
|
||||
uint32_t end_offs;
|
||||
int nend_bits;
|
||||
od_ec_window m;
|
||||
od_ec_window e;
|
||||
od_ec_window l;
|
||||
unsigned r;
|
||||
int c;
|
||||
int s;
|
||||
if (enc->error) return NULL;
|
||||
#if OD_MEASURE_EC_OVERHEAD
|
||||
{
|
||||
uint32_t tell;
|
||||
/* Don't count the 1 bit we lose to raw bits as overhead. */
|
||||
tell = od_ec_enc_tell(enc) - 1;
|
||||
fprintf(stderr, "overhead: %f%%\n",
|
||||
100 * (tell - enc->entropy) / enc->entropy);
|
||||
fprintf(stderr, "efficiency: %f bits/symbol\n",
|
||||
(double)tell / enc->nb_symbols);
|
||||
}
|
||||
#endif
|
||||
/*We output the minimum number of bits that ensures that the symbols encoded
|
||||
thus far will be decoded correctly regardless of the bits that follow.*/
|
||||
l = enc->low;
|
||||
r = enc->rng;
|
||||
c = enc->cnt;
|
||||
s = 9;
|
||||
m = 0x7FFF;
|
||||
e = (l + m) & ~m;
|
||||
while ((e | m) >= l + r) {
|
||||
s++;
|
||||
m >>= 1;
|
||||
e = (l + m) & ~m;
|
||||
}
|
||||
s += c;
|
||||
offs = enc->offs;
|
||||
buf = enc->precarry_buf;
|
||||
if (s > 0) {
|
||||
unsigned n;
|
||||
storage = enc->precarry_storage;
|
||||
if (offs + ((s + 7) >> 3) > storage) {
|
||||
storage = storage * 2 + ((s + 7) >> 3);
|
||||
buf = (uint16_t *)realloc(buf, sizeof(*buf) * storage);
|
||||
if (buf == NULL) {
|
||||
enc->error = -1;
|
||||
return NULL;
|
||||
}
|
||||
enc->precarry_buf = buf;
|
||||
enc->precarry_storage = storage;
|
||||
}
|
||||
n = (1 << (c + 16)) - 1;
|
||||
do {
|
||||
OD_ASSERT(offs < storage);
|
||||
buf[offs++] = (uint16_t)(e >> (c + 16));
|
||||
e &= n;
|
||||
s -= 8;
|
||||
c -= 8;
|
||||
n >>= 8;
|
||||
} while (s > 0);
|
||||
}
|
||||
/*Make sure there's enough room for the entropy-coded bits and the raw
|
||||
bits.*/
|
||||
out = enc->buf;
|
||||
storage = enc->storage;
|
||||
end_offs = enc->end_offs;
|
||||
e = enc->end_window;
|
||||
nend_bits = enc->nend_bits;
|
||||
s = -s;
|
||||
c = OD_MAXI((nend_bits - s + 7) >> 3, 0);
|
||||
if (offs + end_offs + c > storage) {
|
||||
storage = offs + end_offs + c;
|
||||
out = (unsigned char *)realloc(out, sizeof(*out) * storage);
|
||||
if (out == NULL) {
|
||||
enc->error = -1;
|
||||
return NULL;
|
||||
}
|
||||
OD_MOVE(out + storage - end_offs, out + enc->storage - end_offs, end_offs);
|
||||
enc->buf = out;
|
||||
enc->storage = storage;
|
||||
}
|
||||
/*If we have buffered raw bits, flush them as well.*/
|
||||
while (nend_bits > s) {
|
||||
OD_ASSERT(end_offs < storage);
|
||||
out[storage - ++end_offs] = (unsigned char)e;
|
||||
e >>= 8;
|
||||
nend_bits -= 8;
|
||||
}
|
||||
*nbytes = offs + end_offs;
|
||||
/*Perform carry propagation.*/
|
||||
OD_ASSERT(offs + end_offs <= storage);
|
||||
out = out + storage - (offs + end_offs);
|
||||
c = 0;
|
||||
end_offs = offs;
|
||||
while (offs-- > 0) {
|
||||
c = buf[offs] + c;
|
||||
out[offs] = (unsigned char)c;
|
||||
c >>= 8;
|
||||
}
|
||||
/*Add any remaining raw bits to the last byte.
|
||||
There is guaranteed to be enough room, because nend_bits <= s.*/
|
||||
OD_ASSERT(nend_bits <= 0 || end_offs > 0);
|
||||
if (nend_bits > 0) out[end_offs - 1] |= (unsigned char)e;
|
||||
/*Note: Unless there's an allocation error, if you keep encoding into the
|
||||
current buffer and call this function again later, everything will work
|
||||
just fine (you won't get a new packet out, but you will get a single
|
||||
buffer with the new data appended to the old).
|
||||
However, this function is O(N) where N is the amount of data coded so far,
|
||||
so calling it more than once for a given packet is a bad idea.*/
|
||||
return out;
|
||||
}
|
||||
|
||||
/*Returns the number of bits "used" by the encoded symbols so far.
|
||||
This same number can be computed in either the encoder or the decoder, and is
|
||||
suitable for making coding decisions.
|
||||
Warning: The value returned by this function can decrease compared to an
|
||||
earlier call, even after encoding more data, if there is an encoding error
|
||||
(i.e., a failure to allocate enough space for the output buffer).
|
||||
Return: The number of bits.
|
||||
This will always be slightly larger than the exact value (e.g., all
|
||||
rounding error is in the positive direction).*/
|
||||
int od_ec_enc_tell(const od_ec_enc *enc) {
|
||||
/*The 10 here counteracts the offset of -9 baked into cnt, and adds 1 extra
|
||||
bit, which we reserve for terminating the stream.*/
|
||||
return (enc->offs + enc->end_offs) * 8 + enc->cnt + enc->nend_bits + 10;
|
||||
}
|
||||
|
||||
/*Returns the number of bits "used" by the encoded symbols so far.
|
||||
This same number can be computed in either the encoder or the decoder, and is
|
||||
suitable for making coding decisions.
|
||||
Warning: The value returned by this function can decrease compared to an
|
||||
earlier call, even after encoding more data, if there is an encoding error
|
||||
(i.e., a failure to allocate enough space for the output buffer).
|
||||
Return: The number of bits scaled by 2**OD_BITRES.
|
||||
This will always be slightly larger than the exact value (e.g., all
|
||||
rounding error is in the positive direction).*/
|
||||
uint32_t od_ec_enc_tell_frac(const od_ec_enc *enc) {
|
||||
return od_ec_tell_frac(od_ec_enc_tell(enc), enc->rng);
|
||||
}
|
||||
|
||||
/*Saves a entropy coder checkpoint to dst.
|
||||
This allows an encoder to reverse a series of entropy coder
|
||||
decisions if it decides that the information would have been
|
||||
better coded some other way.*/
|
||||
void od_ec_enc_checkpoint(od_ec_enc *dst, const od_ec_enc *src) {
|
||||
OD_COPY(dst, src, 1);
|
||||
}
|
||||
|
||||
/*Restores an entropy coder checkpoint saved by od_ec_enc_checkpoint.
|
||||
This can only be used to restore from checkpoints earlier in the target
|
||||
state's history: you can not switch backwards and forwards or otherwise
|
||||
switch to a state which isn't a casual ancestor of the current state.
|
||||
Restore is also incompatible with patching the initial bits, as the
|
||||
changes will remain in the restored version.*/
|
||||
void od_ec_enc_rollback(od_ec_enc *dst, const od_ec_enc *src) {
|
||||
unsigned char *buf;
|
||||
uint32_t storage;
|
||||
uint16_t *precarry_buf;
|
||||
uint32_t precarry_storage;
|
||||
OD_ASSERT(dst->storage >= src->storage);
|
||||
OD_ASSERT(dst->precarry_storage >= src->precarry_storage);
|
||||
buf = dst->buf;
|
||||
storage = dst->storage;
|
||||
precarry_buf = dst->precarry_buf;
|
||||
precarry_storage = dst->precarry_storage;
|
||||
OD_COPY(dst, src, 1);
|
||||
dst->buf = buf;
|
||||
dst->storage = storage;
|
||||
dst->precarry_buf = precarry_buf;
|
||||
dst->precarry_storage = precarry_storage;
|
||||
}
|
||||
103
aom_dsp/entenc.h
103
aom_dsp/entenc.h
@@ -1,103 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2001-2016, Alliance for Open Media. All rights reserved
|
||||
*
|
||||
* This source code is subject to the terms of the BSD 2 Clause License and
|
||||
* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
|
||||
* was not distributed with this source code in the LICENSE file, you can
|
||||
* obtain it at www.aomedia.org/license/software. If the Alliance for Open
|
||||
* Media Patent License 1.0 was not distributed with this source code in the
|
||||
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
|
||||
*/
|
||||
|
||||
#if !defined(_entenc_H)
|
||||
#define _entenc_H (1)
|
||||
#include <stddef.h>
|
||||
#include "aom_dsp/entcode.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
typedef struct od_ec_enc od_ec_enc;
|
||||
|
||||
#define OD_MEASURE_EC_OVERHEAD (0)
|
||||
|
||||
/*The entropy encoder context.*/
|
||||
struct od_ec_enc {
|
||||
/*Buffered output.
|
||||
This contains only the raw bits until the final call to od_ec_enc_done(),
|
||||
where all the arithmetic-coded data gets prepended to it.*/
|
||||
unsigned char *buf;
|
||||
/*The size of the buffer.*/
|
||||
uint32_t storage;
|
||||
/*The offset at which the last byte containing raw bits was written.*/
|
||||
uint32_t end_offs;
|
||||
/*Bits that will be read from/written at the end.*/
|
||||
od_ec_window end_window;
|
||||
/*Number of valid bits in end_window.*/
|
||||
int nend_bits;
|
||||
/*A buffer for output bytes with their associated carry flags.*/
|
||||
uint16_t *precarry_buf;
|
||||
/*The size of the pre-carry buffer.*/
|
||||
uint32_t precarry_storage;
|
||||
/*The offset at which the next entropy-coded byte will be written.*/
|
||||
uint32_t offs;
|
||||
/*The low end of the current range.*/
|
||||
od_ec_window low;
|
||||
/*The number of values in the current range.*/
|
||||
uint16_t rng;
|
||||
/*The number of bits of data in the current value.*/
|
||||
int16_t cnt;
|
||||
/*Nonzero if an error occurred.*/
|
||||
int error;
|
||||
#if OD_MEASURE_EC_OVERHEAD
|
||||
double entropy;
|
||||
int nb_symbols;
|
||||
#endif
|
||||
};
|
||||
|
||||
/*See entenc.c for further documentation.*/
|
||||
|
||||
void od_ec_enc_init(od_ec_enc *enc, uint32_t size) OD_ARG_NONNULL(1);
|
||||
void od_ec_enc_reset(od_ec_enc *enc) OD_ARG_NONNULL(1);
|
||||
void od_ec_enc_clear(od_ec_enc *enc) OD_ARG_NONNULL(1);
|
||||
|
||||
void od_ec_encode_bool(od_ec_enc *enc, int val, unsigned fz, unsigned _ft)
|
||||
OD_ARG_NONNULL(1);
|
||||
void od_ec_encode_bool_q15(od_ec_enc *enc, int val, unsigned fz_q15)
|
||||
OD_ARG_NONNULL(1);
|
||||
void od_ec_encode_cdf(od_ec_enc *enc, int s, const uint16_t *cdf, int nsyms)
|
||||
OD_ARG_NONNULL(1) OD_ARG_NONNULL(3);
|
||||
void od_ec_encode_cdf_q15(od_ec_enc *enc, int s, const uint16_t *cdf, int nsyms)
|
||||
OD_ARG_NONNULL(1) OD_ARG_NONNULL(3);
|
||||
void od_ec_encode_cdf_unscaled(od_ec_enc *enc, int s, const uint16_t *cdf,
|
||||
int nsyms) OD_ARG_NONNULL(1) OD_ARG_NONNULL(3);
|
||||
void od_ec_encode_cdf_unscaled_dyadic(od_ec_enc *enc, int s,
|
||||
const uint16_t *cdf, int nsyms,
|
||||
unsigned ftb) OD_ARG_NONNULL(1)
|
||||
OD_ARG_NONNULL(3);
|
||||
|
||||
void od_ec_enc_uint(od_ec_enc *enc, uint32_t fl, uint32_t ft) OD_ARG_NONNULL(1);
|
||||
|
||||
void od_ec_enc_bits(od_ec_enc *enc, uint32_t fl, unsigned ftb)
|
||||
OD_ARG_NONNULL(1);
|
||||
|
||||
void od_ec_enc_patch_initial_bits(od_ec_enc *enc, unsigned val, int nbits)
|
||||
OD_ARG_NONNULL(1);
|
||||
OD_WARN_UNUSED_RESULT unsigned char *od_ec_enc_done(od_ec_enc *enc,
|
||||
uint32_t *nbytes)
|
||||
OD_ARG_NONNULL(1) OD_ARG_NONNULL(2);
|
||||
|
||||
OD_WARN_UNUSED_RESULT int od_ec_enc_tell(const od_ec_enc *enc)
|
||||
OD_ARG_NONNULL(1);
|
||||
OD_WARN_UNUSED_RESULT uint32_t od_ec_enc_tell_frac(const od_ec_enc *enc)
|
||||
OD_ARG_NONNULL(1);
|
||||
|
||||
void od_ec_enc_checkpoint(od_ec_enc *dst, const od_ec_enc *src);
|
||||
void od_ec_enc_rollback(od_ec_enc *dst, const od_ec_enc *src);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
#endif
|
||||
|
||||
#endif
|
||||
@@ -1,26 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2016, Alliance for Open Media. All rights reserved
|
||||
*
|
||||
* This source code is subject to the terms of the BSD 2 Clause License and
|
||||
* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
|
||||
* was not distributed with this source code in the LICENSE file, you can
|
||||
* obtain it at www.aomedia.org/license/software. If the Alliance for Open
|
||||
* Media Patent License 1.0 was not distributed with this source code in the
|
||||
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
|
||||
*/
|
||||
|
||||
#ifndef AOM_DSP_FWD_TXFM_H_
|
||||
#define AOM_DSP_FWD_TXFM_H_
|
||||
|
||||
#include "aom_dsp/txfm_common.h"
|
||||
|
||||
// Rounds `input` down by DCT_CONST_BITS bits via ROUND_POWER_OF_TWO and
// returns the result.
static INLINE tran_high_t fdct_round_shift(tran_high_t input) {
  // TODO(debargha, peter.derivaz): Find new bounds for the range check that
  // used to guard the result here, and make the bounds consts:
  //   assert(INT16_MIN <= result && result <= INT16_MAX);
  return ROUND_POWER_OF_TWO(input, DCT_CONST_BITS);
}
|
||||
|
||||
void aom_fdct32(const tran_high_t *input, tran_high_t *output, int round);
|
||||
#endif // AOM_DSP_FWD_TXFM_H_
|
||||
@@ -1,124 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2016, Alliance for Open Media. All rights reserved
|
||||
*
|
||||
* This source code is subject to the terms of the BSD 2 Clause License and
|
||||
* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
|
||||
* was not distributed with this source code in the LICENSE file, you can
|
||||
* obtain it at www.aomedia.org/license/software. If the Alliance for Open
|
||||
* Media Patent License 1.0 was not distributed with this source code in the
|
||||
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
|
||||
*/
|
||||
|
||||
#ifndef AOM_DSP_MIPS_AOM_CONVOLVE_MSA_H_
|
||||
#define AOM_DSP_MIPS_AOM_CONVOLVE_MSA_H_
|
||||
|
||||
#include "aom_dsp/mips/macros_msa.h"
|
||||
#include "aom_dsp/aom_filter.h"
|
||||
|
||||
extern const uint8_t mc_filt_mask_arr[16 * 3];
|
||||
|
||||
/* Accumulates an 8-tap filter as four signed byte dot products.
 * vec0/filt0 and vec1/filt1 are summed into one halfword accumulator,
 * vec2/filt2 and vec3/filt3 into a second, and the two are combined with
 * __msa_adds_s_h.  Evaluates to the resulting v8i16 vector (GNU statement
 * expression). */
#define FILT_8TAP_DPADD_S_H(vec0, vec1, vec2, vec3, filt0, filt1, filt2, \
                            filt3) \
  ({ \
    v8i16 tmp_dpadd_0, tmp_dpadd_1; \
    \
    tmp_dpadd_0 = __msa_dotp_s_h((v16i8)vec0, (v16i8)filt0); \
    tmp_dpadd_0 = __msa_dpadd_s_h(tmp_dpadd_0, (v16i8)vec1, (v16i8)filt1); \
    tmp_dpadd_1 = __msa_dotp_s_h((v16i8)vec2, (v16i8)filt2); \
    tmp_dpadd_1 = __msa_dpadd_s_h(tmp_dpadd_1, (v16i8)vec3, (v16i8)filt3); \
    tmp_dpadd_0 = __msa_adds_s_h(tmp_dpadd_0, tmp_dpadd_1); \
    \
    tmp_dpadd_0; \
  })
|
||||
|
||||
/* Horizontal 8-tap filter for one pair of source vectors.
 * The four masks select the byte vectors fed to FILT_8TAP_DPADD_S_H; the sum
 * is then rounded down by FILTER_BITS (__msa_srari_h) and saturated with
 * __msa_sat_s_h(..., 7).  Evaluates to the v8i16 result (GNU statement
 * expression). */
#define HORIZ_8TAP_FILT(src0, src1, mask0, mask1, mask2, mask3, filt_h0, \
                        filt_h1, filt_h2, filt_h3) \
  ({ \
    v16i8 vec0_m, vec1_m, vec2_m, vec3_m; \
    v8i16 hz_out_m; \
    \
    VSHF_B4_SB(src0, src1, mask0, mask1, mask2, mask3, vec0_m, vec1_m, vec2_m, \
               vec3_m); \
    hz_out_m = FILT_8TAP_DPADD_S_H(vec0_m, vec1_m, vec2_m, vec3_m, filt_h0, \
                                   filt_h1, filt_h2, filt_h3); \
    \
    hz_out_m = __msa_srari_h(hz_out_m, FILTER_BITS); \
    hz_out_m = __msa_sat_s_h(hz_out_m, 7); \
    \
    hz_out_m; \
  })
|
||||
|
||||
/* Horizontal 8-tap filter over four source vectors, producing two outputs.
 * Taps filt0/filt1 accumulate into res0_m/res1_m and taps filt2/filt3 into
 * res2_m/res3_m; ADDS_SH2_SH then combines the halves into out0 and out1.
 * No rounding or saturation of the final sums is done here. */
#define HORIZ_8TAP_4WID_4VECS_FILT(src0, src1, src2, src3, mask0, mask1, \
                                   mask2, mask3, filt0, filt1, filt2, filt3, \
                                   out0, out1) \
  { \
    v16i8 vec0_m, vec1_m, vec2_m, vec3_m, vec4_m, vec5_m, vec6_m, vec7_m; \
    v8i16 res0_m, res1_m, res2_m, res3_m; \
    \
    VSHF_B2_SB(src0, src1, src2, src3, mask0, mask0, vec0_m, vec1_m); \
    DOTP_SB2_SH(vec0_m, vec1_m, filt0, filt0, res0_m, res1_m); \
    VSHF_B2_SB(src0, src1, src2, src3, mask1, mask1, vec2_m, vec3_m); \
    DPADD_SB2_SH(vec2_m, vec3_m, filt1, filt1, res0_m, res1_m); \
    VSHF_B2_SB(src0, src1, src2, src3, mask2, mask2, vec4_m, vec5_m); \
    DOTP_SB2_SH(vec4_m, vec5_m, filt2, filt2, res2_m, res3_m); \
    VSHF_B2_SB(src0, src1, src2, src3, mask3, mask3, vec6_m, vec7_m); \
    DPADD_SB2_SH(vec6_m, vec7_m, filt3, filt3, res2_m, res3_m); \
    ADDS_SH2_SH(res0_m, res2_m, res1_m, res3_m, out0, out1); \
  }
|
||||
|
||||
/* Horizontal 8-tap filter over four source vectors, producing four outputs.
 * Each source vector is shuffled against itself.  Taps filt0/filt1 accumulate
 * into res0_m..res3_m and taps filt2/filt3 into res4_m..res7_m; ADDS_SH4_SH
 * then combines the halves into out0..out3.  No rounding or saturation of the
 * final sums is done here. */
#define HORIZ_8TAP_8WID_4VECS_FILT(src0, src1, src2, src3, mask0, mask1, \
                                   mask2, mask3, filt0, filt1, filt2, filt3, \
                                   out0, out1, out2, out3) \
  { \
    v16i8 vec0_m, vec1_m, vec2_m, vec3_m, vec4_m, vec5_m, vec6_m, vec7_m; \
    v8i16 res0_m, res1_m, res2_m, res3_m, res4_m, res5_m, res6_m, res7_m; \
    \
    VSHF_B2_SB(src0, src0, src1, src1, mask0, mask0, vec0_m, vec1_m); \
    VSHF_B2_SB(src2, src2, src3, src3, mask0, mask0, vec2_m, vec3_m); \
    DOTP_SB4_SH(vec0_m, vec1_m, vec2_m, vec3_m, filt0, filt0, filt0, filt0, \
                res0_m, res1_m, res2_m, res3_m); \
    VSHF_B2_SB(src0, src0, src1, src1, mask2, mask2, vec0_m, vec1_m); \
    VSHF_B2_SB(src2, src2, src3, src3, mask2, mask2, vec2_m, vec3_m); \
    DOTP_SB4_SH(vec0_m, vec1_m, vec2_m, vec3_m, filt2, filt2, filt2, filt2, \
                res4_m, res5_m, res6_m, res7_m); \
    VSHF_B2_SB(src0, src0, src1, src1, mask1, mask1, vec4_m, vec5_m); \
    VSHF_B2_SB(src2, src2, src3, src3, mask1, mask1, vec6_m, vec7_m); \
    DPADD_SB4_SH(vec4_m, vec5_m, vec6_m, vec7_m, filt1, filt1, filt1, filt1, \
                 res0_m, res1_m, res2_m, res3_m); \
    VSHF_B2_SB(src0, src0, src1, src1, mask3, mask3, vec4_m, vec5_m); \
    VSHF_B2_SB(src2, src2, src3, src3, mask3, mask3, vec6_m, vec7_m); \
    DPADD_SB4_SH(vec4_m, vec5_m, vec6_m, vec7_m, filt3, filt3, filt3, filt3, \
                 res4_m, res5_m, res6_m, res7_m); \
    ADDS_SH4_SH(res0_m, res4_m, res1_m, res5_m, res2_m, res6_m, res3_m, \
                res7_m, out0, out1, out2, out3); \
  }
|
||||
|
||||
/* Packs in0/in1 through PCKEV_XORI128_UB, averages the packed bytes with dst
 * using __msa_aver_u_b, and stores the 16-byte result at pdst. */
#define PCKEV_XORI128_AVG_ST_UB(in0, in1, dst, pdst) \
  { \
    v16u8 tmp_m; \
    \
    tmp_m = PCKEV_XORI128_UB(in1, in0); \
    tmp_m = __msa_aver_u_b(tmp_m, (v16u8)dst); \
    ST_UB(tmp_m, (pdst)); \
  }
|
||||
|
||||
/* Packs the even bytes of in0/in1 with __msa_pckev_b, averages the packed
 * bytes with dst using __msa_aver_u_b, and stores the 16-byte result at
 * pdst. */
#define PCKEV_AVG_ST_UB(in0, in1, dst, pdst) \
  { \
    v16u8 tmp_m; \
    \
    tmp_m = (v16u8)__msa_pckev_b((v16i8)in0, (v16i8)in1); \
    tmp_m = __msa_aver_u_b(tmp_m, (v16u8)dst); \
    ST_UB(tmp_m, (pdst)); \
  }
|
||||
|
||||
/* Packs in1..in4 into two byte vectors (PCKEV_B2_UB) and dst0..dst3 into two
 * more (PCKEV_D2_UB), averages them pairwise with AVER_UB2_UB, and writes the
 * result through ST8x4_UB at pdst using the given stride. */
#define PCKEV_AVG_ST8x4_UB(in1, dst0, in2, dst1, in3, dst2, in4, dst3, pdst, \
                           stride) \
  { \
    v16u8 tmp0_m, tmp1_m, tmp2_m, tmp3_m; \
    \
    PCKEV_B2_UB(in2, in1, in4, in3, tmp0_m, tmp1_m); \
    PCKEV_D2_UB(dst1, dst0, dst3, dst2, tmp2_m, tmp3_m); \
    AVER_UB2_UB(tmp0_m, tmp2_m, tmp1_m, tmp3_m, tmp0_m, tmp1_m); \
    ST8x4_UB(tmp0_m, tmp1_m, pdst, stride); \
  }
|
||||
#endif /* AOM_DSP_MIPS_AOM_CONVOLVE_MSA_H_ */
|
||||
@@ -1,31 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2016, Alliance for Open Media. All rights reserved
|
||||
*
|
||||
* This source code is subject to the terms of the BSD 2 Clause License and
|
||||
* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
|
||||
* was not distributed with this source code in the LICENSE file, you can
|
||||
* obtain it at www.aomedia.org/license/software. If the Alliance for Open
|
||||
* Media Patent License 1.0 was not distributed with this source code in the
|
||||
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
|
||||
*/
|
||||
|
||||
#include "aom_dsp/mips/common_dspr2.h"
|
||||
|
||||
#if HAVE_DSPR2
|
||||
uint8_t aom_ff_cropTbl_a[256 + 2 * CROP_WIDTH];
|
||||
uint8_t *aom_ff_cropTbl;
|
||||
|
||||
void aom_dsputil_static_init(void) {
|
||||
int i;
|
||||
|
||||
for (i = 0; i < 256; i++) aom_ff_cropTbl_a[i + CROP_WIDTH] = i;
|
||||
|
||||
for (i = 0; i < CROP_WIDTH; i++) {
|
||||
aom_ff_cropTbl_a[i] = 0;
|
||||
aom_ff_cropTbl_a[i + CROP_WIDTH + 256] = 255;
|
||||
}
|
||||
|
||||
aom_ff_cropTbl = &aom_ff_cropTbl_a[CROP_WIDTH];
|
||||
}
|
||||
|
||||
#endif
|
||||
@@ -1,49 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2016, Alliance for Open Media. All rights reserved
|
||||
*
|
||||
* This source code is subject to the terms of the BSD 2 Clause License and
|
||||
* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
|
||||
* was not distributed with this source code in the LICENSE file, you can
|
||||
* obtain it at www.aomedia.org/license/software. If the Alliance for Open
|
||||
* Media Patent License 1.0 was not distributed with this source code in the
|
||||
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
|
||||
*/
|
||||
|
||||
#ifndef AOM_COMMON_MIPS_DSPR2_H_
|
||||
#define AOM_COMMON_MIPS_DSPR2_H_
|
||||
|
||||
#include <assert.h>
|
||||
#include "./aom_config.h"
|
||||
#include "aom/aom_integer.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
#if HAVE_DSPR2
|
||||
#define CROP_WIDTH 512
|
||||
|
||||
extern uint8_t *aom_ff_cropTbl; // From "aom_dsp/mips/intrapred4_dspr2.c"
|
||||
|
||||
/* prefetch data for load (MIPS "pref" instruction, hint 0) */
static INLINE void prefetch_load(const unsigned char *src) {
  __asm__ __volatile__("pref 0, 0(%[src]) \n\t"
                       :
                       : [src] "r"(src));
}
|
||||
|
||||
/* prefetch data for store */
|
||||
static INLINE void prefetch_store(unsigned char *dst) {
|
||||
__asm__ __volatile__("pref 1, 0(%[dst]) \n\t" : : [dst] "r"(dst));
|
||||
}
|
||||
|
||||
/* prefetch data for a streamed load (MIPS "pref" instruction, hint 4) */
static INLINE void prefetch_load_streamed(const unsigned char *src) {
  __asm__ __volatile__("pref 4, 0(%[src]) \n\t"
                       :
                       : [src] "r"(src));
}
|
||||
|
||||
/* prefetch data for store */
|
||||
static INLINE void prefetch_store_streamed(unsigned char *dst) {
|
||||
__asm__ __volatile__("pref 5, 0(%[dst]) \n\t" : : [dst] "r"(dst));
|
||||
}
|
||||
#endif // #if HAVE_DSPR2
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
#endif
|
||||
|
||||
#endif // AOM_COMMON_MIPS_DSPR2_H_
|
||||
@@ -1,59 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2016, Alliance for Open Media. All rights reserved
|
||||
*
|
||||
* This source code is subject to the terms of the BSD 2 Clause License and
|
||||
* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
|
||||
* was not distributed with this source code in the LICENSE file, you can
|
||||
* obtain it at www.aomedia.org/license/software. If the Alliance for Open
|
||||
* Media Patent License 1.0 was not distributed with this source code in the
|
||||
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
|
||||
*/
|
||||
|
||||
#ifndef AOM_DSP_MIPS_AOM_COMMON_DSPR2_H_
|
||||
#define AOM_DSP_MIPS_AOM_COMMON_DSPR2_H_
|
||||
|
||||
#include <assert.h>
|
||||
|
||||
#include "./aom_config.h"
|
||||
#include "aom/aom_integer.h"
|
||||
#include "aom_dsp/mips/common_dspr2.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#if HAVE_DSPR2
|
||||
void aom_convolve2_horiz_dspr2(const uint8_t *src, ptrdiff_t src_stride,
|
||||
uint8_t *dst, ptrdiff_t dst_stride,
|
||||
const int16_t *filter_x, int x_step_q4,
|
||||
const int16_t *filter_y, int y_step_q4, int w,
|
||||
int h);
|
||||
|
||||
void aom_convolve2_avg_horiz_dspr2(const uint8_t *src, ptrdiff_t src_stride,
|
||||
uint8_t *dst, ptrdiff_t dst_stride,
|
||||
const int16_t *filter_x, int x_step_q4,
|
||||
const int16_t *filter_y, int y_step_q4,
|
||||
int w, int h);
|
||||
|
||||
void aom_convolve2_avg_vert_dspr2(const uint8_t *src, ptrdiff_t src_stride,
|
||||
uint8_t *dst, ptrdiff_t dst_stride,
|
||||
const int16_t *filter_x, int x_step_q4,
|
||||
const int16_t *filter_y, int y_step_q4, int w,
|
||||
int h);
|
||||
|
||||
void aom_convolve2_dspr2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst,
|
||||
ptrdiff_t dst_stride, const int16_t *filter, int w,
|
||||
int h);
|
||||
|
||||
void aom_convolve2_vert_dspr2(const uint8_t *src, ptrdiff_t src_stride,
|
||||
uint8_t *dst, ptrdiff_t dst_stride,
|
||||
const int16_t *filter_x, int x_step_q4,
|
||||
const int16_t *filter_y, int y_step_q4, int w,
|
||||
int h);
|
||||
|
||||
#endif // #if HAVE_DSPR2
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
#endif
|
||||
|
||||
#endif // AOM_DSP_MIPS_AOM_COMMON_DSPR2_H_
|
||||
@@ -1,681 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2016 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include <stdlib.h>
|
||||
#include "./macros_msa.h"
|
||||
|
||||
extern const int16_t aom_rv[];
|
||||
|
||||
/* Transposes eight 16-byte input vectors (in0..in7) into sixteen output
 * vectors (out0..out15) using byte/halfword/word interleaves.
 * Even-numbered outputs come straight from the interleave network; each
 * odd-numbered output is the upper doubleword of the preceding even output,
 * duplicated with __msa_ilvl_d. */
#define AOM_TRANSPOSE8x16_UB_UB(in0, in1, in2, in3, in4, in5, in6, in7, out0, \
                                out1, out2, out3, out4, out5, out6, out7, \
                                out8, out9, out10, out11, out12, out13, out14, \
                                out15) \
  { \
    v8i16 temp0, temp1, temp2, temp3, temp4; \
    v8i16 temp5, temp6, temp7, temp8, temp9; \
    \
    ILVR_B4_SH(in1, in0, in3, in2, in5, in4, in7, in6, temp0, temp1, temp2, \
               temp3); \
    ILVR_H2_SH(temp1, temp0, temp3, temp2, temp4, temp5); \
    ILVRL_W2_SH(temp5, temp4, temp6, temp7); \
    ILVL_H2_SH(temp1, temp0, temp3, temp2, temp4, temp5); \
    ILVRL_W2_SH(temp5, temp4, temp8, temp9); \
    ILVL_B4_SH(in1, in0, in3, in2, in5, in4, in7, in6, temp0, temp1, temp2, \
               temp3); \
    ILVR_H2_SH(temp1, temp0, temp3, temp2, temp4, temp5); \
    ILVRL_W2_UB(temp5, temp4, out8, out10); \
    ILVL_H2_SH(temp1, temp0, temp3, temp2, temp4, temp5); \
    ILVRL_W2_UB(temp5, temp4, out12, out14); \
    out0 = (v16u8)temp6; \
    out2 = (v16u8)temp7; \
    out4 = (v16u8)temp8; \
    out6 = (v16u8)temp9; \
    out9 = (v16u8)__msa_ilvl_d((v2i64)out8, (v2i64)out8); \
    out11 = (v16u8)__msa_ilvl_d((v2i64)out10, (v2i64)out10); \
    out13 = (v16u8)__msa_ilvl_d((v2i64)out12, (v2i64)out12); \
    out15 = (v16u8)__msa_ilvl_d((v2i64)out14, (v2i64)out14); \
    out1 = (v16u8)__msa_ilvl_d((v2i64)out0, (v2i64)out0); \
    out3 = (v16u8)__msa_ilvl_d((v2i64)out2, (v2i64)out2); \
    out5 = (v16u8)__msa_ilvl_d((v2i64)out4, (v2i64)out4); \
    out7 = (v16u8)__msa_ilvl_d((v2i64)out6, (v2i64)out6); \
  }
|
||||
|
||||
/* Conditional 5-tap smoothing of src_in against its two neighbours on each
 * side.
 * out is first set to the average of src_in with the averaged neighbour
 * pairs.  A per-byte mask is then built that is set only where src_in differs
 * from all four neighbours by less than ref; __msa_bmz_v copies src_in back
 * into out wherever that mask is clear (per the MSA BMZ.V definition), so
 * only bytes passing all four threshold tests keep the averaged value. */
#define AOM_AVER_IF_RETAIN(above2_in, above1_in, src_in, below1_in, below2_in, \
                           ref, out) \
  { \
    v16u8 temp0, temp1; \
    \
    temp1 = __msa_aver_u_b(above2_in, above1_in); \
    temp0 = __msa_aver_u_b(below2_in, below1_in); \
    temp1 = __msa_aver_u_b(temp1, temp0); \
    out = __msa_aver_u_b(src_in, temp1); \
    temp0 = __msa_asub_u_b(src_in, above2_in); \
    temp1 = __msa_asub_u_b(src_in, above1_in); \
    temp0 = (temp0 < ref); \
    temp1 = (temp1 < ref); \
    temp0 = temp0 & temp1; \
    temp1 = __msa_asub_u_b(src_in, below1_in); \
    temp1 = (temp1 < ref); \
    temp0 = temp0 & temp1; \
    temp1 = __msa_asub_u_b(src_in, below2_in); \
    temp1 = (temp1 < ref); \
    temp0 = temp0 & temp1; \
    out = __msa_bmz_v(out, src_in, temp0); \
  }
|
||||
|
||||
/* In-place byte transpose used by the horizontal pass of the luma filter.
 *
 * Reads the sixteen vectors in0..in15 and overwrites in0..in11 with the
 * interleaved result; in12..in15 are only read.
 * NOTE(review): judging by the name, the result is the first 12 byte columns
 * of the 16 input rows, one column per output vector -- confirm against the
 * interleave macros.
 *
 * All reads of a given inN happen before that inN is overwritten, so the
 * statement order below must be preserved. */
#define TRANSPOSE12x16_B(in0, in1, in2, in3, in4, in5, in6, in7, in8, in9,    \
                         in10, in11, in12, in13, in14, in15)                  \
  {                                                                           \
    v8i16 temp0, temp1, temp2, temp3, temp4;                                  \
    v8i16 temp5, temp6, temp7, temp8, temp9;                                  \
                                                                              \
    /* Right (low) halves of rows 0..7. */                                    \
    ILVR_B2_SH(in1, in0, in3, in2, temp0, temp1);                             \
    ILVRL_H2_SH(temp1, temp0, temp2, temp3);                                  \
    ILVR_B2_SH(in5, in4, in7, in6, temp0, temp1);                             \
    ILVRL_H2_SH(temp1, temp0, temp4, temp5);                                  \
    ILVRL_W2_SH(temp4, temp2, temp0, temp1);                                  \
    ILVRL_W2_SH(temp5, temp3, temp2, temp3);                                  \
    /* Right (low) halves of rows 8..15. The original issued the next */      \
    /* interleave twice with identical operands; once is sufficient.  */      \
    ILVR_B2_SH(in9, in8, in11, in10, temp4, temp5);                           \
    ILVRL_H2_SH(temp5, temp4, temp6, temp7);                                  \
    ILVR_B2_SH(in13, in12, in15, in14, temp4, temp5);                         \
    ILVRL_H2_SH(temp5, temp4, temp8, temp9);                                  \
    ILVRL_W2_SH(temp8, temp6, temp4, temp5);                                  \
    ILVRL_W2_SH(temp9, temp7, temp6, temp7);                                  \
    /* Left halves of rows 0..3 must be captured before in0..in3 change. */   \
    ILVL_B2_SH(in1, in0, in3, in2, temp8, temp9);                             \
    ILVR_D2_UB(temp4, temp0, temp5, temp1, in0, in2);                         \
    in1 = (v16u8)__msa_ilvl_d((v2i64)temp4, (v2i64)temp0);                    \
    in3 = (v16u8)__msa_ilvl_d((v2i64)temp5, (v2i64)temp1);                    \
    /* Likewise for rows 4..7 before in4..in7 are overwritten. */             \
    ILVL_B2_SH(in5, in4, in7, in6, temp0, temp1);                             \
    ILVR_D2_UB(temp6, temp2, temp7, temp3, in4, in6);                         \
    in5 = (v16u8)__msa_ilvl_d((v2i64)temp6, (v2i64)temp2);                    \
    in7 = (v16u8)__msa_ilvl_d((v2i64)temp7, (v2i64)temp3);                    \
    /* Left halves of rows 8..15, then build outputs 8..11. */                \
    ILVL_B4_SH(in9, in8, in11, in10, in13, in12, in15, in14, temp2, temp3,    \
               temp4, temp5);                                                 \
    ILVR_H4_SH(temp9, temp8, temp1, temp0, temp3, temp2, temp5, temp4, temp6, \
               temp7, temp8, temp9);                                          \
    ILVR_W2_SH(temp7, temp6, temp9, temp8, temp0, temp1);                     \
    in8 = (v16u8)__msa_ilvr_d((v2i64)temp1, (v2i64)temp0);                    \
    in9 = (v16u8)__msa_ilvl_d((v2i64)temp1, (v2i64)temp0);                    \
    ILVL_W2_SH(temp7, temp6, temp9, temp8, temp2, temp3);                     \
    in10 = (v16u8)__msa_ilvr_d((v2i64)temp3, (v2i64)temp2);                   \
    in11 = (v16u8)__msa_ilvl_d((v2i64)temp3, (v2i64)temp2);                   \
  }
|
||||
|
||||
/* In-place byte transpose used by the horizontal pass of the chroma filter.
 *
 * Reads in0..in7 and writes in0..in11 (in8..in11 are outputs only).  Each
 * even-numbered output holds an interleave result; the following
 * odd-numbered output is that same result's upper doubleword moved down.
 * NOTE(review): judging by the name, the outputs are the first 12 byte
 * columns of the 8 input rows -- confirm against the interleave macros. */
#define AOM_TRANSPOSE12x8_UB_UB(in0, in1, in2, in3, in4, in5, in6, in7, in8, \
                                in9, in10, in11)                             \
  {                                                                          \
    v8i16 tr0, tr1, tr2, tr3;                                                \
    v8i16 tr4, tr5, tr6, tr7;                                                \
                                                                             \
    /* Right (low) halves of the eight rows -> tr0..tr3. */                  \
    ILVR_B2_SH(in1, in0, in3, in2, tr0, tr1);                                \
    ILVRL_H2_SH(tr1, tr0, tr2, tr3);                                         \
    ILVR_B2_SH(in5, in4, in7, in6, tr0, tr1);                                \
    ILVRL_H2_SH(tr1, tr0, tr4, tr5);                                         \
    ILVRL_W2_SH(tr4, tr2, tr0, tr1);                                         \
    ILVRL_W2_SH(tr5, tr3, tr2, tr3);                                         \
                                                                             \
    /* Left (high) halves of the eight rows -> tr6, tr7. */                  \
    ILVL_B2_SH(in1, in0, in3, in2, tr4, tr5);                                \
    tr4 = __msa_ilvr_h(tr5, tr4);                                            \
    ILVL_B2_SH(in5, in4, in7, in6, tr6, tr7);                                \
    tr5 = __msa_ilvr_h(tr7, tr6);                                            \
    ILVRL_W2_SH(tr5, tr4, tr6, tr7);                                         \
                                                                             \
    /* All inputs have been consumed; emit the outputs pairwise. */          \
    in0 = (v16u8)tr0;                                                        \
    in1 = (v16u8)__msa_ilvl_d((v2i64)tr0, (v2i64)tr0);                       \
    in2 = (v16u8)tr1;                                                        \
    in3 = (v16u8)__msa_ilvl_d((v2i64)tr1, (v2i64)tr1);                       \
    in4 = (v16u8)tr2;                                                        \
    in5 = (v16u8)__msa_ilvl_d((v2i64)tr2, (v2i64)tr2);                       \
    in6 = (v16u8)tr3;                                                        \
    in7 = (v16u8)__msa_ilvl_d((v2i64)tr3, (v2i64)tr3);                       \
    in8 = (v16u8)tr6;                                                        \
    in9 = (v16u8)__msa_ilvl_d((v2i64)tr6, (v2i64)tr6);                       \
    in10 = (v16u8)tr7;                                                       \
    in11 = (v16u8)__msa_ilvl_d((v2i64)tr7, (v2i64)tr7);                      \
  }
|
||||
|
||||
static void postproc_down_across_chroma_msa(uint8_t *src_ptr, uint8_t *dst_ptr,
|
||||
int32_t src_stride,
|
||||
int32_t dst_stride, int32_t cols,
|
||||
uint8_t *f) {
|
||||
uint8_t *p_src = src_ptr;
|
||||
uint8_t *p_dst = dst_ptr;
|
||||
uint8_t *f_orig = f;
|
||||
uint8_t *p_dst_st = dst_ptr;
|
||||
uint16_t col;
|
||||
uint64_t out0, out1, out2, out3;
|
||||
v16u8 above2, above1, below2, below1, src, ref, ref_temp;
|
||||
v16u8 inter0, inter1, inter2, inter3, inter4, inter5;
|
||||
v16u8 inter6, inter7, inter8, inter9, inter10, inter11;
|
||||
|
||||
for (col = (cols / 16); col--;) {
|
||||
ref = LD_UB(f);
|
||||
LD_UB2(p_src - 2 * src_stride, src_stride, above2, above1);
|
||||
src = LD_UB(p_src);
|
||||
LD_UB2(p_src + 1 * src_stride, src_stride, below1, below2);
|
||||
AOM_AVER_IF_RETAIN(above2, above1, src, below1, below2, ref, inter0);
|
||||
above2 = LD_UB(p_src + 3 * src_stride);
|
||||
AOM_AVER_IF_RETAIN(above1, src, below1, below2, above2, ref, inter1);
|
||||
above1 = LD_UB(p_src + 4 * src_stride);
|
||||
AOM_AVER_IF_RETAIN(src, below1, below2, above2, above1, ref, inter2);
|
||||
src = LD_UB(p_src + 5 * src_stride);
|
||||
AOM_AVER_IF_RETAIN(below1, below2, above2, above1, src, ref, inter3);
|
||||
below1 = LD_UB(p_src + 6 * src_stride);
|
||||
AOM_AVER_IF_RETAIN(below2, above2, above1, src, below1, ref, inter4);
|
||||
below2 = LD_UB(p_src + 7 * src_stride);
|
||||
AOM_AVER_IF_RETAIN(above2, above1, src, below1, below2, ref, inter5);
|
||||
above2 = LD_UB(p_src + 8 * src_stride);
|
||||
AOM_AVER_IF_RETAIN(above1, src, below1, below2, above2, ref, inter6);
|
||||
above1 = LD_UB(p_src + 9 * src_stride);
|
||||
AOM_AVER_IF_RETAIN(src, below1, below2, above2, above1, ref, inter7);
|
||||
ST_UB8(inter0, inter1, inter2, inter3, inter4, inter5, inter6, inter7,
|
||||
p_dst, dst_stride);
|
||||
|
||||
p_dst += 16;
|
||||
p_src += 16;
|
||||
f += 16;
|
||||
}
|
||||
|
||||
if (0 != (cols / 16)) {
|
||||
ref = LD_UB(f);
|
||||
LD_UB2(p_src - 2 * src_stride, src_stride, above2, above1);
|
||||
src = LD_UB(p_src);
|
||||
LD_UB2(p_src + 1 * src_stride, src_stride, below1, below2);
|
||||
AOM_AVER_IF_RETAIN(above2, above1, src, below1, below2, ref, inter0);
|
||||
above2 = LD_UB(p_src + 3 * src_stride);
|
||||
AOM_AVER_IF_RETAIN(above1, src, below1, below2, above2, ref, inter1);
|
||||
above1 = LD_UB(p_src + 4 * src_stride);
|
||||
AOM_AVER_IF_RETAIN(src, below1, below2, above2, above1, ref, inter2);
|
||||
src = LD_UB(p_src + 5 * src_stride);
|
||||
AOM_AVER_IF_RETAIN(below1, below2, above2, above1, src, ref, inter3);
|
||||
below1 = LD_UB(p_src + 6 * src_stride);
|
||||
AOM_AVER_IF_RETAIN(below2, above2, above1, src, below1, ref, inter4);
|
||||
below2 = LD_UB(p_src + 7 * src_stride);
|
||||
AOM_AVER_IF_RETAIN(above2, above1, src, below1, below2, ref, inter5);
|
||||
above2 = LD_UB(p_src + 8 * src_stride);
|
||||
AOM_AVER_IF_RETAIN(above1, src, below1, below2, above2, ref, inter6);
|
||||
above1 = LD_UB(p_src + 9 * src_stride);
|
||||
AOM_AVER_IF_RETAIN(src, below1, below2, above2, above1, ref, inter7);
|
||||
out0 = __msa_copy_u_d((v2i64)inter0, 0);
|
||||
out1 = __msa_copy_u_d((v2i64)inter1, 0);
|
||||
out2 = __msa_copy_u_d((v2i64)inter2, 0);
|
||||
out3 = __msa_copy_u_d((v2i64)inter3, 0);
|
||||
SD4(out0, out1, out2, out3, p_dst, dst_stride);
|
||||
|
||||
out0 = __msa_copy_u_d((v2i64)inter4, 0);
|
||||
out1 = __msa_copy_u_d((v2i64)inter5, 0);
|
||||
out2 = __msa_copy_u_d((v2i64)inter6, 0);
|
||||
out3 = __msa_copy_u_d((v2i64)inter7, 0);
|
||||
SD4(out0, out1, out2, out3, p_dst + 4 * dst_stride, dst_stride);
|
||||
}
|
||||
|
||||
f = f_orig;
|
||||
p_dst = dst_ptr - 2;
|
||||
LD_UB8(p_dst, dst_stride, inter0, inter1, inter2, inter3, inter4, inter5,
|
||||
inter6, inter7);
|
||||
|
||||
for (col = 0; col < (cols / 8); ++col) {
|
||||
ref = LD_UB(f);
|
||||
f += 8;
|
||||
AOM_TRANSPOSE12x8_UB_UB(inter0, inter1, inter2, inter3, inter4, inter5,
|
||||
inter6, inter7, inter8, inter9, inter10, inter11);
|
||||
if (0 == col) {
|
||||
above2 = inter2;
|
||||
above1 = inter2;
|
||||
} else {
|
||||
above2 = inter0;
|
||||
above1 = inter1;
|
||||
}
|
||||
src = inter2;
|
||||
below1 = inter3;
|
||||
below2 = inter4;
|
||||
ref_temp = (v16u8)__msa_splati_b((v16i8)ref, 0);
|
||||
AOM_AVER_IF_RETAIN(above2, above1, src, below1, below2, ref_temp, inter2);
|
||||
above2 = inter5;
|
||||
ref_temp = (v16u8)__msa_splati_b((v16i8)ref, 1);
|
||||
AOM_AVER_IF_RETAIN(above1, src, below1, below2, above2, ref_temp, inter3);
|
||||
above1 = inter6;
|
||||
ref_temp = (v16u8)__msa_splati_b((v16i8)ref, 2);
|
||||
AOM_AVER_IF_RETAIN(src, below1, below2, above2, above1, ref_temp, inter4);
|
||||
src = inter7;
|
||||
ref_temp = (v16u8)__msa_splati_b((v16i8)ref, 3);
|
||||
AOM_AVER_IF_RETAIN(below1, below2, above2, above1, src, ref_temp, inter5);
|
||||
below1 = inter8;
|
||||
ref_temp = (v16u8)__msa_splati_b((v16i8)ref, 4);
|
||||
AOM_AVER_IF_RETAIN(below2, above2, above1, src, below1, ref_temp, inter6);
|
||||
below2 = inter9;
|
||||
ref_temp = (v16u8)__msa_splati_b((v16i8)ref, 5);
|
||||
AOM_AVER_IF_RETAIN(above2, above1, src, below1, below2, ref_temp, inter7);
|
||||
if (col == (cols / 8 - 1)) {
|
||||
above2 = inter9;
|
||||
} else {
|
||||
above2 = inter10;
|
||||
}
|
||||
ref_temp = (v16u8)__msa_splati_b((v16i8)ref, 6);
|
||||
AOM_AVER_IF_RETAIN(above1, src, below1, below2, above2, ref_temp, inter8);
|
||||
if (col == (cols / 8 - 1)) {
|
||||
above1 = inter9;
|
||||
} else {
|
||||
above1 = inter11;
|
||||
}
|
||||
ref_temp = (v16u8)__msa_splati_b((v16i8)ref, 7);
|
||||
AOM_AVER_IF_RETAIN(src, below1, below2, above2, above1, ref_temp, inter9);
|
||||
TRANSPOSE8x8_UB_UB(inter2, inter3, inter4, inter5, inter6, inter7, inter8,
|
||||
inter9, inter2, inter3, inter4, inter5, inter6, inter7,
|
||||
inter8, inter9);
|
||||
p_dst += 8;
|
||||
LD_UB2(p_dst, dst_stride, inter0, inter1);
|
||||
ST8x1_UB(inter2, p_dst_st);
|
||||
ST8x1_UB(inter3, (p_dst_st + 1 * dst_stride));
|
||||
LD_UB2(p_dst + 2 * dst_stride, dst_stride, inter2, inter3);
|
||||
ST8x1_UB(inter4, (p_dst_st + 2 * dst_stride));
|
||||
ST8x1_UB(inter5, (p_dst_st + 3 * dst_stride));
|
||||
LD_UB2(p_dst + 4 * dst_stride, dst_stride, inter4, inter5);
|
||||
ST8x1_UB(inter6, (p_dst_st + 4 * dst_stride));
|
||||
ST8x1_UB(inter7, (p_dst_st + 5 * dst_stride));
|
||||
LD_UB2(p_dst + 6 * dst_stride, dst_stride, inter6, inter7);
|
||||
ST8x1_UB(inter8, (p_dst_st + 6 * dst_stride));
|
||||
ST8x1_UB(inter9, (p_dst_st + 7 * dst_stride));
|
||||
p_dst_st += 8;
|
||||
}
|
||||
}
|
||||
|
||||
static void postproc_down_across_luma_msa(uint8_t *src_ptr, uint8_t *dst_ptr,
|
||||
int32_t src_stride,
|
||||
int32_t dst_stride, int32_t cols,
|
||||
uint8_t *f) {
|
||||
uint8_t *p_src = src_ptr;
|
||||
uint8_t *p_dst = dst_ptr;
|
||||
uint8_t *p_dst_st = dst_ptr;
|
||||
uint8_t *f_orig = f;
|
||||
uint16_t col;
|
||||
v16u8 above2, above1, below2, below1;
|
||||
v16u8 src, ref, ref_temp;
|
||||
v16u8 inter0, inter1, inter2, inter3, inter4, inter5, inter6;
|
||||
v16u8 inter7, inter8, inter9, inter10, inter11;
|
||||
v16u8 inter12, inter13, inter14, inter15;
|
||||
|
||||
for (col = (cols / 16); col--;) {
|
||||
ref = LD_UB(f);
|
||||
LD_UB2(p_src - 2 * src_stride, src_stride, above2, above1);
|
||||
src = LD_UB(p_src);
|
||||
LD_UB2(p_src + 1 * src_stride, src_stride, below1, below2);
|
||||
AOM_AVER_IF_RETAIN(above2, above1, src, below1, below2, ref, inter0);
|
||||
above2 = LD_UB(p_src + 3 * src_stride);
|
||||
AOM_AVER_IF_RETAIN(above1, src, below1, below2, above2, ref, inter1);
|
||||
above1 = LD_UB(p_src + 4 * src_stride);
|
||||
AOM_AVER_IF_RETAIN(src, below1, below2, above2, above1, ref, inter2);
|
||||
src = LD_UB(p_src + 5 * src_stride);
|
||||
AOM_AVER_IF_RETAIN(below1, below2, above2, above1, src, ref, inter3);
|
||||
below1 = LD_UB(p_src + 6 * src_stride);
|
||||
AOM_AVER_IF_RETAIN(below2, above2, above1, src, below1, ref, inter4);
|
||||
below2 = LD_UB(p_src + 7 * src_stride);
|
||||
AOM_AVER_IF_RETAIN(above2, above1, src, below1, below2, ref, inter5);
|
||||
above2 = LD_UB(p_src + 8 * src_stride);
|
||||
AOM_AVER_IF_RETAIN(above1, src, below1, below2, above2, ref, inter6);
|
||||
above1 = LD_UB(p_src + 9 * src_stride);
|
||||
AOM_AVER_IF_RETAIN(src, below1, below2, above2, above1, ref, inter7);
|
||||
src = LD_UB(p_src + 10 * src_stride);
|
||||
AOM_AVER_IF_RETAIN(below1, below2, above2, above1, src, ref, inter8);
|
||||
below1 = LD_UB(p_src + 11 * src_stride);
|
||||
AOM_AVER_IF_RETAIN(below2, above2, above1, src, below1, ref, inter9);
|
||||
below2 = LD_UB(p_src + 12 * src_stride);
|
||||
AOM_AVER_IF_RETAIN(above2, above1, src, below1, below2, ref, inter10);
|
||||
above2 = LD_UB(p_src + 13 * src_stride);
|
||||
AOM_AVER_IF_RETAIN(above1, src, below1, below2, above2, ref, inter11);
|
||||
above1 = LD_UB(p_src + 14 * src_stride);
|
||||
AOM_AVER_IF_RETAIN(src, below1, below2, above2, above1, ref, inter12);
|
||||
src = LD_UB(p_src + 15 * src_stride);
|
||||
AOM_AVER_IF_RETAIN(below1, below2, above2, above1, src, ref, inter13);
|
||||
below1 = LD_UB(p_src + 16 * src_stride);
|
||||
AOM_AVER_IF_RETAIN(below2, above2, above1, src, below1, ref, inter14);
|
||||
below2 = LD_UB(p_src + 17 * src_stride);
|
||||
AOM_AVER_IF_RETAIN(above2, above1, src, below1, below2, ref, inter15);
|
||||
ST_UB8(inter0, inter1, inter2, inter3, inter4, inter5, inter6, inter7,
|
||||
p_dst, dst_stride);
|
||||
ST_UB8(inter8, inter9, inter10, inter11, inter12, inter13, inter14, inter15,
|
||||
p_dst + 8 * dst_stride, dst_stride);
|
||||
p_src += 16;
|
||||
p_dst += 16;
|
||||
f += 16;
|
||||
}
|
||||
|
||||
f = f_orig;
|
||||
p_dst = dst_ptr - 2;
|
||||
LD_UB8(p_dst, dst_stride, inter0, inter1, inter2, inter3, inter4, inter5,
|
||||
inter6, inter7);
|
||||
LD_UB8(p_dst + 8 * dst_stride, dst_stride, inter8, inter9, inter10, inter11,
|
||||
inter12, inter13, inter14, inter15);
|
||||
|
||||
for (col = 0; col < cols / 8; ++col) {
|
||||
ref = LD_UB(f);
|
||||
f += 8;
|
||||
TRANSPOSE12x16_B(inter0, inter1, inter2, inter3, inter4, inter5, inter6,
|
||||
inter7, inter8, inter9, inter10, inter11, inter12, inter13,
|
||||
inter14, inter15);
|
||||
if (0 == col) {
|
||||
above2 = inter2;
|
||||
above1 = inter2;
|
||||
} else {
|
||||
above2 = inter0;
|
||||
above1 = inter1;
|
||||
}
|
||||
|
||||
src = inter2;
|
||||
below1 = inter3;
|
||||
below2 = inter4;
|
||||
ref_temp = (v16u8)__msa_splati_b((v16i8)ref, 0);
|
||||
AOM_AVER_IF_RETAIN(above2, above1, src, below1, below2, ref_temp, inter2);
|
||||
above2 = inter5;
|
||||
ref_temp = (v16u8)__msa_splati_b((v16i8)ref, 1);
|
||||
AOM_AVER_IF_RETAIN(above1, src, below1, below2, above2, ref_temp, inter3);
|
||||
above1 = inter6;
|
||||
ref_temp = (v16u8)__msa_splati_b((v16i8)ref, 2);
|
||||
AOM_AVER_IF_RETAIN(src, below1, below2, above2, above1, ref_temp, inter4);
|
||||
src = inter7;
|
||||
ref_temp = (v16u8)__msa_splati_b((v16i8)ref, 3);
|
||||
AOM_AVER_IF_RETAIN(below1, below2, above2, above1, src, ref_temp, inter5);
|
||||
below1 = inter8;
|
||||
ref_temp = (v16u8)__msa_splati_b((v16i8)ref, 4);
|
||||
AOM_AVER_IF_RETAIN(below2, above2, above1, src, below1, ref_temp, inter6);
|
||||
below2 = inter9;
|
||||
ref_temp = (v16u8)__msa_splati_b((v16i8)ref, 5);
|
||||
AOM_AVER_IF_RETAIN(above2, above1, src, below1, below2, ref_temp, inter7);
|
||||
if (col == (cols / 8 - 1)) {
|
||||
above2 = inter9;
|
||||
} else {
|
||||
above2 = inter10;
|
||||
}
|
||||
ref_temp = (v16u8)__msa_splati_b((v16i8)ref, 6);
|
||||
AOM_AVER_IF_RETAIN(above1, src, below1, below2, above2, ref_temp, inter8);
|
||||
if (col == (cols / 8 - 1)) {
|
||||
above1 = inter9;
|
||||
} else {
|
||||
above1 = inter11;
|
||||
}
|
||||
ref_temp = (v16u8)__msa_splati_b((v16i8)ref, 7);
|
||||
AOM_AVER_IF_RETAIN(src, below1, below2, above2, above1, ref_temp, inter9);
|
||||
AOM_TRANSPOSE8x16_UB_UB(inter2, inter3, inter4, inter5, inter6, inter7,
|
||||
inter8, inter9, inter2, inter3, inter4, inter5,
|
||||
inter6, inter7, inter8, inter9, inter10, inter11,
|
||||
inter12, inter13, inter14, inter15, above2, above1);
|
||||
|
||||
p_dst += 8;
|
||||
LD_UB2(p_dst, dst_stride, inter0, inter1);
|
||||
ST8x1_UB(inter2, p_dst_st);
|
||||
ST8x1_UB(inter3, (p_dst_st + 1 * dst_stride));
|
||||
LD_UB2(p_dst + 2 * dst_stride, dst_stride, inter2, inter3);
|
||||
ST8x1_UB(inter4, (p_dst_st + 2 * dst_stride));
|
||||
ST8x1_UB(inter5, (p_dst_st + 3 * dst_stride));
|
||||
LD_UB2(p_dst + 4 * dst_stride, dst_stride, inter4, inter5);
|
||||
ST8x1_UB(inter6, (p_dst_st + 4 * dst_stride));
|
||||
ST8x1_UB(inter7, (p_dst_st + 5 * dst_stride));
|
||||
LD_UB2(p_dst + 6 * dst_stride, dst_stride, inter6, inter7);
|
||||
ST8x1_UB(inter8, (p_dst_st + 6 * dst_stride));
|
||||
ST8x1_UB(inter9, (p_dst_st + 7 * dst_stride));
|
||||
LD_UB2(p_dst + 8 * dst_stride, dst_stride, inter8, inter9);
|
||||
ST8x1_UB(inter10, (p_dst_st + 8 * dst_stride));
|
||||
ST8x1_UB(inter11, (p_dst_st + 9 * dst_stride));
|
||||
LD_UB2(p_dst + 10 * dst_stride, dst_stride, inter10, inter11);
|
||||
ST8x1_UB(inter12, (p_dst_st + 10 * dst_stride));
|
||||
ST8x1_UB(inter13, (p_dst_st + 11 * dst_stride));
|
||||
LD_UB2(p_dst + 12 * dst_stride, dst_stride, inter12, inter13);
|
||||
ST8x1_UB(inter14, (p_dst_st + 12 * dst_stride));
|
||||
ST8x1_UB(inter15, (p_dst_st + 13 * dst_stride));
|
||||
LD_UB2(p_dst + 14 * dst_stride, dst_stride, inter14, inter15);
|
||||
ST8x1_UB(above2, (p_dst_st + 14 * dst_stride));
|
||||
ST8x1_UB(above1, (p_dst_st + 15 * dst_stride));
|
||||
p_dst_st += 8;
|
||||
}
|
||||
}
|
||||
|
||||
/* Entry point for the down-and-across post-processing filter on one band of
 * rows.  `size` selects the band height: 8 dispatches to the chroma routine,
 * 16 to the luma routine.  Any other value leaves dst untouched. */
void aom_post_proc_down_and_across_mb_row_msa(uint8_t *src, uint8_t *dst,
                                              int32_t src_stride,
                                              int32_t dst_stride, int32_t cols,
                                              uint8_t *f, int32_t size) {
  switch (size) {
    case 8:
      postproc_down_across_chroma_msa(src, dst, src_stride, dst_stride, cols,
                                      f);
      break;
    case 16:
      postproc_down_across_luma_msa(src, dst, src_stride, dst_stride, cols, f);
      break;
    default:
      /* Unsupported band height: intentionally a no-op. */
      break;
  }
}
|
||||
|
||||
void aom_mbpost_proc_across_ip_msa(uint8_t *src_ptr, int32_t pitch,
|
||||
int32_t rows, int32_t cols, int32_t flimit) {
|
||||
int32_t row, col, cnt;
|
||||
uint8_t *src_dup = src_ptr;
|
||||
v16u8 src0, src, tmp_orig;
|
||||
v16u8 tmp = { 0 };
|
||||
v16i8 zero = { 0 };
|
||||
v8u16 sum_h, src_r_h, src_l_h;
|
||||
v4u32 src_r_w, src_l_w;
|
||||
v4i32 flimit_vec;
|
||||
|
||||
flimit_vec = __msa_fill_w(flimit);
|
||||
for (row = rows; row--;) {
|
||||
int32_t sum_sq = 0;
|
||||
int32_t sum = 0;
|
||||
src0 = (v16u8)__msa_fill_b(src_dup[0]);
|
||||
ST8x1_UB(src0, (src_dup - 8));
|
||||
|
||||
src0 = (v16u8)__msa_fill_b(src_dup[cols - 1]);
|
||||
ST_UB(src0, src_dup + cols);
|
||||
src_dup[cols + 16] = src_dup[cols - 1];
|
||||
tmp_orig = (v16u8)__msa_ldi_b(0);
|
||||
tmp_orig[15] = tmp[15];
|
||||
src = LD_UB(src_dup - 8);
|
||||
src[15] = 0;
|
||||
ILVRL_B2_UH(zero, src, src_r_h, src_l_h);
|
||||
src_r_w = __msa_dotp_u_w(src_r_h, src_r_h);
|
||||
src_l_w = __msa_dotp_u_w(src_l_h, src_l_h);
|
||||
sum_sq = HADD_SW_S32(src_r_w);
|
||||
sum_sq += HADD_SW_S32(src_l_w);
|
||||
sum_h = __msa_hadd_u_h(src, src);
|
||||
sum = HADD_UH_U32(sum_h);
|
||||
{
|
||||
v16u8 src7, src8, src_r, src_l;
|
||||
v16i8 mask;
|
||||
v8u16 add_r, add_l;
|
||||
v8i16 sub_r, sub_l, sum_r, sum_l, mask0, mask1;
|
||||
v4i32 sum_sq0, sum_sq1, sum_sq2, sum_sq3;
|
||||
v4i32 sub0, sub1, sub2, sub3;
|
||||
v4i32 sum0_w, sum1_w, sum2_w, sum3_w;
|
||||
v4i32 mul0, mul1, mul2, mul3;
|
||||
v4i32 total0, total1, total2, total3;
|
||||
v8i16 const8 = __msa_fill_h(8);
|
||||
|
||||
src7 = LD_UB(src_dup + 7);
|
||||
src8 = LD_UB(src_dup - 8);
|
||||
for (col = 0; col < (cols >> 4); ++col) {
|
||||
ILVRL_B2_UB(src7, src8, src_r, src_l);
|
||||
HSUB_UB2_SH(src_r, src_l, sub_r, sub_l);
|
||||
|
||||
sum_r[0] = sum + sub_r[0];
|
||||
for (cnt = 0; cnt < 7; ++cnt) {
|
||||
sum_r[cnt + 1] = sum_r[cnt] + sub_r[cnt + 1];
|
||||
}
|
||||
sum_l[0] = sum_r[7] + sub_l[0];
|
||||
for (cnt = 0; cnt < 7; ++cnt) {
|
||||
sum_l[cnt + 1] = sum_l[cnt] + sub_l[cnt + 1];
|
||||
}
|
||||
sum = sum_l[7];
|
||||
src = LD_UB(src_dup + 16 * col);
|
||||
ILVRL_B2_UH(zero, src, src_r_h, src_l_h);
|
||||
src7 = (v16u8)((const8 + sum_r + (v8i16)src_r_h) >> 4);
|
||||
src8 = (v16u8)((const8 + sum_l + (v8i16)src_l_h) >> 4);
|
||||
tmp = (v16u8)__msa_pckev_b((v16i8)src8, (v16i8)src7);
|
||||
|
||||
HADD_UB2_UH(src_r, src_l, add_r, add_l);
|
||||
UNPCK_SH_SW(sub_r, sub0, sub1);
|
||||
UNPCK_SH_SW(sub_l, sub2, sub3);
|
||||
ILVR_H2_SW(zero, add_r, zero, add_l, sum0_w, sum2_w);
|
||||
ILVL_H2_SW(zero, add_r, zero, add_l, sum1_w, sum3_w);
|
||||
MUL4(sum0_w, sub0, sum1_w, sub1, sum2_w, sub2, sum3_w, sub3, mul0, mul1,
|
||||
mul2, mul3);
|
||||
sum_sq0[0] = sum_sq + mul0[0];
|
||||
for (cnt = 0; cnt < 3; ++cnt) {
|
||||
sum_sq0[cnt + 1] = sum_sq0[cnt] + mul0[cnt + 1];
|
||||
}
|
||||
sum_sq1[0] = sum_sq0[3] + mul1[0];
|
||||
for (cnt = 0; cnt < 3; ++cnt) {
|
||||
sum_sq1[cnt + 1] = sum_sq1[cnt] + mul1[cnt + 1];
|
||||
}
|
||||
sum_sq2[0] = sum_sq1[3] + mul2[0];
|
||||
for (cnt = 0; cnt < 3; ++cnt) {
|
||||
sum_sq2[cnt + 1] = sum_sq2[cnt] + mul2[cnt + 1];
|
||||
}
|
||||
sum_sq3[0] = sum_sq2[3] + mul3[0];
|
||||
for (cnt = 0; cnt < 3; ++cnt) {
|
||||
sum_sq3[cnt + 1] = sum_sq3[cnt] + mul3[cnt + 1];
|
||||
}
|
||||
sum_sq = sum_sq3[3];
|
||||
|
||||
UNPCK_SH_SW(sum_r, sum0_w, sum1_w);
|
||||
UNPCK_SH_SW(sum_l, sum2_w, sum3_w);
|
||||
total0 = sum_sq0 * __msa_ldi_w(15);
|
||||
total0 -= sum0_w * sum0_w;
|
||||
total1 = sum_sq1 * __msa_ldi_w(15);
|
||||
total1 -= sum1_w * sum1_w;
|
||||
total2 = sum_sq2 * __msa_ldi_w(15);
|
||||
total2 -= sum2_w * sum2_w;
|
||||
total3 = sum_sq3 * __msa_ldi_w(15);
|
||||
total3 -= sum3_w * sum3_w;
|
||||
total0 = (total0 < flimit_vec);
|
||||
total1 = (total1 < flimit_vec);
|
||||
total2 = (total2 < flimit_vec);
|
||||
total3 = (total3 < flimit_vec);
|
||||
PCKEV_H2_SH(total1, total0, total3, total2, mask0, mask1);
|
||||
mask = __msa_pckev_b((v16i8)mask1, (v16i8)mask0);
|
||||
tmp = __msa_bmz_v(tmp, src, (v16u8)mask);
|
||||
|
||||
if (col == 0) {
|
||||
uint64_t src_d;
|
||||
|
||||
src_d = __msa_copy_u_d((v2i64)tmp_orig, 1);
|
||||
SD(src_d, (src_dup - 8));
|
||||
}
|
||||
|
||||
src7 = LD_UB(src_dup + 16 * (col + 1) + 7);
|
||||
src8 = LD_UB(src_dup + 16 * (col + 1) - 8);
|
||||
ST_UB(tmp, (src_dup + (16 * col)));
|
||||
}
|
||||
|
||||
src_dup += pitch;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void aom_mbpost_proc_down_msa(uint8_t *dst_ptr, int32_t pitch, int32_t rows,
|
||||
int32_t cols, int32_t flimit) {
|
||||
int32_t row, col, cnt, i;
|
||||
const int16_t *rv3 = &aom_rv[63 & rand()];
|
||||
v4i32 flimit_vec;
|
||||
v16u8 dst7, dst8, dst_r_b, dst_l_b;
|
||||
v16i8 mask;
|
||||
v8u16 add_r, add_l;
|
||||
v8i16 dst_r_h, dst_l_h, sub_r, sub_l, mask0, mask1;
|
||||
v4i32 sub0, sub1, sub2, sub3, total0, total1, total2, total3;
|
||||
|
||||
flimit_vec = __msa_fill_w(flimit);
|
||||
|
||||
for (col = 0; col < (cols >> 4); ++col) {
|
||||
uint8_t *dst_tmp = &dst_ptr[col << 4];
|
||||
v16u8 dst;
|
||||
v16i8 zero = { 0 };
|
||||
v16u8 tmp[16];
|
||||
v8i16 mult0, mult1, rv2_0, rv2_1;
|
||||
v8i16 sum0_h = { 0 };
|
||||
v8i16 sum1_h = { 0 };
|
||||
v4i32 mul0 = { 0 };
|
||||
v4i32 mul1 = { 0 };
|
||||
v4i32 mul2 = { 0 };
|
||||
v4i32 mul3 = { 0 };
|
||||
v4i32 sum0_w, sum1_w, sum2_w, sum3_w;
|
||||
v4i32 add0, add1, add2, add3;
|
||||
const int16_t *rv2[16];
|
||||
|
||||
dst = LD_UB(dst_tmp);
|
||||
for (cnt = (col << 4), i = 0; i < 16; ++cnt) {
|
||||
rv2[i] = rv3 + ((cnt * 17) & 127);
|
||||
++i;
|
||||
}
|
||||
for (cnt = -8; cnt < 0; ++cnt) {
|
||||
ST_UB(dst, dst_tmp + cnt * pitch);
|
||||
}
|
||||
|
||||
dst = LD_UB((dst_tmp + (rows - 1) * pitch));
|
||||
for (cnt = rows; cnt < rows + 17; ++cnt) {
|
||||
ST_UB(dst, dst_tmp + cnt * pitch);
|
||||
}
|
||||
for (cnt = -8; cnt <= 6; ++cnt) {
|
||||
dst = LD_UB(dst_tmp + (cnt * pitch));
|
||||
UNPCK_UB_SH(dst, dst_r_h, dst_l_h);
|
||||
MUL2(dst_r_h, dst_r_h, dst_l_h, dst_l_h, mult0, mult1);
|
||||
mul0 += (v4i32)__msa_ilvr_h((v8i16)zero, (v8i16)mult0);
|
||||
mul1 += (v4i32)__msa_ilvl_h((v8i16)zero, (v8i16)mult0);
|
||||
mul2 += (v4i32)__msa_ilvr_h((v8i16)zero, (v8i16)mult1);
|
||||
mul3 += (v4i32)__msa_ilvl_h((v8i16)zero, (v8i16)mult1);
|
||||
ADD2(sum0_h, dst_r_h, sum1_h, dst_l_h, sum0_h, sum1_h);
|
||||
}
|
||||
|
||||
for (row = 0; row < (rows + 8); ++row) {
|
||||
for (i = 0; i < 8; ++i) {
|
||||
rv2_0[i] = *(rv2[i] + (row & 127));
|
||||
rv2_1[i] = *(rv2[i + 8] + (row & 127));
|
||||
}
|
||||
dst7 = LD_UB(dst_tmp + (7 * pitch));
|
||||
dst8 = LD_UB(dst_tmp - (8 * pitch));
|
||||
ILVRL_B2_UB(dst7, dst8, dst_r_b, dst_l_b);
|
||||
|
||||
HSUB_UB2_SH(dst_r_b, dst_l_b, sub_r, sub_l);
|
||||
UNPCK_SH_SW(sub_r, sub0, sub1);
|
||||
UNPCK_SH_SW(sub_l, sub2, sub3);
|
||||
sum0_h += sub_r;
|
||||
sum1_h += sub_l;
|
||||
|
||||
HADD_UB2_UH(dst_r_b, dst_l_b, add_r, add_l);
|
||||
|
||||
ILVRL_H2_SW(zero, add_r, add0, add1);
|
||||
ILVRL_H2_SW(zero, add_l, add2, add3);
|
||||
mul0 += add0 * sub0;
|
||||
mul1 += add1 * sub1;
|
||||
mul2 += add2 * sub2;
|
||||
mul3 += add3 * sub3;
|
||||
dst = LD_UB(dst_tmp);
|
||||
ILVRL_B2_SH(zero, dst, dst_r_h, dst_l_h);
|
||||
dst7 = (v16u8)((rv2_0 + sum0_h + dst_r_h) >> 4);
|
||||
dst8 = (v16u8)((rv2_1 + sum1_h + dst_l_h) >> 4);
|
||||
tmp[row & 15] = (v16u8)__msa_pckev_b((v16i8)dst8, (v16i8)dst7);
|
||||
|
||||
UNPCK_SH_SW(sum0_h, sum0_w, sum1_w);
|
||||
UNPCK_SH_SW(sum1_h, sum2_w, sum3_w);
|
||||
total0 = mul0 * __msa_ldi_w(15);
|
||||
total0 -= sum0_w * sum0_w;
|
||||
total1 = mul1 * __msa_ldi_w(15);
|
||||
total1 -= sum1_w * sum1_w;
|
||||
total2 = mul2 * __msa_ldi_w(15);
|
||||
total2 -= sum2_w * sum2_w;
|
||||
total3 = mul3 * __msa_ldi_w(15);
|
||||
total3 -= sum3_w * sum3_w;
|
||||
total0 = (total0 < flimit_vec);
|
||||
total1 = (total1 < flimit_vec);
|
||||
total2 = (total2 < flimit_vec);
|
||||
total3 = (total3 < flimit_vec);
|
||||
PCKEV_H2_SH(total1, total0, total3, total2, mask0, mask1);
|
||||
mask = __msa_pckev_b((v16i8)mask1, (v16i8)mask0);
|
||||
tmp[row & 15] = __msa_bmz_v(tmp[row & 15], dst, (v16u8)mask);
|
||||
|
||||
if (row >= 8) {
|
||||
ST_UB(tmp[(row - 8) & 15], (dst_tmp - 8 * pitch));
|
||||
}
|
||||
|
||||
dst_tmp += pitch;
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,381 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2016, Alliance for Open Media. All rights reserved
|
||||
*
|
||||
* This source code is subject to the terms of the BSD 2 Clause License and
|
||||
* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
|
||||
* was not distributed with this source code in the LICENSE file, you can
|
||||
* obtain it at www.aomedia.org/license/software. If the Alliance for Open
|
||||
* Media Patent License 1.0 was not distributed with this source code in the
|
||||
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
|
||||
*/
|
||||
|
||||
#ifndef AOM_DSP_MIPS_FWD_TXFM_MSA_H_
|
||||
#define AOM_DSP_MIPS_FWD_TXFM_MSA_H_
|
||||
|
||||
#include "aom_dsp/mips/txfm_macros_msa.h"
|
||||
#include "aom_dsp/txfm_common.h"
|
||||
|
||||
/* Loads eight rows of eight 16-bit values starting at psrc (row pitch
 * `stride`, in elements of *psrc), adds the rows lane-wise, and evaluates to
 * the horizontal sum of the resulting lanes as computed by HADD_SW_S32.
 * The row accumulation is done in 16-bit lanes.
 *
 * `stride` is parenthesised at every use so that an expression argument
 * (e.g. `a + b`) is scaled as a whole when computing the second group's
 * address. */
#define LD_HADD(psrc, stride)                                                  \
  ({                                                                           \
    v8i16 in0_m, in1_m, in2_m, in3_m, in4_m, in5_m, in6_m, in7_m;              \
    v4i32 vec_w_m;                                                             \
                                                                               \
    LD_SH4((psrc), (stride), in0_m, in1_m, in2_m, in3_m);                      \
    ADD2(in0_m, in1_m, in2_m, in3_m, in0_m, in2_m);                            \
    LD_SH4(((psrc) + 4 * (stride)), (stride), in4_m, in5_m, in6_m, in7_m);     \
    ADD4(in4_m, in5_m, in6_m, in7_m, in0_m, in2_m, in4_m, in6_m, in4_m, in6_m, \
         in0_m, in4_m);                                                        \
    in0_m += in4_m;                                                            \
                                                                               \
    vec_w_m = __msa_hadd_s_w(in0_m, in0_m);                                    \
    HADD_SW_S32(vec_w_m);                                                      \
  })
|
||||
|
||||
/* 4-point forward DCT on vectors of 16-bit lanes.
 *
 * in0..in3 are consumed by the opening butterfly before any output is
 * written, so outputs may name the same variables as inputs.  Products are
 * accumulated in 32-bit lanes, rounded with SRARI by DCT_CONST_BITS and
 * packed back to 16 bits.  The final pack writes its results in the order
 * out0, out2, out1, out3. */
#define AOM_FDCT4(in0, in1, in2, in3, out0, out1, out2, out3)                  \
  {                                                                            \
    v8i16 k0_m, k1_m, k2_m, k3_m;                                              \
    v8i16 vec0_m, vec1_m, vec2_m, vec3_m;                                      \
    v4i32 acc0_m, acc1_m, acc2_m, acc3_m;                                      \
    v8i16 coeff_m = {                                                          \
      cospi_16_64, -cospi_16_64, cospi_8_64, cospi_24_64, -cospi_8_64, 0, 0, 0 \
    };                                                                         \
                                                                               \
    BUTTERFLY_4(in0, in1, in2, in3, vec0_m, vec1_m, vec2_m, vec3_m);           \
    ILVR_H2_SH(vec1_m, vec0_m, vec3_m, vec2_m, vec0_m, vec2_m);                \
                                                                               \
    /* Coefficients 0 and 1 drive the two results taken from vec0_m. */        \
    SPLATI_H2_SH(coeff_m, 0, 1, k0_m, k1_m);                                   \
    acc0_m = __msa_dotp_s_w(vec0_m, k0_m);                                     \
    k1_m = __msa_ilvev_h(k1_m, k0_m);                                          \
    acc1_m = __msa_dotp_s_w(vec0_m, k1_m);                                     \
                                                                               \
    /* Coefficients 2, 3 and 4 drive the two results taken from vec2_m. */     \
    SPLATI_H2_SH(coeff_m, 4, 3, k2_m, k3_m);                                   \
    k2_m = __msa_ilvev_h(k3_m, k2_m);                                          \
    acc3_m = __msa_dotp_s_w(vec2_m, k2_m);                                     \
    k2_m = __msa_splati_h(coeff_m, 2);                                         \
    k2_m = __msa_ilvev_h(k2_m, k3_m);                                          \
    acc2_m = __msa_dotp_s_w(vec2_m, k2_m);                                     \
                                                                               \
    SRARI_W4_SW(acc0_m, acc1_m, acc2_m, acc3_m, DCT_CONST_BITS);               \
    PCKEV_H4_SH(acc0_m, acc0_m, acc1_m, acc1_m, acc2_m, acc2_m, acc3_m,        \
                acc3_m, out0, out2, out1, out3);                               \
  }
|
||||
|
||||
/* In-place update of eight vectors of 16-bit lanes: each inN is replaced by
 * the AVE_SH4_SH result of inN and (inN logically shifted right by 15),
 * i.e. of the lane and its own sign bit.
 * NOTE(review): presumably this halves each lane with rounding toward zero;
 * confirm against the definition of AVE_SH4_SH. */
#define SRLI_AVE_S_4V_H(in0, in1, in2, in3, in4, in5, in6, in7)               \
  {                                                                           \
    v8i16 sgn0_m, sgn1_m, sgn2_m, sgn3_m, sgn4_m, sgn5_m, sgn6_m, sgn7_m;     \
                                                                              \
    /* First four vectors. */                                                 \
    SRLI_H4_SH(in0, in1, in2, in3, sgn0_m, sgn1_m, sgn2_m, sgn3_m, 15);       \
    AVE_SH4_SH(sgn0_m, in0, sgn1_m, in1, sgn2_m, in2, sgn3_m, in3, in0, in1,  \
               in2, in3);                                                     \
                                                                              \
    /* Last four vectors. */                                                  \
    SRLI_H4_SH(in4, in5, in6, in7, sgn4_m, sgn5_m, sgn6_m, sgn7_m, 15);       \
    AVE_SH4_SH(sgn4_m, in4, sgn5_m, in5, sgn6_m, in6, sgn7_m, in7, in4, in5,  \
               in6, in7);                                                     \
  }
|
||||
|
||||
/* 8-point forward DCT on vectors of 16-bit lanes.
 *
 * All eight inputs are consumed by the opening BUTTERFLY_8 before any output
 * is assigned, so outputs may name the same variables as inputs.  The even
 * outputs (0, 2, 4, 6) come from the first-stage butterfly sums; the odd
 * outputs (1, 3, 5, 7) come from stages 2-4.  The wk*_m registers are reused
 * both as butterfly results and as splatted coefficient vectors, so the
 * statement order below matters.
 *
 * Local names deliberately avoid those declared inside
 * DOT_SHIFT_RIGHT_PCK_H. */
#define AOM_FDCT8(in0, in1, in2, in3, in4, in5, in6, in7, out0, out1, out2,    \
                  out3, out4, out5, out6, out7)                                \
  {                                                                            \
    v8i16 stg0_m, stg1_m, stg2_m, stg3_m, stg4_m, stg5_m, stg6_m;              \
    v8i16 stg7_m, wk0_m, wk1_m, wk2_m, wk3_m;                                  \
    v8i16 coeff_m = { cospi_16_64, -cospi_16_64, cospi_8_64, cospi_24_64,      \
                      cospi_4_64,  cospi_28_64,  cospi_12_64, cospi_20_64 };   \
                                                                               \
    /* Stage 1: butterflies, then the even-indexed outputs. */                 \
    BUTTERFLY_8(in0, in1, in2, in3, in4, in5, in6, in7, stg0_m, stg1_m,        \
                stg2_m, stg3_m, stg4_m, stg5_m, stg6_m, stg7_m);               \
    BUTTERFLY_4(stg0_m, stg1_m, stg2_m, stg3_m, wk0_m, wk1_m, wk2_m, wk3_m);   \
    ILVL_H2_SH(wk1_m, wk0_m, wk3_m, wk2_m, stg0_m, stg2_m);                    \
    ILVR_H2_SH(wk1_m, wk0_m, wk3_m, wk2_m, stg1_m, stg3_m);                    \
    SPLATI_H2_SH(coeff_m, 0, 1, wk0_m, wk1_m);                                 \
    wk1_m = __msa_ilvev_h(wk1_m, wk0_m);                                       \
    out4 = DOT_SHIFT_RIGHT_PCK_H(stg0_m, stg1_m, wk1_m);                       \
                                                                               \
    SPLATI_H2_SH(coeff_m, 2, 3, wk2_m, wk3_m);                                 \
    wk2_m = -wk2_m;                                                            \
    wk2_m = __msa_ilvev_h(wk3_m, wk2_m);                                       \
    out6 = DOT_SHIFT_RIGHT_PCK_H(stg2_m, stg3_m, wk2_m);                       \
                                                                               \
    out0 = DOT_SHIFT_RIGHT_PCK_H(stg0_m, stg1_m, wk0_m);                       \
    wk2_m = __msa_splati_h(coeff_m, 2);                                        \
    wk2_m = __msa_ilvev_h(wk2_m, wk3_m);                                       \
    out2 = DOT_SHIFT_RIGHT_PCK_H(stg2_m, stg3_m, wk2_m);                       \
                                                                               \
    /* Stage 2: rotate stg5/stg6; wk0_m and wk1_m still hold the */            \
    /* coefficient vectors built from entries 0 and 1 above.     */            \
    ILVRL_H2_SH(stg5_m, stg6_m, stg1_m, stg0_m);                               \
                                                                               \
    stg6_m = DOT_SHIFT_RIGHT_PCK_H(stg0_m, stg1_m, wk0_m);                     \
    stg5_m = DOT_SHIFT_RIGHT_PCK_H(stg0_m, stg1_m, wk1_m);                     \
                                                                               \
    /* Stage 3 */                                                              \
    BUTTERFLY_4(stg4_m, stg7_m, stg6_m, stg5_m, wk0_m, wk3_m, wk2_m, wk1_m);   \
                                                                               \
    /* Stage 4: the odd-indexed outputs. */                                    \
    ILVL_H2_SH(wk3_m, wk0_m, wk2_m, wk1_m, stg4_m, stg6_m);                    \
    ILVR_H2_SH(wk3_m, wk0_m, wk2_m, wk1_m, stg5_m, stg7_m);                    \
                                                                               \
    SPLATI_H2_SH(coeff_m, 4, 5, wk0_m, wk1_m);                                 \
    wk1_m = __msa_ilvev_h(wk0_m, wk1_m);                                       \
    out1 = DOT_SHIFT_RIGHT_PCK_H(stg4_m, stg5_m, wk1_m);                       \
                                                                               \
    SPLATI_H2_SH(coeff_m, 6, 7, wk2_m, wk3_m);                                 \
    wk2_m = __msa_ilvev_h(wk3_m, wk2_m);                                       \
    out5 = DOT_SHIFT_RIGHT_PCK_H(stg6_m, stg7_m, wk2_m);                       \
                                                                               \
    wk1_m = __msa_splati_h(coeff_m, 5);                                        \
    wk0_m = -wk0_m;                                                            \
    wk0_m = __msa_ilvev_h(wk1_m, wk0_m);                                       \
    out7 = DOT_SHIFT_RIGHT_PCK_H(stg4_m, stg5_m, wk0_m);                       \
                                                                               \
    wk2_m = __msa_splati_h(coeff_m, 6);                                        \
    wk3_m = -wk3_m;                                                            \
    wk2_m = __msa_ilvev_h(wk2_m, wk3_m);                                       \
    out3 = DOT_SHIFT_RIGHT_PCK_H(stg6_m, stg7_m, wk2_m);                       \
  }
|
||||
|
||||
/*
 * Maps eight input vectors (in0..in7) to eight output vectors (out0..out7)
 * using butterfly stages followed by rounded dot-products against the cosine
 * constants held in coeff_m.
 * Every input is consumed by the opening BUTTERFLY_8 before any output is
 * assigned.
 * NOTE(review): presumably the even-coefficient half of a 16-point forward
 * DCT, going by the name - confirm against the callers.
 */
#define FDCT8x16_EVEN(in0, in1, in2, in3, in4, in5, in6, in7, out0, out1, \
                      out2, out3, out4, out5, out6, out7) \
  { \
    v8i16 s0_m, s1_m, s2_m, s3_m, s4_m, s5_m, s6_m, s7_m; \
    v8i16 x0_m, x1_m, x2_m, x3_m; \
    v8i16 coeff_m = { cospi_16_64, -cospi_16_64, cospi_8_64, cospi_24_64, \
                      cospi_4_64,  cospi_28_64,  cospi_12_64, cospi_20_64 }; \
    \
    /* stage 1: butterflies, then out4 / out6 / out0 / out2 */ \
    BUTTERFLY_8(in0, in1, in2, in3, in4, in5, in6, in7, s0_m, s1_m, s2_m, \
                s3_m, s4_m, s5_m, s6_m, s7_m); \
    BUTTERFLY_4(s0_m, s1_m, s2_m, s3_m, x0_m, x1_m, x2_m, x3_m); \
    ILVL_H2_SH(x1_m, x0_m, x3_m, x2_m, s0_m, s2_m); \
    ILVR_H2_SH(x1_m, x0_m, x3_m, x2_m, s1_m, s3_m); \
    SPLATI_H2_SH(coeff_m, 0, 1, x0_m, x1_m); \
    x1_m = __msa_ilvev_h(x1_m, x0_m); \
    out4 = DOT_SHIFT_RIGHT_PCK_H(s0_m, s1_m, x1_m); \
    \
    SPLATI_H2_SH(coeff_m, 2, 3, x2_m, x3_m); \
    x2_m = __msa_ilvev_h(x3_m, -x2_m); \
    out6 = DOT_SHIFT_RIGHT_PCK_H(s2_m, s3_m, x2_m); \
    \
    out0 = DOT_SHIFT_RIGHT_PCK_H(s0_m, s1_m, x0_m); \
    x2_m = __msa_ilvev_h(__msa_splati_h(coeff_m, 2), x3_m); \
    out2 = DOT_SHIFT_RIGHT_PCK_H(s2_m, s3_m, x2_m); \
    \
    /* stage 2: reuses x0_m / x1_m built in stage 1 */ \
    ILVRL_H2_SH(s5_m, s6_m, s1_m, s0_m); \
    s6_m = DOT_SHIFT_RIGHT_PCK_H(s0_m, s1_m, x0_m); \
    s5_m = DOT_SHIFT_RIGHT_PCK_H(s0_m, s1_m, x1_m); \
    \
    /* stage 3 */ \
    BUTTERFLY_4(s4_m, s7_m, s6_m, s5_m, x0_m, x3_m, x2_m, x1_m); \
    \
    /* stage 4: out1 / out5 / out7 / out3 */ \
    ILVL_H2_SH(x3_m, x0_m, x2_m, x1_m, s4_m, s6_m); \
    ILVR_H2_SH(x3_m, x0_m, x2_m, x1_m, s5_m, s7_m); \
    \
    SPLATI_H2_SH(coeff_m, 4, 5, x0_m, x1_m); \
    x1_m = __msa_ilvev_h(x0_m, x1_m); \
    out1 = DOT_SHIFT_RIGHT_PCK_H(s4_m, s5_m, x1_m); \
    \
    SPLATI_H2_SH(coeff_m, 6, 7, x2_m, x3_m); \
    x2_m = __msa_ilvev_h(x3_m, x2_m); \
    out5 = DOT_SHIFT_RIGHT_PCK_H(s6_m, s7_m, x2_m); \
    \
    x1_m = __msa_splati_h(coeff_m, 5); \
    x0_m = __msa_ilvev_h(x1_m, -x0_m); \
    out7 = DOT_SHIFT_RIGHT_PCK_H(s4_m, s5_m, x0_m); \
    \
    x2_m = __msa_splati_h(coeff_m, 6); \
    x2_m = __msa_ilvev_h(x2_m, -x3_m); \
    out3 = DOT_SHIFT_RIGHT_PCK_H(s6_m, s7_m, x2_m); \
  }
|
||||
|
||||
/*
 * Maps eight input vectors (input0..input7) to the eight odd-numbered output
 * vectors out1, out3, ..., out15 through interleaves, butterflies and rounded
 * dot-products against constants taken from coeff_m, coeff1_m and coeff2_m.
 * All inputs are read in steps 1 and 2, before the first output is written.
 * Statement order matters: cnst0_m / cnst1_m and vec0_m..vec5_m are reused
 * as scratch from one step to the next.
 */
#define FDCT8x16_ODD(input0, input1, input2, input3, input4, input5, input6, \
                     input7, out1, out3, out5, out7, out9, out11, out13, \
                     out15) \
  { \
    v8i16 stp21_m, stp22_m, stp23_m, stp24_m, stp25_m, stp26_m; \
    v8i16 stp30_m, stp31_m, stp32_m, stp33_m, stp34_m, stp35_m; \
    v8i16 stp36_m, stp37_m, vec0_m, vec1_m; \
    v8i16 vec2_m, vec3_m, vec4_m, vec5_m, vec6_m; \
    v8i16 cnst0_m, cnst1_m, cnst4_m, cnst5_m; \
    v8i16 coeff_m = { cospi_16_64, -cospi_16_64, cospi_8_64,  cospi_24_64, \
                      -cospi_8_64, -cospi_24_64, cospi_12_64, cospi_20_64 }; \
    v8i16 coeff1_m = { cospi_2_64,  cospi_30_64, cospi_14_64, cospi_18_64, \
                       cospi_10_64, cospi_22_64, cospi_6_64,  cospi_26_64 }; \
    v8i16 coeff2_m = { \
      -cospi_2_64, -cospi_10_64, -cospi_18_64, -cospi_26_64, 0, 0, 0, 0 \
    }; \
    \
    /* step 1: input2..input5 */ \
    ILVL_H2_SH(input2, input5, input3, input4, vec2_m, vec4_m); \
    ILVR_H2_SH(input2, input5, input3, input4, vec3_m, vec5_m); \
    \
    cnst4_m = __msa_splati_h(coeff_m, 0); \
    stp25_m = DOT_SHIFT_RIGHT_PCK_H(vec2_m, vec3_m, cnst4_m); \
    \
    cnst5_m = __msa_ilvev_h(__msa_splati_h(coeff_m, 1), cnst4_m); \
    stp22_m = DOT_SHIFT_RIGHT_PCK_H(vec2_m, vec3_m, cnst5_m); \
    stp24_m = DOT_SHIFT_RIGHT_PCK_H(vec4_m, vec5_m, cnst4_m); \
    stp23_m = DOT_SHIFT_RIGHT_PCK_H(vec4_m, vec5_m, cnst5_m); \
    \
    /* step 2: fold in input0, input1, input6, input7 */ \
    BUTTERFLY_4(input0, input1, stp22_m, stp23_m, stp30_m, stp31_m, stp32_m, \
                stp33_m); \
    BUTTERFLY_4(input7, input6, stp25_m, stp24_m, stp37_m, stp36_m, stp35_m, \
                stp34_m); \
    \
    ILVL_H2_SH(stp36_m, stp31_m, stp35_m, stp32_m, vec2_m, vec4_m); \
    ILVR_H2_SH(stp36_m, stp31_m, stp35_m, stp32_m, vec3_m, vec5_m); \
    \
    SPLATI_H2_SH(coeff_m, 2, 3, cnst0_m, cnst1_m); \
    cnst0_m = __msa_ilvev_h(cnst0_m, cnst1_m); \
    stp26_m = DOT_SHIFT_RIGHT_PCK_H(vec2_m, vec3_m, cnst0_m); \
    \
    cnst0_m = __msa_splati_h(coeff_m, 4); \
    cnst1_m = __msa_ilvev_h(cnst1_m, cnst0_m); \
    stp21_m = DOT_SHIFT_RIGHT_PCK_H(vec2_m, vec3_m, cnst1_m); \
    \
    SPLATI_H2_SH(coeff_m, 5, 2, cnst0_m, cnst1_m); \
    cnst1_m = __msa_ilvev_h(cnst0_m, cnst1_m); \
    stp25_m = DOT_SHIFT_RIGHT_PCK_H(vec4_m, vec5_m, cnst1_m); \
    \
    cnst0_m = __msa_splati_h(coeff_m, 3); \
    cnst1_m = __msa_ilvev_h(cnst1_m, cnst0_m); \
    stp22_m = DOT_SHIFT_RIGHT_PCK_H(vec4_m, vec5_m, cnst1_m); \
    \
    /* step 4: final butterflies and the eight outputs */ \
    BUTTERFLY_4(stp30_m, stp37_m, stp26_m, stp21_m, vec6_m, vec2_m, vec4_m, \
                vec5_m); \
    BUTTERFLY_4(stp33_m, stp34_m, stp25_m, stp22_m, stp21_m, stp23_m, stp24_m, \
                stp31_m); \
    \
    /* out1 / out15 */ \
    ILVRL_H2_SH(vec2_m, vec6_m, vec1_m, vec0_m); \
    SPLATI_H2_SH(coeff1_m, 0, 1, cnst0_m, cnst1_m); \
    cnst0_m = __msa_ilvev_h(cnst0_m, cnst1_m); \
    out1 = DOT_SHIFT_RIGHT_PCK_H(vec0_m, vec1_m, cnst0_m); \
    \
    cnst0_m = __msa_ilvev_h(cnst1_m, __msa_splati_h(coeff2_m, 0)); \
    out15 = DOT_SHIFT_RIGHT_PCK_H(vec0_m, vec1_m, cnst0_m); \
    \
    /* out9 / out7 */ \
    ILVRL_H2_SH(vec4_m, vec5_m, vec1_m, vec0_m); \
    SPLATI_H2_SH(coeff1_m, 2, 3, cnst0_m, cnst1_m); \
    cnst1_m = __msa_ilvev_h(cnst1_m, cnst0_m); \
    out9 = DOT_SHIFT_RIGHT_PCK_H(vec0_m, vec1_m, cnst1_m); \
    \
    cnst1_m = __msa_splati_h(coeff2_m, 2); \
    cnst0_m = __msa_ilvev_h(cnst0_m, cnst1_m); \
    out7 = DOT_SHIFT_RIGHT_PCK_H(vec0_m, vec1_m, cnst0_m); \
    \
    /* out5 / out11 */ \
    ILVRL_H2_SH(stp23_m, stp21_m, vec1_m, vec0_m); \
    SPLATI_H2_SH(coeff1_m, 4, 5, cnst0_m, cnst1_m); \
    cnst0_m = __msa_ilvev_h(cnst0_m, cnst1_m); \
    out5 = DOT_SHIFT_RIGHT_PCK_H(vec0_m, vec1_m, cnst0_m); \
    \
    cnst0_m = __msa_ilvev_h(cnst1_m, __msa_splati_h(coeff2_m, 1)); \
    out11 = DOT_SHIFT_RIGHT_PCK_H(vec0_m, vec1_m, cnst0_m); \
    \
    /* out13 / out3 */ \
    ILVRL_H2_SH(stp24_m, stp31_m, vec1_m, vec0_m); \
    SPLATI_H2_SH(coeff1_m, 6, 7, cnst0_m, cnst1_m); \
    cnst1_m = __msa_ilvev_h(cnst1_m, cnst0_m); \
    out13 = DOT_SHIFT_RIGHT_PCK_H(vec0_m, vec1_m, cnst1_m); \
    \
    cnst1_m = __msa_splati_h(coeff2_m, 3); \
    cnst0_m = __msa_ilvev_h(cnst0_m, cnst1_m); \
    out3 = DOT_SHIFT_RIGHT_PCK_H(vec0_m, vec1_m, cnst0_m); \
  }
|
||||
|
||||
/*
 * In-place post-processing of two halfword vectors. Each of vec0 / vec1 gets
 * 1 added, then a further one_m-masked result of the signed "less than 0"
 * compare taken on its original value, and is finally shifted right by 2.
 * Both compares are taken before either vector is modified.
 */
#define FDCT_POSTPROC_2V_NEG_H(vec0, vec1) \
  { \
    v8i16 one_m = __msa_ldi_h(1); \
    v8i16 neg0_m = __msa_clti_s_h(vec0, 0); \
    v8i16 neg1_m = __msa_clti_s_h(vec1, 0); \
    \
    neg0_m = one_m & neg0_m; \
    neg1_m = one_m & neg1_m; \
    vec0 += 1; \
    vec1 += 1; \
    vec0 += neg0_m; \
    vec1 += neg1_m; \
    vec0 >>= 2; \
    vec1 >>= 2; \
  }
|
||||
|
||||
/*
 * In-place post-processing of one word vector: adds 1, adds the one_m-masked
 * result of the signed "less than 0" compare taken on the original value,
 * then shifts right by 2.
 */
#define FDCT32_POSTPROC_NEG_W(vec) \
  { \
    v4i32 one_m = __msa_ldi_w(1); \
    v4i32 neg_m = __msa_clti_s_w(vec, 0); \
    \
    neg_m = one_m & neg_m; \
    vec += 1; \
    vec += neg_m; \
    vec >>= 2; \
  }
|
||||
|
||||
/*
 * In-place post-processing of two halfword vectors. For each of vec0 / vec1
 * the signed "less than or equal to 0" compare is taken on the original
 * value and inverted (xori with 255); the vector then gets 1 added, plus the
 * one_m-masked inverted compare, and is shifted right by 2.
 *
 * The constant-one local is named one_m, like the other locals here and in
 * the sibling post-processing macros, so that it cannot capture a caller
 * argument that happens to be called `one`.
 */
#define FDCT32_POSTPROC_2V_POS_H(vec0, vec1) \
  { \
    v8i16 tp0_m, tp1_m; \
    v8i16 one_m = __msa_ldi_h(1); \
    \
    tp0_m = __msa_clei_s_h(vec0, 0); \
    tp1_m = __msa_clei_s_h(vec1, 0); \
    /* invert the compare result */ \
    tp0_m = (v8i16)__msa_xori_b((v16u8)tp0_m, 255); \
    tp1_m = (v8i16)__msa_xori_b((v16u8)tp1_m, 255); \
    vec0 += 1; \
    vec1 += 1; \
    tp0_m = one_m & tp0_m; \
    tp1_m = one_m & tp1_m; \
    vec0 += tp0_m; \
    vec1 += tp1_m; \
    vec0 >>= 2; \
    vec1 >>= 2; \
  }
|
||||
|
||||
/*
 * Word-precision constant-pair dot product.
 *
 * const0 / const1 are broadcast and interleaved into k0_m. The left and right
 * register pairs are interleaved twice each - once as (reg0, -reg1) and once
 * as (reg1, reg0) - and every interleaved vector is dot-multiplied with k0_m
 * into 64-bit lanes, rounded-shifted by DCT_CONST_BITS and packed back to
 * words:
 *   out0 / out1 : from the (reg0, -reg1) interleaves of left / right
 *   out2 / out3 : from the (reg1, reg0) interleaves of left / right
 *
 * All register arguments are read before the first output is written.
 * Every macro argument is parenthesized so that expression arguments such as
 * `a - b` are negated and cast as a whole.
 */
#define DOTP_CONST_PAIR_W(reg0_left, reg1_left, reg0_right, reg1_right, \
                          const0, const1, out0, out1, out2, out3) \
  { \
    v4i32 s0_m, s1_m, s2_m, s3_m, s4_m, s5_m, s6_m, s7_m; \
    v2i64 tp0_m, tp1_m, tp2_m, tp3_m; \
    v4i32 k0_m = __msa_fill_w((int32_t)(const0)); \
    \
    s0_m = __msa_fill_w((int32_t)(const1)); \
    k0_m = __msa_ilvev_w(s0_m, k0_m); \
    \
    ILVRL_W2_SW(-(reg1_left), (reg0_left), s1_m, s0_m); \
    ILVRL_W2_SW((reg0_left), (reg1_left), s3_m, s2_m); \
    ILVRL_W2_SW(-(reg1_right), (reg0_right), s5_m, s4_m); \
    ILVRL_W2_SW((reg0_right), (reg1_right), s7_m, s6_m); \
    \
    DOTP_SW2_SD(s0_m, s1_m, k0_m, k0_m, tp0_m, tp1_m); \
    DOTP_SW2_SD(s4_m, s5_m, k0_m, k0_m, tp2_m, tp3_m); \
    tp0_m = __msa_srari_d(tp0_m, DCT_CONST_BITS); \
    tp1_m = __msa_srari_d(tp1_m, DCT_CONST_BITS); \
    tp2_m = __msa_srari_d(tp2_m, DCT_CONST_BITS); \
    tp3_m = __msa_srari_d(tp3_m, DCT_CONST_BITS); \
    out0 = __msa_pckev_w((v4i32)tp0_m, (v4i32)tp1_m); \
    out1 = __msa_pckev_w((v4i32)tp2_m, (v4i32)tp3_m); \
    \
    DOTP_SW2_SD(s2_m, s3_m, k0_m, k0_m, tp0_m, tp1_m); \
    DOTP_SW2_SD(s6_m, s7_m, k0_m, k0_m, tp2_m, tp3_m); \
    tp0_m = __msa_srari_d(tp0_m, DCT_CONST_BITS); \
    tp1_m = __msa_srari_d(tp1_m, DCT_CONST_BITS); \
    tp2_m = __msa_srari_d(tp2_m, DCT_CONST_BITS); \
    tp3_m = __msa_srari_d(tp3_m, DCT_CONST_BITS); \
    out2 = __msa_pckev_w((v4i32)tp0_m, (v4i32)tp1_m); \
    out3 = __msa_pckev_w((v4i32)tp2_m, (v4i32)tp3_m); \
  }
|
||||
|
||||
/* Prototypes only; the definitions are not part of this header.
 * NOTE(review): by name, a column pass reading `input` with `src_stride` and
 * writing to `tmp_ptr` - confirm units of the stride against the definition. */
void fdct8x16_1d_column(const int16_t *input, int16_t *tmp_ptr,
|
||||
int32_t src_stride);
|
||||
/* NOTE(review): `input` is non-const here, so the row pass may modify it -
 * confirm against the definition. */
void fdct16x8_1d_row(int16_t *input, int16_t *output);
|
||||
#endif // AOM_DSP_MIPS_FWD_TXFM_MSA_H_
|
||||
@@ -1,117 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2016, Alliance for Open Media. All rights reserved
|
||||
*
|
||||
* This source code is subject to the terms of the BSD 2 Clause License and
|
||||
* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
|
||||
* was not distributed with this source code in the LICENSE file, you can
|
||||
* obtain it at www.aomedia.org/license/software. If the Alliance for Open
|
||||
* Media Patent License 1.0 was not distributed with this source code in the
|
||||
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
|
||||
*/
|
||||
|
||||
#include "aom_dsp/mips/inv_txfm_msa.h"
|
||||
|
||||
void aom_idct8x8_64_add_msa(const int16_t *input, uint8_t *dst,
|
||||
int32_t dst_stride) {
|
||||
v8i16 in0, in1, in2, in3, in4, in5, in6, in7;
|
||||
|
||||
/* load vector elements of 8x8 block */
|
||||
LD_SH8(input, 8, in0, in1, in2, in3, in4, in5, in6, in7);
|
||||
|
||||
/* rows transform */
|
||||
TRANSPOSE8x8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3,
|
||||
in4, in5, in6, in7);
|
||||
/* 1D idct8x8 */
|
||||
AOM_IDCT8x8_1D(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3,
|
||||
in4, in5, in6, in7);
|
||||
/* columns transform */
|
||||
TRANSPOSE8x8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3,
|
||||
in4, in5, in6, in7);
|
||||
/* 1D idct8x8 */
|
||||
AOM_IDCT8x8_1D(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3,
|
||||
in4, in5, in6, in7);
|
||||
/* final rounding (add 2^4, divide by 2^5) and shift */
|
||||
SRARI_H4_SH(in0, in1, in2, in3, 5);
|
||||
SRARI_H4_SH(in4, in5, in6, in7, 5);
|
||||
/* add block and store 8x8 */
|
||||
AOM_ADDBLK_ST8x4_UB(dst, dst_stride, in0, in1, in2, in3);
|
||||
dst += (4 * dst_stride);
|
||||
AOM_ADDBLK_ST8x4_UB(dst, dst_stride, in4, in5, in6, in7);
|
||||
}
|
||||
|
||||
void aom_idct8x8_12_add_msa(const int16_t *input, uint8_t *dst,
|
||||
int32_t dst_stride) {
|
||||
v8i16 in0, in1, in2, in3, in4, in5, in6, in7;
|
||||
v8i16 s0, s1, s2, s3, s4, s5, s6, s7, k0, k1, k2, k3, m0, m1, m2, m3;
|
||||
v4i32 tmp0, tmp1, tmp2, tmp3;
|
||||
v8i16 zero = { 0 };
|
||||
|
||||
/* load vector elements of 8x8 block */
|
||||
LD_SH8(input, 8, in0, in1, in2, in3, in4, in5, in6, in7);
|
||||
TRANSPOSE8X4_SH_SH(in0, in1, in2, in3, in0, in1, in2, in3);
|
||||
|
||||
/* stage1 */
|
||||
ILVL_H2_SH(in3, in0, in2, in1, s0, s1);
|
||||
k0 = AOM_SET_COSPI_PAIR(cospi_28_64, -cospi_4_64);
|
||||
k1 = AOM_SET_COSPI_PAIR(cospi_4_64, cospi_28_64);
|
||||
k2 = AOM_SET_COSPI_PAIR(-cospi_20_64, cospi_12_64);
|
||||
k3 = AOM_SET_COSPI_PAIR(cospi_12_64, cospi_20_64);
|
||||
DOTP_SH4_SW(s0, s0, s1, s1, k0, k1, k2, k3, tmp0, tmp1, tmp2, tmp3);
|
||||
SRARI_W4_SW(tmp0, tmp1, tmp2, tmp3, DCT_CONST_BITS);
|
||||
PCKEV_H2_SH(zero, tmp0, zero, tmp1, s0, s1);
|
||||
PCKEV_H2_SH(zero, tmp2, zero, tmp3, s2, s3);
|
||||
BUTTERFLY_4(s0, s1, s3, s2, s4, s7, s6, s5);
|
||||
|
||||
/* stage2 */
|
||||
ILVR_H2_SH(in3, in1, in2, in0, s1, s0);
|
||||
k0 = AOM_SET_COSPI_PAIR(cospi_16_64, cospi_16_64);
|
||||
k1 = AOM_SET_COSPI_PAIR(cospi_16_64, -cospi_16_64);
|
||||
k2 = AOM_SET_COSPI_PAIR(cospi_24_64, -cospi_8_64);
|
||||
k3 = AOM_SET_COSPI_PAIR(cospi_8_64, cospi_24_64);
|
||||
DOTP_SH4_SW(s0, s0, s1, s1, k0, k1, k2, k3, tmp0, tmp1, tmp2, tmp3);
|
||||
SRARI_W4_SW(tmp0, tmp1, tmp2, tmp3, DCT_CONST_BITS);
|
||||
PCKEV_H2_SH(zero, tmp0, zero, tmp1, s0, s1);
|
||||
PCKEV_H2_SH(zero, tmp2, zero, tmp3, s2, s3);
|
||||
BUTTERFLY_4(s0, s1, s2, s3, m0, m1, m2, m3);
|
||||
|
||||
/* stage3 */
|
||||
s0 = __msa_ilvr_h(s6, s5);
|
||||
|
||||
k1 = AOM_SET_COSPI_PAIR(-cospi_16_64, cospi_16_64);
|
||||
DOTP_SH2_SW(s0, s0, k1, k0, tmp0, tmp1);
|
||||
SRARI_W2_SW(tmp0, tmp1, DCT_CONST_BITS);
|
||||
PCKEV_H2_SH(zero, tmp0, zero, tmp1, s2, s3);
|
||||
|
||||
/* stage4 */
|
||||
BUTTERFLY_8(m0, m1, m2, m3, s4, s2, s3, s7, in0, in1, in2, in3, in4, in5, in6,
|
||||
in7);
|
||||
TRANSPOSE4X8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3,
|
||||
in4, in5, in6, in7);
|
||||
AOM_IDCT8x8_1D(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3,
|
||||
in4, in5, in6, in7);
|
||||
|
||||
/* final rounding (add 2^4, divide by 2^5) and shift */
|
||||
SRARI_H4_SH(in0, in1, in2, in3, 5);
|
||||
SRARI_H4_SH(in4, in5, in6, in7, 5);
|
||||
|
||||
/* add block and store 8x8 */
|
||||
AOM_ADDBLK_ST8x4_UB(dst, dst_stride, in0, in1, in2, in3);
|
||||
dst += (4 * dst_stride);
|
||||
AOM_ADDBLK_ST8x4_UB(dst, dst_stride, in4, in5, in6, in7);
|
||||
}
|
||||
|
||||
void aom_idct8x8_1_add_msa(const int16_t *input, uint8_t *dst,
|
||||
int32_t dst_stride) {
|
||||
int16_t out;
|
||||
int32_t val;
|
||||
v8i16 vec;
|
||||
|
||||
out = ROUND_POWER_OF_TWO((input[0] * cospi_16_64), DCT_CONST_BITS);
|
||||
out = ROUND_POWER_OF_TWO((out * cospi_16_64), DCT_CONST_BITS);
|
||||
val = ROUND_POWER_OF_TWO(out, 5);
|
||||
vec = __msa_fill_h(val);
|
||||
|
||||
AOM_ADDBLK_ST8x4_UB(dst, dst_stride, vec, vec, vec, vec);
|
||||
dst += (4 * dst_stride);
|
||||
AOM_ADDBLK_ST8x4_UB(dst, dst_stride, vec, vec, vec, vec);
|
||||
}
|
||||
@@ -1,80 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2016, Alliance for Open Media. All rights reserved
|
||||
*
|
||||
* This source code is subject to the terms of the BSD 2 Clause License and
|
||||
* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
|
||||
* was not distributed with this source code in the LICENSE file, you can
|
||||
* obtain it at www.aomedia.org/license/software. If the Alliance for Open
|
||||
* Media Patent License 1.0 was not distributed with this source code in the
|
||||
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
|
||||
*/
|
||||
|
||||
#ifndef AOM_DSP_MIPS_INV_TXFM_DSPR2_H_
|
||||
#define AOM_DSP_MIPS_INV_TXFM_DSPR2_H_
|
||||
|
||||
#include <assert.h>
|
||||
|
||||
#include "./aom_config.h"
|
||||
#include "aom/aom_integer.h"
|
||||
#include "aom_dsp/inv_txfm.h"
|
||||
#include "aom_dsp/mips/common_dspr2.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#if HAVE_DSPR2
|
||||
/*
 * Statement-expression that evaluates to `out`.
 * Per the inline comments, `input` is multiplied by cospi_16_64 and passed
 * through dct_const_round_shift, and that result is multiplied by cospi_16_64
 * and round-shifted a second time. Each pass loads DCT_CONST_ROUNDING into
 * the low half of an accumulator ($ac1 for the first pass, $ac2 for the
 * second), clears the high half, does a madd and reads the value back with
 * extp.
 * `tmp` is an early-clobber output because it is written by the first pass
 * and read by the second.
 * NOTE(review): the asm writes $ac1 and $ac2 but declares no clobber list -
 * confirm this is safe with the compilers in use.
 * NOTE(review): extp presumably relies on the DSP control position field
 * having been set up by the caller - confirm.
 */
#define DCT_CONST_ROUND_SHIFT_TWICE_COSPI_16_64(input) \
|
||||
({ \
|
||||
\
|
||||
int32_t tmp, out; \
|
||||
int dct_cost_rounding = DCT_CONST_ROUNDING; \
|
||||
int in = input; \
|
||||
\
|
||||
__asm__ __volatile__(/* out = dct_const_round_shift(dc * cospi_16_64); */ \
|
||||
"mtlo %[dct_cost_rounding], $ac1 " \
|
||||
" \n\t" \
|
||||
"mthi $zero, $ac1 " \
|
||||
" \n\t" \
|
||||
"madd $ac1, %[in], " \
|
||||
"%[cospi_16_64] \n\t" \
|
||||
"extp %[tmp], $ac1, " \
|
||||
"31 \n\t" \
|
||||
\
|
||||
/* out = dct_const_round_shift(out * cospi_16_64); */ \
|
||||
"mtlo %[dct_cost_rounding], $ac2 " \
|
||||
" \n\t" \
|
||||
"mthi $zero, $ac2 " \
|
||||
" \n\t" \
|
||||
"madd $ac2, %[tmp], " \
|
||||
"%[cospi_16_64] \n\t" \
|
||||
"extp %[out], $ac2, " \
|
||||
"31 \n\t" \
|
||||
\
|
||||
: [tmp] "=&r"(tmp), [out] "=r"(out) \
|
||||
: [in] "r"(in), \
|
||||
[dct_cost_rounding] "r"(dct_cost_rounding), \
|
||||
[cospi_16_64] "r"(cospi_16_64)); \
|
||||
out; \
|
||||
})
|
||||
|
||||
/*
 * Prototypes for the inverse-transform helpers, declared only when
 * HAVE_DSPR2 is set; the definitions are not part of this header.
 * NOTE(review): by their names and signatures, the *_rows_* helpers write an
 * int16_t intermediate buffer and the *_add_blk_* helpers accumulate into
 * `dest` using `dest_stride` - confirm against the definitions.
 */
void aom_idct32_cols_add_blk_dspr2(int16_t *input, uint8_t *dest,
|
||||
int dest_stride);
|
||||
void aom_idct4_rows_dspr2(const int16_t *input, int16_t *output);
|
||||
void aom_idct4_columns_add_blk_dspr2(int16_t *input, uint8_t *dest,
|
||||
int dest_stride);
|
||||
void iadst4_dspr2(const int16_t *input, int16_t *output);
|
||||
void idct8_rows_dspr2(const int16_t *input, int16_t *output, uint32_t no_rows);
|
||||
void idct8_columns_add_blk_dspr2(int16_t *input, uint8_t *dest,
|
||||
int dest_stride);
|
||||
void iadst8_dspr2(const int16_t *input, int16_t *output);
|
||||
void idct16_rows_dspr2(const int16_t *input, int16_t *output, uint32_t no_rows);
|
||||
void idct16_cols_add_blk_dspr2(int16_t *input, uint8_t *dest, int dest_stride);
|
||||
void iadst16_dspr2(const int16_t *input, int16_t *output);
|
||||
|
||||
#endif // #if HAVE_DSPR2
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
#endif
|
||||
|
||||
#endif // AOM_DSP_MIPS_INV_TXFM_DSPR2_H_
|
||||
@@ -1,412 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2016, Alliance for Open Media. All rights reserved
|
||||
*
|
||||
* This source code is subject to the terms of the BSD 2 Clause License and
|
||||
* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
|
||||
* was not distributed with this source code in the LICENSE file, you can
|
||||
* obtain it at www.aomedia.org/license/software. If the Alliance for Open
|
||||
* Media Patent License 1.0 was not distributed with this source code in the
|
||||
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
|
||||
*/
|
||||
|
||||
#ifndef AOM_DSP_MIPS_INV_TXFM_MSA_H_
|
||||
#define AOM_DSP_MIPS_INV_TXFM_MSA_H_
|
||||
|
||||
#include "aom_dsp/mips/macros_msa.h"
|
||||
#include "aom_dsp/mips/txfm_macros_msa.h"
|
||||
#include "aom_dsp/txfm_common.h"
|
||||
|
||||
/*
 * Eight-vector ADST kernel: consumes in0..in7 and assigns out0..out7.
 *
 * Constants are splatted out of coeff0_m / coeff1_m and interleaved into
 * pairs, which are then applied through DOT_ADD_SUB_SRARI_PCK and
 * DOT_SHIFT_RIGHT_PCK_H. out1, out3, out5 and out7 are negated at the end.
 *
 * in0..in7 appear in output positions of the helper macros, so callers should
 * expect the input variables to be modified. Statement order is significant
 * and is kept unchanged.
 */
#define AOM_ADST8(in0, in1, in2, in3, in4, in5, in6, in7, out0, out1, out2, \
                  out3, out4, out5, out6, out7) \
  { \
    v8i16 cnst0_m, cnst1_m, cnst2_m, cnst3_m, cnst4_m; \
    v8i16 vec0_m, vec1_m, vec2_m, vec3_m, s0_m, s1_m; \
    v8i16 coeff0_m = { cospi_2_64,  cospi_6_64,  cospi_10_64, cospi_14_64, \
                       cospi_18_64, cospi_22_64, cospi_26_64, cospi_30_64 }; \
    v8i16 coeff1_m = { cospi_8_64,  -cospi_8_64,  cospi_16_64, -cospi_16_64, \
                       cospi_24_64, -cospi_24_64, 0,           0 }; \
    \
    /* constant pairs from coeff0_m lanes (0, 7) and (4, 3) */ \
    SPLATI_H2_SH(coeff0_m, 0, 7, cnst0_m, cnst1_m); \
    cnst2_m = -cnst0_m; \
    ILVEV_H2_SH(cnst0_m, cnst1_m, cnst1_m, cnst2_m, cnst0_m, cnst1_m); \
    SPLATI_H2_SH(coeff0_m, 4, 3, cnst2_m, cnst3_m); \
    cnst4_m = -cnst2_m; \
    ILVEV_H2_SH(cnst2_m, cnst3_m, cnst3_m, cnst4_m, cnst2_m, cnst3_m); \
    \
    ILVRL_H2_SH(in0, in7, vec1_m, vec0_m); \
    ILVRL_H2_SH(in4, in3, vec3_m, vec2_m); \
    DOT_ADD_SUB_SRARI_PCK(vec0_m, vec1_m, vec2_m, vec3_m, cnst0_m, cnst1_m, \
                          cnst2_m, cnst3_m, in7, in0, in4, in3); \
    \
    /* constant pairs from coeff0_m lanes (2, 5) and (6, 1) */ \
    SPLATI_H2_SH(coeff0_m, 2, 5, cnst0_m, cnst1_m); \
    cnst2_m = -cnst0_m; \
    ILVEV_H2_SH(cnst0_m, cnst1_m, cnst1_m, cnst2_m, cnst0_m, cnst1_m); \
    SPLATI_H2_SH(coeff0_m, 6, 1, cnst2_m, cnst3_m); \
    cnst4_m = -cnst2_m; \
    ILVEV_H2_SH(cnst2_m, cnst3_m, cnst3_m, cnst4_m, cnst2_m, cnst3_m); \
    \
    ILVRL_H2_SH(in2, in5, vec1_m, vec0_m); \
    ILVRL_H2_SH(in6, in1, vec3_m, vec2_m); \
    \
    DOT_ADD_SUB_SRARI_PCK(vec0_m, vec1_m, vec2_m, vec3_m, cnst0_m, cnst1_m, \
                          cnst2_m, cnst3_m, in5, in2, in6, in1); \
    BUTTERFLY_4(in7, in0, in2, in5, s1_m, s0_m, in2, in5); \
    out7 = -s0_m; \
    out0 = s1_m; \
    \
    /* constant pairs from coeff1_m lanes 0, 4, 1, 5 */ \
    SPLATI_H4_SH(coeff1_m, 0, 4, 1, 5, cnst0_m, cnst1_m, cnst2_m, cnst3_m); \
    \
    ILVEV_H2_SH(cnst3_m, cnst0_m, cnst1_m, cnst2_m, cnst3_m, cnst2_m); \
    cnst0_m = __msa_ilvev_h(cnst1_m, cnst0_m); \
    cnst1_m = cnst0_m; \
    \
    ILVRL_H2_SH(in4, in3, vec1_m, vec0_m); \
    ILVRL_H2_SH(in6, in1, vec3_m, vec2_m); \
    DOT_ADD_SUB_SRARI_PCK(vec0_m, vec1_m, vec2_m, vec3_m, cnst0_m, cnst2_m, \
                          cnst3_m, cnst1_m, out1, out6, s0_m, s1_m); \
    \
    /* constant pair from coeff1_m lanes (2, 3) */ \
    SPLATI_H2_SH(coeff1_m, 2, 3, cnst0_m, cnst1_m); \
    cnst1_m = __msa_ilvev_h(cnst1_m, cnst0_m); \
    \
    ILVRL_H2_SH(in2, in5, vec1_m, vec0_m); \
    ILVRL_H2_SH(s0_m, s1_m, vec3_m, vec2_m); \
    out3 = DOT_SHIFT_RIGHT_PCK_H(vec0_m, vec1_m, cnst0_m); \
    out4 = DOT_SHIFT_RIGHT_PCK_H(vec0_m, vec1_m, cnst1_m); \
    out2 = DOT_SHIFT_RIGHT_PCK_H(vec2_m, vec3_m, cnst0_m); \
    out5 = DOT_SHIFT_RIGHT_PCK_H(vec2_m, vec3_m, cnst1_m); \
    \
    /* sign fix-up of the odd outputs */ \
    out1 = -out1; \
    out3 = -out3; \
    out5 = -out5; \
  }
|
||||
|
||||
/*
 * Statement-expression yielding a v8i16 built by broadcasting c0_h and c1_h
 * into two vectors and combining them with __msa_ilvev_h (c1_h vector as the
 * first operand, c0_h vector as the second).
 */
#define AOM_SET_COSPI_PAIR(c0_h, c1_h) \
  ({ \
    v8i16 r0_m = __msa_fill_h(c0_h); \
    v8i16 r1_m = __msa_fill_h(c1_h); \
    v8i16 out0_m = __msa_ilvev_h(r1_m, r0_m); \
    \
    out0_m; \
  })
|
||||
|
||||
/*
 * Adds four residual vectors (in0..in3) to four rows read from `dst` and
 * stores the result back to the same rows.
 * The rows are loaded with LD_UB4, interleaved with zero into halfwords,
 * summed with the residuals, clipped with CLIP_SH4_0_255, packed back to
 * bytes and written with ST8x4_UB.
 * `dst` is evaluated once, into a local pointer.
 */
#define AOM_ADDBLK_ST8x4_UB(dst, dst_stride, in0, in1, in2, in3) \
  { \
    uint8_t *dst_m = (uint8_t *)(dst); \
    v16i8 zero_m = { 0 }; \
    v16u8 dst0_m, dst1_m, dst2_m, dst3_m; \
    v8i16 res0_m, res1_m, res2_m, res3_m; \
    v16i8 tmp0_m, tmp1_m; \
    \
    /* load and widen the destination rows */ \
    LD_UB4(dst_m, dst_stride, dst0_m, dst1_m, dst2_m, dst3_m); \
    ILVR_B4_SH(zero_m, dst0_m, zero_m, dst1_m, zero_m, dst2_m, zero_m, dst3_m, \
               res0_m, res1_m, res2_m, res3_m); \
    /* add the residual and clip */ \
    ADD4(res0_m, in0, res1_m, in1, res2_m, in2, res3_m, in3, res0_m, res1_m, \
         res2_m, res3_m); \
    CLIP_SH4_0_255(res0_m, res1_m, res2_m, res3_m); \
    /* narrow and store */ \
    PCKEV_B2_SB(res1_m, res0_m, res3_m, res2_m, tmp0_m, tmp1_m); \
    ST8x4_UB(tmp0_m, tmp1_m, dst_m, dst_stride); \
  }
|
||||
|
||||
/*
 * 4-point inverse DCT kernel on four input vectors.
 * in0/in2 are interleaved and dot-multiplied with the cospi_16_64 pairs,
 * in1/in3 with the cospi_24_64 / cospi_8_64 pairs; the products are
 * round-shifted by DCT_CONST_BITS, packed, and combined with a final
 * BUTTERFLY_4 into out0..out3.
 */
#define AOM_IDCT4x4(in0, in1, in2, in3, out0, out1, out2, out3) \
  { \
    v8i16 step0_m, step1_m; \
    v4i32 tmp0_m, tmp1_m, tmp2_m, tmp3_m; \
    v8i16 c0_m = AOM_SET_COSPI_PAIR(cospi_16_64, cospi_16_64); \
    v8i16 c1_m = AOM_SET_COSPI_PAIR(cospi_16_64, -cospi_16_64); \
    v8i16 c2_m = AOM_SET_COSPI_PAIR(cospi_24_64, -cospi_8_64); \
    v8i16 c3_m = AOM_SET_COSPI_PAIR(cospi_8_64, cospi_24_64); \
    \
    step0_m = __msa_ilvr_h(in2, in0); \
    DOTP_SH2_SW(step0_m, step0_m, c0_m, c1_m, tmp0_m, tmp1_m); \
    \
    step1_m = __msa_ilvr_h(in3, in1); \
    DOTP_SH2_SW(step1_m, step1_m, c2_m, c3_m, tmp2_m, tmp3_m); \
    SRARI_W4_SW(tmp0_m, tmp1_m, tmp2_m, tmp3_m, DCT_CONST_BITS); \
    \
    PCKEV_H2_SW(tmp1_m, tmp0_m, tmp3_m, tmp2_m, tmp0_m, tmp2_m); \
    SLDI_B2_0_SW(tmp0_m, tmp2_m, tmp1_m, tmp3_m, 8); \
    BUTTERFLY_4((v8i16)tmp0_m, (v8i16)tmp1_m, (v8i16)tmp2_m, (v8i16)tmp3_m, \
                out0, out1, out2, out3); \
  }
|
||||
|
||||
/*
 * 4-point inverse ADST kernel on four input vectors.
 * Four 32-bit accumulators (int0_m..int3_m) are built from dot-products of
 * interleaved inputs with pairs of the sinpi constants in mask_m, then
 * round-shifted by DCT_CONST_BITS and packed into out0..out3.
 * tmp2_m from the first dot-product is reused when int3_m is formed, so the
 * statement order must be kept.
 */
#define AOM_IADST4x4(in0, in1, in2, in3, out0, out1, out2, out3) \
  { \
    v8i16 res0_m, res1_m, c0_m, c1_m; \
    v8i16 k1_m, k2_m, k3_m, k4_m; \
    v8i16 zero_m = { 0 }; \
    v4i32 tmp0_m, tmp1_m, tmp2_m, tmp3_m; \
    v4i32 int0_m, int1_m, int2_m, int3_m; \
    v8i16 mask_m = { sinpi_1_9,  sinpi_2_9,  sinpi_3_9,  sinpi_4_9, \
                     -sinpi_1_9, -sinpi_2_9, -sinpi_3_9, -sinpi_4_9 }; \
    \
    /* int0_m */ \
    SPLATI_H4_SH(mask_m, 3, 0, 1, 2, c0_m, c1_m, k1_m, k2_m); \
    ILVEV_H2_SH(c0_m, c1_m, k1_m, k2_m, c0_m, c1_m); \
    ILVR_H2_SH(in0, in2, in1, in3, res0_m, res1_m); \
    DOTP_SH2_SW(res0_m, res1_m, c0_m, c1_m, tmp2_m, tmp1_m); \
    int0_m = tmp2_m + tmp1_m; \
    \
    /* int1_m */ \
    SPLATI_H2_SH(mask_m, 4, 7, k4_m, k3_m); \
    ILVEV_H2_SH(k4_m, k1_m, k3_m, k2_m, c0_m, c1_m); \
    DOTP_SH2_SW(res0_m, res1_m, c0_m, c1_m, tmp0_m, tmp1_m); \
    int1_m = tmp0_m + tmp1_m; \
    \
    /* int2_m */ \
    c0_m = __msa_splati_h(mask_m, 6); \
    ILVL_H2_SH(k2_m, c0_m, zero_m, k2_m, c0_m, c1_m); \
    ILVR_H2_SH(in0, in2, in1, in3, res0_m, res1_m); \
    DOTP_SH2_SW(res0_m, res1_m, c0_m, c1_m, tmp0_m, tmp1_m); \
    int2_m = tmp0_m + tmp1_m; \
    \
    /* int3_m: starts from tmp2_m of the first dot-product */ \
    c0_m = __msa_ilvev_h(__msa_splati_h(mask_m, 6), k1_m); \
    \
    res0_m = __msa_ilvr_h((in1), (in3)); \
    tmp0_m = __msa_dotp_s_w(res0_m, c0_m); \
    int3_m = tmp2_m + tmp0_m; \
    \
    res0_m = __msa_ilvr_h((in2), (in3)); \
    c1_m = __msa_ilvev_h(k4_m, k3_m); \
    \
    tmp2_m = __msa_dotp_s_w(res0_m, c1_m); \
    res1_m = __msa_ilvr_h((in0), (in2)); \
    c1_m = __msa_ilvev_h(k1_m, zero_m); \
    \
    tmp3_m = __msa_dotp_s_w(res1_m, c1_m); \
    int3_m += tmp2_m; \
    int3_m += tmp3_m; \
    \
    /* round and pack */ \
    SRARI_W4_SW(int0_m, int1_m, int2_m, int3_m, DCT_CONST_BITS); \
    PCKEV_H2_SH(int0_m, int0_m, int1_m, int1_m, out0, out1); \
    PCKEV_H2_SH(int2_m, int2_m, int3_m, int3_m, out2, out3); \
  }
|
||||
|
||||
/*
 * Statement-expression yielding a v8i16 constant pair: lanes idx1_h and
 * idx2_h of mask_h are splatted with SPLATI_H2_SH and the two results are
 * combined with __msa_ilvev_h (idx2_h splat as the first operand).
 */
#define AV1_SET_CONST_PAIR(mask_h, idx1_h, idx2_h) \
  ({ \
    v8i16 c0_m, c1_m; \
    \
    SPLATI_H2_SH(mask_h, idx1_h, idx2_h, c0_m, c1_m); \
    \
    __msa_ilvev_h(c1_m, c0_m); \
  })
|
||||
|
||||
/*
 * Multiply-and-add helper.
 * (inp0, inp1) is interleaved and dot-multiplied with cst0 -> out0 and with
 * cst1 -> out1; (inp2, inp3) likewise with cst2 -> out2 and cst3 -> out3.
 * Every product is round-shifted by DCT_CONST_BITS and packed to halfwords.
 * Both interleaves are taken before any output is written, so outputs may
 * name the same variables as inputs.
 * Locals use madd_ / _madd names, distinct from the tmpN_m / kN_m names used
 * by the macros in this file that invoke it.
 */
#define AV1_MADD(inp0, inp1, inp2, inp3, cst0, cst1, cst2, cst3, out0, out1, \
                 out2, out3) \
  { \
    v8i16 madd_s0_m, madd_s1_m, madd_s2_m, madd_s3_m; \
    v4i32 tmp0_madd, tmp1_madd, tmp2_madd, tmp3_madd; \
    \
    /* capture both input pairs first */ \
    ILVRL_H2_SH(inp1, inp0, madd_s1_m, madd_s0_m); \
    ILVRL_H2_SH(inp3, inp2, madd_s3_m, madd_s2_m); \
    \
    /* first pair -> out0, out1 */ \
    DOTP_SH4_SW(madd_s1_m, madd_s0_m, madd_s1_m, madd_s0_m, cst0, cst0, cst1, \
                cst1, tmp0_madd, tmp1_madd, tmp2_madd, tmp3_madd); \
    SRARI_W4_SW(tmp0_madd, tmp1_madd, tmp2_madd, tmp3_madd, DCT_CONST_BITS); \
    PCKEV_H2_SH(tmp1_madd, tmp0_madd, tmp3_madd, tmp2_madd, out0, out1); \
    \
    /* second pair -> out2, out3 */ \
    DOTP_SH4_SW(madd_s3_m, madd_s2_m, madd_s3_m, madd_s2_m, cst2, cst2, cst3, \
                cst3, tmp0_madd, tmp1_madd, tmp2_madd, tmp3_madd); \
    SRARI_W4_SW(tmp0_madd, tmp1_madd, tmp2_madd, tmp3_madd, DCT_CONST_BITS); \
    PCKEV_H2_SH(tmp1_madd, tmp0_madd, tmp3_madd, tmp2_madd, out2, out3); \
  }
|
||||
|
||||
/*
 * One-dimensional 8-point inverse DCT over eight vectors.
 * The odd inputs (in1, in7, in3, in5) and then the even inputs (in0, in4,
 * in2, in6) are run through AV1_MADD with constant pairs taken from mask_m,
 * and the partial results are merged by BUTTERFLY_4 / BUTTERFLY_8 into
 * out0..out7.
 * in0..in7 are used as AV1_MADD outputs, so the input variables are modified.
 */
#define AOM_IDCT8x8_1D(in0, in1, in2, in3, in4, in5, in6, in7, out0, out1, \
                       out2, out3, out4, out5, out6, out7) \
  { \
    v8i16 tp0_m, tp1_m, tp2_m, tp3_m, tp4_m, tp5_m, tp6_m, tp7_m; \
    v8i16 k0_m, k1_m, k2_m, k3_m, res0_m, res1_m, res2_m, res3_m; \
    v4i32 tmp0_m, tmp1_m, tmp2_m, tmp3_m; \
    v8i16 mask_m = { cospi_28_64, cospi_4_64,  cospi_20_64,  cospi_12_64, \
                     cospi_16_64, -cospi_4_64, -cospi_20_64, -cospi_16_64 }; \
    \
    /* odd half */ \
    k0_m = AV1_SET_CONST_PAIR(mask_m, 0, 5); \
    k1_m = AV1_SET_CONST_PAIR(mask_m, 1, 0); \
    k2_m = AV1_SET_CONST_PAIR(mask_m, 6, 3); \
    k3_m = AV1_SET_CONST_PAIR(mask_m, 3, 2); \
    AV1_MADD(in1, in7, in3, in5, k0_m, k1_m, k2_m, k3_m, in1, in7, in3, in5); \
    SUB2(in1, in3, in7, in5, res0_m, res1_m); \
    tp4_m = in1 + in3; \
    tp7_m = in7 + in5; \
    \
    k0_m = AV1_SET_CONST_PAIR(mask_m, 4, 7); \
    k1_m = __msa_splati_h(mask_m, 4); \
    \
    ILVRL_H2_SH(res0_m, res1_m, res2_m, res3_m); \
    DOTP_SH4_SW(res2_m, res3_m, res2_m, res3_m, k0_m, k0_m, k1_m, k1_m, \
                tmp0_m, tmp1_m, tmp2_m, tmp3_m); \
    SRARI_W4_SW(tmp0_m, tmp1_m, tmp2_m, tmp3_m, DCT_CONST_BITS); \
    PCKEV_H2_SH(tmp1_m, tmp0_m, tmp3_m, tmp2_m, tp5_m, tp6_m); \
    \
    /* even half; k0_m / k1_m carry over from above */ \
    k2_m = AOM_SET_COSPI_PAIR(cospi_24_64, -cospi_8_64); \
    k3_m = AOM_SET_COSPI_PAIR(cospi_8_64, cospi_24_64); \
    AV1_MADD(in0, in4, in2, in6, k1_m, k0_m, k2_m, k3_m, in0, in4, in2, in6); \
    BUTTERFLY_4(in0, in4, in2, in6, tp0_m, tp1_m, tp2_m, tp3_m); \
    \
    /* merge */ \
    BUTTERFLY_8(tp0_m, tp1_m, tp2_m, tp3_m, tp4_m, tp5_m, tp6_m, tp7_m, out0, \
                out1, out2, out3, out4, out5, out6, out7); \
  }
|
||||
|
||||
/*
 * One-dimensional 8-point inverse ADST over eight vectors.
 * Input pairs are interleaved and dot-multiplied against constant pairs taken
 * from mask1_m / mask2_m / mask3_m; 32-bit sums and differences are
 * round-shifted by DCT_CONST_BITS and packed back to halfwords. out1, out3,
 * out5 and out7 are produced by negating in1, in3, in5 and in7 at the end.
 *
 * in1..in5 and in7 are reused as scratch after their original values have
 * been consumed, so callers should expect the input variables to be modified.
 * Statement order is significant and is kept unchanged.
 */
#define AV1_IADST8x8_1D(in0, in1, in2, in3, in4, in5, in6, in7, out0, out1, \
                        out2, out3, out4, out5, out6, out7) \
  { \
    v4i32 r0_m, r1_m, r2_m, r3_m, r4_m, r5_m, r6_m, r7_m; \
    v4i32 m0_m, m1_m, m2_m, m3_m, t0_m, t1_m; \
    v8i16 res0_m, res1_m, res2_m, res3_m, k0_m, k1_m, in_s0, in_s1; \
    v8i16 mask1_m = { cospi_2_64,  cospi_30_64,  -cospi_2_64, cospi_10_64, \
                      cospi_22_64, -cospi_10_64, cospi_18_64, cospi_14_64 }; \
    v8i16 mask2_m = { cospi_14_64,  -cospi_18_64, cospi_26_64, cospi_6_64, \
                      -cospi_26_64, cospi_8_64,   cospi_24_64, -cospi_8_64 }; \
    v8i16 mask3_m = { \
      -cospi_24_64, cospi_8_64, cospi_16_64, -cospi_16_64, 0, 0, 0, 0 \
    }; \
    \
    /* in0/in1 and in4/in5 -> res0_m, res1_m (sums), t0_m, t1_m (diffs) */ \
    k0_m = AV1_SET_CONST_PAIR(mask1_m, 0, 1); \
    k1_m = AV1_SET_CONST_PAIR(mask1_m, 1, 2); \
    ILVRL_H2_SH(in1, in0, in_s1, in_s0); \
    DOTP_SH4_SW(in_s1, in_s0, in_s1, in_s0, k0_m, k0_m, k1_m, k1_m, r0_m, \
                r1_m, r2_m, r3_m); \
    k0_m = AV1_SET_CONST_PAIR(mask1_m, 6, 7); \
    k1_m = AV1_SET_CONST_PAIR(mask2_m, 0, 1); \
    ILVRL_H2_SH(in5, in4, in_s1, in_s0); \
    DOTP_SH4_SW(in_s1, in_s0, in_s1, in_s0, k0_m, k0_m, k1_m, k1_m, r4_m, \
                r5_m, r6_m, r7_m); \
    ADD4(r0_m, r4_m, r1_m, r5_m, r2_m, r6_m, r3_m, r7_m, m0_m, m1_m, m2_m, \
         m3_m); \
    SRARI_W4_SW(m0_m, m1_m, m2_m, m3_m, DCT_CONST_BITS); \
    PCKEV_H2_SH(m1_m, m0_m, m3_m, m2_m, res0_m, res1_m); \
    SUB4(r0_m, r4_m, r1_m, r5_m, r2_m, r6_m, r3_m, r7_m, m0_m, m1_m, m2_m, \
         m3_m); \
    SRARI_W4_SW(m0_m, m1_m, m2_m, m3_m, DCT_CONST_BITS); \
    PCKEV_H2_SW(m1_m, m0_m, m3_m, m2_m, t0_m, t1_m); \
    \
    /* in2/in3 and in6/in7 -> res2_m, res3_m (sums), m2_m, m3_m (diffs) */ \
    k0_m = AV1_SET_CONST_PAIR(mask1_m, 3, 4); \
    k1_m = AV1_SET_CONST_PAIR(mask1_m, 4, 5); \
    ILVRL_H2_SH(in3, in2, in_s1, in_s0); \
    DOTP_SH4_SW(in_s1, in_s0, in_s1, in_s0, k0_m, k0_m, k1_m, k1_m, r0_m, \
                r1_m, r2_m, r3_m); \
    k0_m = AV1_SET_CONST_PAIR(mask2_m, 2, 3); \
    k1_m = AV1_SET_CONST_PAIR(mask2_m, 3, 4); \
    ILVRL_H2_SH(in7, in6, in_s1, in_s0); \
    DOTP_SH4_SW(in_s1, in_s0, in_s1, in_s0, k0_m, k0_m, k1_m, k1_m, r4_m, \
                r5_m, r6_m, r7_m); \
    ADD4(r0_m, r4_m, r1_m, r5_m, r2_m, r6_m, r3_m, r7_m, m0_m, m1_m, m2_m, \
         m3_m); \
    SRARI_W4_SW(m0_m, m1_m, m2_m, m3_m, DCT_CONST_BITS); \
    PCKEV_H2_SH(m1_m, m0_m, m3_m, m2_m, res2_m, res3_m); \
    SUB4(r0_m, r4_m, r1_m, r5_m, r2_m, r6_m, r3_m, r7_m, m0_m, m1_m, m2_m, \
         m3_m); \
    SRARI_W4_SW(m0_m, m1_m, m2_m, m3_m, DCT_CONST_BITS); \
    PCKEV_H2_SW(m1_m, m0_m, m3_m, m2_m, r2_m, r3_m); \
    ILVRL_H2_SW(r3_m, r2_m, m2_m, m3_m); \
    BUTTERFLY_4(res0_m, res1_m, res3_m, res2_m, out0, in7, in4, in3); \
    \
    /* combine the difference terms -> in1, out6, in2, in5 */ \
    k0_m = AV1_SET_CONST_PAIR(mask2_m, 5, 6); \
    k1_m = AV1_SET_CONST_PAIR(mask2_m, 6, 7); \
    ILVRL_H2_SH(t1_m, t0_m, in_s1, in_s0); \
    DOTP_SH4_SW(in_s1, in_s0, in_s1, in_s0, k0_m, k0_m, k1_m, k1_m, r0_m, \
                r1_m, r2_m, r3_m); \
    k1_m = AV1_SET_CONST_PAIR(mask3_m, 0, 1); \
    DOTP_SH4_SW(m2_m, m3_m, m2_m, m3_m, k0_m, k0_m, k1_m, k1_m, r4_m, r5_m, \
                r6_m, r7_m); \
    ADD4(r0_m, r6_m, r1_m, r7_m, r2_m, r4_m, r3_m, r5_m, m0_m, m1_m, m2_m, \
         m3_m); \
    SRARI_W4_SW(m0_m, m1_m, m2_m, m3_m, DCT_CONST_BITS); \
    PCKEV_H2_SH(m1_m, m0_m, m3_m, m2_m, in1, out6); \
    SUB4(r0_m, r6_m, r1_m, r7_m, r2_m, r4_m, r3_m, r5_m, m0_m, m1_m, m2_m, \
         m3_m); \
    SRARI_W4_SW(m0_m, m1_m, m2_m, m3_m, DCT_CONST_BITS); \
    PCKEV_H2_SH(m1_m, m0_m, m3_m, m2_m, in2, in5); \
    \
    /* final cospi_16_64 stage -> in3, out4, out2, in5 */ \
    k0_m = AV1_SET_CONST_PAIR(mask3_m, 2, 2); \
    k1_m = AV1_SET_CONST_PAIR(mask3_m, 2, 3); \
    ILVRL_H2_SH(in4, in3, in_s1, in_s0); \
    DOTP_SH4_SW(in_s1, in_s0, in_s1, in_s0, k0_m, k0_m, k1_m, k1_m, m0_m, \
                m1_m, m2_m, m3_m); \
    SRARI_W4_SW(m0_m, m1_m, m2_m, m3_m, DCT_CONST_BITS); \
    PCKEV_H2_SH(m1_m, m0_m, m3_m, m2_m, in3, out4); \
    ILVRL_H2_SW(in5, in2, m2_m, m3_m); \
    DOTP_SH4_SW(m2_m, m3_m, m2_m, m3_m, k0_m, k0_m, k1_m, k1_m, m0_m, m1_m, \
                m2_m, m3_m); \
    SRARI_W4_SW(m0_m, m1_m, m2_m, m3_m, DCT_CONST_BITS); \
    PCKEV_H2_SH(m1_m, m0_m, m3_m, m2_m, out2, in5); \
    \
    /* odd outputs are the negated scratch values */ \
    out1 = -in1; \
    out3 = -in3; \
    out5 = -in5; \
    out7 = -in7; \
  }
|
||||
|
||||
/*
 * Four-stage one-dimensional transform kernel: sixteen vector inputs
 * (r0..r15) in, sixteen vector outputs (out0..out15) out.
 *
 * All temporaries are v8i16 and private to the expansion.  The helper macros
 * AOM_SET_COSPI_PAIR, MADD_BF, MADD_SHORT, BUTTERFLY_4 and BUTTERFLY_8 and the
 * cospi_*_64 constants must be visible at the expansion site.
 *
 * out6, out7, out10, out11, out14 and out15 are produced by an earlier stage
 * and then passed through MADD_SHORT again in stage 4, so the output
 * arguments have to be assignable objects.
 */
#define AOM_IADST8x16_1D(r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, \
                         r12, r13, r14, r15, out0, out1, out2, out3, out4, \
                         out5, out6, out7, out8, out9, out10, out11, out12, \
                         out13, out14, out15) \
  { \
    /* stage 1 results */ \
    v8i16 g0_m, g1_m, g2_m, g3_m; \
    v8i16 g4_m, g5_m, g6_m, g7_m; \
    v8i16 g8_m, g9_m, g10_m, g11_m; \
    v8i16 g12_m, g13_m, g14_m, g15_m; \
    /* stage 2 results */ \
    v8i16 h0_m, h1_m, h2_m, h3_m; \
    v8i16 h4_m, h5_m, h6_m, h7_m; \
    v8i16 h8_m, h9_m, h10_m, h11_m; \
    /* constant pairs, reloaded before each use */ \
    v8i16 k0_m, k1_m, k2_m, k3_m; \
    \
    /* stage 1 */ \
    k0_m = AOM_SET_COSPI_PAIR(cospi_1_64, cospi_31_64); \
    k1_m = AOM_SET_COSPI_PAIR(cospi_31_64, -cospi_1_64); \
    k2_m = AOM_SET_COSPI_PAIR(cospi_17_64, cospi_15_64); \
    k3_m = AOM_SET_COSPI_PAIR(cospi_15_64, -cospi_17_64); \
    MADD_BF(r15, r0, r7, r8, k0_m, k1_m, k2_m, k3_m, g0_m, g1_m, g2_m, g3_m); \
    \
    k0_m = AOM_SET_COSPI_PAIR(cospi_5_64, cospi_27_64); \
    k1_m = AOM_SET_COSPI_PAIR(cospi_27_64, -cospi_5_64); \
    k2_m = AOM_SET_COSPI_PAIR(cospi_21_64, cospi_11_64); \
    k3_m = AOM_SET_COSPI_PAIR(cospi_11_64, -cospi_21_64); \
    MADD_BF(r13, r2, r5, r10, k0_m, k1_m, k2_m, k3_m, g4_m, g5_m, g6_m, g7_m); \
    \
    k0_m = AOM_SET_COSPI_PAIR(cospi_9_64, cospi_23_64); \
    k1_m = AOM_SET_COSPI_PAIR(cospi_23_64, -cospi_9_64); \
    k2_m = AOM_SET_COSPI_PAIR(cospi_25_64, cospi_7_64); \
    k3_m = AOM_SET_COSPI_PAIR(cospi_7_64, -cospi_25_64); \
    MADD_BF(r11, r4, r3, r12, k0_m, k1_m, k2_m, k3_m, g8_m, g9_m, g10_m, \
            g11_m); \
    \
    k0_m = AOM_SET_COSPI_PAIR(cospi_13_64, cospi_19_64); \
    k1_m = AOM_SET_COSPI_PAIR(cospi_19_64, -cospi_13_64); \
    k2_m = AOM_SET_COSPI_PAIR(cospi_29_64, cospi_3_64); \
    k3_m = AOM_SET_COSPI_PAIR(cospi_3_64, -cospi_29_64); \
    MADD_BF(r9, r6, r1, r14, k0_m, k1_m, k2_m, k3_m, g12_m, g13_m, g14_m, \
            g15_m); \
    \
    /* stage 2 */ \
    k0_m = AOM_SET_COSPI_PAIR(cospi_4_64, cospi_28_64); \
    k1_m = AOM_SET_COSPI_PAIR(cospi_28_64, -cospi_4_64); \
    k2_m = AOM_SET_COSPI_PAIR(-cospi_28_64, cospi_4_64); \
    MADD_BF(g1_m, g3_m, g9_m, g11_m, k0_m, k1_m, k2_m, k0_m, h0_m, h1_m, h2_m, \
            h3_m); \
    \
    k0_m = AOM_SET_COSPI_PAIR(cospi_12_64, cospi_20_64); \
    k1_m = AOM_SET_COSPI_PAIR(-cospi_20_64, cospi_12_64); \
    k2_m = AOM_SET_COSPI_PAIR(cospi_20_64, -cospi_12_64); \
    MADD_BF(g7_m, g5_m, g15_m, g13_m, k0_m, k1_m, k2_m, k0_m, h4_m, h5_m, \
            h6_m, h7_m); \
    \
    BUTTERFLY_4(h0_m, h2_m, h6_m, h4_m, out8, out9, out11, out10); \
    /* h0/h2/h4/h6 are reused here as outputs of the 8-way butterfly */ \
    BUTTERFLY_8(g0_m, g2_m, g4_m, g6_m, g14_m, g12_m, g10_m, g8_m, h8_m, h9_m, \
                h10_m, h11_m, h6_m, h4_m, h2_m, h0_m); \
    \
    /* stage 3 */ \
    BUTTERFLY_4(h8_m, h9_m, h11_m, h10_m, out0, out1, h11_m, h10_m); \
    k0_m = AOM_SET_COSPI_PAIR(cospi_8_64, cospi_24_64); \
    k1_m = AOM_SET_COSPI_PAIR(cospi_24_64, -cospi_8_64); \
    k2_m = AOM_SET_COSPI_PAIR(-cospi_24_64, cospi_8_64); \
    MADD_BF(h0_m, h2_m, h4_m, h6_m, k0_m, k1_m, k2_m, k0_m, out4, out6, out5, \
            out7); \
    MADD_BF(h1_m, h3_m, h5_m, h7_m, k0_m, k1_m, k2_m, k0_m, out12, out14, \
            out13, out15); \
    \
    /* stage 4 */ \
    k0_m = AOM_SET_COSPI_PAIR(cospi_16_64, cospi_16_64); \
    k1_m = AOM_SET_COSPI_PAIR(-cospi_16_64, -cospi_16_64); \
    k2_m = AOM_SET_COSPI_PAIR(cospi_16_64, -cospi_16_64); \
    k3_m = AOM_SET_COSPI_PAIR(-cospi_16_64, cospi_16_64); \
    MADD_SHORT(h10_m, h11_m, k1_m, k2_m, out2, out3); \
    MADD_SHORT(out6, out7, k0_m, k3_m, out6, out7); \
    MADD_SHORT(out10, out11, k0_m, k3_m, out10, out11); \
    MADD_SHORT(out14, out15, k1_m, k2_m, out14, out15); \
  }
|
||||
|
||||
/* Declarations only: none of these four functions is defined in this header.
 * NOTE(review): the names suggest row and column passes of 16-point inverse
 * DCT / ADST transforms for MIPS MSA, with the column passes adding into
 * `dst` - confirm against the implementing source files. */
void aom_idct16_1d_columns_addblk_msa(int16_t *input, uint8_t *dst,
|
||||
int32_t dst_stride);
|
||||
void aom_idct16_1d_rows_msa(const int16_t *input, int16_t *output);
|
||||
void aom_iadst16_1d_columns_addblk_msa(int16_t *input, uint8_t *dst,
|
||||
int32_t dst_stride);
|
||||
void aom_iadst16_1d_rows_msa(const int16_t *input, int16_t *output);
|
||||
#endif // AOM_DSP_MIPS_INV_TXFM_MSA_H_
|
||||
@@ -1,327 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2016, Alliance for Open Media. All rights reserved
|
||||
*
|
||||
* This source code is subject to the terms of the BSD 2 Clause License and
|
||||
* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
|
||||
* was not distributed with this source code in the LICENSE file, you can
|
||||
* obtain it at www.aomedia.org/license/software. If the Alliance for Open
|
||||
* Media Patent License 1.0 was not distributed with this source code in the
|
||||
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
|
||||
*/
|
||||
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "./aom_dsp_rtcd.h"
|
||||
#include "aom/aom_integer.h"
|
||||
#include "aom_dsp/mips/common_dspr2.h"
|
||||
#include "aom_dsp/mips/loopfilter_filters_dspr2.h"
|
||||
#include "aom_dsp/mips/loopfilter_macros_dspr2.h"
|
||||
#include "aom_dsp/mips/loopfilter_masks_dspr2.h"
|
||||
#include "aom_mem/aom_mem.h"
|
||||
|
||||
#if HAVE_DSPR2
|
||||
/*
 * 4-tap loop filter across a horizontal edge, written with MIPS DSPr2
 * inline assembly.
 *
 * s                      pointer into the image; rows above it are addressed
 *                        with negative multiples of pitch.
 * pitch                  byte distance between vertically adjacent pixels.
 * blimit, limit, thresh  each points at one byte, which is replicated into
 *                        all four byte lanes of a 32-bit word.
 *
 * Eight columns are handled as two 32-bit words (s advances by 4 per pass).
 * For each word, rows s - 4 * pitch .. s + 3 * pitch are read with `lw`, and
 * when the computed mask is non-zero rows s - 2 * pitch .. s + pitch are
 * written back with `sw`.
 *
 * Every asm statement that dereferences a pointer operand carries a "memory"
 * clobber: the pixels are not listed as asm operands, so without the clobber
 * the compiler is not told that memory is read or modified there.
 */
void aom_lpf_horizontal_4_dspr2(unsigned char *s, int pitch,
                                const uint8_t *blimit, const uint8_t *limit,
                                const uint8_t *thresh) {
  int i;
  uint32_t mask;
  uint32_t hev;
  uint32_t pm1, p0, p1, p2, p3, p4, p5, p6;
  uint8_t *sm1, *s0, *s1, *s2, *s3, *s4, *s5, *s6;
  uint32_t thresh_vec, flimit_vec, limit_vec;
  uint32_t uflimit, ulimit, uthresh;

  uflimit = *blimit;
  ulimit = *limit;
  uthresh = *thresh;

  /* create quad-byte */
  __asm__ __volatile__(
      "replv.qb %[thresh_vec], %[uthresh] \n\t"
      "replv.qb %[flimit_vec], %[uflimit] \n\t"
      "replv.qb %[limit_vec], %[ulimit] \n\t"

      : [thresh_vec] "=&r"(thresh_vec), [flimit_vec] "=&r"(flimit_vec),
        [limit_vec] "=r"(limit_vec)
      : [uthresh] "r"(uthresh), [uflimit] "r"(uflimit), [ulimit] "r"(ulimit));

  /* prefetch data for store */
  prefetch_store(s);

  /* loop filter designed to work using chars so that we can make maximum use
     of 8 bit simd instructions. */
  for (i = 0; i < 2; i++) {
    /* eight row pointers: four above s, s itself, three below */
    sm1 = s - (pitch << 2);
    s0 = sm1 + pitch;
    s1 = s0 + pitch;
    s2 = s - pitch;
    s3 = s;
    s4 = s + pitch;
    s5 = s4 + pitch;
    s6 = s5 + pitch;

    /* load the four rows nearest the edge first */
    __asm__ __volatile__(
        "lw %[p1], (%[s1]) \n\t"
        "lw %[p2], (%[s2]) \n\t"
        "lw %[p3], (%[s3]) \n\t"
        "lw %[p4], (%[s4]) \n\t"

        : [p1] "=&r"(p1), [p2] "=&r"(p2), [p3] "=&r"(p3), [p4] "=&r"(p4)
        : [s1] "r"(s1), [s2] "r"(s2), [s3] "r"(s3), [s4] "r"(s4)
        : "memory");

    /* if (p1 - p4 == 0) and (p2 - p3 == 0)
       mask will be zero and filtering is not needed */
    if (!(((p1 - p4) == 0) && ((p2 - p3) == 0))) {
      /* the outer four rows are only needed for the mask computation */
      __asm__ __volatile__(
          "lw %[pm1], (%[sm1]) \n\t"
          "lw %[p0], (%[s0]) \n\t"
          "lw %[p5], (%[s5]) \n\t"
          "lw %[p6], (%[s6]) \n\t"

          : [pm1] "=&r"(pm1), [p0] "=&r"(p0), [p5] "=&r"(p5), [p6] "=&r"(p6)
          : [sm1] "r"(sm1), [s0] "r"(s0), [s5] "r"(s5), [s6] "r"(s6)
          : "memory");

      filter_hev_mask_dspr2(limit_vec, flimit_vec, p1, p2, pm1, p0, p3, p4, p5,
                            p6, thresh_vec, &hev, &mask);

      /* if mask == 0, filtering is not needed */
      if (mask) {
        /* filtering */
        filter_dspr2(mask, hev, &p1, &p2, &p3, &p4);

        /* write the four filtered rows back */
        __asm__ __volatile__(
            "sw %[p1], (%[s1]) \n\t"
            "sw %[p2], (%[s2]) \n\t"
            "sw %[p3], (%[s3]) \n\t"
            "sw %[p4], (%[s4]) \n\t"

            :
            : [p1] "r"(p1), [p2] "r"(p2), [p3] "r"(p3), [p4] "r"(p4),
              [s1] "r"(s1), [s2] "r"(s2), [s3] "r"(s3), [s4] "r"(s4)
            : "memory");
      }
    }

    s = s + 4;
  }
}
|
||||
|
||||
/*
 * 4-tap loop filter across a vertical edge, written with MIPS DSPr2 inline
 * assembly.
 *
 * s                      pointer to the first pixel right of the edge in the
 *                        first row; columns left of it use negative offsets.
 * pitch                  byte distance between vertically adjacent pixels.
 * blimit, limit, thresh  each points at one byte, which is replicated into
 *                        all four byte lanes of a 32-bit word.
 *
 * Eight rows are handled in two passes of four rows.  Each pass reads bytes
 * -4 .. +3 of every row as two 32-bit words, transposes them so that each
 * word holds one column, filters, and writes bytes -2 .. +1 of every row
 * back one byte at a time.
 *
 * The byte stores carry a "memory" clobber: the pixels are loaded through
 * plain C reads but stored from inline asm, and without the clobber the
 * compiler is not told that the asm modifies memory.
 */
void aom_lpf_vertical_4_dspr2(unsigned char *s, int pitch,
                              const uint8_t *blimit, const uint8_t *limit,
                              const uint8_t *thresh) {
  int i;
  uint32_t mask, hev;
  uint32_t pm1, p0, p1, p2, p3, p4, p5, p6;
  uint8_t *s1, *s2, *s3, *s4;
  uint32_t prim1, prim2, sec3, sec4, prim3, prim4;
  uint32_t thresh_vec, flimit_vec, limit_vec;
  uint32_t uflimit, ulimit, uthresh;

  uflimit = *blimit;
  ulimit = *limit;
  uthresh = *thresh;

  /* create quad-byte */
  __asm__ __volatile__(
      "replv.qb %[thresh_vec], %[uthresh] \n\t"
      "replv.qb %[flimit_vec], %[uflimit] \n\t"
      "replv.qb %[limit_vec], %[ulimit] \n\t"

      : [thresh_vec] "=&r"(thresh_vec), [flimit_vec] "=&r"(flimit_vec),
        [limit_vec] "=r"(limit_vec)
      : [uthresh] "r"(uthresh), [uflimit] "r"(uflimit), [ulimit] "r"(ulimit));

  /* prefetch data for store */
  prefetch_store(s + pitch);

  for (i = 0; i < 2; i++) {
    /* four consecutive rows; s is advanced to the next group right away */
    s1 = s;
    s2 = s + pitch;
    s3 = s2 + pitch;
    s4 = s3 + pitch;
    s = s4 + pitch;

    /* load quad-byte vectors
     * memory is 4 byte aligned
     */
    p2 = *((uint32_t *)(s1 - 4));
    p6 = *((uint32_t *)(s1));
    p1 = *((uint32_t *)(s2 - 4));
    p5 = *((uint32_t *)(s2));
    p0 = *((uint32_t *)(s3 - 4));
    p4 = *((uint32_t *)(s3));
    pm1 = *((uint32_t *)(s4 - 4));
    p3 = *((uint32_t *)(s4));

    /* transpose pm1, p0, p1, p2 */
    __asm__ __volatile__(
        "precrq.qb.ph %[prim1], %[p2], %[p1] \n\t"
        "precr.qb.ph %[prim2], %[p2], %[p1] \n\t"
        "precrq.qb.ph %[prim3], %[p0], %[pm1] \n\t"
        "precr.qb.ph %[prim4], %[p0], %[pm1] \n\t"

        "precrq.qb.ph %[p1], %[prim1], %[prim2] \n\t"
        "precr.qb.ph %[pm1], %[prim1], %[prim2] \n\t"
        "precrq.qb.ph %[sec3], %[prim3], %[prim4] \n\t"
        "precr.qb.ph %[sec4], %[prim3], %[prim4] \n\t"

        "precrq.ph.w %[p2], %[p1], %[sec3] \n\t"
        "precrq.ph.w %[p0], %[pm1], %[sec4] \n\t"
        "append %[p1], %[sec3], 16 \n\t"
        "append %[pm1], %[sec4], 16 \n\t"

        : [prim1] "=&r"(prim1), [prim2] "=&r"(prim2), [prim3] "=&r"(prim3),
          [prim4] "=&r"(prim4), [p2] "+r"(p2), [p1] "+r"(p1), [p0] "+r"(p0),
          [pm1] "+r"(pm1), [sec3] "=&r"(sec3), [sec4] "=&r"(sec4)
        :);

    /* transpose p3, p4, p5, p6 */
    __asm__ __volatile__(
        "precrq.qb.ph %[prim1], %[p6], %[p5] \n\t"
        "precr.qb.ph %[prim2], %[p6], %[p5] \n\t"
        "precrq.qb.ph %[prim3], %[p4], %[p3] \n\t"
        "precr.qb.ph %[prim4], %[p4], %[p3] \n\t"

        "precrq.qb.ph %[p5], %[prim1], %[prim2] \n\t"
        "precr.qb.ph %[p3], %[prim1], %[prim2] \n\t"
        "precrq.qb.ph %[sec3], %[prim3], %[prim4] \n\t"
        "precr.qb.ph %[sec4], %[prim3], %[prim4] \n\t"

        "precrq.ph.w %[p6], %[p5], %[sec3] \n\t"
        "precrq.ph.w %[p4], %[p3], %[sec4] \n\t"
        "append %[p5], %[sec3], 16 \n\t"
        "append %[p3], %[sec4], 16 \n\t"

        : [prim1] "=&r"(prim1), [prim2] "=&r"(prim2), [prim3] "=&r"(prim3),
          [prim4] "=&r"(prim4), [p6] "+r"(p6), [p5] "+r"(p5), [p4] "+r"(p4),
          [p3] "+r"(p3), [sec3] "=&r"(sec3), [sec4] "=&r"(sec4)
        :);

    /* if (p1 - p4 == 0) and (p2 - p3 == 0)
     * mask will be zero and filtering is not needed
     */
    if (!(((p1 - p4) == 0) && ((p2 - p3) == 0))) {
      filter_hev_mask_dspr2(limit_vec, flimit_vec, p1, p2, pm1, p0, p3, p4, p5,
                            p6, thresh_vec, &hev, &mask);

      /* if mask == 0, filtering is not needed */
      if (mask) {
        /* Destination rows in the order their bytes sit in p1..p4:
         * lowest byte belongs to s4, highest byte to s1. */
        uint8_t *const rows[4] = { s4, s3, s2, s1 };
        int r;

        /* filtering */
        filter_dspr2(mask, hev, &p1, &p2, &p3, &p4);

        /* unpack processed 4x4 neighborhood
         * don't use transpose on output data
         * because memory isn't aligned
         */
        for (r = 0; r < 4; r++) {
          __asm__ __volatile__(
              "sb %[p4], 1(%[row]) \n\t"
              "sb %[p3], 0(%[row]) \n\t"
              "sb %[p2], -1(%[row]) \n\t"
              "sb %[p1], -2(%[row]) \n\t"

              :
              : [p4] "r"(p4), [p3] "r"(p3), [p2] "r"(p2), [p1] "r"(p1),
                [row] "r"(rows[r])
              : "memory");

          /* Bring the next row's byte into the low lane.  The shift after
           * the last row is harmless: p1..p4 are reloaded or unused next. */
          p4 >>= 8;
          p3 >>= 8;
          p2 >>= 8;
          p1 >>= 8;
        }
      }
    }
  }
}
|
||||
|
||||
/*
 * Runs aom_lpf_horizontal_4_dspr2 twice: once at s with the *0 parameter
 * set, then 8 bytes further along the row with the *1 parameter set.
 */
void aom_lpf_horizontal_4_dual_dspr2(
    uint8_t *s, int p /* pitch */, const uint8_t *blimit0,
    const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1,
    const uint8_t *limit1, const uint8_t *thresh1) {
  uint8_t *const second_half = s + 8;

  aom_lpf_horizontal_4_dspr2(s, p, blimit0, limit0, thresh0);
  aom_lpf_horizontal_4_dspr2(second_half, p, blimit1, limit1, thresh1);
}
|
||||
|
||||
/*
 * Runs aom_lpf_horizontal_8_dspr2 twice: once at s with the *0 parameter
 * set, then 8 bytes further along the row with the *1 parameter set.
 */
void aom_lpf_horizontal_8_dual_dspr2(
    uint8_t *s, int p /* pitch */, const uint8_t *blimit0,
    const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1,
    const uint8_t *limit1, const uint8_t *thresh1) {
  uint8_t *const second_half = s + 8;

  aom_lpf_horizontal_8_dspr2(s, p, blimit0, limit0, thresh0);
  aom_lpf_horizontal_8_dspr2(second_half, p, blimit1, limit1, thresh1);
}
|
||||
|
||||
/*
 * Runs aom_lpf_vertical_4_dspr2 twice: once at s with the *0 parameter set,
 * then eight rows further down (s + 8 * p) with the *1 parameter set.
 */
void aom_lpf_vertical_4_dual_dspr2(uint8_t *s, int p, const uint8_t *blimit0,
                                   const uint8_t *limit0,
                                   const uint8_t *thresh0,
                                   const uint8_t *blimit1,
                                   const uint8_t *limit1,
                                   const uint8_t *thresh1) {
  uint8_t *const lower_half = s + 8 * p;

  aom_lpf_vertical_4_dspr2(s, p, blimit0, limit0, thresh0);
  aom_lpf_vertical_4_dspr2(lower_half, p, blimit1, limit1, thresh1);
}
|
||||
|
||||
/*
 * Runs aom_lpf_vertical_8_dspr2 twice: once at s with the *0 parameter set,
 * then eight rows further down (s + 8 * p) with the *1 parameter set.
 */
void aom_lpf_vertical_8_dual_dspr2(uint8_t *s, int p, const uint8_t *blimit0,
                                   const uint8_t *limit0,
                                   const uint8_t *thresh0,
                                   const uint8_t *blimit1,
                                   const uint8_t *limit1,
                                   const uint8_t *thresh1) {
  uint8_t *const lower_half = s + 8 * p;

  aom_lpf_vertical_8_dspr2(s, p, blimit0, limit0, thresh0);
  aom_lpf_vertical_8_dspr2(lower_half, p, blimit1, limit1, thresh1);
}
|
||||
|
||||
/*
 * Runs aom_lpf_vertical_16_dspr2 twice with the same blimit/limit/thresh:
 * once at s and once eight rows further down (s + 8 * p).
 */
void aom_lpf_vertical_16_dual_dspr2(uint8_t *s, int p, const uint8_t *blimit,
                                    const uint8_t *limit,
                                    const uint8_t *thresh) {
  uint8_t *const lower_half = s + 8 * p;

  aom_lpf_vertical_16_dspr2(s, p, blimit, limit, thresh);
  aom_lpf_vertical_16_dspr2(lower_half, p, blimit, limit, thresh);
}
|
||||
#endif // #if HAVE_DSPR2
|
||||
@@ -1,436 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2016, Alliance for Open Media. All rights reserved
|
||||
*
|
||||
* This source code is subject to the terms of the BSD 2 Clause License and
|
||||
* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
|
||||
* was not distributed with this source code in the LICENSE file, you can
|
||||
* obtain it at www.aomedia.org/license/software. If the Alliance for Open
|
||||
* Media Patent License 1.0 was not distributed with this source code in the
|
||||
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
|
||||
*/
|
||||
|
||||
#ifndef AOM_DSP_MIPS_LOOPFILTER_MACROS_DSPR2_H_
|
||||
#define AOM_DSP_MIPS_LOOPFILTER_MACROS_DSPR2_H_
|
||||
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "./aom_dsp_rtcd.h"
|
||||
#include "aom/aom_integer.h"
|
||||
#include "aom_mem/aom_mem.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#if HAVE_DSPR2
|
||||
/*
 * Stores four bytes per row to rows s4, s3, s2, s1 (in that order) at byte
 * offsets +1, 0, -1, -2, taken from q1_f0, q0_f0, p0_f0, p1_f0.
 *
 * The low byte of each value goes to s4; the values are then shifted right
 * by 8 before each following row, so the highest byte ends up in s1.
 * q1_f0, q0_f0, p0_f0 and p1_f0 are modified (shifted by 24 bits in total).
 *
 * Expects q1_f0, q0_f0, p0_f0, p1_f0 and s1..s4 in scope at the expansion
 * site.  Every store statement carries a "memory" clobber because the
 * written bytes are not listed as asm operands.
 */
#define STORE_F0() \
  { \
    __asm__ __volatile__( \
        "sb %[q1_f0], 1(%[s4]) \n\t" \
        "sb %[q0_f0], 0(%[s4]) \n\t" \
        "sb %[p0_f0], -1(%[s4]) \n\t" \
        "sb %[p1_f0], -2(%[s4]) \n\t" \
        \
        : \
        : [q1_f0] "r"(q1_f0), [q0_f0] "r"(q0_f0), [p0_f0] "r"(p0_f0), \
          [p1_f0] "r"(p1_f0), [s4] "r"(s4) \
        : "memory"); \
    \
    __asm__ __volatile__( \
        "srl %[q1_f0], %[q1_f0], 8 \n\t" \
        "srl %[q0_f0], %[q0_f0], 8 \n\t" \
        "srl %[p0_f0], %[p0_f0], 8 \n\t" \
        "srl %[p1_f0], %[p1_f0], 8 \n\t" \
        \
        : [q1_f0] "+r"(q1_f0), [q0_f0] "+r"(q0_f0), [p0_f0] "+r"(p0_f0), \
          [p1_f0] "+r"(p1_f0) \
        :); \
    \
    __asm__ __volatile__( \
        "sb %[q1_f0], 1(%[s3]) \n\t" \
        "sb %[q0_f0], 0(%[s3]) \n\t" \
        "sb %[p0_f0], -1(%[s3]) \n\t" \
        "sb %[p1_f0], -2(%[s3]) \n\t" \
        \
        : \
        : [q1_f0] "r"(q1_f0), [q0_f0] "r"(q0_f0), [p0_f0] "r"(p0_f0), \
          [p1_f0] "r"(p1_f0), [s3] "r"(s3) \
        : "memory"); \
    \
    __asm__ __volatile__( \
        "srl %[q1_f0], %[q1_f0], 8 \n\t" \
        "srl %[q0_f0], %[q0_f0], 8 \n\t" \
        "srl %[p0_f0], %[p0_f0], 8 \n\t" \
        "srl %[p1_f0], %[p1_f0], 8 \n\t" \
        \
        : [q1_f0] "+r"(q1_f0), [q0_f0] "+r"(q0_f0), [p0_f0] "+r"(p0_f0), \
          [p1_f0] "+r"(p1_f0) \
        :); \
    \
    __asm__ __volatile__( \
        "sb %[q1_f0], 1(%[s2]) \n\t" \
        "sb %[q0_f0], 0(%[s2]) \n\t" \
        "sb %[p0_f0], -1(%[s2]) \n\t" \
        "sb %[p1_f0], -2(%[s2]) \n\t" \
        \
        : \
        : [q1_f0] "r"(q1_f0), [q0_f0] "r"(q0_f0), [p0_f0] "r"(p0_f0), \
          [p1_f0] "r"(p1_f0), [s2] "r"(s2) \
        : "memory"); \
    \
    __asm__ __volatile__( \
        "srl %[q1_f0], %[q1_f0], 8 \n\t" \
        "srl %[q0_f0], %[q0_f0], 8 \n\t" \
        "srl %[p0_f0], %[p0_f0], 8 \n\t" \
        "srl %[p1_f0], %[p1_f0], 8 \n\t" \
        \
        : [q1_f0] "+r"(q1_f0), [q0_f0] "+r"(q0_f0), [p0_f0] "+r"(p0_f0), \
          [p1_f0] "+r"(p1_f0) \
        :); \
    \
    __asm__ __volatile__( \
        "sb %[q1_f0], 1(%[s1]) \n\t" \
        "sb %[q0_f0], 0(%[s1]) \n\t" \
        "sb %[p0_f0], -1(%[s1]) \n\t" \
        "sb %[p1_f0], -2(%[s1]) \n\t" \
        \
        : \
        : [q1_f0] "r"(q1_f0), [q0_f0] "r"(q0_f0), [p0_f0] "r"(p0_f0), \
          [p1_f0] "r"(p1_f0), [s1] "r"(s1) \
        : "memory"); \
  }
|
||||
|
||||
/*
 * Stores six bytes per row at byte offsets +2 .. -3 to rows s4, s3, s2, s1.
 *
 * Rows s4 and s3 come from the *_r values (low byte, then the byte that
 * sits 16 bits higher); rows s2 and s1 come from the *_l values in the same
 * way.  The *_r and *_l values are shifted right by 16 in the process.
 *
 * Expects q2..q0 / p0..p2 in both _r and _l flavours and s1..s4 in scope at
 * the expansion site.  Every store statement carries a "memory" clobber
 * because the written bytes are not listed as asm operands.
 */
#define STORE_F1() \
  { \
    __asm__ __volatile__( \
        "sb %[q2_r], 2(%[s4]) \n\t" \
        "sb %[q1_r], 1(%[s4]) \n\t" \
        "sb %[q0_r], 0(%[s4]) \n\t" \
        "sb %[p0_r], -1(%[s4]) \n\t" \
        "sb %[p1_r], -2(%[s4]) \n\t" \
        "sb %[p2_r], -3(%[s4]) \n\t" \
        \
        : \
        : [q2_r] "r"(q2_r), [q1_r] "r"(q1_r), [q0_r] "r"(q0_r), \
          [p0_r] "r"(p0_r), [p1_r] "r"(p1_r), [p2_r] "r"(p2_r), [s4] "r"(s4) \
        : "memory"); \
    \
    __asm__ __volatile__( \
        "srl %[q2_r], %[q2_r], 16 \n\t" \
        "srl %[q1_r], %[q1_r], 16 \n\t" \
        "srl %[q0_r], %[q0_r], 16 \n\t" \
        "srl %[p0_r], %[p0_r], 16 \n\t" \
        "srl %[p1_r], %[p1_r], 16 \n\t" \
        "srl %[p2_r], %[p2_r], 16 \n\t" \
        \
        : [q2_r] "+r"(q2_r), [q1_r] "+r"(q1_r), [q0_r] "+r"(q0_r), \
          [p0_r] "+r"(p0_r), [p1_r] "+r"(p1_r), [p2_r] "+r"(p2_r) \
        :); \
    \
    __asm__ __volatile__( \
        "sb %[q2_r], 2(%[s3]) \n\t" \
        "sb %[q1_r], 1(%[s3]) \n\t" \
        "sb %[q0_r], 0(%[s3]) \n\t" \
        "sb %[p0_r], -1(%[s3]) \n\t" \
        "sb %[p1_r], -2(%[s3]) \n\t" \
        "sb %[p2_r], -3(%[s3]) \n\t" \
        \
        : \
        : [q2_r] "r"(q2_r), [q1_r] "r"(q1_r), [q0_r] "r"(q0_r), \
          [p0_r] "r"(p0_r), [p1_r] "r"(p1_r), [p2_r] "r"(p2_r), [s3] "r"(s3) \
        : "memory"); \
    \
    __asm__ __volatile__( \
        "sb %[q2_l], 2(%[s2]) \n\t" \
        "sb %[q1_l], 1(%[s2]) \n\t" \
        "sb %[q0_l], 0(%[s2]) \n\t" \
        "sb %[p0_l], -1(%[s2]) \n\t" \
        "sb %[p1_l], -2(%[s2]) \n\t" \
        "sb %[p2_l], -3(%[s2]) \n\t" \
        \
        : \
        : [q2_l] "r"(q2_l), [q1_l] "r"(q1_l), [q0_l] "r"(q0_l), \
          [p0_l] "r"(p0_l), [p1_l] "r"(p1_l), [p2_l] "r"(p2_l), [s2] "r"(s2) \
        : "memory"); \
    \
    __asm__ __volatile__( \
        "srl %[q2_l], %[q2_l], 16 \n\t" \
        "srl %[q1_l], %[q1_l], 16 \n\t" \
        "srl %[q0_l], %[q0_l], 16 \n\t" \
        "srl %[p0_l], %[p0_l], 16 \n\t" \
        "srl %[p1_l], %[p1_l], 16 \n\t" \
        "srl %[p2_l], %[p2_l], 16 \n\t" \
        \
        : [q2_l] "+r"(q2_l), [q1_l] "+r"(q1_l), [q0_l] "+r"(q0_l), \
          [p0_l] "+r"(p0_l), [p1_l] "+r"(p1_l), [p2_l] "+r"(p2_l) \
        :); \
    \
    __asm__ __volatile__( \
        "sb %[q2_l], 2(%[s1]) \n\t" \
        "sb %[q1_l], 1(%[s1]) \n\t" \
        "sb %[q0_l], 0(%[s1]) \n\t" \
        "sb %[p0_l], -1(%[s1]) \n\t" \
        "sb %[p1_l], -2(%[s1]) \n\t" \
        "sb %[p2_l], -3(%[s1]) \n\t" \
        \
        : \
        : [q2_l] "r"(q2_l), [q1_l] "r"(q1_l), [q0_l] "r"(q0_l), \
          [p0_l] "r"(p0_l), [p1_l] "r"(p1_l), [p2_l] "r"(p2_l), [s1] "r"(s1) \
        : "memory"); \
  }
|
||||
|
||||
/*
 * Stores fourteen bytes per row at byte offsets +6 .. -7 to rows s4, s3, s2,
 * s1.
 *
 * Rows s4 and s3 come from the *_r values (low byte, then the byte that
 * sits 16 bits higher); rows s2 and s1 come from the *_l values in the same
 * way.  The *_r and *_l values are shifted right by 16 in the process.
 *
 * Expects q6..q0 / p0..p6 in both _r and _l flavours and s1..s4 in scope at
 * the expansion site.  Every store statement carries a "memory" clobber
 * because the written bytes are not listed as asm operands.
 */
#define STORE_F2() \
  { \
    __asm__ __volatile__( \
        "sb %[q6_r], 6(%[s4]) \n\t" \
        "sb %[q5_r], 5(%[s4]) \n\t" \
        "sb %[q4_r], 4(%[s4]) \n\t" \
        "sb %[q3_r], 3(%[s4]) \n\t" \
        "sb %[q2_r], 2(%[s4]) \n\t" \
        "sb %[q1_r], 1(%[s4]) \n\t" \
        "sb %[q0_r], 0(%[s4]) \n\t" \
        "sb %[p0_r], -1(%[s4]) \n\t" \
        "sb %[p1_r], -2(%[s4]) \n\t" \
        "sb %[p2_r], -3(%[s4]) \n\t" \
        "sb %[p3_r], -4(%[s4]) \n\t" \
        "sb %[p4_r], -5(%[s4]) \n\t" \
        "sb %[p5_r], -6(%[s4]) \n\t" \
        "sb %[p6_r], -7(%[s4]) \n\t" \
        \
        : \
        : [q6_r] "r"(q6_r), [q5_r] "r"(q5_r), [q4_r] "r"(q4_r), \
          [q3_r] "r"(q3_r), [q2_r] "r"(q2_r), [q1_r] "r"(q1_r), \
          [q0_r] "r"(q0_r), [p0_r] "r"(p0_r), [p1_r] "r"(p1_r), \
          [p2_r] "r"(p2_r), [p3_r] "r"(p3_r), [p4_r] "r"(p4_r), \
          [p5_r] "r"(p5_r), [p6_r] "r"(p6_r), [s4] "r"(s4) \
        : "memory"); \
    \
    __asm__ __volatile__( \
        "srl %[q6_r], %[q6_r], 16 \n\t" \
        "srl %[q5_r], %[q5_r], 16 \n\t" \
        "srl %[q4_r], %[q4_r], 16 \n\t" \
        "srl %[q3_r], %[q3_r], 16 \n\t" \
        "srl %[q2_r], %[q2_r], 16 \n\t" \
        "srl %[q1_r], %[q1_r], 16 \n\t" \
        "srl %[q0_r], %[q0_r], 16 \n\t" \
        "srl %[p0_r], %[p0_r], 16 \n\t" \
        "srl %[p1_r], %[p1_r], 16 \n\t" \
        "srl %[p2_r], %[p2_r], 16 \n\t" \
        "srl %[p3_r], %[p3_r], 16 \n\t" \
        "srl %[p4_r], %[p4_r], 16 \n\t" \
        "srl %[p5_r], %[p5_r], 16 \n\t" \
        "srl %[p6_r], %[p6_r], 16 \n\t" \
        \
        : [q6_r] "+r"(q6_r), [q5_r] "+r"(q5_r), [q4_r] "+r"(q4_r), \
          [q3_r] "+r"(q3_r), [q2_r] "+r"(q2_r), [q1_r] "+r"(q1_r), \
          [q0_r] "+r"(q0_r), [p0_r] "+r"(p0_r), [p1_r] "+r"(p1_r), \
          [p2_r] "+r"(p2_r), [p3_r] "+r"(p3_r), [p4_r] "+r"(p4_r), \
          [p5_r] "+r"(p5_r), [p6_r] "+r"(p6_r) \
        :); \
    \
    __asm__ __volatile__( \
        "sb %[q6_r], 6(%[s3]) \n\t" \
        "sb %[q5_r], 5(%[s3]) \n\t" \
        "sb %[q4_r], 4(%[s3]) \n\t" \
        "sb %[q3_r], 3(%[s3]) \n\t" \
        "sb %[q2_r], 2(%[s3]) \n\t" \
        "sb %[q1_r], 1(%[s3]) \n\t" \
        "sb %[q0_r], 0(%[s3]) \n\t" \
        "sb %[p0_r], -1(%[s3]) \n\t" \
        "sb %[p1_r], -2(%[s3]) \n\t" \
        "sb %[p2_r], -3(%[s3]) \n\t" \
        "sb %[p3_r], -4(%[s3]) \n\t" \
        "sb %[p4_r], -5(%[s3]) \n\t" \
        "sb %[p5_r], -6(%[s3]) \n\t" \
        "sb %[p6_r], -7(%[s3]) \n\t" \
        \
        : \
        : [q6_r] "r"(q6_r), [q5_r] "r"(q5_r), [q4_r] "r"(q4_r), \
          [q3_r] "r"(q3_r), [q2_r] "r"(q2_r), [q1_r] "r"(q1_r), \
          [q0_r] "r"(q0_r), [p0_r] "r"(p0_r), [p1_r] "r"(p1_r), \
          [p2_r] "r"(p2_r), [p3_r] "r"(p3_r), [p4_r] "r"(p4_r), \
          [p5_r] "r"(p5_r), [p6_r] "r"(p6_r), [s3] "r"(s3) \
        : "memory"); \
    \
    __asm__ __volatile__( \
        "sb %[q6_l], 6(%[s2]) \n\t" \
        "sb %[q5_l], 5(%[s2]) \n\t" \
        "sb %[q4_l], 4(%[s2]) \n\t" \
        "sb %[q3_l], 3(%[s2]) \n\t" \
        "sb %[q2_l], 2(%[s2]) \n\t" \
        "sb %[q1_l], 1(%[s2]) \n\t" \
        "sb %[q0_l], 0(%[s2]) \n\t" \
        "sb %[p0_l], -1(%[s2]) \n\t" \
        "sb %[p1_l], -2(%[s2]) \n\t" \
        "sb %[p2_l], -3(%[s2]) \n\t" \
        "sb %[p3_l], -4(%[s2]) \n\t" \
        "sb %[p4_l], -5(%[s2]) \n\t" \
        "sb %[p5_l], -6(%[s2]) \n\t" \
        "sb %[p6_l], -7(%[s2]) \n\t" \
        \
        : \
        : [q6_l] "r"(q6_l), [q5_l] "r"(q5_l), [q4_l] "r"(q4_l), \
          [q3_l] "r"(q3_l), [q2_l] "r"(q2_l), [q1_l] "r"(q1_l), \
          [q0_l] "r"(q0_l), [p0_l] "r"(p0_l), [p1_l] "r"(p1_l), \
          [p2_l] "r"(p2_l), [p3_l] "r"(p3_l), [p4_l] "r"(p4_l), \
          [p5_l] "r"(p5_l), [p6_l] "r"(p6_l), [s2] "r"(s2) \
        : "memory"); \
    \
    __asm__ __volatile__( \
        "srl %[q6_l], %[q6_l], 16 \n\t" \
        "srl %[q5_l], %[q5_l], 16 \n\t" \
        "srl %[q4_l], %[q4_l], 16 \n\t" \
        "srl %[q3_l], %[q3_l], 16 \n\t" \
        "srl %[q2_l], %[q2_l], 16 \n\t" \
        "srl %[q1_l], %[q1_l], 16 \n\t" \
        "srl %[q0_l], %[q0_l], 16 \n\t" \
        "srl %[p0_l], %[p0_l], 16 \n\t" \
        "srl %[p1_l], %[p1_l], 16 \n\t" \
        "srl %[p2_l], %[p2_l], 16 \n\t" \
        "srl %[p3_l], %[p3_l], 16 \n\t" \
        "srl %[p4_l], %[p4_l], 16 \n\t" \
        "srl %[p5_l], %[p5_l], 16 \n\t" \
        "srl %[p6_l], %[p6_l], 16 \n\t" \
        \
        : [q6_l] "+r"(q6_l), [q5_l] "+r"(q5_l), [q4_l] "+r"(q4_l), \
          [q3_l] "+r"(q3_l), [q2_l] "+r"(q2_l), [q1_l] "+r"(q1_l), \
          [q0_l] "+r"(q0_l), [p0_l] "+r"(p0_l), [p1_l] "+r"(p1_l), \
          [p2_l] "+r"(p2_l), [p3_l] "+r"(p3_l), [p4_l] "+r"(p4_l), \
          [p5_l] "+r"(p5_l), [p6_l] "+r"(p6_l) \
        :); \
    \
    __asm__ __volatile__( \
        "sb %[q6_l], 6(%[s1]) \n\t" \
        "sb %[q5_l], 5(%[s1]) \n\t" \
        "sb %[q4_l], 4(%[s1]) \n\t" \
        "sb %[q3_l], 3(%[s1]) \n\t" \
        "sb %[q2_l], 2(%[s1]) \n\t" \
        "sb %[q1_l], 1(%[s1]) \n\t" \
        "sb %[q0_l], 0(%[s1]) \n\t" \
        "sb %[p0_l], -1(%[s1]) \n\t" \
        "sb %[p1_l], -2(%[s1]) \n\t" \
        "sb %[p2_l], -3(%[s1]) \n\t" \
        "sb %[p3_l], -4(%[s1]) \n\t" \
        "sb %[p4_l], -5(%[s1]) \n\t" \
        "sb %[p5_l], -6(%[s1]) \n\t" \
        "sb %[p6_l], -7(%[s1]) \n\t" \
        \
        : \
        : [q6_l] "r"(q6_l), [q5_l] "r"(q5_l), [q4_l] "r"(q4_l), \
          [q3_l] "r"(q3_l), [q2_l] "r"(q2_l), [q1_l] "r"(q1_l), \
          [q0_l] "r"(q0_l), [p0_l] "r"(p0_l), [p1_l] "r"(p1_l), \
          [p2_l] "r"(p2_l), [p3_l] "r"(p3_l), [p4_l] "r"(p4_l), \
          [p5_l] "r"(p5_l), [p6_l] "r"(p6_l), [s1] "r"(s1) \
        : "memory"); \
  }
|
||||
|
||||
/*
 * Fills p3_l..p0_l and q0_l..q3_l from p3..p0 and q0..q3 with one
 * preceu.ph.qbl each (the instruction expands the two left bytes of the
 * source word to unsigned halfwords).  Registers only, no memory access.
 * All sixteen names must be in scope at the expansion site.
 */
#define PACK_LEFT_0TO3() \
  { \
    __asm__ __volatile__( \
        "preceu.ph.qbl %[p3_l], %[p3] \n\t" \
        "preceu.ph.qbl %[p2_l], %[p2] \n\t" \
        "preceu.ph.qbl %[p1_l], %[p1] \n\t" \
        "preceu.ph.qbl %[p0_l], %[p0] \n\t" \
        "preceu.ph.qbl %[q0_l], %[q0] \n\t" \
        "preceu.ph.qbl %[q1_l], %[q1] \n\t" \
        "preceu.ph.qbl %[q2_l], %[q2] \n\t" \
        "preceu.ph.qbl %[q3_l], %[q3] \n\t" \
        \
        : [p3_l] "=&r"(p3_l), [p2_l] "=&r"(p2_l), \
          [p1_l] "=&r"(p1_l), [p0_l] "=&r"(p0_l), \
          [q0_l] "=&r"(q0_l), [q1_l] "=&r"(q1_l), \
          [q2_l] "=&r"(q2_l), [q3_l] "=&r"(q3_l) \
        : [p3] "r"(p3), [p2] "r"(p2), [p1] "r"(p1), [p0] "r"(p0), \
          [q0] "r"(q0), [q1] "r"(q1), [q2] "r"(q2), [q3] "r"(q3)); \
  }
|
||||
|
||||
/*
 * Fills p7_l..p4_l and q4_l..q7_l from p7..p4 and q4..q7 with one
 * preceu.ph.qbl each (the instruction expands the two left bytes of the
 * source word to unsigned halfwords).  Registers only, no memory access.
 * All sixteen names must be in scope at the expansion site.
 */
#define PACK_LEFT_4TO7() \
  { \
    __asm__ __volatile__( \
        "preceu.ph.qbl %[p7_l], %[p7] \n\t" \
        "preceu.ph.qbl %[p6_l], %[p6] \n\t" \
        "preceu.ph.qbl %[p5_l], %[p5] \n\t" \
        "preceu.ph.qbl %[p4_l], %[p4] \n\t" \
        "preceu.ph.qbl %[q4_l], %[q4] \n\t" \
        "preceu.ph.qbl %[q5_l], %[q5] \n\t" \
        "preceu.ph.qbl %[q6_l], %[q6] \n\t" \
        "preceu.ph.qbl %[q7_l], %[q7] \n\t" \
        \
        : [p7_l] "=&r"(p7_l), [p6_l] "=&r"(p6_l), \
          [p5_l] "=&r"(p5_l), [p4_l] "=&r"(p4_l), \
          [q4_l] "=&r"(q4_l), [q5_l] "=&r"(q5_l), \
          [q6_l] "=&r"(q6_l), [q7_l] "=&r"(q7_l) \
        : [p7] "r"(p7), [p6] "r"(p6), [p5] "r"(p5), [p4] "r"(p4), \
          [q4] "r"(q4), [q5] "r"(q5), [q6] "r"(q6), [q7] "r"(q7)); \
  }
|
||||
|
||||
/*
 * Fills p3_r..p0_r and q0_r..q3_r from p3..p0 and q0..q3 with one
 * preceu.ph.qbr each (the instruction expands the two right bytes of the
 * source word to unsigned halfwords).  Registers only, no memory access.
 * All sixteen names must be in scope at the expansion site.
 */
#define PACK_RIGHT_0TO3() \
  { \
    __asm__ __volatile__( \
        "preceu.ph.qbr %[p3_r], %[p3] \n\t" \
        "preceu.ph.qbr %[p2_r], %[p2] \n\t" \
        "preceu.ph.qbr %[p1_r], %[p1] \n\t" \
        "preceu.ph.qbr %[p0_r], %[p0] \n\t" \
        "preceu.ph.qbr %[q0_r], %[q0] \n\t" \
        "preceu.ph.qbr %[q1_r], %[q1] \n\t" \
        "preceu.ph.qbr %[q2_r], %[q2] \n\t" \
        "preceu.ph.qbr %[q3_r], %[q3] \n\t" \
        \
        : [p3_r] "=&r"(p3_r), [p2_r] "=&r"(p2_r), \
          [p1_r] "=&r"(p1_r), [p0_r] "=&r"(p0_r), \
          [q0_r] "=&r"(q0_r), [q1_r] "=&r"(q1_r), \
          [q2_r] "=&r"(q2_r), [q3_r] "=&r"(q3_r) \
        : [p3] "r"(p3), [p2] "r"(p2), [p1] "r"(p1), [p0] "r"(p0), \
          [q0] "r"(q0), [q1] "r"(q1), [q2] "r"(q2), [q3] "r"(q3)); \
  }
|
||||
|
||||
/*
 * Fills p7_r..p4_r and q4_r..q7_r from p7..p4 and q4..q7 with one
 * preceu.ph.qbr each (the instruction expands the two right bytes of the
 * source word to unsigned halfwords).  Registers only, no memory access.
 * All sixteen names must be in scope at the expansion site.
 */
#define PACK_RIGHT_4TO7() \
  { \
    __asm__ __volatile__( \
        "preceu.ph.qbr %[p7_r], %[p7] \n\t" \
        "preceu.ph.qbr %[p6_r], %[p6] \n\t" \
        "preceu.ph.qbr %[p5_r], %[p5] \n\t" \
        "preceu.ph.qbr %[p4_r], %[p4] \n\t" \
        "preceu.ph.qbr %[q4_r], %[q4] \n\t" \
        "preceu.ph.qbr %[q5_r], %[q5] \n\t" \
        "preceu.ph.qbr %[q6_r], %[q6] \n\t" \
        "preceu.ph.qbr %[q7_r], %[q7] \n\t" \
        \
        : [p7_r] "=&r"(p7_r), [p6_r] "=&r"(p6_r), \
          [p5_r] "=&r"(p5_r), [p4_r] "=&r"(p4_r), \
          [q4_r] "=&r"(q4_r), [q5_r] "=&r"(q5_r), \
          [q6_r] "=&r"(q6_r), [q7_r] "=&r"(q7_r) \
        : [p7] "r"(p7), [p6] "r"(p6), [p5] "r"(p5), [p4] "r"(p4), \
          [q4] "r"(q4), [q5] "r"(q5), [q6] "r"(q6), [q7] "r"(q7)); \
  }
|
||||
|
||||
/*
 * Rebuilds p2..p0 and q0..q2 from their _l / _r halves with one
 * precr.qb.ph each (the instruction reduces the four halfwords of its two
 * source words to four bytes).  Registers only, no memory access.
 * All eighteen names must be in scope at the expansion site.
 */
#define COMBINE_LEFT_RIGHT_0TO2() \
  { \
    __asm__ __volatile__( \
        "precr.qb.ph %[p2], %[p2_l], %[p2_r] \n\t" \
        "precr.qb.ph %[p1], %[p1_l], %[p1_r] \n\t" \
        "precr.qb.ph %[p0], %[p0_l], %[p0_r] \n\t" \
        "precr.qb.ph %[q0], %[q0_l], %[q0_r] \n\t" \
        "precr.qb.ph %[q1], %[q1_l], %[q1_r] \n\t" \
        "precr.qb.ph %[q2], %[q2_l], %[q2_r] \n\t" \
        \
        : [p2] "=&r"(p2), [p1] "=&r"(p1), [p0] "=&r"(p0), \
          [q0] "=&r"(q0), [q1] "=&r"(q1), [q2] "=&r"(q2) \
        : [p2_l] "r"(p2_l), [p2_r] "r"(p2_r), \
          [p1_l] "r"(p1_l), [p1_r] "r"(p1_r), \
          [p0_l] "r"(p0_l), [p0_r] "r"(p0_r), \
          [q0_l] "r"(q0_l), [q0_r] "r"(q0_r), \
          [q1_l] "r"(q1_l), [q1_r] "r"(q1_r), \
          [q2_l] "r"(q2_l), [q2_r] "r"(q2_r)); \
  }
|
||||
|
||||
/*
 * Rebuilds p6..p3 and q3..q6 from their _l / _r halves with one
 * precr.qb.ph each (the instruction reduces the four halfwords of its two
 * source words to four bytes).  Registers only, no memory access.
 * All twenty-four names must be in scope at the expansion site.
 */
#define COMBINE_LEFT_RIGHT_3TO6() \
  { \
    __asm__ __volatile__( \
        "precr.qb.ph %[p6], %[p6_l], %[p6_r] \n\t" \
        "precr.qb.ph %[p5], %[p5_l], %[p5_r] \n\t" \
        "precr.qb.ph %[p4], %[p4_l], %[p4_r] \n\t" \
        "precr.qb.ph %[p3], %[p3_l], %[p3_r] \n\t" \
        "precr.qb.ph %[q3], %[q3_l], %[q3_r] \n\t" \
        "precr.qb.ph %[q4], %[q4_l], %[q4_r] \n\t" \
        "precr.qb.ph %[q5], %[q5_l], %[q5_r] \n\t" \
        "precr.qb.ph %[q6], %[q6_l], %[q6_r] \n\t" \
        \
        : [p6] "=&r"(p6), [p5] "=&r"(p5), [p4] "=&r"(p4), [p3] "=&r"(p3), \
          [q3] "=&r"(q3), [q4] "=&r"(q4), [q5] "=&r"(q5), [q6] "=&r"(q6) \
        : [p6_l] "r"(p6_l), [p6_r] "r"(p6_r), \
          [p5_l] "r"(p5_l), [p5_r] "r"(p5_r), \
          [p4_l] "r"(p4_l), [p4_r] "r"(p4_r), \
          [p3_l] "r"(p3_l), [p3_r] "r"(p3_r), \
          [q3_l] "r"(q3_l), [q3_r] "r"(q3_r), \
          [q4_l] "r"(q4_l), [q4_r] "r"(q4_r), \
          [q5_l] "r"(q5_l), [q5_r] "r"(q5_r), \
          [q6_l] "r"(q6_l), [q6_r] "r"(q6_r)); \
  }
|
||||
|
||||
#endif // #if HAVE_DSPR2
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
#endif
|
||||
|
||||
#endif // AOM_DSP_MIPS_LOOPFILTER_MACROS_DSPR2_H_
|
||||
@@ -1,251 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2016, Alliance for Open Media. All rights reserved
|
||||
*
|
||||
* This source code is subject to the terms of the BSD 2 Clause License and
|
||||
* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
|
||||
* was not distributed with this source code in the LICENSE file, you can
|
||||
* obtain it at www.aomedia.org/license/software. If the Alliance for Open
|
||||
* Media Patent License 1.0 was not distributed with this source code in the
|
||||
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
|
||||
*/
|
||||
|
||||
#ifndef AOM_DSP_LOOPFILTER_MSA_H_
|
||||
#define AOM_DSP_LOOPFILTER_MSA_H_
|
||||
|
||||
#include "aom_dsp/mips/macros_msa.h"
|
||||
|
||||
/* 4-tap loop filter that widens only the right half of the input vectors.
 *
 * Inputs:  p1_in, p0_in, q0_in, q1_in (pixel vectors), mask_in, hev_in.
 * Outputs: p1_out, p0_out, q0_out, q1_out.
 *
 * NOTE: hev_in is overwritten with its bitwise complement, so the caller's
 * variable is modified. The 16-bit intermediate is built with ilvr only and
 * the packed filter value is formed from filt_r alone.
 */
#define AOM_LPF_FILTER4_8W(p1_in, p0_in, q0_in, q1_in, mask_in, hev_in, \
                           p1_out, p0_out, q0_out, q1_out) \
  { \
    v16i8 p1_m, p0_m, q0_m, q1_m, q0_sub_p0, filt_sign; \
    v16i8 filt, filt1, filt2, cnst4b, cnst3b; \
    v8i16 q0_sub_p0_r, filt_r, cnst3h; \
    \
    /* XOR every byte with 0x80 and treat the pixels as signed bytes */ \
    p1_m = (v16i8)__msa_xori_b(p1_in, 0x80); \
    p0_m = (v16i8)__msa_xori_b(p0_in, 0x80); \
    q0_m = (v16i8)__msa_xori_b(q0_in, 0x80); \
    q1_m = (v16i8)__msa_xori_b(q1_in, 0x80); \
    \
    /* (p1 - q1), gated by hev_in */ \
    filt = __msa_subs_s_b(p1_m, q1_m); \
    filt &= (v16i8)hev_in; \
    q0_sub_p0 = q0_m - p0_m; \
    filt_sign = __msa_clti_s_b(filt, 0); \
    \
    /* filt + 3 * (q0 - p0), evaluated in 16-bit lanes */ \
    cnst3h = __msa_ldi_h(3); \
    q0_sub_p0_r = (v8i16)__msa_ilvr_b(q0_sub_p0, q0_sub_p0); \
    q0_sub_p0_r = __msa_dotp_s_h((v16i8)q0_sub_p0_r, (v16i8)cnst3h); \
    filt_r = (v8i16)__msa_ilvr_b(filt_sign, filt); \
    filt_r += q0_sub_p0_r; \
    filt_r = __msa_sat_s_h(filt_r, 7); \
    \
    /* back to bytes, then apply the filter mask */ \
    filt = __msa_pckev_b((v16i8)filt_r, (v16i8)filt_r); \
    filt &= (v16i8)mask_in; \
    \
    /* filt1 = (filt + 4) >> 3 */ \
    cnst4b = __msa_ldi_b(4); \
    filt1 = __msa_adds_s_b(filt, cnst4b); \
    filt1 >>= 3; \
    \
    /* filt2 = (filt + 3) >> 3 */ \
    cnst3b = __msa_ldi_b(3); \
    filt2 = __msa_adds_s_b(filt, cnst3b); \
    filt2 >>= 3; \
    \
    /* inner taps: q0 -= filt1, p0 += filt2, then undo the 0x80 XOR */ \
    q0_m = __msa_subs_s_b(q0_m, filt1); \
    q0_out = __msa_xori_b((v16u8)q0_m, 0x80); \
    p0_m = __msa_adds_s_b(p0_m, filt2); \
    p0_out = __msa_xori_b((v16u8)p0_m, 0x80); \
    \
    /* outer taps use srari(filt1, 1), kept only where hev is NOT set */ \
    filt = __msa_srari_b(filt1, 1); \
    hev_in = __msa_xori_b((v16u8)hev_in, 0xff); \
    filt &= (v16i8)hev_in; \
    \
    q1_m = __msa_subs_s_b(q1_m, filt); \
    q1_out = __msa_xori_b((v16u8)q1_m, 0x80); \
    p1_m = __msa_adds_s_b(p1_m, filt); \
    p1_out = __msa_xori_b((v16u8)p1_m, 0x80); \
  }
|
||||
|
||||
/* 4-tap loop filter that widens both halves of the input vectors.
 *
 * Inputs:  p1_in, p0_in, q0_in, q1_in (pixel vectors), mask_in, hev_in.
 * Outputs: p1_out, p0_out, q0_out, q1_out.
 *
 * NOTE: hev_in is overwritten with its bitwise complement, so the caller's
 * variable is modified. Unlike AOM_LPF_FILTER4_8W, the 16-bit intermediate
 * is computed for the right (ilvr) and the left (ilvl) halves and both are
 * packed back together.
 */
#define AOM_LPF_FILTER4_4W(p1_in, p0_in, q0_in, q1_in, mask_in, hev_in, \
                           p1_out, p0_out, q0_out, q1_out) \
  { \
    v16i8 p1_m, p0_m, q0_m, q1_m, q0_sub_p0, filt_sign; \
    v16i8 filt, filt1, filt2, cnst4b, cnst3b; \
    v8i16 q0_sub_p0_r, q0_sub_p0_l, filt_l, filt_r, cnst3h; \
    \
    /* XOR every byte with 0x80 and treat the pixels as signed bytes */ \
    p1_m = (v16i8)__msa_xori_b(p1_in, 0x80); \
    p0_m = (v16i8)__msa_xori_b(p0_in, 0x80); \
    q0_m = (v16i8)__msa_xori_b(q0_in, 0x80); \
    q1_m = (v16i8)__msa_xori_b(q1_in, 0x80); \
    \
    /* (p1 - q1), gated by hev_in */ \
    filt = __msa_subs_s_b(p1_m, q1_m); \
    filt &= (v16i8)hev_in; \
    \
    q0_sub_p0 = q0_m - p0_m; \
    filt_sign = __msa_clti_s_b(filt, 0); \
    \
    /* right half: filt + 3 * (q0 - p0) in 16-bit lanes */ \
    cnst3h = __msa_ldi_h(3); \
    q0_sub_p0_r = (v8i16)__msa_ilvr_b(q0_sub_p0, q0_sub_p0); \
    q0_sub_p0_r = __msa_dotp_s_h((v16i8)q0_sub_p0_r, (v16i8)cnst3h); \
    filt_r = (v8i16)__msa_ilvr_b(filt_sign, filt); \
    filt_r += q0_sub_p0_r; \
    filt_r = __msa_sat_s_h(filt_r, 7); \
    \
    /* left half: same computation on the ilvl lanes */ \
    q0_sub_p0_l = (v8i16)__msa_ilvl_b(q0_sub_p0, q0_sub_p0); \
    q0_sub_p0_l = __msa_dotp_s_h((v16i8)q0_sub_p0_l, (v16i8)cnst3h); \
    filt_l = (v8i16)__msa_ilvl_b(filt_sign, filt); \
    filt_l += q0_sub_p0_l; \
    filt_l = __msa_sat_s_h(filt_l, 7); \
    \
    /* merge both halves back to bytes, then apply the filter mask */ \
    filt = __msa_pckev_b((v16i8)filt_l, (v16i8)filt_r); \
    filt &= (v16i8)mask_in; \
    \
    /* filt1 = (filt + 4) >> 3 */ \
    cnst4b = __msa_ldi_b(4); \
    filt1 = __msa_adds_s_b(filt, cnst4b); \
    filt1 >>= 3; \
    \
    /* filt2 = (filt + 3) >> 3 */ \
    cnst3b = __msa_ldi_b(3); \
    filt2 = __msa_adds_s_b(filt, cnst3b); \
    filt2 >>= 3; \
    \
    /* inner taps: q0 -= filt1, p0 += filt2, then undo the 0x80 XOR */ \
    q0_m = __msa_subs_s_b(q0_m, filt1); \
    q0_out = __msa_xori_b((v16u8)q0_m, 0x80); \
    p0_m = __msa_adds_s_b(p0_m, filt2); \
    p0_out = __msa_xori_b((v16u8)p0_m, 0x80); \
    \
    /* outer taps use srari(filt1, 1), kept only where hev is NOT set */ \
    filt = __msa_srari_b(filt1, 1); \
    hev_in = __msa_xori_b((v16u8)hev_in, 0xff); \
    filt &= (v16i8)hev_in; \
    \
    q1_m = __msa_subs_s_b(q1_m, filt); \
    q1_out = __msa_xori_b((v16u8)q1_m, 0x80); \
    p1_m = __msa_adds_s_b(p1_m, filt); \
    p1_out = __msa_xori_b((v16u8)p1_m, 0x80); \
  }
|
||||
|
||||
/* Builds the "flat" mask from |p2-p0|, |q2-q0|, |p3-p0| and |q3-q0|.
 *
 * flat_out is both an input and an output: its incoming value takes part in
 * the running maximum before it is replaced by the mask. A lane ends up set
 * when the maximum is not greater than 1.
 *
 * NOTE: the result is ANDed with an identifier named `mask` that is NOT a
 * macro parameter; a variable of that name must exist at the expansion site.
 */
#define AOM_FLAT4(p3_in, p2_in, p0_in, q0_in, q2_in, q3_in, flat_out) \
  { \
    v16u8 tmp_flat4, p2_a_sub_p0, q2_a_sub_q0, p3_a_sub_p0, q3_a_sub_q0; \
    v16u8 zero_in = { 0 }; \
    \
    /* threshold vector: every byte equals 1 */ \
    tmp_flat4 = __msa_ori_b(zero_in, 1); \
    \
    /* absolute differences against the innermost pixels */ \
    p2_a_sub_p0 = __msa_asub_u_b(p2_in, p0_in); \
    q2_a_sub_q0 = __msa_asub_u_b(q2_in, q0_in); \
    p3_a_sub_p0 = __msa_asub_u_b(p3_in, p0_in); \
    q3_a_sub_q0 = __msa_asub_u_b(q3_in, q0_in); \
    \
    /* running maximum, seeded with the incoming flat_out */ \
    p2_a_sub_p0 = __msa_max_u_b(p2_a_sub_p0, q2_a_sub_q0); \
    flat_out = __msa_max_u_b(p2_a_sub_p0, flat_out); \
    p3_a_sub_p0 = __msa_max_u_b(p3_a_sub_p0, q3_a_sub_q0); \
    flat_out = __msa_max_u_b(p3_a_sub_p0, flat_out); \
    \
    /* invert (1 < max) and restrict to the caller's `mask` */ \
    flat_out = (tmp_flat4 < (v16u8)flat_out); \
    flat_out = __msa_xori_b(flat_out, 0xff); \
    flat_out &= (mask); \
  }
|
||||
|
||||
/* Builds the second-level flat mask from the outer pixels p4..p7 / q4..q7.
 *
 * flat2_out receives the mask: a lane is set when the largest of
 * |pN - p0| and |qN - q0| (N = 4..7) is not greater than 1, and the lane is
 * also set in flat_in.
 */
#define AOM_FLAT5(p7_in, p6_in, p5_in, p4_in, p0_in, q0_in, q4_in, q5_in, \
                  q6_in, q7_in, flat_in, flat2_out) \
  { \
    v16u8 tmp_flat5, zero_in = { 0 }; \
    v16u8 p4_a_sub_p0, q4_a_sub_q0, p5_a_sub_p0, q5_a_sub_q0; \
    v16u8 p6_a_sub_p0, q6_a_sub_q0, p7_a_sub_p0, q7_a_sub_q0; \
    \
    /* threshold vector: every byte equals 1 */ \
    tmp_flat5 = __msa_ori_b(zero_in, 1); \
    \
    /* absolute differences against the innermost pixels */ \
    p4_a_sub_p0 = __msa_asub_u_b(p4_in, p0_in); \
    q4_a_sub_q0 = __msa_asub_u_b(q4_in, q0_in); \
    p5_a_sub_p0 = __msa_asub_u_b(p5_in, p0_in); \
    q5_a_sub_q0 = __msa_asub_u_b(q5_in, q0_in); \
    p6_a_sub_p0 = __msa_asub_u_b(p6_in, p0_in); \
    q6_a_sub_q0 = __msa_asub_u_b(q6_in, q0_in); \
    p7_a_sub_p0 = __msa_asub_u_b(p7_in, p0_in); \
    q7_a_sub_q0 = __msa_asub_u_b(q7_in, q0_in); \
    \
    /* fold everything into a single per-lane maximum */ \
    p4_a_sub_p0 = __msa_max_u_b(p4_a_sub_p0, q4_a_sub_q0); \
    flat2_out = __msa_max_u_b(p5_a_sub_p0, q5_a_sub_q0); \
    flat2_out = __msa_max_u_b(p4_a_sub_p0, flat2_out); \
    p6_a_sub_p0 = __msa_max_u_b(p6_a_sub_p0, q6_a_sub_q0); \
    flat2_out = __msa_max_u_b(p6_a_sub_p0, flat2_out); \
    p7_a_sub_p0 = __msa_max_u_b(p7_a_sub_p0, q7_a_sub_q0); \
    flat2_out = __msa_max_u_b(p7_a_sub_p0, flat2_out); \
    \
    /* invert (1 < max) and restrict to flat_in */ \
    flat2_out = (tmp_flat5 < (v16u8)flat2_out); \
    flat2_out = __msa_xori_b(flat2_out, 0xff); \
    flat2_out &= flat_in; \
  }
|
||||
|
||||
/* 8-tap smoothing filter producing the six inner outputs p2..q2.
 *
 * Every output is __msa_srari_h(sum, 3) of a weighted sum of the inputs.
 * Expanding the temporaries gives these sums:
 *   p2: 3*p3 + 2*p2 + p1 + p0 + q0
 *   p1: 2*p3 + p2 + 2*p1 + p0 + q0 + q1
 *   p0: p3 + p2 + p1 + 2*p0 + q0 + q1 + q2
 *   q0: p2 + p1 + p0 + 2*q0 + q1 + q2 + q3
 *   q1: p1 + p0 + q0 + 2*q1 + q2 + 2*q3
 *   q2: p0 + q0 + q1 + 2*q2 + 3*q3
 * The temporaries are v8u16, so the inputs are presumably already widened
 * to 16-bit lanes by the caller (confirm at the call sites).
 */
#define AOM_FILTER8(p3_in, p2_in, p1_in, p0_in, q0_in, q1_in, q2_in, q3_in, \
                    p2_filt8_out, p1_filt8_out, p0_filt8_out, q0_filt8_out, \
                    q1_filt8_out, q2_filt8_out) \
  { \
    v8u16 tmp_filt8_0, tmp_filt8_1, tmp_filt8_2; \
    \
    /* shared partial sums: tmp2 = p2+p1+p0, tmp0 = 2*p3 + tmp2 + q0 */ \
    tmp_filt8_2 = p2_in + p1_in + p0_in; \
    tmp_filt8_0 = p3_in << 1; \
    tmp_filt8_0 = tmp_filt8_0 + tmp_filt8_2 + q0_in; \
    \
    /* p2 */ \
    tmp_filt8_1 = tmp_filt8_0 + p3_in + p2_in; \
    p2_filt8_out = (v8i16)__msa_srari_h((v8i16)tmp_filt8_1, 3); \
    \
    /* p1 */ \
    tmp_filt8_1 = tmp_filt8_0 + p1_in + q1_in; \
    p1_filt8_out = (v8i16)__msa_srari_h((v8i16)tmp_filt8_1, 3); \
    \
    /* p0: tmp1 = q2+q1+q0, tmp2 becomes the six-pixel centre sum */ \
    tmp_filt8_1 = q2_in + q1_in + q0_in; \
    tmp_filt8_2 = tmp_filt8_2 + tmp_filt8_1; \
    tmp_filt8_0 = tmp_filt8_2 + (p0_in); \
    tmp_filt8_0 = tmp_filt8_0 + (p3_in); \
    p0_filt8_out = (v8i16)__msa_srari_h((v8i16)tmp_filt8_0, 3); \
    \
    /* q2 */ \
    tmp_filt8_0 = q2_in + q3_in; \
    tmp_filt8_0 = p0_in + tmp_filt8_1 + tmp_filt8_0; \
    tmp_filt8_1 = q3_in + q3_in; \
    tmp_filt8_1 = tmp_filt8_1 + tmp_filt8_0; \
    q2_filt8_out = (v8i16)__msa_srari_h((v8i16)tmp_filt8_1, 3); \
    \
    /* q0 */ \
    tmp_filt8_0 = tmp_filt8_2 + q3_in; \
    tmp_filt8_1 = tmp_filt8_0 + q0_in; \
    q0_filt8_out = (v8i16)__msa_srari_h((v8i16)tmp_filt8_1, 3); \
    \
    /* q1 */ \
    tmp_filt8_1 = tmp_filt8_0 - p2_in; \
    tmp_filt8_0 = q1_in + q3_in; \
    tmp_filt8_1 = tmp_filt8_0 + tmp_filt8_1; \
    q1_filt8_out = (v8i16)__msa_srari_h((v8i16)tmp_filt8_1, 3); \
  }
|
||||
|
||||
/* Computes the hev vector and the filter mask for one edge.
 *
 * Outputs:
 *   flat_out - max(|p1 - p0|, |q1 - q0|) per lane.
 *   hev_out  - lanes where thresh_in < flat_out.
 *   mask_out - complement of (limit_in < M), where M is the per-lane maximum
 *              of flat_out, the neighbouring absolute differences
 *              (|p3-p2|, |p2-p1|, |q2-q1|, |q3-q2|) and the comparison
 *              result b_limit_in < (2 * |p0-q0| + |p1-q1| / 2).
 */
#define LPF_MASK_HEV(p3_in, p2_in, p1_in, p0_in, q0_in, q1_in, q2_in, q3_in, \
                     limit_in, b_limit_in, thresh_in, hev_out, mask_out, \
                     flat_out) \
  { \
    v16u8 p3_asub_p2_m, p2_asub_p1_m, p1_asub_p0_m, q1_asub_q0_m; \
    v16u8 p1_asub_q1_m, p0_asub_q0_m, q3_asub_q2_m, q2_asub_q1_m; \
    \
    /* absolute differences between neighbouring pixels */ \
    p3_asub_p2_m = __msa_asub_u_b(p3_in, p2_in); \
    p2_asub_p1_m = __msa_asub_u_b(p2_in, p1_in); \
    p1_asub_p0_m = __msa_asub_u_b(p1_in, p0_in); \
    q1_asub_q0_m = __msa_asub_u_b(q1_in, q0_in); \
    q2_asub_q1_m = __msa_asub_u_b(q2_in, q1_in); \
    q3_asub_q2_m = __msa_asub_u_b(q3_in, q2_in); \
    p0_asub_q0_m = __msa_asub_u_b(p0_in, q0_in); \
    p1_asub_q1_m = __msa_asub_u_b(p1_in, q1_in); \
    \
    /* hev: thresh_in < max(|p1-p0|, |q1-q0|) */ \
    flat_out = __msa_max_u_b(p1_asub_p0_m, q1_asub_q0_m); \
    hev_out = thresh_in < (v16u8)flat_out; \
    \
    /* 2 * |p0-q0| + |p1-q1| / 2, with saturating adds */ \
    p0_asub_q0_m = __msa_adds_u_b(p0_asub_q0_m, p0_asub_q0_m); \
    p1_asub_q1_m >>= 1; \
    p0_asub_q0_m = __msa_adds_u_b(p0_asub_q0_m, p1_asub_q1_m); \
    \
    /* fold all the limit checks into one per-lane maximum */ \
    mask_out = b_limit_in < p0_asub_q0_m; \
    mask_out = __msa_max_u_b(flat_out, mask_out); \
    p3_asub_p2_m = __msa_max_u_b(p3_asub_p2_m, p2_asub_p1_m); \
    mask_out = __msa_max_u_b(p3_asub_p2_m, mask_out); \
    q2_asub_q1_m = __msa_max_u_b(q2_asub_q1_m, q3_asub_q2_m); \
    mask_out = __msa_max_u_b(q2_asub_q1_m, mask_out); \
    \
    /* the mask is set where the maximum does not exceed limit_in */ \
    mask_out = limit_in < (v16u8)mask_out; \
    mask_out = __msa_xori_b(mask_out, 0xff); \
  }
|
||||
#endif /* AOM_DSP_LOOPFILTER_MSA_H_ */
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,97 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2016, Alliance for Open Media. All rights reserved
|
||||
*
|
||||
* This source code is subject to the terms of the BSD 2 Clause License and
|
||||
* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
|
||||
* was not distributed with this source code in the LICENSE file, you can
|
||||
* obtain it at www.aomedia.org/license/software. If the Alliance for Open
|
||||
* Media Patent License 1.0 was not distributed with this source code in the
|
||||
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
|
||||
*/
|
||||
|
||||
#ifndef AOM_DSP_MIPS_TXFM_MACROS_MIPS_MSA_H_
|
||||
#define AOM_DSP_MIPS_TXFM_MACROS_MIPS_MSA_H_
|
||||
|
||||
#include "aom_dsp/mips/macros_msa.h"
|
||||
|
||||
/* Dot-products the (reg0, reg1) halfword pair with the constants
 * (cnst0, cnst1) and writes two rounded, re-packed halfword results.
 *
 * k0_m holds cnst0 / cnst1 in alternating halfword lanes (ilvev). out0 is
 * derived from the interleave of (-reg1) with reg0, out1 from the interleave
 * of reg0 with reg1; both go through DOTP_SH2_SW, SRARI_W2_SW by
 * DCT_CONST_BITS and a pckev_h pack.
 */
#define DOTP_CONST_PAIR(reg0, reg1, cnst0, cnst1, out0, out1) \
  { \
    v8i16 k0_m; \
    v4i32 s0_m, s1_m, s2_m, s3_m; \
    \
    /* constant vector with cnst0 / cnst1 in alternating lanes */ \
    k0_m = __msa_fill_h(cnst0); \
    s0_m = (v4i32)__msa_fill_h(cnst1); \
    k0_m = __msa_ilvev_h((v8i16)s0_m, k0_m); \
    \
    /* interleave the operands for both outputs up front */ \
    ILVRL_H2_SW((-reg1), reg0, s1_m, s0_m); \
    ILVRL_H2_SW(reg0, reg1, s3_m, s2_m); \
    \
    /* first output */ \
    DOTP_SH2_SW(s1_m, s0_m, k0_m, k0_m, s1_m, s0_m); \
    SRARI_W2_SW(s1_m, s0_m, DCT_CONST_BITS); \
    out0 = __msa_pckev_h((v8i16)s0_m, (v8i16)s1_m); \
    \
    /* second output (s1_m / s0_m are reused as scratch) */ \
    DOTP_SH2_SW(s3_m, s2_m, k0_m, k0_m, s1_m, s0_m); \
    SRARI_W2_SW(s1_m, s0_m, DCT_CONST_BITS); \
    out1 = __msa_pckev_h((v8i16)s0_m, (v8i16)s1_m); \
  }
|
||||
|
||||
/* Dot products of in0..in3 with the constant vectors in4..in7, followed by
 * two BUTTERFLY_4 stages, a rounding shift by DCT_CONST_BITS and a pack of
 * the eight 32-bit vectors into the four halfword outputs dst0..dst3.
 */
#define DOT_ADD_SUB_SRARI_PCK(in0, in1, in2, in3, in4, in5, in6, in7, dst0, \
                              dst1, dst2, dst3) \
  { \
    v4i32 tp0_m, tp1_m, tp2_m, tp3_m, tp4_m; \
    v4i32 tp5_m, tp6_m, tp7_m, tp8_m, tp9_m; \
    \
    /* in0 / in1 against in4 and in5; in2 / in3 against in6 and in7 */ \
    DOTP_SH4_SW(in0, in1, in0, in1, in4, in4, in5, in5, \
                tp0_m, tp2_m, tp3_m, tp4_m); \
    DOTP_SH4_SW(in2, in3, in2, in3, in6, in6, in7, in7, \
                tp5_m, tp6_m, tp7_m, tp8_m); \
    \
    /* butterflies; tp7_m / tp5_m and tp4_m / tp2_m are updated in place */ \
    BUTTERFLY_4(tp0_m, tp3_m, tp7_m, tp5_m, tp1_m, tp9_m, tp7_m, tp5_m); \
    BUTTERFLY_4(tp2_m, tp4_m, tp8_m, tp6_m, tp3_m, tp0_m, tp4_m, tp2_m); \
    \
    /* rounding shift, then pack to halfwords */ \
    SRARI_W4_SW(tp1_m, tp9_m, tp7_m, tp5_m, DCT_CONST_BITS); \
    SRARI_W4_SW(tp3_m, tp0_m, tp4_m, tp2_m, DCT_CONST_BITS); \
    PCKEV_H4_SH(tp1_m, tp3_m, tp9_m, tp0_m, tp7_m, tp4_m, tp5_m, tp2_m, \
                dst0, dst1, dst2, dst3); \
  }
|
||||
|
||||
/* Expression macro (GNU statement expression) yielding a v8i16.
 *
 * in0 and in1 are each dot-multiplied with in2, the two 32-bit results are
 * rounded with SRARI_W2_SW by DCT_CONST_BITS and packed into one halfword
 * vector, which is the value of the expression.
 */
#define DOT_SHIFT_RIGHT_PCK_H(in0, in1, in2) \
  ({ \
    v4i32 tp0_m, tp1_m; \
    v8i16 dst_m; \
    \
    DOTP_SH2_SW(in0, in1, in2, in2, tp1_m, tp0_m); \
    SRARI_W2_SW(tp1_m, tp0_m, DCT_CONST_BITS); \
    dst_m = __msa_pckev_h((v8i16)tp1_m, (v8i16)tp0_m); \
    \
    /* value of the statement expression */ \
    dst_m; \
  })
|
||||
|
||||
/* Interleaves m1 with m0, dot-multiplies the interleaved halves with c0 and
 * with c1, rounds by DCT_CONST_BITS and packs the results: res0 comes from
 * the c0 products, res1 from the c1 products.
 */
#define MADD_SHORT(m0, m1, c0, c1, res0, res1) \
  { \
    v8i16 madd_s0_m, madd_s1_m; \
    v4i32 madd0_m, madd1_m, madd2_m, madd3_m; \
    \
    ILVRL_H2_SH(m1, m0, madd_s0_m, madd_s1_m); \
    DOTP_SH4_SW(madd_s0_m, madd_s1_m, madd_s0_m, madd_s1_m, \
                c0, c0, c1, c1, \
                madd0_m, madd1_m, madd2_m, madd3_m); \
    SRARI_W4_SW(madd0_m, madd1_m, madd2_m, madd3_m, DCT_CONST_BITS); \
    PCKEV_H2_SH(madd1_m, madd0_m, madd3_m, madd2_m, res0, res1); \
  }
|
||||
|
||||
/* Two multiply / butterfly passes over the same interleaved inputs.
 *
 * (inp1, inp0) and (inp3, inp2) are interleaved once. The first pass uses
 * the constants cst0 / cst2 and produces out0 / out1; the second pass uses
 * cst1 / cst3 and produces out2 / out3. Each pass is DOTP_SH4_SW ->
 * BUTTERFLY_4 -> SRARI_W4_SW by DCT_CONST_BITS -> PCKEV_H2_SH.
 */
#define MADD_BF(inp0, inp1, inp2, inp3, cst0, cst1, cst2, cst3, out0, out1, \
                out2, out3) \
  { \
    v8i16 madd_s0_m, madd_s1_m, madd_s2_m, madd_s3_m; \
    v4i32 tmp0_m, tmp1_m, tmp2_m, tmp3_m, m4_m, m5_m; \
    \
    ILVRL_H2_SH(inp1, inp0, madd_s0_m, madd_s1_m); \
    ILVRL_H2_SH(inp3, inp2, madd_s2_m, madd_s3_m); \
    \
    /* pass 1: cst0 / cst2 -> out0, out1 */ \
    DOTP_SH4_SW(madd_s0_m, madd_s1_m, madd_s2_m, madd_s3_m, \
                cst0, cst0, cst2, cst2, \
                tmp0_m, tmp1_m, tmp2_m, tmp3_m); \
    BUTTERFLY_4(tmp0_m, tmp1_m, tmp3_m, tmp2_m, m4_m, m5_m, tmp3_m, tmp2_m); \
    SRARI_W4_SW(m4_m, m5_m, tmp2_m, tmp3_m, DCT_CONST_BITS); \
    PCKEV_H2_SH(m5_m, m4_m, tmp3_m, tmp2_m, out0, out1); \
    \
    /* pass 2: cst1 / cst3 -> out2, out3 */ \
    DOTP_SH4_SW(madd_s0_m, madd_s1_m, madd_s2_m, madd_s3_m, \
                cst1, cst1, cst3, cst3, \
                tmp0_m, tmp1_m, tmp2_m, tmp3_m); \
    BUTTERFLY_4(tmp0_m, tmp1_m, tmp3_m, tmp2_m, m4_m, m5_m, tmp3_m, tmp2_m); \
    SRARI_W4_SW(m4_m, m5_m, tmp2_m, tmp3_m, DCT_CONST_BITS); \
    PCKEV_H2_SH(m5_m, m4_m, tmp3_m, tmp2_m, out2, out3); \
  }
|
||||
#endif // AOM_DSP_MIPS_TXFM_MACROS_MIPS_MSA_H_
|
||||
226
aom_dsp/prob.c
226
aom_dsp/prob.c
@@ -1,226 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2016, Alliance for Open Media. All rights reserved
|
||||
*
|
||||
* This source code is subject to the terms of the BSD 2 Clause License and
|
||||
* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
|
||||
* was not distributed with this source code in the LICENSE file, you can
|
||||
* obtain it at www.aomedia.org/license/software. If the Alliance for Open
|
||||
* Media Patent License 1.0 was not distributed with this source code in the
|
||||
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
|
||||
*/
|
||||
|
||||
#include "./aom_config.h"
|
||||
|
||||
#if CONFIG_EC_MULTISYMBOL
|
||||
#include <string.h>
|
||||
#endif
|
||||
|
||||
#include "aom_dsp/prob.h"
|
||||
|
||||
#if CONFIG_DAALA_EC
|
||||
#include "aom_dsp/entcode.h"
|
||||
#endif
|
||||
|
||||
/* aom_norm[v] is the number of left shifts needed to bring bit 7 of the
 * 8-bit value v to 1 (i.e. its leading-zero count within a byte); entry 0 is
 * defined as 0. Laid out 16 entries per row, so row r covers v = 16r..16r+15.
 */
const uint8_t aom_norm[256] = {
  /* 0x00 */ 0, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,
  /* 0x10 */ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
  /* 0x20 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  /* 0x30 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  /* 0x40 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x50 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x60 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x70 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x80 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0x90 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0xa0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0xb0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0xc0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0xd0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0xe0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0xf0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};
|
||||
|
||||
static unsigned int tree_merge_probs_impl(unsigned int i,
|
||||
const aom_tree_index *tree,
|
||||
const aom_prob *pre_probs,
|
||||
const unsigned int *counts,
|
||||
aom_prob *probs) {
|
||||
const int l = tree[i];
|
||||
const unsigned int left_count =
|
||||
(l <= 0) ? counts[-l]
|
||||
: tree_merge_probs_impl(l, tree, pre_probs, counts, probs);
|
||||
const int r = tree[i + 1];
|
||||
const unsigned int right_count =
|
||||
(r <= 0) ? counts[-r]
|
||||
: tree_merge_probs_impl(r, tree, pre_probs, counts, probs);
|
||||
const unsigned int ct[2] = { left_count, right_count };
|
||||
probs[i >> 1] = mode_mv_merge_probs(pre_probs[i >> 1], ct);
|
||||
return left_count + right_count;
|
||||
}
|
||||
|
||||
/* Merges the symbol counts of a whole coding tree into `probs`, starting
 * from the root node pair at index 0. The total count returned by the
 * recursive helper is not needed here.
 */
void aom_tree_merge_probs(const aom_tree_index *tree, const aom_prob *pre_probs,
                          const unsigned int *counts, aom_prob *probs) {
  (void)tree_merge_probs_impl(0, tree, pre_probs, counts, probs);
}
|
||||
|
||||
#if CONFIG_EC_MULTISYMBOL
|
||||
typedef struct tree_node tree_node;
|
||||
|
||||
struct tree_node {
|
||||
aom_tree_index index;
|
||||
uint8_t probs[16];
|
||||
uint8_t prob;
|
||||
int path;
|
||||
int len;
|
||||
int l;
|
||||
int r;
|
||||
aom_cdf_prob pdf;
|
||||
};
|
||||
|
||||
/* Compute the probability of this node in Q23 */
|
||||
static uint32_t tree_node_prob(tree_node n, int i) {
|
||||
uint32_t prob;
|
||||
/* 1.0 in Q23 */
|
||||
prob = 16777216;
|
||||
for (; i < n.len; i++) {
|
||||
prob = prob * n.probs[i] >> 8;
|
||||
}
|
||||
return prob;
|
||||
}
|
||||
|
||||
/* Orders two nodes by path probability: returns 1 if `a` is more probable
 * than `b`, -1 if less, 0 if equal. The leading branch probabilities the two
 * paths have in common are skipped; only the remaining factors are
 * multiplied out and compared.
 */
static int tree_node_cmp(tree_node a, tree_node b) {
  const int shared_limit = AOMMIN(a.len, b.len);
  int first_diff = 0;
  uint32_t pa;
  uint32_t pb;

  while (first_diff < shared_limit &&
         a.probs[first_diff] == b.probs[first_diff]) {
    ++first_diff;
  }

  pa = tree_node_prob(a, first_diff);
  pb = tree_node_prob(b, first_diff);
  if (pa > pb) return 1;
  if (pa < pb) return -1;
  return 0;
}
|
||||
|
||||
/* Given a Q15 probability for symbol subtree rooted at tree[n], this function
|
||||
computes the probability of each symbol (defined as a node that has no
|
||||
children). */
|
||||
static aom_cdf_prob tree_node_compute_probs(tree_node *tree, int n,
|
||||
aom_cdf_prob pdf) {
|
||||
if (tree[n].l == 0) {
|
||||
/* This prevents probability computations in Q15 that underflow from
|
||||
producing a symbol that has zero probability. */
|
||||
if (pdf == 0) pdf = 1;
|
||||
tree[n].pdf = pdf;
|
||||
return pdf;
|
||||
} else {
|
||||
/* We process the smaller probability first, */
|
||||
if (tree[n].prob < 128) {
|
||||
aom_cdf_prob lp;
|
||||
aom_cdf_prob rp;
|
||||
lp = (((uint32_t)pdf) * tree[n].prob + 128) >> 8;
|
||||
lp = tree_node_compute_probs(tree, tree[n].l, lp);
|
||||
rp = tree_node_compute_probs(tree, tree[n].r, lp > pdf ? 0 : pdf - lp);
|
||||
return lp + rp;
|
||||
} else {
|
||||
aom_cdf_prob rp;
|
||||
aom_cdf_prob lp;
|
||||
rp = (((uint32_t)pdf) * (256 - tree[n].prob) + 128) >> 8;
|
||||
rp = tree_node_compute_probs(tree, tree[n].r, rp);
|
||||
lp = tree_node_compute_probs(tree, tree[n].l, rp > pdf ? 0 : pdf - rp);
|
||||
return lp + rp;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static int tree_node_extract(tree_node *tree, int n, int symb,
|
||||
aom_cdf_prob *pdf, aom_tree_index *index,
|
||||
int *path, int *len) {
|
||||
if (tree[n].l == 0) {
|
||||
pdf[symb] = tree[n].pdf;
|
||||
if (index != NULL) index[symb] = tree[n].index;
|
||||
if (path != NULL) path[symb] = tree[n].path;
|
||||
if (len != NULL) len[symb] = tree[n].len;
|
||||
return symb + 1;
|
||||
} else {
|
||||
symb = tree_node_extract(tree, tree[n].l, symb, pdf, index, path, len);
|
||||
return tree_node_extract(tree, tree[n].r, symb, pdf, index, path, len);
|
||||
}
|
||||
}
|
||||
|
||||
/* Flattens the binary coding tree rooted at `root` into a CDF with at most
 * 16 symbols and returns the number of symbols produced.
 *
 * Starting from a single symbol (the root), the most probable splittable
 * node is repeatedly replaced by its two children until nothing is left to
 * split or 16 symbols exist. Leaf probabilities are then computed in Q15
 * (32768 == 1.0), written to cdf[] in left-to-right order and accumulated
 * into a cumulative distribution. index[], path[] and len[] receive the
 * per-symbol tree index, branch bits and depth (the extraction step skips
 * any of them that is NULL).
 */
int tree_to_cdf(const aom_tree_index *tree, const aom_prob *probs,
                aom_tree_index root, aom_cdf_prob *cdf, aom_tree_index *index,
                int *path, int *len) {
  tree_node symb[2 * 16 - 1];
  int next[16]; /* node indices that can still be split */
  int nodes = 1;
  int size = 1;
  int nsymbs = 1;
  int i;

  /* The root is the only node, and the only symbol, to begin with. */
  symb[0].index = root;
  symb[0].path = 0;
  symb[0].len = 0;
  symb[0].l = 0;
  symb[0].r = 0;
  next[0] = 0;

  for (; size > 0 && nsymbs < 16; nsymbs++) {
    tree_node child;
    aom_tree_index split;
    uint8_t prob;
    int best = 0;
    int parent;

    /* Select the splittable node with the largest probability... */
    for (i = 1; i < size; i++) {
      if (tree_node_cmp(symb[next[i]], symb[next[best]]) > 0) best = i;
    }
    parent = next[best];
    /* ...and drop it from the work list. */
    memmove(&next[best], &next[best + 1],
            sizeof(*next) * (size - (best + 1)));
    size--;

    /* Both children start as a copy of the parent, one level deeper. The
       copy is taken before the parent's l / r are set, so the children
       begin life as leaves (l == r == 0). */
    child = symb[parent];
    split = child.index;
    prob = probs[split >> 1];

    /* Left child: path bit 0, branch probability `prob`. */
    child.index = tree[split];
    child.path <<= 1;
    child.len++;
    child.probs[child.len - 1] = prob;
    symb[nodes] = child;
    if (child.index > 0) {
      next[size++] = nodes;
    }

    /* Right child: path bit 1, branch probability 256 - prob. */
    child.index = tree[split + 1];
    child.path += 1;
    child.probs[child.len - 1] = 256 - prob;
    symb[nodes + 1] = child;
    if (child.index > 0) {
      next[size++] = nodes + 1;
    }

    /* Turn the parent into an internal node. */
    symb[parent].prob = prob;
    symb[parent].l = nodes;
    symb[parent].r = nodes + 1;
    nodes += 2;
  }

  /* Per-symbol probabilities in Q15. */
  tree_node_compute_probs(symb, 0, 32768);
  /* Copy pdf, index, path and length out in symbol order. */
  tree_node_extract(symb, 0, 0, cdf, index, path, len);
  /* Prefix-sum the pdf into a cdf. */
  for (i = 1; i < nsymbs; i++) {
    cdf[i] += cdf[i - 1];
  }
  return nsymbs;
}
|
||||
|
||||
/* This code assumes that tree contains as unique leaf nodes the integer values
|
||||
0 to len - 1 and produces the forward and inverse mapping tables in ind[]
|
||||
and inv[] respectively. */
|
||||
void av1_indices_from_tree(int *ind, int *inv, int len,
|
||||
const aom_tree_index *tree) {
|
||||
int i;
|
||||
int index;
|
||||
for (i = index = 0; i < TREE_SIZE(len); i++) {
|
||||
const aom_tree_index j = tree[i];
|
||||
if (j <= 0) {
|
||||
inv[index] = -j;
|
||||
ind[-j] = index++;
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
158
aom_dsp/prob.h
158
aom_dsp/prob.h
@@ -1,158 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2016, Alliance for Open Media. All rights reserved
|
||||
*
|
||||
* This source code is subject to the terms of the BSD 2 Clause License and
|
||||
* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
|
||||
* was not distributed with this source code in the LICENSE file, you can
|
||||
* obtain it at www.aomedia.org/license/software. If the Alliance for Open
|
||||
* Media Patent License 1.0 was not distributed with this source code in the
|
||||
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
|
||||
*/
|
||||
|
||||
#ifndef AOM_DSP_PROB_H_
|
||||
#define AOM_DSP_PROB_H_
|
||||
|
||||
#include "./aom_config.h"
|
||||
#include "./aom_dsp_common.h"
|
||||
|
||||
#include "aom_ports/bitops.h"
|
||||
#include "aom_ports/mem.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
typedef uint8_t aom_prob;
|
||||
|
||||
// TODO(negge): Rename this aom_prob once we remove vpxbool.
|
||||
typedef uint16_t aom_cdf_prob;
|
||||
|
||||
#define MAX_PROB 255
|
||||
|
||||
#define aom_prob_half ((aom_prob)128)
|
||||
|
||||
typedef int8_t aom_tree_index;
|
||||
|
||||
#define TREE_SIZE(leaf_count) (-2 + 2 * (leaf_count))
|
||||
|
||||
/* Complement of an 8-bit probability: 255 - x. The argument is parenthesized
   so that compound expressions such as aom_complement(a + b) expand
   correctly. */
#define aom_complement(x) (255 - (x))
|
||||
|
||||
#define MODE_MV_COUNT_SAT 20
|
||||
|
||||
/* We build coding trees compactly in arrays.
|
||||
Each node of the tree is a pair of aom_tree_indices.
|
||||
Array index often references a corresponding probability table.
|
||||
Index <= 0 means done encoding/decoding and value = -Index,
|
||||
Index > 0 means need another bit, specification at index.
|
||||
Nonnegative indices are always even; processing begins at node 0. */
|
||||
|
||||
typedef const aom_tree_index aom_tree[];
|
||||
|
||||
/* Clamps p to the valid probability range [1, 255]. */
static INLINE aom_prob clip_prob(int p) {
  if (p > 255) return 255;
  if (p < 1) return 1;
  return p;
}
|
||||
|
||||
/* Returns num / den as an 8-bit probability, rounded to nearest and clamped
 * to [1, 255]. A zero denominator yields 128 (one half).
 *
 * The quotient is clamped while it is still 64 bits wide. Narrowing it to
 * int first (as clip_prob's parameter would) can wrap for large num / small
 * den and make an out-of-range ratio clamp to the wrong bound.
 */
static INLINE aom_prob get_prob(int num, int den) {
  int64_t scaled;

  if (den == 0) return 128u;

  scaled = ((int64_t)num * 256 + (den >> 1)) / den;
  if (scaled > MAX_PROB) scaled = MAX_PROB;
  if (scaled < 1) scaled = 1;
  return clip_prob((int)scaled);
}
|
||||
|
||||
/* Probability of the first outcome given the counts n0 and n1 of the two
   outcomes, i.e. get_prob(n0, n0 + n1). */
static INLINE aom_prob get_binary_prob(int n0, int n1) {
  const int total = n0 + n1;
  return get_prob(n0, total);
}
|
||||
|
||||
/* This function assumes prob1 and prob2 are already within [1,255] range. */
|
||||
static INLINE aom_prob weighted_prob(int prob1, int prob2, int factor) {
|
||||
return ROUND_POWER_OF_TWO(prob1 * (256 - factor) + prob2 * factor, 8);
|
||||
}
|
||||
|
||||
/* Adapts pre_prob towards the probability observed in the counts ct[0] /
 * ct[1]. The blend factor grows linearly with the number of observations,
 * reaching max_update_factor once ct[0] + ct[1] >= count_sat.
 * count_sat must be non-zero (it is used as a divisor).
 */
static INLINE aom_prob merge_probs(aom_prob pre_prob, const unsigned int ct[2],
                                   unsigned int count_sat,
                                   unsigned int max_update_factor) {
  const unsigned int total = ct[0] + ct[1];
  const unsigned int count = AOMMIN(total, count_sat);
  const unsigned int factor = max_update_factor * count / count_sat;
  const aom_prob observed = get_binary_prob(ct[0], ct[1]);
  return weighted_prob(pre_prob, observed, factor);
}
|
||||
|
||||
// MODE_MV_MAX_UPDATE_FACTOR (128) * count / MODE_MV_COUNT_SAT;
|
||||
static const int count_to_update_factor[MODE_MV_COUNT_SAT + 1] = {
|
||||
0, 6, 12, 19, 25, 32, 38, 44, 51, 57, 64,
|
||||
70, 76, 83, 89, 96, 102, 108, 115, 121, 128
|
||||
};
|
||||
|
||||
/* Adapts pre_prob towards the probability observed in ct[0] / ct[1], using
 * the precomputed count_to_update_factor table (counts saturate at
 * MODE_MV_COUNT_SAT). With no observations pre_prob is returned unchanged.
 */
static INLINE aom_prob mode_mv_merge_probs(aom_prob pre_prob,
                                           const unsigned int ct[2]) {
  const unsigned int den = ct[0] + ct[1];
  unsigned int count;
  unsigned int factor;
  aom_prob observed;

  if (den == 0) return pre_prob;

  count = AOMMIN(den, MODE_MV_COUNT_SAT);
  factor = count_to_update_factor[count];
  /* rounded ct[0] / den scaled to 8 bits, clamped to [1, 255] */
  observed = clip_prob(((int64_t)(ct[0]) * 256 + (den >> 1)) / den);
  return weighted_prob(pre_prob, observed, factor);
}
|
||||
|
||||
void aom_tree_merge_probs(const aom_tree_index *tree, const aom_prob *pre_probs,
|
||||
const unsigned int *counts, aom_prob *probs);
|
||||
|
||||
#if CONFIG_EC_MULTISYMBOL
|
||||
int tree_to_cdf(const aom_tree_index *tree, const aom_prob *probs,
|
||||
aom_tree_index root, aom_cdf_prob *cdf, aom_tree_index *ind,
|
||||
int *pth, int *len);
|
||||
|
||||
static INLINE void av1_tree_to_cdf(const aom_tree_index *tree,
|
||||
const aom_prob *probs, aom_cdf_prob *cdf) {
|
||||
aom_tree_index index[16];
|
||||
int path[16];
|
||||
int dist[16];
|
||||
tree_to_cdf(tree, probs, 0, cdf, index, path, dist);
|
||||
}
|
||||
|
||||
/* Applies av1_tree_to_cdf() to each of the `u` rows of probs[] / cdf[].
 *
 * The bound and the array arguments are parenthesized so that compound
 * expressions expand correctly, and the loop counter has a reserved-looking
 * name so it cannot shadow a caller variable (e.g. `i`) that appears in one
 * of the arguments. `u` is re-evaluated on every iteration.
 */
#define av1_tree_to_cdf_1D(tree, probs, cdf, u) \
  do { \
    int av1_cdf_i_; \
    for (av1_cdf_i_ = 0; av1_cdf_i_ < (u); av1_cdf_i_++) { \
      av1_tree_to_cdf(tree, (probs)[av1_cdf_i_], (cdf)[av1_cdf_i_]); \
    } \
  } while (0)
|
||||
|
||||
/* Applies av1_tree_to_cdf() to every element of the v-by-u arrays probs[][]
 * and cdf[][] (`v` is the outer dimension, `u` the inner one).
 *
 * The bounds and the array arguments are parenthesized so that compound
 * expressions expand correctly, and the loop counters have reserved-looking
 * names so they cannot shadow caller variables (e.g. `i`, `j`) that appear
 * in the arguments. `v` and `u` are re-evaluated on every iteration.
 */
#define av1_tree_to_cdf_2D(tree, probs, cdf, v, u) \
  do { \
    int av1_cdf_j_; \
    int av1_cdf_i_; \
    for (av1_cdf_j_ = 0; av1_cdf_j_ < (v); av1_cdf_j_++) { \
      for (av1_cdf_i_ = 0; av1_cdf_i_ < (u); av1_cdf_i_++) { \
        av1_tree_to_cdf(tree, (probs)[av1_cdf_j_][av1_cdf_i_], \
                        (cdf)[av1_cdf_j_][av1_cdf_i_]); \
      } \
    } \
  } while (0)
|
||||
|
||||
void av1_indices_from_tree(int *ind, int *inv, int len,
|
||||
const aom_tree_index *tree);
|
||||
#endif
|
||||
|
||||
DECLARE_ALIGNED(16, extern const uint8_t, aom_norm[256]);
|
||||
|
||||
#if CONFIG_EC_ADAPT
|
||||
/* Adapts the nsymbs-entry CDF after symbol `val` has been coded.
 *
 * Pass 1 moves every entry towards the ramp (i + 1) << (12 - rate) by
 * 1 / 2^rate of the distance, with rate = 4 + get_msb(nsymbs). Pass 2 adds
 * whatever the last entry is short of 32768 to all entries from `val`
 * upwards, so the last entry ends at 32768 again.
 */
static INLINE void update_cdf(aom_cdf_prob *cdf, int val, int nsymbs) {
  const int rate = 4 + get_msb(nsymbs);
  const int ramp_shift = 12 - rate;
  int sym;
  int shortfall;

  for (sym = 0; sym < nsymbs; ++sym) {
    const int target = (sym + 1) << ramp_shift;
    cdf[sym] -= ((cdf[sym] - target) >> rate);
  }

  shortfall = 32768 - cdf[nsymbs - 1];
  for (sym = val; sym < nsymbs; ++sym) {
    cdf[sym] += shortfall;
  }
}
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
#endif
|
||||
|
||||
#endif // AOM_DSP_PROB_H_
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user