Compare commits
160 Commits
sandbox/wa ... m66-3359
SHA1:
e9fff8a9db, edc9a46876, 4410d729d1, 11b55a0614, 46adbc4af8, efaaf387fc, 62b013abe8, c930ea7dcd,
2fa333c2ae, 4e5b4b5848, b78dad3ffa, 1acc25f11b, 0f3edc6625, d8497e1fcd, c104f4cbdc, 82e9c30334,
3636330490, 0fe4371cc0, ac54d233b6, 519fed01c2, 73d1236384, 534e9af53b, 79c14b83e9, 14b21b84e3,
cb16652598, 2c950e131c, 41d3331d42, efa786d464, 658eb1d675, 7edd1a6cea, 848d6004a4, fdb64ec289,
8001c5c7a8, c59c84fc74, de3a7e2630, 8f952bada7, 254e2f5501, 5eca3c23c3, e14e9c9964, b14b616d96,
ddf40ec156, 9a96b18f03, 77108f5001, 7e75e8a622, 903bc150da, 884d1681f8, 2654afc16c, ea1d0a6b53,
738b829b8c, 3fa713caee, cc91abb325, a9bbff1049, 067457339b, d069f4c29d, cee96c7d85, 43caed4e42,
15b261d854, 3bfadfcd62, f80be22a10, 6248f0c91f, 3b85a5beb7, cb9f4dc105, b9e44842fc, 7c69136494,
742ae4b24d, 81d66e2cc6, 9f36419bf2, d1e9635402, dcbe6750e1, 6ee88546c0, 004fb91416, 7e14e0f109,
231012fdab, 8fd648c78a, b8159fab38, 8f50e06012, 6fea41abee, d344ab03cc, 2c2fea2c5b, 5b6ae020b6,
3761254119, 9874ec07bd, c7449b482c, 281f68a81f, 68cc1dc422, f95bf1db50, 97acbbb701, 9debbc2ec7,
740883897a, f87a4594fb, eedda5f924, 373e08f921, 7d19739949, f915e6d4af, 733820c509, eb20d6f64c,
b4fb99220b, fd7de8362d, b87250c56e, e1c69544b1, 8f25c3ff8f, 8c0cd2bd76, 2879e0d2cc, f5b2dd2a66,
f8639b1554, 1633786bfb, bbdbee429f, 32f86ce276, bb4052b873, c5dc3373db, e20ca4fead, 7a41610581,
b25b2ca455, bed28a55f5, 321f295632, 0226ce79e9, 55db4f033f, 867b593caa, fe5d87aaeb, 8a4336ed2e,
1a7bf0d1f9, 117893a717, 84a7263d4c, 7a245adb18, de50e8052c, 1a9c7bee88, 9ca9c12dbd, 86842855d3,
a2127236ae, 5203b40a2a, 08a668af32, 09519a55c7, 7970cc02df, d95ddc7c71, e4b3f03c64, 7d1bf5d12a,
9f8433ffe2, 920ba82409, fe4de1ff63, 6746ba6d01, 05e6e9ac83, f2ad523461, c58f01724c, 028429310a,
e9ad5d2aee, a40fa1f95d, 94eaecaa91, f9ecdc35ec, c1e511fd82, a1689ed16b, f1ce050f44, 2a602f745d,
14dbdd95e6, bd1d995cd3, 3562d6b0a2, 575c1933ea, 2e44f16443, 33953f310e, 9f9d4f8dc9, 07b12aad77

@@ -1,12 +1,12 @@
---
Language: Cpp
# BasedOnStyle: Google
# Generated with clang-format 4.0.1
# Generated with clang-format 5.0.0
AccessModifierOffset: -1
AlignAfterOpenBracket: Align
AlignConsecutiveAssignments: false
AlignConsecutiveDeclarations: false
AlignEscapedNewlinesLeft: true
AlignEscapedNewlines: Left
AlignOperands: true
AlignTrailingComments: true
AllowAllParametersOfDeclarationOnNextLine: true
@@ -33,14 +33,20 @@ BraceWrapping:
BeforeCatch: false
BeforeElse: false
IndentBraces: false
SplitEmptyFunction: true
SplitEmptyRecord: true
SplitEmptyNamespace: true
BreakBeforeBinaryOperators: None
BreakBeforeBraces: Attach
BreakBeforeInheritanceComma: false
BreakBeforeTernaryOperators: true
BreakConstructorInitializersBeforeComma: false
BreakConstructorInitializers: BeforeColon
BreakAfterJavaFieldAnnotations: false
BreakStringLiterals: true
ColumnLimit: 80
CommentPragmas: '^ IWYU pragma:'
CompactNamespaces: false
ConstructorInitializerAllOnOneLineOrOnePerLine: false
ConstructorInitializerIndentWidth: 4
ContinuationIndentWidth: 4
@@ -48,7 +54,11 @@ Cpp11BracedListStyle: false
DerivePointerAlignment: false
DisableFormat: false
ExperimentalAutoDetectBinPacking: false
ForEachMacros: [ foreach, Q_FOREACH, BOOST_FOREACH ]
FixNamespaceComments: true
ForEachMacros:
  - foreach
  - Q_FOREACH
  - BOOST_FOREACH
IncludeCategories:
  - Regex: '^<.*\.h>'
    Priority: 1
@@ -70,6 +80,7 @@ NamespaceIndentation: None
ObjCBlockIndentWidth: 2
ObjCSpaceAfterProperty: false
ObjCSpaceBeforeProtocolList: false
PenaltyBreakAssignment: 2
PenaltyBreakBeforeFirstCallParameter: 1
PenaltyBreakComment: 300
PenaltyBreakFirstLessLess: 120
@@ -79,6 +90,7 @@ PenaltyReturnTypeOnItsOwnLine: 200
PointerAlignment: Right
ReflowComments: true
SortIncludes: false
SortUsingDeclarations: true
SpaceAfterCStyleCast: false
SpaceAfterTemplateKeyword: true
SpaceBeforeAssignmentOperators: true
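
The hunks above look like a clang-format configuration being regenerated with a newer tool: the generator comment moves from 4.0.1 to 5.0.0 and 5.0-era keys such as AlignEscapedNewlines, SplitEmptyFunction and SortUsingDeclarations appear. A rough sketch of how such a file is usually refreshed and applied follows; the binary name clang-format-5.0 and the source path are assumptions, and project-specific overrides still have to be re-applied by hand on top of the dump:

```sh
# Dump a Google-based style as the starting point for an updated config
# (assumed binary name), then reformat an example file in place against
# the checked-in .clang-format.
clang-format-5.0 -style=Google -dump-config > .clang-format
clang-format-5.0 -style=file -i vp9/encoder/vp9_encoder.c
```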

.mailmap (4)
@@ -3,6 +3,7 @@ Aℓex Converse <aconverse@google.com>
Aℓex Converse <aconverse@google.com> <alex.converse@gmail.com>
Alexis Ballier <aballier@gentoo.org> <alexis.ballier@gmail.com>
Alpha Lam <hclam@google.com> <hclam@chromium.org>
Chris Cunningham <chcunningham@chromium.org>
Daniele Castagna <dcastagna@chromium.org> <dcastagna@google.com>
Deb Mukherjee <debargha@google.com>
Erik Niemeyer <erik.a.niemeyer@intel.com> <erik.a.niemeyer@gmail.com>
@@ -21,18 +22,21 @@ Marco Paniconi <marpan@google.com>
Marco Paniconi <marpan@google.com> <marpan@chromium.org>
Pascal Massimino <pascal.massimino@gmail.com>
Paul Wilkins <paulwilkins@google.com>
Peter Boström <pbos@chromium.org> <pbos@google.com>
Peter de Rivaz <peter.derivaz@gmail.com>
Peter de Rivaz <peter.derivaz@gmail.com> <peter.derivaz@argondesign.com>
Ralph Giles <giles@xiph.org> <giles@entropywave.com>
Ralph Giles <giles@xiph.org> <giles@mozilla.com>
Ronald S. Bultje <rsbultje@gmail.com> <rbultje@google.com>
Sami Pietilä <samipietila@google.com>
Shiyou Yin <yinshiyou-hf@loongson.cn>
Tamar Levy <tamar.levy@intel.com>
Tamar Levy <tamar.levy@intel.com> <levytamar82@gmail.com>
Tero Rintaluoma <teror@google.com> <tero.rintaluoma@on2.com>
Timothy B. Terriberry <tterribe@xiph.org> <tterriberry@mozilla.com>
Tom Finegan <tomfinegan@google.com>
Tom Finegan <tomfinegan@google.com> <tomfinegan@chromium.org>
Urvang Joshi <urvang@google.com> <urvang@chromium.org>
Yaowu Xu <yaowu@google.com> <adam@xuyaowu.com>
Yaowu Xu <yaowu@google.com> <yaowu@xuyaowu.com>
Yaowu Xu <yaowu@google.com> <Yaowu Xu>

AUTHORS (16)
@@ -3,13 +3,13 @@

Aaron Watry <awatry@gmail.com>
Abo Talib Mahfoodh <ab.mahfoodh@gmail.com>
Adam Xu <adam@xuyaowu.com>
Adrian Grange <agrange@google.com>
Aℓex Converse <aconverse@google.com>
Ahmad Sharif <asharif@google.com>
Aleksey Vasenev <margtu-fivt@ya.ru>
Alexander Potapenko <glider@google.com>
Alexander Voronov <avoronov@graphics.cs.msu.ru>
Alexandra Hájková <alexandra.khirnova@gmail.com>
Alexis Ballier <aballier@gentoo.org>
Alok Ahuja <waveletcoeff@gmail.com>
Alpha Lam <hclam@google.com>
@@ -17,6 +17,7 @@ A.Mahfoodh <ab.mahfoodh@gmail.com>
Ami Fischman <fischman@chromium.org>
Andoni Morales Alastruey <ylatuya@gmail.com>
Andres Mejia <mcitadel@gmail.com>
Andrew Lewis <andrewlewis@google.com>
Andrew Russell <anrussell@google.com>
Angie Chiang <angiebird@google.com>
Aron Rosenberg <arosenberg@logitech.com>
@@ -24,7 +25,9 @@ Attila Nagy <attilanagy@google.com>
Brion Vibber <bvibber@wikimedia.org>
changjun.yang <changjun.yang@intel.com>
Charles 'Buck' Krasic <ckrasic@google.com>
Cheng Chen <chengchen@google.com>
chm <chm@rock-chips.com>
Chris Cunningham <chcunningham@chromium.org>
Christian Duvivier <cduvivier@google.com>
Daniele Castagna <dcastagna@chromium.org>
Daniel Kang <ddkang@google.com>
@@ -46,10 +49,12 @@ Geza Lore <gezalore@gmail.com>
Ghislain MARY <ghislainmary2@gmail.com>
Giuseppe Scrivano <gscrivano@gnu.org>
Gordana Cmiljanovic <gordana.cmiljanovic@imgtec.com>
Gregor Jasny <gjasny@gmail.com>
Guillaume Martres <gmartres@google.com>
Guillermo Ballester Valor <gbvalor@gmail.com>
Hangyu Kuang <hkuang@google.com>
Hanno Böck <hanno@hboeck.de>
Han Shen <shenhan@google.com>
Henrik Lundin <hlundin@google.com>
Hui Su <huisu@google.com>
Ivan Krasin <krasin@chromium.org>
@@ -83,6 +88,7 @@ Justin Clift <justin@salasaga.org>
Justin Lebar <justin.lebar@gmail.com>
Kaustubh Raste <kaustubh.raste@imgtec.com>
KO Myung-Hun <komh@chollian.net>
Kyle Siefring <kylesiefring@gmail.com>
Lawrence Velázquez <larryv@macports.org>
Linfeng Zhang <linfengz@google.com>
Lou Quillio <louquillio@google.com>
@@ -101,6 +107,7 @@ Mikhal Shemer <mikhal@google.com>
Min Chen <chenm003@gmail.com>
Minghai Shang <minghai@google.com>
Min Ye <yeemmi@google.com>
Moriyoshi Koizumi <mozo@mozo.jp>
Morton Jonuschat <yabawock@gmail.com>
Nathan E. Egge <negge@mozilla.com>
Nico Weber <thakis@chromium.org>
@@ -111,12 +118,15 @@ Paul Wilkins <paulwilkins@google.com>
Pavol Rusnak <stick@gk2.sk>
Paweł Hajdan <phajdan@google.com>
Pengchong Jin <pengchong@google.com>
Peter Boström <pbos@google.com>
Peter Boström <pbos@chromium.org>
Peter Collingbourne <pcc@chromium.org>
Peter de Rivaz <peter.derivaz@gmail.com>
Philip Jägenstedt <philipj@opera.com>
Priit Laes <plaes@plaes.org>
Rafael Ávila de Espíndola <rafael.espindola@gmail.com>
Rafaël Carré <funman@videolan.org>
Rafael de Lucena Valle <rafaeldelucena@gmail.com>
Rahul Chaudhry <rahulchaudhry@google.com>
Ralph Giles <giles@xiph.org>
Ranjit Kumar Tulabandu <ranjit.tulabandu@ittiam.com>
Rob Bradford <rob@linux.intel.com>
@@ -135,6 +145,7 @@ Shiyou Yin <yinshiyou-hf@loongson.cn>
Shunyao Li <shunyaoli@google.com>
Stefan Holmer <holmer@google.com>
Suman Sunkara <sunkaras@google.com>
Sylvestre Ledru <sylvestre@mozilla.com>
Taekhyun Kim <takim@nvidia.com>
Takanori MATSUURA <t.matsuu@gmail.com>
Tamar Levy <tamar.levy@intel.com>
@@ -147,6 +158,7 @@ Tom Finegan <tomfinegan@google.com>
Tristan Matthews <le.businessman@gmail.com>
Urvang Joshi <urvang@google.com>
Vignesh Venkatasubramanian <vigneshv@google.com>
Vlad Tsyrklevich <vtsyrklevich@chromium.org>
Yaowu Xu <yaowu@google.com>
Yi Luo <luoyi@google.com>
Yongzhe Wang <yongzhe@google.com>

CHANGELOG (25)
@@ -1,3 +1,28 @@
2017-01-04 v1.7.0 "Mandarin Duck"
  This release focused on high bit depth performance (10/12 bit) and vp9
  encoding improvements.

  - Upgrading:
    This release is ABI incompatible due to new vp9 encoder features.

    Frame parallel decoding for vp9 has been removed.

  - Enhancements:
    vp9 encoding supports additional threads with --row-mt. This can be greater
    than the number of tiles.

    Two new vp9 encoder options have been added:
      --corpus-complexity
      --tune-content=film

    Additional tooling for respecting the vp9 "level" profiles has been added.

  - Bug fixes:
    A variety of fuzzing issues.
    vp8 threading fix for ARM.
    Codec control VP9_SET_SKIP_LOOP_FILTER fixed.
    Reject invalid multi resolution configurations.

2017-01-09 v1.6.1 "Long Tailed Duck"
  This release improves upon the VP9 encoder and speeds up the encoding and
  decoding processes.
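
The v1.7.0 notes above call out row-based multithreading (--row-mt) and the two new vp9 encoder options. A minimal invocation sketch tying them together is shown below; the input filename and option values are placeholders, and flag spellings should be checked against `vpxenc --help` on the built tree:

```sh
# Hypothetical VP9 encode exercising the features listed in the release notes.
vpxenc --codec=vp9 --threads=4 --row-mt=1 \
       --tune-content=film --corpus-complexity=30 \
       -o output.webm input_1280x720_30fps.y4m
```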

README (4)
@@ -1,4 +1,4 @@
README - 26 January 2017
README - 24 January 2018

Welcome to the WebM VP8/VP9 Codec SDK!

@@ -63,6 +63,8 @@ COMPILING THE APPLICATIONS/LIBRARIES:
    armv8-linux-gcc
    mips32-linux-gcc
    mips64-linux-gcc
    ppc64-linux-gcc
    ppc64le-linux-gcc
    sparc-solaris-gcc
    x86-android-gcc
    x86-darwin8-gcc
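
The README hunk adds ppc64le-linux-gcc (alongside the existing ppc64-linux-gcc) to the supported target list. As a sketch of selecting one of the newly listed targets with the SDK's configure script; the extra high-bit-depth flag is illustrative rather than required:

```sh
# Configure for the ppc64le target named in the README, then build.
./configure --target=ppc64le-linux-gcc --enable-vp9-highbitdepth
make -j4
```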

@@ -1,4 +1,13 @@
#!/usr/bin/env perl
##
## Copyright (c) 2017 The WebM project authors. All Rights Reserved.
##
## Use of this source code is governed by a BSD-style license
## that can be found in the LICENSE file in the root of the source
## tree. An additional intellectual property rights grant can be found
## in the file PATENTS. All contributing project authors may
## be found in the AUTHORS file in the root of the source tree.
##

no strict 'refs';
use warnings;
@@ -200,6 +209,7 @@ sub filter {
sub common_top() {
my $include_guard = uc($opts{sym})."_H_";
print <<EOF;
// This file is generated. Do not edit.
#ifndef ${include_guard}
#define ${include_guard}

@@ -60,6 +60,7 @@ if [ ${bare} ]; then
echo "${changelog_version}${git_version_id}" > $$.tmp
else
cat<<EOF>$$.tmp
// This file is generated. Do not edit.
#define VERSION_MAJOR $major_version
#define VERSION_MINOR $minor_version
#define VERSION_PATCH $patch_version

configure (2, vendored)
@@ -665,7 +665,7 @@ process_toolchain() {
gen_vcproj_cmd=${source_path}/build/make/gen_msvs_vcxproj.sh
enabled werror && gen_vcproj_cmd="${gen_vcproj_cmd} --enable-werror"
all_targets="${all_targets} solution"
INLINE="__forceinline"
INLINE="__inline"
;;
esac

@@ -429,8 +429,9 @@ static void set_rate_control_stats(struct RateControlStats *rc,
rc->layer_framerate[layer] = framerate / cfg->ts_rate_decimator[tl];
if (tl > 0) {
rc->layer_pfb[layer] =
1000.0 * (cfg->layer_target_bitrate[layer] -
cfg->layer_target_bitrate[layer - 1]) /
1000.0 *
(cfg->layer_target_bitrate[layer] -
cfg->layer_target_bitrate[layer - 1]) /
(rc->layer_framerate[layer] - rc->layer_framerate[layer - 1]);
} else {
rc->layer_pfb[layer] = 1000.0 * cfg->layer_target_bitrate[layer] /
@@ -573,8 +574,8 @@ void set_frame_flags_bypass_mode(int sl, int tl, int num_spatial_layers,
} else {
if (is_key_frame) {
ref_frame_config->frame_flags[sl] =
VP8_EFLAG_NO_REF_LAST | VP8_EFLAG_NO_REF_ARF |
VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF;
VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF |
VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_ARF;
} else {
ref_frame_config->frame_flags[sl] =
VP8_EFLAG_NO_REF_ARF | VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF;
@@ -588,14 +589,24 @@ void set_frame_flags_bypass_mode(int sl, int tl, int num_spatial_layers,
} else {
ref_frame_config->frame_flags[sl] =
VP8_EFLAG_NO_REF_ARF | VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_GF;
if (sl == num_spatial_layers - 1)
ref_frame_config->frame_flags[sl] =
VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_REF_ARF |
VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_GF;
}
}
if (tl == 0) {
ref_frame_config->lst_fb_idx[sl] = sl;
if (sl)
ref_frame_config->gld_fb_idx[sl] = sl - 1;
else
if (sl) {
if (is_key_frame) {
ref_frame_config->lst_fb_idx[sl] = sl - 1;
ref_frame_config->gld_fb_idx[sl] = sl;
} else {
ref_frame_config->gld_fb_idx[sl] = sl - 1;
}
} else {
ref_frame_config->gld_fb_idx[sl] = 0;
}
ref_frame_config->alt_fb_idx[sl] = 0;
} else if (tl == 1) {
ref_frame_config->lst_fb_idx[sl] = sl;
@@ -738,6 +749,8 @@ int main(int argc, const char **argv) {
// the encode for the whole superframe. The encoder will internally loop
// over all the spatial layers for the current superframe.
vpx_codec_control(&codec, VP9E_SET_SVC_LAYER_ID, &layer_id);
// TODO(jianj): Fix the parameter passing for "is_key_frame" in
// set_frame_flags_bypass_model() for case of periodic key frames.
set_frame_flags_bypass_mode(sl, layer_id.temporal_layer_id,
svc_ctx.spatial_layers, frame_cnt == 0,
&ref_frame_config);

@@ -26,19 +26,29 @@
#include "../tools_common.h"
#include "../video_writer.h"

#define VP8_ROI_MAP 0
#define ROI_MAP 0

#define zero(Dest) memset(&Dest, 0, sizeof(Dest));

static const char *exec_name;

void usage_exit(void) { exit(EXIT_FAILURE); }

// Denoiser states, for temporal denoising.
enum denoiserState {
kDenoiserOff,
kDenoiserOnYOnly,
kDenoiserOnYUV,
kDenoiserOnYUVAggressive,
kDenoiserOnAdaptive
// Denoiser states for vp8, for temporal denoising.
enum denoiserStateVp8 {
kVp8DenoiserOff,
kVp8DenoiserOnYOnly,
kVp8DenoiserOnYUV,
kVp8DenoiserOnYUVAggressive,
kVp8DenoiserOnAdaptive
};

// Denoiser states for vp9, for temporal denoising.
enum denoiserStateVp9 {
kVp9DenoiserOff,
kVp9DenoiserOnYOnly,
// For SVC: denoise the top two spatial layers.
kVp9DenoiserOnYTwoSpatialLayers
};

static int mode_to_num_layers[13] = { 1, 2, 2, 3, 3, 3, 3, 5, 2, 3, 3, 3, 3 };
@@ -91,9 +101,10 @@ static void set_rate_control_metrics(struct RateControlMetrics *rc,
for (i = 0; i < cfg->ts_number_layers; ++i) {
if (i > 0) {
rc->layer_framerate[i] = framerate / cfg->ts_rate_decimator[i];
rc->layer_pfb[i] = 1000.0 * (rc->layer_target_bitrate[i] -
rc->layer_target_bitrate[i - 1]) /
(rc->layer_framerate[i] - rc->layer_framerate[i - 1]);
rc->layer_pfb[i] =
1000.0 *
(rc->layer_target_bitrate[i] - rc->layer_target_bitrate[i - 1]) /
(rc->layer_framerate[i] - rc->layer_framerate[i - 1]);
}
rc->layer_input_frames[i] = 0;
rc->layer_enc_frames[i] = 0;
@@ -156,38 +167,60 @@ static void printout_rate_control_summary(struct RateControlMetrics *rc,
die("Error: Number of input frames not equal to output! \n");
}

#if VP8_ROI_MAP
static void vp8_set_roi_map(vpx_codec_enc_cfg_t *cfg, vpx_roi_map_t *roi) {
#if ROI_MAP
static void set_roi_map(const char *enc_name, vpx_codec_enc_cfg_t *cfg,
vpx_roi_map_t *roi) {
unsigned int i, j;
memset(roi, 0, sizeof(*roi));
int block_size = 0;
uint8_t is_vp8 = strncmp(enc_name, "vp8", 3) == 0 ? 1 : 0;
uint8_t is_vp9 = strncmp(enc_name, "vp9", 3) == 0 ? 1 : 0;
if (!is_vp8 && !is_vp9) {
die("unsupported codec.");
}
zero(*roi);

block_size = is_vp9 && !is_vp8 ? 8 : 16;

// ROI is based on the segments (4 for vp8, 8 for vp9), smallest unit for
// segment is 16x16 for vp8, 8x8 for vp9.
roi->rows = (cfg->g_h + 15) / 16;
roi->cols = (cfg->g_w + 15) / 16;
roi->rows = (cfg->g_h + block_size - 1) / block_size;
roi->cols = (cfg->g_w + block_size - 1) / block_size;

// Applies delta QP on the segment blocks, varies from -63 to 63.
// Setting to negative means lower QP (better quality).
// Below we set delta_q to the extreme (-63) to show strong effect.
roi->delta_q[0] = 0;
// VP8 uses the first 4 segments. VP9 uses all 8 segments.
zero(roi->delta_q);
roi->delta_q[1] = -63;
roi->delta_q[2] = 0;
roi->delta_q[3] = 0;

// Applies delta loopfilter strength on the segment blocks, varies from -63 to
// 63. Setting to positive means stronger loopfilter.
roi->delta_lf[0] = 0;
roi->delta_lf[1] = 0;
roi->delta_lf[2] = 0;
roi->delta_lf[3] = 0;
// 63. Setting to positive means stronger loopfilter. VP8 uses the first 4
// segments. VP9 uses all 8 segments.
zero(roi->delta_lf);

// Applies skip encoding threshold on the segment blocks, varies from 0 to
// UINT_MAX. Larger value means more skipping of encoding is possible.
// This skip threshold only applies on delta frames.
roi->static_threshold[0] = 0;
roi->static_threshold[1] = 0;
roi->static_threshold[2] = 0;
roi->static_threshold[3] = 0;
if (is_vp8) {
// Applies skip encoding threshold on the segment blocks, varies from 0 to
// UINT_MAX. Larger value means more skipping of encoding is possible.
// This skip threshold only applies on delta frames.
zero(roi->static_threshold);
}

if (is_vp9) {
// Apply skip segment. Setting to 1 means this block will be copied from
// previous frame.
zero(roi->skip);
}

if (is_vp9) {
// Apply ref frame segment.
// -1 : Do not apply this segment.
// 0 : Froce using intra.
// 1 : Force using last.
// 2 : Force using golden.
// 3 : Force using alfref but not used in non-rd pickmode for 0 lag.
memset(roi->ref_frame, -1, sizeof(roi->ref_frame));
roi->ref_frame[1] = 1;
}

// Use 2 states: 1 is center square, 0 is the rest.
roi->roi_map =
@@ -555,7 +588,7 @@ int main(int argc, char **argv) {
int layering_mode = 0;
int layer_flags[VPX_TS_MAX_PERIODICITY] = { 0 };
int flag_periodicity = 1;
#if VP8_ROI_MAP
#if ROI_MAP
vpx_roi_map_t roi;
#endif
vpx_svc_layer_id_t layer_id = { 0, 0 };
@@ -755,11 +788,11 @@ int main(int argc, char **argv) {

if (strncmp(encoder->name, "vp8", 3) == 0) {
vpx_codec_control(&codec, VP8E_SET_CPUUSED, -speed);
vpx_codec_control(&codec, VP8E_SET_NOISE_SENSITIVITY, kDenoiserOff);
vpx_codec_control(&codec, VP8E_SET_NOISE_SENSITIVITY, kVp8DenoiserOff);
vpx_codec_control(&codec, VP8E_SET_STATIC_THRESHOLD, 1);
vpx_codec_control(&codec, VP8E_SET_GF_CBR_BOOST_PCT, 0);
#if VP8_ROI_MAP
vp8_set_roi_map(&cfg, &roi);
#if ROI_MAP
set_roi_map(encoder->name, &cfg, &roi);
if (vpx_codec_control(&codec, VP8E_SET_ROI_MAP, &roi))
die_codec(&codec, "Failed to set ROI map");
#endif
@@ -772,10 +805,16 @@ int main(int argc, char **argv) {
vpx_codec_control(&codec, VP9E_SET_GF_CBR_BOOST_PCT, 0);
vpx_codec_control(&codec, VP9E_SET_FRAME_PARALLEL_DECODING, 0);
vpx_codec_control(&codec, VP9E_SET_FRAME_PERIODIC_BOOST, 0);
vpx_codec_control(&codec, VP9E_SET_NOISE_SENSITIVITY, kDenoiserOff);
vpx_codec_control(&codec, VP9E_SET_NOISE_SENSITIVITY, kVp9DenoiserOff);
vpx_codec_control(&codec, VP8E_SET_STATIC_THRESHOLD, 1);
vpx_codec_control(&codec, VP9E_SET_TUNE_CONTENT, 0);
vpx_codec_control(&codec, VP9E_SET_TILE_COLUMNS, (cfg.g_threads >> 1));
#if ROI_MAP
set_roi_map(encoder->name, &cfg, &roi);
if (vpx_codec_control(&codec, VP9E_SET_ROI_MAP, &roi))
die_codec(&codec, "Failed to set ROI map");
vpx_codec_control(&codec, VP9E_SET_AQ_MODE, 0);
#endif
// TODO(marpan/jianj): There is an issue with row-mt for low resolutons at
// high speed settings, disable its use for those cases for now.
if (cfg.g_threads > 1 && ((cfg.g_w > 320 && cfg.g_h > 240) || speed < 7))
@@ -903,5 +942,8 @@ int main(int argc, char **argv) {
for (i = 0; i < cfg.ts_number_layers; ++i) vpx_video_writer_close(outfile[i]);

vpx_img_free(&raw);
#if ROI_MAP
free(roi.roi_map);
#endif
return EXIT_SUCCESS;
}

@@ -943,18 +943,6 @@ GENERATE_XML = NO

XML_OUTPUT = xml

# The XML_SCHEMA tag can be used to specify an XML schema,
# which can be used by a validating XML parser to check the
# syntax of the XML files.

XML_SCHEMA =

# The XML_DTD tag can be used to specify an XML DTD,
# which can be used by a validating XML parser to check the
# syntax of the XML files.

XML_DTD =

# If the XML_PROGRAMLISTING tag is set to YES Doxygen will
# dump the program listings (including syntax highlighting
# and cross-referencing information) to the XML output. Note that

libs.mk (4)
@@ -233,8 +233,8 @@ OBJS-yes += $(LIBVPX_OBJS)
LIBS-$(if yes,$(CONFIG_STATIC)) += $(BUILD_PFX)libvpx.a $(BUILD_PFX)libvpx_g.a
$(BUILD_PFX)libvpx_g.a: $(LIBVPX_OBJS)

SO_VERSION_MAJOR := 4
SO_VERSION_MINOR := 1
SO_VERSION_MAJOR := 5
SO_VERSION_MINOR := 0
SO_VERSION_PATCH := 0
ifeq ($(filter darwin%,$(TGT_OS)),$(TGT_OS))
LIBVPX_SO := libvpx.$(SO_VERSION_MAJOR).dylib

@@ -215,7 +215,7 @@ using std::tr1::make_tuple;

#if CONFIG_VP9_ENCODER
const BlockinessParam c_vp9_tests[] = {
make_tuple(320, 240), make_tuple(318, 242), make_tuple(318, 238),
make_tuple(320, 240), make_tuple(318, 242), make_tuple(318, 238)
};
INSTANTIATE_TEST_CASE_P(C, BlockinessVP9Test, ::testing::ValuesIn(c_vp9_tests));
#endif

@@ -205,7 +205,7 @@ using std::tr1::make_tuple;

#if CONFIG_VP9_ENCODER
const ConsistencyParam c_vp9_tests[] = {
make_tuple(320, 240), make_tuple(318, 242), make_tuple(318, 238),
make_tuple(320, 240), make_tuple(318, 242), make_tuple(318, 238)
};
INSTANTIATE_TEST_CASE_P(C, ConsistencyVP9Test,
::testing::ValuesIn(c_vp9_tests));
@@ -539,6 +539,7 @@ class DatarateTestVP9Large
|
||||
denoiser_offon_test_ = 0;
|
||||
denoiser_offon_period_ = -1;
|
||||
frame_parallel_decoding_mode_ = 1;
|
||||
use_roi_ = 0;
|
||||
}
|
||||
|
||||
//
|
||||
@@ -621,6 +622,10 @@ class DatarateTestVP9Large
|
||||
encoder->Control(VP9E_SET_FRAME_PARALLEL_DECODING,
|
||||
frame_parallel_decoding_mode_);
|
||||
|
||||
if (use_roi_) {
|
||||
encoder->Control(VP9E_SET_ROI_MAP, &roi_);
|
||||
}
|
||||
|
||||
if (cfg_.ts_number_layers > 1) {
|
||||
if (video->frame() == 0) {
|
||||
encoder->Control(VP9E_SET_SVC, 1);
|
||||
@@ -701,6 +706,8 @@ class DatarateTestVP9Large
|
||||
int denoiser_offon_test_;
|
||||
int denoiser_offon_period_;
|
||||
int frame_parallel_decoding_mode_;
|
||||
bool use_roi_;
|
||||
vpx_roi_map_t roi_;
|
||||
};
|
||||
|
||||
// Check basic rate targeting for VBR mode with 0 lag.
|
||||
@@ -1073,6 +1080,68 @@ TEST_P(DatarateTestVP9Large, BasicRateTargeting3TemporalLayersFrameDropping) {
|
||||
}
|
||||
}
|
||||
|
||||
class DatarateTestVP9RealTime : public DatarateTestVP9Large {
|
||||
public:
|
||||
virtual ~DatarateTestVP9RealTime() {}
|
||||
};
|
||||
|
||||
// Check VP9 region of interest feature.
|
||||
TEST_P(DatarateTestVP9RealTime, RegionOfInterest) {
|
||||
if (deadline_ != VPX_DL_REALTIME || set_cpu_used_ < 5) return;
|
||||
cfg_.rc_buf_initial_sz = 500;
|
||||
cfg_.rc_buf_optimal_sz = 500;
|
||||
cfg_.rc_buf_sz = 1000;
|
||||
cfg_.rc_dropframe_thresh = 0;
|
||||
cfg_.rc_min_quantizer = 0;
|
||||
cfg_.rc_max_quantizer = 63;
|
||||
cfg_.rc_end_usage = VPX_CBR;
|
||||
cfg_.g_lag_in_frames = 0;
|
||||
|
||||
::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
|
||||
30, 1, 0, 300);
|
||||
|
||||
cfg_.rc_target_bitrate = 450;
|
||||
cfg_.g_w = 352;
|
||||
cfg_.g_h = 288;
|
||||
|
||||
ResetModel();
|
||||
|
||||
// Set ROI parameters
|
||||
use_roi_ = true;
|
||||
memset(&roi_, 0, sizeof(roi_));
|
||||
|
||||
roi_.rows = (cfg_.g_h + 7) / 8;
|
||||
roi_.cols = (cfg_.g_w + 7) / 8;
|
||||
|
||||
roi_.delta_q[1] = -20;
|
||||
roi_.delta_lf[1] = -20;
|
||||
memset(roi_.ref_frame, -1, sizeof(roi_.ref_frame));
|
||||
roi_.ref_frame[1] = 1;
|
||||
|
||||
// Use 2 states: 1 is center square, 0 is the rest.
|
||||
roi_.roi_map = reinterpret_cast<uint8_t *>(
|
||||
calloc(roi_.rows * roi_.cols, sizeof(*roi_.roi_map)));
|
||||
ASSERT_TRUE(roi_.roi_map != NULL);
|
||||
|
||||
for (unsigned int i = 0; i < roi_.rows; ++i) {
|
||||
for (unsigned int j = 0; j < roi_.cols; ++j) {
|
||||
if (i > (roi_.rows >> 2) && i < ((roi_.rows * 3) >> 2) &&
|
||||
j > (roi_.cols >> 2) && j < ((roi_.cols * 3) >> 2)) {
|
||||
roi_.roi_map[i * roi_.cols + j] = 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
|
||||
ASSERT_GE(cfg_.rc_target_bitrate, effective_datarate_[0] * 0.90)
|
||||
<< " The datarate for the file exceeds the target!";
|
||||
|
||||
ASSERT_LE(cfg_.rc_target_bitrate, effective_datarate_[0] * 1.4)
|
||||
<< " The datarate for the file missed the target!";
|
||||
|
||||
free(roi_.roi_map);
|
||||
}
|
||||
|
||||
#if CONFIG_VP9_TEMPORAL_DENOISING
|
||||
class DatarateTestVP9LargeDenoiser : public DatarateTestVP9Large {
|
||||
public:
|
||||
@@ -1216,18 +1285,78 @@ class DatarateOnePassCbrSvc
|
||||
}
|
||||
virtual void ResetModel() {
|
||||
last_pts_ = 0;
|
||||
bits_in_buffer_model_ = cfg_.rc_target_bitrate * cfg_.rc_buf_initial_sz;
|
||||
frame_number_ = 0;
|
||||
first_drop_ = 0;
|
||||
bits_total_ = 0;
|
||||
duration_ = 0.0;
|
||||
mismatch_psnr_ = 0.0;
|
||||
mismatch_nframes_ = 0;
|
||||
denoiser_on_ = 0;
|
||||
tune_content_ = 0;
|
||||
base_speed_setting_ = 5;
|
||||
spatial_layer_id_ = 0;
|
||||
temporal_layer_id_ = 0;
|
||||
update_pattern_ = 0;
|
||||
memset(bits_in_buffer_model_, 0, sizeof(bits_in_buffer_model_));
|
||||
memset(bits_total_, 0, sizeof(bits_total_));
|
||||
memset(layer_target_avg_bandwidth_, 0, sizeof(layer_target_avg_bandwidth_));
|
||||
dynamic_drop_layer_ = false;
|
||||
}
|
||||
virtual void BeginPassHook(unsigned int /*pass*/) {}
|
||||
|
||||
// Example pattern for spatial layers and 2 temporal layers used in the
|
||||
// bypass/flexible mode. The pattern corresponds to the pattern
|
||||
// VP9E_TEMPORAL_LAYERING_MODE_0101 (temporal_layering_mode == 2) used in
|
||||
// non-flexible mode, except that we disable inter-layer prediction.
|
||||
void set_frame_flags_bypass_mode(
|
||||
int tl, int num_spatial_layers, int is_key_frame,
|
||||
vpx_svc_ref_frame_config_t *ref_frame_config) {
|
||||
for (int sl = 0; sl < num_spatial_layers; ++sl) {
|
||||
if (!tl) {
|
||||
if (!sl) {
|
||||
ref_frame_config->frame_flags[sl] =
|
||||
VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF | VP8_EFLAG_NO_UPD_GF |
|
||||
VP8_EFLAG_NO_UPD_ARF;
|
||||
} else {
|
||||
if (is_key_frame) {
|
||||
ref_frame_config->frame_flags[sl] =
|
||||
VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF |
|
||||
VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_ARF;
|
||||
} else {
|
||||
ref_frame_config->frame_flags[sl] =
|
||||
VP8_EFLAG_NO_REF_ARF | VP8_EFLAG_NO_UPD_GF |
|
||||
VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_REF_GF;
|
||||
}
|
||||
}
|
||||
} else if (tl == 1) {
|
||||
if (!sl) {
|
||||
ref_frame_config->frame_flags[sl] =
|
||||
VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF |
|
||||
VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_GF;
|
||||
} else {
|
||||
ref_frame_config->frame_flags[sl] =
|
||||
VP8_EFLAG_NO_REF_ARF | VP8_EFLAG_NO_UPD_LAST |
|
||||
VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_REF_GF;
|
||||
}
|
||||
}
|
||||
if (tl == 0) {
|
||||
ref_frame_config->lst_fb_idx[sl] = sl;
|
||||
if (sl) {
|
||||
if (is_key_frame) {
|
||||
ref_frame_config->lst_fb_idx[sl] = sl - 1;
|
||||
ref_frame_config->gld_fb_idx[sl] = sl;
|
||||
} else {
|
||||
ref_frame_config->gld_fb_idx[sl] = sl - 1;
|
||||
}
|
||||
} else {
|
||||
ref_frame_config->gld_fb_idx[sl] = 0;
|
||||
}
|
||||
ref_frame_config->alt_fb_idx[sl] = 0;
|
||||
} else if (tl == 1) {
|
||||
ref_frame_config->lst_fb_idx[sl] = sl;
|
||||
ref_frame_config->gld_fb_idx[sl] = num_spatial_layers + sl - 1;
|
||||
ref_frame_config->alt_fb_idx[sl] = num_spatial_layers + sl;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
virtual void PreEncodeFrameHook(::libvpx_test::VideoSource *video,
|
||||
::libvpx_test::Encoder *encoder) {
|
||||
if (video->frame() == 0) {
|
||||
@@ -1252,36 +1381,137 @@ class DatarateOnePassCbrSvc
|
||||
encoder->Control(VP8E_SET_STATIC_THRESHOLD, 1);
|
||||
encoder->Control(VP9E_SET_TUNE_CONTENT, tune_content_);
|
||||
}
|
||||
|
||||
if (update_pattern_ && video->frame() >= 100) {
|
||||
vpx_svc_layer_id_t layer_id;
|
||||
if (video->frame() == 100) {
|
||||
cfg_.temporal_layering_mode = VP9E_TEMPORAL_LAYERING_MODE_BYPASS;
|
||||
encoder->Config(&cfg_);
|
||||
}
|
||||
// Set layer id since the pattern changed.
|
||||
layer_id.spatial_layer_id = 0;
|
||||
layer_id.temporal_layer_id = (video->frame() % 2 != 0);
|
||||
encoder->Control(VP9E_SET_SVC_LAYER_ID, &layer_id);
|
||||
set_frame_flags_bypass_mode(layer_id.temporal_layer_id,
|
||||
number_spatial_layers_, 0, &ref_frame_config);
|
||||
encoder->Control(VP9E_SET_SVC_REF_FRAME_CONFIG, &ref_frame_config);
|
||||
}
|
||||
|
||||
if (dynamic_drop_layer_) {
|
||||
if (video->frame() == 100) {
|
||||
// Change layer bitrates to set top layer to 0. This will trigger skip
|
||||
// encoding/dropping of top spatial layer.
|
||||
cfg_.rc_target_bitrate -= cfg_.layer_target_bitrate[2];
|
||||
cfg_.layer_target_bitrate[2] = 0;
|
||||
encoder->Config(&cfg_);
|
||||
} else if (video->frame() == 300) {
|
||||
// Change layer bitrate on top layer to non-zero to start encoding it
|
||||
// again.
|
||||
cfg_.layer_target_bitrate[2] = 500;
|
||||
cfg_.rc_target_bitrate += cfg_.layer_target_bitrate[2];
|
||||
encoder->Config(&cfg_);
|
||||
}
|
||||
}
|
||||
const vpx_rational_t tb = video->timebase();
|
||||
timebase_ = static_cast<double>(tb.num) / tb.den;
|
||||
duration_ = 0;
|
||||
}
|
||||
|
||||
virtual void PostEncodeFrameHook(::libvpx_test::Encoder *encoder) {
|
||||
vpx_svc_layer_id_t layer_id;
|
||||
encoder->Control(VP9E_GET_SVC_LAYER_ID, &layer_id);
|
||||
spatial_layer_id_ = layer_id.spatial_layer_id;
|
||||
temporal_layer_id_ = layer_id.temporal_layer_id;
|
||||
// Update buffer with per-layer target frame bandwidth, this is done
|
||||
// for every frame passed to the encoder (encoded or dropped).
|
||||
// For temporal layers, update the cumulative buffer level.
|
||||
for (int sl = 0; sl < number_spatial_layers_; ++sl) {
|
||||
for (int tl = temporal_layer_id_; tl < number_temporal_layers_; ++tl) {
|
||||
const int layer = sl * number_temporal_layers_ + tl;
|
||||
bits_in_buffer_model_[layer] +=
|
||||
static_cast<int64_t>(layer_target_avg_bandwidth_[layer]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
vpx_codec_err_t parse_superframe_index(const uint8_t *data, size_t data_sz,
|
||||
uint32_t sizes[8], int *count) {
|
||||
uint8_t marker;
|
||||
marker = *(data + data_sz - 1);
|
||||
*count = 0;
|
||||
if ((marker & 0xe0) == 0xc0) {
|
||||
const uint32_t frames = (marker & 0x7) + 1;
|
||||
const uint32_t mag = ((marker >> 3) & 0x3) + 1;
|
||||
const size_t index_sz = 2 + mag * frames;
|
||||
// This chunk is marked as having a superframe index but doesn't have
|
||||
// enough data for it, thus it's an invalid superframe index.
|
||||
if (data_sz < index_sz) return VPX_CODEC_CORRUPT_FRAME;
|
||||
{
|
||||
const uint8_t marker2 = *(data + data_sz - index_sz);
|
||||
// This chunk is marked as having a superframe index but doesn't have
|
||||
// the matching marker byte at the front of the index therefore it's an
|
||||
// invalid chunk.
|
||||
if (marker != marker2) return VPX_CODEC_CORRUPT_FRAME;
|
||||
}
|
||||
{
|
||||
uint32_t i, j;
|
||||
const uint8_t *x = &data[data_sz - index_sz + 1];
|
||||
for (i = 0; i < frames; ++i) {
|
||||
uint32_t this_sz = 0;
|
||||
|
||||
for (j = 0; j < mag; ++j) this_sz |= (*x++) << (j * 8);
|
||||
sizes[i] = this_sz;
|
||||
}
|
||||
*count = frames;
|
||||
}
|
||||
}
|
||||
return VPX_CODEC_OK;
|
||||
}
|
||||
|
||||
virtual void FramePktHook(const vpx_codec_cx_pkt_t *pkt) {
|
||||
vpx_codec_pts_t duration = pkt->data.frame.pts - last_pts_;
|
||||
if (last_pts_ == 0) duration = 1;
|
||||
bits_in_buffer_model_ += static_cast<int64_t>(
|
||||
duration * timebase_ * cfg_.rc_target_bitrate * 1000);
|
||||
uint32_t sizes[8] = { 0 };
|
||||
int count = 0;
|
||||
last_pts_ = pkt->data.frame.pts;
|
||||
const bool key_frame =
|
||||
(pkt->data.frame.flags & VPX_FRAME_IS_KEY) ? true : false;
|
||||
if (!key_frame) {
|
||||
// TODO(marpan): This check currently fails for some of the SVC tests,
|
||||
// re-enable when issue (webm:1350) is resolved.
|
||||
// ASSERT_GE(bits_in_buffer_model_, 0) << "Buffer Underrun at frame "
|
||||
// << pkt->data.frame.pts;
|
||||
parse_superframe_index(static_cast<const uint8_t *>(pkt->data.frame.buf),
|
||||
pkt->data.frame.sz, sizes, &count);
|
||||
if (!dynamic_drop_layer_) ASSERT_EQ(count, number_spatial_layers_);
|
||||
for (int sl = 0; sl < number_spatial_layers_; ++sl) {
|
||||
sizes[sl] = sizes[sl] << 3;
|
||||
// Update the total encoded bits per layer.
|
||||
// For temporal layers, update the cumulative encoded bits per layer.
|
||||
for (int tl = temporal_layer_id_; tl < number_temporal_layers_; ++tl) {
|
||||
const int layer = sl * number_temporal_layers_ + tl;
|
||||
bits_total_[layer] += static_cast<int64_t>(sizes[sl]);
|
||||
// Update the per-layer buffer level with the encoded frame size.
|
||||
bits_in_buffer_model_[layer] -= static_cast<int64_t>(sizes[sl]);
|
||||
// There should be no buffer underrun, except on the base
|
||||
// temporal layer, since there may be key frames there.
|
||||
if (!key_frame && tl > 0) {
|
||||
ASSERT_GE(bits_in_buffer_model_[layer], 0)
|
||||
<< "Buffer Underrun at frame " << pkt->data.frame.pts;
|
||||
}
|
||||
}
|
||||
|
||||
ASSERT_EQ(pkt->data.frame.width[sl],
|
||||
top_sl_width_ * svc_params_.scaling_factor_num[sl] /
|
||||
svc_params_.scaling_factor_den[sl]);
|
||||
|
||||
ASSERT_EQ(pkt->data.frame.height[sl],
|
||||
top_sl_height_ * svc_params_.scaling_factor_num[sl] /
|
||||
svc_params_.scaling_factor_den[sl]);
|
||||
}
|
||||
const size_t frame_size_in_bits = pkt->data.frame.sz * 8;
|
||||
bits_in_buffer_model_ -= static_cast<int64_t>(frame_size_in_bits);
|
||||
bits_total_ += frame_size_in_bits;
|
||||
if (!first_drop_ && duration > 1) first_drop_ = last_pts_ + 1;
|
||||
last_pts_ = pkt->data.frame.pts;
|
||||
bits_in_last_frame_ = frame_size_in_bits;
|
||||
++frame_number_;
|
||||
}
|
||||
|
||||
virtual void EndPassHook(void) {
|
||||
if (bits_total_) {
|
||||
const double file_size_in_kb = bits_total_ / 1000.; // bits per kilobit
|
||||
duration_ = (last_pts_ + 1) * timebase_;
|
||||
file_datarate_ = file_size_in_kb / duration_;
|
||||
for (int sl = 0; sl < number_spatial_layers_; ++sl) {
|
||||
for (int tl = 0; tl < number_temporal_layers_; ++tl) {
|
||||
const int layer = sl * number_temporal_layers_ + tl;
|
||||
const double file_size_in_kb = bits_total_[layer] / 1000.;
|
||||
duration_ = (last_pts_ + 1) * timebase_;
|
||||
file_datarate_[layer] = file_size_in_kb / duration_;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1294,13 +1524,11 @@ class DatarateOnePassCbrSvc
|
||||
unsigned int GetMismatchFrames() { return mismatch_nframes_; }
|
||||
|
||||
vpx_codec_pts_t last_pts_;
|
||||
int64_t bits_in_buffer_model_;
|
||||
int64_t bits_in_buffer_model_[VPX_MAX_LAYERS];
|
||||
double timebase_;
|
||||
int frame_number_;
|
||||
vpx_codec_pts_t first_drop_;
|
||||
int64_t bits_total_;
|
||||
int64_t bits_total_[VPX_MAX_LAYERS];
|
||||
double duration_;
|
||||
double file_datarate_;
|
||||
double file_datarate_[VPX_MAX_LAYERS];
|
||||
size_t bits_in_last_frame_;
|
||||
vpx_svc_extra_cfg_t svc_params_;
|
||||
int speed_setting_;
|
||||
@@ -1309,14 +1537,27 @@ class DatarateOnePassCbrSvc
|
||||
int denoiser_on_;
|
||||
int tune_content_;
|
||||
int base_speed_setting_;
|
||||
int spatial_layer_id_;
|
||||
int temporal_layer_id_;
|
||||
int number_spatial_layers_;
|
||||
int number_temporal_layers_;
|
||||
int layer_target_avg_bandwidth_[VPX_MAX_LAYERS];
|
||||
bool dynamic_drop_layer_;
|
||||
unsigned int top_sl_width_;
|
||||
unsigned int top_sl_height_;
|
||||
vpx_svc_ref_frame_config_t ref_frame_config;
|
||||
int update_pattern_;
|
||||
};
|
||||
static void assign_layer_bitrates(vpx_codec_enc_cfg_t *const enc_cfg,
|
||||
const vpx_svc_extra_cfg_t *svc_params,
|
||||
int spatial_layers, int temporal_layers,
|
||||
int temporal_layering_mode) {
|
||||
int temporal_layering_mode,
|
||||
int *layer_target_avg_bandwidth,
|
||||
int64_t *bits_in_buffer_model) {
|
||||
int sl, spatial_layer_target;
|
||||
float total = 0;
|
||||
float alloc_ratio[VPX_MAX_LAYERS] = { 0 };
|
||||
float framerate = 30.0;
|
||||
for (sl = 0; sl < spatial_layers; ++sl) {
|
||||
if (svc_params->scaling_factor_den[sl] > 0) {
|
||||
alloc_ratio[sl] = (float)(svc_params->scaling_factor_num[sl] * 1.0 /
|
||||
@@ -1336,8 +1577,41 @@ static void assign_layer_bitrates(vpx_codec_enc_cfg_t *const enc_cfg,
|
||||
} else if (temporal_layering_mode == 2) {
|
||||
enc_cfg->layer_target_bitrate[index] = spatial_layer_target * 2 / 3;
|
||||
enc_cfg->layer_target_bitrate[index + 1] = spatial_layer_target;
|
||||
} else if (temporal_layering_mode <= 1) {
|
||||
enc_cfg->layer_target_bitrate[index] = spatial_layer_target;
|
||||
}
|
||||
}
|
||||
for (sl = 0; sl < spatial_layers; ++sl) {
|
||||
for (int tl = 0; tl < temporal_layers; ++tl) {
|
||||
const int layer = sl * temporal_layers + tl;
|
||||
float layer_framerate = framerate;
|
||||
if (temporal_layers == 2 && tl == 0) layer_framerate = framerate / 2;
|
||||
if (temporal_layers == 3 && tl == 0) layer_framerate = framerate / 4;
|
||||
if (temporal_layers == 3 && tl == 1) layer_framerate = framerate / 2;
|
||||
layer_target_avg_bandwidth[layer] = static_cast<int>(
|
||||
enc_cfg->layer_target_bitrate[layer] * 1000.0 / layer_framerate);
|
||||
bits_in_buffer_model[layer] =
|
||||
enc_cfg->layer_target_bitrate[layer] * enc_cfg->rc_buf_initial_sz;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void CheckLayerRateTargeting(vpx_codec_enc_cfg_t *const cfg,
|
||||
int number_spatial_layers,
|
||||
int number_temporal_layers,
|
||||
double *file_datarate,
|
||||
double thresh_overshoot,
|
||||
double thresh_undershoot) {
|
||||
for (int sl = 0; sl < number_spatial_layers; ++sl)
|
||||
for (int tl = 0; tl < number_temporal_layers; ++tl) {
|
||||
const int layer = sl * number_temporal_layers + tl;
|
||||
ASSERT_GE(cfg->layer_target_bitrate[layer],
|
||||
file_datarate[layer] * thresh_overshoot)
|
||||
<< " The datarate for the file exceeds the target by too much!";
|
||||
ASSERT_LE(cfg->layer_target_bitrate[layer],
|
||||
file_datarate[layer] * thresh_undershoot)
|
||||
<< " The datarate for the file is lower than the target by too much!";
|
||||
}
|
||||
}
|
||||
|
||||
// Check basic rate targeting for 1 pass CBR SVC: 2 spatial layers and 1
|
||||
@@ -1363,14 +1637,21 @@ TEST_P(DatarateOnePassCbrSvc, OnePassCbrSvc2SL1TLScreenContent1) {
|
||||
svc_params_.scaling_factor_den[1] = 288;
|
||||
cfg_.rc_dropframe_thresh = 10;
|
||||
cfg_.kf_max_dist = 9999;
|
||||
::libvpx_test::Y4mVideoSource video("niklas_1280_720_30.y4m", 0, 300);
|
||||
number_spatial_layers_ = cfg_.ss_number_layers;
|
||||
number_temporal_layers_ = cfg_.ts_number_layers;
|
||||
::libvpx_test::Y4mVideoSource video("niklas_1280_720_30.y4m", 0, 60);
|
||||
top_sl_width_ = 1280;
|
||||
top_sl_height_ = 720;
|
||||
cfg_.rc_target_bitrate = 500;
|
||||
ResetModel();
|
||||
tune_content_ = 1;
|
||||
base_speed_setting_ = speed_setting_;
|
||||
assign_layer_bitrates(&cfg_, &svc_params_, cfg_.ss_number_layers,
|
||||
cfg_.ts_number_layers, cfg_.temporal_layering_mode);
|
||||
cfg_.ts_number_layers, cfg_.temporal_layering_mode,
|
||||
layer_target_avg_bandwidth_, bits_in_buffer_model_);
|
||||
ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
|
||||
CheckLayerRateTargeting(&cfg_, number_spatial_layers_,
|
||||
number_temporal_layers_, file_datarate_, 0.78, 1.15);
|
||||
EXPECT_EQ(static_cast<unsigned int>(0), GetMismatchFrames());
|
||||
}
|
||||
|
||||
@@ -1398,26 +1679,30 @@ TEST_P(DatarateOnePassCbrSvc, OnePassCbrSvc2SL3TL) {
|
||||
svc_params_.scaling_factor_den[1] = 288;
|
||||
cfg_.rc_dropframe_thresh = 0;
|
||||
cfg_.kf_max_dist = 9999;
|
||||
::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
|
||||
30, 1, 0, 200);
|
||||
number_spatial_layers_ = cfg_.ss_number_layers;
|
||||
number_temporal_layers_ = cfg_.ts_number_layers;
|
||||
::libvpx_test::I420VideoSource video("niklas_640_480_30.yuv", 640, 480, 30, 1,
|
||||
0, 400);
|
||||
top_sl_width_ = 640;
|
||||
top_sl_height_ = 480;
|
||||
// TODO(marpan): Check that effective_datarate for each layer hits the
|
||||
// layer target_bitrate.
|
||||
for (int i = 200; i <= 800; i += 200) {
|
||||
cfg_.rc_target_bitrate = i;
|
||||
ResetModel();
|
||||
assign_layer_bitrates(&cfg_, &svc_params_, cfg_.ss_number_layers,
|
||||
cfg_.ts_number_layers, cfg_.temporal_layering_mode);
|
||||
cfg_.ts_number_layers, cfg_.temporal_layering_mode,
|
||||
layer_target_avg_bandwidth_, bits_in_buffer_model_);
|
||||
ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
|
||||
ASSERT_GE(cfg_.rc_target_bitrate, file_datarate_ * 0.78)
|
||||
<< " The datarate for the file exceeds the target by too much!";
|
||||
ASSERT_LE(cfg_.rc_target_bitrate, file_datarate_ * 1.15)
|
||||
<< " The datarate for the file is lower than the target by too much!";
|
||||
CheckLayerRateTargeting(&cfg_, number_spatial_layers_,
|
||||
number_temporal_layers_, file_datarate_, 0.78,
|
||||
1.15);
|
||||
#if CONFIG_VP9_DECODER
|
||||
// Number of temporal layers > 1, so half of the frames in this SVC pattern
|
||||
// will be non-reference frame and hence encoder will avoid loopfilter.
|
||||
// Since frame dropper is off, we can expcet 100 (half of the sequence)
|
||||
// Since frame dropper is off, we can expect 200 (half of the sequence)
|
||||
// mismatched frames.
|
||||
EXPECT_EQ(static_cast<unsigned int>(100), GetMismatchFrames());
|
||||
EXPECT_EQ(static_cast<unsigned int>(200), GetMismatchFrames());
|
||||
#endif
|
||||
}
|
||||
}
|
||||
@@ -1446,33 +1731,43 @@ TEST_P(DatarateOnePassCbrSvc, OnePassCbrSvc2SL3TLDenoiserOn) {
|
||||
svc_params_.scaling_factor_den[1] = 288;
|
||||
cfg_.rc_dropframe_thresh = 0;
|
||||
cfg_.kf_max_dist = 9999;
|
||||
::libvpx_test::Y4mVideoSource video("niklas_1280_720_30.y4m", 0, 300);
|
||||
number_spatial_layers_ = cfg_.ss_number_layers;
|
||||
number_temporal_layers_ = cfg_.ts_number_layers;
|
||||
::libvpx_test::I420VideoSource video("niklas_640_480_30.yuv", 640, 480, 30, 1,
|
||||
0, 400);
|
||||
top_sl_width_ = 640;
|
||||
top_sl_height_ = 480;
|
||||
// TODO(marpan): Check that effective_datarate for each layer hits the
|
||||
// layer target_bitrate.
|
||||
for (int i = 600; i <= 1000; i += 200) {
|
||||
cfg_.rc_target_bitrate = i;
|
||||
ResetModel();
|
||||
denoiser_on_ = 1;
|
||||
assign_layer_bitrates(&cfg_, &svc_params_, cfg_.ss_number_layers,
|
||||
cfg_.ts_number_layers, cfg_.temporal_layering_mode);
|
||||
ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
|
||||
ASSERT_GE(cfg_.rc_target_bitrate, file_datarate_ * 0.78)
|
||||
<< " The datarate for the file exceeds the target by too much!";
|
||||
ASSERT_LE(cfg_.rc_target_bitrate, file_datarate_ * 1.15)
|
||||
<< " The datarate for the file is lower than the target by too much!";
|
||||
// For SVC, noise_sen = 1 means denoising only the top spatial layer
|
||||
// noise_sen = 2 means denoising the two top spatial layers.
|
||||
for (int noise_sen = 1; noise_sen <= 2; noise_sen++) {
|
||||
for (int i = 600; i <= 1000; i += 200) {
|
||||
cfg_.rc_target_bitrate = i;
|
||||
ResetModel();
|
||||
denoiser_on_ = noise_sen;
|
||||
assign_layer_bitrates(&cfg_, &svc_params_, cfg_.ss_number_layers,
|
||||
cfg_.ts_number_layers, cfg_.temporal_layering_mode,
|
||||
layer_target_avg_bandwidth_, bits_in_buffer_model_);
|
||||
ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
|
||||
CheckLayerRateTargeting(&cfg_, number_spatial_layers_,
|
||||
number_temporal_layers_, file_datarate_, 0.78,
|
||||
1.15);
|
||||
#if CONFIG_VP9_DECODER
|
||||
// Number of temporal layers > 1, so half of the frames in this SVC pattern
|
||||
// will be non-reference frame and hence encoder will avoid loopfilter.
|
||||
// Since frame dropper is off, we can expcet 150 (half of the sequence)
|
||||
// mismatched frames.
|
||||
EXPECT_EQ(static_cast<unsigned int>(150), GetMismatchFrames());
|
||||
// Number of temporal layers > 1, so half of the frames in this SVC
|
||||
// pattern
|
||||
// will be non-reference frame and hence encoder will avoid loopfilter.
|
||||
// Since frame dropper is off, we can expect 200 (half of the sequence)
|
||||
// mismatched frames.
|
||||
EXPECT_EQ(static_cast<unsigned int>(200), GetMismatchFrames());
|
||||
#endif
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Check basic rate targeting for 1 pass CBR SVC: 2 spatial layers and 3
|
||||
// temporal layers. Run CIF clip with 1 thread, and few short key frame periods.
|
||||
TEST_P(DatarateOnePassCbrSvc, DISABLED_OnePassCbrSvc2SL3TLSmallKf) {
|
||||
TEST_P(DatarateOnePassCbrSvc, OnePassCbrSvc2SL3TLSmallKf) {
|
||||
cfg_.rc_buf_initial_sz = 500;
|
||||
cfg_.rc_buf_optimal_sz = 500;
|
||||
cfg_.rc_buf_sz = 1000;
|
||||
@@ -1493,21 +1788,25 @@ TEST_P(DatarateOnePassCbrSvc, DISABLED_OnePassCbrSvc2SL3TLSmallKf) {
|
||||
svc_params_.scaling_factor_num[1] = 288;
|
||||
svc_params_.scaling_factor_den[1] = 288;
|
||||
cfg_.rc_dropframe_thresh = 10;
|
||||
::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
|
||||
30, 1, 0, 200);
|
||||
cfg_.rc_target_bitrate = 400;
|
||||
number_spatial_layers_ = cfg_.ss_number_layers;
|
||||
number_temporal_layers_ = cfg_.ts_number_layers;
|
||||
::libvpx_test::I420VideoSource video("niklas_640_480_30.yuv", 640, 480, 30, 1,
|
||||
0, 400);
|
||||
top_sl_width_ = 640;
|
||||
top_sl_height_ = 480;
|
||||
// For this 3 temporal layer case, pattern repeats every 4 frames, so choose
|
||||
// 4 key neighboring key frame periods (so key frame will land on 0-2-1-2).
|
||||
for (int j = 64; j <= 67; j++) {
|
||||
cfg_.kf_max_dist = j;
|
||||
ResetModel();
|
||||
assign_layer_bitrates(&cfg_, &svc_params_, cfg_.ss_number_layers,
|
||||
cfg_.ts_number_layers, cfg_.temporal_layering_mode);
|
||||
cfg_.ts_number_layers, cfg_.temporal_layering_mode,
|
||||
layer_target_avg_bandwidth_, bits_in_buffer_model_);
|
||||
ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
|
||||
ASSERT_GE(cfg_.rc_target_bitrate, file_datarate_ * 0.80)
|
||||
<< " The datarate for the file exceeds the target by too much!";
|
||||
ASSERT_LE(cfg_.rc_target_bitrate, file_datarate_ * 1.15)
|
||||
<< " The datarate for the file is lower than the target by too much!";
|
||||
CheckLayerRateTargeting(&cfg_, number_spatial_layers_,
|
||||
number_temporal_layers_, file_datarate_, 0.78,
|
||||
1.15);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1535,22 +1834,25 @@ TEST_P(DatarateOnePassCbrSvc, OnePassCbrSvc2SL3TL4Threads) {
|
||||
svc_params_.scaling_factor_den[1] = 288;
|
||||
cfg_.rc_dropframe_thresh = 0;
|
||||
cfg_.kf_max_dist = 9999;
|
||||
::libvpx_test::Y4mVideoSource video("niklas_1280_720_30.y4m", 0, 300);
|
||||
number_spatial_layers_ = cfg_.ss_number_layers;
|
||||
number_temporal_layers_ = cfg_.ts_number_layers;
|
||||
::libvpx_test::Y4mVideoSource video("niklas_1280_720_30.y4m", 0, 60);
|
||||
top_sl_width_ = 1280;
|
||||
top_sl_height_ = 720;
|
||||
cfg_.rc_target_bitrate = 800;
|
||||
ResetModel();
|
||||
assign_layer_bitrates(&cfg_, &svc_params_, cfg_.ss_number_layers,
|
||||
cfg_.ts_number_layers, cfg_.temporal_layering_mode);
|
||||
cfg_.ts_number_layers, cfg_.temporal_layering_mode,
|
||||
layer_target_avg_bandwidth_, bits_in_buffer_model_);
|
||||
ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
|
||||
ASSERT_GE(cfg_.rc_target_bitrate, file_datarate_ * 0.78)
|
||||
<< " The datarate for the file exceeds the target by too much!";
|
||||
ASSERT_LE(cfg_.rc_target_bitrate, file_datarate_ * 1.15)
|
||||
<< " The datarate for the file is lower than the target by too much!";
|
||||
CheckLayerRateTargeting(&cfg_, number_spatial_layers_,
|
||||
number_temporal_layers_, file_datarate_, 0.78, 1.15);
|
||||
#if CONFIG_VP9_DECODER
|
||||
// Number of temporal layers > 1, so half of the frames in this SVC pattern
|
||||
// will be non-reference frame and hence encoder will avoid loopfilter.
|
||||
// Since frame dropper is off, we can expcet 150 (half of the sequence)
|
||||
// Since frame dropper is off, we can expect 30 (half of the sequence)
|
||||
// mismatched frames.
|
||||
EXPECT_EQ(static_cast<unsigned int>(150), GetMismatchFrames());
|
||||
EXPECT_EQ(static_cast<unsigned int>(30), GetMismatchFrames());
|
||||
#endif
|
||||
}
|
||||
|
||||
@@ -1580,25 +1882,126 @@ TEST_P(DatarateOnePassCbrSvc, OnePassCbrSvc3SL3TL) {
|
||||
svc_params_.scaling_factor_den[2] = 288;
|
||||
cfg_.rc_dropframe_thresh = 0;
|
||||
cfg_.kf_max_dist = 9999;
|
||||
::libvpx_test::Y4mVideoSource video("niklas_1280_720_30.y4m", 0, 300);
|
||||
number_spatial_layers_ = cfg_.ss_number_layers;
|
||||
number_temporal_layers_ = cfg_.ts_number_layers;
|
||||
::libvpx_test::I420VideoSource video("niklas_640_480_30.yuv", 640, 480, 30, 1,
|
||||
0, 400);
|
||||
top_sl_width_ = 640;
|
||||
top_sl_height_ = 480;
|
||||
cfg_.rc_target_bitrate = 800;
|
||||
ResetModel();
|
||||
assign_layer_bitrates(&cfg_, &svc_params_, cfg_.ss_number_layers,
|
||||
cfg_.ts_number_layers, cfg_.temporal_layering_mode);
|
||||
cfg_.ts_number_layers, cfg_.temporal_layering_mode,
|
||||
layer_target_avg_bandwidth_, bits_in_buffer_model_);
|
||||
ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
|
||||
ASSERT_GE(cfg_.rc_target_bitrate, file_datarate_ * 0.78)
|
||||
<< " The datarate for the file exceeds the target by too much!";
|
||||
ASSERT_LE(cfg_.rc_target_bitrate, file_datarate_ * 1.22)
|
||||
<< " The datarate for the file is lower than the target by too much!";
|
||||
CheckLayerRateTargeting(&cfg_, number_spatial_layers_,
|
||||
number_temporal_layers_, file_datarate_, 0.78, 1.15);
|
||||
#if CONFIG_VP9_DECODER
|
||||
// Number of temporal layers > 1, so half of the frames in this SVC pattern
|
||||
// will be non-reference frame and hence encoder will avoid loopfilter.
|
||||
// Since frame dropper is off, we can expcet 150 (half of the sequence)
|
||||
// Since frame dropper is off, we can expect 200 (half of the sequence)
|
||||
// mismatched frames.
|
||||
EXPECT_EQ(static_cast<unsigned int>(150), GetMismatchFrames());
|
||||
EXPECT_EQ(static_cast<unsigned int>(200), GetMismatchFrames());
|
||||
#endif
|
||||
}
|
||||
|
||||
// Check basic rate targeting for 1 pass CBR SVC: 3 spatial layers and
|
||||
// 2 temporal layers, with a change on the fly from the fixed SVC pattern to one
|
||||
// generate via SVC_SET_REF_FRAME_CONFIG. The new pattern also disables
|
||||
// inter-layer prediction.
|
||||
TEST_P(DatarateOnePassCbrSvc, OnePassCbrSvc3SL2TLDynamicPatternChange) {
|
||||
cfg_.rc_buf_initial_sz = 500;
|
||||
cfg_.rc_buf_optimal_sz = 500;
|
||||
cfg_.rc_buf_sz = 1000;
|
||||
cfg_.rc_min_quantizer = 0;
|
||||
cfg_.rc_max_quantizer = 63;
|
||||
cfg_.rc_end_usage = VPX_CBR;
|
||||
cfg_.g_lag_in_frames = 0;
|
||||
cfg_.ss_number_layers = 3;
|
||||
cfg_.ts_number_layers = 2;
|
||||
cfg_.ts_rate_decimator[0] = 2;
|
||||
cfg_.ts_rate_decimator[1] = 1;
|
||||
cfg_.g_error_resilient = 1;
|
||||
cfg_.g_threads = 1;
|
||||
cfg_.temporal_layering_mode = 2;
|
||||
svc_params_.scaling_factor_num[0] = 72;
|
||||
svc_params_.scaling_factor_den[0] = 288;
|
||||
svc_params_.scaling_factor_num[1] = 144;
|
||||
svc_params_.scaling_factor_den[1] = 288;
|
||||
svc_params_.scaling_factor_num[2] = 288;
|
||||
svc_params_.scaling_factor_den[2] = 288;
|
||||
cfg_.rc_dropframe_thresh = 0;
|
||||
cfg_.kf_max_dist = 9999;
|
||||
number_spatial_layers_ = cfg_.ss_number_layers;
|
||||
number_temporal_layers_ = cfg_.ts_number_layers;
|
||||
// Change SVC pattern on the fly.
|
||||
update_pattern_ = 1;
|
||||
::libvpx_test::I420VideoSource video("niklas_640_480_30.yuv", 640, 480, 30, 1,
|
||||
0, 400);
|
||||
top_sl_width_ = 640;
|
||||
top_sl_height_ = 480;
|
||||
cfg_.rc_target_bitrate = 800;
|
||||
ResetModel();
|
||||
assign_layer_bitrates(&cfg_, &svc_params_, cfg_.ss_number_layers,
|
||||
cfg_.ts_number_layers, cfg_.temporal_layering_mode,
|
||||
layer_target_avg_bandwidth_, bits_in_buffer_model_);
|
||||
ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
|
||||
CheckLayerRateTargeting(&cfg_, number_spatial_layers_,
|
||||
number_temporal_layers_, file_datarate_, 0.78, 1.15);
|
||||
#if CONFIG_VP9_DECODER
|
||||
// Number of temporal layers > 1, so half of the frames in this SVC pattern
|
||||
// will be non-reference frame and hence encoder will avoid loopfilter.
|
||||
// Since frame dropper is off, we can expect 200 (half of the sequence)
|
||||
// mismatched frames.
|
||||
EXPECT_EQ(static_cast<unsigned int>(200), GetMismatchFrames());
|
||||
#endif
|
||||
}
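The switch itself is driven through the encoder control interface. Below is a minimal sketch, assuming the switch is issued from a per-frame hook in the test harness: the control id VP9E_SET_SVC_REF_FRAME_CONFIG, the helper name and the switch-frame parameter are assumptions for illustration, and the per-layer fields of struct vpx_svc_ref_frame_config are left unset because their layout is version dependent. Only the Control() overload for this struct (added to the encoder wrapper later in this change) is taken from the diff.

#include <string.h>

#include "test/encode_test_driver.h"
#include "vpx/vp8cx.h"

// Illustrative sketch: push a new SVC reference-frame configuration once the
// switch frame is reached.
static void MaybeSwitchSvcPattern(::libvpx_test::Encoder *encoder,
                                  unsigned int frame,
                                  unsigned int switch_frame) {
  if (frame != switch_frame) return;
  struct vpx_svc_ref_frame_config ref_config;
  memset(&ref_config, 0, sizeof(ref_config));
  // Per-spatial-layer reference indices / prediction flags would be filled in
  // here before the call; they are omitted in this sketch.
  encoder->Control(VP9E_SET_SVC_REF_FRAME_CONFIG, &ref_config);
}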
|
||||
|
||||
// Check basic rate targeting for 1 pass CBR SVC with 3 spatial layers and on
// the fly switching to 2 spatial layers and then back to 3. This switch is done
// by setting top spatial layer bitrate to 0, and then back to non-zero, during
// the sequence.
TEST_P(DatarateOnePassCbrSvc, OnePassCbrSvc3SL_to_2SL_dynamic) {
|
||||
cfg_.rc_buf_initial_sz = 500;
|
||||
cfg_.rc_buf_optimal_sz = 500;
|
||||
cfg_.rc_buf_sz = 1000;
|
||||
cfg_.rc_min_quantizer = 0;
|
||||
cfg_.rc_max_quantizer = 63;
|
||||
cfg_.rc_end_usage = VPX_CBR;
|
||||
cfg_.g_lag_in_frames = 0;
|
||||
cfg_.ss_number_layers = 3;
|
||||
cfg_.ts_number_layers = 1;
|
||||
cfg_.ts_rate_decimator[0] = 1;
|
||||
cfg_.g_error_resilient = 1;
|
||||
cfg_.g_threads = 1;
|
||||
cfg_.temporal_layering_mode = 0;
|
||||
svc_params_.scaling_factor_num[0] = 72;
|
||||
svc_params_.scaling_factor_den[0] = 288;
|
||||
svc_params_.scaling_factor_num[1] = 144;
|
||||
svc_params_.scaling_factor_den[1] = 288;
|
||||
svc_params_.scaling_factor_num[2] = 288;
|
||||
svc_params_.scaling_factor_den[2] = 288;
|
||||
cfg_.rc_dropframe_thresh = 0;
|
||||
cfg_.kf_max_dist = 9999;
|
||||
number_spatial_layers_ = cfg_.ss_number_layers;
|
||||
number_temporal_layers_ = cfg_.ts_number_layers;
|
||||
::libvpx_test::I420VideoSource video("niklas_640_480_30.yuv", 640, 480, 30, 1,
|
||||
0, 400);
|
||||
top_sl_width_ = 640;
|
||||
top_sl_height_ = 480;
|
||||
cfg_.rc_target_bitrate = 800;
|
||||
ResetModel();
|
||||
dynamic_drop_layer_ = true;
|
||||
assign_layer_bitrates(&cfg_, &svc_params_, cfg_.ss_number_layers,
|
||||
cfg_.ts_number_layers, cfg_.temporal_layering_mode,
|
||||
layer_target_avg_bandwidth_, bits_in_buffer_model_);
|
||||
ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
|
||||
// Don't check rate targeting on top spatial layer since it will be skipped
|
||||
// for part of the sequence.
|
||||
CheckLayerRateTargeting(&cfg_, number_spatial_layers_ - 1,
|
||||
number_temporal_layers_, file_datarate_, 0.78, 1.15);
|
||||
}
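Outside the test harness, the same layer drop can be driven through the public encoder API. A minimal sketch follows, assuming the VP9 SVC convention that layer_target_bitrate[] is laid out spatial-major (spatial layer index times the number of temporal layers, plus the temporal layer index); the helper name and parameters are illustrative and not part of this change.

#include "vpx/vpx_encoder.h"

// Illustrative sketch: zero out the top spatial layer's per-layer bitrates and
// push the updated configuration to a running encoder.
static vpx_codec_err_t DropTopSpatialLayer(vpx_codec_ctx_t *ctx,
                                           vpx_codec_enc_cfg_t *cfg,
                                           int spatial_layers,
                                           int temporal_layers) {
  for (int tl = 0; tl < temporal_layers; ++tl) {
    const int idx = (spatial_layers - 1) * temporal_layers + tl;
    cfg->rc_target_bitrate -= cfg->layer_target_bitrate[idx];
    cfg->layer_target_bitrate[idx] = 0;  // a zero rate skips this layer
  }
  return vpx_codec_enc_config_set(ctx, cfg);
}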
|
||||
|
||||
// Check basic rate targeting for 1 pass CBR SVC: 3 spatial layers and 3
// temporal layers. Run CIF clip with 1 thread, and few short key frame periods.
TEST_P(DatarateOnePassCbrSvc, OnePassCbrSvc3SL3TLSmallKf) {
@@ -1624,20 +2027,25 @@ TEST_P(DatarateOnePassCbrSvc, OnePassCbrSvc3SL3TLSmallKf) {
|
||||
svc_params_.scaling_factor_num[2] = 288;
|
||||
svc_params_.scaling_factor_den[2] = 288;
|
||||
cfg_.rc_dropframe_thresh = 10;
|
||||
::libvpx_test::Y4mVideoSource video("niklas_1280_720_30.y4m", 0, 300);
|
||||
cfg_.rc_target_bitrate = 800;
|
||||
number_spatial_layers_ = cfg_.ss_number_layers;
|
||||
number_temporal_layers_ = cfg_.ts_number_layers;
|
||||
::libvpx_test::I420VideoSource video("niklas_640_480_30.yuv", 640, 480, 30, 1,
|
||||
0, 400);
|
||||
top_sl_width_ = 640;
|
||||
top_sl_height_ = 480;
|
||||
// For this 3 temporal layer case, pattern repeats every 4 frames, so choose
// 4 neighboring key frame periods (so key frame will land on 0-2-1-2).
|
||||
for (int j = 32; j <= 35; j++) {
|
||||
cfg_.kf_max_dist = j;
|
||||
ResetModel();
|
||||
assign_layer_bitrates(&cfg_, &svc_params_, cfg_.ss_number_layers,
|
||||
cfg_.ts_number_layers, cfg_.temporal_layering_mode);
|
||||
cfg_.ts_number_layers, cfg_.temporal_layering_mode,
|
||||
layer_target_avg_bandwidth_, bits_in_buffer_model_);
|
||||
ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
|
||||
ASSERT_GE(cfg_.rc_target_bitrate, file_datarate_ * 0.80)
|
||||
<< " The datarate for the file exceeds the target by too much!";
|
||||
ASSERT_LE(cfg_.rc_target_bitrate, file_datarate_ * 1.30)
|
||||
<< " The datarate for the file is lower than the target by too much!";
|
||||
CheckLayerRateTargeting(&cfg_, number_spatial_layers_,
|
||||
number_temporal_layers_, file_datarate_, 0.78,
|
||||
1.15);
|
||||
}
|
||||
}
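The 0-2-1-2 note in the comment inside the test above maps out as follows; the helper is an illustrative sketch added here for clarity, not code from this change. It shows which temporal layer each frame of the 4-frame cycle falls on, which is why key frame distances of 32 through 35 land the key frame on every slot of the pattern.

// Illustrative only: temporal layer id of a frame in the 4-frame cycle used by
// the 3 temporal layer (0-2-1-2) pattern.
static int TemporalLayerIdFor3TLPattern(unsigned int frame) {
  switch (frame % 4) {
    case 0: return 0;   // base layer
    case 2: return 1;   // middle layer
    default: return 2;  // frames 1 and 3: top layer
  }
}
// kf_max_dist of 32, 33, 34 and 35 gives frame positions 0, 1, 2 and 3 in the
// cycle, i.e. key frames on temporal layers 0, 2, 1 and 2 respectively.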
|
||||
|
||||
@@ -1667,22 +2075,25 @@ TEST_P(DatarateOnePassCbrSvc, OnePassCbrSvc3SL3TL4threads) {
|
||||
svc_params_.scaling_factor_den[2] = 288;
|
||||
cfg_.rc_dropframe_thresh = 0;
|
||||
cfg_.kf_max_dist = 9999;
|
||||
::libvpx_test::Y4mVideoSource video("niklas_1280_720_30.y4m", 0, 300);
|
||||
number_spatial_layers_ = cfg_.ss_number_layers;
|
||||
number_temporal_layers_ = cfg_.ts_number_layers;
|
||||
::libvpx_test::Y4mVideoSource video("niklas_1280_720_30.y4m", 0, 60);
|
||||
top_sl_width_ = 1280;
|
||||
top_sl_height_ = 720;
|
||||
cfg_.rc_target_bitrate = 800;
|
||||
ResetModel();
|
||||
assign_layer_bitrates(&cfg_, &svc_params_, cfg_.ss_number_layers,
|
||||
cfg_.ts_number_layers, cfg_.temporal_layering_mode);
|
||||
cfg_.ts_number_layers, cfg_.temporal_layering_mode,
|
||||
layer_target_avg_bandwidth_, bits_in_buffer_model_);
|
||||
ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
|
||||
ASSERT_GE(cfg_.rc_target_bitrate, file_datarate_ * 0.78)
|
||||
<< " The datarate for the file exceeds the target by too much!";
|
||||
ASSERT_LE(cfg_.rc_target_bitrate, file_datarate_ * 1.22)
|
||||
<< " The datarate for the file is lower than the target by too much!";
|
||||
CheckLayerRateTargeting(&cfg_, number_spatial_layers_,
|
||||
number_temporal_layers_, file_datarate_, 0.78, 1.15);
|
||||
#if CONFIG_VP9_DECODER
|
||||
// Number of temporal layers > 1, so half of the frames in this SVC pattern
|
||||
// will be non-reference frame and hence encoder will avoid loopfilter.
|
||||
// Since frame dropper is off, we can expcet 150 (half of the sequence)
|
||||
// Since frame dropper is off, we can expect 30 (half of the sequence)
|
||||
// mismatched frames.
|
||||
EXPECT_EQ(static_cast<unsigned int>(150), GetMismatchFrames());
|
||||
EXPECT_EQ(static_cast<unsigned int>(30), GetMismatchFrames());
|
||||
#endif
|
||||
}
|
||||
|
||||
@@ -1714,9 +2125,21 @@ TEST_P(DatarateOnePassCbrSvc, OnePassCbrSvc2SL1TL5x5MultipleRuns) {
|
||||
cfg_.layer_target_bitrate[0] = 300;
|
||||
cfg_.layer_target_bitrate[1] = 1400;
|
||||
cfg_.rc_target_bitrate = 1700;
|
||||
::libvpx_test::Y4mVideoSource video("niklas_1280_720_30.y4m", 0, 300);
|
||||
number_spatial_layers_ = cfg_.ss_number_layers;
|
||||
number_temporal_layers_ = cfg_.ts_number_layers;
|
||||
ResetModel();
|
||||
layer_target_avg_bandwidth_[0] = cfg_.layer_target_bitrate[0] * 1000 / 30;
|
||||
bits_in_buffer_model_[0] =
|
||||
cfg_.layer_target_bitrate[0] * cfg_.rc_buf_initial_sz;
|
||||
layer_target_avg_bandwidth_[1] = cfg_.layer_target_bitrate[1] * 1000 / 30;
|
||||
bits_in_buffer_model_[1] =
|
||||
cfg_.layer_target_bitrate[1] * cfg_.rc_buf_initial_sz;
|
||||
::libvpx_test::Y4mVideoSource video("niklas_1280_720_30.y4m", 0, 60);
|
||||
top_sl_width_ = 1280;
|
||||
top_sl_height_ = 720;
|
||||
ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
|
||||
CheckLayerRateTargeting(&cfg_, number_spatial_layers_,
|
||||
number_temporal_layers_, file_datarate_, 0.78, 1.15);
|
||||
EXPECT_EQ(static_cast<unsigned int>(0), GetMismatchFrames());
|
||||
}
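The hand-seeded rate model in this test uses two simple conversions, sketched below for clarity (the function names are illustrative, not part of the test): the average layer bandwidth is the layer bitrate in kbps converted to bits per frame, and the initial buffer level is the buffer's initial size in milliseconds filled at the layer's target rate.

#include <stdint.h>

// kbps -> bits per second -> bits per frame.
static int LayerAvgBandwidth(int layer_bitrate_kbps, int framerate) {
  return layer_bitrate_kbps * 1000 / framerate;
}

// Initial buffer fullness in bits: kbits/s * ms == bits.
static int64_t InitialBufferBits(int layer_bitrate_kbps, int buf_initial_ms) {
  return static_cast<int64_t>(layer_bitrate_kbps) * buf_initial_ms;
}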
|
||||
|
||||
@@ -1729,6 +2152,9 @@ VP9_INSTANTIATE_TEST_CASE(DatarateTestVP9Large,
|
||||
::testing::Values(::libvpx_test::kOnePassGood,
|
||||
::libvpx_test::kRealTime),
|
||||
::testing::Range(2, 9));
|
||||
VP9_INSTANTIATE_TEST_CASE(DatarateTestVP9RealTime,
|
||||
::testing::Values(::libvpx_test::kRealTime),
|
||||
::testing::Range(5, 9));
|
||||
#if CONFIG_VP9_TEMPORAL_DENOISING
|
||||
VP9_INSTANTIATE_TEST_CASE(DatarateTestVP9LargeDenoiser,
|
||||
::testing::Values(::libvpx_test::kRealTime),
|
||||
|
@@ -28,8 +28,8 @@
|
||||
|
||||
using libvpx_test::ACMRandom;
|
||||
using libvpx_test::Buffer;
|
||||
using std::tr1::tuple;
|
||||
using std::tr1::make_tuple;
|
||||
using std::tr1::tuple;
|
||||
|
||||
namespace {
|
||||
typedef void (*PartialFdctFunc)(const int16_t *in, tran_low_t *out, int stride);
|
||||
|
1090
test/dct_test.cc
File diff suppressed because it is too large
@@ -106,4 +106,90 @@ TEST(EncodeAPI, ImageSizeSetting) {
|
||||
}
|
||||
#endif
|
||||
|
||||
// Set up 2 spatial streams with 2 temporal layers per stream, and generate
// invalid configuration by setting the temporal layer rate allocation
// (ts_target_bitrate[]) to 0 for both layers. This should fail independent of
// CONFIG_MULTI_RES_ENCODING.
TEST(EncodeAPI, MultiResEncode) {
|
||||
static const vpx_codec_iface_t *kCodecs[] = {
|
||||
#if CONFIG_VP8_ENCODER
|
||||
&vpx_codec_vp8_cx_algo,
|
||||
#endif
|
||||
#if CONFIG_VP9_ENCODER
|
||||
&vpx_codec_vp9_cx_algo,
|
||||
#endif
|
||||
};
|
||||
const int width = 1280;
|
||||
const int height = 720;
|
||||
const int width_down = width / 2;
|
||||
const int height_down = height / 2;
|
||||
const int target_bitrate = 1000;
|
||||
const int framerate = 30;
|
||||
|
||||
for (int c = 0; c < NELEMENTS(kCodecs); ++c) {
|
||||
const vpx_codec_iface_t *const iface = kCodecs[c];
|
||||
vpx_codec_ctx_t enc[2];
|
||||
vpx_codec_enc_cfg_t cfg[2];
|
||||
vpx_rational_t dsf[2] = { { 2, 1 }, { 2, 1 } };
|
||||
|
||||
memset(enc, 0, sizeof(enc));
|
||||
|
||||
for (int i = 0; i < 2; i++) {
|
||||
vpx_codec_enc_config_default(iface, &cfg[i], 0);
|
||||
}
|
||||
|
||||
/* Highest-resolution encoder settings */
|
||||
cfg[0].g_w = width;
|
||||
cfg[0].g_h = height;
|
||||
cfg[0].rc_dropframe_thresh = 0;
|
||||
cfg[0].rc_end_usage = VPX_CBR;
|
||||
cfg[0].rc_resize_allowed = 0;
|
||||
cfg[0].rc_min_quantizer = 2;
|
||||
cfg[0].rc_max_quantizer = 56;
|
||||
cfg[0].rc_undershoot_pct = 100;
|
||||
cfg[0].rc_overshoot_pct = 15;
|
||||
cfg[0].rc_buf_initial_sz = 500;
|
||||
cfg[0].rc_buf_optimal_sz = 600;
|
||||
cfg[0].rc_buf_sz = 1000;
|
||||
cfg[0].g_error_resilient = 1; /* Enable error resilient mode */
|
||||
cfg[0].g_lag_in_frames = 0;
|
||||
|
||||
cfg[0].kf_mode = VPX_KF_AUTO;
|
||||
cfg[0].kf_min_dist = 3000;
|
||||
cfg[0].kf_max_dist = 3000;
|
||||
|
||||
cfg[0].rc_target_bitrate = target_bitrate; /* Set target bitrate */
|
||||
cfg[0].g_timebase.num = 1; /* Set fps */
|
||||
cfg[0].g_timebase.den = framerate;
|
||||
|
||||
memcpy(&cfg[1], &cfg[0], sizeof(cfg[0]));
|
||||
cfg[1].rc_target_bitrate = 500;
|
||||
cfg[1].g_w = width_down;
|
||||
cfg[1].g_h = height_down;
|
||||
|
||||
for (int i = 0; i < 2; i++) {
|
||||
cfg[i].ts_number_layers = 2;
|
||||
cfg[i].ts_periodicity = 2;
|
||||
cfg[i].ts_rate_decimator[0] = 2;
|
||||
cfg[i].ts_rate_decimator[1] = 1;
|
||||
cfg[i].ts_layer_id[0] = 0;
|
||||
cfg[i].ts_layer_id[1] = 1;
|
||||
// Invalid parameters.
|
||||
cfg[i].ts_target_bitrate[0] = 0;
|
||||
cfg[i].ts_target_bitrate[1] = 0;
|
||||
}
|
||||
|
||||
// VP9 should report incapable, VP8 invalid for all configurations.
|
||||
const char kVP9Name[] = "WebM Project VP9";
|
||||
const bool is_vp9 = strncmp(kVP9Name, vpx_codec_iface_name(iface),
|
||||
sizeof(kVP9Name) - 1) == 0;
|
||||
EXPECT_EQ(is_vp9 ? VPX_CODEC_INCAPABLE : VPX_CODEC_INVALID_PARAM,
|
||||
vpx_codec_enc_init_multi(&enc[0], iface, &cfg[0], 2, 0, &dsf[0]));
|
||||
|
||||
for (int i = 0; i < 2; i++) {
|
||||
vpx_codec_destroy(&enc[i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
@@ -201,6 +201,8 @@ void EncoderTest::RunLoop(VideoSource *video) {
|
||||
PreEncodeFrameHook(video, encoder.get());
|
||||
encoder->EncodeFrame(video, frame_flags_);
|
||||
|
||||
PostEncodeFrameHook(encoder.get());
|
||||
|
||||
CxDataIterator iter = encoder->GetCxData();
|
||||
|
||||
bool has_cxdata = false;
|
||||
|
@@ -128,6 +128,11 @@ class Encoder {
|
||||
ASSERT_EQ(VPX_CODEC_OK, res) << EncoderError();
|
||||
}
|
||||
|
||||
void Control(int ctrl_id, struct vpx_svc_ref_frame_config *arg) {
|
||||
const vpx_codec_err_t res = vpx_codec_control_(&encoder_, ctrl_id, arg);
|
||||
ASSERT_EQ(VPX_CODEC_OK, res) << EncoderError();
|
||||
}
|
||||
|
||||
void Control(int ctrl_id, struct vpx_svc_parameters *arg) {
|
||||
const vpx_codec_err_t res = vpx_codec_control_(&encoder_, ctrl_id, arg);
|
||||
ASSERT_EQ(VPX_CODEC_OK, res) << EncoderError();
|
||||
@@ -137,15 +142,12 @@ class Encoder {
|
||||
const vpx_codec_err_t res = vpx_codec_control_(&encoder_, ctrl_id, arg);
|
||||
ASSERT_EQ(VPX_CODEC_OK, res) << EncoderError();
|
||||
}
|
||||
#endif
|
||||
|
||||
#if CONFIG_VP8_ENCODER
|
||||
void Control(int ctrl_id, vpx_roi_map_t *arg) {
|
||||
const vpx_codec_err_t res = vpx_codec_control_(&encoder_, ctrl_id, arg);
|
||||
ASSERT_EQ(VPX_CODEC_OK, res) << EncoderError();
|
||||
}
|
||||
#endif
|
||||
|
||||
void Config(const vpx_codec_enc_cfg_t *cfg) {
|
||||
const vpx_codec_err_t res = vpx_codec_enc_config_set(&encoder_, cfg);
|
||||
ASSERT_EQ(VPX_CODEC_OK, res) << EncoderError();
|
||||
@@ -219,6 +221,8 @@ class EncoderTest {
|
||||
virtual void PreEncodeFrameHook(VideoSource * /*video*/,
|
||||
Encoder * /*encoder*/) {}
|
||||
|
||||
virtual void PostEncodeFrameHook(Encoder * /*encoder*/) {}
|
||||
|
||||
// Hook to be called on every compressed data packet.
|
||||
virtual void FramePktHook(const vpx_codec_cx_pkt_t * /*pkt*/) {}
|
||||
|
||||
|
@@ -675,7 +675,9 @@ INSTANTIATE_TEST_CASE_P(NEON, FwdTrans8x8DCT,
|
||||
::testing::Values(make_tuple(&vpx_fdct8x8_neon,
|
||||
&vpx_idct8x8_64_add_neon,
|
||||
0, VPX_BITS_8)));
|
||||
#if !CONFIG_VP9_HIGHBITDEPTH
|
||||
// TODO(linfengz): reenable these functions once test vector failures are
|
||||
// addressed.
|
||||
#if 0 // !CONFIG_VP9_HIGHBITDEPTH
|
||||
INSTANTIATE_TEST_CASE_P(
|
||||
NEON, FwdTrans8x8HT,
|
||||
::testing::Values(
|
||||
|
@@ -174,4 +174,4 @@ INSTANTIATE_TEST_CASE_P(MSA, IDCTTest,
|
||||
INSTANTIATE_TEST_CASE_P(MMI, IDCTTest,
|
||||
::testing::Values(vp8_short_idct4x4llm_mmi));
|
||||
#endif // HAVE_MMI
|
||||
}
|
||||
} // namespace
|
||||
|
@@ -123,6 +123,7 @@ TEST_P(InvalidFileTest, ReturnCode) { RunTest(); }
|
||||
#if CONFIG_VP8_DECODER
|
||||
const DecodeParam kVP8InvalidFileTests[] = {
|
||||
{ 1, "invalid-bug-1443.ivf" },
|
||||
{ 1, "invalid-token-partition.ivf" },
|
||||
};
|
||||
|
||||
VP8_INSTANTIATE_TEST_CASE(InvalidFileTest,
|
||||
|
@@ -114,6 +114,18 @@ void InitInput(Pixel *s, Pixel *ref_s, ACMRandom *rnd, const uint8_t limit,
|
||||
}
|
||||
}
|
||||
|
||||
uint8_t GetOuterThresh(ACMRandom *rnd) {
|
||||
return static_cast<uint8_t>(rnd->RandRange(3 * MAX_LOOP_FILTER + 5));
|
||||
}
|
||||
|
||||
uint8_t GetInnerThresh(ACMRandom *rnd) {
|
||||
return static_cast<uint8_t>(rnd->RandRange(MAX_LOOP_FILTER + 1));
|
||||
}
|
||||
|
||||
uint8_t GetHevThresh(ACMRandom *rnd) {
|
||||
return static_cast<uint8_t>(rnd->RandRange(MAX_LOOP_FILTER + 1) >> 4);
|
||||
}
|
||||
|
||||
class Loop8Test6Param : public ::testing::TestWithParam<loop8_param_t> {
|
||||
public:
|
||||
virtual ~Loop8Test6Param() {}
|
||||
@@ -162,15 +174,15 @@ TEST_P(Loop8Test6Param, OperationCheck) {
|
||||
int first_failure = -1;
|
||||
for (int i = 0; i < count_test_block; ++i) {
|
||||
int err_count = 0;
|
||||
uint8_t tmp = static_cast<uint8_t>(rnd(3 * MAX_LOOP_FILTER + 4));
|
||||
uint8_t tmp = GetOuterThresh(&rnd);
|
||||
DECLARE_ALIGNED(16, const uint8_t,
|
||||
blimit[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
|
||||
tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };
|
||||
tmp = static_cast<uint8_t>(rnd(MAX_LOOP_FILTER));
|
||||
tmp = GetInnerThresh(&rnd);
|
||||
DECLARE_ALIGNED(16, const uint8_t,
|
||||
limit[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
|
||||
tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };
|
||||
tmp = rnd.Rand8();
|
||||
tmp = GetHevThresh(&rnd);
|
||||
DECLARE_ALIGNED(16, const uint8_t,
|
||||
thresh[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
|
||||
tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };
|
||||
@@ -221,15 +233,15 @@ TEST_P(Loop8Test6Param, ValueCheck) {
|
||||
|
||||
for (int i = 0; i < count_test_block; ++i) {
|
||||
int err_count = 0;
|
||||
uint8_t tmp = static_cast<uint8_t>(rnd(3 * MAX_LOOP_FILTER + 4));
|
||||
uint8_t tmp = GetOuterThresh(&rnd);
|
||||
DECLARE_ALIGNED(16, const uint8_t,
|
||||
blimit[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
|
||||
tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };
|
||||
tmp = static_cast<uint8_t>(rnd(MAX_LOOP_FILTER));
|
||||
tmp = GetInnerThresh(&rnd);
|
||||
DECLARE_ALIGNED(16, const uint8_t,
|
||||
limit[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
|
||||
tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };
|
||||
tmp = rnd.Rand8();
|
||||
tmp = GetHevThresh(&rnd);
|
||||
DECLARE_ALIGNED(16, const uint8_t,
|
||||
thresh[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
|
||||
tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };
|
||||
@@ -271,27 +283,27 @@ TEST_P(Loop8Test9Param, OperationCheck) {
|
||||
int first_failure = -1;
|
||||
for (int i = 0; i < count_test_block; ++i) {
|
||||
int err_count = 0;
|
||||
uint8_t tmp = static_cast<uint8_t>(rnd(3 * MAX_LOOP_FILTER + 4));
|
||||
uint8_t tmp = GetOuterThresh(&rnd);
|
||||
DECLARE_ALIGNED(16, const uint8_t,
|
||||
blimit0[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
|
||||
tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };
|
||||
tmp = static_cast<uint8_t>(rnd(MAX_LOOP_FILTER));
|
||||
tmp = GetInnerThresh(&rnd);
|
||||
DECLARE_ALIGNED(16, const uint8_t,
|
||||
limit0[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
|
||||
tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };
|
||||
tmp = rnd.Rand8();
|
||||
tmp = GetHevThresh(&rnd);
|
||||
DECLARE_ALIGNED(16, const uint8_t,
|
||||
thresh0[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
|
||||
tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };
|
||||
tmp = static_cast<uint8_t>(rnd(3 * MAX_LOOP_FILTER + 4));
|
||||
tmp = GetOuterThresh(&rnd);
|
||||
DECLARE_ALIGNED(16, const uint8_t,
|
||||
blimit1[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
|
||||
tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };
|
||||
tmp = static_cast<uint8_t>(rnd(MAX_LOOP_FILTER));
|
||||
tmp = GetInnerThresh(&rnd);
|
||||
DECLARE_ALIGNED(16, const uint8_t,
|
||||
limit1[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
|
||||
tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };
|
||||
tmp = rnd.Rand8();
|
||||
tmp = GetHevThresh(&rnd);
|
||||
DECLARE_ALIGNED(16, const uint8_t,
|
||||
thresh1[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
|
||||
tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };
|
||||
@@ -334,27 +346,27 @@ TEST_P(Loop8Test9Param, ValueCheck) {
|
||||
int first_failure = -1;
|
||||
for (int i = 0; i < count_test_block; ++i) {
|
||||
int err_count = 0;
|
||||
uint8_t tmp = static_cast<uint8_t>(rnd(3 * MAX_LOOP_FILTER + 4));
|
||||
uint8_t tmp = GetOuterThresh(&rnd);
|
||||
DECLARE_ALIGNED(16, const uint8_t,
|
||||
blimit0[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
|
||||
tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };
|
||||
tmp = static_cast<uint8_t>(rnd(MAX_LOOP_FILTER));
|
||||
tmp = GetInnerThresh(&rnd);
|
||||
DECLARE_ALIGNED(16, const uint8_t,
|
||||
limit0[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
|
||||
tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };
|
||||
tmp = rnd.Rand8();
|
||||
tmp = GetHevThresh(&rnd);
|
||||
DECLARE_ALIGNED(16, const uint8_t,
|
||||
thresh0[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
|
||||
tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };
|
||||
tmp = static_cast<uint8_t>(rnd(3 * MAX_LOOP_FILTER + 4));
|
||||
tmp = GetOuterThresh(&rnd);
|
||||
DECLARE_ALIGNED(16, const uint8_t,
|
||||
blimit1[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
|
||||
tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };
|
||||
tmp = static_cast<uint8_t>(rnd(MAX_LOOP_FILTER));
|
||||
tmp = GetInnerThresh(&rnd);
|
||||
DECLARE_ALIGNED(16, const uint8_t,
|
||||
limit1[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
|
||||
tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };
|
||||
tmp = rnd.Rand8();
|
||||
tmp = GetHevThresh(&rnd);
|
||||
DECLARE_ALIGNED(16, const uint8_t,
|
||||
thresh1[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
|
||||
tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };
|
||||
|
@@ -277,12 +277,29 @@ class ResizeTest
|
||||
SetMode(GET_PARAM(1));
|
||||
}
|
||||
|
||||
virtual void FramePktHook(const vpx_codec_cx_pkt_t *pkt) {
|
||||
ASSERT_NE(static_cast<int>(pkt->data.frame.width[0]), 0);
|
||||
ASSERT_NE(static_cast<int>(pkt->data.frame.height[0]), 0);
|
||||
encode_frame_width_.push_back(pkt->data.frame.width[0]);
|
||||
encode_frame_height_.push_back(pkt->data.frame.height[0]);
|
||||
}
|
||||
|
||||
unsigned int GetFrameWidth(size_t idx) const {
|
||||
return encode_frame_width_[idx];
|
||||
}
|
||||
|
||||
unsigned int GetFrameHeight(size_t idx) const {
|
||||
return encode_frame_height_[idx];
|
||||
}
|
||||
|
||||
virtual void DecompressedFrameHook(const vpx_image_t &img,
|
||||
vpx_codec_pts_t pts) {
|
||||
frame_info_list_.push_back(FrameInfo(pts, img.d_w, img.d_h));
|
||||
}
|
||||
|
||||
std::vector<FrameInfo> frame_info_list_;
|
||||
std::vector<unsigned int> encode_frame_width_;
|
||||
std::vector<unsigned int> encode_frame_height_;
|
||||
};
|
||||
|
||||
TEST_P(ResizeTest, TestExternalResizeWorks) {
|
||||
@@ -296,6 +313,9 @@ TEST_P(ResizeTest, TestExternalResizeWorks) {
|
||||
const unsigned int frame = static_cast<unsigned>(info->pts);
|
||||
unsigned int expected_w;
|
||||
unsigned int expected_h;
|
||||
const size_t idx = info - frame_info_list_.begin();
|
||||
ASSERT_EQ(info->w, GetFrameWidth(idx));
|
||||
ASSERT_EQ(info->h, GetFrameHeight(idx));
|
||||
ScaleForFrameNumber(frame, kInitialWidth, kInitialHeight, &expected_w,
|
||||
&expected_h, 0);
|
||||
EXPECT_EQ(expected_w, info->w)
|
||||
@@ -464,8 +484,23 @@ class ResizeRealtimeTest
|
||||
++mismatch_nframes_;
|
||||
}
|
||||
|
||||
virtual void FramePktHook(const vpx_codec_cx_pkt_t *pkt) {
|
||||
ASSERT_NE(static_cast<int>(pkt->data.frame.width[0]), 0);
|
||||
ASSERT_NE(static_cast<int>(pkt->data.frame.height[0]), 0);
|
||||
encode_frame_width_.push_back(pkt->data.frame.width[0]);
|
||||
encode_frame_height_.push_back(pkt->data.frame.height[0]);
|
||||
}
|
||||
|
||||
unsigned int GetMismatchFrames() { return mismatch_nframes_; }
|
||||
|
||||
unsigned int GetFrameWidth(size_t idx) const {
|
||||
return encode_frame_width_[idx];
|
||||
}
|
||||
|
||||
unsigned int GetFrameHeight(size_t idx) const {
|
||||
return encode_frame_height_[idx];
|
||||
}
|
||||
|
||||
void DefaultConfig() {
|
||||
cfg_.rc_buf_initial_sz = 500;
|
||||
cfg_.rc_buf_optimal_sz = 600;
|
||||
@@ -493,6 +528,8 @@ class ResizeRealtimeTest
|
||||
bool change_bitrate_;
|
||||
double mismatch_psnr_;
|
||||
int mismatch_nframes_;
|
||||
std::vector<unsigned int> encode_frame_width_;
|
||||
std::vector<unsigned int> encode_frame_height_;
|
||||
};
|
||||
|
||||
TEST_P(ResizeRealtimeTest, TestExternalResizeWorks) {
|
||||
@@ -582,6 +619,9 @@ TEST_P(ResizeRealtimeTest, TestInternalResizeDownUpChangeBitRate) {
|
||||
int resize_count = 0;
|
||||
for (std::vector<FrameInfo>::const_iterator info = frame_info_list_.begin();
|
||||
info != frame_info_list_.end(); ++info) {
|
||||
const size_t idx = info - frame_info_list_.begin();
|
||||
ASSERT_EQ(info->w, GetFrameWidth(idx));
|
||||
ASSERT_EQ(info->h, GetFrameHeight(idx));
|
||||
if (info->w != last_w || info->h != last_h) {
|
||||
resize_count++;
|
||||
if (resize_count == 1) {
|
||||
|
@@ -112,8 +112,9 @@ INSTANTIATE_TEST_CASE_P(
|
||||
#endif // HAVE_SSE2
|
||||
|
||||
#if HAVE_MSA
|
||||
INSTANTIATE_TEST_CASE_P(MSA, SumSquaresTest, ::testing::Values(make_tuple(
|
||||
&vpx_sum_squares_2d_i16_c,
|
||||
&vpx_sum_squares_2d_i16_msa)));
|
||||
INSTANTIATE_TEST_CASE_P(
|
||||
MSA, SumSquaresTest,
|
||||
::testing::Values(make_tuple(&vpx_sum_squares_2d_i16_c,
|
||||
&vpx_sum_squares_2d_i16_msa)));
|
||||
#endif // HAVE_MSA
|
||||
} // namespace
|
||||
|
@@ -734,6 +734,8 @@ endif # CONFIG_VP9_HIGHBITDEPTH
|
||||
# Invalid files for testing libvpx error checking.
|
||||
LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += invalid-bug-1443.ivf
|
||||
LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += invalid-bug-1443.ivf.res
|
||||
LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += invalid-token-partition.ivf
|
||||
LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += invalid-token-partition.ivf.res
|
||||
LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += invalid-vp80-00-comprehensive-018.ivf.2kf_0x6.ivf
|
||||
LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += invalid-vp80-00-comprehensive-018.ivf.2kf_0x6.ivf.res
|
||||
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-01-v3.webm
|
||||
|
@@ -852,5 +852,7 @@ e402cbbf9e550ae017a1e9f1f73931c1d18474e8 *invalid-crbug-667044.webm
|
||||
d3964f9dad9f60363c81b688324d95b4ec7c8038 *invalid-crbug-667044.webm.res
|
||||
fd9df7f3f6992af1d7a9dde975c9a0d6f28c053d *invalid-bug-1443.ivf
|
||||
fd3020fa6e9ca5966206738654c97dec313b0a95 *invalid-bug-1443.ivf.res
|
||||
1a0e405606939f2febab1a21b30c37cb8f2c8cb1 *invalid-token-partition.ivf
|
||||
90a8a95e7024f015b87f5483a65036609b3d1b74 *invalid-token-partition.ivf.res
|
||||
17696cd21e875f1d6e5d418cbf89feab02c8850a *vp90-2-22-svc_1280x720_1.webm
|
||||
e2f9e1e47a791b4e939a9bdc50bf7a25b3761f77 *vp90-2-22-svc_1280x720_1.webm.md5
|
||||
|
@@ -61,7 +61,6 @@ int main(int argc, char **argv) {
|
||||
#if !CONFIG_SHARED
|
||||
// Shared library builds don't support whitebox tests
|
||||
// that exercise internal symbols.
|
||||
|
||||
#if CONFIG_VP8
|
||||
vp8_rtcd();
|
||||
#endif // CONFIG_VP8
|
||||
|
@@ -27,8 +27,8 @@
|
||||
|
||||
namespace {
|
||||
|
||||
using std::string;
|
||||
using libvpx_test::ACMRandom;
|
||||
using std::string;
|
||||
|
||||
#if CONFIG_WEBM_IO
|
||||
|
||||
|
@@ -59,7 +59,7 @@ const TestVideoParam kTestVectors[] = {
|
||||
// Encoding modes tested
|
||||
const libvpx_test::TestMode kEncodingModeVectors[] = {
|
||||
::libvpx_test::kTwoPassGood, ::libvpx_test::kOnePassGood,
|
||||
::libvpx_test::kRealTime,
|
||||
::libvpx_test::kRealTime
|
||||
};
|
||||
|
||||
// Speed settings tested
|
||||
|
@@ -22,7 +22,7 @@ namespace {
|
||||
// Encoding modes
|
||||
const libvpx_test::TestMode kEncodingModeVectors[] = {
|
||||
::libvpx_test::kTwoPassGood, ::libvpx_test::kOnePassGood,
|
||||
::libvpx_test::kRealTime,
|
||||
::libvpx_test::kRealTime
|
||||
};
|
||||
|
||||
// Encoding speeds
|
||||
|
@@ -14,9 +14,9 @@
|
||||
|
||||
#include "third_party/googletest/src/include/gtest/gtest.h"
|
||||
|
||||
#include "./vp9_rtcd.h"
|
||||
#include "./vpx_config.h"
|
||||
#include "./vpx_dsp_rtcd.h"
|
||||
#include "./vp9_rtcd.h"
|
||||
#include "test/acm_random.h"
|
||||
#include "test/buffer.h"
|
||||
#include "test/clear_system_state.h"
|
||||
@@ -42,7 +42,7 @@ typedef void (*QuantizeFunc)(const tran_low_t *coeff, intptr_t count,
|
||||
uint16_t *eob, const int16_t *scan,
|
||||
const int16_t *iscan);
|
||||
typedef std::tr1::tuple<QuantizeFunc, QuantizeFunc, vpx_bit_depth_t,
|
||||
int /*max_size*/>
|
||||
int /*max_size*/, bool /*is_fp*/>
|
||||
QuantizeParam;
|
||||
|
||||
// Wrapper for FP version which does not use zbin or quant_shift.
|
||||
@@ -69,11 +69,15 @@ void QuantFPWrapper(const tran_low_t *coeff, intptr_t count, int skip_block,
|
||||
|
||||
class VP9QuantizeBase {
|
||||
public:
|
||||
VP9QuantizeBase(vpx_bit_depth_t bit_depth, int max_size)
|
||||
: bit_depth_(bit_depth), max_size_(max_size) {
|
||||
VP9QuantizeBase(vpx_bit_depth_t bit_depth, int max_size, bool is_fp)
|
||||
: bit_depth_(bit_depth), max_size_(max_size), is_fp_(is_fp) {
|
||||
max_value_ = (1 << bit_depth_) - 1;
|
||||
zbin_ptr_ =
|
||||
reinterpret_cast<int16_t *>(vpx_memalign(16, 8 * sizeof(*zbin_ptr_)));
|
||||
round_fp_ptr_ = reinterpret_cast<int16_t *>(
|
||||
vpx_memalign(16, 8 * sizeof(*round_fp_ptr_)));
|
||||
quant_fp_ptr_ = reinterpret_cast<int16_t *>(
|
||||
vpx_memalign(16, 8 * sizeof(*quant_fp_ptr_)));
|
||||
round_ptr_ =
|
||||
reinterpret_cast<int16_t *>(vpx_memalign(16, 8 * sizeof(*round_ptr_)));
|
||||
quant_ptr_ =
|
||||
@@ -86,11 +90,15 @@ class VP9QuantizeBase {
|
||||
|
||||
~VP9QuantizeBase() {
|
||||
vpx_free(zbin_ptr_);
|
||||
vpx_free(round_fp_ptr_);
|
||||
vpx_free(quant_fp_ptr_);
|
||||
vpx_free(round_ptr_);
|
||||
vpx_free(quant_ptr_);
|
||||
vpx_free(quant_shift_ptr_);
|
||||
vpx_free(dequant_ptr_);
|
||||
zbin_ptr_ = NULL;
|
||||
round_fp_ptr_ = NULL;
|
||||
quant_fp_ptr_ = NULL;
|
||||
round_ptr_ = NULL;
|
||||
quant_ptr_ = NULL;
|
||||
quant_shift_ptr_ = NULL;
|
||||
@@ -100,6 +108,8 @@ class VP9QuantizeBase {
|
||||
|
||||
protected:
|
||||
int16_t *zbin_ptr_;
|
||||
int16_t *round_fp_ptr_;
|
||||
int16_t *quant_fp_ptr_;
|
||||
int16_t *round_ptr_;
|
||||
int16_t *quant_ptr_;
|
||||
int16_t *quant_shift_ptr_;
|
||||
@@ -107,29 +117,136 @@ class VP9QuantizeBase {
|
||||
const vpx_bit_depth_t bit_depth_;
|
||||
int max_value_;
|
||||
const int max_size_;
|
||||
const bool is_fp_;
|
||||
};
|
||||
|
||||
class VP9QuantizeTest : public VP9QuantizeBase,
|
||||
public ::testing::TestWithParam<QuantizeParam> {
|
||||
public:
|
||||
VP9QuantizeTest()
|
||||
: VP9QuantizeBase(GET_PARAM(2), GET_PARAM(3)), quantize_op_(GET_PARAM(0)),
|
||||
ref_quantize_op_(GET_PARAM(1)) {}
|
||||
: VP9QuantizeBase(GET_PARAM(2), GET_PARAM(3), GET_PARAM(4)),
|
||||
quantize_op_(GET_PARAM(0)), ref_quantize_op_(GET_PARAM(1)) {}
|
||||
|
||||
protected:
|
||||
const QuantizeFunc quantize_op_;
|
||||
const QuantizeFunc ref_quantize_op_;
|
||||
};
|
||||
|
||||
// This quantizer compares the AC coefficients to the quantization step size to
// determine if further multiplication operations are needed.
// Based on vp9_quantize_fp_sse2().
|
||||
inline void quant_fp_nz(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
|
||||
int skip_block, const int16_t *round_ptr,
|
||||
const int16_t *quant_ptr, tran_low_t *qcoeff_ptr,
|
||||
tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr,
|
||||
uint16_t *eob_ptr, const int16_t *scan,
|
||||
const int16_t *iscan, int is_32x32) {
|
||||
int i, eob = -1;
|
||||
const int thr = dequant_ptr[1] >> (1 + is_32x32);
|
||||
(void)iscan;
|
||||
(void)skip_block;
|
||||
assert(!skip_block);
|
||||
|
||||
// Quantization pass: All coefficients with index >= zero_flag are
|
||||
// skippable. Note: zero_flag can be zero.
|
||||
for (i = 0; i < n_coeffs; i += 16) {
|
||||
int y;
|
||||
int nzflag_cnt = 0;
|
||||
int abs_coeff[16];
|
||||
int coeff_sign[16];
|
||||
|
||||
// count nzflag for each row (16 tran_low_t)
|
||||
for (y = 0; y < 16; ++y) {
|
||||
const int rc = i + y;
|
||||
const int coeff = coeff_ptr[rc];
|
||||
coeff_sign[y] = (coeff >> 31);
|
||||
abs_coeff[y] = (coeff ^ coeff_sign[y]) - coeff_sign[y];
|
||||
// The first 16 are skipped in the sse2 code. Do the same here to match.
|
||||
if (i >= 16 && (abs_coeff[y] <= thr)) {
|
||||
nzflag_cnt++;
|
||||
}
|
||||
}
|
||||
|
||||
for (y = 0; y < 16; ++y) {
|
||||
const int rc = i + y;
|
||||
// If all of the AC coeffs in a row has magnitude less than the
|
||||
// quantization step_size/2, quantize to zero.
|
||||
if (nzflag_cnt < 16) {
|
||||
int tmp;
|
||||
int _round;
|
||||
|
||||
if (is_32x32) {
|
||||
_round = ROUND_POWER_OF_TWO(round_ptr[rc != 0], 1);
|
||||
} else {
|
||||
_round = round_ptr[rc != 0];
|
||||
}
|
||||
tmp = clamp(abs_coeff[y] + _round, INT16_MIN, INT16_MAX);
|
||||
tmp = (tmp * quant_ptr[rc != 0]) >> (16 - is_32x32);
|
||||
qcoeff_ptr[rc] = (tmp ^ coeff_sign[y]) - coeff_sign[y];
|
||||
dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr[rc != 0];
|
||||
|
||||
if (is_32x32) {
|
||||
dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr[rc != 0] / 2;
|
||||
} else {
|
||||
dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr[rc != 0];
|
||||
}
|
||||
} else {
|
||||
qcoeff_ptr[rc] = 0;
|
||||
dqcoeff_ptr[rc] = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Scan for eob.
|
||||
for (i = 0; i < n_coeffs; i++) {
|
||||
// Use the scan order to find the correct eob.
|
||||
const int rc = scan[i];
|
||||
if (qcoeff_ptr[rc]) {
|
||||
eob = i;
|
||||
}
|
||||
}
|
||||
*eob_ptr = eob + 1;
|
||||
}
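To make the skip threshold above concrete, here is a small illustrative helper; the numeric example is an assumption, not taken from the test. With dequant_ptr[1] == 20, the threshold is 10 for block sizes up to 16x16 and 5 for 32x32, so a group of 16 coefficients (past the first 16) whose magnitudes all stay at or below it is zeroed without any multiplications.

#include <stdint.h>

// Mirrors the expression used above: thr = dequant_ptr[1] >> (1 + is_32x32).
static int QuantFpSkipThreshold(const int16_t *dequant_ptr, int is_32x32) {
  return dequant_ptr[1] >> (1 + is_32x32);
}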
|
||||
|
||||
void quantize_fp_nz_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
|
||||
int skip_block, const int16_t *round_ptr,
|
||||
const int16_t *quant_ptr, tran_low_t *qcoeff_ptr,
|
||||
tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr,
|
||||
uint16_t *eob_ptr, const int16_t *scan,
|
||||
const int16_t *iscan) {
|
||||
quant_fp_nz(coeff_ptr, n_coeffs, skip_block, round_ptr, quant_ptr, qcoeff_ptr,
|
||||
dqcoeff_ptr, dequant_ptr, eob_ptr, scan, iscan, 0);
|
||||
}
|
||||
|
||||
void quantize_fp_32x32_nz_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
|
||||
int skip_block, const int16_t *round_ptr,
|
||||
const int16_t *quant_ptr, tran_low_t *qcoeff_ptr,
|
||||
tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr,
|
||||
uint16_t *eob_ptr, const int16_t *scan,
|
||||
const int16_t *iscan) {
|
||||
quant_fp_nz(coeff_ptr, n_coeffs, skip_block, round_ptr, quant_ptr, qcoeff_ptr,
|
||||
dqcoeff_ptr, dequant_ptr, eob_ptr, scan, iscan, 1);
|
||||
}
|
||||
|
||||
void GenerateHelperArrays(ACMRandom *rnd, int16_t *zbin, int16_t *round,
|
||||
int16_t *quant, int16_t *quant_shift,
|
||||
int16_t *dequant) {
|
||||
int16_t *dequant, int16_t *round_fp,
|
||||
int16_t *quant_fp) {
|
||||
// Max when q == 0. Otherwise, it is 48 for Y and 42 for U/V.
|
||||
const int max_qrounding_factor_fp = 64;
|
||||
|
||||
for (int j = 0; j < 2; j++) {
|
||||
// The range is 4 to 1828 in the VP9 tables.
|
||||
const int qlookup = rnd->RandRange(1825) + 4;
|
||||
round_fp[j] = (max_qrounding_factor_fp * qlookup) >> 7;
|
||||
quant_fp[j] = (1 << 16) / qlookup;
|
||||
|
||||
// Values determined by deconstructing vp9_init_quantizer().
|
||||
// zbin may be up to 1143 for 8 and 10 bit Y values, or 1200 for 12 bit Y
|
||||
// values or U/V values of any bit depth. This is because y_delta is not
|
||||
// factored into the vp9_ac_quant() call.
|
||||
zbin[j] = rnd->RandRange(1200);
|
||||
|
||||
// round may be up to 685 for Y values or 914 for U/V.
|
||||
round[j] = rnd->RandRange(914);
|
||||
// quant ranges from 1 to -32703
|
||||
@@ -141,6 +258,8 @@ void GenerateHelperArrays(ACMRandom *rnd, int16_t *zbin, int16_t *round,
|
||||
}
|
||||
for (int j = 2; j < 8; j++) {
|
||||
zbin[j] = zbin[1];
|
||||
round_fp[j] = round_fp[1];
|
||||
quant_fp[j] = quant_fp[1];
|
||||
round[j] = round[1];
|
||||
quant[j] = quant[1];
|
||||
quant_shift[j] = quant_shift[1];
|
||||
@@ -179,19 +298,19 @@ TEST_P(VP9QuantizeTest, OperationCheck) {
|
||||
const int count = (4 << sz) * (4 << sz);
|
||||
coeff.Set(&rnd, -max_value_, max_value_);
|
||||
GenerateHelperArrays(&rnd, zbin_ptr_, round_ptr_, quant_ptr_,
|
||||
quant_shift_ptr_, dequant_ptr_);
|
||||
quant_shift_ptr_, dequant_ptr_, round_fp_ptr_,
|
||||
quant_fp_ptr_);
|
||||
int16_t *r_ptr = (is_fp_) ? round_fp_ptr_ : round_ptr_;
|
||||
int16_t *q_ptr = (is_fp_) ? quant_fp_ptr_ : quant_ptr_;
|
||||
ref_quantize_op_(coeff.TopLeftPixel(), count, skip_block, zbin_ptr_, r_ptr,
|
||||
q_ptr, quant_shift_ptr_, ref_qcoeff.TopLeftPixel(),
|
||||
ref_dqcoeff.TopLeftPixel(), dequant_ptr_, &ref_eob,
|
||||
scan_order->scan, scan_order->iscan);
|
||||
|
||||
ref_quantize_op_(coeff.TopLeftPixel(), count, skip_block, zbin_ptr_,
|
||||
round_ptr_, quant_ptr_, quant_shift_ptr_,
|
||||
ref_qcoeff.TopLeftPixel(), ref_dqcoeff.TopLeftPixel(),
|
||||
dequant_ptr_, &ref_eob, scan_order->scan,
|
||||
scan_order->iscan);
|
||||
|
||||
ASM_REGISTER_STATE_CHECK(
|
||||
quantize_op_(coeff.TopLeftPixel(), count, skip_block, zbin_ptr_,
|
||||
round_ptr_, quant_ptr_, quant_shift_ptr_,
|
||||
qcoeff.TopLeftPixel(), dqcoeff.TopLeftPixel(),
|
||||
dequant_ptr_, &eob, scan_order->scan, scan_order->iscan));
|
||||
ASM_REGISTER_STATE_CHECK(quantize_op_(
|
||||
coeff.TopLeftPixel(), count, skip_block, zbin_ptr_, r_ptr, q_ptr,
|
||||
quant_shift_ptr_, qcoeff.TopLeftPixel(), dqcoeff.TopLeftPixel(),
|
||||
dequant_ptr_, &eob, scan_order->scan, scan_order->iscan));
|
||||
|
||||
EXPECT_TRUE(qcoeff.CheckValues(ref_qcoeff));
|
||||
EXPECT_TRUE(dqcoeff.CheckValues(ref_dqcoeff));
|
||||
@@ -241,19 +360,19 @@ TEST_P(VP9QuantizeTest, EOBCheck) {
|
||||
coeff.TopLeftPixel()[rnd(count)] =
|
||||
static_cast<int>(rnd.RandRange(max_value_ * 2)) - max_value_;
|
||||
GenerateHelperArrays(&rnd, zbin_ptr_, round_ptr_, quant_ptr_,
|
||||
quant_shift_ptr_, dequant_ptr_);
|
||||
quant_shift_ptr_, dequant_ptr_, round_fp_ptr_,
|
||||
quant_fp_ptr_);
|
||||
int16_t *r_ptr = (is_fp_) ? round_fp_ptr_ : round_ptr_;
|
||||
int16_t *q_ptr = (is_fp_) ? quant_fp_ptr_ : quant_ptr_;
|
||||
ref_quantize_op_(coeff.TopLeftPixel(), count, skip_block, zbin_ptr_, r_ptr,
|
||||
q_ptr, quant_shift_ptr_, ref_qcoeff.TopLeftPixel(),
|
||||
ref_dqcoeff.TopLeftPixel(), dequant_ptr_, &ref_eob,
|
||||
scan_order->scan, scan_order->iscan);
|
||||
|
||||
ref_quantize_op_(coeff.TopLeftPixel(), count, skip_block, zbin_ptr_,
|
||||
round_ptr_, quant_ptr_, quant_shift_ptr_,
|
||||
ref_qcoeff.TopLeftPixel(), ref_dqcoeff.TopLeftPixel(),
|
||||
dequant_ptr_, &ref_eob, scan_order->scan,
|
||||
scan_order->iscan);
|
||||
|
||||
ASM_REGISTER_STATE_CHECK(
|
||||
quantize_op_(coeff.TopLeftPixel(), count, skip_block, zbin_ptr_,
|
||||
round_ptr_, quant_ptr_, quant_shift_ptr_,
|
||||
qcoeff.TopLeftPixel(), dqcoeff.TopLeftPixel(),
|
||||
dequant_ptr_, &eob, scan_order->scan, scan_order->iscan));
|
||||
ASM_REGISTER_STATE_CHECK(quantize_op_(
|
||||
coeff.TopLeftPixel(), count, skip_block, zbin_ptr_, r_ptr, q_ptr,
|
||||
quant_shift_ptr_, qcoeff.TopLeftPixel(), dqcoeff.TopLeftPixel(),
|
||||
dequant_ptr_, &eob, scan_order->scan, scan_order->iscan));
|
||||
|
||||
EXPECT_TRUE(qcoeff.CheckValues(ref_qcoeff));
|
||||
EXPECT_TRUE(dqcoeff.CheckValues(ref_dqcoeff));
|
||||
@@ -299,7 +418,10 @@ TEST_P(VP9QuantizeTest, DISABLED_Speed) {
|
||||
const int count = (4 << sz) * (4 << sz);
|
||||
|
||||
GenerateHelperArrays(&rnd, zbin_ptr_, round_ptr_, quant_ptr_,
|
||||
quant_shift_ptr_, dequant_ptr_);
|
||||
quant_shift_ptr_, dequant_ptr_, round_fp_ptr_,
|
||||
quant_fp_ptr_);
|
||||
int16_t *r_ptr = (is_fp_) ? round_fp_ptr_ : round_ptr_;
|
||||
int16_t *q_ptr = (is_fp_) ? quant_fp_ptr_ : quant_ptr_;
|
||||
|
||||
if (i == 0) {
|
||||
// When |coeff values| are less than zbin the results are 0.
|
||||
@@ -319,10 +441,10 @@ TEST_P(VP9QuantizeTest, DISABLED_Speed) {
|
||||
vpx_usec_timer timer;
|
||||
vpx_usec_timer_start(&timer);
|
||||
for (int j = 0; j < 100000000 / count; ++j) {
|
||||
quantize_op_(coeff.TopLeftPixel(), count, skip_block, zbin_ptr_,
|
||||
round_ptr_, quant_ptr_, quant_shift_ptr_,
|
||||
qcoeff.TopLeftPixel(), dqcoeff.TopLeftPixel(),
|
||||
dequant_ptr_, &eob, scan_order->scan, scan_order->iscan);
|
||||
quantize_op_(coeff.TopLeftPixel(), count, skip_block, zbin_ptr_, r_ptr,
|
||||
q_ptr, quant_shift_ptr_, qcoeff.TopLeftPixel(),
|
||||
dqcoeff.TopLeftPixel(), dequant_ptr_, &eob,
|
||||
scan_order->scan, scan_order->iscan);
|
||||
}
|
||||
vpx_usec_timer_mark(&timer);
|
||||
const int elapsed_time = static_cast<int>(vpx_usec_timer_elapsed(&timer));
|
||||
@@ -345,50 +467,54 @@ INSTANTIATE_TEST_CASE_P(
|
||||
SSE2, VP9QuantizeTest,
|
||||
::testing::Values(
|
||||
make_tuple(&vpx_highbd_quantize_b_sse2, &vpx_highbd_quantize_b_c,
|
||||
VPX_BITS_8, 16),
|
||||
VPX_BITS_8, 16, false),
|
||||
make_tuple(&vpx_highbd_quantize_b_sse2, &vpx_highbd_quantize_b_c,
|
||||
VPX_BITS_10, 16),
|
||||
VPX_BITS_10, 16, false),
|
||||
make_tuple(&vpx_highbd_quantize_b_sse2, &vpx_highbd_quantize_b_c,
|
||||
VPX_BITS_12, 16),
|
||||
VPX_BITS_12, 16, false),
|
||||
make_tuple(&vpx_highbd_quantize_b_32x32_sse2,
|
||||
&vpx_highbd_quantize_b_32x32_c, VPX_BITS_8, 32),
|
||||
&vpx_highbd_quantize_b_32x32_c, VPX_BITS_8, 32, false),
|
||||
make_tuple(&vpx_highbd_quantize_b_32x32_sse2,
|
||||
&vpx_highbd_quantize_b_32x32_c, VPX_BITS_10, 32),
|
||||
&vpx_highbd_quantize_b_32x32_c, VPX_BITS_10, 32, false),
|
||||
make_tuple(&vpx_highbd_quantize_b_32x32_sse2,
|
||||
&vpx_highbd_quantize_b_32x32_c, VPX_BITS_12, 32)));
|
||||
&vpx_highbd_quantize_b_32x32_c, VPX_BITS_12, 32, false)));
|
||||
|
||||
#else
|
||||
INSTANTIATE_TEST_CASE_P(SSE2, VP9QuantizeTest,
|
||||
::testing::Values(make_tuple(&vpx_quantize_b_sse2,
|
||||
&vpx_quantize_b_c,
|
||||
VPX_BITS_8, 16)));
|
||||
#endif // CONFIG_VP9_HIGHBITDEPTH
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(
|
||||
DISABLED_SSE2, VP9QuantizeTest,
|
||||
::testing::Values(make_tuple(&QuantFPWrapper<vp9_quantize_fp_sse2>,
|
||||
&QuantFPWrapper<vp9_quantize_fp_c>, VPX_BITS_8,
|
||||
16)));
|
||||
SSE2, VP9QuantizeTest,
|
||||
::testing::Values(make_tuple(&vpx_quantize_b_sse2, &vpx_quantize_b_c,
|
||||
VPX_BITS_8, 16, false),
|
||||
make_tuple(&QuantFPWrapper<vp9_quantize_fp_sse2>,
|
||||
&QuantFPWrapper<quantize_fp_nz_c>, VPX_BITS_8,
|
||||
16, true)));
|
||||
#endif // CONFIG_VP9_HIGHBITDEPTH
|
||||
#endif // HAVE_SSE2
|
||||
|
||||
#if HAVE_SSSE3 && !CONFIG_VP9_HIGHBITDEPTH
|
||||
#if ARCH_X86_64
|
||||
INSTANTIATE_TEST_CASE_P(
|
||||
SSSE3, VP9QuantizeTest,
|
||||
::testing::Values(make_tuple(&vpx_quantize_b_ssse3, &vpx_quantize_b_c,
|
||||
VPX_BITS_8, 16, false),
|
||||
make_tuple(&QuantFPWrapper<vp9_quantize_fp_ssse3>,
|
||||
&QuantFPWrapper<quantize_fp_nz_c>, VPX_BITS_8,
|
||||
16, true),
|
||||
make_tuple(&QuantFPWrapper<vp9_quantize_fp_32x32_ssse3>,
|
||||
&QuantFPWrapper<quantize_fp_32x32_nz_c>,
|
||||
VPX_BITS_8, 32, true)));
|
||||
#else
|
||||
INSTANTIATE_TEST_CASE_P(SSSE3, VP9QuantizeTest,
|
||||
::testing::Values(make_tuple(&vpx_quantize_b_ssse3,
|
||||
&vpx_quantize_b_c,
|
||||
VPX_BITS_8, 16)));
|
||||
VPX_BITS_8, 16, false)));
|
||||
#endif
|
||||
|
||||
#if ARCH_X86_64
|
||||
// TODO(johannkoenig): SSSE3 optimizations do not yet pass this test.
|
||||
INSTANTIATE_TEST_CASE_P(
|
||||
DISABLED_SSSE3, VP9QuantizeTest,
|
||||
::testing::Values(make_tuple(&vpx_quantize_b_32x32_ssse3,
|
||||
&vpx_quantize_b_32x32_c, VPX_BITS_8, 32),
|
||||
make_tuple(&QuantFPWrapper<vp9_quantize_fp_ssse3>,
|
||||
&QuantFPWrapper<vp9_quantize_fp_c>, VPX_BITS_8,
|
||||
16),
|
||||
make_tuple(&QuantFPWrapper<vp9_quantize_fp_32x32_ssse3>,
|
||||
&QuantFPWrapper<vp9_quantize_fp_32x32_c>,
|
||||
VPX_BITS_8, 32)));
|
||||
INSTANTIATE_TEST_CASE_P(DISABLED_SSSE3, VP9QuantizeTest,
|
||||
::testing::Values(make_tuple(
|
||||
&vpx_quantize_b_32x32_ssse3,
|
||||
&vpx_quantize_b_32x32_c, VPX_BITS_8, 32, false)));
|
||||
#endif // ARCH_X86_64
|
||||
#endif // HAVE_SSSE3 && !CONFIG_VP9_HIGHBITDEPTH
|
||||
|
||||
@@ -398,36 +524,54 @@ INSTANTIATE_TEST_CASE_P(
|
||||
INSTANTIATE_TEST_CASE_P(
|
||||
AVX, VP9QuantizeTest,
|
||||
::testing::Values(make_tuple(&vpx_quantize_b_avx, &vpx_quantize_b_c,
|
||||
VPX_BITS_8, 16),
|
||||
VPX_BITS_8, 16, false),
|
||||
// Even though SSSE3 and AVX do not match the reference
|
||||
// code, we can keep them in sync with each other.
|
||||
make_tuple(&vpx_quantize_b_32x32_avx,
|
||||
&vpx_quantize_b_32x32_ssse3, VPX_BITS_8, 32)));
|
||||
&vpx_quantize_b_32x32_ssse3, VPX_BITS_8, 32,
|
||||
false)));
|
||||
#endif // HAVE_AVX && !CONFIG_VP9_HIGHBITDEPTH
|
||||
|
||||
#if ARCH_X86_64 && HAVE_AVX2
|
||||
INSTANTIATE_TEST_CASE_P(
|
||||
AVX2, VP9QuantizeTest,
|
||||
::testing::Values(make_tuple(&QuantFPWrapper<vp9_quantize_fp_avx2>,
|
||||
&QuantFPWrapper<quantize_fp_nz_c>, VPX_BITS_8,
|
||||
16, true)));
|
||||
#endif // HAVE_AVX2 && !CONFIG_VP9_HIGHBITDEPTH
|
||||
|
||||
// TODO(webm:1448): dqcoeff is not handled correctly in HBD builds.
|
||||
#if HAVE_NEON && !CONFIG_VP9_HIGHBITDEPTH
|
||||
INSTANTIATE_TEST_CASE_P(
|
||||
NEON, VP9QuantizeTest,
|
||||
::testing::Values(
|
||||
make_tuple(&vpx_quantize_b_neon, &vpx_quantize_b_c, VPX_BITS_8, 16),
|
||||
make_tuple(&vpx_quantize_b_32x32_neon, &vpx_quantize_b_32x32_c,
|
||||
VPX_BITS_8, 32),
|
||||
make_tuple(&QuantFPWrapper<vp9_quantize_fp_neon>,
|
||||
&QuantFPWrapper<vp9_quantize_fp_c>, VPX_BITS_8, 16),
|
||||
make_tuple(&QuantFPWrapper<vp9_quantize_fp_32x32_neon>,
|
||||
&QuantFPWrapper<vp9_quantize_fp_32x32_c>, VPX_BITS_8, 32)));
|
||||
::testing::Values(make_tuple(&vpx_quantize_b_neon, &vpx_quantize_b_c,
|
||||
VPX_BITS_8, 16, false),
|
||||
make_tuple(&vpx_quantize_b_32x32_neon,
|
||||
&vpx_quantize_b_32x32_c, VPX_BITS_8, 32,
|
||||
false),
|
||||
make_tuple(&QuantFPWrapper<vp9_quantize_fp_neon>,
|
||||
&QuantFPWrapper<vp9_quantize_fp_c>, VPX_BITS_8,
|
||||
16, true),
|
||||
make_tuple(&QuantFPWrapper<vp9_quantize_fp_32x32_neon>,
|
||||
&QuantFPWrapper<vp9_quantize_fp_32x32_c>,
|
||||
VPX_BITS_8, 32, true)));
|
||||
#endif // HAVE_NEON && !CONFIG_VP9_HIGHBITDEPTH
|
||||
|
||||
// Only useful to compare "Speed" test results.
|
||||
INSTANTIATE_TEST_CASE_P(
|
||||
DISABLED_C, VP9QuantizeTest,
|
||||
::testing::Values(
|
||||
make_tuple(&vpx_quantize_b_c, &vpx_quantize_b_c, VPX_BITS_8, 16),
|
||||
make_tuple(&vpx_quantize_b_c, &vpx_quantize_b_c, VPX_BITS_8, 16, false),
|
||||
make_tuple(&vpx_quantize_b_32x32_c, &vpx_quantize_b_32x32_c, VPX_BITS_8,
|
||||
32),
|
||||
32, false),
|
||||
make_tuple(&QuantFPWrapper<vp9_quantize_fp_c>,
|
||||
&QuantFPWrapper<vp9_quantize_fp_c>, VPX_BITS_8, 16),
|
||||
&QuantFPWrapper<vp9_quantize_fp_c>, VPX_BITS_8, 16, true),
|
||||
make_tuple(&QuantFPWrapper<quantize_fp_nz_c>,
|
||||
&QuantFPWrapper<quantize_fp_nz_c>, VPX_BITS_8, 16, true),
|
||||
make_tuple(&QuantFPWrapper<quantize_fp_32x32_nz_c>,
|
||||
&QuantFPWrapper<quantize_fp_32x32_nz_c>, VPX_BITS_8, 32,
|
||||
true),
|
||||
make_tuple(&QuantFPWrapper<vp9_quantize_fp_32x32_c>,
|
||||
&QuantFPWrapper<vp9_quantize_fp_32x32_c>, VPX_BITS_8, 32)));
|
||||
&QuantFPWrapper<vp9_quantize_fp_32x32_c>, VPX_BITS_8, 32,
|
||||
true)));
|
||||
} // namespace
|
||||
|
@@ -47,7 +47,7 @@ class ScaleTest : public VpxScaleBase,
|
||||
scale_fn_(&img_, &dst_img_, filter_type, phase_scaler));
|
||||
}
|
||||
|
||||
void RunTest() {
|
||||
void RunTest(INTERP_FILTER filter_type) {
|
||||
static const int kNumSizesToTest = 20;
|
||||
static const int kNumScaleFactorsToTest = 4;
|
||||
static const int kSizesToTest[] = {
|
||||
@@ -55,50 +55,48 @@ class ScaleTest : public VpxScaleBase,
|
||||
22, 24, 26, 28, 30, 32, 34, 68, 128, 134
|
||||
};
|
||||
static const int kScaleFactors[] = { 1, 2, 3, 4 };
|
||||
for (INTERP_FILTER filter_type = 0; filter_type < 4; ++filter_type) {
|
||||
for (int phase_scaler = 0; phase_scaler < 16; ++phase_scaler) {
|
||||
for (int h = 0; h < kNumSizesToTest; ++h) {
|
||||
const int src_height = kSizesToTest[h];
|
||||
for (int w = 0; w < kNumSizesToTest; ++w) {
|
||||
const int src_width = kSizesToTest[w];
|
||||
for (int sf_up_idx = 0; sf_up_idx < kNumScaleFactorsToTest;
|
||||
++sf_up_idx) {
|
||||
const int sf_up = kScaleFactors[sf_up_idx];
|
||||
for (int sf_down_idx = 0; sf_down_idx < kNumScaleFactorsToTest;
|
||||
++sf_down_idx) {
|
||||
const int sf_down = kScaleFactors[sf_down_idx];
|
||||
const int dst_width = src_width * sf_up / sf_down;
|
||||
const int dst_height = src_height * sf_up / sf_down;
|
||||
if (sf_up == sf_down && sf_up != 1) {
|
||||
continue;
|
||||
}
|
||||
// I420 frame width and height must be even.
|
||||
if (!dst_width || !dst_height || dst_width & 1 ||
|
||||
dst_height & 1) {
|
||||
continue;
|
||||
}
|
||||
// vpx_convolve8_c() has restriction on the step which cannot
|
||||
// exceed 64 (ratio 1 to 4).
|
||||
if (src_width > 4 * dst_width || src_height > 4 * dst_height) {
|
||||
continue;
|
||||
}
|
||||
ASSERT_NO_FATAL_FAILURE(ResetScaleImages(
|
||||
src_width, src_height, dst_width, dst_height));
|
||||
ReferenceScaleFrame(filter_type, phase_scaler);
|
||||
ScaleFrame(filter_type, phase_scaler);
|
||||
if (memcmp(dst_img_.buffer_alloc, ref_img_.buffer_alloc,
|
||||
ref_img_.frame_size)) {
|
||||
printf(
|
||||
"filter_type = %d, phase_scaler = %d, src_width = %4d, "
|
||||
"src_height = %4d, dst_width = %4d, dst_height = %4d, "
|
||||
"scale factor = %d:%d\n",
|
||||
filter_type, phase_scaler, src_width, src_height,
|
||||
dst_width, dst_height, sf_down, sf_up);
|
||||
PrintDiff();
|
||||
}
|
||||
CompareImages(dst_img_);
|
||||
DeallocScaleImages();
|
||||
for (int phase_scaler = 0; phase_scaler < 16; ++phase_scaler) {
|
||||
for (int h = 0; h < kNumSizesToTest; ++h) {
|
||||
const int src_height = kSizesToTest[h];
|
||||
for (int w = 0; w < kNumSizesToTest; ++w) {
|
||||
const int src_width = kSizesToTest[w];
|
||||
for (int sf_up_idx = 0; sf_up_idx < kNumScaleFactorsToTest;
|
||||
++sf_up_idx) {
|
||||
const int sf_up = kScaleFactors[sf_up_idx];
|
||||
for (int sf_down_idx = 0; sf_down_idx < kNumScaleFactorsToTest;
|
||||
++sf_down_idx) {
|
||||
const int sf_down = kScaleFactors[sf_down_idx];
|
||||
const int dst_width = src_width * sf_up / sf_down;
|
||||
const int dst_height = src_height * sf_up / sf_down;
|
||||
if (sf_up == sf_down && sf_up != 1) {
|
||||
continue;
|
||||
}
|
||||
// I420 frame width and height must be even.
|
||||
if (!dst_width || !dst_height || dst_width & 1 ||
|
||||
dst_height & 1) {
|
||||
continue;
|
||||
}
|
||||
// vpx_convolve8_c() has restriction on the step which cannot
|
||||
// exceed 64 (ratio 1 to 4).
|
||||
if (src_width > 4 * dst_width || src_height > 4 * dst_height) {
|
||||
continue;
|
||||
}
|
||||
ASSERT_NO_FATAL_FAILURE(ResetScaleImages(src_width, src_height,
|
||||
dst_width, dst_height));
|
||||
ReferenceScaleFrame(filter_type, phase_scaler);
|
||||
ScaleFrame(filter_type, phase_scaler);
|
||||
if (memcmp(dst_img_.buffer_alloc, ref_img_.buffer_alloc,
|
||||
ref_img_.frame_size)) {
|
||||
printf(
|
||||
"filter_type = %d, phase_scaler = %d, src_width = %4d, "
|
||||
"src_height = %4d, dst_width = %4d, dst_height = %4d, "
|
||||
"scale factor = %d:%d\n",
|
||||
filter_type, phase_scaler, src_width, src_height, dst_width,
|
||||
dst_height, sf_down, sf_up);
|
||||
PrintDiff();
|
||||
}
|
||||
CompareImages(dst_img_);
|
||||
DeallocScaleImages();
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -145,7 +143,10 @@ class ScaleTest : public VpxScaleBase,
|
||||
ScaleFrameFunc scale_fn_;
|
||||
};
|
||||
|
||||
TEST_P(ScaleTest, ScaleFrame) { ASSERT_NO_FATAL_FAILURE(RunTest()); }
|
||||
TEST_P(ScaleTest, ScaleFrame_EightTap) { RunTest(EIGHTTAP); }
|
||||
TEST_P(ScaleTest, ScaleFrame_EightTapSmooth) { RunTest(EIGHTTAP_SMOOTH); }
|
||||
TEST_P(ScaleTest, ScaleFrame_EightTapSharp) { RunTest(EIGHTTAP_SHARP); }
|
||||
TEST_P(ScaleTest, ScaleFrame_Bilinear) { RunTest(BILINEAR); }
|
||||
|
||||
TEST_P(ScaleTest, DISABLED_Speed) {
|
||||
static const int kCountSpeedTestBlock = 100;
|
||||
|
@@ -147,7 +147,6 @@ TEST(VPxWorkerThreadTest, TestInterfaceAPI) {
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
// Multi-threaded decode tests
|
||||
|
||||
#if CONFIG_WEBM_IO
|
||||
struct FileList {
|
||||
const char *name;
|
||||
|
@@ -1,72 +0,0 @@
|
||||
#!/usr/bin/python
|
||||
|
||||
import getopt
|
||||
import subprocess
|
||||
import sys
|
||||
|
||||
LONG_OPTIONS = ["shard=", "shards="]
|
||||
BASE_COMMAND = "./configure --enable-internal-stats --enable-experimental"
|
||||
|
||||
def RunCommand(command):
|
||||
run = subprocess.Popen(command, shell=True)
|
||||
output = run.communicate()
|
||||
if run.returncode:
|
||||
print "Non-zero return code: " + str(run.returncode) + " => exiting!"
|
||||
sys.exit(1)
|
||||
|
||||
def list_of_experiments():
|
||||
experiments = []
|
||||
configure_file = open("configure")
|
||||
list_start = False
|
||||
for line in configure_file.read().split("\n"):
|
||||
if line == 'EXPERIMENT_LIST="':
|
||||
list_start = True
|
||||
elif line == '"':
|
||||
list_start = False
|
||||
elif list_start:
|
||||
currently_broken = ["csm"]
|
||||
experiment = line[4:]
|
||||
if experiment not in currently_broken:
|
||||
experiments.append(experiment)
|
||||
return experiments
|
||||
|
||||
def main(argv):
|
||||
# Parse arguments
|
||||
options = {"--shard": 0, "--shards": 1}
|
||||
if "--" in argv:
|
||||
opt_end_index = argv.index("--")
|
||||
else:
|
||||
opt_end_index = len(argv)
|
||||
try:
|
||||
o, _ = getopt.getopt(argv[1:opt_end_index], None, LONG_OPTIONS)
|
||||
except getopt.GetoptError, err:
|
||||
print str(err)
|
||||
print "Usage: %s [--shard=<n> --shards=<n>] -- [configure flag ...]"%argv[0]
|
||||
sys.exit(2)
|
||||
|
||||
options.update(o)
|
||||
extra_args = argv[opt_end_index + 1:]
|
||||
|
||||
# Shard experiment list
|
||||
shard = int(options["--shard"])
|
||||
shards = int(options["--shards"])
|
||||
experiments = list_of_experiments()
|
||||
base_command = " ".join([BASE_COMMAND] + extra_args)
|
||||
configs = [base_command]
|
||||
configs += ["%s --enable-%s" % (base_command, e) for e in experiments]
|
||||
my_configs = zip(configs, range(len(configs)))
|
||||
my_configs = filter(lambda x: x[1] % shards == shard, my_configs)
|
||||
my_configs = [e[0] for e in my_configs]
|
||||
|
||||
# Run configs for this shard
|
||||
for config in my_configs:
|
||||
test_build(config)
|
||||
|
||||
def test_build(configure_command):
|
||||
print "\033[34m\033[47mTesting %s\033[0m" % (configure_command)
|
||||
RunCommand(configure_command)
|
||||
RunCommand("make clean")
|
||||
RunCommand("make")
|
||||
|
||||
if __name__ == "__main__":
|
||||
main(sys.argv)
|
@@ -1,15 +0,0 @@
|
||||
#!/bin/bash
|
||||
##
|
||||
## List the release each author first contributed to.
|
||||
##
|
||||
## Usage: author_first_release.sh [TAGS]
|
||||
##
|
||||
## If the TAGS arguments are unspecified, all tags reported by `git tag`
|
||||
## will be considered.
|
||||
##
|
||||
tags=${@:-$(git tag)}
|
||||
for tag in $tags; do
|
||||
git shortlog -n -e -s $tag |
|
||||
cut -f2- |
|
||||
awk "{print \"${tag#v}\t\"\$0}"
|
||||
done | sort -k2 | uniq -f2
|
158
tools/ftfy.sh
@@ -1,158 +0,0 @@
#!/bin/sh
self="$0"
dirname_self=$(dirname "$self")

usage() {
  cat <<EOF >&2
Usage: $self [option]

This script applies a whitespace transformation to the commit at HEAD. If no
options are given, then the modified files are left in the working tree.

Options:
  -h, --help     Shows this message
  -n, --dry-run  Shows a diff of the changes to be made.
  --amend        Squashes the changes into the commit at HEAD
                 This option will also reformat the commit message.
  --commit       Creates a new commit containing only the whitespace changes
  --msg-only     Reformat the commit message only, ignore the patch itself.

EOF
  rm -f ${CLEAN_FILES}
  exit 1
}


log() {
  echo "${self##*/}: $@" >&2
}


vpx_style() {
  for f; do
    case "$f" in
      *.h|*.c|*.cc)
        clang-format -i --style=file "$f"
        ;;
    esac
  done
}


apply() {
  [ $INTERSECT_RESULT -ne 0 ] && patch -p1 < "$1"
}


commit() {
  LAST_CHANGEID=$(git show | awk '/Change-Id:/{print $2}')
  if [ -z "$LAST_CHANGEID" ]; then
    log "HEAD doesn't have a Change-Id, unable to generate a new commit"
    exit 1
  fi

  # Build a deterministic Change-Id from the parent's
  NEW_CHANGEID=${LAST_CHANGEID}-styled
  NEW_CHANGEID=I$(echo $NEW_CHANGEID | git hash-object --stdin)

  # Commit, preserving authorship from the parent commit.
  git commit -a -C HEAD > /dev/null
  git commit --amend -F- << EOF
Cosmetic: Fix whitespace in change ${LAST_CHANGEID:0:9}

Change-Id: ${NEW_CHANGEID}
EOF
}


show_commit_msg_diff() {
  if [ $DIFF_MSG_RESULT -ne 0 ]; then
    log "Modified commit message:"
    diff -u "$ORIG_COMMIT_MSG" "$NEW_COMMIT_MSG" | tail -n +3
  fi
}


amend() {
  show_commit_msg_diff
  if [ $DIFF_MSG_RESULT -ne 0 ] || [ $INTERSECT_RESULT -ne 0 ]; then
    git commit -a --amend -F "$NEW_COMMIT_MSG"
  fi
}


diff_msg() {
  git log -1 --format=%B > "$ORIG_COMMIT_MSG"
  "${dirname_self}"/wrap-commit-msg.py \
      < "$ORIG_COMMIT_MSG" > "$NEW_COMMIT_MSG"
  cmp -s "$ORIG_COMMIT_MSG" "$NEW_COMMIT_MSG"
  DIFF_MSG_RESULT=$?
}


# Temporary files
ORIG_DIFF=orig.diff.$$
MODIFIED_DIFF=modified.diff.$$
FINAL_DIFF=final.diff.$$
ORIG_COMMIT_MSG=orig.commit-msg.$$
NEW_COMMIT_MSG=new.commit-msg.$$
CLEAN_FILES="${ORIG_DIFF} ${MODIFIED_DIFF} ${FINAL_DIFF}"
CLEAN_FILES="${CLEAN_FILES} ${ORIG_COMMIT_MSG} ${NEW_COMMIT_MSG}"

# Preconditions
[ $# -lt 2 ] || usage

if ! clang-format -version >/dev/null 2>&1; then
  log "clang-format not found"
  exit 1
fi

if ! git diff --quiet HEAD; then
  log "Working tree is dirty, commit your changes first"
  exit 1
fi

# Need to be in the root
cd "$(git rev-parse --show-toplevel)"

# Collect the original diff
git show > "${ORIG_DIFF}"

# Apply the style guide on new and modified files and collect its diff
for f in $(git diff HEAD^ --name-only -M90 --diff-filter=AM); do
  case "$f" in
    third_party/*) continue;;
  esac
  vpx_style "$f"
done
git diff --no-color --no-ext-diff > "${MODIFIED_DIFF}"

# Intersect the two diffs
"${dirname_self}"/intersect-diffs.py \
    "${ORIG_DIFF}" "${MODIFIED_DIFF}" > "${FINAL_DIFF}"
INTERSECT_RESULT=$?
git reset --hard >/dev/null

# Fixup the commit message
diff_msg

# Handle options
if [ -n "$1" ]; then
  case "$1" in
    -h|--help) usage;;
    -n|--dry-run) cat "${FINAL_DIFF}"; show_commit_msg_diff;;
    --commit) apply "${FINAL_DIFF}"; commit;;
    --amend) apply "${FINAL_DIFF}"; amend;;
    --msg-only) amend;;
    *) usage;;
  esac
else
  apply "${FINAL_DIFF}"
  if ! git diff --quiet; then
    log "Formatting changes applied, verify and commit."
    log "See also: http://www.webmproject.org/code/contribute/conventions/"
    git diff --stat
  fi
fi

rm -f ${CLEAN_FILES}
@@ -37,7 +37,9 @@ extern "C" {
#define SEGMENT_DELTADATA 0
#define SEGMENT_ABSDATA 1

typedef struct { int r, c; } POS;
typedef struct {
  int r, c;
} POS;

#define PLANE_TYPE_Y_NO_DC 0
#define PLANE_TYPE_Y2 1
@@ -180,6 +182,9 @@ typedef struct {
  unsigned int low_res_ref_frames[MAX_REF_FRAMES];
  // The video frame counter value for the key frame, for lowest resolution.
  unsigned int key_frame_counter_value;
  // Flags to signal skipped encoding of previous and base layer stream.
  unsigned int skip_encoding_prev_stream;
  unsigned int skip_encoding_base_stream;
  LOWER_RES_MB_INFO *mb_info;
} LOWER_RES_FRAME_INFO;
#endif
@@ -6,7 +6,7 @@
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */
 */

#ifndef VP8_COMMON_DEFAULT_COEF_PROBS_H_
#define VP8_COMMON_DEFAULT_COEF_PROBS_H_
@@ -20,8 +20,7 @@ static void copy_and_extend_plane(unsigned char *s, /* source */
                                  int et, /* extend top border */
                                  int el, /* extend left border */
                                  int eb, /* extend bottom border */
                                  int er /* extend right border */
                                  ) {
                                  int er) { /* extend right border */
  int i;
  unsigned char *src_ptr1, *src_ptr2;
  unsigned char *dest_ptr1, *dest_ptr2;
@@ -934,8 +934,8 @@ void vp8_loop_filter_uvvertical_edge_mips(unsigned char *s, int p,
|
||||
s4 = s3 + p;
|
||||
|
||||
/* load quad-byte vectors
|
||||
* memory is 4 byte aligned
|
||||
*/
|
||||
* memory is 4 byte aligned
|
||||
*/
|
||||
p2 = *((uint32_t *)(s1 - 4));
|
||||
p6 = *((uint32_t *)(s1));
|
||||
p1 = *((uint32_t *)(s2 - 4));
|
||||
@@ -990,8 +990,8 @@ void vp8_loop_filter_uvvertical_edge_mips(unsigned char *s, int p,
|
||||
:);
|
||||
|
||||
/* if (p1 - p4 == 0) and (p2 - p3 == 0)
|
||||
* mask will be zero and filtering is not needed
|
||||
*/
|
||||
* mask will be zero and filtering is not needed
|
||||
*/
|
||||
if (!(((p1 - p4) == 0) && ((p2 - p3) == 0))) {
|
||||
vp8_filter_mask_vec_mips(limit, flimit, p1, p2, pm1, p0, p3, p4, p5, p6,
|
||||
thresh, &hev, &mask);
|
||||
@@ -2102,8 +2102,8 @@ void vp8_mbloop_filter_uvvertical_edge_mips(unsigned char *s, int p,
|
||||
s4 = s3 + p;
|
||||
|
||||
/* load quad-byte vectors
|
||||
* memory is 4 byte aligned
|
||||
*/
|
||||
* memory is 4 byte aligned
|
||||
*/
|
||||
p2 = *((uint32_t *)(s1 - 4));
|
||||
p6 = *((uint32_t *)(s1));
|
||||
p1 = *((uint32_t *)(s2 - 4));
|
||||
|
@@ -12,7 +12,7 @@
|
||||
#include "vpx_mem/vpx_mem.h"
|
||||
|
||||
void vp8_dequant_idct_add_y_block_mmi(int16_t *q, int16_t *dq, uint8_t *dst,
|
||||
int stride, int8_t *eobs) {
|
||||
int stride, char *eobs) {
|
||||
int i, j;
|
||||
|
||||
for (i = 0; i < 4; i++) {
|
||||
@@ -33,8 +33,7 @@ void vp8_dequant_idct_add_y_block_mmi(int16_t *q, int16_t *dq, uint8_t *dst,
|
||||
}
|
||||
|
||||
void vp8_dequant_idct_add_uv_block_mmi(int16_t *q, int16_t *dq, uint8_t *dstu,
|
||||
uint8_t *dstv, int stride,
|
||||
int8_t *eobs) {
|
||||
uint8_t *dstv, int stride, char *eobs) {
|
||||
int i, j;
|
||||
|
||||
for (i = 0; i < 2; i++) {
|
||||
|
@@ -461,96 +461,87 @@ void vp8_loop_filter_vertical_edge_mmi(unsigned char *src_ptr,
|
||||
);
|
||||
}
|
||||
|
||||
/* clang-format off */
|
||||
#define VP8_MBLOOP_HPSRAB \
|
||||
"xor %[ftmp3], %[ftmp3], %[ftmp3] \n\t" \
|
||||
"xor %[ftmp8], %[ftmp8], %[ftmp8] \n\t" \
|
||||
"punpcklbh %[ftmp3], %[ftmp3], %[ftmp0] \n\t" \
|
||||
"punpckhbh %[ftmp8], %[ftmp8], %[ftmp0] \n\t" \
|
||||
"psrah %[ftmp3], %[ftmp3], %[ftmp9] \n\t" \
|
||||
"psrah %[ftmp8], %[ftmp8], %[ftmp9] \n\t" \
|
||||
"packsshb %[ftmp0], %[ftmp3], %[ftmp8] \n\t"
|
||||
"punpcklbh %[ftmp10], %[ftmp10], %[ftmp0] \n\t" \
|
||||
"punpckhbh %[ftmp11], %[ftmp11], %[ftmp0] \n\t" \
|
||||
"psrah %[ftmp10], %[ftmp10], %[ftmp9] \n\t" \
|
||||
"psrah %[ftmp11], %[ftmp11], %[ftmp9] \n\t" \
|
||||
"packsshb %[ftmp0], %[ftmp10], %[ftmp11] \n\t"
|
||||
|
||||
#define VP8_MBLOOP_HPSRAB_PMULHH(reg1, reg2) \
|
||||
"pmulhh " #reg1 ", " #reg1 ", " #reg2 " \n\t"
|
||||
|
||||
#define VP8_MBLOOP_HPSRAB_ADD(reg) \
|
||||
"xor %[ftmp3], %[ftmp3], %[ftmp3] \n\t" \
|
||||
"xor %[ftmp8], %[ftmp8], %[ftmp8] \n\t" \
|
||||
"punpcklbh %[ftmp3], %[ftmp3], %[ftmp2] \n\t" \
|
||||
"punpckhbh %[ftmp8], %[ftmp8], %[ftmp2] \n\t" \
|
||||
VP8_MBLOOP_HPSRAB_PMULHH(%[ftmp3], reg) \
|
||||
VP8_MBLOOP_HPSRAB_PMULHH(%[ftmp8], reg) \
|
||||
"paddh %[ftmp3], %[ftmp3], %[ff_ph_003f] \n\t" \
|
||||
"paddh %[ftmp8], %[ftmp8], %[ff_ph_003f] \n\t" \
|
||||
"psrah %[ftmp3], %[ftmp3], %[ftmp9] \n\t" \
|
||||
"psrah %[ftmp8], %[ftmp8], %[ftmp9] \n\t" \
|
||||
"packsshb %[ftmp3], %[ftmp3], %[ftmp8] \n\t"
|
||||
#define VP8_MBLOOP_HPSRAB_ADD(reg) \
|
||||
"punpcklbh %[ftmp1], %[ftmp0], %[ftmp12] \n\t" \
|
||||
"punpckhbh %[ftmp2], %[ftmp0], %[ftmp12] \n\t" \
|
||||
"pmulhh %[ftmp1], %[ftmp1], " #reg " \n\t" \
|
||||
"pmulhh %[ftmp2], %[ftmp2], " #reg " \n\t" \
|
||||
"paddh %[ftmp1], %[ftmp1], %[ff_ph_003f] \n\t" \
|
||||
"paddh %[ftmp2], %[ftmp2], %[ff_ph_003f] \n\t" \
|
||||
"psrah %[ftmp1], %[ftmp1], %[ftmp9] \n\t" \
|
||||
"psrah %[ftmp2], %[ftmp2], %[ftmp9] \n\t" \
|
||||
"packsshb %[ftmp1], %[ftmp1], %[ftmp2] \n\t"
|
||||
/* clang-format on */
|
||||
|
||||
void vp8_mbloop_filter_horizontal_edge_mmi(
|
||||
unsigned char *src_ptr, int src_pixel_step, const unsigned char *blimit,
|
||||
const unsigned char *limit, const unsigned char *thresh, int count) {
|
||||
uint32_t tmp[1];
|
||||
mips_reg addr[2];
|
||||
DECLARE_ALIGNED(8, const uint64_t, srct[1]);
|
||||
double ftmp[10];
|
||||
double ftmp[13];
|
||||
|
||||
__asm__ volatile (
|
||||
MMI_SLL(%[tmp0], %[src_pixel_step], 0x02)
|
||||
MMI_SUBU(%[src_ptr], %[src_ptr], %[tmp0])
|
||||
"1: \n\t"
|
||||
"gsldlc1 %[ftmp9], 0x07(%[limit]) \n\t"
|
||||
"gsldrc1 %[ftmp9], 0x00(%[limit]) \n\t"
|
||||
/* ftmp1: p3 */
|
||||
"gsldlc1 %[ftmp1], 0x07(%[src_ptr]) \n\t"
|
||||
"gsldrc1 %[ftmp1], 0x00(%[src_ptr]) \n\t"
|
||||
/* ftmp3: p2 */
|
||||
MMI_ADDU(%[src_ptr], %[src_ptr], %[src_pixel_step])
|
||||
"gsldlc1 %[ftmp3], 0x07(%[src_ptr]) \n\t"
|
||||
"gsldrc1 %[ftmp3], 0x00(%[src_ptr]) \n\t"
|
||||
/* ftmp4: p1 */
|
||||
MMI_ADDU(%[src_ptr], %[src_ptr], %[src_pixel_step])
|
||||
"gsldlc1 %[ftmp4], 0x07(%[src_ptr]) \n\t"
|
||||
"gsldrc1 %[ftmp4], 0x00(%[src_ptr]) \n\t"
|
||||
/* ftmp5: p0 */
|
||||
MMI_ADDU(%[src_ptr], %[src_ptr], %[src_pixel_step])
|
||||
"gsldlc1 %[ftmp5], 0x07(%[src_ptr]) \n\t"
|
||||
"gsldrc1 %[ftmp5], 0x00(%[src_ptr]) \n\t"
|
||||
/* ftmp6: q0 */
|
||||
MMI_ADDU(%[src_ptr], %[src_ptr], %[src_pixel_step])
|
||||
"gsldlc1 %[ftmp6], 0x07(%[src_ptr]) \n\t"
|
||||
"gsldrc1 %[ftmp6], 0x00(%[src_ptr]) \n\t"
|
||||
/* ftmp7: q1 */
|
||||
MMI_ADDU(%[src_ptr], %[src_ptr], %[src_pixel_step])
|
||||
"gsldlc1 %[ftmp7], 0x07(%[src_ptr]) \n\t"
|
||||
"gsldrc1 %[ftmp7], 0x00(%[src_ptr]) \n\t"
|
||||
/* ftmp8: q2 */
|
||||
MMI_ADDU(%[src_ptr], %[src_ptr], %[src_pixel_step])
|
||||
"gsldlc1 %[ftmp8], 0x07(%[src_ptr]) \n\t"
|
||||
"gsldrc1 %[ftmp8], 0x00(%[src_ptr]) \n\t"
|
||||
/* ftmp2: q3 */
|
||||
MMI_ADDU(%[src_ptr], %[src_ptr], %[src_pixel_step])
|
||||
"gsldlc1 %[ftmp2], 0x07(%[src_ptr]) \n\t"
|
||||
"gsldrc1 %[ftmp2], 0x00(%[src_ptr]) \n\t"
|
||||
|
||||
MMI_ADDU(%[addr0], %[src_ptr], %[src_pixel_step])
|
||||
"gsldlc1 %[ftmp12], 0x07(%[blimit]) \n\t"
|
||||
"gsldrc1 %[ftmp12], 0x00(%[blimit]) \n\t"
|
||||
|
||||
MMI_SLL(%[tmp0], %[src_pixel_step], 0x02)
|
||||
MMI_SUBU(%[addr1], %[src_ptr], %[tmp0])
|
||||
"gsldlc1 %[ftmp1], 0x07(%[addr1]) \n\t"
|
||||
"gsldrc1 %[ftmp1], 0x00(%[addr1]) \n\t"
|
||||
MMI_SUBU(%[addr1], %[addr0], %[tmp0])
|
||||
"gsldlc1 %[ftmp3], 0x07(%[addr1]) \n\t"
|
||||
"gsldrc1 %[ftmp3], 0x00(%[addr1]) \n\t"
|
||||
"pasubub %[ftmp0], %[ftmp1], %[ftmp3] \n\t"
|
||||
"psubusb %[ftmp0], %[ftmp0], %[ftmp9] \n\t"
|
||||
|
||||
/* ftmp4:p1 */
|
||||
MMI_SLL(%[tmp0], %[src_pixel_step], 0x01)
|
||||
MMI_SUBU(%[addr1], %[src_ptr], %[tmp0])
|
||||
"gsldlc1 %[ftmp4], 0x07(%[addr1]) \n\t"
|
||||
"gsldrc1 %[ftmp4], 0x00(%[addr1]) \n\t"
|
||||
"pasubub %[ftmp1], %[ftmp3], %[ftmp4] \n\t"
|
||||
"psubusb %[ftmp1], %[ftmp1], %[ftmp9] \n\t"
|
||||
"or %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
|
||||
|
||||
/* ftmp5:p0 */
|
||||
MMI_SUBU(%[addr1], %[src_ptr], %[src_pixel_step])
|
||||
"gsldlc1 %[ftmp5], 0x07(%[addr1]) \n\t"
|
||||
"gsldrc1 %[ftmp5], 0x00(%[addr1]) \n\t"
|
||||
"pasubub %[ftmp1], %[ftmp4], %[ftmp5] \n\t"
|
||||
"sdc1 %[ftmp1], 0x00(%[srct]) \n\t"
|
||||
"psubusb %[ftmp1], %[ftmp1], %[ftmp9] \n\t"
|
||||
"pasubub %[ftmp10], %[ftmp4], %[ftmp5] \n\t"
|
||||
"psubusb %[ftmp1], %[ftmp10], %[ftmp9] \n\t"
|
||||
"or %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
|
||||
|
||||
/* ftmp6:q0 */
|
||||
"gsldlc1 %[ftmp6], 0x07(%[src_ptr]) \n\t"
|
||||
"gsldrc1 %[ftmp6], 0x00(%[src_ptr]) \n\t"
|
||||
|
||||
/* ftmp7:q1 */
|
||||
"gsldlc1 %[ftmp7], 0x07(%[addr0]) \n\t"
|
||||
"gsldrc1 %[ftmp7], 0x00(%[addr0]) \n\t"
|
||||
"pasubub %[ftmp1], %[ftmp7], %[ftmp6] \n\t"
|
||||
"sdc1 %[ftmp1], 0x08(%[srct]) \n\t"
|
||||
"psubusb %[ftmp1], %[ftmp1], %[ftmp9] \n\t"
|
||||
"pasubub %[ftmp11], %[ftmp7], %[ftmp6] \n\t"
|
||||
"psubusb %[ftmp1], %[ftmp11], %[ftmp9] \n\t"
|
||||
"or %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
|
||||
|
||||
MMI_ADDU(%[addr1], %[src_ptr], %[tmp0])
|
||||
"gsldlc1 %[ftmp8], 0x07(%[addr1]) \n\t"
|
||||
"gsldrc1 %[ftmp8], 0x00(%[addr1]) \n\t"
|
||||
"pasubub %[ftmp1], %[ftmp8], %[ftmp7] \n\t"
|
||||
"psubusb %[ftmp1], %[ftmp1], %[ftmp9] \n\t"
|
||||
"or %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
|
||||
|
||||
MMI_ADDU(%[addr1], %[addr0], %[tmp0])
|
||||
"gsldlc1 %[ftmp2], 0x07(%[addr1]) \n\t"
|
||||
"gsldrc1 %[ftmp2], 0x00(%[addr1]) \n\t"
|
||||
"pasubub %[ftmp1], %[ftmp2], %[ftmp8] \n\t"
|
||||
"psubusb %[ftmp1], %[ftmp1], %[ftmp9] \n\t"
|
||||
"or %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
|
||||
@@ -563,9 +554,7 @@ void vp8_mbloop_filter_horizontal_edge_mmi(
|
||||
"mtc1 %[tmp0], %[ftmp9] \n\t"
|
||||
"psrlh %[ftmp2], %[ftmp2], %[ftmp9] \n\t"
|
||||
"paddusb %[ftmp1], %[ftmp1], %[ftmp2] \n\t"
|
||||
"gsldlc1 %[ftmp9], 0x07(%[blimit]) \n\t"
|
||||
"gsldrc1 %[ftmp9], 0x00(%[blimit]) \n\t"
|
||||
"psubusb %[ftmp1], %[ftmp1], %[ftmp9] \n\t"
|
||||
"psubusb %[ftmp1], %[ftmp1], %[ftmp12] \n\t"
|
||||
"or %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
|
||||
"xor %[ftmp9], %[ftmp9], %[ftmp9] \n\t"
|
||||
/* ftmp0: mask */
|
||||
@@ -573,29 +562,26 @@ void vp8_mbloop_filter_horizontal_edge_mmi(
|
||||
|
||||
"gsldlc1 %[ftmp9], 0x07(%[thresh]) \n\t"
|
||||
"gsldrc1 %[ftmp9], 0x00(%[thresh]) \n\t"
|
||||
"ldc1 %[ftmp1], 0x00(%[srct]) \n\t"
|
||||
"psubusb %[ftmp1], %[ftmp1], %[ftmp9] \n\t"
|
||||
"ldc1 %[ftmp2], 0x08(%[srct]) \n\t"
|
||||
"psubusb %[ftmp2], %[ftmp2], %[ftmp9] \n\t"
|
||||
"psubusb %[ftmp1], %[ftmp10], %[ftmp9] \n\t"
|
||||
"psubusb %[ftmp2], %[ftmp11], %[ftmp9] \n\t"
|
||||
"paddb %[ftmp1], %[ftmp1], %[ftmp2] \n\t"
|
||||
"xor %[ftmp2], %[ftmp2], %[ftmp2] \n\t"
|
||||
"pcmpeqb %[ftmp1], %[ftmp1], %[ftmp2] \n\t"
|
||||
"pcmpeqb %[ftmp2], %[ftmp2], %[ftmp2] \n\t"
|
||||
/* ftmp1:hev*/
|
||||
/* ftmp1: hev */
|
||||
"xor %[ftmp1], %[ftmp1], %[ftmp2] \n\t"
|
||||
|
||||
"xor %[ftmp4], %[ftmp4], %[ff_pb_80] \n\t"
|
||||
"xor %[ftmp5], %[ftmp5], %[ff_pb_80] \n\t"
|
||||
"xor %[ftmp6], %[ftmp6], %[ff_pb_80] \n\t"
|
||||
"xor %[ftmp7], %[ftmp7], %[ff_pb_80] \n\t"
|
||||
|
||||
"psubsb %[ftmp2], %[ftmp4], %[ftmp7] \n\t"
|
||||
"psubsb %[ftmp9], %[ftmp6], %[ftmp5] \n\t"
|
||||
"paddsb %[ftmp2], %[ftmp2], %[ftmp9] \n\t"
|
||||
"paddsb %[ftmp2], %[ftmp2], %[ftmp9] \n\t"
|
||||
"paddsb %[ftmp2], %[ftmp2], %[ftmp9] \n\t"
|
||||
"and %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
|
||||
"sdc1 %[ftmp2], 0x00(%[srct]) \n\t"
|
||||
"pandn %[ftmp12], %[ftmp1], %[ftmp2] \n\t"
|
||||
"and %[ftmp2], %[ftmp2], %[ftmp1] \n\t"
|
||||
|
||||
"li %[tmp0], 0x0b \n\t"
|
||||
@@ -606,75 +592,71 @@ void vp8_mbloop_filter_horizontal_edge_mmi(
|
||||
"paddsb %[ftmp0], %[ftmp2], %[ff_pb_04] \n\t"
|
||||
VP8_MBLOOP_HPSRAB
|
||||
"psubsb %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
|
||||
"ldc1 %[ftmp2], 0x00(%[srct]) \n\t"
|
||||
"pandn %[ftmp2], %[ftmp1], %[ftmp2] \n\t"
|
||||
|
||||
"li %[tmp0], 0x07 \n\t"
|
||||
"mtc1 %[tmp0], %[ftmp9] \n\t"
|
||||
"xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
|
||||
|
||||
VP8_MBLOOP_HPSRAB_ADD(%[ff_ph_1b00])
|
||||
"psubsb %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
|
||||
"paddsb %[ftmp5], %[ftmp5], %[ftmp3] \n\t"
|
||||
"psubsb %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
|
||||
"paddsb %[ftmp5], %[ftmp5], %[ftmp1] \n\t"
|
||||
"xor %[ftmp6], %[ftmp6], %[ff_pb_80] \n\t"
|
||||
"xor %[ftmp5], %[ftmp5], %[ff_pb_80] \n\t"
|
||||
|
||||
MMI_SUBU(%[addr1], %[src_ptr], %[src_pixel_step])
|
||||
"gssdlc1 %[ftmp5], 0x07(%[addr1]) \n\t"
|
||||
"gssdrc1 %[ftmp5], 0x00(%[addr1]) \n\t"
|
||||
MMI_SLL(%[tmp0], %[src_pixel_step], 0x02)
|
||||
MMI_SUBU(%[src_ptr], %[src_ptr], %[tmp0])
|
||||
"gssdlc1 %[ftmp5], 0x07(%[src_ptr]) \n\t"
|
||||
"gssdrc1 %[ftmp5], 0x00(%[src_ptr]) \n\t"
|
||||
MMI_ADDU(%[src_ptr], %[src_ptr], %[src_pixel_step])
|
||||
"gssdlc1 %[ftmp6], 0x07(%[src_ptr]) \n\t"
|
||||
"gssdrc1 %[ftmp6], 0x00(%[src_ptr]) \n\t"
|
||||
|
||||
VP8_MBLOOP_HPSRAB_ADD(%[ff_ph_1200])
|
||||
"paddsb %[ftmp4], %[ftmp4], %[ftmp3] \n\t"
|
||||
"psubsb %[ftmp7], %[ftmp7], %[ftmp3] \n\t"
|
||||
"paddsb %[ftmp4], %[ftmp4], %[ftmp1] \n\t"
|
||||
"psubsb %[ftmp7], %[ftmp7], %[ftmp1] \n\t"
|
||||
"xor %[ftmp4], %[ftmp4], %[ff_pb_80] \n\t"
|
||||
"xor %[ftmp7], %[ftmp7], %[ff_pb_80] \n\t"
|
||||
|
||||
"gssdlc1 %[ftmp7], 0x07(%[addr0]) \n\t"
|
||||
"gssdrc1 %[ftmp7], 0x00(%[addr0]) \n\t"
|
||||
MMI_SLL(%[tmp0], %[src_pixel_step], 0x01)
|
||||
MMI_SUBU(%[addr1], %[src_ptr], %[tmp0])
|
||||
"gssdlc1 %[ftmp4], 0x07(%[addr1]) \n\t"
|
||||
"gssdrc1 %[ftmp4], 0x00(%[addr1]) \n\t"
|
||||
MMI_ADDU(%[src_ptr], %[src_ptr], %[src_pixel_step])
|
||||
"gssdlc1 %[ftmp7], 0x07(%[src_ptr]) \n\t"
|
||||
"gssdrc1 %[ftmp7], 0x00(%[src_ptr]) \n\t"
|
||||
MMI_SUBU(%[src_ptr], %[src_ptr], %[tmp0])
|
||||
MMI_ADDU(%[src_ptr], %[src_ptr], %[src_pixel_step])
|
||||
"gssdlc1 %[ftmp4], 0x07(%[src_ptr]) \n\t"
|
||||
"gssdrc1 %[ftmp4], 0x00(%[src_ptr]) \n\t"
|
||||
|
||||
VP8_MBLOOP_HPSRAB_ADD(%[ff_ph_0900])
|
||||
MMI_SLL(%[tmp0], %[src_pixel_step], 0x02)
|
||||
MMI_SUBU(%[addr1], %[addr0], %[tmp0])
|
||||
"gsldlc1 %[ftmp4], 0x07(%[addr1]) \n\t"
|
||||
"gsldrc1 %[ftmp4], 0x00(%[addr1]) \n\t"
|
||||
MMI_ADDU(%[addr1], %[addr0], %[src_pixel_step])
|
||||
"gsldlc1 %[ftmp7], 0x07(%[addr1]) \n\t"
|
||||
"gsldrc1 %[ftmp7], 0x00(%[addr1]) \n\t"
|
||||
"xor %[ftmp3], %[ftmp3], %[ff_pb_80] \n\t"
|
||||
"xor %[ftmp8], %[ftmp8], %[ff_pb_80] \n\t"
|
||||
"paddsb %[ftmp3], %[ftmp3], %[ftmp1] \n\t"
|
||||
"psubsb %[ftmp8], %[ftmp8], %[ftmp1] \n\t"
|
||||
"xor %[ftmp3], %[ftmp3], %[ff_pb_80] \n\t"
|
||||
"xor %[ftmp8], %[ftmp8], %[ff_pb_80] \n\t"
|
||||
MMI_ADDU(%[src_ptr], %[src_ptr], %[tmp0])
|
||||
"gssdlc1 %[ftmp8], 0x07(%[src_ptr]) \n\t"
|
||||
"gssdrc1 %[ftmp8], 0x00(%[src_ptr]) \n\t"
|
||||
MMI_SUBU(%[src_ptr], %[src_ptr], %[tmp0])
|
||||
MMI_SUBU(%[src_ptr], %[src_ptr], %[src_pixel_step])
|
||||
"gssdlc1 %[ftmp3], 0x07(%[src_ptr]) \n\t"
|
||||
"gssdrc1 %[ftmp3], 0x00(%[src_ptr]) \n\t"
|
||||
|
||||
"xor %[ftmp4], %[ftmp4], %[ff_pb_80] \n\t"
|
||||
"xor %[ftmp7], %[ftmp7], %[ff_pb_80] \n\t"
|
||||
"paddsb %[ftmp4], %[ftmp4], %[ftmp3] \n\t"
|
||||
"psubsb %[ftmp7], %[ftmp7], %[ftmp3] \n\t"
|
||||
"xor %[ftmp4], %[ftmp4], %[ff_pb_80] \n\t"
|
||||
"xor %[ftmp7], %[ftmp7], %[ff_pb_80] \n\t"
|
||||
MMI_ADDU(%[addr1], %[addr0], %[src_pixel_step])
|
||||
"gssdlc1 %[ftmp7], 0x07(%[addr1]) \n\t"
|
||||
"gssdrc1 %[ftmp7], 0x00(%[addr1]) \n\t"
|
||||
MMI_SUBU(%[addr1], %[addr0], %[tmp0])
|
||||
"gssdlc1 %[ftmp4], 0x07(%[addr1]) \n\t"
|
||||
"gssdrc1 %[ftmp4], 0x00(%[addr1]) \n\t"
|
||||
|
||||
"addiu %[count], %[count], -0x01 \n\t"
|
||||
MMI_SUBU(%[src_ptr], %[src_ptr], %[src_pixel_step])
|
||||
MMI_ADDIU(%[src_ptr], %[src_ptr], 0x08)
|
||||
"addiu %[count], %[count], -0x01 \n\t"
|
||||
"bnez %[count], 1b \n\t"
|
||||
: [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
|
||||
[ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
|
||||
[ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
|
||||
[ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
|
||||
[ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]),
|
||||
[tmp0]"=&r"(tmp[0]),
|
||||
[addr0]"=&r"(addr[0]), [addr1]"=&r"(addr[1]),
|
||||
[src_ptr]"+&r"(src_ptr), [count]"+&r"(count)
|
||||
: [limit]"r"(limit), [blimit]"r"(blimit),
|
||||
[srct]"r"(srct), [thresh]"r"(thresh),
|
||||
[ftmp10]"=&f"(ftmp[10]), [ftmp11]"=&f"(ftmp[11]),
|
||||
[ftmp12]"=&f"(ftmp[12]), [tmp0]"=&r"(tmp[0]),
|
||||
[src_ptr]"+&r"(src_ptr), [count]"+&r"(count)
|
||||
: [limit]"r"(limit), [blimit]"r"(blimit),
|
||||
[thresh]"r"(thresh),
|
||||
[src_pixel_step]"r"((mips_reg)src_pixel_step),
|
||||
[ff_pb_fe]"f"(ff_pb_fe), [ff_pb_80]"f"(ff_pb_80),
|
||||
[ff_pb_04]"f"(ff_pb_04), [ff_pb_03]"f"(ff_pb_03),
|
||||
[ff_ph_0900]"f"(ff_ph_0900), [ff_ph_1b00]"f"(ff_ph_1b00),
|
||||
[ff_ph_1200]"f"(ff_ph_1200), [ff_ph_003f]"f"(ff_ph_003f)
|
||||
[ff_pb_fe]"f"(ff_pb_fe), [ff_pb_80]"f"(ff_pb_80),
|
||||
[ff_pb_04]"f"(ff_pb_04), [ff_pb_03]"f"(ff_pb_03),
|
||||
[ff_ph_0900]"f"(ff_ph_0900), [ff_ph_1b00]"f"(ff_ph_1b00),
|
||||
[ff_ph_1200]"f"(ff_ph_1200), [ff_ph_003f]"f"(ff_ph_003f)
|
||||
: "memory"
|
||||
);
|
||||
}
|
||||
@@ -696,64 +678,60 @@ void vp8_mbloop_filter_vertical_edge_mmi(
|
||||
unsigned char *src_ptr, int src_pixel_step, const unsigned char *blimit,
|
||||
const unsigned char *limit, const unsigned char *thresh, int count) {
|
||||
mips_reg tmp[1];
|
||||
mips_reg addr[2];
|
||||
DECLARE_ALIGNED(8, const uint64_t, srct[1]);
|
||||
double ftmp[13];
|
||||
double ftmp[14];
|
||||
|
||||
__asm__ volatile (
|
||||
MMI_SLL(%[tmp0], %[src_pixel_step], 0x02)
|
||||
MMI_ADDU(%[src_ptr], %[src_ptr], %[tmp0])
|
||||
MMI_SUBU(%[src_ptr], %[src_ptr], 0x04)
|
||||
|
||||
"1: \n\t"
|
||||
MMI_SLL (%[tmp0], %[src_pixel_step], 0x01)
|
||||
MMI_ADDU(%[addr0], %[src_ptr], %[tmp0])
|
||||
"gsldlc1 %[ftmp11], 0x07(%[addr0]) \n\t"
|
||||
"gsldrc1 %[ftmp11], 0x00(%[addr0]) \n\t"
|
||||
MMI_ADDU(%[addr0], %[addr0], %[src_pixel_step])
|
||||
"gsldlc1 %[ftmp12], 0x07(%[addr0]) \n\t"
|
||||
"gsldrc1 %[ftmp12], 0x00(%[addr0]) \n\t"
|
||||
"punpcklbh %[ftmp1], %[ftmp11], %[ftmp12] \n\t"
|
||||
"punpckhbh %[ftmp2], %[ftmp11], %[ftmp12] \n\t"
|
||||
"gsldlc1 %[ftmp5], 0x07(%[src_ptr]) \n\t"
|
||||
"gsldrc1 %[ftmp5], 0x00(%[src_ptr]) \n\t"
|
||||
MMI_ADDU(%[src_ptr], %[src_ptr], %[src_pixel_step])
|
||||
"gsldlc1 %[ftmp6], 0x07(%[src_ptr]) \n\t"
|
||||
"gsldrc1 %[ftmp6], 0x00(%[src_ptr]) \n\t"
|
||||
MMI_ADDU(%[src_ptr], %[src_ptr], %[src_pixel_step])
|
||||
"gsldlc1 %[ftmp7], 0x07(%[src_ptr]) \n\t"
|
||||
"gsldrc1 %[ftmp7], 0x00(%[src_ptr]) \n\t"
|
||||
MMI_ADDU(%[src_ptr], %[src_ptr], %[src_pixel_step])
|
||||
"gsldlc1 %[ftmp8], 0x07(%[src_ptr]) \n\t"
|
||||
"gsldrc1 %[ftmp8], 0x00(%[src_ptr]) \n\t"
|
||||
|
||||
"gsldlc1 %[ftmp11], 0x07(%[src_ptr]) \n\t"
|
||||
"gsldrc1 %[ftmp11], 0x00(%[src_ptr]) \n\t"
|
||||
MMI_ADDU(%[addr0], %[src_ptr], %[src_pixel_step])
|
||||
"gsldlc1 %[ftmp12], 0x07(%[addr0]) \n\t"
|
||||
"gsldrc1 %[ftmp12], 0x00(%[addr0]) \n\t"
|
||||
"punpcklbh %[ftmp3], %[ftmp11], %[ftmp12] \n\t"
|
||||
"punpckhbh %[ftmp4], %[ftmp11], %[ftmp12] \n\t"
|
||||
"punpcklbh %[ftmp11], %[ftmp5], %[ftmp6] \n\t"
|
||||
"punpckhbh %[ftmp12], %[ftmp5], %[ftmp6] \n\t"
|
||||
"punpcklbh %[ftmp9], %[ftmp7], %[ftmp8] \n\t"
|
||||
"punpckhbh %[ftmp10], %[ftmp7], %[ftmp8] \n\t"
|
||||
|
||||
"punpcklhw %[ftmp5], %[ftmp4], %[ftmp2] \n\t"
|
||||
"punpckhhw %[ftmp6], %[ftmp4], %[ftmp2] \n\t"
|
||||
"punpcklhw %[ftmp7], %[ftmp3], %[ftmp1] \n\t"
|
||||
"punpckhhw %[ftmp8], %[ftmp3], %[ftmp1] \n\t"
|
||||
"punpcklhw %[ftmp1], %[ftmp12], %[ftmp10] \n\t"
|
||||
"punpckhhw %[ftmp2], %[ftmp12], %[ftmp10] \n\t"
|
||||
"punpcklhw %[ftmp3], %[ftmp11], %[ftmp9] \n\t"
|
||||
"punpckhhw %[ftmp4], %[ftmp11], %[ftmp9] \n\t"
|
||||
|
||||
MMI_SLL(%[tmp0], %[src_pixel_step], 0x01)
|
||||
MMI_SUBU(%[addr0], %[src_ptr], %[tmp0])
|
||||
"gsldlc1 %[ftmp11], 0x07(%[addr0]) \n\t"
|
||||
"gsldrc1 %[ftmp11], 0x00(%[addr0]) \n\t"
|
||||
MMI_SUBU(%[addr0], %[src_ptr], %[src_pixel_step])
|
||||
"gsldlc1 %[ftmp12], 0x07(%[addr0]) \n\t"
|
||||
"gsldrc1 %[ftmp12], 0x00(%[addr0]) \n\t"
|
||||
"punpcklbh %[ftmp9], %[ftmp11], %[ftmp12] \n\t"
|
||||
"punpckhbh %[ftmp10], %[ftmp11], %[ftmp12] \n\t"
|
||||
MMI_ADDU(%[src_ptr], %[src_ptr], %[src_pixel_step])
|
||||
"gsldlc1 %[ftmp5], 0x07(%[src_ptr]) \n\t"
|
||||
"gsldrc1 %[ftmp5], 0x00(%[src_ptr]) \n\t"
|
||||
MMI_ADDU(%[src_ptr], %[src_ptr], %[src_pixel_step])
|
||||
"gsldlc1 %[ftmp6], 0x07(%[src_ptr]) \n\t"
|
||||
"gsldrc1 %[ftmp6], 0x00(%[src_ptr]) \n\t"
|
||||
MMI_ADDU(%[src_ptr], %[src_ptr], %[src_pixel_step])
|
||||
"gsldlc1 %[ftmp7], 0x07(%[src_ptr]) \n\t"
|
||||
"gsldrc1 %[ftmp7], 0x00(%[src_ptr]) \n\t"
|
||||
MMI_ADDU(%[src_ptr], %[src_ptr], %[src_pixel_step])
|
||||
"gsldlc1 %[ftmp8], 0x07(%[src_ptr]) \n\t"
|
||||
"gsldrc1 %[ftmp8], 0x00(%[src_ptr]) \n\t"
|
||||
|
||||
MMI_SLL(%[tmp0], %[src_pixel_step], 0x02)
|
||||
MMI_SUBU(%[addr0], %[src_ptr], %[tmp0])
|
||||
"gsldlc1 %[ftmp11], 0x07(%[addr0]) \n\t"
|
||||
"gsldrc1 %[ftmp11], 0x00(%[addr0]) \n\t"
|
||||
MMI_ADDU(%[addr0], %[addr0], %[src_pixel_step])
|
||||
"gsldlc1 %[ftmp12], 0x07(%[addr0]) \n\t"
|
||||
"gsldrc1 %[ftmp12], 0x00(%[addr0]) \n\t"
|
||||
"punpcklbh %[ftmp0], %[ftmp11], %[ftmp12] \n\t"
|
||||
"punpckhbh %[ftmp11], %[ftmp11], %[ftmp12] \n\t"
|
||||
"punpcklbh %[ftmp11], %[ftmp5], %[ftmp6] \n\t"
|
||||
"punpckhbh %[ftmp12], %[ftmp5], %[ftmp6] \n\t"
|
||||
"punpcklbh %[ftmp9], %[ftmp7], %[ftmp8] \n\t"
|
||||
"punpckhbh %[ftmp10], %[ftmp7], %[ftmp8] \n\t"
|
||||
|
||||
"punpcklhw %[ftmp1], %[ftmp11], %[ftmp10] \n\t"
|
||||
"punpckhhw %[ftmp2], %[ftmp11], %[ftmp10] \n\t"
|
||||
"punpcklhw %[ftmp3], %[ftmp0], %[ftmp9] \n\t"
|
||||
"punpckhhw %[ftmp4], %[ftmp0], %[ftmp9] \n\t"
|
||||
"punpcklhw %[ftmp5], %[ftmp12], %[ftmp10] \n\t"
|
||||
"punpckhhw %[ftmp6], %[ftmp12], %[ftmp10] \n\t"
|
||||
"punpcklhw %[ftmp7], %[ftmp11], %[ftmp9] \n\t"
|
||||
"punpckhhw %[ftmp8], %[ftmp11], %[ftmp9] \n\t"
|
||||
|
||||
"gsldlc1 %[ftmp13], 0x07(%[limit]) \n\t"
|
||||
"gsldrc1 %[ftmp13], 0x00(%[limit]) \n\t"
|
||||
/* ftmp9:q0 ftmp10:q1 */
|
||||
"punpcklwd %[ftmp9], %[ftmp1], %[ftmp5] \n\t"
|
||||
"punpckhwd %[ftmp10], %[ftmp1], %[ftmp5] \n\t"
|
||||
@@ -771,60 +749,61 @@ void vp8_mbloop_filter_vertical_edge_mmi(
|
||||
"punpcklwd %[ftmp5], %[ftmp4], %[ftmp8] \n\t"
|
||||
"punpckhwd %[ftmp6], %[ftmp4], %[ftmp8] \n\t"
|
||||
|
||||
"gsldlc1 %[ftmp8], 0x07(%[limit]) \n\t"
|
||||
"gsldrc1 %[ftmp8], 0x00(%[limit]) \n\t"
|
||||
|
||||
/* abs (q3-q2) */
|
||||
"pasubub %[ftmp7], %[ftmp12], %[ftmp11] \n\t"
|
||||
"psubusb %[ftmp0], %[ftmp7], %[ftmp8] \n\t"
|
||||
"psubusb %[ftmp0], %[ftmp7], %[ftmp13] \n\t"
|
||||
/* abs (q2-q1) */
|
||||
"pasubub %[ftmp7], %[ftmp11], %[ftmp10] \n\t"
|
||||
"psubusb %[ftmp7], %[ftmp7], %[ftmp8] \n\t"
|
||||
"psubusb %[ftmp7], %[ftmp7], %[ftmp13] \n\t"
|
||||
"or %[ftmp0], %[ftmp0], %[ftmp7] \n\t"
|
||||
/* ftmp3: abs(q1-q0) */
|
||||
"pasubub %[ftmp3], %[ftmp10], %[ftmp9] \n\t"
|
||||
"psubusb %[ftmp7], %[ftmp3], %[ftmp8] \n\t"
|
||||
"psubusb %[ftmp7], %[ftmp3], %[ftmp13] \n\t"
|
||||
"or %[ftmp0], %[ftmp0], %[ftmp7] \n\t"
|
||||
/* ftmp4: abs(p1-p0) */
|
||||
"pasubub %[ftmp4], %[ftmp5], %[ftmp6] \n\t"
|
||||
"psubusb %[ftmp7], %[ftmp4], %[ftmp8] \n\t"
|
||||
"psubusb %[ftmp7], %[ftmp4], %[ftmp13] \n\t"
|
||||
"or %[ftmp0], %[ftmp0], %[ftmp7] \n\t"
|
||||
/* abs (p2-p1) */
|
||||
"pasubub %[ftmp7], %[ftmp2], %[ftmp5] \n\t"
|
||||
"psubusb %[ftmp7], %[ftmp7], %[ftmp8] \n\t"
|
||||
"psubusb %[ftmp7], %[ftmp7], %[ftmp13] \n\t"
|
||||
"or %[ftmp0], %[ftmp0], %[ftmp7] \n\t"
|
||||
/* abs (p3-p2) */
|
||||
"pasubub %[ftmp7], %[ftmp1], %[ftmp2] \n\t"
|
||||
"psubusb %[ftmp7], %[ftmp7], %[ftmp8] \n\t"
|
||||
"psubusb %[ftmp7], %[ftmp7], %[ftmp13] \n\t"
|
||||
"or %[ftmp0], %[ftmp0], %[ftmp7] \n\t"
|
||||
/* abs (p0-q0) */
|
||||
|
||||
"gsldlc1 %[ftmp13], 0x07(%[blimit]) \n\t"
|
||||
"gsldrc1 %[ftmp13], 0x00(%[blimit]) \n\t"
|
||||
"gsldlc1 %[ftmp7], 0x07(%[thresh]) \n\t"
|
||||
"gsldrc1 %[ftmp7], 0x00(%[thresh]) \n\t"
|
||||
/* abs (p0-q0) * 2 */
|
||||
"pasubub %[ftmp1], %[ftmp9], %[ftmp6] \n\t"
|
||||
"paddusb %[ftmp1], %[ftmp1], %[ftmp1] \n\t"
|
||||
/* abs (p1-q1) */
|
||||
/* abs (p1-q1) / 2 */
|
||||
"pasubub %[ftmp12], %[ftmp10], %[ftmp5] \n\t"
|
||||
"and %[ftmp12], %[ftmp12], %[ff_pb_fe] \n\t"
|
||||
"li %[tmp0], 0x01 \n\t"
|
||||
"mtc1 %[tmp0], %[ftmp8] \n\t"
|
||||
"psrlh %[ftmp12], %[ftmp12], %[ftmp8] \n\t"
|
||||
"paddusb %[ftmp12], %[ftmp1], %[ftmp12] \n\t"
|
||||
|
||||
"gsldlc1 %[ftmp8], 0x07(%[blimit]) \n\t"
|
||||
"gsldrc1 %[ftmp8], 0x00(%[blimit]) \n\t"
|
||||
"psubusb %[ftmp12], %[ftmp12], %[ftmp8] \n\t"
|
||||
"psubusb %[ftmp12], %[ftmp12], %[ftmp13] \n\t"
|
||||
"or %[ftmp0], %[ftmp0], %[ftmp12] \n\t"
|
||||
"xor %[ftmp12], %[ftmp12], %[ftmp12] \n\t"
|
||||
/* ftmp0: mask */
|
||||
"pcmpeqb %[ftmp0], %[ftmp0], %[ftmp12] \n\t"
|
||||
|
||||
"gsldlc1 %[ftmp8], 0x07(%[thresh]) \n\t"
|
||||
"gsldrc1 %[ftmp8], 0x00(%[thresh]) \n\t"
|
||||
/* ftmp3: abs(q1-q0) ftmp4: abs(p1-p0) */
|
||||
"psubusb %[ftmp4], %[ftmp4], %[ftmp8] \n\t"
|
||||
"psubusb %[ftmp3], %[ftmp3], %[ftmp8] \n\t"
|
||||
/* abs(p1-p0) - thresh */
|
||||
"psubusb %[ftmp4], %[ftmp4], %[ftmp7] \n\t"
|
||||
/* abs(q1-q0) - thresh */
|
||||
"psubusb %[ftmp3], %[ftmp3], %[ftmp7] \n\t"
|
||||
"or %[ftmp3], %[ftmp4], %[ftmp3] \n\t"
|
||||
"pcmpeqb %[ftmp3], %[ftmp3], %[ftmp12] \n\t"
|
||||
"pcmpeqb %[ftmp1], %[ftmp1], %[ftmp1] \n\t"
|
||||
/* ftmp1: hev */
|
||||
"xor %[ftmp1], %[ftmp3], %[ftmp1] \n\t"
|
||||
|
||||
/* ftmp2:ps2, ftmp5:ps1, ftmp6:ps0, ftmp9:qs0, ftmp10:qs1, ftmp11:qs2 */
|
||||
"xor %[ftmp11], %[ftmp11], %[ff_pb_80] \n\t"
|
||||
"xor %[ftmp10], %[ftmp10], %[ff_pb_80] \n\t"
|
||||
"xor %[ftmp9], %[ftmp9], %[ff_pb_80] \n\t"
|
||||
@@ -837,30 +816,30 @@ void vp8_mbloop_filter_vertical_edge_mmi(
|
||||
"paddsb %[ftmp3], %[ftmp3], %[ftmp4] \n\t"
|
||||
"paddsb %[ftmp3], %[ftmp3], %[ftmp4] \n\t"
|
||||
"paddsb %[ftmp3], %[ftmp3], %[ftmp4] \n\t"
|
||||
/* filter_value &= mask */
|
||||
"and %[ftmp0], %[ftmp0], %[ftmp3] \n\t"
|
||||
/* Filter2 = filter_value & hev */
|
||||
"and %[ftmp3], %[ftmp1], %[ftmp0] \n\t"
|
||||
/* filter_value &= ~hev */
|
||||
"pandn %[ftmp0], %[ftmp1], %[ftmp0] \n\t"
|
||||
|
||||
"paddsb %[ftmp4], %[ftmp3], %[ff_pb_04] \n\t"
|
||||
"li %[tmp0], 0x0b \n\t"
|
||||
"mtc1 %[tmp0], %[ftmp12] \n\t"
|
||||
"xor %[ftmp7], %[ftmp7], %[ftmp7] \n\t"
|
||||
"xor %[ftmp8], %[ftmp8], %[ftmp8] \n\t"
|
||||
"punpcklbh %[ftmp7], %[ftmp7], %[ftmp4] \n\t"
|
||||
"punpckhbh %[ftmp8], %[ftmp8], %[ftmp4] \n\t"
|
||||
"psrah %[ftmp7], %[ftmp7], %[ftmp12] \n\t"
|
||||
"psrah %[ftmp8], %[ftmp8], %[ftmp12] \n\t"
|
||||
"packsshb %[ftmp4], %[ftmp7], %[ftmp8] \n\t"
|
||||
/* ftmp9: qs0 */
|
||||
"psubsb %[ftmp9], %[ftmp9], %[ftmp4] \n\t"
|
||||
"paddsb %[ftmp3], %[ftmp3], %[ff_pb_03] \n\t"
|
||||
"xor %[ftmp7], %[ftmp7], %[ftmp7] \n\t"
|
||||
"xor %[ftmp8], %[ftmp8], %[ftmp8] \n\t"
|
||||
"punpcklbh %[ftmp7], %[ftmp7], %[ftmp3] \n\t"
|
||||
"punpckhbh %[ftmp8], %[ftmp8], %[ftmp3] \n\t"
|
||||
"psrah %[ftmp7], %[ftmp7], %[ftmp12] \n\t"
|
||||
"psrah %[ftmp8], %[ftmp8], %[ftmp12] \n\t"
|
||||
"packsshb %[ftmp3], %[ftmp7], %[ftmp8] \n\t"
|
||||
|
||||
/* ftmp6: ps0 */
|
||||
"paddsb %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
|
||||
|
||||
"li %[tmp0], 0x07 \n\t"
|
||||
@@ -872,8 +851,10 @@ void vp8_mbloop_filter_vertical_edge_mmi(
|
||||
"pmulhh %[ftmp8], %[ftmp8], %[ftmp1] \n\t"
|
||||
VP8_MBLOOP_VPSRAB_ADDT
|
||||
"psubsb %[ftmp4], %[ftmp9], %[ftmp3] \n\t"
|
||||
/* ftmp9: oq0 */
|
||||
"xor %[ftmp9], %[ftmp4], %[ff_pb_80] \n\t"
|
||||
"paddsb %[ftmp4], %[ftmp6], %[ftmp3] \n\t"
|
||||
/* ftmp6: op0 */
|
||||
"xor %[ftmp6], %[ftmp4], %[ff_pb_80] \n\t"
|
||||
|
||||
VP8_MBLOOP_VPSRAB_ADDH
|
||||
@@ -882,8 +863,10 @@ void vp8_mbloop_filter_vertical_edge_mmi(
|
||||
"pmulhh %[ftmp8], %[ftmp8], %[ftmp1] \n\t"
|
||||
VP8_MBLOOP_VPSRAB_ADDT
|
||||
"psubsb %[ftmp4], %[ftmp10], %[ftmp3] \n\t"
|
||||
/* ftmp10: oq1 */
|
||||
"xor %[ftmp10], %[ftmp4], %[ff_pb_80] \n\t"
|
||||
"paddsb %[ftmp4], %[ftmp5], %[ftmp3] \n\t"
|
||||
/* ftmp5: op1 */
|
||||
"xor %[ftmp5], %[ftmp4], %[ff_pb_80] \n\t"
|
||||
|
||||
VP8_MBLOOP_VPSRAB_ADDH
|
||||
@@ -891,8 +874,10 @@ void vp8_mbloop_filter_vertical_edge_mmi(
|
||||
"pmulhh %[ftmp8], %[ftmp8], %[ff_ph_0900] \n\t"
|
||||
VP8_MBLOOP_VPSRAB_ADDT
|
||||
"psubsb %[ftmp4], %[ftmp11], %[ftmp3] \n\t"
|
||||
/* ftmp11: oq2 */
|
||||
"xor %[ftmp11], %[ftmp4], %[ff_pb_80] \n\t"
|
||||
"paddsb %[ftmp4], %[ftmp2], %[ftmp3] \n\t"
|
||||
/* ftmp2: op2 */
|
||||
"xor %[ftmp2], %[ftmp4], %[ff_pb_80] \n\t"
|
||||
|
||||
"ldc1 %[ftmp12], 0x00(%[srct]) \n\t"
|
||||
@@ -916,41 +901,40 @@ void vp8_mbloop_filter_vertical_edge_mmi(
|
||||
"punpcklhw %[ftmp10], %[ftmp1], %[ftmp3] \n\t"
|
||||
"punpckhhw %[ftmp11], %[ftmp1], %[ftmp3] \n\t"
|
||||
|
||||
"punpcklwd %[ftmp0], %[ftmp6], %[ftmp10] \n\t"
|
||||
"punpckhwd %[ftmp1], %[ftmp6], %[ftmp10] \n\t"
|
||||
|
||||
"gssdlc1 %[ftmp0], 0x07(%[src_ptr]) \n\t"
|
||||
"gssdrc1 %[ftmp0], 0x00(%[src_ptr]) \n\t"
|
||||
MMI_ADDU(%[addr0], %[src_ptr], %[src_pixel_step])
|
||||
"gssdlc1 %[ftmp1], 0x07(%[addr0]) \n\t"
|
||||
"gssdrc1 %[ftmp1], 0x00(%[addr0]) \n\t"
|
||||
|
||||
"punpcklwd %[ftmp0], %[ftmp7], %[ftmp11] \n\t"
|
||||
"punpckhwd %[ftmp1], %[ftmp7], %[ftmp11] \n\t"
|
||||
MMI_ADDU(%[addr0], %[addr0], %[src_pixel_step])
|
||||
"gssdlc1 %[ftmp0], 0x07(%[addr0]) \n\t"
|
||||
"gssdrc1 %[ftmp0], 0x00(%[addr0]) \n\t"
|
||||
MMI_ADDU(%[addr0], %[addr0], %[src_pixel_step])
|
||||
"gssdlc1 %[ftmp1], 0x07(%[addr0]) \n\t"
|
||||
"gssdrc1 %[ftmp1], 0x00(%[addr0]) \n\t"
|
||||
"gssdlc1 %[ftmp1], 0x07(%[src_ptr]) \n\t"
|
||||
"gssdrc1 %[ftmp1], 0x00(%[src_ptr]) \n\t"
|
||||
MMI_SUBU(%[src_ptr], %[src_ptr], %[src_pixel_step])
|
||||
"gssdlc1 %[ftmp0], 0x07(%[src_ptr]) \n\t"
|
||||
"gssdrc1 %[ftmp0], 0x00(%[src_ptr]) \n\t"
|
||||
|
||||
"punpcklwd %[ftmp0], %[ftmp6], %[ftmp10] \n\t"
|
||||
"punpckhwd %[ftmp1], %[ftmp6], %[ftmp10] \n\t"
|
||||
MMI_SUBU(%[src_ptr], %[src_ptr], %[src_pixel_step])
|
||||
"gssdlc1 %[ftmp1], 0x07(%[src_ptr]) \n\t"
|
||||
"gssdrc1 %[ftmp1], 0x00(%[src_ptr]) \n\t"
|
||||
MMI_SUBU(%[src_ptr], %[src_ptr], %[src_pixel_step])
|
||||
"gssdlc1 %[ftmp0], 0x07(%[src_ptr]) \n\t"
|
||||
"gssdrc1 %[ftmp0], 0x00(%[src_ptr]) \n\t"
|
||||
|
||||
"punpcklwd %[ftmp1], %[ftmp5], %[ftmp9] \n\t"
|
||||
"punpckhwd %[ftmp0], %[ftmp5], %[ftmp9] \n\t"
|
||||
MMI_SUBU(%[addr0], %[src_ptr], %[src_pixel_step])
|
||||
"gssdlc1 %[ftmp0], 0x07(%[addr0]) \n\t"
|
||||
"gssdrc1 %[ftmp0], 0x00(%[addr0]) \n\t"
|
||||
MMI_SUBU(%[addr0], %[addr0], %[src_pixel_step])
|
||||
"gssdlc1 %[ftmp1], 0x07(%[addr0]) \n\t"
|
||||
"gssdrc1 %[ftmp1], 0x00(%[addr0]) \n\t"
|
||||
MMI_SUBU(%[src_ptr], %[src_ptr], %[src_pixel_step])
|
||||
"gssdlc1 %[ftmp0], 0x07(%[src_ptr]) \n\t"
|
||||
"gssdrc1 %[ftmp0], 0x00(%[src_ptr]) \n\t"
|
||||
MMI_SUBU(%[src_ptr], %[src_ptr], %[src_pixel_step])
|
||||
"gssdlc1 %[ftmp1], 0x07(%[src_ptr]) \n\t"
|
||||
"gssdrc1 %[ftmp1], 0x00(%[src_ptr]) \n\t"
|
||||
|
||||
"punpcklwd %[ftmp1], %[ftmp4], %[ftmp8] \n\t"
|
||||
"punpckhwd %[ftmp0], %[ftmp4], %[ftmp8] \n\t"
|
||||
MMI_SUBU(%[addr0], %[addr0], %[src_pixel_step])
|
||||
"gssdlc1 %[ftmp0], 0x07(%[addr0]) \n\t"
|
||||
"gssdrc1 %[ftmp0], 0x00(%[addr0]) \n\t"
|
||||
MMI_SUBU(%[addr0], %[addr0], %[src_pixel_step])
|
||||
"gssdlc1 %[ftmp1], 0x07(%[addr0]) \n\t"
|
||||
"gssdrc1 %[ftmp1], 0x00(%[addr0]) \n\t"
|
||||
MMI_SUBU(%[src_ptr], %[src_ptr], %[src_pixel_step])
|
||||
"gssdlc1 %[ftmp0], 0x07(%[src_ptr]) \n\t"
|
||||
"gssdrc1 %[ftmp0], 0x00(%[src_ptr]) \n\t"
|
||||
MMI_SUBU(%[src_ptr], %[src_ptr], %[src_pixel_step])
|
||||
"gssdlc1 %[ftmp1], 0x07(%[src_ptr]) \n\t"
|
||||
"gssdrc1 %[ftmp1], 0x00(%[src_ptr]) \n\t"
|
||||
"addiu %[count], %[count], -0x01 \n\t"
|
||||
|
||||
MMI_SLL(%[tmp0], %[src_pixel_step], 0x03)
|
||||
@@ -962,9 +946,9 @@ void vp8_mbloop_filter_vertical_edge_mmi(
|
||||
[ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
|
||||
[ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]),
|
||||
[ftmp10]"=&f"(ftmp[10]), [ftmp11]"=&f"(ftmp[11]),
|
||||
[ftmp12]"=&f"(ftmp[12]), [tmp0]"=&r"(tmp[0]),
|
||||
[addr0]"=&r"(addr[0]),
|
||||
[src_ptr]"+&r"(src_ptr), [count]"+&r"(count)
|
||||
[ftmp12]"=&f"(ftmp[12]), [ftmp13]"=&f"(ftmp[13]),
|
||||
[tmp0]"=&r"(tmp[0]), [src_ptr]"+&r"(src_ptr),
|
||||
[count]"+&r"(count)
|
||||
: [limit]"r"(limit), [blimit]"r"(blimit),
|
||||
[srct]"r"(srct), [thresh]"r"(thresh),
|
||||
[src_pixel_step]"r"((mips_reg)src_pixel_step),
|
||||
|
@@ -86,6 +86,7 @@ static INLINE void vp8_filter_block1d_h6_mmi(unsigned char *src_ptr,
|
||||
register double ftmp8 asm("$f18");
|
||||
register double ftmp9 asm("$f20");
|
||||
register double ftmp10 asm("$f22");
|
||||
register double ftmp11 asm("$f24");
|
||||
#else
|
||||
register double fzero asm("$f0");
|
||||
register double ftmp0 asm("$f1");
|
||||
@@ -99,6 +100,7 @@ static INLINE void vp8_filter_block1d_h6_mmi(unsigned char *src_ptr,
|
||||
register double ftmp8 asm("$f9");
|
||||
register double ftmp9 asm("$f10");
|
||||
register double ftmp10 asm("$f11");
|
||||
register double ftmp11 asm("$f12");
|
||||
#endif // _MIPS_SIM == _ABIO32
|
||||
|
||||
__asm__ volatile (
|
||||
@@ -112,11 +114,13 @@ static INLINE void vp8_filter_block1d_h6_mmi(unsigned char *src_ptr,
|
||||
"li %[tmp0], 0x07 \n\t"
|
||||
"mtc1 %[tmp0], %[ftmp7] \n\t"
|
||||
"li %[tmp0], 0x08 \n\t"
|
||||
"mtc1 %[tmp0], %[ftmp10] \n\t"
|
||||
"mtc1 %[tmp0], %[ftmp11] \n\t"
|
||||
|
||||
"1: \n\t"
|
||||
"gsldlc1 %[ftmp9], 0x05(%[src_ptr]) \n\t"
|
||||
"gsldrc1 %[ftmp9], -0x02(%[src_ptr]) \n\t"
|
||||
"gsldrc1 %[ftmp9], -0x02(%[src_ptr]) \n\t"
|
||||
"gsldlc1 %[ftmp10], 0x06(%[src_ptr]) \n\t"
|
||||
"gsldrc1 %[ftmp10], -0x01(%[src_ptr]) \n\t"
|
||||
|
||||
"punpcklbh %[ftmp6], %[ftmp9], %[fzero] \n\t"
|
||||
"pmullh %[ftmp8], %[ftmp6], %[ftmp0] \n\t"
|
||||
@@ -125,24 +129,21 @@ static INLINE void vp8_filter_block1d_h6_mmi(unsigned char *src_ptr,
|
||||
"pmullh %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
|
||||
"paddsh %[ftmp8], %[ftmp8], %[ftmp6] \n\t"
|
||||
|
||||
"gsldlc1 %[ftmp9], 0x06(%[src_ptr]) \n\t"
|
||||
"gsldrc1 %[ftmp9], -0x01(%[src_ptr]) \n\t"
|
||||
|
||||
"punpcklbh %[ftmp6], %[ftmp9], %[fzero] \n\t"
|
||||
"punpcklbh %[ftmp6], %[ftmp10], %[fzero] \n\t"
|
||||
"pmullh %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
|
||||
"paddsh %[ftmp8], %[ftmp8], %[ftmp6] \n\t"
|
||||
|
||||
"punpckhbh %[ftmp6], %[ftmp9], %[fzero] \n\t"
|
||||
"punpckhbh %[ftmp6], %[ftmp10], %[fzero] \n\t"
|
||||
"pmullh %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
|
||||
"paddsh %[ftmp8], %[ftmp8], %[ftmp6] \n\t"
|
||||
|
||||
"dsrl %[ftmp9], %[ftmp9], %[ftmp10] \n\t"
|
||||
"punpcklbh %[ftmp6], %[ftmp9], %[fzero] \n\t"
|
||||
"dsrl %[ftmp10], %[ftmp10], %[ftmp11] \n\t"
|
||||
"punpcklbh %[ftmp6], %[ftmp10], %[fzero] \n\t"
|
||||
"pmullh %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
|
||||
"paddsh %[ftmp8], %[ftmp8], %[ftmp6] \n\t"
|
||||
|
||||
"dsrl %[ftmp9], %[ftmp9], %[ftmp10] \n\t"
|
||||
"punpcklbh %[ftmp6], %[ftmp9], %[fzero] \n\t"
|
||||
"dsrl %[ftmp10], %[ftmp10], %[ftmp11] \n\t"
|
||||
"punpcklbh %[ftmp6], %[ftmp10], %[fzero] \n\t"
|
||||
"pmullh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
|
||||
"paddsh %[ftmp8], %[ftmp8], %[ftmp6] \n\t"
|
||||
|
||||
@@ -163,8 +164,9 @@ static INLINE void vp8_filter_block1d_h6_mmi(unsigned char *src_ptr,
|
||||
[ftmp5]"=&f"(ftmp5), [ftmp6]"=&f"(ftmp6),
|
||||
[ftmp7]"=&f"(ftmp7), [ftmp8]"=&f"(ftmp8),
|
||||
[ftmp9]"=&f"(ftmp9), [ftmp10]"=&f"(ftmp10),
|
||||
[tmp0]"=&r"(tmp[0]), [src_ptr]"+&r"(src_ptr),
|
||||
[output_ptr]"+&r"(output_ptr), [output_height]"+&r"(output_height)
|
||||
[ftmp11]"=&f"(ftmp11), [tmp0]"=&r"(tmp[0]),
|
||||
[output_ptr]"+&r"(output_ptr), [output_height]"+&r"(output_height),
|
||||
[src_ptr]"+&r"(src_ptr)
|
||||
: [src_pixels_per_line]"r"((mips_reg)src_pixels_per_line),
|
||||
[vp8_filter]"r"(vp8_filter), [output_width]"r"(output_width),
|
||||
[ff_ph_40]"f"(ff_ph_40)
|
||||
@@ -190,6 +192,11 @@ static INLINE void vp8_filter_block1dc_v6_mmi(
|
||||
register double ftmp6 asm("$f14");
|
||||
register double ftmp7 asm("$f16");
|
||||
register double ftmp8 asm("$f18");
|
||||
register double ftmp9 asm("$f20");
|
||||
register double ftmp10 asm("$f22");
|
||||
register double ftmp11 asm("$f24");
|
||||
register double ftmp12 asm("$f26");
|
||||
register double ftmp13 asm("$f28");
|
||||
#else
|
||||
register double fzero asm("$f0");
|
||||
register double ftmp0 asm("$f1");
|
||||
@@ -201,6 +208,11 @@ static INLINE void vp8_filter_block1dc_v6_mmi(
|
||||
register double ftmp6 asm("$f7");
|
||||
register double ftmp7 asm("$f8");
|
||||
register double ftmp8 asm("$f9");
|
||||
register double ftmp9 asm("$f10");
|
||||
register double ftmp10 asm("$f11");
|
||||
register double ftmp11 asm("$f12");
|
||||
register double ftmp12 asm("$f13");
|
||||
register double ftmp13 asm("$f14");
|
||||
#endif // _MIPS_SIM == _ABIO32
|
||||
|
||||
__asm__ volatile (
|
||||
@@ -210,52 +222,56 @@ static INLINE void vp8_filter_block1dc_v6_mmi(
|
||||
"ldc1 %[ftmp3], 0x30(%[vp8_filter]) \n\t"
|
||||
"ldc1 %[ftmp4], 0x40(%[vp8_filter]) \n\t"
|
||||
"ldc1 %[ftmp5], 0x50(%[vp8_filter]) \n\t"
|
||||
MMI_SUBU(%[src_ptr], %[src_ptr], %[pixels_per_line_x2])
|
||||
"xor %[fzero], %[fzero], %[fzero] \n\t"
|
||||
"li %[tmp0], 0x07 \n\t"
|
||||
"mtc1 %[tmp0], %[ftmp7] \n\t"
|
||||
"mtc1 %[tmp0], %[ftmp13] \n\t"
|
||||
|
||||
/* In order to make full use of memory load delay slot,
|
||||
* Operation of memory loading and calculating has been rearranged.
|
||||
*/
|
||||
"1: \n\t"
|
||||
"gsldlc1 %[ftmp6], 0x07(%[src_ptr]) \n\t"
|
||||
"gsldrc1 %[ftmp6], 0x00(%[src_ptr]) \n\t"
|
||||
"pmullh %[ftmp8], %[ftmp6], %[ftmp0] \n\t"
|
||||
|
||||
MMI_ADDU(%[addr0], %[src_ptr], %[pixels_per_line])
|
||||
"gsldlc1 %[ftmp6], 0x07(%[addr0]) \n\t"
|
||||
"gsldrc1 %[ftmp6], 0x00(%[addr0]) \n\t"
|
||||
"pmullh %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
|
||||
"paddsh %[ftmp8], %[ftmp8], %[ftmp6] \n\t"
|
||||
|
||||
"gsldlc1 %[ftmp7], 0x07(%[addr0]) \n\t"
|
||||
"gsldrc1 %[ftmp7], 0x00(%[addr0]) \n\t"
|
||||
MMI_ADDU(%[addr0], %[src_ptr], %[pixels_per_line_x2])
|
||||
"gsldlc1 %[ftmp6], 0x07(%[addr0]) \n\t"
|
||||
"gsldrc1 %[ftmp6], 0x00(%[addr0]) \n\t"
|
||||
"pmullh %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
|
||||
"paddsh %[ftmp8], %[ftmp8], %[ftmp6] \n\t"
|
||||
"gsldlc1 %[ftmp8], 0x07(%[addr0]) \n\t"
|
||||
"gsldrc1 %[ftmp8], 0x00(%[addr0]) \n\t"
|
||||
|
||||
MMI_ADDU(%[addr0], %[src_ptr], %[pixels_per_line_x4])
|
||||
"gsldlc1 %[ftmp6], 0x07(%[addr0]) \n\t"
|
||||
"gsldrc1 %[ftmp6], 0x00(%[addr0]) \n\t"
|
||||
"pmullh %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
|
||||
"paddsh %[ftmp8], %[ftmp8], %[ftmp6] \n\t"
|
||||
|
||||
"gsldlc1 %[ftmp9], 0x07(%[addr0]) \n\t"
|
||||
"gsldrc1 %[ftmp9], 0x00(%[addr0]) \n\t"
|
||||
MMI_ADDU(%[src_ptr], %[src_ptr], %[pixels_per_line])
|
||||
MMI_ADDU(%[addr0], %[src_ptr], %[pixels_per_line_x2])
|
||||
"gsldlc1 %[ftmp6], 0x07(%[addr0]) \n\t"
|
||||
"gsldrc1 %[ftmp6], 0x00(%[addr0]) \n\t"
|
||||
"pmullh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
|
||||
"paddsh %[ftmp8], %[ftmp8], %[ftmp6] \n\t"
|
||||
|
||||
"gsldlc1 %[ftmp10], 0x07(%[addr0]) \n\t"
|
||||
"gsldrc1 %[ftmp10], 0x00(%[addr0]) \n\t"
|
||||
MMI_ADDU(%[addr0], %[src_ptr], %[pixels_per_line_x4])
|
||||
"gsldlc1 %[ftmp6], 0x07(%[addr0]) \n\t"
|
||||
"gsldrc1 %[ftmp6], 0x00(%[addr0]) \n\t"
|
||||
"pmullh %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
|
||||
"paddsh %[ftmp8], %[ftmp8], %[ftmp6] \n\t"
|
||||
"gsldlc1 %[ftmp11], 0x07(%[addr0]) \n\t"
|
||||
"gsldrc1 %[ftmp11], 0x00(%[addr0]) \n\t"
|
||||
|
||||
"paddsh %[ftmp8], %[ftmp8], %[ff_ph_40] \n\t"
|
||||
"psrah %[ftmp8], %[ftmp8], %[ftmp7] \n\t"
|
||||
"packushb %[ftmp8], %[ftmp8], %[fzero] \n\t"
|
||||
"gsswlc1 %[ftmp8], 0x03(%[output_ptr]) \n\t"
|
||||
"gsswrc1 %[ftmp8], 0x00(%[output_ptr]) \n\t"
|
||||
"pmullh %[ftmp12], %[ftmp6], %[ftmp0] \n\t"
|
||||
|
||||
"pmullh %[ftmp7], %[ftmp7], %[ftmp1] \n\t"
|
||||
"paddsh %[ftmp12], %[ftmp12], %[ftmp7] \n\t"
|
||||
|
||||
"pmullh %[ftmp8], %[ftmp8], %[ftmp2] \n\t"
|
||||
"paddsh %[ftmp12], %[ftmp12], %[ftmp8] \n\t"
|
||||
|
||||
"pmullh %[ftmp9], %[ftmp9], %[ftmp4] \n\t"
|
||||
"paddsh %[ftmp12], %[ftmp12], %[ftmp9] \n\t"
|
||||
|
||||
"pmullh %[ftmp10], %[ftmp10], %[ftmp3] \n\t"
|
||||
"paddsh %[ftmp12], %[ftmp12], %[ftmp10] \n\t"
|
||||
|
||||
"pmullh %[ftmp11], %[ftmp11], %[ftmp5] \n\t"
|
||||
"paddsh %[ftmp12], %[ftmp12], %[ftmp11] \n\t"
|
||||
|
||||
"paddsh %[ftmp12], %[ftmp12], %[ff_ph_40] \n\t"
|
||||
"psrah %[ftmp12], %[ftmp12], %[ftmp13] \n\t"
|
||||
"packushb %[ftmp12], %[ftmp12], %[fzero] \n\t"
|
||||
"gsswlc1 %[ftmp12], 0x03(%[output_ptr]) \n\t"
|
||||
"gsswrc1 %[ftmp12], 0x00(%[output_ptr]) \n\t"
|
||||
|
||||
MMI_ADDIU(%[output_height], %[output_height], -0x01)
|
||||
MMI_ADDU(%[output_ptr], %[output_ptr], %[output_pitch])
|
||||
@@ -265,9 +281,11 @@ static INLINE void vp8_filter_block1dc_v6_mmi(
|
||||
[ftmp3]"=&f"(ftmp3), [ftmp4]"=&f"(ftmp4),
|
||||
[ftmp5]"=&f"(ftmp5), [ftmp6]"=&f"(ftmp6),
|
||||
[ftmp7]"=&f"(ftmp7), [ftmp8]"=&f"(ftmp8),
|
||||
[tmp0]"=&r"(tmp[0]), [addr0]"=&r"(addr[0]),
|
||||
[src_ptr]"+&r"(src_ptr), [output_ptr]"+&r"(output_ptr),
|
||||
[output_height]"+&r"(output_height)
|
||||
[ftmp9]"=&f"(ftmp9), [ftmp10]"=&f"(ftmp10),
|
||||
[ftmp11]"=&f"(ftmp11), [ftmp12]"=&f"(ftmp12),
|
||||
[ftmp13]"=&f"(ftmp13), [tmp0]"=&r"(tmp[0]),
|
||||
[addr0]"=&r"(addr[0]), [src_ptr]"+&r"(src_ptr),
|
||||
[output_ptr]"+&r"(output_ptr), [output_height]"+&r"(output_height)
|
||||
: [pixels_per_line]"r"((mips_reg)pixels_per_line),
|
||||
[pixels_per_line_x2]"r"((mips_reg)(pixels_per_line<<1)),
|
||||
[pixels_per_line_x4]"r"((mips_reg)(pixels_per_line<<2)),
|
||||
@@ -301,6 +319,7 @@ static INLINE void vp8_filter_block1d_h6_filter0_mmi(
|
||||
"1: \n\t"
|
||||
"gsldlc1 %[ftmp0], 0x07(%[src_ptr]) \n\t"
|
||||
"gsldrc1 %[ftmp0], 0x00(%[src_ptr]) \n\t"
|
||||
MMI_ADDU(%[src_ptr], %[src_ptr], %[src_pixels_per_line])
|
||||
|
||||
"punpcklbh %[ftmp1], %[ftmp0], %[fzero] \n\t"
|
||||
"gssdlc1 %[ftmp1], 0x07(%[output_ptr]) \n\t"
|
||||
@@ -308,7 +327,6 @@ static INLINE void vp8_filter_block1d_h6_filter0_mmi(
|
||||
|
||||
"addiu %[output_height], %[output_height], -0x01 \n\t"
|
||||
MMI_ADDU(%[output_ptr], %[output_ptr], %[output_width])
|
||||
MMI_ADDU(%[src_ptr], %[src_ptr], %[src_pixels_per_line])
|
||||
"bnez %[output_height], 1b \n\t"
|
||||
: [fzero]"=&f"(fzero), [ftmp0]"=&f"(ftmp0),
|
||||
[ftmp1]"=&f"(ftmp1), [src_ptr]"+&r"(src_ptr),
|
||||
@@ -338,12 +356,12 @@ static INLINE void vp8_filter_block1dc_v6_filter0_mmi(
|
||||
"1: \n\t"
|
||||
"gsldlc1 %[ftmp0], 0x07(%[src_ptr]) \n\t"
|
||||
"gsldrc1 %[ftmp0], 0x00(%[src_ptr]) \n\t"
|
||||
MMI_ADDU(%[src_ptr], %[src_ptr], %[pixels_per_line])
|
||||
MMI_ADDIU(%[output_height], %[output_height], -0x01)
|
||||
"packushb %[ftmp1], %[ftmp0], %[fzero] \n\t"
|
||||
"gsswlc1 %[ftmp1], 0x03(%[output_ptr]) \n\t"
|
||||
"gsswrc1 %[ftmp1], 0x00(%[output_ptr]) \n\t"
|
||||
|
||||
MMI_ADDU(%[src_ptr], %[src_ptr], %[pixels_per_line])
|
||||
MMI_ADDIU(%[output_height], %[output_height], -0x01)
|
||||
MMI_ADDU(%[output_ptr], %[output_ptr], %[output_pitch])
|
||||
"bnez %[output_height], 1b \n\t"
|
||||
: [fzero]"=&f"(fzero), [ftmp0]"=&f"(ftmp0),
|
||||
@@ -386,7 +404,7 @@ static INLINE void vp8_filter_block1dc_v6_filter0_mmi(
|
||||
} \
|
||||
} else { \
|
||||
for (i = 0; i < loop; ++i) { \
|
||||
vp8_filter_block1dc_v6_mmi(FData2 + n * 2 + i * 4, dst_ptr + i * 4, m, \
|
||||
vp8_filter_block1dc_v6_mmi(FData2 + i * 4, dst_ptr + i * 4, m, \
|
||||
dst_pitch, n * 2, VFilter); \
|
||||
} \
|
||||
} \
|
||||
|
@@ -11,28 +11,16 @@
#include "entropy.h"

const int vp8_mode_contexts[6][4] = {
  {
    /* 0 */
    7, 1, 1, 143,
  },
  {
    /* 1 */
    14, 18, 14, 107,
  },
  {
    /* 2 */
    135, 64, 57, 68,
  },
  {
    /* 3 */
    60, 56, 128, 65,
  },
  {
    /* 4 */
    159, 134, 128, 34,
  },
  {
    /* 5 */
    234, 188, 128, 28,
  },
  { /* 0 */
    7, 1, 1, 143 },
  { /* 1 */
    14, 18, 14, 107 },
  { /* 2 */
    135, 64, 57, 68 },
  { /* 3 */
    60, 56, 128, 65 },
  { /* 4 */
    159, 134, 128, 34 },
  { /* 5 */
    234, 188, 128, 28 },
};
|
@@ -1,3 +1,13 @@
##
## Copyright (c) 2017 The WebM project authors. All Rights Reserved.
##
## Use of this source code is governed by a BSD-style license
## that can be found in the LICENSE file in the root of the source
## tree. An additional intellectual property rights grant can be found
## in the file PATENTS. All contributing project authors may
## be found in the AUTHORS file in the root of the source tree.
##

sub vp8_common_forward_decls() {
print <<EOF
/*
|
@@ -6,7 +6,7 @@
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */
 */

#ifndef VP8_COMMON_VP8_ENTROPYMODEDATA_H_
#define VP8_COMMON_VP8_ENTROPYMODEDATA_H_
|
@@ -95,9 +95,7 @@ void vp8_sixtap_predict4x4_mmx(unsigned char *src_ptr, int src_pixels_per_line,
|
||||
void vp8_sixtap_predict16x16_sse2(unsigned char *src_ptr,
|
||||
int src_pixels_per_line, int xoffset,
|
||||
int yoffset, unsigned char *dst_ptr,
|
||||
int dst_pitch
|
||||
|
||||
) {
|
||||
int dst_pitch) {
|
||||
DECLARE_ALIGNED(16, unsigned short,
|
||||
FData2[24 * 24]); /* Temp data bufffer used in filtering */
|
||||
|
||||
@@ -236,9 +234,7 @@ extern void vp8_filter_block1d4_v6_ssse3(unsigned char *src_ptr,
|
||||
void vp8_sixtap_predict16x16_ssse3(unsigned char *src_ptr,
|
||||
int src_pixels_per_line, int xoffset,
|
||||
int yoffset, unsigned char *dst_ptr,
|
||||
int dst_pitch
|
||||
|
||||
) {
|
||||
int dst_pitch) {
|
||||
DECLARE_ALIGNED(16, unsigned char, FData2[24 * 24]);
|
||||
|
||||
if (xoffset) {
|
||||
@@ -351,8 +347,8 @@ void vp8_sixtap_predict4x4_ssse3(unsigned char *src_ptr,
|
||||
yoffset);
|
||||
} else {
|
||||
/* ssse3 second-pass only function couldn't handle (xoffset==0 &&
|
||||
* yoffset==0) case correctly. Add copy function here to guarantee
|
||||
* six-tap function handles all possible offsets. */
|
||||
* yoffset==0) case correctly. Add copy function here to guarantee
|
||||
* six-tap function handles all possible offsets. */
|
||||
int r;
|
||||
|
||||
for (r = 0; r < 4; ++r) {
|
||||
|
@@ -674,7 +674,7 @@ static unsigned int read_partition_size(VP8D_COMP *pbi,

static int read_is_valid(const unsigned char *start, size_t len,
                         const unsigned char *end) {
  return (start + len > start && start + len <= end);
  return len != 0 && end > start && len <= (size_t)(end - start);
}

static unsigned int read_available_partition_size(
|
@@ -34,7 +34,9 @@ typedef struct {
/* Structure used to hold all the overlaps of a macroblock. The overlaps of a
 * macroblock is further divided into block overlaps.
 */
typedef struct { B_OVERLAP overlaps[16]; } MB_OVERLAP;
typedef struct {
  B_OVERLAP overlaps[16];
} MB_OVERLAP;

/* Structure for keeping track of motion vectors and which reference frame they
 * refer to. Used for motion vector interpolation.

@@ -31,7 +31,9 @@ typedef struct {
  void *ptr2;
} DECODETHREAD_DATA;

typedef struct { MACROBLOCKD mbd; } MB_ROW_DEC;
typedef struct {
  MACROBLOCKD mbd;
} MB_ROW_DEC;

typedef struct {
  int enabled;
|
@@ -739,24 +739,21 @@ void vp8mt_alloc_temp_buffers(VP8D_COMP *pbi, int width, int prev_mb_rows) {
|
||||
/* Allocate memory for above_row buffers. */
|
||||
CALLOC_ARRAY(pbi->mt_yabove_row, pc->mb_rows);
|
||||
for (i = 0; i < pc->mb_rows; ++i)
|
||||
CHECK_MEM_ERROR(
|
||||
pbi->mt_yabove_row[i],
|
||||
vpx_memalign(
|
||||
16, sizeof(unsigned char) * (width + (VP8BORDERINPIXELS << 1))));
|
||||
CHECK_MEM_ERROR(pbi->mt_yabove_row[i],
|
||||
vpx_memalign(16, sizeof(unsigned char) *
|
||||
(width + (VP8BORDERINPIXELS << 1))));
|
||||
|
||||
CALLOC_ARRAY(pbi->mt_uabove_row, pc->mb_rows);
|
||||
for (i = 0; i < pc->mb_rows; ++i)
|
||||
CHECK_MEM_ERROR(
|
||||
pbi->mt_uabove_row[i],
|
||||
vpx_memalign(16,
|
||||
sizeof(unsigned char) * (uv_width + VP8BORDERINPIXELS)));
|
||||
CHECK_MEM_ERROR(pbi->mt_uabove_row[i],
|
||||
vpx_memalign(16, sizeof(unsigned char) *
|
||||
(uv_width + VP8BORDERINPIXELS)));
|
||||
|
||||
CALLOC_ARRAY(pbi->mt_vabove_row, pc->mb_rows);
|
||||
for (i = 0; i < pc->mb_rows; ++i)
|
||||
CHECK_MEM_ERROR(
|
||||
pbi->mt_vabove_row[i],
|
||||
vpx_memalign(16,
|
||||
sizeof(unsigned char) * (uv_width + VP8BORDERINPIXELS)));
|
||||
CHECK_MEM_ERROR(pbi->mt_vabove_row[i],
|
||||
vpx_memalign(16, sizeof(unsigned char) *
|
||||
(uv_width + VP8BORDERINPIXELS)));
|
||||
|
||||
/* Allocate memory for left_col buffers. */
|
||||
CALLOC_ARRAY(pbi->mt_yleft_col, pc->mb_rows);
|
||||
|
@@ -9,12 +9,12 @@
 */

/****************************************************************************
 *
 * Module Title : boolhuff.h
 *
 * Description : Bool Coder header file.
 *
 ****************************************************************************/
 *
 * Module Title : boolhuff.h
 *
 * Description : Bool Coder header file.
 *
 ****************************************************************************/
#ifndef VP8_ENCODER_BOOLHUFF_H_
#define VP8_ENCODER_BOOLHUFF_H_
|
||||
|
@@ -989,11 +989,11 @@ static int estimate_max_q(VP8_COMP *cpi, FIRSTPASS_STATS *fpstats,
|
||||
bits_per_mb_at_this_q =
|
||||
vp8_bits_per_mb[INTER_FRAME][Q] + overhead_bits_per_mb;
|
||||
|
||||
bits_per_mb_at_this_q = (int)(.5 +
|
||||
err_correction_factor * speed_correction *
|
||||
cpi->twopass.est_max_qcorrection_factor *
|
||||
cpi->twopass.section_max_qfactor *
|
||||
(double)bits_per_mb_at_this_q);
|
||||
bits_per_mb_at_this_q =
|
||||
(int)(.5 + err_correction_factor * speed_correction *
|
||||
cpi->twopass.est_max_qcorrection_factor *
|
||||
cpi->twopass.section_max_qfactor *
|
||||
(double)bits_per_mb_at_this_q);
|
||||
|
||||
/* Mode and motion overhead */
|
||||
/* As Q rises in real encode loop rd code will force overhead down
|
||||
@@ -1086,9 +1086,8 @@ static int estimate_cq(VP8_COMP *cpi, FIRSTPASS_STATS *fpstats,
|
||||
vp8_bits_per_mb[INTER_FRAME][Q] + overhead_bits_per_mb;
|
||||
|
||||
bits_per_mb_at_this_q =
|
||||
(int)(.5 +
|
||||
err_correction_factor * speed_correction * clip_iifactor *
|
||||
(double)bits_per_mb_at_this_q);
|
||||
(int)(.5 + err_correction_factor * speed_correction * clip_iifactor *
|
||||
(double)bits_per_mb_at_this_q);
|
||||
|
||||
/* Mode and motion overhead */
|
||||
/* As Q rises in real encode loop rd code will force overhead down
|
||||
@@ -1273,9 +1272,8 @@ void vp8_init_second_pass(VP8_COMP *cpi) {
|
||||
* sum duration is not. Its calculated based on the actual durations of
|
||||
* all frames from the first pass.
|
||||
*/
|
||||
vp8_new_framerate(cpi,
|
||||
10000000.0 * cpi->twopass.total_stats.count /
|
||||
cpi->twopass.total_stats.duration);
|
||||
vp8_new_framerate(cpi, 10000000.0 * cpi->twopass.total_stats.count /
|
||||
cpi->twopass.total_stats.duration);
|
||||
|
||||
cpi->output_framerate = cpi->framerate;
|
||||
cpi->twopass.bits_left = (int64_t)(cpi->twopass.total_stats.duration *
|
||||
@@ -1739,10 +1737,11 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) {
|
||||
/* Dont break out very close to a key frame */
|
||||
((cpi->twopass.frames_to_key - i) >= MIN_GF_INTERVAL) &&
|
||||
((boost_score > 20.0) || (next_frame.pcnt_inter < 0.75)) &&
|
||||
(!flash_detected) && ((mv_ratio_accumulator > 100.0) ||
|
||||
(abs_mv_in_out_accumulator > 3.0) ||
|
||||
(mv_in_out_accumulator < -2.0) ||
|
||||
((boost_score - old_boost_score) < 2.0)))) {
|
||||
(!flash_detected) &&
|
||||
((mv_ratio_accumulator > 100.0) ||
|
||||
(abs_mv_in_out_accumulator > 3.0) ||
|
||||
(mv_in_out_accumulator < -2.0) ||
|
||||
((boost_score - old_boost_score) < 2.0)))) {
|
||||
boost_score = old_boost_score;
|
||||
break;
|
||||
}
|
||||
@@ -1815,8 +1814,9 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) {
|
||||
(next_frame.pcnt_inter > 0.75) &&
|
||||
((mv_in_out_accumulator / (double)i > -0.2) ||
|
||||
(mv_in_out_accumulator > -2.0)) &&
|
||||
(cpi->gfu_boost > 100) && (cpi->twopass.gf_decay_rate <=
|
||||
(ARF_DECAY_THRESH + (cpi->gfu_boost / 200))))
|
||||
(cpi->gfu_boost > 100) &&
|
||||
(cpi->twopass.gf_decay_rate <=
|
||||
(ARF_DECAY_THRESH + (cpi->gfu_boost / 200))))
|
||||
#endif
|
||||
{
|
||||
int Boost;
|
||||
|
@@ -2862,7 +2862,6 @@ void write_cx_frame_to_file(YV12_BUFFER_CONFIG *frame, int this_frame)
|
||||
fclose(yframe);
|
||||
}
|
||||
#endif
|
||||
/* return of 0 means drop frame */
|
||||
|
||||
#if !CONFIG_REALTIME_ONLY
|
||||
/* Function to test for conditions that indeicate we should loop
|
||||
@@ -3364,11 +3363,6 @@ static void encode_frame_to_data_rate(VP8_COMP *cpi, size_t *size,
|
||||
(LOWER_RES_FRAME_INFO *)cpi->oxcf.mr_low_res_mode_info;
|
||||
|
||||
if (cpi->oxcf.mr_encoder_id) {
|
||||
// TODO(marpan): This constraint shouldn't be needed, as we would like
|
||||
// to allow for key frame setting (forced or periodic) defined per
|
||||
// spatial layer. For now, keep this in.
|
||||
cm->frame_type = low_res_frame_info->frame_type;
|
||||
|
||||
// Check if lower resolution is available for motion vector reuse.
|
||||
if (cm->frame_type != KEY_FRAME) {
|
||||
cpi->mr_low_res_mv_avail = 1;
|
||||
@@ -3393,7 +3387,16 @@ static void encode_frame_to_data_rate(VP8_COMP *cpi, size_t *size,
== low_res_frame_info->low_res_ref_frames[ALTREF_FRAME]);
*/
}
// Disable motion vector reuse (i.e., disable any usage of the low_res)
// if the previous lower stream is skipped/disabled.
if (low_res_frame_info->skip_encoding_prev_stream) {
cpi->mr_low_res_mv_avail = 0;
}
}
// This stream is not skipped (i.e., it's being encoded), so set this skip
// flag to 0. This is needed for the next stream (i.e., which is the next
// frame to be encoded).
low_res_frame_info->skip_encoding_prev_stream = 0;

// On a key frame: For the lowest resolution, keep track of the key frame
// counter value. For the higher resolutions, reset the current video
@@ -3799,7 +3802,7 @@ static void encode_frame_to_data_rate(VP8_COMP *cpi, size_t *size,

/* Setup background Q adjustment for error resilient mode.
* For multi-layer encodes only enable this for the base layer.
*/
*/
if (cpi->cyclic_refresh_mode_enabled) {
// Special case for screen_content_mode with golden frame updates.
int disable_cr_gf =
@@ -4782,8 +4785,6 @@ static void encode_frame_to_data_rate(VP8_COMP *cpi, size_t *size,
cpi->temporal_pattern_counter++;
}

/* reset to normal state now that we are done. */

#if 0
{
char filename[512];
@@ -4999,10 +5000,13 @@ int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags,
// be received for that high layer, which will yield an incorrect
// frame rate (from time-stamp adjustment in above calculation).
if (cpi->oxcf.mr_encoder_id) {
cpi->ref_framerate = low_res_frame_info->low_res_framerate;
if (!low_res_frame_info->skip_encoding_base_stream)
cpi->ref_framerate = low_res_frame_info->low_res_framerate;
} else {
// Keep track of frame rate for lowest resolution.
low_res_frame_info->low_res_framerate = cpi->ref_framerate;
// The base stream is being encoded so set skip flag to 0.
low_res_frame_info->skip_encoding_base_stream = 0;
}
}
#endif

@@ -741,10 +741,10 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
|
||||
x->e_mbd.mode_info_context->mbmi.ref_frame = INTRA_FRAME;
|
||||
|
||||
/* If the frame has big static background and current MB is in low
|
||||
* motion area, its mode decision is biased to ZEROMV mode.
|
||||
* No adjustment if cpu_used is <= -12 (i.e., cpi->Speed >= 12).
|
||||
* At such speed settings, ZEROMV is already heavily favored.
|
||||
*/
|
||||
* motion area, its mode decision is biased to ZEROMV mode.
|
||||
* No adjustment if cpu_used is <= -12 (i.e., cpi->Speed >= 12).
|
||||
* At such speed settings, ZEROMV is already heavily favored.
|
||||
*/
|
||||
if (cpi->Speed < 12) {
|
||||
calculate_zeromv_rd_adjustment(cpi, x, &rd_adjustment);
|
||||
}
|
||||
|
@@ -996,7 +996,7 @@ static void calc_pframe_target_size(VP8_COMP *cpi) {
|
||||
* bits on this frame even if it is a contructed arf.
|
||||
* The active maximum quantizer insures that an appropriate
|
||||
* number of bits will be spent if needed for contstructed ARFs.
|
||||
*/
|
||||
*/
|
||||
cpi->this_frame_target = 0;
|
||||
}
|
||||
|
||||
@@ -1052,9 +1052,8 @@ void vp8_update_rate_correction_factors(VP8_COMP *cpi, int damp_var) {
|
||||
* overflow when values are large
|
||||
*/
|
||||
projected_size_based_on_q =
|
||||
(int)(((.5 +
|
||||
rate_correction_factor *
|
||||
vp8_bits_per_mb[cpi->common.frame_type][Q]) *
|
||||
(int)(((.5 + rate_correction_factor *
|
||||
vp8_bits_per_mb[cpi->common.frame_type][Q]) *
|
||||
cpi->common.MBs) /
|
||||
(1 << BPER_MB_NORMBITS));
|
||||
|
||||
|
@@ -23,6 +23,7 @@
|
||||
#include "modecosts.h"
|
||||
#include "encodeintra.h"
|
||||
#include "pickinter.h"
|
||||
#include "vp8/common/common.h"
|
||||
#include "vp8/common/entropymode.h"
|
||||
#include "vp8/common/reconinter.h"
|
||||
#include "vp8/common/reconintra.h"
|
||||
@@ -769,9 +770,9 @@ static void rd_pick_intra_mbuv_mode(MACROBLOCK *x, int *rate,
|
||||
vp8_quantize_mbuv(x);
|
||||
|
||||
rate_to = rd_cost_mbuv(x);
|
||||
this_rate = rate_to +
|
||||
x->intra_uv_mode_cost[xd->frame_type]
|
||||
[xd->mode_info_context->mbmi.uv_mode];
|
||||
this_rate =
|
||||
rate_to + x->intra_uv_mode_cost[xd->frame_type]
|
||||
[xd->mode_info_context->mbmi.uv_mode];
|
||||
|
||||
this_distortion = vp8_mbuverror(x) / 4;
|
||||
|
||||
@@ -959,19 +960,13 @@ static void rd_check_segment(VP8_COMP *cpi, MACROBLOCK *x, BEST_SEG_INFO *bsi,
|
||||
vp8_variance_fn_ptr_t *v_fn_ptr;
|
||||
|
||||
ENTROPY_CONTEXT_PLANES t_above, t_left;
|
||||
ENTROPY_CONTEXT *ta;
|
||||
ENTROPY_CONTEXT *tl;
|
||||
ENTROPY_CONTEXT_PLANES t_above_b, t_left_b;
|
||||
ENTROPY_CONTEXT *ta_b;
|
||||
ENTROPY_CONTEXT *tl_b;
|
||||
|
||||
memcpy(&t_above, x->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES));
|
||||
memcpy(&t_left, x->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES));
|
||||
|
||||
ta = (ENTROPY_CONTEXT *)&t_above;
|
||||
tl = (ENTROPY_CONTEXT *)&t_left;
|
||||
ta_b = (ENTROPY_CONTEXT *)&t_above_b;
|
||||
tl_b = (ENTROPY_CONTEXT *)&t_left_b;
|
||||
vp8_zero(t_above_b);
|
||||
vp8_zero(t_left_b);
|
||||
|
||||
br = 0;
|
||||
bd = 0;
|
||||
@@ -1151,13 +1146,13 @@ static void rd_check_segment(VP8_COMP *cpi, MACROBLOCK *x, BEST_SEG_INFO *bsi,
|
||||
mode_selected = this_mode;
|
||||
best_label_rd = this_rd;
|
||||
|
||||
memcpy(ta_b, ta_s, sizeof(ENTROPY_CONTEXT_PLANES));
|
||||
memcpy(tl_b, tl_s, sizeof(ENTROPY_CONTEXT_PLANES));
|
||||
memcpy(&t_above_b, &t_above_s, sizeof(ENTROPY_CONTEXT_PLANES));
|
||||
memcpy(&t_left_b, &t_left_s, sizeof(ENTROPY_CONTEXT_PLANES));
|
||||
}
|
||||
} /*for each 4x4 mode*/
|
||||
|
||||
memcpy(ta, ta_b, sizeof(ENTROPY_CONTEXT_PLANES));
|
||||
memcpy(tl, tl_b, sizeof(ENTROPY_CONTEXT_PLANES));
|
||||
memcpy(&t_above, &t_above_b, sizeof(ENTROPY_CONTEXT_PLANES));
|
||||
memcpy(&t_left, &t_left_b, sizeof(ENTROPY_CONTEXT_PLANES));
|
||||
|
||||
labels2mode(x, labels, i, mode_selected, &mode_mv[mode_selected],
|
||||
bsi->ref_mv, x->mvcost);
|
||||
|
@@ -56,8 +56,7 @@ static INLINE unsigned int vp8_cost_branch(const unsigned int ct[2],
|
||||
|
||||
static void vp8_treed_write(vp8_writer *const w, vp8_tree t,
|
||||
const vp8_prob *const p, int v,
|
||||
int n /* number of bits in v, assumed nonzero */
|
||||
) {
|
||||
int n) { /* number of bits in v, assumed nonzero */
|
||||
vp8_tree_index i = 0;
|
||||
|
||||
do {
|
||||
@@ -73,8 +72,7 @@ static INLINE void vp8_write_token(vp8_writer *const w, vp8_tree t,
|
||||
}
|
||||
|
||||
static int vp8_treed_cost(vp8_tree t, const vp8_prob *const p, int v,
|
||||
int n /* number of bits in v, assumed nonzero */
|
||||
) {
|
||||
int n) { /* number of bits in v, assumed nonzero */
|
||||
int c = 0;
|
||||
vp8_tree_index i = 0;
|
||||
|
||||
|
@@ -802,7 +802,20 @@ static vpx_codec_err_t vp8e_encode(vpx_codec_alg_priv_t *ctx,
|
||||
unsigned long deadline) {
|
||||
vpx_codec_err_t res = VPX_CODEC_OK;
|
||||
|
||||
if (!ctx->cfg.rc_target_bitrate) return res;
|
||||
if (!ctx->cfg.rc_target_bitrate) {
|
||||
#if CONFIG_MULTI_RES_ENCODING
|
||||
if (!ctx->cpi) return VPX_CODEC_ERROR;
|
||||
if (ctx->cpi->oxcf.mr_total_resolutions > 1) {
|
||||
LOWER_RES_FRAME_INFO *low_res_frame_info =
|
||||
(LOWER_RES_FRAME_INFO *)ctx->cpi->oxcf.mr_low_res_mode_info;
|
||||
if (!low_res_frame_info) return VPX_CODEC_ERROR;
|
||||
low_res_frame_info->skip_encoding_prev_stream = 1;
|
||||
if (ctx->cpi->oxcf.mr_encoder_id == 0)
|
||||
low_res_frame_info->skip_encoding_base_stream = 1;
|
||||
}
|
||||
#endif
|
||||
return res;
|
||||
}
|
||||
|
||||
if (img) res = validate_img(ctx, img);
|
||||
|
||||
@@ -902,6 +915,8 @@ static vpx_codec_err_t vp8e_encode(vpx_codec_alg_priv_t *ctx,
|
||||
(unsigned long)((delta * ctx->cfg.g_timebase.den + round) /
|
||||
ctx->cfg.g_timebase.num / 10000000);
|
||||
pkt.data.frame.flags = lib_flags << 16;
|
||||
pkt.data.frame.width[0] = cpi->common.Width;
|
||||
pkt.data.frame.height[0] = cpi->common.Height;
|
||||
|
||||
if (lib_flags & FRAMEFLAGS_KEY) {
|
||||
pkt.data.frame.flags |= VPX_FRAME_IS_KEY;
|
||||
@@ -1259,6 +1274,9 @@ CODEC_INTERFACE(vpx_codec_vp8_cx) = {
|
||||
vp8e_usage_cfg_map, /* vpx_codec_enc_cfg_map_t cfg_maps; */
|
||||
vp8e_encode, /* vpx_codec_encode_fn_t encode; */
|
||||
vp8e_get_cxdata, /* vpx_codec_get_cx_data_fn_t get_cx_data; */
|
||||
vp8e_set_config, NULL, vp8e_get_preview, vp8e_mr_alloc_mem,
|
||||
vp8e_set_config,
|
||||
NULL,
|
||||
vp8e_get_preview,
|
||||
vp8e_mr_alloc_mem,
|
||||
} /* encoder functions */
|
||||
};
|
||||
|
@@ -200,9 +200,9 @@ static vpx_codec_err_t update_error_state(
|
||||
static void yuvconfig2image(vpx_image_t *img, const YV12_BUFFER_CONFIG *yv12,
|
||||
void *user_priv) {
|
||||
/** vpx_img_wrap() doesn't allow specifying independent strides for
|
||||
* the Y, U, and V planes, nor other alignment adjustments that
|
||||
* might be representable by a YV12_BUFFER_CONFIG, so we just
|
||||
* initialize all the fields.*/
|
||||
* the Y, U, and V planes, nor other alignment adjustments that
|
||||
* might be representable by a YV12_BUFFER_CONFIG, so we just
|
||||
* initialize all the fields.*/
|
||||
img->fmt = VPX_IMG_FMT_I420;
|
||||
img->w = yv12->y_stride;
|
||||
img->h = (yv12->y_height + 2 * VP8BORDERINPIXELS + 15) & ~15;
|
||||
|
160
vp9/common/arm/neon/vp9_highbd_iht4x4_add_neon.c
Normal file
@@ -0,0 +1,160 @@
|
||||
/*
|
||||
* Copyright (c) 2018 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include <arm_neon.h>
|
||||
#include <assert.h>
|
||||
|
||||
#include "./vp9_rtcd.h"
|
||||
#include "./vpx_config.h"
|
||||
#include "vp9/common/vp9_common.h"
|
||||
#include "vp9/common/arm/neon/vp9_iht_neon.h"
|
||||
#include "vpx_dsp/arm/highbd_idct_neon.h"
|
||||
#include "vpx_dsp/arm/idct_neon.h"
|
||||
#include "vpx_dsp/arm/mem_neon.h"
|
||||
#include "vpx_dsp/txfm_common.h"
|
||||
|
||||
static INLINE void highbd_iadst4(int32x4_t *const io) {
|
||||
const int32_t sinpis[4] = { sinpi_1_9, sinpi_2_9, sinpi_3_9, sinpi_4_9 };
|
||||
const int32x4_t sinpi = vld1q_s32(sinpis);
|
||||
int32x4_t s[8];
|
||||
|
||||
s[0] = vmulq_lane_s32(io[0], vget_low_s32(sinpi), 0);
|
||||
s[1] = vmulq_lane_s32(io[0], vget_low_s32(sinpi), 1);
|
||||
s[2] = vmulq_lane_s32(io[1], vget_high_s32(sinpi), 0);
|
||||
s[3] = vmulq_lane_s32(io[2], vget_high_s32(sinpi), 1);
|
||||
s[4] = vmulq_lane_s32(io[2], vget_low_s32(sinpi), 0);
|
||||
s[5] = vmulq_lane_s32(io[3], vget_low_s32(sinpi), 1);
|
||||
s[6] = vmulq_lane_s32(io[3], vget_high_s32(sinpi), 1);
|
||||
s[7] = vsubq_s32(io[0], io[2]);
|
||||
s[7] = vaddq_s32(s[7], io[3]);
|
||||
|
||||
s[0] = vaddq_s32(s[0], s[3]);
|
||||
s[0] = vaddq_s32(s[0], s[5]);
|
||||
s[1] = vsubq_s32(s[1], s[4]);
|
||||
s[1] = vsubq_s32(s[1], s[6]);
|
||||
s[3] = s[2];
|
||||
s[2] = vmulq_lane_s32(s[7], vget_high_s32(sinpi), 0);
|
||||
|
||||
io[0] = vaddq_s32(s[0], s[3]);
|
||||
io[1] = vaddq_s32(s[1], s[3]);
|
||||
io[2] = s[2];
|
||||
io[3] = vaddq_s32(s[0], s[1]);
|
||||
io[3] = vsubq_s32(io[3], s[3]);
|
||||
io[0] = vrshrq_n_s32(io[0], DCT_CONST_BITS);
|
||||
io[1] = vrshrq_n_s32(io[1], DCT_CONST_BITS);
|
||||
io[2] = vrshrq_n_s32(io[2], DCT_CONST_BITS);
|
||||
io[3] = vrshrq_n_s32(io[3], DCT_CONST_BITS);
|
||||
}
|
||||
|
||||
void vp9_highbd_iht4x4_16_add_neon(const tran_low_t *input, uint16_t *dest,
|
||||
int stride, int tx_type, int bd) {
|
||||
const int16x8_t max = vdupq_n_s16((1 << bd) - 1);
|
||||
int16x8_t a[2];
|
||||
int32x4_t c[4];
|
||||
|
||||
c[0] = vld1q_s32(input);
|
||||
c[1] = vld1q_s32(input + 4);
|
||||
c[2] = vld1q_s32(input + 8);
|
||||
c[3] = vld1q_s32(input + 12);
|
||||
|
||||
if (bd == 8) {
|
||||
a[0] = vcombine_s16(vmovn_s32(c[0]), vmovn_s32(c[1]));
|
||||
a[1] = vcombine_s16(vmovn_s32(c[2]), vmovn_s32(c[3]));
|
||||
transpose_s16_4x4q(&a[0], &a[1]);
|
||||
|
||||
switch (tx_type) {
|
||||
case DCT_DCT:
|
||||
idct4x4_16_kernel_bd8(a);
|
||||
a[1] = vcombine_s16(vget_high_s16(a[1]), vget_low_s16(a[1]));
|
||||
transpose_s16_4x4q(&a[0], &a[1]);
|
||||
idct4x4_16_kernel_bd8(a);
|
||||
a[1] = vcombine_s16(vget_high_s16(a[1]), vget_low_s16(a[1]));
|
||||
break;
|
||||
|
||||
case ADST_DCT:
|
||||
idct4x4_16_kernel_bd8(a);
|
||||
a[1] = vcombine_s16(vget_high_s16(a[1]), vget_low_s16(a[1]));
|
||||
transpose_s16_4x4q(&a[0], &a[1]);
|
||||
iadst4(a);
|
||||
break;
|
||||
|
||||
case DCT_ADST:
|
||||
iadst4(a);
|
||||
transpose_s16_4x4q(&a[0], &a[1]);
|
||||
idct4x4_16_kernel_bd8(a);
|
||||
a[1] = vcombine_s16(vget_high_s16(a[1]), vget_low_s16(a[1]));
|
||||
break;
|
||||
|
||||
default:
|
||||
assert(tx_type == ADST_ADST);
|
||||
iadst4(a);
|
||||
transpose_s16_4x4q(&a[0], &a[1]);
|
||||
iadst4(a);
|
||||
break;
|
||||
}
|
||||
a[0] = vrshrq_n_s16(a[0], 4);
|
||||
a[1] = vrshrq_n_s16(a[1], 4);
|
||||
} else {
|
||||
switch (tx_type) {
|
||||
case DCT_DCT: {
|
||||
const int32x4_t cospis = vld1q_s32(kCospi32);
|
||||
|
||||
if (bd == 10) {
|
||||
idct4x4_16_kernel_bd10(cospis, c);
|
||||
idct4x4_16_kernel_bd10(cospis, c);
|
||||
} else {
|
||||
idct4x4_16_kernel_bd12(cospis, c);
|
||||
idct4x4_16_kernel_bd12(cospis, c);
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
case ADST_DCT: {
|
||||
const int32x4_t cospis = vld1q_s32(kCospi32);
|
||||
|
||||
if (bd == 10) {
|
||||
idct4x4_16_kernel_bd10(cospis, c);
|
||||
} else {
|
||||
idct4x4_16_kernel_bd12(cospis, c);
|
||||
}
|
||||
transpose_s32_4x4(&c[0], &c[1], &c[2], &c[3]);
|
||||
highbd_iadst4(c);
|
||||
break;
|
||||
}
|
||||
|
||||
case DCT_ADST: {
|
||||
const int32x4_t cospis = vld1q_s32(kCospi32);
|
||||
|
||||
transpose_s32_4x4(&c[0], &c[1], &c[2], &c[3]);
|
||||
highbd_iadst4(c);
|
||||
if (bd == 10) {
|
||||
idct4x4_16_kernel_bd10(cospis, c);
|
||||
} else {
|
||||
idct4x4_16_kernel_bd12(cospis, c);
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
default: {
|
||||
assert(tx_type == ADST_ADST);
|
||||
transpose_s32_4x4(&c[0], &c[1], &c[2], &c[3]);
|
||||
highbd_iadst4(c);
|
||||
transpose_s32_4x4(&c[0], &c[1], &c[2], &c[3]);
|
||||
highbd_iadst4(c);
|
||||
break;
|
||||
}
|
||||
}
|
||||
a[0] = vcombine_s16(vqrshrn_n_s32(c[0], 4), vqrshrn_n_s32(c[1], 4));
|
||||
a[1] = vcombine_s16(vqrshrn_n_s32(c[2], 4), vqrshrn_n_s32(c[3], 4));
|
||||
}
|
||||
|
||||
highbd_idct4x4_1_add_kernel1(&dest, stride, a[0], max);
|
||||
highbd_idct4x4_1_add_kernel1(&dest, stride, a[1], max);
|
||||
}
|
@@ -14,206 +14,63 @@
|
||||
#include "./vp9_rtcd.h"
|
||||
#include "./vpx_config.h"
|
||||
#include "vp9/common/vp9_common.h"
|
||||
#include "vp9/common/arm/neon/vp9_iht_neon.h"
|
||||
#include "vpx_dsp/arm/idct_neon.h"
|
||||
#include "vpx_dsp/arm/mem_neon.h"
|
||||
#include "vpx_dsp/txfm_common.h"
|
||||
|
||||
static INLINE void TRANSPOSE4X4(int16x8_t *q8s16, int16x8_t *q9s16) {
|
||||
int32x4_t q8s32, q9s32;
|
||||
int16x4x2_t d0x2s16, d1x2s16;
|
||||
int32x4x2_t q0x2s32;
|
||||
|
||||
d0x2s16 = vtrn_s16(vget_low_s16(*q8s16), vget_high_s16(*q8s16));
|
||||
d1x2s16 = vtrn_s16(vget_low_s16(*q9s16), vget_high_s16(*q9s16));
|
||||
|
||||
q8s32 = vreinterpretq_s32_s16(vcombine_s16(d0x2s16.val[0], d0x2s16.val[1]));
|
||||
q9s32 = vreinterpretq_s32_s16(vcombine_s16(d1x2s16.val[0], d1x2s16.val[1]));
|
||||
q0x2s32 = vtrnq_s32(q8s32, q9s32);
|
||||
|
||||
*q8s16 = vreinterpretq_s16_s32(q0x2s32.val[0]);
|
||||
*q9s16 = vreinterpretq_s16_s32(q0x2s32.val[1]);
|
||||
}
|
||||
|
||||
static INLINE void GENERATE_COSINE_CONSTANTS(int16x4_t *d0s16, int16x4_t *d1s16,
|
||||
int16x4_t *d2s16) {
|
||||
*d0s16 = vdup_n_s16(cospi_8_64);
|
||||
*d1s16 = vdup_n_s16(cospi_16_64);
|
||||
*d2s16 = vdup_n_s16(cospi_24_64);
|
||||
}
|
||||
|
||||
static INLINE void GENERATE_SINE_CONSTANTS(int16x4_t *d3s16, int16x4_t *d4s16,
|
||||
int16x4_t *d5s16, int16x8_t *q3s16) {
|
||||
*d3s16 = vdup_n_s16(sinpi_1_9);
|
||||
*d4s16 = vdup_n_s16(sinpi_2_9);
|
||||
*q3s16 = vdupq_n_s16(sinpi_3_9);
|
||||
*d5s16 = vdup_n_s16(sinpi_4_9);
|
||||
}
|
||||
|
||||
static INLINE void IDCT4x4_1D(int16x4_t *d0s16, int16x4_t *d1s16,
|
||||
int16x4_t *d2s16, int16x8_t *q8s16,
|
||||
int16x8_t *q9s16) {
|
||||
int16x4_t d16s16, d17s16, d18s16, d19s16, d23s16, d24s16;
|
||||
int16x4_t d26s16, d27s16, d28s16, d29s16;
|
||||
int32x4_t q10s32, q13s32, q14s32, q15s32;
|
||||
int16x8_t q13s16, q14s16;
|
||||
|
||||
d16s16 = vget_low_s16(*q8s16);
|
||||
d17s16 = vget_high_s16(*q8s16);
|
||||
d18s16 = vget_low_s16(*q9s16);
|
||||
d19s16 = vget_high_s16(*q9s16);
|
||||
|
||||
d23s16 = vadd_s16(d16s16, d18s16);
|
||||
d24s16 = vsub_s16(d16s16, d18s16);
|
||||
|
||||
q15s32 = vmull_s16(d17s16, *d2s16);
|
||||
q10s32 = vmull_s16(d17s16, *d0s16);
|
||||
q13s32 = vmull_s16(d23s16, *d1s16);
|
||||
q14s32 = vmull_s16(d24s16, *d1s16);
|
||||
q15s32 = vmlsl_s16(q15s32, d19s16, *d0s16);
|
||||
q10s32 = vmlal_s16(q10s32, d19s16, *d2s16);
|
||||
|
||||
d26s16 = vrshrn_n_s32(q13s32, 14);
|
||||
d27s16 = vrshrn_n_s32(q14s32, 14);
|
||||
d29s16 = vrshrn_n_s32(q15s32, 14);
|
||||
d28s16 = vrshrn_n_s32(q10s32, 14);
|
||||
|
||||
q13s16 = vcombine_s16(d26s16, d27s16);
|
||||
q14s16 = vcombine_s16(d28s16, d29s16);
|
||||
*q8s16 = vaddq_s16(q13s16, q14s16);
|
||||
*q9s16 = vsubq_s16(q13s16, q14s16);
|
||||
*q9s16 = vcombine_s16(vget_high_s16(*q9s16), vget_low_s16(*q9s16)); // vswp
|
||||
}
|
||||
|
||||
static INLINE void IADST4x4_1D(int16x4_t *d3s16, int16x4_t *d4s16,
|
||||
int16x4_t *d5s16, int16x8_t *q3s16,
|
||||
int16x8_t *q8s16, int16x8_t *q9s16) {
|
||||
int16x4_t d6s16, d16s16, d17s16, d18s16, d19s16;
|
||||
int32x4_t q8s32, q9s32, q10s32, q11s32, q12s32, q13s32, q14s32, q15s32;
|
||||
|
||||
d6s16 = vget_low_s16(*q3s16);
|
||||
|
||||
d16s16 = vget_low_s16(*q8s16);
|
||||
d17s16 = vget_high_s16(*q8s16);
|
||||
d18s16 = vget_low_s16(*q9s16);
|
||||
d19s16 = vget_high_s16(*q9s16);
|
||||
|
||||
q10s32 = vmull_s16(*d3s16, d16s16);
|
||||
q11s32 = vmull_s16(*d4s16, d16s16);
|
||||
q12s32 = vmull_s16(d6s16, d17s16);
|
||||
q13s32 = vmull_s16(*d5s16, d18s16);
|
||||
q14s32 = vmull_s16(*d3s16, d18s16);
|
||||
q15s32 = vmovl_s16(d16s16);
|
||||
q15s32 = vaddw_s16(q15s32, d19s16);
|
||||
q8s32 = vmull_s16(*d4s16, d19s16);
|
||||
q15s32 = vsubw_s16(q15s32, d18s16);
|
||||
q9s32 = vmull_s16(*d5s16, d19s16);
|
||||
|
||||
q10s32 = vaddq_s32(q10s32, q13s32);
|
||||
q10s32 = vaddq_s32(q10s32, q8s32);
|
||||
q11s32 = vsubq_s32(q11s32, q14s32);
|
||||
q8s32 = vdupq_n_s32(sinpi_3_9);
|
||||
q11s32 = vsubq_s32(q11s32, q9s32);
|
||||
q15s32 = vmulq_s32(q15s32, q8s32);
|
||||
|
||||
q13s32 = vaddq_s32(q10s32, q12s32);
|
||||
q10s32 = vaddq_s32(q10s32, q11s32);
|
||||
q14s32 = vaddq_s32(q11s32, q12s32);
|
||||
q10s32 = vsubq_s32(q10s32, q12s32);
|
||||
|
||||
d16s16 = vrshrn_n_s32(q13s32, 14);
|
||||
d17s16 = vrshrn_n_s32(q14s32, 14);
|
||||
d18s16 = vrshrn_n_s32(q15s32, 14);
|
||||
d19s16 = vrshrn_n_s32(q10s32, 14);
|
||||
|
||||
*q8s16 = vcombine_s16(d16s16, d17s16);
|
||||
*q9s16 = vcombine_s16(d18s16, d19s16);
|
||||
}
|
||||
|
||||
void vp9_iht4x4_16_add_neon(const tran_low_t *input, uint8_t *dest, int stride,
|
||||
int tx_type) {
|
||||
uint8x8_t d26u8, d27u8;
|
||||
int16x4_t d0s16, d1s16, d2s16, d3s16, d4s16, d5s16;
|
||||
uint32x2_t d26u32, d27u32;
|
||||
int16x8_t q3s16, q8s16, q9s16;
|
||||
uint16x8_t q8u16, q9u16;
|
||||
int16x8_t a[2];
|
||||
uint8x8_t s[2], d[2];
|
||||
uint16x8_t sum[2];
|
||||
|
||||
d26u32 = d27u32 = vdup_n_u32(0);
|
||||
assert(!((intptr_t)dest % sizeof(uint32_t)));
|
||||
assert(!(stride % sizeof(uint32_t)));
|
||||
|
||||
q8s16 = vld1q_s16(input);
|
||||
q9s16 = vld1q_s16(input + 8);
|
||||
|
||||
TRANSPOSE4X4(&q8s16, &q9s16);
|
||||
a[0] = load_tran_low_to_s16q(input);
|
||||
a[1] = load_tran_low_to_s16q(input + 8);
|
||||
transpose_s16_4x4q(&a[0], &a[1]);
|
||||
|
||||
switch (tx_type) {
|
||||
case 0: // idct_idct is not supported. Fall back to C
|
||||
vp9_iht4x4_16_add_c(input, dest, stride, tx_type);
|
||||
return;
|
||||
case 1: // iadst_idct
|
||||
// generate constants
|
||||
GENERATE_COSINE_CONSTANTS(&d0s16, &d1s16, &d2s16);
|
||||
GENERATE_SINE_CONSTANTS(&d3s16, &d4s16, &d5s16, &q3s16);
|
||||
|
||||
// first transform rows
|
||||
IDCT4x4_1D(&d0s16, &d1s16, &d2s16, &q8s16, &q9s16);
|
||||
|
||||
// transpose the matrix
|
||||
TRANSPOSE4X4(&q8s16, &q9s16);
|
||||
|
||||
// then transform columns
|
||||
IADST4x4_1D(&d3s16, &d4s16, &d5s16, &q3s16, &q8s16, &q9s16);
|
||||
case DCT_DCT:
|
||||
idct4x4_16_kernel_bd8(a);
|
||||
a[1] = vcombine_s16(vget_high_s16(a[1]), vget_low_s16(a[1]));
|
||||
transpose_s16_4x4q(&a[0], &a[1]);
|
||||
idct4x4_16_kernel_bd8(a);
|
||||
a[1] = vcombine_s16(vget_high_s16(a[1]), vget_low_s16(a[1]));
|
||||
break;
|
||||
case 2: // idct_iadst
|
||||
// generate constantsyy
|
||||
GENERATE_COSINE_CONSTANTS(&d0s16, &d1s16, &d2s16);
|
||||
GENERATE_SINE_CONSTANTS(&d3s16, &d4s16, &d5s16, &q3s16);
|
||||
|
||||
// first transform rows
|
||||
IADST4x4_1D(&d3s16, &d4s16, &d5s16, &q3s16, &q8s16, &q9s16);
|
||||
|
||||
// transpose the matrix
|
||||
TRANSPOSE4X4(&q8s16, &q9s16);
|
||||
|
||||
// then transform columns
|
||||
IDCT4x4_1D(&d0s16, &d1s16, &d2s16, &q8s16, &q9s16);
|
||||
case ADST_DCT:
|
||||
idct4x4_16_kernel_bd8(a);
|
||||
a[1] = vcombine_s16(vget_high_s16(a[1]), vget_low_s16(a[1]));
|
||||
transpose_s16_4x4q(&a[0], &a[1]);
|
||||
iadst4(a);
|
||||
break;
|
||||
case 3: // iadst_iadst
|
||||
// generate constants
|
||||
GENERATE_SINE_CONSTANTS(&d3s16, &d4s16, &d5s16, &q3s16);
|
||||
|
||||
// first transform rows
|
||||
IADST4x4_1D(&d3s16, &d4s16, &d5s16, &q3s16, &q8s16, &q9s16);
|
||||
|
||||
// transpose the matrix
|
||||
TRANSPOSE4X4(&q8s16, &q9s16);
|
||||
|
||||
// then transform columns
|
||||
IADST4x4_1D(&d3s16, &d4s16, &d5s16, &q3s16, &q8s16, &q9s16);
|
||||
case DCT_ADST:
|
||||
iadst4(a);
|
||||
transpose_s16_4x4q(&a[0], &a[1]);
|
||||
idct4x4_16_kernel_bd8(a);
|
||||
a[1] = vcombine_s16(vget_high_s16(a[1]), vget_low_s16(a[1]));
|
||||
break;
|
||||
default: // iadst_idct
|
||||
assert(0);
|
||||
|
||||
default:
|
||||
assert(tx_type == ADST_ADST);
|
||||
iadst4(a);
|
||||
transpose_s16_4x4q(&a[0], &a[1]);
|
||||
iadst4(a);
|
||||
break;
|
||||
}
|
||||
|
||||
q8s16 = vrshrq_n_s16(q8s16, 4);
|
||||
q9s16 = vrshrq_n_s16(q9s16, 4);
|
||||
|
||||
d26u32 = vld1_lane_u32((const uint32_t *)dest, d26u32, 0);
|
||||
dest += stride;
|
||||
d26u32 = vld1_lane_u32((const uint32_t *)dest, d26u32, 1);
|
||||
dest += stride;
|
||||
d27u32 = vld1_lane_u32((const uint32_t *)dest, d27u32, 0);
|
||||
dest += stride;
|
||||
d27u32 = vld1_lane_u32((const uint32_t *)dest, d27u32, 1);
|
||||
|
||||
q8u16 = vaddw_u8(vreinterpretq_u16_s16(q8s16), vreinterpret_u8_u32(d26u32));
|
||||
q9u16 = vaddw_u8(vreinterpretq_u16_s16(q9s16), vreinterpret_u8_u32(d27u32));
|
||||
|
||||
d26u8 = vqmovun_s16(vreinterpretq_s16_u16(q8u16));
|
||||
d27u8 = vqmovun_s16(vreinterpretq_s16_u16(q9u16));
|
||||
|
||||
vst1_lane_u32((uint32_t *)dest, vreinterpret_u32_u8(d27u8), 1);
|
||||
dest -= stride;
|
||||
vst1_lane_u32((uint32_t *)dest, vreinterpret_u32_u8(d27u8), 0);
|
||||
dest -= stride;
|
||||
vst1_lane_u32((uint32_t *)dest, vreinterpret_u32_u8(d26u8), 1);
|
||||
dest -= stride;
|
||||
vst1_lane_u32((uint32_t *)dest, vreinterpret_u32_u8(d26u8), 0);
|
||||
a[0] = vrshrq_n_s16(a[0], 4);
|
||||
a[1] = vrshrq_n_s16(a[1], 4);
|
||||
s[0] = load_u8(dest, stride);
|
||||
s[1] = load_u8(dest + 2 * stride, stride);
|
||||
sum[0] = vaddw_u8(vreinterpretq_u16_s16(a[0]), s[0]);
|
||||
sum[1] = vaddw_u8(vreinterpretq_u16_s16(a[1]), s[1]);
|
||||
d[0] = vqmovun_s16(vreinterpretq_s16_u16(sum[0]));
|
||||
d[1] = vqmovun_s16(vreinterpretq_s16_u16(sum[1]));
|
||||
store_u8(dest, stride, d[0]);
|
||||
store_u8(dest + 2 * stride, stride, d[1]);
|
||||
}
|
||||
|
@@ -14,527 +14,199 @@
|
||||
#include "./vp9_rtcd.h"
|
||||
#include "./vpx_config.h"
|
||||
#include "vp9/common/vp9_common.h"
|
||||
#include "vpx_dsp/arm/idct_neon.h"
|
||||
#include "vpx_dsp/arm/mem_neon.h"
|
||||
#include "vpx_dsp/arm/transpose_neon.h"
|
||||
|
||||
static int16_t cospi_2_64 = 16305;
|
||||
static int16_t cospi_4_64 = 16069;
|
||||
static int16_t cospi_6_64 = 15679;
|
||||
static int16_t cospi_8_64 = 15137;
|
||||
static int16_t cospi_10_64 = 14449;
|
||||
static int16_t cospi_12_64 = 13623;
|
||||
static int16_t cospi_14_64 = 12665;
|
||||
static int16_t cospi_16_64 = 11585;
|
||||
static int16_t cospi_18_64 = 10394;
|
||||
static int16_t cospi_20_64 = 9102;
|
||||
static int16_t cospi_22_64 = 7723;
|
||||
static int16_t cospi_24_64 = 6270;
|
||||
static int16_t cospi_26_64 = 4756;
|
||||
static int16_t cospi_28_64 = 3196;
|
||||
static int16_t cospi_30_64 = 1606;
|
||||
static INLINE void iadst_half_butterfly_neon(int16x8_t *const x,
|
||||
const int16x4_t c) {
|
||||
const int16x8_t sum = vaddq_s16(x[0], x[1]);
|
||||
const int16x8_t sub = vsubq_s16(x[0], x[1]);
|
||||
int32x4_t t0[2], t1[2];
|
||||
|
||||
static INLINE void IDCT8x8_1D(int16x8_t *q8s16, int16x8_t *q9s16,
|
||||
int16x8_t *q10s16, int16x8_t *q11s16,
|
||||
int16x8_t *q12s16, int16x8_t *q13s16,
|
||||
int16x8_t *q14s16, int16x8_t *q15s16) {
|
||||
int16x4_t d0s16, d1s16, d2s16, d3s16;
|
||||
int16x4_t d8s16, d9s16, d10s16, d11s16, d12s16, d13s16, d14s16, d15s16;
|
||||
int16x4_t d16s16, d17s16, d18s16, d19s16, d20s16, d21s16, d22s16, d23s16;
|
||||
int16x4_t d24s16, d25s16, d26s16, d27s16, d28s16, d29s16, d30s16, d31s16;
|
||||
int16x8_t q0s16, q1s16, q2s16, q3s16, q4s16, q5s16, q6s16, q7s16;
|
||||
int32x4_t q2s32, q3s32, q5s32, q6s32, q8s32, q9s32;
|
||||
int32x4_t q10s32, q11s32, q12s32, q13s32, q15s32;
|
||||
|
||||
d0s16 = vdup_n_s16(cospi_28_64);
|
||||
d1s16 = vdup_n_s16(cospi_4_64);
|
||||
d2s16 = vdup_n_s16(cospi_12_64);
|
||||
d3s16 = vdup_n_s16(cospi_20_64);
|
||||
|
||||
d16s16 = vget_low_s16(*q8s16);
|
||||
d17s16 = vget_high_s16(*q8s16);
|
||||
d18s16 = vget_low_s16(*q9s16);
|
||||
d19s16 = vget_high_s16(*q9s16);
|
||||
d20s16 = vget_low_s16(*q10s16);
|
||||
d21s16 = vget_high_s16(*q10s16);
|
||||
d22s16 = vget_low_s16(*q11s16);
|
||||
d23s16 = vget_high_s16(*q11s16);
|
||||
d24s16 = vget_low_s16(*q12s16);
|
||||
d25s16 = vget_high_s16(*q12s16);
|
||||
d26s16 = vget_low_s16(*q13s16);
|
||||
d27s16 = vget_high_s16(*q13s16);
|
||||
d28s16 = vget_low_s16(*q14s16);
|
||||
d29s16 = vget_high_s16(*q14s16);
|
||||
d30s16 = vget_low_s16(*q15s16);
|
||||
d31s16 = vget_high_s16(*q15s16);
|
||||
|
||||
q2s32 = vmull_s16(d18s16, d0s16);
|
||||
q3s32 = vmull_s16(d19s16, d0s16);
|
||||
q5s32 = vmull_s16(d26s16, d2s16);
|
||||
q6s32 = vmull_s16(d27s16, d2s16);
|
||||
|
||||
q2s32 = vmlsl_s16(q2s32, d30s16, d1s16);
|
||||
q3s32 = vmlsl_s16(q3s32, d31s16, d1s16);
|
||||
q5s32 = vmlsl_s16(q5s32, d22s16, d3s16);
|
||||
q6s32 = vmlsl_s16(q6s32, d23s16, d3s16);
|
||||
|
||||
d8s16 = vrshrn_n_s32(q2s32, 14);
|
||||
d9s16 = vrshrn_n_s32(q3s32, 14);
|
||||
d10s16 = vrshrn_n_s32(q5s32, 14);
|
||||
d11s16 = vrshrn_n_s32(q6s32, 14);
|
||||
q4s16 = vcombine_s16(d8s16, d9s16);
|
||||
q5s16 = vcombine_s16(d10s16, d11s16);
|
||||
|
||||
q2s32 = vmull_s16(d18s16, d1s16);
|
||||
q3s32 = vmull_s16(d19s16, d1s16);
|
||||
q9s32 = vmull_s16(d26s16, d3s16);
|
||||
q13s32 = vmull_s16(d27s16, d3s16);
|
||||
|
||||
q2s32 = vmlal_s16(q2s32, d30s16, d0s16);
|
||||
q3s32 = vmlal_s16(q3s32, d31s16, d0s16);
|
||||
q9s32 = vmlal_s16(q9s32, d22s16, d2s16);
|
||||
q13s32 = vmlal_s16(q13s32, d23s16, d2s16);
|
||||
|
||||
d14s16 = vrshrn_n_s32(q2s32, 14);
|
||||
d15s16 = vrshrn_n_s32(q3s32, 14);
|
||||
d12s16 = vrshrn_n_s32(q9s32, 14);
|
||||
d13s16 = vrshrn_n_s32(q13s32, 14);
|
||||
q6s16 = vcombine_s16(d12s16, d13s16);
|
||||
q7s16 = vcombine_s16(d14s16, d15s16);
|
||||
|
||||
d0s16 = vdup_n_s16(cospi_16_64);
|
||||
|
||||
q2s32 = vmull_s16(d16s16, d0s16);
|
||||
q3s32 = vmull_s16(d17s16, d0s16);
|
||||
q13s32 = vmull_s16(d16s16, d0s16);
|
||||
q15s32 = vmull_s16(d17s16, d0s16);
|
||||
|
||||
q2s32 = vmlal_s16(q2s32, d24s16, d0s16);
|
||||
q3s32 = vmlal_s16(q3s32, d25s16, d0s16);
|
||||
q13s32 = vmlsl_s16(q13s32, d24s16, d0s16);
|
||||
q15s32 = vmlsl_s16(q15s32, d25s16, d0s16);
|
||||
|
||||
d0s16 = vdup_n_s16(cospi_24_64);
|
||||
d1s16 = vdup_n_s16(cospi_8_64);
|
||||
|
||||
d18s16 = vrshrn_n_s32(q2s32, 14);
|
||||
d19s16 = vrshrn_n_s32(q3s32, 14);
|
||||
d22s16 = vrshrn_n_s32(q13s32, 14);
|
||||
d23s16 = vrshrn_n_s32(q15s32, 14);
|
||||
*q9s16 = vcombine_s16(d18s16, d19s16);
|
||||
*q11s16 = vcombine_s16(d22s16, d23s16);
|
||||
|
||||
q2s32 = vmull_s16(d20s16, d0s16);
|
||||
q3s32 = vmull_s16(d21s16, d0s16);
|
||||
q8s32 = vmull_s16(d20s16, d1s16);
|
||||
q12s32 = vmull_s16(d21s16, d1s16);
|
||||
|
||||
q2s32 = vmlsl_s16(q2s32, d28s16, d1s16);
|
||||
q3s32 = vmlsl_s16(q3s32, d29s16, d1s16);
|
||||
q8s32 = vmlal_s16(q8s32, d28s16, d0s16);
|
||||
q12s32 = vmlal_s16(q12s32, d29s16, d0s16);
|
||||
|
||||
d26s16 = vrshrn_n_s32(q2s32, 14);
|
||||
d27s16 = vrshrn_n_s32(q3s32, 14);
|
||||
d30s16 = vrshrn_n_s32(q8s32, 14);
|
||||
d31s16 = vrshrn_n_s32(q12s32, 14);
|
||||
*q13s16 = vcombine_s16(d26s16, d27s16);
|
||||
*q15s16 = vcombine_s16(d30s16, d31s16);
|
||||
|
||||
q0s16 = vaddq_s16(*q9s16, *q15s16);
|
||||
q1s16 = vaddq_s16(*q11s16, *q13s16);
|
||||
q2s16 = vsubq_s16(*q11s16, *q13s16);
|
||||
q3s16 = vsubq_s16(*q9s16, *q15s16);
|
||||
|
||||
*q13s16 = vsubq_s16(q4s16, q5s16);
|
||||
q4s16 = vaddq_s16(q4s16, q5s16);
|
||||
*q14s16 = vsubq_s16(q7s16, q6s16);
|
||||
q7s16 = vaddq_s16(q7s16, q6s16);
|
||||
d26s16 = vget_low_s16(*q13s16);
|
||||
d27s16 = vget_high_s16(*q13s16);
|
||||
d28s16 = vget_low_s16(*q14s16);
|
||||
d29s16 = vget_high_s16(*q14s16);
|
||||
|
||||
d16s16 = vdup_n_s16(cospi_16_64);
|
||||
|
||||
q9s32 = vmull_s16(d28s16, d16s16);
|
||||
q10s32 = vmull_s16(d29s16, d16s16);
|
||||
q11s32 = vmull_s16(d28s16, d16s16);
|
||||
q12s32 = vmull_s16(d29s16, d16s16);
|
||||
|
||||
q9s32 = vmlsl_s16(q9s32, d26s16, d16s16);
|
||||
q10s32 = vmlsl_s16(q10s32, d27s16, d16s16);
|
||||
q11s32 = vmlal_s16(q11s32, d26s16, d16s16);
|
||||
q12s32 = vmlal_s16(q12s32, d27s16, d16s16);
|
||||
|
||||
d10s16 = vrshrn_n_s32(q9s32, 14);
|
||||
d11s16 = vrshrn_n_s32(q10s32, 14);
|
||||
d12s16 = vrshrn_n_s32(q11s32, 14);
|
||||
d13s16 = vrshrn_n_s32(q12s32, 14);
|
||||
q5s16 = vcombine_s16(d10s16, d11s16);
|
||||
q6s16 = vcombine_s16(d12s16, d13s16);
|
||||
|
||||
*q8s16 = vaddq_s16(q0s16, q7s16);
|
||||
*q9s16 = vaddq_s16(q1s16, q6s16);
|
||||
*q10s16 = vaddq_s16(q2s16, q5s16);
|
||||
*q11s16 = vaddq_s16(q3s16, q4s16);
|
||||
*q12s16 = vsubq_s16(q3s16, q4s16);
|
||||
*q13s16 = vsubq_s16(q2s16, q5s16);
|
||||
*q14s16 = vsubq_s16(q1s16, q6s16);
|
||||
*q15s16 = vsubq_s16(q0s16, q7s16);
|
||||
t0[0] = vmull_lane_s16(vget_low_s16(sum), c, 0);
|
||||
t0[1] = vmull_lane_s16(vget_high_s16(sum), c, 0);
|
||||
t1[0] = vmull_lane_s16(vget_low_s16(sub), c, 0);
|
||||
t1[1] = vmull_lane_s16(vget_high_s16(sub), c, 0);
|
||||
x[0] = dct_const_round_shift_low_8(t0);
|
||||
x[1] = dct_const_round_shift_low_8(t1);
|
||||
}
|
||||
|
||||
static INLINE void IADST8X8_1D(int16x8_t *q8s16, int16x8_t *q9s16,
|
||||
int16x8_t *q10s16, int16x8_t *q11s16,
|
||||
int16x8_t *q12s16, int16x8_t *q13s16,
|
||||
int16x8_t *q14s16, int16x8_t *q15s16) {
|
||||
int16x4_t d0s16, d1s16, d2s16, d3s16, d4s16, d5s16, d6s16, d7s16;
|
||||
int16x4_t d8s16, d9s16, d10s16, d11s16, d12s16, d13s16, d14s16, d15s16;
|
||||
int16x4_t d16s16, d17s16, d18s16, d19s16, d20s16, d21s16, d22s16, d23s16;
|
||||
int16x4_t d24s16, d25s16, d26s16, d27s16, d28s16, d29s16, d30s16, d31s16;
|
||||
int16x8_t q2s16, q4s16, q5s16, q6s16;
|
||||
int32x4_t q0s32, q1s32, q2s32, q3s32, q4s32, q5s32, q6s32, q7s32, q8s32;
|
||||
int32x4_t q9s32, q10s32, q11s32, q12s32, q13s32, q14s32, q15s32;
|
||||
static INLINE void iadst_butterfly_lane_0_1_neon(const int16x8_t in0,
|
||||
const int16x8_t in1,
|
||||
const int16x4_t c,
|
||||
int32x4_t *const s0,
|
||||
int32x4_t *const s1) {
|
||||
s0[0] = vmull_lane_s16(vget_low_s16(in0), c, 0);
|
||||
s0[1] = vmull_lane_s16(vget_high_s16(in0), c, 0);
|
||||
s1[0] = vmull_lane_s16(vget_low_s16(in0), c, 1);
|
||||
s1[1] = vmull_lane_s16(vget_high_s16(in0), c, 1);
|
||||
|
||||
d16s16 = vget_low_s16(*q8s16);
|
||||
d17s16 = vget_high_s16(*q8s16);
|
||||
d18s16 = vget_low_s16(*q9s16);
|
||||
d19s16 = vget_high_s16(*q9s16);
|
||||
d20s16 = vget_low_s16(*q10s16);
|
||||
d21s16 = vget_high_s16(*q10s16);
|
||||
d22s16 = vget_low_s16(*q11s16);
|
||||
d23s16 = vget_high_s16(*q11s16);
|
||||
d24s16 = vget_low_s16(*q12s16);
|
||||
d25s16 = vget_high_s16(*q12s16);
|
||||
d26s16 = vget_low_s16(*q13s16);
|
||||
d27s16 = vget_high_s16(*q13s16);
|
||||
d28s16 = vget_low_s16(*q14s16);
|
||||
d29s16 = vget_high_s16(*q14s16);
|
||||
d30s16 = vget_low_s16(*q15s16);
|
||||
d31s16 = vget_high_s16(*q15s16);
|
||||
s0[0] = vmlal_lane_s16(s0[0], vget_low_s16(in1), c, 1);
|
||||
s0[1] = vmlal_lane_s16(s0[1], vget_high_s16(in1), c, 1);
|
||||
s1[0] = vmlsl_lane_s16(s1[0], vget_low_s16(in1), c, 0);
|
||||
s1[1] = vmlsl_lane_s16(s1[1], vget_high_s16(in1), c, 0);
|
||||
}
|
||||
|
||||
d14s16 = vdup_n_s16(cospi_2_64);
|
||||
d15s16 = vdup_n_s16(cospi_30_64);
|
||||
static INLINE void iadst_butterfly_lane_2_3_neon(const int16x8_t in0,
|
||||
const int16x8_t in1,
|
||||
const int16x4_t c,
|
||||
int32x4_t *const s0,
|
||||
int32x4_t *const s1) {
|
||||
s0[0] = vmull_lane_s16(vget_low_s16(in0), c, 2);
|
||||
s0[1] = vmull_lane_s16(vget_high_s16(in0), c, 2);
|
||||
s1[0] = vmull_lane_s16(vget_low_s16(in0), c, 3);
|
||||
s1[1] = vmull_lane_s16(vget_high_s16(in0), c, 3);
|
||||
|
||||
q1s32 = vmull_s16(d30s16, d14s16);
|
||||
q2s32 = vmull_s16(d31s16, d14s16);
|
||||
q3s32 = vmull_s16(d30s16, d15s16);
|
||||
q4s32 = vmull_s16(d31s16, d15s16);
|
||||
s0[0] = vmlal_lane_s16(s0[0], vget_low_s16(in1), c, 3);
|
||||
s0[1] = vmlal_lane_s16(s0[1], vget_high_s16(in1), c, 3);
|
||||
s1[0] = vmlsl_lane_s16(s1[0], vget_low_s16(in1), c, 2);
|
||||
s1[1] = vmlsl_lane_s16(s1[1], vget_high_s16(in1), c, 2);
|
||||
}
|
||||
|
||||
d30s16 = vdup_n_s16(cospi_18_64);
|
||||
d31s16 = vdup_n_s16(cospi_14_64);
|
||||
static INLINE void iadst_butterfly_lane_3_2_neon(const int16x8_t in0,
|
||||
const int16x8_t in1,
|
||||
const int16x4_t c,
|
||||
int32x4_t *const s0,
|
||||
int32x4_t *const s1) {
|
||||
s0[0] = vmull_lane_s16(vget_low_s16(in0), c, 3);
|
||||
s0[1] = vmull_lane_s16(vget_high_s16(in0), c, 3);
|
||||
s1[0] = vmull_lane_s16(vget_low_s16(in0), c, 2);
|
||||
s1[1] = vmull_lane_s16(vget_high_s16(in0), c, 2);
|
||||
|
||||
q1s32 = vmlal_s16(q1s32, d16s16, d15s16);
|
||||
q2s32 = vmlal_s16(q2s32, d17s16, d15s16);
|
||||
q3s32 = vmlsl_s16(q3s32, d16s16, d14s16);
|
||||
q4s32 = vmlsl_s16(q4s32, d17s16, d14s16);
|
||||
s0[0] = vmlal_lane_s16(s0[0], vget_low_s16(in1), c, 2);
|
||||
s0[1] = vmlal_lane_s16(s0[1], vget_high_s16(in1), c, 2);
|
||||
s1[0] = vmlsl_lane_s16(s1[0], vget_low_s16(in1), c, 3);
|
||||
s1[1] = vmlsl_lane_s16(s1[1], vget_high_s16(in1), c, 3);
|
||||
}
|
||||
|
||||
q5s32 = vmull_s16(d22s16, d30s16);
|
||||
q6s32 = vmull_s16(d23s16, d30s16);
|
||||
q7s32 = vmull_s16(d22s16, d31s16);
|
||||
q8s32 = vmull_s16(d23s16, d31s16);
|
||||
static INLINE int16x8_t add_dct_const_round_shift_low_8(
|
||||
const int32x4_t *const in0, const int32x4_t *const in1) {
|
||||
int32x4_t sum[2];
|
||||
|
||||
q5s32 = vmlal_s16(q5s32, d24s16, d31s16);
|
||||
q6s32 = vmlal_s16(q6s32, d25s16, d31s16);
|
||||
q7s32 = vmlsl_s16(q7s32, d24s16, d30s16);
|
||||
q8s32 = vmlsl_s16(q8s32, d25s16, d30s16);
|
||||
sum[0] = vaddq_s32(in0[0], in1[0]);
|
||||
sum[1] = vaddq_s32(in0[1], in1[1]);
|
||||
return dct_const_round_shift_low_8(sum);
|
||||
}
|
||||
|
||||
q11s32 = vaddq_s32(q1s32, q5s32);
|
||||
q12s32 = vaddq_s32(q2s32, q6s32);
|
||||
q1s32 = vsubq_s32(q1s32, q5s32);
|
||||
q2s32 = vsubq_s32(q2s32, q6s32);
|
||||
static INLINE int16x8_t sub_dct_const_round_shift_low_8(
|
||||
const int32x4_t *const in0, const int32x4_t *const in1) {
|
||||
int32x4_t sum[2];
|
||||
|
||||
d22s16 = vrshrn_n_s32(q11s32, 14);
|
||||
d23s16 = vrshrn_n_s32(q12s32, 14);
|
||||
*q11s16 = vcombine_s16(d22s16, d23s16);
|
||||
sum[0] = vsubq_s32(in0[0], in1[0]);
|
||||
sum[1] = vsubq_s32(in0[1], in1[1]);
|
||||
return dct_const_round_shift_low_8(sum);
|
||||
}
|
||||
|
||||
q12s32 = vaddq_s32(q3s32, q7s32);
|
||||
q15s32 = vaddq_s32(q4s32, q8s32);
|
||||
q3s32 = vsubq_s32(q3s32, q7s32);
|
||||
q4s32 = vsubq_s32(q4s32, q8s32);
|
||||
static INLINE void iadst8(int16x8_t *const io) {
|
||||
const int16x4_t c0 =
|
||||
create_s16x4_neon(cospi_2_64, cospi_30_64, cospi_10_64, cospi_22_64);
|
||||
const int16x4_t c1 =
|
||||
create_s16x4_neon(cospi_18_64, cospi_14_64, cospi_26_64, cospi_6_64);
|
||||
const int16x4_t c2 =
|
||||
create_s16x4_neon(cospi_16_64, 0, cospi_8_64, cospi_24_64);
|
||||
int16x8_t x[8], t[4];
|
||||
int32x4_t s0[2], s1[2], s2[2], s3[2], s4[2], s5[2], s6[2], s7[2];
|
||||
|
||||
d2s16 = vrshrn_n_s32(q1s32, 14);
|
||||
d3s16 = vrshrn_n_s32(q2s32, 14);
|
||||
d24s16 = vrshrn_n_s32(q12s32, 14);
|
||||
d25s16 = vrshrn_n_s32(q15s32, 14);
|
||||
d6s16 = vrshrn_n_s32(q3s32, 14);
|
||||
d7s16 = vrshrn_n_s32(q4s32, 14);
|
||||
*q12s16 = vcombine_s16(d24s16, d25s16);
|
||||
x[0] = io[7];
|
||||
x[1] = io[0];
|
||||
x[2] = io[5];
|
||||
x[3] = io[2];
|
||||
x[4] = io[3];
|
||||
x[5] = io[4];
|
||||
x[6] = io[1];
|
||||
x[7] = io[6];
|
||||
|
||||
d0s16 = vdup_n_s16(cospi_10_64);
|
||||
d1s16 = vdup_n_s16(cospi_22_64);
|
||||
q4s32 = vmull_s16(d26s16, d0s16);
|
||||
q5s32 = vmull_s16(d27s16, d0s16);
|
||||
q2s32 = vmull_s16(d26s16, d1s16);
|
||||
q6s32 = vmull_s16(d27s16, d1s16);
|
||||
// stage 1
|
||||
iadst_butterfly_lane_0_1_neon(x[0], x[1], c0, s0, s1);
|
||||
iadst_butterfly_lane_2_3_neon(x[2], x[3], c0, s2, s3);
|
||||
iadst_butterfly_lane_0_1_neon(x[4], x[5], c1, s4, s5);
|
||||
iadst_butterfly_lane_2_3_neon(x[6], x[7], c1, s6, s7);
|
||||
|
||||
d30s16 = vdup_n_s16(cospi_26_64);
|
||||
d31s16 = vdup_n_s16(cospi_6_64);
|
||||
x[0] = add_dct_const_round_shift_low_8(s0, s4);
|
||||
x[1] = add_dct_const_round_shift_low_8(s1, s5);
|
||||
x[2] = add_dct_const_round_shift_low_8(s2, s6);
|
||||
x[3] = add_dct_const_round_shift_low_8(s3, s7);
|
||||
x[4] = sub_dct_const_round_shift_low_8(s0, s4);
|
||||
x[5] = sub_dct_const_round_shift_low_8(s1, s5);
|
||||
x[6] = sub_dct_const_round_shift_low_8(s2, s6);
|
||||
x[7] = sub_dct_const_round_shift_low_8(s3, s7);
|
||||
|
||||
q4s32 = vmlal_s16(q4s32, d20s16, d1s16);
|
||||
q5s32 = vmlal_s16(q5s32, d21s16, d1s16);
|
||||
q2s32 = vmlsl_s16(q2s32, d20s16, d0s16);
|
||||
q6s32 = vmlsl_s16(q6s32, d21s16, d0s16);
|
||||
// stage 2
|
||||
t[0] = x[0];
|
||||
t[1] = x[1];
|
||||
t[2] = x[2];
|
||||
t[3] = x[3];
|
||||
iadst_butterfly_lane_2_3_neon(x[4], x[5], c2, s4, s5);
|
||||
iadst_butterfly_lane_3_2_neon(x[7], x[6], c2, s7, s6);
|
||||
|
||||
q0s32 = vmull_s16(d18s16, d30s16);
|
||||
q13s32 = vmull_s16(d19s16, d30s16);
|
||||
x[0] = vaddq_s16(t[0], t[2]);
|
||||
x[1] = vaddq_s16(t[1], t[3]);
|
||||
x[2] = vsubq_s16(t[0], t[2]);
|
||||
x[3] = vsubq_s16(t[1], t[3]);
|
||||
x[4] = add_dct_const_round_shift_low_8(s4, s6);
|
||||
x[5] = add_dct_const_round_shift_low_8(s5, s7);
|
||||
x[6] = sub_dct_const_round_shift_low_8(s4, s6);
|
||||
x[7] = sub_dct_const_round_shift_low_8(s5, s7);
|
||||
|
||||
q0s32 = vmlal_s16(q0s32, d28s16, d31s16);
|
||||
q13s32 = vmlal_s16(q13s32, d29s16, d31s16);
|
||||
// stage 3
|
||||
iadst_half_butterfly_neon(x + 2, c2);
|
||||
iadst_half_butterfly_neon(x + 6, c2);
|
||||
|
||||
q10s32 = vmull_s16(d18s16, d31s16);
|
||||
q9s32 = vmull_s16(d19s16, d31s16);
|
||||
|
||||
q10s32 = vmlsl_s16(q10s32, d28s16, d30s16);
|
||||
q9s32 = vmlsl_s16(q9s32, d29s16, d30s16);
|
||||
|
||||
q14s32 = vaddq_s32(q2s32, q10s32);
|
||||
q15s32 = vaddq_s32(q6s32, q9s32);
|
||||
q2s32 = vsubq_s32(q2s32, q10s32);
|
||||
q6s32 = vsubq_s32(q6s32, q9s32);
|
||||
|
||||
d28s16 = vrshrn_n_s32(q14s32, 14);
|
||||
d29s16 = vrshrn_n_s32(q15s32, 14);
|
||||
d4s16 = vrshrn_n_s32(q2s32, 14);
|
||||
d5s16 = vrshrn_n_s32(q6s32, 14);
|
||||
*q14s16 = vcombine_s16(d28s16, d29s16);
|
||||
|
||||
q9s32 = vaddq_s32(q4s32, q0s32);
|
||||
q10s32 = vaddq_s32(q5s32, q13s32);
|
||||
q4s32 = vsubq_s32(q4s32, q0s32);
|
||||
q5s32 = vsubq_s32(q5s32, q13s32);
|
||||
|
||||
d30s16 = vdup_n_s16(cospi_8_64);
|
||||
d31s16 = vdup_n_s16(cospi_24_64);
|
||||
|
||||
d18s16 = vrshrn_n_s32(q9s32, 14);
|
||||
d19s16 = vrshrn_n_s32(q10s32, 14);
|
||||
d8s16 = vrshrn_n_s32(q4s32, 14);
|
||||
d9s16 = vrshrn_n_s32(q5s32, 14);
|
||||
*q9s16 = vcombine_s16(d18s16, d19s16);
|
||||
|
||||
q5s32 = vmull_s16(d2s16, d30s16);
|
||||
q6s32 = vmull_s16(d3s16, d30s16);
|
||||
q7s32 = vmull_s16(d2s16, d31s16);
|
||||
q0s32 = vmull_s16(d3s16, d31s16);
|
||||
|
||||
q5s32 = vmlal_s16(q5s32, d6s16, d31s16);
|
||||
q6s32 = vmlal_s16(q6s32, d7s16, d31s16);
|
||||
q7s32 = vmlsl_s16(q7s32, d6s16, d30s16);
|
||||
q0s32 = vmlsl_s16(q0s32, d7s16, d30s16);
|
||||
|
||||
q1s32 = vmull_s16(d4s16, d30s16);
|
||||
q3s32 = vmull_s16(d5s16, d30s16);
|
||||
q10s32 = vmull_s16(d4s16, d31s16);
|
||||
q2s32 = vmull_s16(d5s16, d31s16);
|
||||
|
||||
q1s32 = vmlsl_s16(q1s32, d8s16, d31s16);
|
||||
q3s32 = vmlsl_s16(q3s32, d9s16, d31s16);
|
||||
q10s32 = vmlal_s16(q10s32, d8s16, d30s16);
|
||||
q2s32 = vmlal_s16(q2s32, d9s16, d30s16);
|
||||
|
||||
*q8s16 = vaddq_s16(*q11s16, *q9s16);
|
||||
*q11s16 = vsubq_s16(*q11s16, *q9s16);
|
||||
q4s16 = vaddq_s16(*q12s16, *q14s16);
|
||||
*q12s16 = vsubq_s16(*q12s16, *q14s16);
|
||||
|
||||
q14s32 = vaddq_s32(q5s32, q1s32);
|
||||
q15s32 = vaddq_s32(q6s32, q3s32);
|
||||
q5s32 = vsubq_s32(q5s32, q1s32);
|
||||
q6s32 = vsubq_s32(q6s32, q3s32);
|
||||
|
||||
d18s16 = vrshrn_n_s32(q14s32, 14);
|
||||
d19s16 = vrshrn_n_s32(q15s32, 14);
|
||||
d10s16 = vrshrn_n_s32(q5s32, 14);
|
||||
d11s16 = vrshrn_n_s32(q6s32, 14);
|
||||
*q9s16 = vcombine_s16(d18s16, d19s16);
|
||||
|
||||
q1s32 = vaddq_s32(q7s32, q10s32);
|
||||
q3s32 = vaddq_s32(q0s32, q2s32);
|
||||
q7s32 = vsubq_s32(q7s32, q10s32);
|
||||
q0s32 = vsubq_s32(q0s32, q2s32);
|
||||
|
||||
d28s16 = vrshrn_n_s32(q1s32, 14);
|
||||
d29s16 = vrshrn_n_s32(q3s32, 14);
|
||||
d14s16 = vrshrn_n_s32(q7s32, 14);
|
||||
d15s16 = vrshrn_n_s32(q0s32, 14);
|
||||
*q14s16 = vcombine_s16(d28s16, d29s16);
|
||||
|
||||
d30s16 = vdup_n_s16(cospi_16_64);
|
||||
|
||||
d22s16 = vget_low_s16(*q11s16);
|
||||
d23s16 = vget_high_s16(*q11s16);
|
||||
q2s32 = vmull_s16(d22s16, d30s16);
|
||||
q3s32 = vmull_s16(d23s16, d30s16);
|
||||
q13s32 = vmull_s16(d22s16, d30s16);
|
||||
q1s32 = vmull_s16(d23s16, d30s16);
|
||||
|
||||
d24s16 = vget_low_s16(*q12s16);
|
||||
d25s16 = vget_high_s16(*q12s16);
|
||||
q2s32 = vmlal_s16(q2s32, d24s16, d30s16);
|
||||
q3s32 = vmlal_s16(q3s32, d25s16, d30s16);
|
||||
q13s32 = vmlsl_s16(q13s32, d24s16, d30s16);
|
||||
q1s32 = vmlsl_s16(q1s32, d25s16, d30s16);
|
||||
|
||||
d4s16 = vrshrn_n_s32(q2s32, 14);
|
||||
d5s16 = vrshrn_n_s32(q3s32, 14);
|
||||
d24s16 = vrshrn_n_s32(q13s32, 14);
|
||||
d25s16 = vrshrn_n_s32(q1s32, 14);
|
||||
q2s16 = vcombine_s16(d4s16, d5s16);
|
||||
*q12s16 = vcombine_s16(d24s16, d25s16);
|
||||
|
||||
q13s32 = vmull_s16(d10s16, d30s16);
|
||||
q1s32 = vmull_s16(d11s16, d30s16);
|
||||
q11s32 = vmull_s16(d10s16, d30s16);
|
||||
q0s32 = vmull_s16(d11s16, d30s16);
|
||||
|
||||
q13s32 = vmlal_s16(q13s32, d14s16, d30s16);
|
||||
q1s32 = vmlal_s16(q1s32, d15s16, d30s16);
|
||||
q11s32 = vmlsl_s16(q11s32, d14s16, d30s16);
|
||||
q0s32 = vmlsl_s16(q0s32, d15s16, d30s16);
|
||||
|
||||
d20s16 = vrshrn_n_s32(q13s32, 14);
|
||||
d21s16 = vrshrn_n_s32(q1s32, 14);
|
||||
d12s16 = vrshrn_n_s32(q11s32, 14);
|
||||
d13s16 = vrshrn_n_s32(q0s32, 14);
|
||||
*q10s16 = vcombine_s16(d20s16, d21s16);
|
||||
q6s16 = vcombine_s16(d12s16, d13s16);
|
||||
|
||||
q5s16 = vdupq_n_s16(0);
|
||||
|
||||
*q9s16 = vsubq_s16(q5s16, *q9s16);
|
||||
*q11s16 = vsubq_s16(q5s16, q2s16);
|
||||
*q13s16 = vsubq_s16(q5s16, q6s16);
|
||||
*q15s16 = vsubq_s16(q5s16, q4s16);
|
||||
io[0] = x[0];
|
||||
io[1] = vnegq_s16(x[4]);
|
||||
io[2] = x[6];
|
||||
io[3] = vnegq_s16(x[2]);
|
||||
io[4] = x[3];
|
||||
io[5] = vnegq_s16(x[7]);
|
||||
io[6] = x[5];
|
||||
io[7] = vnegq_s16(x[1]);
|
||||
}
|
||||
|
||||
void vp9_iht8x8_64_add_neon(const tran_low_t *input, uint8_t *dest, int stride,
|
||||
int tx_type) {
|
||||
int i;
|
||||
uint8_t *d1, *d2;
|
||||
uint8x8_t d0u8, d1u8, d2u8, d3u8;
|
||||
uint64x1_t d0u64, d1u64, d2u64, d3u64;
|
||||
int16x8_t q8s16, q9s16, q10s16, q11s16, q12s16, q13s16, q14s16, q15s16;
|
||||
uint16x8_t q8u16, q9u16, q10u16, q11u16;
|
||||
const int16x8_t cospis = vld1q_s16(kCospi);
|
||||
const int16x4_t cospis0 = vget_low_s16(cospis); // cospi 0, 8, 16, 24
|
||||
const int16x4_t cospis1 = vget_high_s16(cospis); // cospi 4, 12, 20, 28
|
||||
int16x8_t a[8];
|
||||
|
||||
q8s16 = vld1q_s16(input);
|
||||
q9s16 = vld1q_s16(input + 8);
|
||||
q10s16 = vld1q_s16(input + 8 * 2);
|
||||
q11s16 = vld1q_s16(input + 8 * 3);
|
||||
q12s16 = vld1q_s16(input + 8 * 4);
|
||||
q13s16 = vld1q_s16(input + 8 * 5);
|
||||
q14s16 = vld1q_s16(input + 8 * 6);
|
||||
q15s16 = vld1q_s16(input + 8 * 7);
|
||||
a[0] = load_tran_low_to_s16q(input + 0 * 8);
|
||||
a[1] = load_tran_low_to_s16q(input + 1 * 8);
|
||||
a[2] = load_tran_low_to_s16q(input + 2 * 8);
|
||||
a[3] = load_tran_low_to_s16q(input + 3 * 8);
|
||||
a[4] = load_tran_low_to_s16q(input + 4 * 8);
|
||||
a[5] = load_tran_low_to_s16q(input + 5 * 8);
|
||||
a[6] = load_tran_low_to_s16q(input + 6 * 8);
|
||||
a[7] = load_tran_low_to_s16q(input + 7 * 8);
|
||||
|
||||
transpose_s16_8x8(&q8s16, &q9s16, &q10s16, &q11s16, &q12s16, &q13s16, &q14s16,
|
||||
&q15s16);
|
||||
transpose_s16_8x8(&a[0], &a[1], &a[2], &a[3], &a[4], &a[5], &a[6], &a[7]);
|
||||
|
||||
switch (tx_type) {
|
||||
case 0: // idct_idct is not supported. Fall back to C
|
||||
vp9_iht8x8_64_add_c(input, dest, stride, tx_type);
|
||||
return;
|
||||
case 1: // iadst_idct
|
||||
// generate IDCT constants
|
||||
// GENERATE_IDCT_CONSTANTS
|
||||
|
||||
// first transform rows
|
||||
IDCT8x8_1D(&q8s16, &q9s16, &q10s16, &q11s16, &q12s16, &q13s16, &q14s16,
|
||||
&q15s16);
|
||||
|
||||
// transpose the matrix
|
||||
transpose_s16_8x8(&q8s16, &q9s16, &q10s16, &q11s16, &q12s16, &q13s16,
|
||||
&q14s16, &q15s16);
|
||||
|
||||
// generate IADST constants
|
||||
// GENERATE_IADST_CONSTANTS
|
||||
|
||||
// then transform columns
|
||||
IADST8X8_1D(&q8s16, &q9s16, &q10s16, &q11s16, &q12s16, &q13s16, &q14s16,
|
||||
&q15s16);
|
||||
case DCT_DCT:
|
||||
idct8x8_64_1d_bd8_kernel(cospis0, cospis1, a);
|
||||
transpose_s16_8x8(&a[0], &a[1], &a[2], &a[3], &a[4], &a[5], &a[6], &a[7]);
|
||||
idct8x8_64_1d_bd8_kernel(cospis0, cospis1, a);
|
||||
break;
|
||||
case 2: // idct_iadst
|
||||
// generate IADST constants
|
||||
// GENERATE_IADST_CONSTANTS
|
||||
|
||||
// first transform rows
|
||||
IADST8X8_1D(&q8s16, &q9s16, &q10s16, &q11s16, &q12s16, &q13s16, &q14s16,
|
||||
&q15s16);
|
||||
|
||||
// transpose the matrix
|
||||
transpose_s16_8x8(&q8s16, &q9s16, &q10s16, &q11s16, &q12s16, &q13s16,
|
||||
&q14s16, &q15s16);
|
||||
|
||||
// generate IDCT constants
|
||||
// GENERATE_IDCT_CONSTANTS
|
||||
|
||||
// then transform columns
|
||||
IDCT8x8_1D(&q8s16, &q9s16, &q10s16, &q11s16, &q12s16, &q13s16, &q14s16,
|
||||
&q15s16);
|
||||
case ADST_DCT:
|
||||
idct8x8_64_1d_bd8_kernel(cospis0, cospis1, a);
|
||||
transpose_s16_8x8(&a[0], &a[1], &a[2], &a[3], &a[4], &a[5], &a[6], &a[7]);
|
||||
iadst8(a);
|
||||
break;
|
||||
case 3: // iadst_iadst
|
||||
// generate IADST constants
|
||||
// GENERATE_IADST_CONSTANTS
|
||||
|
||||
// first transform rows
|
||||
IADST8X8_1D(&q8s16, &q9s16, &q10s16, &q11s16, &q12s16, &q13s16, &q14s16,
|
||||
&q15s16);
|
||||
|
||||
// transpose the matrix
|
||||
transpose_s16_8x8(&q8s16, &q9s16, &q10s16, &q11s16, &q12s16, &q13s16,
|
||||
&q14s16, &q15s16);
|
||||
|
||||
// then transform columns
|
||||
IADST8X8_1D(&q8s16, &q9s16, &q10s16, &q11s16, &q12s16, &q13s16, &q14s16,
|
||||
&q15s16);
|
||||
case DCT_ADST:
|
||||
iadst8(a);
|
||||
transpose_s16_8x8(&a[0], &a[1], &a[2], &a[3], &a[4], &a[5], &a[6], &a[7]);
|
||||
idct8x8_64_1d_bd8_kernel(cospis0, cospis1, a);
|
||||
break;
|
||||
default: // iadst_idct
|
||||
assert(0);
|
||||
|
||||
default:
|
||||
assert(tx_type == ADST_ADST);
|
||||
iadst8(a);
|
||||
transpose_s16_8x8(&a[0], &a[1], &a[2], &a[3], &a[4], &a[5], &a[6], &a[7]);
|
||||
iadst8(a);
|
||||
break;
|
||||
}
|
||||
|
||||
q8s16 = vrshrq_n_s16(q8s16, 5);
|
||||
q9s16 = vrshrq_n_s16(q9s16, 5);
|
||||
q10s16 = vrshrq_n_s16(q10s16, 5);
|
||||
q11s16 = vrshrq_n_s16(q11s16, 5);
|
||||
q12s16 = vrshrq_n_s16(q12s16, 5);
|
||||
q13s16 = vrshrq_n_s16(q13s16, 5);
|
||||
q14s16 = vrshrq_n_s16(q14s16, 5);
|
||||
q15s16 = vrshrq_n_s16(q15s16, 5);
|
||||
|
||||
for (d1 = d2 = dest, i = 0; i < 2; i++) {
|
||||
if (i != 0) {
|
||||
q8s16 = q12s16;
|
||||
q9s16 = q13s16;
|
||||
q10s16 = q14s16;
|
||||
q11s16 = q15s16;
|
||||
}
|
||||
|
||||
d0u64 = vld1_u64((uint64_t *)d1);
|
||||
d1 += stride;
|
||||
d1u64 = vld1_u64((uint64_t *)d1);
|
||||
d1 += stride;
|
||||
d2u64 = vld1_u64((uint64_t *)d1);
|
||||
d1 += stride;
|
||||
d3u64 = vld1_u64((uint64_t *)d1);
|
||||
d1 += stride;
|
||||
|
||||
q8u16 = vaddw_u8(vreinterpretq_u16_s16(q8s16), vreinterpret_u8_u64(d0u64));
|
||||
q9u16 = vaddw_u8(vreinterpretq_u16_s16(q9s16), vreinterpret_u8_u64(d1u64));
|
||||
q10u16 =
|
||||
vaddw_u8(vreinterpretq_u16_s16(q10s16), vreinterpret_u8_u64(d2u64));
|
||||
q11u16 =
|
||||
vaddw_u8(vreinterpretq_u16_s16(q11s16), vreinterpret_u8_u64(d3u64));
|
||||
|
||||
d0u8 = vqmovun_s16(vreinterpretq_s16_u16(q8u16));
|
||||
d1u8 = vqmovun_s16(vreinterpretq_s16_u16(q9u16));
|
||||
d2u8 = vqmovun_s16(vreinterpretq_s16_u16(q10u16));
|
||||
d3u8 = vqmovun_s16(vreinterpretq_s16_u16(q11u16));
|
||||
|
||||
vst1_u64((uint64_t *)d2, vreinterpret_u64_u8(d0u8));
|
||||
d2 += stride;
|
||||
vst1_u64((uint64_t *)d2, vreinterpret_u64_u8(d1u8));
|
||||
d2 += stride;
|
||||
vst1_u64((uint64_t *)d2, vreinterpret_u64_u8(d2u8));
|
||||
d2 += stride;
|
||||
vst1_u64((uint64_t *)d2, vreinterpret_u64_u8(d3u8));
|
||||
d2 += stride;
|
||||
}
|
||||
idct8x8_add8x8_neon(a, dest, stride);
|
||||
}
|
||||
|
60
vp9/common/arm/neon/vp9_iht_neon.h
Normal file
@@ -0,0 +1,60 @@
|
||||
/*
|
||||
* Copyright (c) 2018 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef VP9_COMMON_ARM_NEON_VP9_IHT_NEON_H_
|
||||
#define VP9_COMMON_ARM_NEON_VP9_IHT_NEON_H_
|
||||
|
||||
#include <arm_neon.h>
|
||||
|
||||
#include "./vp9_rtcd.h"
|
||||
#include "./vpx_config.h"
|
||||
#include "vp9/common/vp9_common.h"
|
||||
#include "vpx_dsp/arm/idct_neon.h"
|
||||
#include "vpx_dsp/arm/mem_neon.h"
|
||||
#include "vpx_dsp/txfm_common.h"
|
||||
|
||||
static INLINE void iadst4(int16x8_t *const io) {
|
||||
const int32x4_t c3 = vdupq_n_s32(sinpi_3_9);
|
||||
int16x4_t x[4];
|
||||
int32x4_t s[8], output[4];
|
||||
const int16x4_t c =
|
||||
create_s16x4_neon(sinpi_1_9, sinpi_2_9, sinpi_3_9, sinpi_4_9);
|
||||
|
||||
x[0] = vget_low_s16(io[0]);
|
||||
x[1] = vget_low_s16(io[1]);
|
||||
x[2] = vget_high_s16(io[0]);
|
||||
x[3] = vget_high_s16(io[1]);
|
||||
|
||||
s[0] = vmull_lane_s16(x[0], c, 0);
|
||||
s[1] = vmull_lane_s16(x[0], c, 1);
|
||||
s[2] = vmull_lane_s16(x[1], c, 2);
|
||||
s[3] = vmull_lane_s16(x[2], c, 3);
|
||||
s[4] = vmull_lane_s16(x[2], c, 0);
|
||||
s[5] = vmull_lane_s16(x[3], c, 1);
|
||||
s[6] = vmull_lane_s16(x[3], c, 3);
|
||||
s[7] = vaddl_s16(x[0], x[3]);
|
||||
s[7] = vsubw_s16(s[7], x[2]);
|
||||
|
||||
s[0] = vaddq_s32(s[0], s[3]);
|
||||
s[0] = vaddq_s32(s[0], s[5]);
|
||||
s[1] = vsubq_s32(s[1], s[4]);
|
||||
s[1] = vsubq_s32(s[1], s[6]);
|
||||
s[3] = s[2];
|
||||
s[2] = vmulq_s32(c3, s[7]);
|
||||
|
||||
output[0] = vaddq_s32(s[0], s[3]);
|
||||
output[1] = vaddq_s32(s[1], s[3]);
|
||||
output[2] = s[2];
|
||||
output[3] = vaddq_s32(s[0], s[1]);
|
||||
output[3] = vsubq_s32(output[3], s[3]);
|
||||
dct_const_round_shift_low_8_dual(output, &io[0], &io[1]);
|
||||
}
|
||||
|
||||
#endif // VP9_COMMON_ARM_NEON_VP9_IHT_NEON_H_
|
@@ -42,6 +42,7 @@ const vpx_prob vp9_cat6_prob_high12[] = { 255, 255, 255, 255, 254, 254,
|
||||
177, 153, 140, 133, 130, 129 };
|
||||
#endif
|
||||
|
||||
/* clang-format off */
|
||||
const uint8_t vp9_coefband_trans_8x8plus[1024] = {
|
||||
0, 1, 1, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5,
|
||||
// beyond MAXBAND_INDEX+1 all values are filled as 5
|
||||
@@ -85,6 +86,7 @@ const uint8_t vp9_coefband_trans_8x8plus[1024] = {
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
};
|
||||
/* clang-format on */
|
||||
|
||||
const uint8_t vp9_coefband_trans_4x4[16] = {
|
||||
0, 1, 1, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 5, 5, 5,
|
||||
|
@@ -137,7 +137,6 @@ static INLINE const uint8_t *get_band_translate(TX_SIZE tx_size) {
|
||||
// 128 lists of probabilities are stored for the following ONE node probs:
|
||||
// 1, 3, 5, 7, ..., 253, 255
|
||||
// In between probabilities are interpolated linearly
|
||||
|
||||
#define COEFF_PROB_MODELS 255
|
||||
|
||||
#define UNCONSTRAINED_NODES 3
|
||||
|
@@ -186,16 +186,19 @@ const vpx_prob vp9_kf_partition_probs[PARTITION_CONTEXTS][PARTITION_TYPES - 1] =
|
||||
{ 93, 24, 99 }, // a split, l not split
|
||||
{ 85, 119, 44 }, // l split, a not split
|
||||
{ 62, 59, 67 }, // a/l both split
|
||||
|
||||
// 16x16 -> 8x8
|
||||
{ 149, 53, 53 }, // a/l both not split
|
||||
{ 94, 20, 48 }, // a split, l not split
|
||||
{ 83, 53, 24 }, // l split, a not split
|
||||
{ 52, 18, 18 }, // a/l both split
|
||||
|
||||
// 32x32 -> 16x16
|
||||
{ 150, 40, 39 }, // a/l both not split
|
||||
{ 78, 12, 26 }, // a split, l not split
|
||||
{ 67, 33, 11 }, // l split, a not split
|
||||
{ 24, 7, 5 }, // a/l both split
|
||||
|
||||
// 64x64 -> 32x32
|
||||
{ 174, 35, 49 }, // a/l both not split
|
||||
{ 68, 11, 27 }, // a split, l not split
|
||||
|
@@ -22,9 +22,7 @@ const vpx_tree_index vp9_mv_class_tree[TREE_SIZE(MV_CLASSES)] = {
18, -MV_CLASS_7, -MV_CLASS_8, -MV_CLASS_9, -MV_CLASS_10,
};

const vpx_tree_index vp9_mv_class0_tree[TREE_SIZE(CLASS0_SIZE)] = {
-0, -1,
};
const vpx_tree_index vp9_mv_class0_tree[TREE_SIZE(CLASS0_SIZE)] = { -0, -1 };

const vpx_tree_index vp9_mv_fp_tree[TREE_SIZE(MV_FP_SIZE)] = { -0, 2, -1,
4, -2, -3 };

@@ -1174,7 +1174,7 @@ void vp9_filter_block_plane_non420(VP9_COMMON *cm,
}

// Disable filtering on the leftmost column
border_mask = ~(mi_col == 0);
border_mask = ~(mi_col == 0 ? 1 : 0);
#if CONFIG_VP9_HIGHBITDEPTH
if (cm->use_highbitdepth) {
highbd_filter_selectively_vert(

@@ -229,9 +229,8 @@ int vp9_get_pred_context_single_ref_p2(const MACROBLOCKD *xd) {
else
pred_context = 4 * (edge_mi->ref_frame[0] == GOLDEN_FRAME);
} else {
pred_context = 1 +
2 * (edge_mi->ref_frame[0] == GOLDEN_FRAME ||
edge_mi->ref_frame[1] == GOLDEN_FRAME);
pred_context = 1 + 2 * (edge_mi->ref_frame[0] == GOLDEN_FRAME ||
edge_mi->ref_frame[1] == GOLDEN_FRAME);
}
} else {  // inter/inter
const int above_has_second = has_second_ref(above_mi);

@@ -1,3 +1,13 @@
|
||||
##
|
||||
## Copyright (c) 2017 The WebM project authors. All Rights Reserved.
|
||||
##
|
||||
## Use of this source code is governed by a BSD-style license
|
||||
## that can be found in the LICENSE file in the root of the source
|
||||
## tree. An additional intellectual property rights grant can be found
|
||||
## in the file PATENTS. All contributing project authors may
|
||||
## be found in the AUTHORS file in the root of the source tree.
|
||||
##
|
||||
|
||||
sub vp9_common_forward_decls() {
|
||||
print <<EOF
|
||||
/*
|
||||
@@ -57,13 +67,13 @@ add_proto qw/void vp9_iht16x16_256_add/, "const tran_low_t *input, uint8_t *outp
if (vpx_config("CONFIG_EMULATE_HARDWARE") ne "yes") {
# Note that there are more specializations appended when
# CONFIG_VP9_HIGHBITDEPTH is off.
specialize qw/vp9_iht4x4_16_add sse2/;
specialize qw/vp9_iht4x4_16_add neon sse2/;
specialize qw/vp9_iht8x8_64_add sse2/;
specialize qw/vp9_iht16x16_256_add sse2/;
if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") ne "yes") {
# Note that these specializations are appended to the above ones.
specialize qw/vp9_iht4x4_16_add neon dspr2 msa/;
specialize qw/vp9_iht8x8_64_add neon dspr2 msa/;
specialize qw/vp9_iht4x4_16_add dspr2 msa/;
specialize qw/vp9_iht8x8_64_add dspr2 msa/;
specialize qw/vp9_iht16x16_256_add dspr2 msa/;
}
}
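For context, add_proto declares a function signature and specialize lists the optimized variants the build may substitute at run time; the generated vp9_rtcd.h then resolves each name to the best available implementation. A schematic, self-contained sketch of that dispatch idea (all names here are illustrative, not the generated code):

/* Schematic run-time dispatch, loosely mirroring what rtcd generation does. */
typedef void (*iht4x4_fn)(const short *input, unsigned char *dest, int stride,
                          int tx_type);
static void iht4x4_16_add_c(const short *in, unsigned char *dst, int stride,
                            int tx_type) { /* portable fallback */ }
static void iht4x4_16_add_sse2(const short *in, unsigned char *dst, int stride,
                               int tx_type) { /* SIMD variant */ }
static iht4x4_fn iht4x4_16_add = iht4x4_16_add_c;
static void setup_dispatch(int have_sse2) {
  if (have_sse2) iht4x4_16_add = iht4x4_16_add_sse2;  /* pick best available */
}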
@@ -91,6 +101,12 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
add_proto qw/void vp9_highbd_iht8x8_64_add/, "const tran_low_t *input, uint16_t *dest, int stride, int tx_type, int bd";

add_proto qw/void vp9_highbd_iht16x16_256_add/, "const tran_low_t *input, uint16_t *output, int pitch, int tx_type, int bd";

if (vpx_config("CONFIG_EMULATE_HARDWARE") ne "yes") {
specialize qw/vp9_highbd_iht4x4_16_add neon sse4_1/;
specialize qw/vp9_highbd_iht8x8_64_add sse4_1/;
specialize qw/vp9_highbd_iht16x16_256_add sse4_1/;
}
}

#
@@ -113,7 +129,7 @@ add_proto qw/int64_t vp9_block_error/, "const tran_low_t *coeff, const tran_low_
add_proto qw/int64_t vp9_block_error_fp/, "const tran_low_t *coeff, const tran_low_t *dqcoeff, int block_size";

add_proto qw/void vp9_quantize_fp/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *round_ptr, const int16_t *quant_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
specialize qw/vp9_quantize_fp neon sse2/, "$ssse3_x86_64";
specialize qw/vp9_quantize_fp neon sse2 avx2/, "$ssse3_x86_64";

add_proto qw/void vp9_quantize_fp_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *round_ptr, const int16_t *quant_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
specialize qw/vp9_quantize_fp_32x32 neon/, "$ssse3_x86_64";
419
vp9/common/x86/vp9_highbd_iht16x16_add_sse4.c
Normal file
@@ -0,0 +1,419 @@
|
||||
/*
|
||||
* Copyright (c) 2018 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "./vp9_rtcd.h"
|
||||
#include "vp9/common/vp9_idct.h"
|
||||
#include "vpx_dsp/x86/highbd_inv_txfm_sse4.h"
|
||||
#include "vpx_dsp/x86/inv_txfm_sse2.h"
|
||||
#include "vpx_dsp/x86/transpose_sse2.h"
|
||||
#include "vpx_dsp/x86/txfm_common_sse2.h"
|
||||
|
||||
static INLINE void highbd_iadst_half_butterfly_sse4_1(const __m128i in,
|
||||
const int c,
|
||||
__m128i *const s) {
|
||||
const __m128i pair_c = pair_set_epi32(4 * c, 0);
|
||||
__m128i x[2];
|
||||
|
||||
extend_64bit(in, x);
|
||||
s[0] = _mm_mul_epi32(pair_c, x[0]);
|
||||
s[1] = _mm_mul_epi32(pair_c, x[1]);
|
||||
}
|
||||
|
||||
static INLINE void highbd_iadst_butterfly_sse4_1(const __m128i in0,
|
||||
const __m128i in1,
|
||||
const int c0, const int c1,
|
||||
__m128i *const s0,
|
||||
__m128i *const s1) {
|
||||
const __m128i pair_c0 = pair_set_epi32(4 * c0, 0);
|
||||
const __m128i pair_c1 = pair_set_epi32(4 * c1, 0);
|
||||
__m128i t00[2], t01[2], t10[2], t11[2];
|
||||
__m128i x0[2], x1[2];
|
||||
|
||||
extend_64bit(in0, x0);
|
||||
extend_64bit(in1, x1);
|
||||
t00[0] = _mm_mul_epi32(pair_c0, x0[0]);
|
||||
t00[1] = _mm_mul_epi32(pair_c0, x0[1]);
|
||||
t01[0] = _mm_mul_epi32(pair_c0, x1[0]);
|
||||
t01[1] = _mm_mul_epi32(pair_c0, x1[1]);
|
||||
t10[0] = _mm_mul_epi32(pair_c1, x0[0]);
|
||||
t10[1] = _mm_mul_epi32(pair_c1, x0[1]);
|
||||
t11[0] = _mm_mul_epi32(pair_c1, x1[0]);
|
||||
t11[1] = _mm_mul_epi32(pair_c1, x1[1]);
|
||||
|
||||
s0[0] = _mm_add_epi64(t00[0], t11[0]);
|
||||
s0[1] = _mm_add_epi64(t00[1], t11[1]);
|
||||
s1[0] = _mm_sub_epi64(t10[0], t01[0]);
|
||||
s1[1] = _mm_sub_epi64(t10[1], t01[1]);
|
||||
}
|
||||
|
||||
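In scalar terms, the butterfly helper above computes s0 = c0*in0 + c1*in1 and s1 = c1*in0 - c0*in1 in 64-bit precision ahead of the later dct_const_round_shift; the 4*c scaling folded into pair_set_epi32 is an SSE packing detail not modelled here. A scalar reference sketch (illustrative only, not the SIMD code):

#include <stdint.h>
/* Scalar reference for one iadst butterfly lane. */
static void iadst_butterfly_ref(int32_t in0, int32_t in1, int c0, int c1,
                                int64_t *s0, int64_t *s1) {
  *s0 = (int64_t)c0 * in0 + (int64_t)c1 * in1;
  *s1 = (int64_t)c1 * in0 - (int64_t)c0 * in1;
}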
static void highbd_iadst16_4col_sse4_1(__m128i *const io /*io[16]*/) {
|
||||
__m128i s0[2], s1[2], s2[2], s3[2], s4[2], s5[2], s6[2], s7[2], s8[2], s9[2],
|
||||
s10[2], s11[2], s12[2], s13[2], s14[2], s15[2];
|
||||
__m128i x0[2], x1[2], x2[2], x3[2], x4[2], x5[2], x6[2], x7[2], x8[2], x9[2],
|
||||
x10[2], x11[2], x12[2], x13[2], x14[2], x15[2];
|
||||
|
||||
// stage 1
|
||||
highbd_iadst_butterfly_sse4_1(io[15], io[0], cospi_1_64, cospi_31_64, s0, s1);
|
||||
highbd_iadst_butterfly_sse4_1(io[13], io[2], cospi_5_64, cospi_27_64, s2, s3);
|
||||
highbd_iadst_butterfly_sse4_1(io[11], io[4], cospi_9_64, cospi_23_64, s4, s5);
|
||||
highbd_iadst_butterfly_sse4_1(io[9], io[6], cospi_13_64, cospi_19_64, s6, s7);
|
||||
highbd_iadst_butterfly_sse4_1(io[7], io[8], cospi_17_64, cospi_15_64, s8, s9);
|
||||
highbd_iadst_butterfly_sse4_1(io[5], io[10], cospi_21_64, cospi_11_64, s10,
|
||||
s11);
|
||||
highbd_iadst_butterfly_sse4_1(io[3], io[12], cospi_25_64, cospi_7_64, s12,
|
||||
s13);
|
||||
highbd_iadst_butterfly_sse4_1(io[1], io[14], cospi_29_64, cospi_3_64, s14,
|
||||
s15);
|
||||
|
||||
x0[0] = _mm_add_epi64(s0[0], s8[0]);
|
||||
x0[1] = _mm_add_epi64(s0[1], s8[1]);
|
||||
x1[0] = _mm_add_epi64(s1[0], s9[0]);
|
||||
x1[1] = _mm_add_epi64(s1[1], s9[1]);
|
||||
x2[0] = _mm_add_epi64(s2[0], s10[0]);
|
||||
x2[1] = _mm_add_epi64(s2[1], s10[1]);
|
||||
x3[0] = _mm_add_epi64(s3[0], s11[0]);
|
||||
x3[1] = _mm_add_epi64(s3[1], s11[1]);
|
||||
x4[0] = _mm_add_epi64(s4[0], s12[0]);
|
||||
x4[1] = _mm_add_epi64(s4[1], s12[1]);
|
||||
x5[0] = _mm_add_epi64(s5[0], s13[0]);
|
||||
x5[1] = _mm_add_epi64(s5[1], s13[1]);
|
||||
x6[0] = _mm_add_epi64(s6[0], s14[0]);
|
||||
x6[1] = _mm_add_epi64(s6[1], s14[1]);
|
||||
x7[0] = _mm_add_epi64(s7[0], s15[0]);
|
||||
x7[1] = _mm_add_epi64(s7[1], s15[1]);
|
||||
x8[0] = _mm_sub_epi64(s0[0], s8[0]);
|
||||
x8[1] = _mm_sub_epi64(s0[1], s8[1]);
|
||||
x9[0] = _mm_sub_epi64(s1[0], s9[0]);
|
||||
x9[1] = _mm_sub_epi64(s1[1], s9[1]);
|
||||
x10[0] = _mm_sub_epi64(s2[0], s10[0]);
|
||||
x10[1] = _mm_sub_epi64(s2[1], s10[1]);
|
||||
x11[0] = _mm_sub_epi64(s3[0], s11[0]);
|
||||
x11[1] = _mm_sub_epi64(s3[1], s11[1]);
|
||||
x12[0] = _mm_sub_epi64(s4[0], s12[0]);
|
||||
x12[1] = _mm_sub_epi64(s4[1], s12[1]);
|
||||
x13[0] = _mm_sub_epi64(s5[0], s13[0]);
|
||||
x13[1] = _mm_sub_epi64(s5[1], s13[1]);
|
||||
x14[0] = _mm_sub_epi64(s6[0], s14[0]);
|
||||
x14[1] = _mm_sub_epi64(s6[1], s14[1]);
|
||||
x15[0] = _mm_sub_epi64(s7[0], s15[0]);
|
||||
x15[1] = _mm_sub_epi64(s7[1], s15[1]);
|
||||
|
||||
x0[0] = dct_const_round_shift_64bit(x0[0]);
|
||||
x0[1] = dct_const_round_shift_64bit(x0[1]);
|
||||
x1[0] = dct_const_round_shift_64bit(x1[0]);
|
||||
x1[1] = dct_const_round_shift_64bit(x1[1]);
|
||||
x2[0] = dct_const_round_shift_64bit(x2[0]);
|
||||
x2[1] = dct_const_round_shift_64bit(x2[1]);
|
||||
x3[0] = dct_const_round_shift_64bit(x3[0]);
|
||||
x3[1] = dct_const_round_shift_64bit(x3[1]);
|
||||
x4[0] = dct_const_round_shift_64bit(x4[0]);
|
||||
x4[1] = dct_const_round_shift_64bit(x4[1]);
|
||||
x5[0] = dct_const_round_shift_64bit(x5[0]);
|
||||
x5[1] = dct_const_round_shift_64bit(x5[1]);
|
||||
x6[0] = dct_const_round_shift_64bit(x6[0]);
|
||||
x6[1] = dct_const_round_shift_64bit(x6[1]);
|
||||
x7[0] = dct_const_round_shift_64bit(x7[0]);
|
||||
x7[1] = dct_const_round_shift_64bit(x7[1]);
|
||||
x8[0] = dct_const_round_shift_64bit(x8[0]);
|
||||
x8[1] = dct_const_round_shift_64bit(x8[1]);
|
||||
x9[0] = dct_const_round_shift_64bit(x9[0]);
|
||||
x9[1] = dct_const_round_shift_64bit(x9[1]);
|
||||
x10[0] = dct_const_round_shift_64bit(x10[0]);
|
||||
x10[1] = dct_const_round_shift_64bit(x10[1]);
|
||||
x11[0] = dct_const_round_shift_64bit(x11[0]);
|
||||
x11[1] = dct_const_round_shift_64bit(x11[1]);
|
||||
x12[0] = dct_const_round_shift_64bit(x12[0]);
|
||||
x12[1] = dct_const_round_shift_64bit(x12[1]);
|
||||
x13[0] = dct_const_round_shift_64bit(x13[0]);
|
||||
x13[1] = dct_const_round_shift_64bit(x13[1]);
|
||||
x14[0] = dct_const_round_shift_64bit(x14[0]);
|
||||
x14[1] = dct_const_round_shift_64bit(x14[1]);
|
||||
x15[0] = dct_const_round_shift_64bit(x15[0]);
|
||||
x15[1] = dct_const_round_shift_64bit(x15[1]);
|
||||
x0[0] = pack_4(x0[0], x0[1]);
|
||||
x1[0] = pack_4(x1[0], x1[1]);
|
||||
x2[0] = pack_4(x2[0], x2[1]);
|
||||
x3[0] = pack_4(x3[0], x3[1]);
|
||||
x4[0] = pack_4(x4[0], x4[1]);
|
||||
x5[0] = pack_4(x5[0], x5[1]);
|
||||
x6[0] = pack_4(x6[0], x6[1]);
|
||||
x7[0] = pack_4(x7[0], x7[1]);
|
||||
x8[0] = pack_4(x8[0], x8[1]);
|
||||
x9[0] = pack_4(x9[0], x9[1]);
|
||||
x10[0] = pack_4(x10[0], x10[1]);
|
||||
x11[0] = pack_4(x11[0], x11[1]);
|
||||
x12[0] = pack_4(x12[0], x12[1]);
|
||||
x13[0] = pack_4(x13[0], x13[1]);
|
||||
x14[0] = pack_4(x14[0], x14[1]);
|
||||
x15[0] = pack_4(x15[0], x15[1]);
|
||||
|
||||
// stage 2
|
||||
s0[0] = x0[0];
|
||||
s1[0] = x1[0];
|
||||
s2[0] = x2[0];
|
||||
s3[0] = x3[0];
|
||||
s4[0] = x4[0];
|
||||
s5[0] = x5[0];
|
||||
s6[0] = x6[0];
|
||||
s7[0] = x7[0];
|
||||
x0[0] = _mm_add_epi32(s0[0], s4[0]);
|
||||
x1[0] = _mm_add_epi32(s1[0], s5[0]);
|
||||
x2[0] = _mm_add_epi32(s2[0], s6[0]);
|
||||
x3[0] = _mm_add_epi32(s3[0], s7[0]);
|
||||
x4[0] = _mm_sub_epi32(s0[0], s4[0]);
|
||||
x5[0] = _mm_sub_epi32(s1[0], s5[0]);
|
||||
x6[0] = _mm_sub_epi32(s2[0], s6[0]);
|
||||
x7[0] = _mm_sub_epi32(s3[0], s7[0]);
|
||||
|
||||
highbd_iadst_butterfly_sse4_1(x8[0], x9[0], cospi_4_64, cospi_28_64, s8, s9);
|
||||
highbd_iadst_butterfly_sse4_1(x10[0], x11[0], cospi_20_64, cospi_12_64, s10,
|
||||
s11);
|
||||
highbd_iadst_butterfly_sse4_1(x13[0], x12[0], cospi_28_64, cospi_4_64, s13,
|
||||
s12);
|
||||
highbd_iadst_butterfly_sse4_1(x15[0], x14[0], cospi_12_64, cospi_20_64, s15,
|
||||
s14);
|
||||
|
||||
x8[0] = _mm_add_epi64(s8[0], s12[0]);
|
||||
x8[1] = _mm_add_epi64(s8[1], s12[1]);
|
||||
x9[0] = _mm_add_epi64(s9[0], s13[0]);
|
||||
x9[1] = _mm_add_epi64(s9[1], s13[1]);
|
||||
x10[0] = _mm_add_epi64(s10[0], s14[0]);
|
||||
x10[1] = _mm_add_epi64(s10[1], s14[1]);
|
||||
x11[0] = _mm_add_epi64(s11[0], s15[0]);
|
||||
x11[1] = _mm_add_epi64(s11[1], s15[1]);
|
||||
x12[0] = _mm_sub_epi64(s8[0], s12[0]);
|
||||
x12[1] = _mm_sub_epi64(s8[1], s12[1]);
|
||||
x13[0] = _mm_sub_epi64(s9[0], s13[0]);
|
||||
x13[1] = _mm_sub_epi64(s9[1], s13[1]);
|
||||
x14[0] = _mm_sub_epi64(s10[0], s14[0]);
|
||||
x14[1] = _mm_sub_epi64(s10[1], s14[1]);
|
||||
x15[0] = _mm_sub_epi64(s11[0], s15[0]);
|
||||
x15[1] = _mm_sub_epi64(s11[1], s15[1]);
|
||||
x8[0] = dct_const_round_shift_64bit(x8[0]);
|
||||
x8[1] = dct_const_round_shift_64bit(x8[1]);
|
||||
x9[0] = dct_const_round_shift_64bit(x9[0]);
|
||||
x9[1] = dct_const_round_shift_64bit(x9[1]);
|
||||
x10[0] = dct_const_round_shift_64bit(x10[0]);
|
||||
x10[1] = dct_const_round_shift_64bit(x10[1]);
|
||||
x11[0] = dct_const_round_shift_64bit(x11[0]);
|
||||
x11[1] = dct_const_round_shift_64bit(x11[1]);
|
||||
x12[0] = dct_const_round_shift_64bit(x12[0]);
|
||||
x12[1] = dct_const_round_shift_64bit(x12[1]);
|
||||
x13[0] = dct_const_round_shift_64bit(x13[0]);
|
||||
x13[1] = dct_const_round_shift_64bit(x13[1]);
|
||||
x14[0] = dct_const_round_shift_64bit(x14[0]);
|
||||
x14[1] = dct_const_round_shift_64bit(x14[1]);
|
||||
x15[0] = dct_const_round_shift_64bit(x15[0]);
|
||||
x15[1] = dct_const_round_shift_64bit(x15[1]);
|
||||
x8[0] = pack_4(x8[0], x8[1]);
|
||||
x9[0] = pack_4(x9[0], x9[1]);
|
||||
x10[0] = pack_4(x10[0], x10[1]);
|
||||
x11[0] = pack_4(x11[0], x11[1]);
|
||||
x12[0] = pack_4(x12[0], x12[1]);
|
||||
x13[0] = pack_4(x13[0], x13[1]);
|
||||
x14[0] = pack_4(x14[0], x14[1]);
|
||||
x15[0] = pack_4(x15[0], x15[1]);
|
||||
|
||||
// stage 3
|
||||
s0[0] = x0[0];
|
||||
s1[0] = x1[0];
|
||||
s2[0] = x2[0];
|
||||
s3[0] = x3[0];
|
||||
highbd_iadst_butterfly_sse4_1(x4[0], x5[0], cospi_8_64, cospi_24_64, s4, s5);
|
||||
highbd_iadst_butterfly_sse4_1(x7[0], x6[0], cospi_24_64, cospi_8_64, s7, s6);
|
||||
s8[0] = x8[0];
|
||||
s9[0] = x9[0];
|
||||
s10[0] = x10[0];
|
||||
s11[0] = x11[0];
|
||||
highbd_iadst_butterfly_sse4_1(x12[0], x13[0], cospi_8_64, cospi_24_64, s12,
|
||||
s13);
|
||||
highbd_iadst_butterfly_sse4_1(x15[0], x14[0], cospi_24_64, cospi_8_64, s15,
|
||||
s14);
|
||||
|
||||
x0[0] = _mm_add_epi32(s0[0], s2[0]);
|
||||
x1[0] = _mm_add_epi32(s1[0], s3[0]);
|
||||
x2[0] = _mm_sub_epi32(s0[0], s2[0]);
|
||||
x3[0] = _mm_sub_epi32(s1[0], s3[0]);
|
||||
x4[0] = _mm_add_epi64(s4[0], s6[0]);
|
||||
x4[1] = _mm_add_epi64(s4[1], s6[1]);
|
||||
x5[0] = _mm_add_epi64(s5[0], s7[0]);
|
||||
x5[1] = _mm_add_epi64(s5[1], s7[1]);
|
||||
x6[0] = _mm_sub_epi64(s4[0], s6[0]);
|
||||
x6[1] = _mm_sub_epi64(s4[1], s6[1]);
|
||||
x7[0] = _mm_sub_epi64(s5[0], s7[0]);
|
||||
x7[1] = _mm_sub_epi64(s5[1], s7[1]);
|
||||
x4[0] = dct_const_round_shift_64bit(x4[0]);
|
||||
x4[1] = dct_const_round_shift_64bit(x4[1]);
|
||||
x5[0] = dct_const_round_shift_64bit(x5[0]);
|
||||
x5[1] = dct_const_round_shift_64bit(x5[1]);
|
||||
x6[0] = dct_const_round_shift_64bit(x6[0]);
|
||||
x6[1] = dct_const_round_shift_64bit(x6[1]);
|
||||
x7[0] = dct_const_round_shift_64bit(x7[0]);
|
||||
x7[1] = dct_const_round_shift_64bit(x7[1]);
|
||||
x4[0] = pack_4(x4[0], x4[1]);
|
||||
x5[0] = pack_4(x5[0], x5[1]);
|
||||
x6[0] = pack_4(x6[0], x6[1]);
|
||||
x7[0] = pack_4(x7[0], x7[1]);
|
||||
x8[0] = _mm_add_epi32(s8[0], s10[0]);
|
||||
x9[0] = _mm_add_epi32(s9[0], s11[0]);
|
||||
x10[0] = _mm_sub_epi32(s8[0], s10[0]);
|
||||
x11[0] = _mm_sub_epi32(s9[0], s11[0]);
|
||||
x12[0] = _mm_add_epi64(s12[0], s14[0]);
|
||||
x12[1] = _mm_add_epi64(s12[1], s14[1]);
|
||||
x13[0] = _mm_add_epi64(s13[0], s15[0]);
|
||||
x13[1] = _mm_add_epi64(s13[1], s15[1]);
|
||||
x14[0] = _mm_sub_epi64(s12[0], s14[0]);
|
||||
x14[1] = _mm_sub_epi64(s12[1], s14[1]);
|
||||
x15[0] = _mm_sub_epi64(s13[0], s15[0]);
|
||||
x15[1] = _mm_sub_epi64(s13[1], s15[1]);
|
||||
x12[0] = dct_const_round_shift_64bit(x12[0]);
|
||||
x12[1] = dct_const_round_shift_64bit(x12[1]);
|
||||
x13[0] = dct_const_round_shift_64bit(x13[0]);
|
||||
x13[1] = dct_const_round_shift_64bit(x13[1]);
|
||||
x14[0] = dct_const_round_shift_64bit(x14[0]);
|
||||
x14[1] = dct_const_round_shift_64bit(x14[1]);
|
||||
x15[0] = dct_const_round_shift_64bit(x15[0]);
|
||||
x15[1] = dct_const_round_shift_64bit(x15[1]);
|
||||
x12[0] = pack_4(x12[0], x12[1]);
|
||||
x13[0] = pack_4(x13[0], x13[1]);
|
||||
x14[0] = pack_4(x14[0], x14[1]);
|
||||
x15[0] = pack_4(x15[0], x15[1]);
|
||||
|
||||
// stage 4
|
||||
s2[0] = _mm_add_epi32(x2[0], x3[0]);
|
||||
s3[0] = _mm_sub_epi32(x2[0], x3[0]);
|
||||
s6[0] = _mm_add_epi32(x7[0], x6[0]);
|
||||
s7[0] = _mm_sub_epi32(x7[0], x6[0]);
|
||||
s10[0] = _mm_add_epi32(x11[0], x10[0]);
|
||||
s11[0] = _mm_sub_epi32(x11[0], x10[0]);
|
||||
s14[0] = _mm_add_epi32(x14[0], x15[0]);
|
||||
s15[0] = _mm_sub_epi32(x14[0], x15[0]);
|
||||
highbd_iadst_half_butterfly_sse4_1(s2[0], -cospi_16_64, s2);
|
||||
highbd_iadst_half_butterfly_sse4_1(s3[0], cospi_16_64, s3);
|
||||
highbd_iadst_half_butterfly_sse4_1(s6[0], cospi_16_64, s6);
|
||||
highbd_iadst_half_butterfly_sse4_1(s7[0], cospi_16_64, s7);
|
||||
highbd_iadst_half_butterfly_sse4_1(s10[0], cospi_16_64, s10);
|
||||
highbd_iadst_half_butterfly_sse4_1(s11[0], cospi_16_64, s11);
|
||||
highbd_iadst_half_butterfly_sse4_1(s14[0], -cospi_16_64, s14);
|
||||
highbd_iadst_half_butterfly_sse4_1(s15[0], cospi_16_64, s15);
|
||||
|
||||
x2[0] = dct_const_round_shift_64bit(s2[0]);
|
||||
x2[1] = dct_const_round_shift_64bit(s2[1]);
|
||||
x3[0] = dct_const_round_shift_64bit(s3[0]);
|
||||
x3[1] = dct_const_round_shift_64bit(s3[1]);
|
||||
x6[0] = dct_const_round_shift_64bit(s6[0]);
|
||||
x6[1] = dct_const_round_shift_64bit(s6[1]);
|
||||
x7[0] = dct_const_round_shift_64bit(s7[0]);
|
||||
x7[1] = dct_const_round_shift_64bit(s7[1]);
|
||||
x10[0] = dct_const_round_shift_64bit(s10[0]);
|
||||
x10[1] = dct_const_round_shift_64bit(s10[1]);
|
||||
x11[0] = dct_const_round_shift_64bit(s11[0]);
|
||||
x11[1] = dct_const_round_shift_64bit(s11[1]);
|
||||
x14[0] = dct_const_round_shift_64bit(s14[0]);
|
||||
x14[1] = dct_const_round_shift_64bit(s14[1]);
|
||||
x15[0] = dct_const_round_shift_64bit(s15[0]);
|
||||
x15[1] = dct_const_round_shift_64bit(s15[1]);
|
||||
x2[0] = pack_4(x2[0], x2[1]);
|
||||
x3[0] = pack_4(x3[0], x3[1]);
|
||||
x6[0] = pack_4(x6[0], x6[1]);
|
||||
x7[0] = pack_4(x7[0], x7[1]);
|
||||
x10[0] = pack_4(x10[0], x10[1]);
|
||||
x11[0] = pack_4(x11[0], x11[1]);
|
||||
x14[0] = pack_4(x14[0], x14[1]);
|
||||
x15[0] = pack_4(x15[0], x15[1]);
|
||||
|
||||
io[0] = x0[0];
|
||||
io[1] = _mm_sub_epi32(_mm_setzero_si128(), x8[0]);
|
||||
io[2] = x12[0];
|
||||
io[3] = _mm_sub_epi32(_mm_setzero_si128(), x4[0]);
|
||||
io[4] = x6[0];
|
||||
io[5] = x14[0];
|
||||
io[6] = x10[0];
|
||||
io[7] = x2[0];
|
||||
io[8] = x3[0];
|
||||
io[9] = x11[0];
|
||||
io[10] = x15[0];
|
||||
io[11] = x7[0];
|
||||
io[12] = x5[0];
|
||||
io[13] = _mm_sub_epi32(_mm_setzero_si128(), x13[0]);
|
||||
io[14] = x9[0];
|
||||
io[15] = _mm_sub_epi32(_mm_setzero_si128(), x1[0]);
|
||||
}
|
||||
|
||||
void vp9_highbd_iht16x16_256_add_sse4_1(const tran_low_t *input, uint16_t *dest,
|
||||
int stride, int tx_type, int bd) {
|
||||
int i;
|
||||
__m128i out[16], *in;
|
||||
|
||||
if (bd == 8) {
|
||||
__m128i l[16], r[16];
|
||||
|
||||
in = l;
|
||||
for (i = 0; i < 2; i++) {
|
||||
highbd_load_pack_transpose_32bit_8x8(&input[0], 16, &in[0]);
|
||||
highbd_load_pack_transpose_32bit_8x8(&input[8], 16, &in[8]);
|
||||
if (tx_type == DCT_DCT || tx_type == ADST_DCT) {
|
||||
idct16_8col(in, in);
|
||||
} else {
|
||||
vpx_iadst16_8col_sse2(in);
|
||||
}
|
||||
in = r;
|
||||
input += 128;
|
||||
}
|
||||
|
||||
for (i = 0; i < 16; i += 8) {
|
||||
int j;
|
||||
transpose_16bit_8x8(l + i, out);
|
||||
transpose_16bit_8x8(r + i, out + 8);
|
||||
if (tx_type == DCT_DCT || tx_type == DCT_ADST) {
|
||||
idct16_8col(out, out);
|
||||
} else {
|
||||
vpx_iadst16_8col_sse2(out);
|
||||
}
|
||||
|
||||
for (j = 0; j < 16; ++j) {
|
||||
highbd_write_buffer_8(dest + j * stride, out[j], bd);
|
||||
}
|
||||
dest += 8;
|
||||
}
|
||||
} else {
|
||||
__m128i all[4][16];
|
||||
|
||||
for (i = 0; i < 4; i++) {
|
||||
in = all[i];
|
||||
highbd_load_transpose_32bit_8x4(&input[0], 16, &in[0]);
|
||||
highbd_load_transpose_32bit_8x4(&input[8], 16, &in[8]);
|
||||
if (tx_type == DCT_DCT || tx_type == ADST_DCT) {
|
||||
vpx_highbd_idct16_4col_sse4_1(in);
|
||||
} else {
|
||||
highbd_iadst16_4col_sse4_1(in);
|
||||
}
|
||||
input += 4 * 16;
|
||||
}
|
||||
|
||||
for (i = 0; i < 16; i += 4) {
|
||||
int j;
|
||||
transpose_32bit_4x4(all[0] + i, out + 0);
|
||||
transpose_32bit_4x4(all[1] + i, out + 4);
|
||||
transpose_32bit_4x4(all[2] + i, out + 8);
|
||||
transpose_32bit_4x4(all[3] + i, out + 12);
|
||||
if (tx_type == DCT_DCT || tx_type == DCT_ADST) {
|
||||
vpx_highbd_idct16_4col_sse4_1(out);
|
||||
} else {
|
||||
highbd_iadst16_4col_sse4_1(out);
|
||||
}
|
||||
|
||||
for (j = 0; j < 16; ++j) {
|
||||
highbd_write_buffer_4(dest + j * stride, out[j], bd);
|
||||
}
|
||||
dest += 4;
|
||||
}
|
||||
}
|
||||
}
|
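Across the three new SSE4.1 files, the tx_type branches follow one pattern: the first 1-D pass is a DCT for DCT_DCT/ADST_DCT and an ADST otherwise, while the second pass is a DCT for DCT_DCT/DCT_ADST and an ADST otherwise. A small sketch of that selection, derived only from the branches shown in this diff:

/* Pass selection implied by the tx_type branches in these files. */
static int first_pass_is_dct(int tx_type) {
  return tx_type == DCT_DCT || tx_type == ADST_DCT;
}
static int second_pass_is_dct(int tx_type) {
  return tx_type == DCT_DCT || tx_type == DCT_ADST;
}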
131
vp9/common/x86/vp9_highbd_iht4x4_add_sse4.c
Normal file
@@ -0,0 +1,131 @@
|
||||
/*
|
||||
* Copyright (c) 2018 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "./vp9_rtcd.h"
|
||||
#include "vp9/common/vp9_idct.h"
|
||||
#include "vpx_dsp/x86/highbd_inv_txfm_sse4.h"
|
||||
#include "vpx_dsp/x86/inv_txfm_sse2.h"
|
||||
#include "vpx_dsp/x86/transpose_sse2.h"
|
||||
#include "vpx_dsp/x86/txfm_common_sse2.h"
|
||||
|
||||
static INLINE void highbd_iadst4_sse4_1(__m128i *const io) {
|
||||
const __m128i pair_c1 = pair_set_epi32(4 * sinpi_1_9, 0);
|
||||
const __m128i pair_c2 = pair_set_epi32(4 * sinpi_2_9, 0);
|
||||
const __m128i pair_c3 = pair_set_epi32(4 * sinpi_3_9, 0);
|
||||
const __m128i pair_c4 = pair_set_epi32(4 * sinpi_4_9, 0);
|
||||
__m128i s0[2], s1[2], s2[2], s3[2], s4[2], s5[2], s6[2], t0[2], t1[2], t2[2];
|
||||
__m128i temp[2];
|
||||
|
||||
transpose_32bit_4x4(io, io);
|
||||
|
||||
extend_64bit(io[0], temp);
|
||||
s0[0] = _mm_mul_epi32(pair_c1, temp[0]);
|
||||
s0[1] = _mm_mul_epi32(pair_c1, temp[1]);
|
||||
s1[0] = _mm_mul_epi32(pair_c2, temp[0]);
|
||||
s1[1] = _mm_mul_epi32(pair_c2, temp[1]);
|
||||
|
||||
extend_64bit(io[1], temp);
|
||||
s2[0] = _mm_mul_epi32(pair_c3, temp[0]);
|
||||
s2[1] = _mm_mul_epi32(pair_c3, temp[1]);
|
||||
|
||||
extend_64bit(io[2], temp);
|
||||
s3[0] = _mm_mul_epi32(pair_c4, temp[0]);
|
||||
s3[1] = _mm_mul_epi32(pair_c4, temp[1]);
|
||||
s4[0] = _mm_mul_epi32(pair_c1, temp[0]);
|
||||
s4[1] = _mm_mul_epi32(pair_c1, temp[1]);
|
||||
|
||||
extend_64bit(io[3], temp);
|
||||
s5[0] = _mm_mul_epi32(pair_c2, temp[0]);
|
||||
s5[1] = _mm_mul_epi32(pair_c2, temp[1]);
|
||||
s6[0] = _mm_mul_epi32(pair_c4, temp[0]);
|
||||
s6[1] = _mm_mul_epi32(pair_c4, temp[1]);
|
||||
|
||||
t0[0] = _mm_add_epi64(s0[0], s3[0]);
|
||||
t0[1] = _mm_add_epi64(s0[1], s3[1]);
|
||||
t0[0] = _mm_add_epi64(t0[0], s5[0]);
|
||||
t0[1] = _mm_add_epi64(t0[1], s5[1]);
|
||||
t1[0] = _mm_sub_epi64(s1[0], s4[0]);
|
||||
t1[1] = _mm_sub_epi64(s1[1], s4[1]);
|
||||
t1[0] = _mm_sub_epi64(t1[0], s6[0]);
|
||||
t1[1] = _mm_sub_epi64(t1[1], s6[1]);
|
||||
temp[0] = _mm_sub_epi32(io[0], io[2]);
|
||||
temp[0] = _mm_add_epi32(temp[0], io[3]);
|
||||
extend_64bit(temp[0], temp);
|
||||
t2[0] = _mm_mul_epi32(pair_c3, temp[0]);
|
||||
t2[1] = _mm_mul_epi32(pair_c3, temp[1]);
|
||||
|
||||
s0[0] = _mm_add_epi64(t0[0], s2[0]);
|
||||
s0[1] = _mm_add_epi64(t0[1], s2[1]);
|
||||
s1[0] = _mm_add_epi64(t1[0], s2[0]);
|
||||
s1[1] = _mm_add_epi64(t1[1], s2[1]);
|
||||
s3[0] = _mm_add_epi64(t0[0], t1[0]);
|
||||
s3[1] = _mm_add_epi64(t0[1], t1[1]);
|
||||
s3[0] = _mm_sub_epi64(s3[0], s2[0]);
|
||||
s3[1] = _mm_sub_epi64(s3[1], s2[1]);
|
||||
|
||||
s0[0] = dct_const_round_shift_64bit(s0[0]);
|
||||
s0[1] = dct_const_round_shift_64bit(s0[1]);
|
||||
s1[0] = dct_const_round_shift_64bit(s1[0]);
|
||||
s1[1] = dct_const_round_shift_64bit(s1[1]);
|
||||
s2[0] = dct_const_round_shift_64bit(t2[0]);
|
||||
s2[1] = dct_const_round_shift_64bit(t2[1]);
|
||||
s3[0] = dct_const_round_shift_64bit(s3[0]);
|
||||
s3[1] = dct_const_round_shift_64bit(s3[1]);
|
||||
io[0] = pack_4(s0[0], s0[1]);
|
||||
io[1] = pack_4(s1[0], s1[1]);
|
||||
io[2] = pack_4(s2[0], s2[1]);
|
||||
io[3] = pack_4(s3[0], s3[1]);
|
||||
}
|
||||
|
||||
void vp9_highbd_iht4x4_16_add_sse4_1(const tran_low_t *input, uint16_t *dest,
|
||||
int stride, int tx_type, int bd) {
|
||||
__m128i io[4];
|
||||
|
||||
io[0] = _mm_load_si128((const __m128i *)(input + 0));
|
||||
io[1] = _mm_load_si128((const __m128i *)(input + 4));
|
||||
io[2] = _mm_load_si128((const __m128i *)(input + 8));
|
||||
io[3] = _mm_load_si128((const __m128i *)(input + 12));
|
||||
|
||||
if (bd == 8) {
|
||||
__m128i io_short[2];
|
||||
|
||||
io_short[0] = _mm_packs_epi32(io[0], io[1]);
|
||||
io_short[1] = _mm_packs_epi32(io[2], io[3]);
|
||||
if (tx_type == DCT_DCT || tx_type == ADST_DCT) {
|
||||
idct4_sse2(io_short);
|
||||
} else {
|
||||
iadst4_sse2(io_short);
|
||||
}
|
||||
if (tx_type == DCT_DCT || tx_type == DCT_ADST) {
|
||||
idct4_sse2(io_short);
|
||||
} else {
|
||||
iadst4_sse2(io_short);
|
||||
}
|
||||
io_short[0] = _mm_add_epi16(io_short[0], _mm_set1_epi16(8));
|
||||
io_short[1] = _mm_add_epi16(io_short[1], _mm_set1_epi16(8));
|
||||
io[0] = _mm_srai_epi16(io_short[0], 4);
|
||||
io[1] = _mm_srai_epi16(io_short[1], 4);
|
||||
} else {
|
||||
if (tx_type == DCT_DCT || tx_type == ADST_DCT) {
|
||||
highbd_idct4_sse4_1(io);
|
||||
} else {
|
||||
highbd_iadst4_sse4_1(io);
|
||||
}
|
||||
if (tx_type == DCT_DCT || tx_type == DCT_ADST) {
|
||||
highbd_idct4_sse4_1(io);
|
||||
} else {
|
||||
highbd_iadst4_sse4_1(io);
|
||||
}
|
||||
io[0] = wraplow_16bit_shift4(io[0], io[1], _mm_set1_epi32(8));
|
||||
io[1] = wraplow_16bit_shift4(io[2], io[3], _mm_set1_epi32(8));
|
||||
}
|
||||
|
||||
recon_and_store_4x4(io, dest, stride, bd);
|
||||
}
|
255
vp9/common/x86/vp9_highbd_iht8x8_add_sse4.c
Normal file
@@ -0,0 +1,255 @@
|
||||
/*
|
||||
* Copyright (c) 2018 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "./vp9_rtcd.h"
|
||||
#include "vp9/common/vp9_idct.h"
|
||||
#include "vpx_dsp/x86/highbd_inv_txfm_sse4.h"
|
||||
#include "vpx_dsp/x86/inv_txfm_sse2.h"
|
||||
#include "vpx_dsp/x86/transpose_sse2.h"
|
||||
#include "vpx_dsp/x86/txfm_common_sse2.h"
|
||||
|
||||
static INLINE void highbd_iadst_half_butterfly_sse4_1(const __m128i in,
|
||||
const int c,
|
||||
__m128i *const s) {
|
||||
const __m128i pair_c = pair_set_epi32(4 * c, 0);
|
||||
__m128i x[2];
|
||||
|
||||
extend_64bit(in, x);
|
||||
s[0] = _mm_mul_epi32(pair_c, x[0]);
|
||||
s[1] = _mm_mul_epi32(pair_c, x[1]);
|
||||
}
|
||||
|
||||
static INLINE void highbd_iadst_butterfly_sse4_1(const __m128i in0,
|
||||
const __m128i in1,
|
||||
const int c0, const int c1,
|
||||
__m128i *const s0,
|
||||
__m128i *const s1) {
|
||||
const __m128i pair_c0 = pair_set_epi32(4 * c0, 0);
|
||||
const __m128i pair_c1 = pair_set_epi32(4 * c1, 0);
|
||||
__m128i t00[2], t01[2], t10[2], t11[2];
|
||||
__m128i x0[2], x1[2];
|
||||
|
||||
extend_64bit(in0, x0);
|
||||
extend_64bit(in1, x1);
|
||||
t00[0] = _mm_mul_epi32(pair_c0, x0[0]);
|
||||
t00[1] = _mm_mul_epi32(pair_c0, x0[1]);
|
||||
t01[0] = _mm_mul_epi32(pair_c0, x1[0]);
|
||||
t01[1] = _mm_mul_epi32(pair_c0, x1[1]);
|
||||
t10[0] = _mm_mul_epi32(pair_c1, x0[0]);
|
||||
t10[1] = _mm_mul_epi32(pair_c1, x0[1]);
|
||||
t11[0] = _mm_mul_epi32(pair_c1, x1[0]);
|
||||
t11[1] = _mm_mul_epi32(pair_c1, x1[1]);
|
||||
|
||||
s0[0] = _mm_add_epi64(t00[0], t11[0]);
|
||||
s0[1] = _mm_add_epi64(t00[1], t11[1]);
|
||||
s1[0] = _mm_sub_epi64(t10[0], t01[0]);
|
||||
s1[1] = _mm_sub_epi64(t10[1], t01[1]);
|
||||
}
|
||||
|
||||
static void highbd_iadst8_sse4_1(__m128i *const io) {
|
||||
__m128i s0[2], s1[2], s2[2], s3[2], s4[2], s5[2], s6[2], s7[2];
|
||||
__m128i x0[2], x1[2], x2[2], x3[2], x4[2], x5[2], x6[2], x7[2];
|
||||
|
||||
transpose_32bit_4x4x2(io, io);
|
||||
|
||||
// stage 1
|
||||
highbd_iadst_butterfly_sse4_1(io[7], io[0], cospi_2_64, cospi_30_64, s0, s1);
|
||||
highbd_iadst_butterfly_sse4_1(io[3], io[4], cospi_18_64, cospi_14_64, s4, s5);
|
||||
x0[0] = _mm_add_epi64(s0[0], s4[0]);
|
||||
x0[1] = _mm_add_epi64(s0[1], s4[1]);
|
||||
x1[0] = _mm_add_epi64(s1[0], s5[0]);
|
||||
x1[1] = _mm_add_epi64(s1[1], s5[1]);
|
||||
x4[0] = _mm_sub_epi64(s0[0], s4[0]);
|
||||
x4[1] = _mm_sub_epi64(s0[1], s4[1]);
|
||||
x5[0] = _mm_sub_epi64(s1[0], s5[0]);
|
||||
x5[1] = _mm_sub_epi64(s1[1], s5[1]);
|
||||
|
||||
highbd_iadst_butterfly_sse4_1(io[5], io[2], cospi_10_64, cospi_22_64, s2, s3);
|
||||
highbd_iadst_butterfly_sse4_1(io[1], io[6], cospi_26_64, cospi_6_64, s6, s7);
|
||||
x2[0] = _mm_add_epi64(s2[0], s6[0]);
|
||||
x2[1] = _mm_add_epi64(s2[1], s6[1]);
|
||||
x3[0] = _mm_add_epi64(s3[0], s7[0]);
|
||||
x3[1] = _mm_add_epi64(s3[1], s7[1]);
|
||||
x6[0] = _mm_sub_epi64(s2[0], s6[0]);
|
||||
x6[1] = _mm_sub_epi64(s2[1], s6[1]);
|
||||
x7[0] = _mm_sub_epi64(s3[0], s7[0]);
|
||||
x7[1] = _mm_sub_epi64(s3[1], s7[1]);
|
||||
|
||||
x0[0] = dct_const_round_shift_64bit(x0[0]);
|
||||
x0[1] = dct_const_round_shift_64bit(x0[1]);
|
||||
x1[0] = dct_const_round_shift_64bit(x1[0]);
|
||||
x1[1] = dct_const_round_shift_64bit(x1[1]);
|
||||
x2[0] = dct_const_round_shift_64bit(x2[0]);
|
||||
x2[1] = dct_const_round_shift_64bit(x2[1]);
|
||||
x3[0] = dct_const_round_shift_64bit(x3[0]);
|
||||
x3[1] = dct_const_round_shift_64bit(x3[1]);
|
||||
x4[0] = dct_const_round_shift_64bit(x4[0]);
|
||||
x4[1] = dct_const_round_shift_64bit(x4[1]);
|
||||
x5[0] = dct_const_round_shift_64bit(x5[0]);
|
||||
x5[1] = dct_const_round_shift_64bit(x5[1]);
|
||||
x6[0] = dct_const_round_shift_64bit(x6[0]);
|
||||
x6[1] = dct_const_round_shift_64bit(x6[1]);
|
||||
x7[0] = dct_const_round_shift_64bit(x7[0]);
|
||||
x7[1] = dct_const_round_shift_64bit(x7[1]);
|
||||
s0[0] = pack_4(x0[0], x0[1]); // s0 = x0;
|
||||
s1[0] = pack_4(x1[0], x1[1]); // s1 = x1;
|
||||
s2[0] = pack_4(x2[0], x2[1]); // s2 = x2;
|
||||
s3[0] = pack_4(x3[0], x3[1]); // s3 = x3;
|
||||
x4[0] = pack_4(x4[0], x4[1]);
|
||||
x5[0] = pack_4(x5[0], x5[1]);
|
||||
x6[0] = pack_4(x6[0], x6[1]);
|
||||
x7[0] = pack_4(x7[0], x7[1]);
|
||||
|
||||
// stage 2
|
||||
x0[0] = _mm_add_epi32(s0[0], s2[0]);
|
||||
x1[0] = _mm_add_epi32(s1[0], s3[0]);
|
||||
x2[0] = _mm_sub_epi32(s0[0], s2[0]);
|
||||
x3[0] = _mm_sub_epi32(s1[0], s3[0]);
|
||||
|
||||
highbd_iadst_butterfly_sse4_1(x4[0], x5[0], cospi_8_64, cospi_24_64, s4, s5);
|
||||
highbd_iadst_butterfly_sse4_1(x7[0], x6[0], cospi_24_64, cospi_8_64, s7, s6);
|
||||
|
||||
x4[0] = _mm_add_epi64(s4[0], s6[0]);
|
||||
x4[1] = _mm_add_epi64(s4[1], s6[1]);
|
||||
x5[0] = _mm_add_epi64(s5[0], s7[0]);
|
||||
x5[1] = _mm_add_epi64(s5[1], s7[1]);
|
||||
x6[0] = _mm_sub_epi64(s4[0], s6[0]);
|
||||
x6[1] = _mm_sub_epi64(s4[1], s6[1]);
|
||||
x7[0] = _mm_sub_epi64(s5[0], s7[0]);
|
||||
x7[1] = _mm_sub_epi64(s5[1], s7[1]);
|
||||
x4[0] = dct_const_round_shift_64bit(x4[0]);
|
||||
x4[1] = dct_const_round_shift_64bit(x4[1]);
|
||||
x5[0] = dct_const_round_shift_64bit(x5[0]);
|
||||
x5[1] = dct_const_round_shift_64bit(x5[1]);
|
||||
x6[0] = dct_const_round_shift_64bit(x6[0]);
|
||||
x6[1] = dct_const_round_shift_64bit(x6[1]);
|
||||
x7[0] = dct_const_round_shift_64bit(x7[0]);
|
||||
x7[1] = dct_const_round_shift_64bit(x7[1]);
|
||||
x4[0] = pack_4(x4[0], x4[1]);
|
||||
x5[0] = pack_4(x5[0], x5[1]);
|
||||
x6[0] = pack_4(x6[0], x6[1]);
|
||||
x7[0] = pack_4(x7[0], x7[1]);
|
||||
|
||||
// stage 3
|
||||
s2[0] = _mm_add_epi32(x2[0], x3[0]);
|
||||
s3[0] = _mm_sub_epi32(x2[0], x3[0]);
|
||||
s6[0] = _mm_add_epi32(x6[0], x7[0]);
|
||||
s7[0] = _mm_sub_epi32(x6[0], x7[0]);
|
||||
highbd_iadst_half_butterfly_sse4_1(s2[0], cospi_16_64, s2);
|
||||
highbd_iadst_half_butterfly_sse4_1(s3[0], cospi_16_64, s3);
|
||||
highbd_iadst_half_butterfly_sse4_1(s6[0], cospi_16_64, s6);
|
||||
highbd_iadst_half_butterfly_sse4_1(s7[0], cospi_16_64, s7);
|
||||
|
||||
x2[0] = dct_const_round_shift_64bit(s2[0]);
|
||||
x2[1] = dct_const_round_shift_64bit(s2[1]);
|
||||
x3[0] = dct_const_round_shift_64bit(s3[0]);
|
||||
x3[1] = dct_const_round_shift_64bit(s3[1]);
|
||||
x6[0] = dct_const_round_shift_64bit(s6[0]);
|
||||
x6[1] = dct_const_round_shift_64bit(s6[1]);
|
||||
x7[0] = dct_const_round_shift_64bit(s7[0]);
|
||||
x7[1] = dct_const_round_shift_64bit(s7[1]);
|
||||
x2[0] = pack_4(x2[0], x2[1]);
|
||||
x3[0] = pack_4(x3[0], x3[1]);
|
||||
x6[0] = pack_4(x6[0], x6[1]);
|
||||
x7[0] = pack_4(x7[0], x7[1]);
|
||||
|
||||
io[0] = x0[0];
|
||||
io[1] = _mm_sub_epi32(_mm_setzero_si128(), x4[0]);
|
||||
io[2] = x6[0];
|
||||
io[3] = _mm_sub_epi32(_mm_setzero_si128(), x2[0]);
|
||||
io[4] = x3[0];
|
||||
io[5] = _mm_sub_epi32(_mm_setzero_si128(), x7[0]);
|
||||
io[6] = x5[0];
|
||||
io[7] = _mm_sub_epi32(_mm_setzero_si128(), x1[0]);
|
||||
}
|
||||
|
||||
void vp9_highbd_iht8x8_64_add_sse4_1(const tran_low_t *input, uint16_t *dest,
|
||||
int stride, int tx_type, int bd) {
|
||||
__m128i io[16];
|
||||
|
||||
io[0] = _mm_load_si128((const __m128i *)(input + 0 * 8 + 0));
|
||||
io[4] = _mm_load_si128((const __m128i *)(input + 0 * 8 + 4));
|
||||
io[1] = _mm_load_si128((const __m128i *)(input + 1 * 8 + 0));
|
||||
io[5] = _mm_load_si128((const __m128i *)(input + 1 * 8 + 4));
|
||||
io[2] = _mm_load_si128((const __m128i *)(input + 2 * 8 + 0));
|
||||
io[6] = _mm_load_si128((const __m128i *)(input + 2 * 8 + 4));
|
||||
io[3] = _mm_load_si128((const __m128i *)(input + 3 * 8 + 0));
|
||||
io[7] = _mm_load_si128((const __m128i *)(input + 3 * 8 + 4));
|
||||
io[8] = _mm_load_si128((const __m128i *)(input + 4 * 8 + 0));
|
||||
io[12] = _mm_load_si128((const __m128i *)(input + 4 * 8 + 4));
|
||||
io[9] = _mm_load_si128((const __m128i *)(input + 5 * 8 + 0));
|
||||
io[13] = _mm_load_si128((const __m128i *)(input + 5 * 8 + 4));
|
||||
io[10] = _mm_load_si128((const __m128i *)(input + 6 * 8 + 0));
|
||||
io[14] = _mm_load_si128((const __m128i *)(input + 6 * 8 + 4));
|
||||
io[11] = _mm_load_si128((const __m128i *)(input + 7 * 8 + 0));
|
||||
io[15] = _mm_load_si128((const __m128i *)(input + 7 * 8 + 4));
|
||||
|
||||
if (bd == 8) {
|
||||
__m128i io_short[8];
|
||||
|
||||
io_short[0] = _mm_packs_epi32(io[0], io[4]);
|
||||
io_short[1] = _mm_packs_epi32(io[1], io[5]);
|
||||
io_short[2] = _mm_packs_epi32(io[2], io[6]);
|
||||
io_short[3] = _mm_packs_epi32(io[3], io[7]);
|
||||
io_short[4] = _mm_packs_epi32(io[8], io[12]);
|
||||
io_short[5] = _mm_packs_epi32(io[9], io[13]);
|
||||
io_short[6] = _mm_packs_epi32(io[10], io[14]);
|
||||
io_short[7] = _mm_packs_epi32(io[11], io[15]);
|
||||
|
||||
if (tx_type == DCT_DCT || tx_type == ADST_DCT) {
|
||||
vpx_idct8_sse2(io_short);
|
||||
} else {
|
||||
iadst8_sse2(io_short);
|
||||
}
|
||||
if (tx_type == DCT_DCT || tx_type == DCT_ADST) {
|
||||
vpx_idct8_sse2(io_short);
|
||||
} else {
|
||||
iadst8_sse2(io_short);
|
||||
}
|
||||
round_shift_8x8(io_short, io);
|
||||
} else {
|
||||
__m128i temp[4];
|
||||
|
||||
if (tx_type == DCT_DCT || tx_type == ADST_DCT) {
|
||||
vpx_highbd_idct8x8_half1d_sse4_1(io);
|
||||
vpx_highbd_idct8x8_half1d_sse4_1(&io[8]);
|
||||
} else {
|
||||
highbd_iadst8_sse4_1(io);
|
||||
highbd_iadst8_sse4_1(&io[8]);
|
||||
}
|
||||
|
||||
temp[0] = io[4];
|
||||
temp[1] = io[5];
|
||||
temp[2] = io[6];
|
||||
temp[3] = io[7];
|
||||
io[4] = io[8];
|
||||
io[5] = io[9];
|
||||
io[6] = io[10];
|
||||
io[7] = io[11];
|
||||
|
||||
if (tx_type == DCT_DCT || tx_type == DCT_ADST) {
|
||||
vpx_highbd_idct8x8_half1d_sse4_1(io);
|
||||
io[8] = temp[0];
|
||||
io[9] = temp[1];
|
||||
io[10] = temp[2];
|
||||
io[11] = temp[3];
|
||||
vpx_highbd_idct8x8_half1d_sse4_1(&io[8]);
|
||||
} else {
|
||||
highbd_iadst8_sse4_1(io);
|
||||
io[8] = temp[0];
|
||||
io[9] = temp[1];
|
||||
io[10] = temp[2];
|
||||
io[11] = temp[3];
|
||||
highbd_iadst8_sse4_1(&io[8]);
|
||||
}
|
||||
highbd_idct8x8_final_round(io);
|
||||
}
|
||||
recon_and_store_8x8(io, dest, stride, bd);
|
||||
}
|
@@ -10,8 +10,6 @@
|
||||
|
||||
#include "./vp9_rtcd.h"
|
||||
#include "vpx_dsp/x86/inv_txfm_sse2.h"
|
||||
#include "vpx_dsp/x86/txfm_common_sse2.h"
|
||||
#include "vpx_ports/mem.h"
|
||||
|
||||
void vp9_iht4x4_16_add_sse2(const tran_low_t *input, uint8_t *dest, int stride,
|
||||
int tx_type) {
|
||||
@@ -22,23 +20,23 @@ void vp9_iht4x4_16_add_sse2(const tran_low_t *input, uint8_t *dest, int stride,
|
||||
in[1] = load_input_data8(input + 8);
|
||||
|
||||
switch (tx_type) {
|
||||
case 0: // DCT_DCT
|
||||
case DCT_DCT:
|
||||
idct4_sse2(in);
|
||||
idct4_sse2(in);
|
||||
break;
|
||||
case 1: // ADST_DCT
|
||||
case ADST_DCT:
|
||||
idct4_sse2(in);
|
||||
iadst4_sse2(in);
|
||||
break;
|
||||
case 2: // DCT_ADST
|
||||
case DCT_ADST:
|
||||
iadst4_sse2(in);
|
||||
idct4_sse2(in);
|
||||
break;
|
||||
case 3: // ADST_ADST
|
||||
default:
|
||||
assert(tx_type == ADST_ADST);
|
||||
iadst4_sse2(in);
|
||||
iadst4_sse2(in);
|
||||
break;
|
||||
default: assert(0); break;
|
||||
}
|
||||
|
||||
// Final round and shift
|
||||
@@ -67,23 +65,23 @@ void vp9_iht8x8_64_add_sse2(const tran_low_t *input, uint8_t *dest, int stride,
|
||||
in[7] = load_input_data8(input + 8 * 7);
|
||||
|
||||
switch (tx_type) {
|
||||
case 0: // DCT_DCT
|
||||
idct8_sse2(in);
|
||||
idct8_sse2(in);
|
||||
case DCT_DCT:
|
||||
vpx_idct8_sse2(in);
|
||||
vpx_idct8_sse2(in);
|
||||
break;
|
||||
case 1: // ADST_DCT
|
||||
idct8_sse2(in);
|
||||
case ADST_DCT:
|
||||
vpx_idct8_sse2(in);
|
||||
iadst8_sse2(in);
|
||||
break;
|
||||
case 2: // DCT_ADST
|
||||
case DCT_ADST:
|
||||
iadst8_sse2(in);
|
||||
idct8_sse2(in);
|
||||
vpx_idct8_sse2(in);
|
||||
break;
|
||||
case 3: // ADST_ADST
|
||||
default:
|
||||
assert(tx_type == ADST_ADST);
|
||||
iadst8_sse2(in);
|
||||
iadst8_sse2(in);
|
||||
break;
|
||||
default: assert(0); break;
|
||||
}
|
||||
|
||||
// Final rounding and shift
|
||||
@@ -201,23 +199,23 @@ void vp9_iht16x16_256_add_sse2(const tran_low_t *input, uint8_t *dest,
|
||||
load_buffer_8x16(input, in1);
|
||||
|
||||
switch (tx_type) {
|
||||
case 0: // DCT_DCT
|
||||
case DCT_DCT:
|
||||
idct16_sse2(in0, in1);
|
||||
idct16_sse2(in0, in1);
|
||||
break;
|
||||
case 1: // ADST_DCT
|
||||
case ADST_DCT:
|
||||
idct16_sse2(in0, in1);
|
||||
iadst16_sse2(in0, in1);
|
||||
break;
|
||||
case 2: // DCT_ADST
|
||||
case DCT_ADST:
|
||||
iadst16_sse2(in0, in1);
|
||||
idct16_sse2(in0, in1);
|
||||
break;
|
||||
case 3: // ADST_ADST
|
||||
default:
|
||||
assert(tx_type == ADST_ADST);
|
||||
iadst16_sse2(in0, in1);
|
||||
iadst16_sse2(in0, in1);
|
||||
break;
|
||||
default: assert(0); break;
|
||||
}
|
||||
|
||||
write_buffer_8x16(dest, in0, stride);
|
||||
|
@@ -464,10 +464,6 @@ void vp9_cyclic_refresh_update_parameters(VP9_COMP *const cpi) {
|
||||
cr->rate_ratio_qdelta = VPXMAX(cr->rate_ratio_qdelta, 2.5);
|
||||
}
|
||||
}
|
||||
if (cpi->svc.spatial_layer_id > 0) {
|
||||
cr->motion_thresh = 4;
|
||||
cr->rate_boost_fac = 12;
|
||||
}
|
||||
if (cpi->oxcf.rc_mode == VPX_VBR) {
|
||||
// To be adjusted for VBR mode, e.g., based on gf period and boost.
|
||||
// For now use smaller qp-delta (than CBR), no second boosted seg, and
|
||||
|
@@ -12,7 +12,10 @@
|
||||
#include "vp9/encoder/vp9_encoder.h"
|
||||
|
||||
static const BLOCK_SIZE square[] = {
|
||||
BLOCK_8X8, BLOCK_16X16, BLOCK_32X32, BLOCK_64X64,
|
||||
BLOCK_8X8,
|
||||
BLOCK_16X16,
|
||||
BLOCK_32X32,
|
||||
BLOCK_64X64,
|
||||
};
|
||||
|
||||
static void alloc_mode_context(VP9_COMMON *cm, int num_4x4_blk,
|
||||
|
@@ -189,11 +189,12 @@ static VP9_DENOISER_DECISION perform_motion_compensation(
|
||||
int increase_denoising, int mi_row, int mi_col, PICK_MODE_CONTEXT *ctx,
|
||||
int motion_magnitude, int is_skin, int *zeromv_filter, int consec_zeromv,
|
||||
int num_spatial_layers, int width, int lst_fb_idx, int gld_fb_idx,
|
||||
int use_svc) {
|
||||
int use_svc, int spatial_layer) {
|
||||
const int sse_diff = (ctx->newmv_sse == UINT_MAX)
|
||||
? 0
|
||||
: ((int)ctx->zeromv_sse - (int)ctx->newmv_sse);
|
||||
int frame;
|
||||
int denoise_layer_idx = 0;
|
||||
MACROBLOCKD *filter_mbd = &mb->e_mbd;
|
||||
MODE_INFO *mi = filter_mbd->mi[0];
|
||||
MODE_INFO saved_mi;
|
||||
@@ -254,6 +255,10 @@ static VP9_DENOISER_DECISION perform_motion_compensation(
|
||||
frame = lst_fb_idx + 1;
|
||||
else if (frame == GOLDEN_FRAME)
|
||||
frame = gld_fb_idx + 1;
|
||||
// Shift for the second spatial layer.
|
||||
if (num_spatial_layers - spatial_layer == 2)
|
||||
frame = frame + denoiser->num_ref_frames;
|
||||
denoise_layer_idx = num_spatial_layers - spatial_layer - 1;
|
||||
}
|
||||
|
||||
if (ctx->newmv_sse > sse_thresh(bs, increase_denoising)) {
|
||||
@@ -289,18 +294,21 @@ static VP9_DENOISER_DECISION perform_motion_compensation(
|
||||
denoiser->running_avg_y[frame].uv_stride, mi_row, mi_col);
|
||||
filter_mbd->plane[2].pre[0].stride = denoiser->running_avg_y[frame].uv_stride;
|
||||
|
||||
filter_mbd->plane[0].dst.buf =
|
||||
block_start(denoiser->mc_running_avg_y.y_buffer,
|
||||
denoiser->mc_running_avg_y.y_stride, mi_row, mi_col);
|
||||
filter_mbd->plane[0].dst.stride = denoiser->mc_running_avg_y.y_stride;
|
||||
filter_mbd->plane[1].dst.buf =
|
||||
block_start(denoiser->mc_running_avg_y.u_buffer,
|
||||
denoiser->mc_running_avg_y.uv_stride, mi_row, mi_col);
|
||||
filter_mbd->plane[1].dst.stride = denoiser->mc_running_avg_y.uv_stride;
|
||||
filter_mbd->plane[2].dst.buf =
|
||||
block_start(denoiser->mc_running_avg_y.v_buffer,
|
||||
denoiser->mc_running_avg_y.uv_stride, mi_row, mi_col);
|
||||
filter_mbd->plane[2].dst.stride = denoiser->mc_running_avg_y.uv_stride;
|
||||
filter_mbd->plane[0].dst.buf = block_start(
|
||||
denoiser->mc_running_avg_y[denoise_layer_idx].y_buffer,
|
||||
denoiser->mc_running_avg_y[denoise_layer_idx].y_stride, mi_row, mi_col);
|
||||
filter_mbd->plane[0].dst.stride =
|
||||
denoiser->mc_running_avg_y[denoise_layer_idx].y_stride;
|
||||
filter_mbd->plane[1].dst.buf = block_start(
|
||||
denoiser->mc_running_avg_y[denoise_layer_idx].u_buffer,
|
||||
denoiser->mc_running_avg_y[denoise_layer_idx].uv_stride, mi_row, mi_col);
|
||||
filter_mbd->plane[1].dst.stride =
|
||||
denoiser->mc_running_avg_y[denoise_layer_idx].uv_stride;
|
||||
filter_mbd->plane[2].dst.buf = block_start(
|
||||
denoiser->mc_running_avg_y[denoise_layer_idx].v_buffer,
|
||||
denoiser->mc_running_avg_y[denoise_layer_idx].uv_stride, mi_row, mi_col);
|
||||
filter_mbd->plane[2].dst.stride =
|
||||
denoiser->mc_running_avg_y[denoise_layer_idx].uv_stride;
|
||||
|
||||
set_ref_ptrs(cm, filter_mbd, saved_frame, NONE);
|
||||
vp9_build_inter_predictors_sby(filter_mbd, mi_row, mi_col, bs);
|
||||
@@ -324,9 +332,17 @@ void vp9_denoiser_denoise(VP9_COMP *cpi, MACROBLOCK *mb, int mi_row, int mi_col,
|
||||
int zeromv_filter = 0;
|
||||
VP9_DENOISER *denoiser = &cpi->denoiser;
|
||||
VP9_DENOISER_DECISION decision = COPY_BLOCK;
|
||||
YV12_BUFFER_CONFIG avg = denoiser->running_avg_y[INTRA_FRAME];
|
||||
YV12_BUFFER_CONFIG mc_avg = denoiser->mc_running_avg_y;
|
||||
|
||||
const int shift =
|
||||
cpi->svc.number_spatial_layers - cpi->svc.spatial_layer_id == 2
|
||||
? denoiser->num_ref_frames
|
||||
: 0;
|
||||
YV12_BUFFER_CONFIG avg = denoiser->running_avg_y[INTRA_FRAME + shift];
|
||||
const int denoise_layer_index =
|
||||
cpi->svc.number_spatial_layers - cpi->svc.spatial_layer_id - 1;
|
||||
YV12_BUFFER_CONFIG mc_avg = denoiser->mc_running_avg_y[denoise_layer_index];
|
||||
uint8_t *avg_start = block_start(avg.y_buffer, avg.y_stride, mi_row, mi_col);
|
||||
|
||||
uint8_t *mc_avg_start =
|
||||
block_start(mc_avg.y_buffer, mc_avg.y_stride, mi_row, mi_col);
|
||||
struct buf_2d src = mb->plane[0].src;
|
||||
@@ -381,7 +397,7 @@ void vp9_denoiser_denoise(VP9_COMP *cpi, MACROBLOCK *mb, int mi_row, int mi_col,
|
||||
&cpi->common, denoiser, mb, bs, increase_denoising, mi_row, mi_col, ctx,
|
||||
motion_magnitude, is_skin, &zeromv_filter, consec_zeromv,
|
||||
cpi->svc.number_spatial_layers, cpi->Source->y_width, cpi->lst_fb_idx,
|
||||
cpi->gld_fb_idx, cpi->use_svc);
|
||||
cpi->gld_fb_idx, cpi->use_svc, cpi->svc.spatial_layer_id);
|
||||
|
||||
if (decision == FILTER_BLOCK) {
|
||||
decision = vp9_denoiser_filter(src.buf, src.stride, mc_avg_start,
|
||||
@@ -432,7 +448,8 @@ void vp9_denoiser_update_frame_info(
|
||||
VP9_DENOISER *denoiser, YV12_BUFFER_CONFIG src, FRAME_TYPE frame_type,
|
||||
int refresh_alt_ref_frame, int refresh_golden_frame, int refresh_last_frame,
|
||||
int alt_fb_idx, int gld_fb_idx, int lst_fb_idx, int resized,
|
||||
int svc_base_is_key) {
|
||||
int svc_base_is_key, int second_spatial_layer) {
|
||||
const int shift = second_spatial_layer ? denoiser->num_ref_frames : 0;
|
||||
// Copy source into denoised reference buffers on KEY_FRAME or
|
||||
// if the just encoded frame was resized. For SVC, copy source if the base
|
||||
// spatial layer was key frame.
|
||||
@@ -441,8 +458,8 @@ void vp9_denoiser_update_frame_info(
|
||||
int i;
|
||||
// Start at 1 so as not to overwrite the INTRA_FRAME
|
||||
for (i = 1; i < denoiser->num_ref_frames; ++i) {
|
||||
if (denoiser->running_avg_y[i].buffer_alloc != NULL)
|
||||
copy_frame(&denoiser->running_avg_y[i], &src);
|
||||
if (denoiser->running_avg_y[i + shift].buffer_alloc != NULL)
|
||||
copy_frame(&denoiser->running_avg_y[i + shift], &src);
|
||||
}
|
||||
denoiser->reset = 0;
|
||||
return;
|
||||
@@ -451,29 +468,29 @@ void vp9_denoiser_update_frame_info(
|
||||
// If more than one refresh occurs, must copy frame buffer.
|
||||
if ((refresh_alt_ref_frame + refresh_golden_frame + refresh_last_frame) > 1) {
|
||||
if (refresh_alt_ref_frame) {
|
||||
copy_frame(&denoiser->running_avg_y[alt_fb_idx + 1],
|
||||
&denoiser->running_avg_y[INTRA_FRAME]);
|
||||
copy_frame(&denoiser->running_avg_y[alt_fb_idx + 1 + shift],
|
||||
&denoiser->running_avg_y[INTRA_FRAME + shift]);
|
||||
}
|
||||
if (refresh_golden_frame) {
|
||||
copy_frame(&denoiser->running_avg_y[gld_fb_idx + 1],
|
||||
&denoiser->running_avg_y[INTRA_FRAME]);
|
||||
copy_frame(&denoiser->running_avg_y[gld_fb_idx + 1 + shift],
|
||||
&denoiser->running_avg_y[INTRA_FRAME + shift]);
|
||||
}
|
||||
if (refresh_last_frame) {
|
||||
copy_frame(&denoiser->running_avg_y[lst_fb_idx + 1],
|
||||
&denoiser->running_avg_y[INTRA_FRAME]);
|
||||
copy_frame(&denoiser->running_avg_y[lst_fb_idx + 1 + shift],
|
||||
&denoiser->running_avg_y[INTRA_FRAME + shift]);
|
||||
}
|
||||
} else {
|
||||
if (refresh_alt_ref_frame) {
|
||||
swap_frame_buffer(&denoiser->running_avg_y[alt_fb_idx + 1],
|
||||
&denoiser->running_avg_y[INTRA_FRAME]);
|
||||
swap_frame_buffer(&denoiser->running_avg_y[alt_fb_idx + 1 + shift],
|
||||
&denoiser->running_avg_y[INTRA_FRAME + shift]);
|
||||
}
|
||||
if (refresh_golden_frame) {
|
||||
swap_frame_buffer(&denoiser->running_avg_y[gld_fb_idx + 1],
|
||||
&denoiser->running_avg_y[INTRA_FRAME]);
|
||||
swap_frame_buffer(&denoiser->running_avg_y[gld_fb_idx + 1 + shift],
|
||||
&denoiser->running_avg_y[INTRA_FRAME + shift]);
|
||||
}
|
||||
if (refresh_last_frame) {
|
||||
swap_frame_buffer(&denoiser->running_avg_y[lst_fb_idx + 1],
|
||||
&denoiser->running_avg_y[INTRA_FRAME]);
|
||||
swap_frame_buffer(&denoiser->running_avg_y[lst_fb_idx + 1 + shift],
|
||||
&denoiser->running_avg_y[INTRA_FRAME + shift]);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -522,44 +539,90 @@ static int vp9_denoiser_realloc_svc_helper(VP9_COMMON *cm,
|
||||
}
|
||||
|
||||
int vp9_denoiser_realloc_svc(VP9_COMMON *cm, VP9_DENOISER *denoiser,
|
||||
int refresh_alt, int refresh_gld, int refresh_lst,
|
||||
int alt_fb_idx, int gld_fb_idx, int lst_fb_idx) {
|
||||
int svc_buf_shift, int refresh_alt,
|
||||
int refresh_gld, int refresh_lst, int alt_fb_idx,
|
||||
int gld_fb_idx, int lst_fb_idx) {
|
||||
int fail = 0;
|
||||
if (refresh_alt) {
|
||||
// Increase the frame buffer index by 1 to map it to the buffer index in the
|
||||
// denoiser.
|
||||
fail = vp9_denoiser_realloc_svc_helper(cm, denoiser, alt_fb_idx + 1);
|
||||
fail = vp9_denoiser_realloc_svc_helper(cm, denoiser,
|
||||
alt_fb_idx + 1 + svc_buf_shift);
|
||||
if (fail) return 1;
|
||||
}
|
||||
if (refresh_gld) {
|
||||
fail = vp9_denoiser_realloc_svc_helper(cm, denoiser, gld_fb_idx + 1);
|
||||
fail = vp9_denoiser_realloc_svc_helper(cm, denoiser,
|
||||
gld_fb_idx + 1 + svc_buf_shift);
|
||||
if (fail) return 1;
|
||||
}
|
||||
if (refresh_lst) {
|
||||
fail = vp9_denoiser_realloc_svc_helper(cm, denoiser, lst_fb_idx + 1);
|
||||
fail = vp9_denoiser_realloc_svc_helper(cm, denoiser,
|
||||
lst_fb_idx + 1 + svc_buf_shift);
|
||||
if (fail) return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
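The indexing convention behind the svc_buf_shift changes above: denoiser slot 0 holds INTRA_FRAME, so an encoder frame-buffer index maps to fb_idx + 1, and when the lower of the two denoised spatial layers is active the whole range is shifted by num_ref_frames. A hedged sketch of that mapping (helper name invented for the example):

/* Sketch of the buffer-index mapping used by the SVC denoiser changes. */
static int denoiser_buf_idx(int fb_idx, int second_spatial_layer,
                            int num_ref_frames) {
  const int shift = second_spatial_layer ? num_ref_frames : 0;
  return fb_idx + 1 + shift;  /* +1 skips the INTRA_FRAME slot */
}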
int vp9_denoiser_alloc(VP9_COMMON *cm, int use_svc, VP9_DENOISER *denoiser,
|
||||
int width, int height, int ssx, int ssy,
|
||||
int vp9_denoiser_alloc(VP9_COMMON *cm, struct SVC *svc, VP9_DENOISER *denoiser,
|
||||
int use_svc, int noise_sen, int width, int height,
|
||||
int ssx, int ssy,
|
||||
#if CONFIG_VP9_HIGHBITDEPTH
|
||||
int use_highbitdepth,
|
||||
#endif
|
||||
int border) {
|
||||
int i, fail, init_num_ref_frames;
|
||||
int i, layer, fail, init_num_ref_frames;
|
||||
const int legacy_byte_alignment = 0;
|
||||
int num_layers = 1;
|
||||
int scaled_width = width;
|
||||
int scaled_height = height;
|
||||
if (use_svc) {
|
||||
LAYER_CONTEXT *lc = &svc->layer_context[svc->spatial_layer_id *
|
||||
svc->number_temporal_layers +
|
||||
svc->temporal_layer_id];
|
||||
get_layer_resolution(width, height, lc->scaling_factor_num,
|
||||
lc->scaling_factor_den, &scaled_width, &scaled_height);
|
||||
// For SVC: only denoise at most 2 spatial (highest) layers.
|
||||
if (noise_sen >= 2)
|
||||
// Denoise from one spatial layer below the top.
|
||||
svc->first_layer_denoise = VPXMAX(svc->number_spatial_layers - 2, 0);
|
||||
else
|
||||
// Only denoise the top spatial layer.
|
||||
svc->first_layer_denoise = VPXMAX(svc->number_spatial_layers - 1, 0);
|
||||
num_layers = svc->number_spatial_layers - svc->first_layer_denoise;
|
||||
}
|
||||
assert(denoiser != NULL);
|
||||
|
||||
denoiser->num_ref_frames = use_svc ? SVC_REF_FRAMES : NONSVC_REF_FRAMES;
|
||||
init_num_ref_frames = use_svc ? MAX_REF_FRAMES : NONSVC_REF_FRAMES;
|
||||
denoiser->num_layers = num_layers;
|
||||
CHECK_MEM_ERROR(cm, denoiser->running_avg_y,
|
||||
vpx_calloc(denoiser->num_ref_frames * num_layers,
|
||||
sizeof(denoiser->running_avg_y[0])));
|
||||
CHECK_MEM_ERROR(
|
||||
cm, denoiser->running_avg_y,
|
||||
vpx_calloc(denoiser->num_ref_frames, sizeof(denoiser->running_avg_y[0])));
|
||||
for (i = 0; i < init_num_ref_frames; ++i) {
|
||||
fail = vpx_alloc_frame_buffer(&denoiser->running_avg_y[i], width, height,
|
||||
ssx, ssy,
|
||||
cm, denoiser->mc_running_avg_y,
|
||||
vpx_calloc(num_layers, sizeof(denoiser->mc_running_avg_y[0])));
|
||||
|
||||
for (layer = 0; layer < num_layers; ++layer) {
|
||||
const int denoise_width = (layer == 0) ? width : scaled_width;
|
||||
const int denoise_height = (layer == 0) ? height : scaled_height;
|
||||
for (i = 0; i < init_num_ref_frames; ++i) {
|
||||
fail = vpx_alloc_frame_buffer(
|
||||
&denoiser->running_avg_y[i + denoiser->num_ref_frames * layer],
|
||||
denoise_width, denoise_height, ssx, ssy,
|
||||
#if CONFIG_VP9_HIGHBITDEPTH
|
||||
use_highbitdepth,
|
||||
#endif
|
||||
border, legacy_byte_alignment);
|
||||
if (fail) {
|
||||
vp9_denoiser_free(denoiser);
|
||||
return 1;
|
||||
}
|
||||
#ifdef OUTPUT_YUV_DENOISED
|
||||
make_grayscale(&denoiser->running_avg_y[i]);
|
||||
#endif
|
||||
}
|
||||
|
||||
fail = vpx_alloc_frame_buffer(&denoiser->mc_running_avg_y[layer],
|
||||
denoise_width, denoise_height, ssx, ssy,
|
||||
#if CONFIG_VP9_HIGHBITDEPTH
|
||||
use_highbitdepth,
|
||||
#endif
|
||||
@@ -568,22 +631,10 @@ int vp9_denoiser_alloc(VP9_COMMON *cm, int use_svc, VP9_DENOISER *denoiser,
|
||||
vp9_denoiser_free(denoiser);
|
||||
return 1;
|
||||
}
|
||||
#ifdef OUTPUT_YUV_DENOISED
|
||||
make_grayscale(&denoiser->running_avg_y[i]);
|
||||
#endif
|
||||
}
|
||||
|
||||
fail = vpx_alloc_frame_buffer(&denoiser->mc_running_avg_y, width, height, ssx,
|
||||
ssy,
|
||||
#if CONFIG_VP9_HIGHBITDEPTH
|
||||
use_highbitdepth,
|
||||
#endif
|
||||
border, legacy_byte_alignment);
|
||||
if (fail) {
|
||||
vp9_denoiser_free(denoiser);
|
||||
return 1;
|
||||
}
|
||||
|
||||
// denoiser->last_source only used for noise_estimation, so only for top
|
||||
// layer.
|
||||
fail = vpx_alloc_frame_buffer(&denoiser->last_source, width, height, ssx, ssy,
|
||||
#if CONFIG_VP9_HIGHBITDEPTH
|
||||
use_highbitdepth,
|
||||
@@ -609,12 +660,18 @@ void vp9_denoiser_free(VP9_DENOISER *denoiser) {
|
||||
return;
|
||||
}
|
||||
denoiser->frame_buffer_initialized = 0;
|
||||
for (i = 0; i < denoiser->num_ref_frames; ++i) {
|
||||
for (i = 0; i < denoiser->num_ref_frames * denoiser->num_layers; ++i) {
|
||||
vpx_free_frame_buffer(&denoiser->running_avg_y[i]);
|
||||
}
|
||||
vpx_free(denoiser->running_avg_y);
|
||||
denoiser->running_avg_y = NULL;
|
||||
vpx_free_frame_buffer(&denoiser->mc_running_avg_y);
|
||||
|
||||
for (i = 0; i < denoiser->num_layers; ++i) {
|
||||
vpx_free_frame_buffer(&denoiser->mc_running_avg_y[i]);
|
||||
}
|
||||
|
||||
vpx_free(denoiser->mc_running_avg_y);
|
||||
denoiser->mc_running_avg_y = NULL;
|
||||
vpx_free_frame_buffer(&denoiser->last_source);
|
||||
}
|
||||
|
||||
|
@@ -44,11 +44,12 @@ typedef enum vp9_denoiser_level {
|
||||
|
||||
typedef struct vp9_denoiser {
|
||||
YV12_BUFFER_CONFIG *running_avg_y;
|
||||
YV12_BUFFER_CONFIG mc_running_avg_y;
|
||||
YV12_BUFFER_CONFIG *mc_running_avg_y;
|
||||
YV12_BUFFER_CONFIG last_source;
|
||||
int frame_buffer_initialized;
|
||||
int reset;
|
||||
int num_ref_frames;
|
||||
int num_layers;
|
||||
VP9_DENOISER_LEVEL denoising_level;
|
||||
VP9_DENOISER_LEVEL prev_denoising_level;
|
||||
} VP9_DENOISER;
|
||||
@@ -66,12 +67,13 @@ typedef struct {
|
||||
} VP9_PICKMODE_CTX_DEN;
|
||||
|
||||
struct VP9_COMP;
|
||||
struct SVC;
|
||||
|
||||
void vp9_denoiser_update_frame_info(
|
||||
VP9_DENOISER *denoiser, YV12_BUFFER_CONFIG src, FRAME_TYPE frame_type,
|
||||
int refresh_alt_ref_frame, int refresh_golden_frame, int refresh_last_frame,
|
||||
int alt_fb_idx, int gld_fb_idx, int lst_fb_idx, int resized,
|
||||
int svc_base_is_key);
|
||||
int svc_base_is_key, int second_spatial_layer);
|
||||
|
||||
void vp9_denoiser_denoise(struct VP9_COMP *cpi, MACROBLOCK *mb, int mi_row,
|
||||
int mi_col, BLOCK_SIZE bs, PICK_MODE_CONTEXT *ctx,
|
||||
@@ -84,11 +86,13 @@ void vp9_denoiser_update_frame_stats(MODE_INFO *mi, unsigned int sse,
|
||||
PICK_MODE_CONTEXT *ctx);
|
||||
|
||||
int vp9_denoiser_realloc_svc(VP9_COMMON *cm, VP9_DENOISER *denoiser,
|
||||
int refresh_alt, int refresh_gld, int refresh_lst,
|
||||
int alt_fb_idx, int gld_fb_idx, int lst_fb_idx);
|
||||
int svc_buf_shift, int refresh_alt,
|
||||
int refresh_gld, int refresh_lst, int alt_fb_idx,
|
||||
int gld_fb_idx, int lst_fb_idx);
|
||||
|
||||
int vp9_denoiser_alloc(VP9_COMMON *cm, int use_svc, VP9_DENOISER *denoiser,
|
||||
int width, int height, int ssx, int ssy,
|
||||
int vp9_denoiser_alloc(VP9_COMMON *cm, struct SVC *svc, VP9_DENOISER *denoiser,
|
||||
int use_svc, int noise_sen, int width, int height,
|
||||
int ssx, int ssy,
|
||||
#if CONFIG_VP9_HIGHBITDEPTH
|
||||
int use_highbitdepth,
|
||||
#endif
|
||||
|
@@ -1513,9 +1513,9 @@ static int choose_partitioning(VP9_COMP *cpi, const TileInfo *const tile,
|
||||
}
|
||||
}
|
||||
}
|
||||
if (is_key_frame || (low_res &&
|
||||
vt.split[i].split[j].part_variances.none.variance >
|
||||
threshold_4x4avg)) {
|
||||
if (is_key_frame ||
|
||||
(low_res && vt.split[i].split[j].part_variances.none.variance >
|
||||
threshold_4x4avg)) {
|
||||
force_split[split_index] = 0;
|
||||
// Go down to 4x4 down-sampling for variance.
|
||||
variance4x4downsample[i2 + j] = 1;
|
||||
@@ -3403,9 +3403,10 @@ static void rd_pick_partition(VP9_COMP *cpi, ThreadData *td,
|
||||
|
||||
// Rate and distortion based partition search termination clause.
|
||||
if (!cpi->sf.ml_partition_search_early_termination &&
|
||||
!x->e_mbd.lossless && ((best_rdc.dist < (dist_breakout_thr >> 2)) ||
|
||||
(best_rdc.dist < dist_breakout_thr &&
|
||||
best_rdc.rate < rate_breakout_thr))) {
|
||||
!x->e_mbd.lossless &&
|
||||
((best_rdc.dist < (dist_breakout_thr >> 2)) ||
|
||||
(best_rdc.dist < dist_breakout_thr &&
|
||||
best_rdc.rate < rate_breakout_thr))) {
|
||||
do_rect = 0;
|
||||
}
|
||||
}
|
||||
@@ -4620,8 +4621,9 @@ void vp9_init_tile_data(VP9_COMP *cpi) {
|
||||
|
||||
if (cpi->tile_data == NULL || cpi->allocated_tiles < tile_cols * tile_rows) {
|
||||
if (cpi->tile_data != NULL) vpx_free(cpi->tile_data);
|
||||
CHECK_MEM_ERROR(cm, cpi->tile_data, vpx_malloc(tile_cols * tile_rows *
|
||||
sizeof(*cpi->tile_data)));
|
||||
CHECK_MEM_ERROR(
|
||||
cm, cpi->tile_data,
|
||||
vpx_malloc(tile_cols * tile_rows * sizeof(*cpi->tile_data)));
|
||||
cpi->allocated_tiles = tile_cols * tile_rows;
|
||||
|
||||
for (tile_row = 0; tile_row < tile_rows; ++tile_row)
|
||||
|
@@ -50,7 +50,8 @@ void vp9_subtract_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane) {
|
||||
}
|
||||
|
||||
static const int plane_rd_mult[REF_TYPES][PLANE_TYPES] = {
|
||||
{ 10, 6 }, { 8, 5 },
|
||||
{ 10, 6 },
|
||||
{ 8, 5 },
|
||||
};
|
||||
|
||||
// 'num' can be negative, but 'shift' must be non-negative.
|
||||
@@ -200,9 +201,9 @@ int vp9_optimize_b(MACROBLOCK *mb, int plane, int block, TX_SIZE tx_size,
const int band_next = band_translate[i + 1];
const int token_next =
(i + 1 != eob) ? vp9_get_token(qcoeff[scan[i + 1]]) : EOB_TOKEN;
unsigned int(
*const token_costs_next)[2][COEFF_CONTEXTS][ENTROPY_TOKENS] =
token_costs + band_next;
unsigned int(*const token_costs_next)[2][COEFF_CONTEXTS]
[ENTROPY_TOKENS] =
token_costs + band_next;
token_cache[rc] = vp9_pt_energy_class[t0];
ctx_next = get_coef_context(nb, token_cache, i + 1);
token_tree_sel_next = (x == 0);
@@ -65,12 +65,12 @@
#define AM_SEGMENT_ID_INACTIVE 7
#define AM_SEGMENT_ID_ACTIVE 0

#define ALTREF_HIGH_PRECISION_MV 1     // Whether to use high precision mv
                                       // for altref computation.
#define HIGH_PRECISION_MV_QTHRESH 200  // Q threshold for high precision
                                       // mv. Choose a very high value for
                                       // now so that HIGH_PRECISION is always
                                       // chosen.
// Whether to use high precision mv for altref computation.
#define ALTREF_HIGH_PRECISION_MV 1

// Q threshold for high precision mv. Choose a very high value for now so that
// HIGH_PRECISION is always chosen.
#define HIGH_PRECISION_MV_QTHRESH 200

#define FRAME_SIZE_FACTOR 128  // empirical params for context model threshold
#define FRAME_RATE_FACTOR 8
@@ -437,34 +437,37 @@ static int is_psnr_calc_enabled(VP9_COMP *cpi) {

/* clang-format off */
const Vp9LevelSpec vp9_level_defs[VP9_LEVELS] = {
  { LEVEL_1, 829440, 36864, 200, 400, 2, 1, 4, 8 },
  { LEVEL_1_1, 2764800, 73728, 800, 1000, 2, 1, 4, 8 },
  { LEVEL_2, 4608000, 122880, 1800, 1500, 2, 1, 4, 8 },
  { LEVEL_2_1, 9216000, 245760, 3600, 2800, 2, 2, 4, 8 },
  { LEVEL_3, 20736000, 552960, 7200, 6000, 2, 4, 4, 8 },
  { LEVEL_3_1, 36864000, 983040, 12000, 10000, 2, 4, 4, 8 },
  { LEVEL_4, 83558400, 2228224, 18000, 16000, 4, 4, 4, 8 },
  { LEVEL_4_1, 160432128, 2228224, 30000, 18000, 4, 4, 5, 6 },
  { LEVEL_5, 311951360, 8912896, 60000, 36000, 6, 8, 6, 4 },
  { LEVEL_5_1, 588251136, 8912896, 120000, 46000, 8, 8, 10, 4 },
  //            sample rate     size   breadth  bitrate  cpb
  { LEVEL_1, 829440, 36864, 512, 200, 400, 2, 1, 4, 8 },
  { LEVEL_1_1, 2764800, 73728, 768, 800, 1000, 2, 1, 4, 8 },
  { LEVEL_2, 4608000, 122880, 960, 1800, 1500, 2, 1, 4, 8 },
  { LEVEL_2_1, 9216000, 245760, 1344, 3600, 2800, 2, 2, 4, 8 },
  { LEVEL_3, 20736000, 552960, 2048, 7200, 6000, 2, 4, 4, 8 },
  { LEVEL_3_1, 36864000, 983040, 2752, 12000, 10000, 2, 4, 4, 8 },
  { LEVEL_4, 83558400, 2228224, 4160, 18000, 16000, 4, 4, 4, 8 },
  { LEVEL_4_1, 160432128, 2228224, 4160, 30000, 18000, 4, 4, 5, 6 },
  { LEVEL_5, 311951360, 8912896, 8384, 60000, 36000, 6, 8, 6, 4 },
  { LEVEL_5_1, 588251136, 8912896, 8384, 120000, 46000, 8, 8, 10, 4 },
  // TODO(huisu): update max_cpb_size for level 5_2 ~ 6_2 when
  // they are finalized (currently tentative).
  { LEVEL_5_2, 1176502272, 8912896, 180000, 90000, 8, 8, 10, 4 },
  { LEVEL_6, 1176502272, 35651584, 180000, 90000, 8, 16, 10, 4 },
  { LEVEL_6_1, 2353004544u, 35651584, 240000, 180000, 8, 16, 10, 4 },
  { LEVEL_6_2, 4706009088u, 35651584, 480000, 360000, 8, 16, 10, 4 },
  { LEVEL_5_2, 1176502272, 8912896, 8384, 180000, 90000, 8, 8, 10, 4 },
  { LEVEL_6, 1176502272, 35651584, 16832, 180000, 90000, 8, 16, 10, 4 },
  { LEVEL_6_1, 2353004544u, 35651584, 16832, 240000, 180000, 8, 16, 10, 4 },
  { LEVEL_6_2, 4706009088u, 35651584, 16832, 480000, 360000, 8, 16, 10, 4 },
};
/* clang-format on */
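The new fourth column is a per-level cap on picture breadth (the larger of width and height). As a rough worked check, reading the columns as annotated above: a 4096x2160 frame has a luma picture size of 8,847,360 samples and a breadth of 4096, so it clears the LEVEL_5 caps (8,912,896 / 8384) but not LEVEL_4_1 (2,228,224 / 4160). A standalone sketch of that check, independent of the encoder structs (values copied from the rows above; everything else is illustrative):

#include <stdint.h>
#include <stdio.h>

/* Does a frame fit a level's picture-size and breadth caps? */
static int fits_level(uint32_t w, uint32_t h, uint32_t max_size,
                      uint32_t max_breadth) {
  const uint32_t pic_size = w * h;
  const uint32_t pic_breadth = (w > h) ? w : h;
  return pic_size <= max_size && pic_breadth <= max_breadth;
}

int main(void) {
  /* LEVEL_5 caps from the table: size 8912896, breadth 8384. */
  printf("4096x2160 fits LEVEL_5:   %d\n", fits_level(4096, 2160, 8912896, 8384));
  /* LEVEL_4_1 caps from the table: size 2228224, breadth 4160. */
  printf("4096x2160 fits LEVEL_4_1: %d\n", fits_level(4096, 2160, 2228224, 4160));
  return 0;
}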
|
||||
|
||||
static const char *level_fail_messages[TARGET_LEVEL_FAIL_IDS] =
|
||||
{ "The average bit-rate is too high.",
|
||||
"The picture size is too large.",
|
||||
"The luma sample rate is too large.",
|
||||
"The CPB size is too large.",
|
||||
"The compression ratio is too small",
|
||||
"Too many column tiles are used.",
|
||||
"The alt-ref distance is too small.",
|
||||
"Too many reference buffers are used." };
|
||||
static const char *level_fail_messages[TARGET_LEVEL_FAIL_IDS] = {
|
||||
"The average bit-rate is too high.",
|
||||
"The picture size is too large.",
|
||||
"The picture width/height is too large.",
|
||||
"The luma sample rate is too large.",
|
||||
"The CPB size is too large.",
|
||||
"The compression ratio is too small",
|
||||
"Too many column tiles are used.",
|
||||
"The alt-ref distance is too small.",
|
||||
"Too many reference buffers are used."
|
||||
};
|
||||
|
||||
static INLINE void Scale2Ratio(VPX_SCALING mode, int *hr, int *hs) {
|
||||
switch (mode) {
|
||||
@@ -544,6 +547,74 @@ static void apply_active_map(VP9_COMP *cpi) {
  }
}

static void apply_roi_map(VP9_COMP *cpi) {
  VP9_COMMON *cm = &cpi->common;
  struct segmentation *const seg = &cm->seg;
  vpx_roi_map_t *roi = &cpi->roi;
  const int *delta_q = roi->delta_q;
  const int *delta_lf = roi->delta_lf;
  const int *skip = roi->skip;
  int ref_frame[8];
  int internal_delta_q[MAX_SEGMENTS];
  int i;
  static const int flag_list[4] = { 0, VP9_LAST_FLAG, VP9_GOLD_FLAG,
                                    VP9_ALT_FLAG };

  // TODO(jianj): Investigate why ROI not working in speed < 5 or in non
  // realtime mode.
  if (cpi->oxcf.mode != REALTIME || cpi->oxcf.speed < 5) return;
  if (!roi->enabled) return;

  memcpy(&ref_frame, roi->ref_frame, sizeof(ref_frame));

  vp9_enable_segmentation(seg);
  vp9_clearall_segfeatures(seg);
  // Select delta coding method;
  seg->abs_delta = SEGMENT_DELTADATA;

  memcpy(cpi->segmentation_map, roi->roi_map, (cm->mi_rows * cm->mi_cols));

  for (i = 0; i < MAX_SEGMENTS; ++i) {
    // Translate the external delta q values to internal values.
    internal_delta_q[i] = vp9_quantizer_to_qindex(abs(delta_q[i]));
    if (delta_q[i] < 0) internal_delta_q[i] = -internal_delta_q[i];
    vp9_disable_segfeature(seg, i, SEG_LVL_ALT_Q);
    vp9_disable_segfeature(seg, i, SEG_LVL_ALT_LF);
    if (internal_delta_q[i] != 0) {
      vp9_enable_segfeature(seg, i, SEG_LVL_ALT_Q);
      vp9_set_segdata(seg, i, SEG_LVL_ALT_Q, internal_delta_q[i]);
    }
    if (delta_lf[i] != 0) {
      vp9_enable_segfeature(seg, i, SEG_LVL_ALT_LF);
      vp9_set_segdata(seg, i, SEG_LVL_ALT_LF, delta_lf[i]);
    }
    if (skip[i] != 0) {
      vp9_enable_segfeature(seg, i, SEG_LVL_SKIP);
      vp9_set_segdata(seg, i, SEG_LVL_SKIP, skip[i]);
    }
    if (ref_frame[i] >= 0) {
      int valid_ref = 1;
      // ALTREF is not used as reference for nonrd_pickmode with 0 lag.
      if (ref_frame[i] == ALTREF_FRAME && cpi->sf.use_nonrd_pick_mode)
        valid_ref = 0;
      // If GOLDEN is selected, make sure it's set as reference.
      if (ref_frame[i] == GOLDEN_FRAME &&
          !(cpi->ref_frame_flags & flag_list[ref_frame[i]])) {
        valid_ref = 0;
      }
      // GOLDEN was updated in previous encoded frame, so GOLDEN and LAST are
      // same reference.
      if (ref_frame[i] == GOLDEN_FRAME && cpi->rc.frames_since_golden == 0)
        ref_frame[i] = LAST_FRAME;
      if (valid_ref) {
        vp9_enable_segfeature(seg, i, SEG_LVL_REF_FRAME);
        vp9_set_segdata(seg, i, SEG_LVL_REF_FRAME, ref_frame[i]);
      }
    }
  }
  roi->enabled = 1;
}
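The only non-obvious step above is the delta-q translation: the external ROI delta is given on the 0-63 quantizer scale and must be mapped to the internal qindex scale while keeping its sign. A minimal sketch of just that step; quantizer_to_qindex() below is a stand-in for vp9_quantizer_to_qindex(), whose lookup table is not reproduced here:

#include <stdlib.h> /* abs() */

extern int quantizer_to_qindex(int quantizer); /* stand-in, assumed elsewhere */

static int translate_delta_q(int external_delta_q) {
  /* Map the magnitude, then restore the sign, as apply_roi_map() does. */
  int internal = quantizer_to_qindex(abs(external_delta_q));
  return (external_delta_q < 0) ? -internal : internal;
}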
static void init_level_info(Vp9LevelInfo *level_info) {
  Vp9LevelStats *const level_stats = &level_info->level_stats;
  Vp9LevelSpec *const level_spec = &level_info->level_spec;
@@ -554,6 +625,13 @@ static void init_level_info(Vp9LevelInfo *level_info) {
  level_spec->min_altref_distance = INT_MAX;
}

static int check_seg_range(int seg_data[8], int range) {
  return !(abs(seg_data[0]) > range || abs(seg_data[1]) > range ||
           abs(seg_data[2]) > range || abs(seg_data[3]) > range ||
           abs(seg_data[4]) > range || abs(seg_data[5]) > range ||
           abs(seg_data[6]) > range || abs(seg_data[7]) > range);
}

VP9_LEVEL vp9_get_level(const Vp9LevelSpec *const level_spec) {
  int i;
  const Vp9LevelSpec *this_level;
@@ -566,6 +644,8 @@ VP9_LEVEL vp9_get_level(const Vp9LevelSpec *const level_spec) {
            (double)this_level->max_luma_sample_rate *
                (1 + SAMPLE_RATE_GRACE_P) ||
        level_spec->max_luma_picture_size > this_level->max_luma_picture_size ||
        level_spec->max_luma_picture_breadth >
            this_level->max_luma_picture_breadth ||
        level_spec->average_bitrate > this_level->average_bitrate ||
        level_spec->max_cpb_size > this_level->max_cpb_size ||
        level_spec->compression_ratio < this_level->compression_ratio ||
@@ -578,6 +658,61 @@ VP9_LEVEL vp9_get_level(const Vp9LevelSpec *const level_spec) {
  return (i == VP9_LEVELS) ? LEVEL_UNKNOWN : vp9_level_defs[i].level;
}

int vp9_set_roi_map(VP9_COMP *cpi, unsigned char *map, unsigned int rows,
                    unsigned int cols, int delta_q[8], int delta_lf[8],
                    int skip[8], int ref_frame[8]) {
  VP9_COMMON *cm = &cpi->common;
  vpx_roi_map_t *roi = &cpi->roi;
  const int range = 63;
  const int ref_frame_range = 3;  // Alt-ref
  const int skip_range = 1;
  const int frame_rows = cpi->common.mi_rows;
  const int frame_cols = cpi->common.mi_cols;

  // Check number of rows and columns match
  if (frame_rows != (int)rows || frame_cols != (int)cols) {
    return -1;
  }

  if (!check_seg_range(delta_q, range) || !check_seg_range(delta_lf, range) ||
      !check_seg_range(ref_frame, ref_frame_range) ||
      !check_seg_range(skip, skip_range))
    return -1;

  // Also disable segmentation if no deltas are specified.
  if (!map ||
      (!(delta_q[0] | delta_q[1] | delta_q[2] | delta_q[3] | delta_q[4] |
         delta_q[5] | delta_q[6] | delta_q[7] | delta_lf[0] | delta_lf[1] |
         delta_lf[2] | delta_lf[3] | delta_lf[4] | delta_lf[5] | delta_lf[6] |
         delta_lf[7] | skip[0] | skip[1] | skip[2] | skip[3] | skip[4] |
         skip[5] | skip[6] | skip[7]) &&
       (ref_frame[0] == -1 && ref_frame[1] == -1 && ref_frame[2] == -1 &&
        ref_frame[3] == -1 && ref_frame[4] == -1 && ref_frame[5] == -1 &&
        ref_frame[6] == -1 && ref_frame[7] == -1))) {
    vp9_disable_segmentation(&cm->seg);
    cpi->roi.enabled = 0;
    return 0;
  }

  if (roi->roi_map) {
    vpx_free(roi->roi_map);
    roi->roi_map = NULL;
  }
  CHECK_MEM_ERROR(cm, roi->roi_map, vpx_malloc(rows * cols));

  // Copy to ROI sturcture in the compressor.
  memcpy(roi->roi_map, map, rows * cols);
  memcpy(&roi->delta_q, delta_q, MAX_SEGMENTS * sizeof(delta_q[0]));
  memcpy(&roi->delta_lf, delta_lf, MAX_SEGMENTS * sizeof(delta_lf[0]));
  memcpy(&roi->skip, skip, MAX_SEGMENTS * sizeof(skip[0]));
  memcpy(&roi->ref_frame, ref_frame, MAX_SEGMENTS * sizeof(ref_frame[0]));
  roi->enabled = 1;
  roi->rows = rows;
  roi->cols = cols;

  return 0;
}
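A hedged usage sketch of the new vp9_set_roi_map() entry point declared above: the map is one byte per 8x8 block (mi_rows x mi_cols), segment 1 gets a quality boost and segment 2 is forced to skip, and the q/lf values stay inside the ranges the function checks. The helper name, the surrounding error handling, and where the caller gets cpi/rows/cols are all assumptions; the function itself keeps its own copy of the map, so the caller may free it.

/* Illustrative only; requires the encoder-internal headers for VP9_COMP. */
#include <stdlib.h>

static int demo_set_roi(VP9_COMP *cpi, unsigned int rows, unsigned int cols) {
  int delta_q[8] = { 0, -10, 0, 0, 0, 0, 0, 0 };  /* boost segment 1 */
  int delta_lf[8] = { 0 };
  int skip[8] = { 0, 0, 1, 0, 0, 0, 0, 0 };       /* skip segment 2 */
  int ref_frame[8] = { -1, -1, -1, -1, -1, -1, -1, -1 };
  unsigned char *map = calloc(rows * cols, 1);    /* all blocks in segment 0 */
  int ret;
  if (!map) return -1;
  /* ... mark some blocks as segment 1 or 2 here ... */
  ret = vp9_set_roi_map(cpi, map, rows, cols, delta_q, delta_lf, skip,
                        ref_frame);
  free(map); /* the encoder copied the map into cpi->roi */
  return ret;
}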
|
||||
|
||||
int vp9_set_active_map(VP9_COMP *cpi, unsigned char *new_map_16x16, int rows,
|
||||
int cols) {
|
||||
if (rows == cpi->common.mb_rows && cols == cpi->common.mb_cols) {
|
||||
@@ -812,6 +947,9 @@ static void dealloc_compressor_data(VP9_COMP *cpi) {
|
||||
vpx_free(cpi->active_map.map);
|
||||
cpi->active_map.map = NULL;
|
||||
|
||||
vpx_free(cpi->roi.roi_map);
|
||||
cpi->roi.roi_map = NULL;
|
||||
|
||||
vpx_free(cpi->consec_zero_mv);
|
||||
cpi->consec_zero_mv = NULL;
|
||||
|
||||
@@ -1116,8 +1254,9 @@ static void alloc_util_frame_buffers(VP9_COMP *cpi) {
|
||||
|
||||
// For 1 pass cbr: allocate scaled_frame that may be used as an intermediate
|
||||
// buffer for a 2 stage down-sampling: two stages of 1:2 down-sampling for a
|
||||
// target of 1/4x1/4.
|
||||
if (is_one_pass_cbr_svc(cpi) && !cpi->svc.scaled_temp_is_alloc) {
|
||||
// target of 1/4x1/4. number_spatial_layers must be greater than 2.
|
||||
if (is_one_pass_cbr_svc(cpi) && !cpi->svc.scaled_temp_is_alloc &&
|
||||
cpi->svc.number_spatial_layers > 2) {
|
||||
cpi->svc.scaled_temp_is_alloc = 1;
|
||||
if (vpx_realloc_frame_buffer(
|
||||
&cpi->svc.scaled_temp, cm->width >> 1, cm->height >> 1,
|
||||
@@ -1219,8 +1358,8 @@ static void set_tile_limits(VP9_COMP *cpi) {
|
||||
}
|
||||
|
||||
if (cpi->oxcf.target_level == LEVEL_AUTO) {
|
||||
const uint32_t pic_size = cpi->common.width * cpi->common.height;
|
||||
const int level_tile_cols = log_tile_cols_from_picsize_level(pic_size);
|
||||
const int level_tile_cols =
|
||||
log_tile_cols_from_picsize_level(cpi->common.width, cpi->common.height);
|
||||
if (cm->log2_tile_cols > level_tile_cols) {
|
||||
cm->log2_tile_cols = VPXMAX(level_tile_cols, min_log2_tile_cols);
|
||||
}
|
||||
@@ -1848,6 +1987,8 @@ void vp9_change_config(struct VP9_COMP *cpi, const VP9EncoderConfig *oxcf) {
|
||||
cm->mi_rows * cm->mi_cols * sizeof(*cpi->consec_zero_mv));
|
||||
if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ)
|
||||
vp9_cyclic_refresh_reset_resize(cpi);
|
||||
rc->rc_1_frame = 0;
|
||||
rc->rc_2_frame = 0;
|
||||
}
|
||||
|
||||
if ((cpi->svc.number_temporal_layers > 1 && cpi->oxcf.rc_mode == VPX_CBR) ||
|
||||
@@ -1858,6 +1999,24 @@ void vp9_change_config(struct VP9_COMP *cpi, const VP9EncoderConfig *oxcf) {
                         (int)cpi->oxcf.target_bandwidth);
  }

  // Check for resetting the rc flags (rc_1_frame, rc_2_frame) if the
  // configuration change has a large change in avg_frame_bandwidth.
  // For SVC check for resetting based on spatial layer average bandwidth.
  // Also reset buffer level to optimal level.
  if (cm->current_video_frame > 0) {
    if (cpi->use_svc) {
      vp9_svc_check_reset_layer_rc_flag(cpi);
    } else {
      if (rc->avg_frame_bandwidth > (3 * rc->last_avg_frame_bandwidth >> 1) ||
          rc->avg_frame_bandwidth < (rc->last_avg_frame_bandwidth >> 1)) {
        rc->rc_1_frame = 0;
        rc->rc_2_frame = 0;
        rc->bits_off_target = rc->optimal_buffer_level;
        rc->buffer_level = rc->optimal_buffer_level;
      }
    }
  }
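The non-SVC reset above fires when the new average frame bandwidth leaves a roughly [0.5x, 1.5x] band around the previous value. A small standalone restatement of that band check, using the same integer shifts as the encoder:

/* Returns 1 when rate-control state should be reset after a config change. */
static int should_reset_rc(int avg_frame_bandwidth,
                           int last_avg_frame_bandwidth) {
  return avg_frame_bandwidth > (3 * last_avg_frame_bandwidth >> 1) ||
         avg_frame_bandwidth < (last_avg_frame_bandwidth >> 1);
}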
|
||||
|
||||
cpi->alt_ref_source = NULL;
|
||||
rc->is_src_frame_alt_ref = 0;
|
||||
|
||||
@@ -1992,8 +2151,9 @@ VP9_COMP *vp9_create_compressor(VP9EncoderConfig *oxcf,
|
||||
|
||||
realloc_segmentation_maps(cpi);
|
||||
|
||||
CHECK_MEM_ERROR(cm, cpi->skin_map, vpx_calloc(cm->mi_rows * cm->mi_cols,
|
||||
sizeof(cpi->skin_map[0])));
|
||||
CHECK_MEM_ERROR(
|
||||
cm, cpi->skin_map,
|
||||
vpx_calloc(cm->mi_rows * cm->mi_cols, sizeof(cpi->skin_map[0])));
|
||||
|
||||
CHECK_MEM_ERROR(cm, cpi->alt_ref_aq, vp9_alt_ref_aq_create());
|
||||
|
||||
@@ -2856,18 +3016,26 @@ void vp9_update_reference_frames(VP9_COMP *cpi) {
|
||||
if (cpi->oxcf.noise_sensitivity > 0 && denoise_svc(cpi) &&
|
||||
cpi->denoiser.denoising_level > kDenLowLow) {
|
||||
int svc_base_is_key = 0;
|
||||
int denoise_svc_second_layer = 0;
|
||||
if (cpi->use_svc) {
|
||||
int realloc_fail = 0;
|
||||
const int svc_buf_shift =
|
||||
cpi->svc.number_spatial_layers - cpi->svc.spatial_layer_id == 2
|
||||
? cpi->denoiser.num_ref_frames
|
||||
: 0;
|
||||
int layer = LAYER_IDS_TO_IDX(cpi->svc.spatial_layer_id,
|
||||
cpi->svc.temporal_layer_id,
|
||||
cpi->svc.number_temporal_layers);
|
||||
LAYER_CONTEXT *lc = &cpi->svc.layer_context[layer];
|
||||
svc_base_is_key = lc->is_key_frame;
|
||||
|
||||
// Check if we need to allocate extra buffers in the denoiser for
|
||||
denoise_svc_second_layer =
|
||||
cpi->svc.number_spatial_layers - cpi->svc.spatial_layer_id == 2 ? 1
|
||||
: 0;
|
||||
// Check if we need to allocate extra buffers in the denoiser
|
||||
// for
|
||||
// refreshed frames.
|
||||
realloc_fail = vp9_denoiser_realloc_svc(
|
||||
cm, &cpi->denoiser, cpi->refresh_alt_ref_frame,
|
||||
cm, &cpi->denoiser, svc_buf_shift, cpi->refresh_alt_ref_frame,
|
||||
cpi->refresh_golden_frame, cpi->refresh_last_frame, cpi->alt_fb_idx,
|
||||
cpi->gld_fb_idx, cpi->lst_fb_idx);
|
||||
if (realloc_fail)
|
||||
@@ -2878,7 +3046,8 @@ void vp9_update_reference_frames(VP9_COMP *cpi) {
|
||||
&cpi->denoiser, *cpi->Source, cpi->common.frame_type,
|
||||
cpi->refresh_alt_ref_frame, cpi->refresh_golden_frame,
|
||||
cpi->refresh_last_frame, cpi->alt_fb_idx, cpi->gld_fb_idx,
|
||||
cpi->lst_fb_idx, cpi->resize_pending, svc_base_is_key);
|
||||
cpi->lst_fb_idx, cpi->resize_pending, svc_base_is_key,
|
||||
denoise_svc_second_layer);
|
||||
}
|
||||
#endif
|
||||
if (is_one_pass_cbr_svc(cpi)) {
|
||||
@@ -3313,8 +3482,9 @@ static void setup_denoiser_buffer(VP9_COMP *cpi) {
|
||||
VP9_COMMON *const cm = &cpi->common;
|
||||
if (cpi->oxcf.noise_sensitivity > 0 &&
|
||||
!cpi->denoiser.frame_buffer_initialized) {
|
||||
if (vp9_denoiser_alloc(cm, cpi->use_svc, &cpi->denoiser, cm->width,
|
||||
cm->height, cm->subsampling_x, cm->subsampling_y,
|
||||
if (vp9_denoiser_alloc(cm, &cpi->svc, &cpi->denoiser, cpi->use_svc,
|
||||
cpi->oxcf.noise_sensitivity, cm->width, cm->height,
|
||||
cm->subsampling_x, cm->subsampling_y,
|
||||
#if CONFIG_VP9_HIGHBITDEPTH
|
||||
cm->use_highbitdepth,
|
||||
#endif
|
||||
@@ -3595,6 +3765,8 @@ static void encode_without_recode_loop(VP9_COMP *cpi, size_t *size,
|
||||
// it may be pretty bad for rate-control,
|
||||
// and I should handle it somehow
|
||||
vp9_alt_ref_aq_setup_map(cpi->alt_ref_aq, cpi);
|
||||
} else if (cpi->roi.enabled && cm->frame_type != KEY_FRAME) {
|
||||
apply_roi_map(cpi);
|
||||
}
|
||||
|
||||
apply_active_map(cpi);
|
||||
@@ -4325,6 +4497,15 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, size_t *size,
|
||||
struct segmentation *const seg = &cm->seg;
|
||||
TX_SIZE t;
|
||||
|
||||
// SVC: skip encoding of enhancement layer if the layer target bandwidth = 0.
|
||||
if (cpi->use_svc && cpi->svc.spatial_layer_id > 0 &&
|
||||
!cpi->svc.rc_drop_superframe && cpi->oxcf.target_bandwidth == 0) {
|
||||
cpi->svc.skip_enhancement_layer = 1;
|
||||
vp9_rc_postencode_update_drop_frame(cpi);
|
||||
cpi->ext_refresh_frame_flags_pending = 0;
|
||||
return;
|
||||
}
|
||||
|
||||
set_ext_overrides(cpi);
|
||||
vpx_clear_system_state();
|
||||
|
||||
@@ -4416,7 +4597,6 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, size_t *size,
|
||||
if (vp9_rc_drop_frame(cpi) ||
|
||||
(is_one_pass_cbr_svc(cpi) && cpi->svc.rc_drop_superframe == 1)) {
|
||||
vp9_rc_postencode_update_drop_frame(cpi);
|
||||
++cm->current_video_frame;
|
||||
cpi->ext_refresh_frame_flags_pending = 0;
|
||||
cpi->svc.rc_drop_superframe = 1;
|
||||
cpi->last_frame_dropped = 1;
|
||||
@@ -4829,6 +5009,7 @@ static void update_level_info(VP9_COMP *cpi, size_t *size, int arf_src_index) {
|
||||
int i, idx;
|
||||
uint64_t luma_samples, dur_end;
|
||||
const uint32_t luma_pic_size = cm->width * cm->height;
|
||||
const uint32_t luma_pic_breadth = VPXMAX(cm->width, cm->height);
|
||||
LevelConstraint *const level_constraint = &cpi->level_constraint;
|
||||
const int8_t level_index = level_constraint->level_index;
|
||||
double cpb_data_size;
|
||||
@@ -4932,6 +5113,11 @@ static void update_level_info(VP9_COMP *cpi, size_t *size, int arf_src_index) {
|
||||
level_spec->max_luma_picture_size = luma_pic_size;
|
||||
}
|
||||
|
||||
// update max_luma_picture_breadth
|
||||
if (luma_pic_breadth > level_spec->max_luma_picture_breadth) {
|
||||
level_spec->max_luma_picture_breadth = luma_pic_breadth;
|
||||
}
|
||||
|
||||
// update compression_ratio
|
||||
level_spec->compression_ratio = (double)level_stats->total_uncompressed_size *
|
||||
cm->bit_depth /
|
||||
@@ -4952,6 +5138,15 @@ static void update_level_info(VP9_COMP *cpi, size_t *size, int arf_src_index) {
|
||||
level_fail_messages[LUMA_PIC_SIZE_TOO_LARGE]);
|
||||
}
|
||||
|
||||
if (level_spec->max_luma_picture_breadth >
|
||||
vp9_level_defs[level_index].max_luma_picture_breadth) {
|
||||
level_constraint->fail_flag |= (1 << LUMA_PIC_BREADTH_TOO_LARGE);
|
||||
vpx_internal_error(&cm->error, VPX_CODEC_ERROR,
|
||||
"Failed to encode to the target level %d. %s",
|
||||
vp9_level_defs[level_index].level,
|
||||
level_fail_messages[LUMA_PIC_BREADTH_TOO_LARGE]);
|
||||
}
|
||||
|
||||
if ((double)level_spec->max_luma_sample_rate >
|
||||
(double)vp9_level_defs[level_index].max_luma_sample_rate *
|
||||
(1 + SAMPLE_RATE_GRACE_P)) {
|
||||
@@ -5152,8 +5347,6 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags,
|
||||
cm->intra_only = 0;
|
||||
// if the flags indicate intra frame, but if the current picture is for
|
||||
// non-zero spatial layer, it should not be an intra picture.
|
||||
// TODO(Won Kap): this needs to change if per-layer intra frame is
|
||||
// allowed.
|
||||
if ((source->flags & VPX_EFLAG_FORCE_KF) &&
|
||||
cpi->svc.spatial_layer_id > cpi->svc.first_spatial_layer_to_encode) {
|
||||
source->flags &= ~(unsigned int)(VPX_EFLAG_FORCE_KF);
|
||||
|
@@ -383,6 +383,7 @@ typedef struct {
|
||||
VP9_LEVEL level;
|
||||
uint64_t max_luma_sample_rate;
|
||||
uint32_t max_luma_picture_size;
|
||||
uint32_t max_luma_picture_breadth;
|
||||
double average_bitrate; // in kilobits per second
|
||||
double max_cpb_size; // in kilobits
|
||||
double compression_ratio;
|
||||
@@ -422,14 +423,15 @@ typedef struct {
|
||||
|
||||
typedef enum {
|
||||
BITRATE_TOO_LARGE = 0,
|
||||
LUMA_PIC_SIZE_TOO_LARGE = 1,
|
||||
LUMA_SAMPLE_RATE_TOO_LARGE = 2,
|
||||
CPB_TOO_LARGE = 3,
|
||||
COMPRESSION_RATIO_TOO_SMALL = 4,
|
||||
TOO_MANY_COLUMN_TILE = 5,
|
||||
ALTREF_DIST_TOO_SMALL = 6,
|
||||
TOO_MANY_REF_BUFFER = 7,
|
||||
TARGET_LEVEL_FAIL_IDS = 8
|
||||
LUMA_PIC_SIZE_TOO_LARGE,
|
||||
LUMA_PIC_BREADTH_TOO_LARGE,
|
||||
LUMA_SAMPLE_RATE_TOO_LARGE,
|
||||
CPB_TOO_LARGE,
|
||||
COMPRESSION_RATIO_TOO_SMALL,
|
||||
TOO_MANY_COLUMN_TILE,
|
||||
ALTREF_DIST_TOO_SMALL,
|
||||
TOO_MANY_REF_BUFFER,
|
||||
TARGET_LEVEL_FAIL_IDS
|
||||
} TARGET_LEVEL_FAIL_ID;
|
||||
|
||||
typedef struct {
|
||||
@@ -721,6 +723,8 @@ typedef struct VP9_COMP {
|
||||
|
||||
uint8_t *count_arf_frame_usage;
|
||||
uint8_t *count_lastgolden_frame_usage;
|
||||
|
||||
vpx_roi_map_t roi;
|
||||
} VP9_COMP;
|
||||
|
||||
void vp9_initialize_enc(void);
|
||||
@@ -866,9 +870,8 @@ static INLINE int is_one_pass_cbr_svc(const struct VP9_COMP *const cpi) {
|
||||
|
||||
#if CONFIG_VP9_TEMPORAL_DENOISING
|
||||
static INLINE int denoise_svc(const struct VP9_COMP *const cpi) {
|
||||
return (!cpi->use_svc ||
|
||||
(cpi->use_svc &&
|
||||
cpi->svc.spatial_layer_id == cpi->svc.number_spatial_layers - 1));
|
||||
return (!cpi->use_svc || (cpi->use_svc && cpi->svc.spatial_layer_id >=
|
||||
cpi->svc.first_layer_denoise));
|
||||
}
|
||||
#endif
|
||||
|
||||
@@ -920,10 +923,14 @@ static INLINE int get_level_index(VP9_LEVEL level) {

// Return the log2 value of max column tiles corresponding to the level that
// the picture size fits into.
static INLINE int log_tile_cols_from_picsize_level(uint32_t pic_size) {
static INLINE int log_tile_cols_from_picsize_level(uint32_t width,
                                                   uint32_t height) {
  int i;
  const uint32_t pic_size = width * height;
  const uint32_t pic_breadth = VPXMAX(width, height);
  for (i = LEVEL_1; i < LEVEL_MAX; ++i) {
    if (vp9_level_defs[i].max_luma_picture_size > pic_size) {
    if (vp9_level_defs[i].max_luma_picture_size >= pic_size &&
        vp9_level_defs[i].max_luma_picture_breadth >= pic_breadth) {
      return get_msb(vp9_level_defs[i].max_col_tiles);
    }
  }
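Worked example of the lookup above, using the table values quoted earlier: a 1920x1080 frame has pic_size 2,073,600 and pic_breadth 1920, so the first row satisfying both caps is LEVEL_4 (2,228,224 / 4160), whose max_col_tiles is 4, and the function returns log2(4) = 2. A compact sketch under the assumption that the first matching row wins, with a few rows hard-coded and a portable stand-in for get_msb():

#include <stdint.h>
#include <stddef.h>

static int msb(unsigned v) { int b = 0; while (v >>= 1) ++b; return b; }

static int log_tile_cols_for(uint32_t width, uint32_t height) {
  /* Rows copied from vp9_level_defs above: size cap, breadth cap, col tiles. */
  static const struct { uint32_t size, breadth, max_col_tiles; } caps[] = {
    { 2228224, 4160, 4 },    /* LEVEL_4 */
    { 8912896, 8384, 6 },    /* LEVEL_5 */
    { 35651584, 16832, 16 }, /* LEVEL_6 */
  };
  const uint32_t pic_size = width * height;
  const uint32_t pic_breadth = (width > height) ? width : height;
  size_t i;
  for (i = 0; i < sizeof(caps) / sizeof(caps[0]); ++i)
    if (caps[i].size >= pic_size && caps[i].breadth >= pic_breadth)
      return msb(caps[i].max_col_tiles);
  return -1; /* larger than any level listed here */
}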
|
||||
@@ -932,6 +939,10 @@ static INLINE int log_tile_cols_from_picsize_level(uint32_t pic_size) {
|
||||
|
||||
VP9_LEVEL vp9_get_level(const Vp9LevelSpec *const level_spec);
|
||||
|
||||
int vp9_set_roi_map(VP9_COMP *cpi, unsigned char *map, unsigned int rows,
|
||||
unsigned int cols, int delta_q[8], int delta_lf[8],
|
||||
int skip[8], int ref_frame[8]);
|
||||
|
||||
void vp9_new_framerate(VP9_COMP *cpi, double framerate);
|
||||
|
||||
void vp9_set_row_mt(VP9_COMP *cpi);
|
||||
|
@@ -66,8 +66,8 @@ static int get_max_tile_cols(VP9_COMP *cpi) {
  log2_tile_cols =
      clamp(cpi->oxcf.tile_columns, min_log2_tile_cols, max_log2_tile_cols);
  if (cpi->oxcf.target_level == LEVEL_AUTO) {
    const uint32_t pic_size = cpi->common.width * cpi->common.height;
    const int level_tile_cols = log_tile_cols_from_picsize_level(pic_size);
    const int level_tile_cols =
        log_tile_cols_from_picsize_level(cpi->common.width, cpi->common.height);
    if (log2_tile_cols > level_tile_cols) {
      log2_tile_cols = VPXMAX(level_tile_cols, min_log2_tile_cols);
    }
@@ -390,8 +390,9 @@ void vp9_row_mt_sync_write_dummy(VP9RowMTSync *const row_mt_sync, int r, int c,
}

#if !CONFIG_REALTIME_ONLY
static int first_pass_worker_hook(EncWorkerData *const thread_data,
                                  MultiThreadHandle *multi_thread_ctxt) {
static int first_pass_worker_hook(void *arg1, void *arg2) {
  EncWorkerData *const thread_data = (EncWorkerData *)arg1;
  MultiThreadHandle *multi_thread_ctxt = (MultiThreadHandle *)arg2;
  VP9_COMP *const cpi = thread_data->cpi;
  const VP9_COMMON *const cm = &cpi->common;
  const int tile_cols = 1 << cm->log2_tile_cols;
@@ -470,8 +471,8 @@ void vp9_encode_fp_row_mt(VP9_COMP *cpi) {
    }
  }

  launch_enc_workers(cpi, (VPxWorkerHook)first_pass_worker_hook,
                     multi_thread_ctxt, num_workers);
  launch_enc_workers(cpi, first_pass_worker_hook, multi_thread_ctxt,
                     num_workers);
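The worker hooks are reworked to the generic two-void-pointer shape, so the function-pointer casts at the launch_enc_workers() call sites can be dropped. A minimal sketch of that pattern with hypothetical names standing in for the real vpx_thread types:

/* Generic hook: the worker framework passes opaque pointers and the hook
 * casts them back to its real parameter types. Names are illustrative. */
typedef int (*worker_hook_fn)(void *arg1, void *arg2);

struct job_data { int tile_col; };
struct shared_ctx { int num_tiles; };

static int example_worker_hook(void *arg1, void *arg2) {
  struct job_data *job = (struct job_data *)arg1;
  struct shared_ctx *ctx = (struct shared_ctx *)arg2;
  return job->tile_col < ctx->num_tiles; /* per-tile work would go here */
}

static int launch(worker_hook_fn hook, struct job_data *job,
                  struct shared_ctx *ctx) {
  return hook(job, ctx); /* no cast needed: the hook already matches the type */
}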
|
||||
|
||||
first_tile_col = &cpi->tile_data[0];
|
||||
for (i = 1; i < tile_cols; i++) {
|
||||
@@ -480,8 +481,9 @@ void vp9_encode_fp_row_mt(VP9_COMP *cpi) {
|
||||
}
|
||||
}
|
||||
|
||||
static int temporal_filter_worker_hook(EncWorkerData *const thread_data,
|
||||
MultiThreadHandle *multi_thread_ctxt) {
|
||||
static int temporal_filter_worker_hook(void *arg1, void *arg2) {
|
||||
EncWorkerData *const thread_data = (EncWorkerData *)arg1;
|
||||
MultiThreadHandle *multi_thread_ctxt = (MultiThreadHandle *)arg2;
|
||||
VP9_COMP *const cpi = thread_data->cpi;
|
||||
const VP9_COMMON *const cm = &cpi->common;
|
||||
const int tile_cols = 1 << cm->log2_tile_cols;
|
||||
@@ -553,13 +555,14 @@ void vp9_temporal_filter_row_mt(VP9_COMP *cpi) {
|
||||
}
|
||||
}
|
||||
|
||||
launch_enc_workers(cpi, (VPxWorkerHook)temporal_filter_worker_hook,
|
||||
multi_thread_ctxt, num_workers);
|
||||
launch_enc_workers(cpi, temporal_filter_worker_hook, multi_thread_ctxt,
|
||||
num_workers);
|
||||
}
|
||||
#endif // !CONFIG_REALTIME_ONLY
|
||||
|
||||
static int enc_row_mt_worker_hook(EncWorkerData *const thread_data,
|
||||
MultiThreadHandle *multi_thread_ctxt) {
|
||||
static int enc_row_mt_worker_hook(void *arg1, void *arg2) {
|
||||
EncWorkerData *const thread_data = (EncWorkerData *)arg1;
|
||||
MultiThreadHandle *multi_thread_ctxt = (MultiThreadHandle *)arg2;
|
||||
VP9_COMP *const cpi = thread_data->cpi;
|
||||
const VP9_COMMON *const cm = &cpi->common;
|
||||
const int tile_cols = 1 << cm->log2_tile_cols;
|
||||
@@ -648,8 +651,8 @@ void vp9_encode_tiles_row_mt(VP9_COMP *cpi) {
|
||||
}
|
||||
}
|
||||
|
||||
launch_enc_workers(cpi, (VPxWorkerHook)enc_row_mt_worker_hook,
|
||||
multi_thread_ctxt, num_workers);
|
||||
launch_enc_workers(cpi, enc_row_mt_worker_hook, multi_thread_ctxt,
|
||||
num_workers);
|
||||
|
||||
for (i = 0; i < num_workers; i++) {
|
||||
VPxWorker *const worker = &cpi->workers[i];
|
||||
|
@@ -44,7 +44,6 @@
|
||||
#define COMPLEXITY_STATS_OUTPUT 0
|
||||
|
||||
#define FIRST_PASS_Q 10.0
|
||||
#define GF_MAX_BOOST 96.0
|
||||
#define INTRA_MODE_PENALTY 1024
|
||||
#define MIN_ARF_GF_BOOST 240
|
||||
#define MIN_DECAY_FACTOR 0.01
|
||||
@@ -732,9 +731,8 @@ static void first_pass_stat_calc(VP9_COMP *cpi, FIRSTPASS_STATS *fps,
|
||||
// Exclude any image dead zone
|
||||
if (fp_acc_data->image_data_start_row > 0) {
|
||||
fp_acc_data->intra_skip_count =
|
||||
VPXMAX(0,
|
||||
fp_acc_data->intra_skip_count -
|
||||
(fp_acc_data->image_data_start_row * cm->mb_cols * 2));
|
||||
VPXMAX(0, fp_acc_data->intra_skip_count -
|
||||
(fp_acc_data->image_data_start_row * cm->mb_cols * 2));
|
||||
}
|
||||
|
||||
fp_acc_data->intra_factor = fp_acc_data->intra_factor / (double)num_mbs;
|
||||
@@ -1949,6 +1947,7 @@ static void accumulate_frame_motion_stats(const FIRSTPASS_STATS *stats,
|
||||
}
|
||||
|
||||
#define BASELINE_ERR_PER_MB 12500.0
|
||||
#define GF_MAX_BOOST 96.0
|
||||
static double calc_frame_boost(VP9_COMP *cpi, const FIRSTPASS_STATS *this_frame,
|
||||
double this_frame_mv_in_out) {
|
||||
double frame_boost;
|
||||
@@ -2238,9 +2237,6 @@ static void allocate_gf_group_bits(VP9_COMP *cpi, int64_t gf_group_bits,
|
||||
}
|
||||
gf_group->arf_update_idx[0] = arf_buffer_indices[0];
|
||||
gf_group->arf_ref_idx[0] = arf_buffer_indices[0];
|
||||
|
||||
// Step over the golden frame / overlay frame
|
||||
if (EOF == input_stats(twopass, &frame_stats)) return;
|
||||
}
|
||||
|
||||
// Deduct the boost bits for arf (or gf if it is not a key frame)
|
||||
@@ -2285,7 +2281,8 @@ static void allocate_gf_group_bits(VP9_COMP *cpi, int64_t gf_group_bits,
|
||||
// Define middle frame
|
||||
mid_frame_idx = frame_index + (rc->baseline_gf_interval >> 1) - 1;
|
||||
|
||||
normal_frames = (rc->baseline_gf_interval - rc->source_alt_ref_pending);
|
||||
normal_frames =
|
||||
rc->baseline_gf_interval - (key_frame || rc->source_alt_ref_pending);
|
||||
if (normal_frames > 1)
|
||||
normal_frame_bits = (int)(total_group_bits / normal_frames);
|
||||
else
|
||||
@@ -2383,6 +2380,8 @@ static void adjust_group_arnr_filter(VP9_COMP *cpi, double section_noise,
|
||||
|
||||
// Analyse and define a gf/arf group.
|
||||
#define ARF_DECAY_BREAKOUT 0.10
|
||||
#define ARF_ABS_ZOOM_THRESH 4.0
|
||||
|
||||
static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
|
||||
VP9_COMMON *const cm = &cpi->common;
|
||||
RATE_CONTROL *const rc = &cpi->rc;
|
||||
@@ -2411,7 +2410,6 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
|
||||
double mv_in_out_accumulator = 0.0;
|
||||
double abs_mv_in_out_accumulator = 0.0;
|
||||
double mv_ratio_accumulator_thresh;
|
||||
double mv_in_out_thresh;
|
||||
double abs_mv_in_out_thresh;
|
||||
double sr_accumulator = 0.0;
|
||||
const double av_err = get_distribution_av_err(cpi, twopass);
|
||||
@@ -2457,8 +2455,7 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
|
||||
// Motion breakout threshold for loop below depends on image size.
|
||||
mv_ratio_accumulator_thresh =
|
||||
(cpi->initial_height + cpi->initial_width) / 4.0;
|
||||
mv_in_out_thresh = (cpi->initial_height + cpi->initial_width) / 300.0;
|
||||
abs_mv_in_out_thresh = (cpi->initial_height + cpi->initial_width) / 200.0;
|
||||
abs_mv_in_out_thresh = ARF_ABS_ZOOM_THRESH;
|
||||
|
||||
// Set a maximum and minimum interval for the GF group.
|
||||
// If the image appears almost completely static we can extend beyond this.
|
||||
@@ -2543,14 +2540,17 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
|
||||
// Update the accumulator for second ref error difference.
|
||||
// This is intended to give an indication of how much the coded error is
|
||||
// increasing over time.
|
||||
sr_accumulator += (next_frame.sr_coded_error - next_frame.coded_error);
|
||||
sr_accumulator = VPXMAX(0.0, sr_accumulator);
|
||||
if (i == 1) {
|
||||
sr_accumulator += next_frame.coded_error;
|
||||
} else {
|
||||
sr_accumulator += (next_frame.sr_coded_error - next_frame.coded_error);
|
||||
}
|
||||
}
|
||||
|
||||
// Break out conditions.
|
||||
if (
|
||||
// Break at active_max_gf_interval unless almost totally static.
|
||||
((i >= active_max_gf_interval) && (zero_motion_accumulator < 0.995)) ||
|
||||
// Break at maximum of active_max_gf_interval unless almost totally static.
|
||||
if (((twopass->kf_zeromotion_pct < STATIC_KF_GROUP_THRESH) &&
|
||||
(i >= active_max_gf_interval) && (zero_motion_accumulator < 0.995)) ||
|
||||
(
|
||||
// Don't break out with a very short interval.
|
||||
(i >= active_min_gf_interval) &&
|
||||
@@ -2559,7 +2559,6 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
|
||||
(!flash_detected) &&
|
||||
((mv_ratio_accumulator > mv_ratio_accumulator_thresh) ||
|
||||
(abs_mv_in_out_accumulator > abs_mv_in_out_thresh) ||
|
||||
(mv_in_out_accumulator < -mv_in_out_thresh) ||
|
||||
(sr_accumulator > next_frame.intra_error)))) {
|
||||
break;
|
||||
}
|
||||
@@ -2571,8 +2570,8 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
|
||||
rc->constrained_gf_group = (i >= rc->frames_to_key) ? 1 : 0;
|
||||
|
||||
// Should we use the alternate reference frame.
|
||||
if (allow_alt_ref && (i < cpi->oxcf.lag_in_frames) &&
|
||||
(i >= rc->min_gf_interval)) {
|
||||
if ((twopass->kf_zeromotion_pct < STATIC_KF_GROUP_THRESH) && allow_alt_ref &&
|
||||
(i < cpi->oxcf.lag_in_frames) && (i >= rc->min_gf_interval)) {
|
||||
const int forward_frames = (rc->frames_to_key - i >= i - 1)
|
||||
? i - 1
|
||||
: VPXMAX(0, rc->frames_to_key - i);
|
||||
@@ -2600,7 +2599,10 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
|
||||
#endif
|
||||
|
||||
// Set the interval until the next gf.
|
||||
rc->baseline_gf_interval = i - (is_key_frame || rc->source_alt_ref_pending);
|
||||
rc->baseline_gf_interval =
|
||||
(twopass->kf_zeromotion_pct < STATIC_KF_GROUP_THRESH)
|
||||
? (i - (is_key_frame || rc->source_alt_ref_pending))
|
||||
: i;
|
||||
|
||||
// Only encode alt reference frame in temporal base layer. So
|
||||
// baseline_gf_interval should be multiple of a temporal layer group
|
||||
@@ -2698,6 +2700,26 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
|
||||
#endif
|
||||
}
|
||||
|
||||
// Intra / Inter threshold very low
|
||||
#define VERY_LOW_II 1.5
|
||||
// Clean slide transitions we expect a sharp single frame spike in error.
|
||||
#define ERROR_SPIKE 5.0
|
||||
|
||||
// Slide show transition detection.
|
||||
// Tests for case where there is very low error either side of the current frame
|
||||
// but much higher just for this frame. This can help detect key frames in
|
||||
// slide shows even where the slides are pictures of different sizes.
|
||||
// Also requires that intra and inter errors are very similar to help eliminate
|
||||
// harmful false positives.
|
||||
// It will not help if the transition is a fade or other multi-frame effect.
|
||||
static int slide_transition(const FIRSTPASS_STATS *this_frame,
|
||||
const FIRSTPASS_STATS *last_frame,
|
||||
const FIRSTPASS_STATS *next_frame) {
|
||||
return (this_frame->intra_error < (this_frame->coded_error * VERY_LOW_II)) &&
|
||||
(this_frame->coded_error > (last_frame->coded_error * ERROR_SPIKE)) &&
|
||||
(this_frame->coded_error > (next_frame->coded_error * ERROR_SPIKE));
|
||||
}
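Worked numbers for the detector above, using the thresholds as defined: with last and next coded_error around 100, this_frame coded_error 600 and intra_error 700, all three clauses hold (700 < 600 * 1.5, and 600 > 100 * 5 on both sides), so the frame is flagged as a slide change; a slow fade spreads the error over several frames and fails the spike tests. A standalone restatement with a tiny stand-in stats struct (the real FIRSTPASS_STATS has many more fields):

#define VERY_LOW_II_SKETCH 1.5
#define ERROR_SPIKE_SKETCH 5.0

struct fp_stats { double intra_error, coded_error; };

static int is_slide_transition(const struct fp_stats *this_frame,
                               const struct fp_stats *last_frame,
                               const struct fp_stats *next_frame) {
  return this_frame->intra_error <
             this_frame->coded_error * VERY_LOW_II_SKETCH &&
         this_frame->coded_error >
             last_frame->coded_error * ERROR_SPIKE_SKETCH &&
         this_frame->coded_error >
             next_frame->coded_error * ERROR_SPIKE_SKETCH;
}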
|
||||
|
||||
// Threshold for use of the lagging second reference frame. High second ref
|
||||
// usage may point to a transient event like a flash or occlusion rather than
|
||||
// a real scene cut.
|
||||
@@ -2742,6 +2764,7 @@ static int test_candidate_kf(TWO_PASS *twopass,
|
||||
if ((this_frame->pcnt_second_ref < SECOND_REF_USEAGE_THRESH) &&
|
||||
(next_frame->pcnt_second_ref < SECOND_REF_USEAGE_THRESH) &&
|
||||
((this_frame->pcnt_inter < VERY_LOW_INTER_THRESH) ||
|
||||
(slide_transition(this_frame, last_frame, next_frame)) ||
|
||||
((pcnt_intra > MIN_INTRA_LEVEL) &&
|
||||
(pcnt_intra > (INTRA_VS_INTER_THRESH * modified_pcnt_inter)) &&
|
||||
((this_frame->intra_error /
|
||||
@@ -2813,6 +2836,7 @@ static int test_candidate_kf(TWO_PASS *twopass,
|
||||
#define FRAMES_TO_CHECK_DECAY 8
|
||||
#define MIN_KF_TOT_BOOST 300
|
||||
#define KF_BOOST_SCAN_MAX_FRAMES 32
|
||||
#define KF_ABS_ZOOM_THRESH 6.0
|
||||
|
||||
#ifdef AGGRESSIVE_VBR
|
||||
#define KF_MAX_FRAME_BOOST 80.0
|
||||
@@ -2840,6 +2864,7 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
|
||||
double kf_group_err = 0.0;
|
||||
double recent_loop_decay[FRAMES_TO_CHECK_DECAY];
|
||||
double sr_accumulator = 0.0;
|
||||
double abs_mv_in_out_accumulator = 0.0;
|
||||
const double av_err = get_distribution_av_err(cpi, twopass);
|
||||
vp9_zero(next_frame);
|
||||
|
||||
@@ -3004,8 +3029,14 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
|
||||
double zm_factor;
|
||||
|
||||
// Monitor for static sections.
|
||||
zero_motion_accumulator = VPXMIN(
|
||||
zero_motion_accumulator, get_zero_motion_factor(cpi, &next_frame));
|
||||
// First frame in kf group the second ref indicator is invalid.
|
||||
if (i > 0) {
|
||||
zero_motion_accumulator = VPXMIN(
|
||||
zero_motion_accumulator, get_zero_motion_factor(cpi, &next_frame));
|
||||
} else {
|
||||
zero_motion_accumulator =
|
||||
next_frame.pcnt_inter - next_frame.pcnt_motion;
|
||||
}
|
||||
|
||||
// Factor 0.75-1.25 based on how much of frame is static.
|
||||
zm_factor = (0.75 + (zero_motion_accumulator / 2.0));
|
||||
@@ -3019,7 +3050,14 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
|
||||
KF_MAX_FRAME_BOOST * zm_factor);
|
||||
|
||||
boost_score += frame_boost;
|
||||
if (frame_boost < 25.00) break;
|
||||
|
||||
// Measure of zoom. Large zoom tends to indicate reduced boost.
|
||||
abs_mv_in_out_accumulator +=
|
||||
fabs(next_frame.mv_in_out_count * next_frame.pcnt_motion);
|
||||
|
||||
if ((frame_boost < 25.00) ||
|
||||
(abs_mv_in_out_accumulator > KF_ABS_ZOOM_THRESH))
|
||||
break;
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
@@ -3034,10 +3072,16 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
|
||||
twopass->section_intra_rating = calculate_section_intra_ratio(
|
||||
start_position, twopass->stats_in_end, rc->frames_to_key);
|
||||
|
||||
// Apply various clamps for min and max boost
|
||||
rc->kf_boost = VPXMAX((int)boost_score, (rc->frames_to_key * 3));
|
||||
rc->kf_boost = VPXMAX(rc->kf_boost, MIN_KF_TOT_BOOST);
|
||||
rc->kf_boost = VPXMIN(rc->kf_boost, MAX_KF_TOT_BOOST);
|
||||
// Special case for static / slide show content but dont apply
|
||||
// if the kf group is very short.
|
||||
if ((zero_motion_accumulator > 0.99) && (rc->frames_to_key > 8)) {
|
||||
rc->kf_boost = VPXMAX((rc->frames_to_key * 100), MAX_KF_TOT_BOOST);
|
||||
} else {
|
||||
// Apply various clamps for min and max boost
|
||||
rc->kf_boost = VPXMAX((int)boost_score, (rc->frames_to_key * 3));
|
||||
rc->kf_boost = VPXMAX(rc->kf_boost, MIN_KF_TOT_BOOST);
|
||||
rc->kf_boost = VPXMIN(rc->kf_boost, MAX_KF_TOT_BOOST);
|
||||
}
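For static / slide-show content the boost is now tied to the key-frame group length rather than the usual clamps: with frames_to_key = 60, for example, the special case yields kf_boost = max(60 * 100, MAX_KF_TOT_BOOST) = 6000 assuming MAX_KF_TOT_BOOST is not larger, whereas the normal path caps the boost at MAX_KF_TOT_BOOST. A compact sketch of the branch; the min/max boost constants are passed in because their values are not shown in this hunk:

static int pick_kf_boost(double boost_score, int frames_to_key,
                         double zero_motion_accumulator, int min_kf_tot_boost,
                         int max_kf_tot_boost) {
  int kf_boost;
  if (zero_motion_accumulator > 0.99 && frames_to_key > 8) {
    /* Static / slide-show special case: scale with group length. */
    kf_boost = frames_to_key * 100;
    if (kf_boost < max_kf_tot_boost) kf_boost = max_kf_tot_boost;
  } else {
    /* Normal clamps, as in the else-branch above. */
    kf_boost = (int)boost_score;
    if (kf_boost < frames_to_key * 3) kf_boost = frames_to_key * 3;
    if (kf_boost < min_kf_tot_boost) kf_boost = min_kf_tot_boost;
    if (kf_boost > max_kf_tot_boost) kf_boost = max_kf_tot_boost;
  }
  return kf_boost;
}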
|
||||
|
||||
// Work out how many bits to allocate for the key frame itself.
|
||||
kf_bits = calculate_boost_bits((rc->frames_to_key - 1), rc->kf_boost,
|
||||
|
@@ -120,12 +120,12 @@ typedef enum {
|
||||
typedef struct {
|
||||
unsigned char index;
|
||||
unsigned char first_inter_index;
|
||||
RATE_FACTOR_LEVEL rf_level[(MAX_LAG_BUFFERS * 2) + 1];
|
||||
FRAME_UPDATE_TYPE update_type[(MAX_LAG_BUFFERS * 2) + 1];
|
||||
unsigned char arf_src_offset[(MAX_LAG_BUFFERS * 2) + 1];
|
||||
unsigned char arf_update_idx[(MAX_LAG_BUFFERS * 2) + 1];
|
||||
unsigned char arf_ref_idx[(MAX_LAG_BUFFERS * 2) + 1];
|
||||
int bit_allocation[(MAX_LAG_BUFFERS * 2) + 1];
|
||||
RATE_FACTOR_LEVEL rf_level[MAX_STATIC_GF_GROUP_LENGTH + 1];
|
||||
FRAME_UPDATE_TYPE update_type[MAX_STATIC_GF_GROUP_LENGTH + 1];
|
||||
unsigned char arf_src_offset[MAX_STATIC_GF_GROUP_LENGTH + 1];
|
||||
unsigned char arf_update_idx[MAX_STATIC_GF_GROUP_LENGTH + 1];
|
||||
unsigned char arf_ref_idx[MAX_STATIC_GF_GROUP_LENGTH + 1];
|
||||
int bit_allocation[MAX_STATIC_GF_GROUP_LENGTH + 1];
|
||||
} GF_GROUP;
|
||||
|
||||
typedef struct {
|
||||
|
@@ -25,7 +25,9 @@ typedef struct {
|
||||
} ref[MAX_REF_FRAMES];
|
||||
} MBGRAPH_MB_STATS;
|
||||
|
||||
typedef struct { MBGRAPH_MB_STATS *mb_stats; } MBGRAPH_FRAME_STATS;
|
||||
typedef struct {
|
||||
MBGRAPH_MB_STATS *mb_stats;
|
||||
} MBGRAPH_FRAME_STATS;
|
||||
|
||||
struct VP9_COMP;
|
||||
|
||||
|
@@ -1785,7 +1785,10 @@ static int vector_match(int16_t *ref, int16_t *src, int bwl) {
|
||||
}
|
||||
|
||||
static const MV search_pos[4] = {
|
||||
{ -1, 0 }, { 0, -1 }, { 0, 1 }, { 1, 0 },
|
||||
{ -1, 0 },
|
||||
{ 0, -1 },
|
||||
{ 0, 1 },
|
||||
{ 1, 0 },
|
||||
};
|
||||
|
||||
unsigned int vp9_int_pro_motion_estimation(const VP9_COMP *cpi, MACROBLOCK *x,
|
||||
@@ -1876,7 +1879,10 @@ unsigned int vp9_int_pro_motion_estimation(const VP9_COMP *cpi, MACROBLOCK *x,
|
||||
|
||||
{
|
||||
const uint8_t *const pos[4] = {
|
||||
ref_buf - ref_stride, ref_buf - 1, ref_buf + 1, ref_buf + ref_stride,
|
||||
ref_buf - ref_stride,
|
||||
ref_buf - 1,
|
||||
ref_buf + 1,
|
||||
ref_buf + ref_stride,
|
||||
};
|
||||
|
||||
cpi->fn_ptr[bsize].sdx4df(src_buf, src_stride, pos, ref_stride, this_sad);
|
||||
|
@@ -21,6 +21,15 @@
|
||||
#include "vp9/encoder/vp9_noise_estimate.h"
|
||||
#include "vp9/encoder/vp9_encoder.h"
|
||||
|
||||
#if CONFIG_VP9_TEMPORAL_DENOISING
|
||||
// For SVC: only do noise estimation on top spatial layer.
|
||||
static INLINE int noise_est_svc(const struct VP9_COMP *const cpi) {
|
||||
return (!cpi->use_svc ||
|
||||
(cpi->use_svc &&
|
||||
cpi->svc.spatial_layer_id == cpi->svc.number_spatial_layers - 1));
|
||||
}
|
||||
#endif
|
||||
|
||||
void vp9_noise_estimate_init(NOISE_ESTIMATE *const ne, int width, int height) {
|
||||
ne->enabled = 0;
|
||||
ne->level = kLowLow;
|
||||
@@ -45,7 +54,7 @@ static int enable_noise_estimation(VP9_COMP *const cpi) {
|
||||
#endif
|
||||
// Enable noise estimation if denoising is on.
|
||||
#if CONFIG_VP9_TEMPORAL_DENOISING
|
||||
if (cpi->oxcf.noise_sensitivity > 0 && denoise_svc(cpi) &&
|
||||
if (cpi->oxcf.noise_sensitivity > 0 && noise_est_svc(cpi) &&
|
||||
cpi->common.width >= 320 && cpi->common.height >= 180)
|
||||
return 1;
|
||||
#endif
|
||||
@@ -111,7 +120,7 @@ void vp9_update_noise_estimate(VP9_COMP *const cpi) {
|
||||
// Estimate is between current source and last source.
|
||||
YV12_BUFFER_CONFIG *last_source = cpi->Last_Source;
|
||||
#if CONFIG_VP9_TEMPORAL_DENOISING
|
||||
if (cpi->oxcf.noise_sensitivity > 0 && denoise_svc(cpi)) {
|
||||
if (cpi->oxcf.noise_sensitivity > 0 && noise_est_svc(cpi)) {
|
||||
last_source = &cpi->denoiser.last_source;
|
||||
// Tune these thresholds for different resolutions when denoising is
|
||||
// enabled.
|
||||
@@ -131,7 +140,7 @@ void vp9_update_noise_estimate(VP9_COMP *const cpi) {
|
||||
(cpi->svc.number_spatial_layers == 1 &&
|
||||
(ne->last_w != cm->width || ne->last_h != cm->height))) {
|
||||
#if CONFIG_VP9_TEMPORAL_DENOISING
|
||||
if (cpi->oxcf.noise_sensitivity > 0 && denoise_svc(cpi))
|
||||
if (cpi->oxcf.noise_sensitivity > 0 && noise_est_svc(cpi))
|
||||
copy_frame(&cpi->denoiser.last_source, cpi->Source);
|
||||
#endif
|
||||
if (last_source != NULL) {
|
||||
@@ -146,7 +155,7 @@ void vp9_update_noise_estimate(VP9_COMP *const cpi) {
|
||||
ne->count = 0;
|
||||
ne->num_frames_estimate = 10;
|
||||
#if CONFIG_VP9_TEMPORAL_DENOISING
|
||||
if (cpi->oxcf.noise_sensitivity > 0 && denoise_svc(cpi) &&
|
||||
if (cpi->oxcf.noise_sensitivity > 0 && noise_est_svc(cpi) &&
|
||||
cpi->svc.current_superframe > 1) {
|
||||
vp9_denoiser_set_noise_level(&cpi->denoiser, ne->level);
|
||||
copy_frame(&cpi->denoiser.last_source, cpi->Source);
|
||||
@@ -249,7 +258,7 @@ void vp9_update_noise_estimate(VP9_COMP *const cpi) {
|
||||
// Normalize.
|
||||
avg_est = avg_est / num_samples;
|
||||
// Update noise estimate.
|
||||
ne->value = (int)((15 * ne->value + avg_est) >> 4);
|
||||
ne->value = (int)((3 * ne->value + avg_est) >> 2);
|
||||
ne->count++;
|
||||
if (ne->count == ne->num_frames_estimate) {
|
||||
// Reset counter and check noise level condition.
|
||||
@@ -257,14 +266,14 @@ void vp9_update_noise_estimate(VP9_COMP *const cpi) {
|
||||
ne->count = 0;
|
||||
ne->level = vp9_noise_estimate_extract_level(ne);
|
||||
#if CONFIG_VP9_TEMPORAL_DENOISING
|
||||
if (cpi->oxcf.noise_sensitivity > 0 && denoise_svc(cpi))
|
||||
if (cpi->oxcf.noise_sensitivity > 0 && noise_est_svc(cpi))
|
||||
vp9_denoiser_set_noise_level(&cpi->denoiser, ne->level);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
}
|
||||
#if CONFIG_VP9_TEMPORAL_DENOISING
|
||||
if (cpi->oxcf.noise_sensitivity > 0 && denoise_svc(cpi))
|
||||
if (cpi->oxcf.noise_sensitivity > 0 && noise_est_svc(cpi))
|
||||
copy_frame(&cpi->denoiser.last_source, cpi->Source);
|
||||
#endif
|
||||
}
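The running noise value is a simple exponential moving average; the change above shortens its memory from a 15/16 : 1/16 blend to a 3/4 : 1/4 blend, so each new measurement pulls the estimate about four times harder. A small sketch comparing the two updates, matching the integer-shift expressions in the hunk:

static int update_noise_old(int value, int avg_est) {
  return (15 * value + avg_est) >> 4; /* ~6% weight on the new sample */
}
static int update_noise_new(int value, int avg_est) {
  return (3 * value + avg_est) >> 2;  /* 25% weight on the new sample */
}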
|
||||
|
@@ -1488,7 +1488,6 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data,
|
||||
int skip_ref_find_pred[4] = { 0 };
|
||||
unsigned int sse_zeromv_normalized = UINT_MAX;
|
||||
unsigned int best_sse_sofar = UINT_MAX;
|
||||
unsigned int thresh_svc_skip_golden = 500;
|
||||
#if CONFIG_VP9_TEMPORAL_DENOISING
|
||||
VP9_PICKMODE_CTX_DEN ctx_den;
|
||||
int64_t zero_last_cost_orig = INT64_MAX;
|
||||
@@ -1496,8 +1495,23 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data,
|
||||
#endif
|
||||
INTERP_FILTER filter_gf_svc = EIGHTTAP;
|
||||
MV_REFERENCE_FRAME best_second_ref_frame = NONE;
|
||||
const struct segmentation *const seg = &cm->seg;
|
||||
int comp_modes = 0;
|
||||
int num_inter_modes = (cpi->use_svc) ? RT_INTER_MODES_SVC : RT_INTER_MODES;
|
||||
int flag_svc_subpel = 0;
|
||||
int svc_mv_col = 0;
|
||||
int svc_mv_row = 0;
|
||||
unsigned int thresh_svc_skip_golden = 500;
|
||||
// Lower the skip threshold if lower spatial layer is better quality relative
|
||||
// to current layer.
|
||||
if (cpi->svc.spatial_layer_id > 0 && cm->base_qindex > 150 &&
|
||||
cm->base_qindex > cpi->svc.lower_layer_qindex + 15)
|
||||
thresh_svc_skip_golden = 100;
|
||||
// Increase skip threshold if lower spatial layer is lower quality relative
|
||||
// to current layer.
|
||||
else if (cpi->svc.spatial_layer_id > 0 && cm->base_qindex < 140 &&
|
||||
cm->base_qindex < cpi->svc.lower_layer_qindex - 20)
|
||||
thresh_svc_skip_golden = 1000;
|
||||
|
||||
init_ref_frame_cost(cm, xd, ref_frame_cost);
|
||||
|
||||
@@ -1635,6 +1649,16 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data,
|
||||
cpi->sf.use_compound_nonrd_pickmode && usable_ref_frame == ALTREF_FRAME)
|
||||
comp_modes = 2;
|
||||
|
||||
// If the segment reference frame feature is enabled and it's set to GOLDEN
|
||||
// reference, then make sure we don't skip checking GOLDEN, this is to
|
||||
// prevent possibility of not picking any mode.
|
||||
if (segfeature_active(seg, mi->segment_id, SEG_LVL_REF_FRAME) &&
|
||||
get_segdata(seg, mi->segment_id, SEG_LVL_REF_FRAME) == GOLDEN_FRAME) {
|
||||
usable_ref_frame = GOLDEN_FRAME;
|
||||
skip_ref_find_pred[GOLDEN_FRAME] = 0;
|
||||
thresh_svc_skip_golden = 0;
|
||||
}
|
||||
|
||||
for (ref_frame = LAST_FRAME; ref_frame <= usable_ref_frame; ++ref_frame) {
|
||||
if (!skip_ref_find_pred[ref_frame]) {
|
||||
find_predictors(cpi, x, ref_frame, frame_mv, const_motion,
|
||||
@@ -1647,6 +1671,18 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data,
|
||||
if (cpi->use_svc || cpi->oxcf.speed <= 7 || bsize < BLOCK_32X32)
|
||||
x->sb_use_mv_part = 0;
|
||||
|
||||
// Set the flag_svc_subpel to 1 for SVC if the lower spatial layer used
|
||||
// an averaging filter for downsampling (phase = 8). If so, we will test
|
||||
// a nonzero motion mode on the spatial (goldeen) reference.
|
||||
// The nonzero motion is half pixel shifted to left and top (-4, -4).
|
||||
if (cpi->use_svc && cpi->svc.spatial_layer_id > 0 &&
|
||||
svc_force_zero_mode[GOLDEN_FRAME - 1] &&
|
||||
cpi->svc.downsample_filter_phase[cpi->svc.spatial_layer_id - 1] == 8) {
|
||||
svc_mv_col = -4;
|
||||
svc_mv_row = -4;
|
||||
flag_svc_subpel = 1;
|
||||
}
|
||||
|
||||
for (idx = 0; idx < num_inter_modes + comp_modes; ++idx) {
|
||||
int rate_mv = 0;
|
||||
int mode_rd_thresh;
|
||||
@@ -1660,6 +1696,7 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data,
|
||||
int inter_mv_mode = 0;
|
||||
int skip_this_mv = 0;
|
||||
int comp_pred = 0;
|
||||
int force_gf_mv = 0;
|
||||
PREDICTION_MODE this_mode;
|
||||
second_ref_frame = NONE;
|
||||
|
||||
@@ -1680,8 +1717,29 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data,
|
||||
comp_pred = 1;
|
||||
}
|
||||
|
||||
if (ref_frame > usable_ref_frame) continue;
|
||||
if (skip_ref_find_pred[ref_frame]) continue;
|
||||
|
||||
// If the segment reference frame feature is enabled then do nothing if the
|
||||
// current ref frame is not allowed.
|
||||
if (segfeature_active(seg, mi->segment_id, SEG_LVL_REF_FRAME) &&
|
||||
get_segdata(seg, mi->segment_id, SEG_LVL_REF_FRAME) != (int)ref_frame)
|
||||
continue;
|
||||
|
||||
if (flag_svc_subpel && ref_frame == GOLDEN_FRAME) {
|
||||
force_gf_mv = 1;
|
||||
// Only test mode if NEARESTMV/NEARMV is (svc_mv_col, svc_mv_row),
|
||||
// otherwise set NEWMV to (svc_mv_col, svc_mv_row).
|
||||
if (this_mode == NEWMV) {
|
||||
frame_mv[this_mode][ref_frame].as_mv.col = svc_mv_col;
|
||||
frame_mv[this_mode][ref_frame].as_mv.row = svc_mv_row;
|
||||
} else if (frame_mv[this_mode][ref_frame].as_mv.col != svc_mv_col ||
|
||||
frame_mv[this_mode][ref_frame].as_mv.row != svc_mv_row) {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
if (comp_pred) {
|
||||
const struct segmentation *const seg = &cm->seg;
|
||||
if (!cpi->allow_comp_inter_inter) continue;
|
||||
// Skip compound inter modes if ARF is not available.
|
||||
if (!(cpi->ref_frame_flags & flag_list[second_ref_frame])) continue;
|
||||
@@ -1690,9 +1748,6 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data,
|
||||
if (segfeature_active(seg, mi->segment_id, SEG_LVL_REF_FRAME)) continue;
|
||||
}
|
||||
|
||||
if (ref_frame > usable_ref_frame) continue;
|
||||
if (skip_ref_find_pred[ref_frame]) continue;
|
||||
|
||||
// For SVC, skip the golden (spatial) reference search if sse of zeromv_last
|
||||
// is below threshold.
|
||||
if (cpi->use_svc && ref_frame == GOLDEN_FRAME &&
|
||||
@@ -1737,7 +1792,7 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data,
|
||||
// Skip non-zeromv mode search for golden frame if force_skip_low_temp_var
|
||||
// is set. If nearestmv for golden frame is 0, zeromv mode will be skipped
|
||||
// later.
|
||||
if (force_skip_low_temp_var && ref_frame == GOLDEN_FRAME &&
|
||||
if (!force_gf_mv && force_skip_low_temp_var && ref_frame == GOLDEN_FRAME &&
|
||||
frame_mv[this_mode][ref_frame].as_int != 0) {
|
||||
continue;
|
||||
}
|
||||
@@ -1751,34 +1806,39 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data,
|
||||
}
|
||||
|
||||
if (cpi->use_svc) {
|
||||
if (svc_force_zero_mode[ref_frame - 1] &&
|
||||
if (!force_gf_mv && svc_force_zero_mode[ref_frame - 1] &&
|
||||
frame_mv[this_mode][ref_frame].as_int != 0)
|
||||
continue;
|
||||
}
|
||||
|
||||
if (sf->reference_masking &&
|
||||
!(frame_mv[this_mode][ref_frame].as_int == 0 &&
|
||||
ref_frame == LAST_FRAME)) {
|
||||
if (usable_ref_frame < ALTREF_FRAME) {
|
||||
if (!force_skip_low_temp_var && usable_ref_frame > LAST_FRAME) {
|
||||
i = (ref_frame == LAST_FRAME) ? GOLDEN_FRAME : LAST_FRAME;
|
||||
if ((cpi->ref_frame_flags & flag_list[i]))
|
||||
if (x->pred_mv_sad[ref_frame] > (x->pred_mv_sad[i] << 1))
|
||||
ref_frame_skip_mask |= (1 << ref_frame);
|
||||
// Disable this drop out case if the ref frame segment level feature is
|
||||
// enabled for this segment. This is to prevent the possibility that we end
|
||||
// up unable to pick any mode.
|
||||
if (!segfeature_active(seg, mi->segment_id, SEG_LVL_REF_FRAME)) {
|
||||
if (sf->reference_masking &&
|
||||
!(frame_mv[this_mode][ref_frame].as_int == 0 &&
|
||||
ref_frame == LAST_FRAME)) {
|
||||
if (usable_ref_frame < ALTREF_FRAME) {
|
||||
if (!force_skip_low_temp_var && usable_ref_frame > LAST_FRAME) {
|
||||
i = (ref_frame == LAST_FRAME) ? GOLDEN_FRAME : LAST_FRAME;
|
||||
if ((cpi->ref_frame_flags & flag_list[i]))
|
||||
if (x->pred_mv_sad[ref_frame] > (x->pred_mv_sad[i] << 1))
|
||||
ref_frame_skip_mask |= (1 << ref_frame);
|
||||
}
|
||||
} else if (!cpi->rc.is_src_frame_alt_ref &&
|
||||
!(frame_mv[this_mode][ref_frame].as_int == 0 &&
|
||||
ref_frame == ALTREF_FRAME)) {
|
||||
int ref1 = (ref_frame == GOLDEN_FRAME) ? LAST_FRAME : GOLDEN_FRAME;
|
||||
int ref2 = (ref_frame == ALTREF_FRAME) ? LAST_FRAME : ALTREF_FRAME;
|
||||
if (((cpi->ref_frame_flags & flag_list[ref1]) &&
|
||||
(x->pred_mv_sad[ref_frame] > (x->pred_mv_sad[ref1] << 1))) ||
|
||||
((cpi->ref_frame_flags & flag_list[ref2]) &&
|
||||
(x->pred_mv_sad[ref_frame] > (x->pred_mv_sad[ref2] << 1))))
|
||||
ref_frame_skip_mask |= (1 << ref_frame);
|
||||
}
|
||||
} else if (!cpi->rc.is_src_frame_alt_ref &&
|
||||
!(frame_mv[this_mode][ref_frame].as_int == 0 &&
|
||||
ref_frame == ALTREF_FRAME)) {
|
||||
int ref1 = (ref_frame == GOLDEN_FRAME) ? LAST_FRAME : GOLDEN_FRAME;
|
||||
int ref2 = (ref_frame == ALTREF_FRAME) ? LAST_FRAME : ALTREF_FRAME;
|
||||
if (((cpi->ref_frame_flags & flag_list[ref1]) &&
|
||||
(x->pred_mv_sad[ref_frame] > (x->pred_mv_sad[ref1] << 1))) ||
|
||||
((cpi->ref_frame_flags & flag_list[ref2]) &&
|
||||
(x->pred_mv_sad[ref_frame] > (x->pred_mv_sad[ref2] << 1))))
|
||||
ref_frame_skip_mask |= (1 << ref_frame);
|
||||
}
|
||||
if (ref_frame_skip_mask & (1 << ref_frame)) continue;
|
||||
}
|
||||
if (ref_frame_skip_mask & (1 << ref_frame)) continue;
|
||||
|
||||
// Select prediction reference frames.
|
||||
for (i = 0; i < MAX_MB_PLANE; i++) {
|
||||
@@ -1808,7 +1868,7 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data,
|
||||
&rd_thresh_freq_fact[mode_index])))
|
||||
continue;
|
||||
|
||||
if (this_mode == NEWMV) {
|
||||
if (this_mode == NEWMV && !force_gf_mv) {
|
||||
if (ref_frame > LAST_FRAME && !cpi->use_svc &&
|
||||
cpi->oxcf.rc_mode == VPX_CBR) {
|
||||
int tmp_sad;
|
||||
@@ -1949,7 +2009,7 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data,
|
||||
if ((this_mode == NEWMV || filter_ref == SWITCHABLE) &&
|
||||
pred_filter_search &&
|
||||
(ref_frame == LAST_FRAME ||
|
||||
(ref_frame == GOLDEN_FRAME &&
|
||||
(ref_frame == GOLDEN_FRAME && !force_gf_mv &&
|
||||
(cpi->use_svc || cpi->oxcf.rc_mode == VPX_VBR))) &&
|
||||
(((mi->mv[0].as_mv.row | mi->mv[0].as_mv.col) & 0x07) != 0)) {
|
||||
int pf_rate[3];
|
||||
@@ -2173,9 +2233,11 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data,
|
||||
|
||||
// For spatial enhancemanent layer: perform intra prediction only if base
|
||||
// layer is chosen as the reference. Always perform intra prediction if
|
||||
// LAST is the only reference or is_key_frame is set.
|
||||
// LAST is the only reference, or is_key_frame is set, or on base
|
||||
// temporal layer.
|
||||
if (cpi->svc.spatial_layer_id) {
|
||||
perform_intra_pred =
|
||||
cpi->svc.temporal_layer_id == 0 ||
|
||||
cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame ||
|
||||
!(cpi->ref_frame_flags & flag_list[GOLDEN_FRAME]) ||
|
||||
(!cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame &&
|
||||
@@ -2185,6 +2247,13 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data,
|
||||
if (cpi->oxcf.lag_in_frames > 0 && cpi->oxcf.rc_mode == VPX_VBR &&
|
||||
cpi->rc.is_src_frame_alt_ref)
|
||||
perform_intra_pred = 0;
|
||||
|
||||
// If the segment reference frame feature is enabled and set then
|
||||
// skip the intra prediction.
|
||||
if (segfeature_active(seg, mi->segment_id, SEG_LVL_REF_FRAME) &&
|
||||
get_segdata(seg, mi->segment_id, SEG_LVL_REF_FRAME) > 0)
|
||||
perform_intra_pred = 0;
|
||||
|
||||
// Perform intra prediction search, if the best SAD is above a certain
|
||||
// threshold.
|
||||
if (best_rdc.rdcost == INT64_MAX ||
|
||||
|
@@ -31,10 +31,13 @@
|
||||
#include "vp9/encoder/vp9_encodemv.h"
|
||||
#include "vp9/encoder/vp9_ratectrl.h"
|
||||
|
||||
// Max rate target for 1080P and below encodes under normal circumstances
|
||||
// (1920 * 1080 / (16 * 16)) * MAX_MB_RATE bits per MB
|
||||
// Max rate per frame for 1080P and below encodes if no level requirement given.
|
||||
// For larger formats limit to MAX_MB_RATE bits per MB
|
||||
// 4Mbits is derived from the level requirement for level 4 (1080P 30) which
|
||||
// requires that HW can sustain a rate of 16Mbits over a 4 frame group.
|
||||
// If a lower level requirement is specified then this may over ride this value.
|
||||
#define MAX_MB_RATE 250
|
||||
#define MAXRATE_1080P 2025000
|
||||
#define MAXRATE_1080P 4000000
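The arithmetic behind the new cap, as stated in the comment above: level 4 requires that hardware sustain 16 Mbits over a 4-frame group, so the per-frame ceiling becomes 16,000,000 / 4 = 4,000,000 bits, replacing the earlier 2,025,000 value. A one-line compile-time check of that derivation (names are illustrative):

enum { LEVEL4_GROUP_BITS = 16000000, GROUP_FRAMES = 4 };
/* Fails to compile if the per-frame figure ever drifts from 4 Mbits. */
typedef char maxrate_1080p_check[
    (LEVEL4_GROUP_BITS / GROUP_FRAMES == 4000000) ? 1 : -1];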
|
||||
|
||||
#define DEFAULT_KF_BOOST 2000
|
||||
#define DEFAULT_GF_BOOST 2000
@@ -1100,6 +1103,9 @@ static int rc_pick_q_and_bounds_two_pass(const VP9_COMP *cpi, int *bottom_index,
// Baseline value derived from cpi->active_worst_quality and kf boost.
active_best_quality =
get_kf_active_quality(rc, active_worst_quality, cm->bit_depth);
if (cpi->twopass.kf_zeromotion_pct >= STATIC_KF_GROUP_THRESH) {
active_best_quality /= 4;
}

// Allow somewhat lower kf minq with small image formats.
if ((cm->width * cm->height) <= (352 * 288)) {
@@ -1488,15 +1494,22 @@ void vp9_rc_postencode_update(VP9_COMP *cpi, uint64_t bytes_used) {
cpi->rc.last_frame_is_src_altref = cpi->rc.is_src_frame_alt_ref;
}
if (cm->frame_type != KEY_FRAME) rc->reset_high_source_sad = 0;

rc->last_avg_frame_bandwidth = rc->avg_frame_bandwidth;
if (cpi->use_svc &&
cpi->svc.spatial_layer_id < cpi->svc.number_spatial_layers - 1)
cpi->svc.lower_layer_qindex = cm->base_qindex;
}

void vp9_rc_postencode_update_drop_frame(VP9_COMP *cpi) {
// Update buffer level with zero size, update frame counters, and return.
update_buffer_level(cpi, 0);
cpi->common.current_video_frame++;
cpi->rc.frames_since_key++;
cpi->rc.frames_to_key--;
cpi->rc.rc_2_frame = 0;
cpi->rc.rc_1_frame = 0;
cpi->rc.last_avg_frame_bandwidth = cpi->rc.avg_frame_bandwidth;
}

static int calc_pframe_target_size_one_pass_vbr(const VP9_COMP *const cpi) {
@@ -1580,9 +1593,8 @@ void vp9_rc_get_one_pass_vbr_params(VP9_COMP *cpi) {
// Adjust boost and af_ratio based on avg_frame_low_motion, which varies
// between 0 and 100 (stationary, 100% zero/small motion).
rc->gfu_boost =
VPXMAX(500,
DEFAULT_GF_BOOST * (rc->avg_frame_low_motion << 1) /
(rc->avg_frame_low_motion + 100));
VPXMAX(500, DEFAULT_GF_BOOST * (rc->avg_frame_low_motion << 1) /
(rc->avg_frame_low_motion + 100));
rc->af_ratio_onepass_vbr = VPXMIN(15, VPXMAX(5, 3 * rc->gfu_boost / 400));
}
adjust_gfint_frame_constraint(cpi, rc->frames_to_key);
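
The reformatted assignment keeps the same formula: with DEFAULT_GF_BOOST at 2000, the boost is 2000 * 2m / (m + 100), clamped to at least 500, where m is avg_frame_low_motion in the 0 to 100 range. A quick evaluation of that expression (a plain restatement, not measured data):

/* m = avg_frame_low_motion; mirrors the VPXMAX(500, ...) expression above. */
static int gfu_boost_example(int m) {
  const int default_gf_boost = 2000;
  const int boost = default_gf_boost * (m << 1) / (m + 100);
  return boost > 500 ? boost : 500;
}
/* m = 0   -> 500  (clamped)
 * m = 50  -> 1333
 * m = 100 -> 2000 (fully static content keeps the full default boost) */
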
@@ -1857,13 +1869,8 @@ void vp9_rc_set_gf_interval_range(const VP9_COMP *const cpi,
rc->max_gf_interval = vp9_rc_get_default_max_gf_interval(
cpi->framerate, rc->min_gf_interval);

// Extended interval for genuinely static scenes
rc->static_scene_max_gf_interval = MAX_LAG_BUFFERS * 2;

if (is_altref_enabled(cpi)) {
if (rc->static_scene_max_gf_interval > oxcf->lag_in_frames - 1)
rc->static_scene_max_gf_interval = oxcf->lag_in_frames - 1;
}
// Extended max interval for genuinely static scenes like slide shows.
rc->static_scene_max_gf_interval = MAX_STATIC_GF_GROUP_LENGTH;

if (rc->max_gf_interval > rc->static_scene_max_gf_interval)
rc->max_gf_interval = rc->static_scene_max_gf_interval;
@@ -1873,9 +1880,12 @@ void vp9_rc_set_gf_interval_range(const VP9_COMP *const cpi,

if (oxcf->target_level == LEVEL_AUTO) {
const uint32_t pic_size = cpi->common.width * cpi->common.height;
const uint32_t pic_breadth =
VPXMAX(cpi->common.width, cpi->common.height);
int i;
for (i = LEVEL_1; i < LEVEL_MAX; ++i) {
if (vp9_level_defs[i].max_luma_picture_size > pic_size) {
if (vp9_level_defs[i].max_luma_picture_size >= pic_size &&
vp9_level_defs[i].max_luma_picture_breadth >= pic_breadth) {
if (rc->min_gf_interval <=
(int)vp9_level_defs[i].min_altref_distance) {
rc->min_gf_interval =
@@ -1904,12 +1914,12 @@ void vp9_rc_update_framerate(VP9_COMP *cpi) {
VPXMAX(rc->min_frame_bandwidth, FRAME_OVERHEAD_BITS);

// A maximum bitrate for a frame is defined.
// The baseline for this aligns with HW implementations that
// can support decode of 1080P content up to a bitrate of MAX_MB_RATE bits
// per 16x16 MB (averaged over a frame). However this limit is extended if
// a very high rate is given on the command line or the the rate cannnot
// be acheived because of a user specificed max q (e.g. when the user
// specifies lossless encode.
// However this limit is extended if a very high rate is given on the command
// line or the the rate cannnot be acheived because of a user specificed max q
// (e.g. when the user specifies lossless encode).
//
// If a level is specified that requires a lower maximum rate then the level
// value take precedence.
vbr_max_bits =
(int)(((int64_t)rc->avg_frame_bandwidth * oxcf->two_pass_vbrmax_section) /
100);
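
vbr_max_bits scales the average per-frame budget by the two-pass VBR max section percentage, so a setting of 150 lets an individual frame use up to one and a half times the average. A worked sketch with made-up numbers:

#include <stdint.h>

/* Hypothetical values; mirrors the expression above. */
static int vbr_max_bits_example(void) {
  const int avg_frame_bandwidth = 200000;  /* average bits per frame */
  const int two_pass_vbrmax_section = 150; /* percent of the average */
  return (int)(((int64_t)avg_frame_bandwidth * two_pass_vbrmax_section) / 100);
  /* -> 300000 bits allowed for this frame */
}
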
@@ -34,6 +34,14 @@ extern "C" {

#define FRAME_OVERHEAD_BITS 200

// Threshold used to define a KF group as static (e.g. a slide show).
// Essentially this means that no frame in the group has more than 1% of MBs
// that are not marked as coded with 0,0 motion in the first pass.
#define STATIC_KF_GROUP_THRESH 99

// The maximum duration of a GF group that is static (for example a slide show).
#define MAX_STATIC_GF_GROUP_LENGTH 250

typedef enum {
INTER_NORMAL = 0,
INTER_HIGH = 1,
@@ -152,6 +160,8 @@ typedef struct {
int rc_2_frame;
int q_1_frame;
int q_2_frame;
// Keep track of the last target average frame bandwidth.
int last_avg_frame_bandwidth;

// Auto frame-scaling variables.
FRAME_SCALE_LEVEL frame_size_selector;
@@ -59,7 +59,9 @@ typedef struct {
MV_REFERENCE_FRAME ref_frame[2];
} MODE_DEFINITION;

typedef struct { MV_REFERENCE_FRAME ref_frame[2]; } REF_DEFINITION;
typedef struct {
MV_REFERENCE_FRAME ref_frame[2];
} REF_DEFINITION;

struct rdcost_block_args {
const VP9_COMP *cpi;
@@ -37,14 +37,16 @@ void vp9_init_layer_context(VP9_COMP *const cpi) {
svc->scaled_one_half = 0;
svc->current_superframe = 0;
svc->non_reference_frame = 0;
svc->skip_enhancement_layer = 0;

for (i = 0; i < REF_FRAMES; ++i) svc->ref_frame_index[i] = -1;
for (sl = 0; sl < oxcf->ss_number_layers; ++sl) {
svc->ext_frame_flags[sl] = 0;
svc->ext_lst_fb_idx[sl] = 0;
svc->ext_gld_fb_idx[sl] = 1;
svc->ext_alt_fb_idx[sl] = 2;
svc->downsample_filter_type[sl] = EIGHTTAP;
svc->downsample_filter_phase[sl] = 0; // Set to 8 for averaging filter.
svc->downsample_filter_type[sl] = BILINEAR;
svc->downsample_filter_phase[sl] = 8; // Set to 8 for averaging filter.
}

if (cpi->oxcf.error_resilient_mode == 0 && cpi->oxcf.pass == 2) {
@@ -153,6 +155,8 @@ void vp9_update_layer_context_change_config(VP9_COMP *const cpi,
int sl, tl, layer = 0, spatial_layer_target;
float bitrate_alloc = 1.0;

cpi->svc.temporal_layering_mode = oxcf->temporal_layering_mode;

if (svc->temporal_layering_mode != VP9E_TEMPORAL_LAYERING_MODE_NOLAYERING) {
for (sl = 0; sl < oxcf->ss_number_layers; ++sl) {
for (tl = 0; tl < oxcf->ts_number_layers; ++tl) {
@@ -389,9 +393,9 @@ int vp9_is_upper_layer_key_frame(const VP9_COMP *const cpi) {
.is_key_frame;
}

static void get_layer_resolution(const int width_org, const int height_org,
const int num, const int den, int *width_out,
int *height_out) {
void get_layer_resolution(const int width_org, const int height_org,
const int num, const int den, int *width_out,
int *height_out) {
int w, h;

if (width_out == NULL || height_out == NULL || den == 0) return;
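
get_layer_resolution, now given external linkage so the header further down can expose it, derives a spatial layer's dimensions from the source dimensions and the layer's num/den scaling factor. As a rough sketch of the idea (the real function may round or align the result differently), a 1/2 scaling factor maps a 1280x720 source to a 640x360 layer:

/* Rough sketch only; actual rounding/alignment may differ. */
static void layer_resolution_sketch(int width_org, int height_org, int num,
                                    int den, int *width_out, int *height_out) {
  if (width_out == NULL || height_out == NULL || den == 0) return;
  *width_out = width_org * num / den;   /* 1280 * 1 / 2 = 640 */
  *height_out = height_org * num / den; /*  720 * 1 / 2 = 360 */
}
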
@@ -545,6 +549,8 @@ static void set_flags_and_fb_idx_for_temporal_mode2(VP9_COMP *const cpi) {
if (!spatial_id) {
cpi->ref_frame_flags = VP9_LAST_FLAG;
} else {
if (spatial_id == cpi->svc.number_spatial_layers - 1)
cpi->ext_refresh_alt_ref_frame = 0;
cpi->ref_frame_flags = VP9_LAST_FLAG | VP9_GOLD_FLAG;
}
}
@@ -604,6 +610,7 @@ static void set_flags_and_fb_idx_for_temporal_mode_noLayering(
int vp9_one_pass_cbr_svc_start_layer(VP9_COMP *const cpi) {
int width = 0, height = 0;
LAYER_CONTEXT *lc = NULL;
cpi->svc.skip_enhancement_layer = 0;
if (cpi->svc.number_spatial_layers > 1) cpi->svc.use_base_mv = 1;
cpi->svc.force_zero_mode_spatial_ref = 1;
cpi->svc.mi_stride[cpi->svc.spatial_layer_id] = cpi->common.mi_stride;
@@ -656,10 +663,14 @@ int vp9_one_pass_cbr_svc_start_layer(VP9_COMP *const cpi) {
lc->scaling_factor_num, lc->scaling_factor_den, &width,
&height);

// For resolutions <= QVGA: set phase of the filter = 8 (for symmetric
// For resolutions <= VGA: set phase of the filter = 8 (for symmetric
// averaging filter), use bilinear for now.
if (width * height <= 320 * 240) {
if (width * height <= 640 * 480) {
cpi->svc.downsample_filter_type[cpi->svc.spatial_layer_id] = BILINEAR;
// Use Eightap_smooth for low resolutions.
if (width * height <= 320 * 240)
cpi->svc.downsample_filter_type[cpi->svc.spatial_layer_id] =
EIGHTTAP_SMOOTH;
cpi->svc.downsample_filter_phase[cpi->svc.spatial_layer_id] = 8;
}

@@ -861,3 +872,28 @@ void vp9_svc_reset_key_frame(VP9_COMP *const cpi) {
vp9_update_temporal_layer_framerate(cpi);
vp9_restore_layer_context(cpi);
}

void vp9_svc_check_reset_layer_rc_flag(VP9_COMP *const cpi) {
SVC *svc = &cpi->svc;
int sl, tl;
for (sl = 0; sl < svc->number_spatial_layers; ++sl) {
// Check for reset based on avg_frame_bandwidth for spatial layer sl.
int layer = LAYER_IDS_TO_IDX(sl, svc->number_temporal_layers - 1,
svc->number_temporal_layers);
LAYER_CONTEXT *lc = &svc->layer_context[layer];
RATE_CONTROL *lrc = &lc->rc;
if (lrc->avg_frame_bandwidth > (3 * lrc->last_avg_frame_bandwidth >> 1) ||
lrc->avg_frame_bandwidth < (lrc->last_avg_frame_bandwidth >> 1)) {
// Reset for all temporal layers with spatial layer sl.
for (tl = 0; tl < svc->number_temporal_layers; ++tl) {
int layer = LAYER_IDS_TO_IDX(sl, tl, svc->number_temporal_layers);
LAYER_CONTEXT *lc = &svc->layer_context[layer];
RATE_CONTROL *lrc = &lc->rc;
lrc->rc_1_frame = 0;
lrc->rc_2_frame = 0;
lrc->bits_off_target = lrc->optimal_buffer_level;
lrc->buffer_level = lrc->optimal_buffer_level;
}
}
}
}
}
|
||||
|
@@ -49,7 +49,7 @@ typedef struct {
|
||||
uint8_t speed;
|
||||
} LAYER_CONTEXT;
|
||||
|
||||
typedef struct {
|
||||
typedef struct SVC {
|
||||
int spatial_layer_id;
|
||||
int temporal_layer_id;
|
||||
int number_spatial_layers;
|
||||
@@ -99,6 +99,12 @@ typedef struct {
|
||||
|
||||
BLOCK_SIZE *prev_partition_svc;
|
||||
int mi_stride[VPX_MAX_LAYERS];
|
||||
|
||||
int first_layer_denoise;
|
||||
|
||||
int skip_enhancement_layer;
|
||||
|
||||
int lower_layer_qindex;
|
||||
} SVC;
|
||||
|
||||
struct VP9_COMP;
|
||||
@@ -128,6 +134,10 @@ void vp9_save_layer_context(struct VP9_COMP *const cpi);
|
||||
// Initialize second pass rc for spatial svc.
|
||||
void vp9_init_second_pass_spatial_svc(struct VP9_COMP *cpi);
|
||||
|
||||
void get_layer_resolution(const int width_org, const int height_org,
|
||||
const int num, const int den, int *width_out,
|
||||
int *height_out);
|
||||
|
||||
// Increment number of video frames in layer
|
||||
void vp9_inc_frame_in_layer(struct VP9_COMP *const cpi);
|
||||
|
||||
@@ -148,6 +158,8 @@ void vp9_free_svc_cyclic_refresh(struct VP9_COMP *const cpi);
|
||||
|
||||
void vp9_svc_reset_key_frame(struct VP9_COMP *const cpi);
|
||||
|
||||
void vp9_svc_check_reset_layer_rc_flag(struct VP9_COMP *const cpi);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
#endif
|
||||
|
@@ -170,13 +170,13 @@ void vp9_fht4x4_sse2(const int16_t *input, tran_low_t *output, int stride,
fadst4_sse2(in);
write_buffer_4x4(output, in);
break;
case ADST_ADST:
default:
assert(tx_type == ADST_ADST);
load_buffer_4x4(input, in, stride);
fadst4_sse2(in);
fadst4_sse2(in);
write_buffer_4x4(output, in);
break;
default: assert(0); break;
}
}

@@ -1097,14 +1097,14 @@ void vp9_fht8x8_sse2(const int16_t *input, tran_low_t *output, int stride,
right_shift_8x8(in, 1);
write_buffer_8x8(output, in, 8);
break;
case ADST_ADST:
default:
assert(tx_type == ADST_ADST);
load_buffer_8x8(input, in, stride);
fadst8_sse2(in);
fadst8_sse2(in);
right_shift_8x8(in, 1);
write_buffer_8x8(output, in, 8);
break;
default: assert(0); break;
}
}

@@ -1963,13 +1963,13 @@ void vp9_fht16x16_sse2(const int16_t *input, tran_low_t *output, int stride,
fadst16_sse2(in0, in1);
write_buffer_16x16(output, in0, in1, 16);
break;
case ADST_ADST:
default:
assert(tx_type == ADST_ADST);
load_buffer_16x16(input, in0, in1, stride);
fadst16_sse2(in0, in1);
right_shift_16x16(in0, in1);
fadst16_sse2(in0, in1);
write_buffer_16x16(output, in0, in1, 16);
break;
default: assert(0); break;
}
}
@@ -1,7 +1,7 @@
/*
* Copyright (c) 2014 The WebM project authors. All Rights Reserved.
*
* Usee of this source code is governed by a BSD-style license
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
vp9/encoder/x86/vp9_quantize_avx2.c (Normal file, 140 lines added)
@@ -0,0 +1,140 @@
/*
 * Copyright (c) 2017 The WebM project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */

#include <assert.h>
#include <immintrin.h>  // AVX2

#include "./vp9_rtcd.h"
#include "vpx/vpx_integer.h"
#include "vpx_dsp/vpx_dsp_common.h"
#include "vpx_dsp/x86/bitdepth_conversion_avx2.h"
#include "vpx_dsp/x86/quantize_x86.h"

// Zero fill 8 positions in the output buffer.
static INLINE void store_zero_tran_low(tran_low_t *a) {
  const __m256i zero = _mm256_setzero_si256();
#if CONFIG_VP9_HIGHBITDEPTH
  _mm256_storeu_si256((__m256i *)(a), zero);
  _mm256_storeu_si256((__m256i *)(a + 8), zero);
#else
  _mm256_storeu_si256((__m256i *)(a), zero);
#endif
}

static INLINE __m256i scan_eob_256(const __m256i *iscan_ptr,
                                   __m256i *coeff256) {
  const __m256i iscan = _mm256_loadu_si256(iscan_ptr);
  const __m256i zero256 = _mm256_setzero_si256();
#if CONFIG_VP9_HIGHBITDEPTH
  // The _mm256_packs_epi32() in load_tran_low() packs the 64 bit coeff as
  // B1 A1 B0 A0. Shuffle to B1 B0 A1 A0 in order to scan eob correctly.
  const __m256i _coeff256 = _mm256_permute4x64_epi64(*coeff256, 0xd8);
  const __m256i zero_coeff0 = _mm256_cmpeq_epi16(_coeff256, zero256);
#else
  const __m256i zero_coeff0 = _mm256_cmpeq_epi16(*coeff256, zero256);
#endif
  const __m256i nzero_coeff0 = _mm256_cmpeq_epi16(zero_coeff0, zero256);
  // Add one to convert from indices to counts
  const __m256i iscan_plus_one = _mm256_sub_epi16(iscan, nzero_coeff0);
  return _mm256_and_si256(iscan_plus_one, nzero_coeff0);
}
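
scan_eob_256 relies on _mm256_cmpeq_epi16 producing all ones (which is -1 as a signed 16-bit lane) for non-zero coefficients: subtracting that mask from the scan index is the "add one" mentioned in the comment, and the final AND clears the lanes whose coefficients were zero. Per lane this is equivalent to the following scalar sketch (illustrative, not part of the file):

#include <stdint.h>

/* Scalar per-lane equivalent: produce iscan + 1 for a non-zero coefficient and
 * 0 otherwise; the caller keeps a running max, which is the end-of-block. */
static int16_t scan_eob_scalar(int16_t iscan, int16_t coeff) {
  return (coeff != 0) ? (int16_t)(iscan + 1) : 0;
}
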
void vp9_quantize_fp_avx2(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
                          int skip_block, const int16_t *round_ptr,
                          const int16_t *quant_ptr, tran_low_t *qcoeff_ptr,
                          tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr,
                          uint16_t *eob_ptr, const int16_t *scan_ptr,
                          const int16_t *iscan_ptr) {
  __m128i eob;
  __m256i round256, quant256, dequant256;
  __m256i eob256, thr256;

  (void)scan_ptr;
  (void)skip_block;
  assert(!skip_block);

  coeff_ptr += n_coeffs;
  iscan_ptr += n_coeffs;
  qcoeff_ptr += n_coeffs;
  dqcoeff_ptr += n_coeffs;
  n_coeffs = -n_coeffs;

  {
    __m256i coeff256;

    // Setup global values
    {
      const __m128i round = _mm_load_si128((const __m128i *)round_ptr);
      const __m128i quant = _mm_load_si128((const __m128i *)quant_ptr);
      const __m128i dequant = _mm_load_si128((const __m128i *)dequant_ptr);
      round256 = _mm256_castsi128_si256(round);
      round256 = _mm256_permute4x64_epi64(round256, 0x54);

      quant256 = _mm256_castsi128_si256(quant);
      quant256 = _mm256_permute4x64_epi64(quant256, 0x54);

      dequant256 = _mm256_castsi128_si256(dequant);
      dequant256 = _mm256_permute4x64_epi64(dequant256, 0x54);
    }

    {
      __m256i qcoeff256;
      __m256i qtmp256;
      coeff256 = load_tran_low(coeff_ptr + n_coeffs);
      qcoeff256 = _mm256_abs_epi16(coeff256);
      qcoeff256 = _mm256_adds_epi16(qcoeff256, round256);
      qtmp256 = _mm256_mulhi_epi16(qcoeff256, quant256);
      qcoeff256 = _mm256_sign_epi16(qtmp256, coeff256);
      store_tran_low(qcoeff256, qcoeff_ptr + n_coeffs);
      coeff256 = _mm256_mullo_epi16(qcoeff256, dequant256);
      store_tran_low(coeff256, dqcoeff_ptr + n_coeffs);
    }

    eob256 = scan_eob_256((const __m256i *)(iscan_ptr + n_coeffs), &coeff256);
    n_coeffs += 8 * 2;
  }

  // remove dc constants
  dequant256 = _mm256_permute2x128_si256(dequant256, dequant256, 0x31);
  quant256 = _mm256_permute2x128_si256(quant256, quant256, 0x31);
  round256 = _mm256_permute2x128_si256(round256, round256, 0x31);

  thr256 = _mm256_srai_epi16(dequant256, 1);

  // AC only loop
  while (n_coeffs < 0) {
    __m256i coeff256 = load_tran_low(coeff_ptr + n_coeffs);
    __m256i qcoeff256 = _mm256_abs_epi16(coeff256);
    int32_t nzflag =
        _mm256_movemask_epi8(_mm256_cmpgt_epi16(qcoeff256, thr256));

    if (nzflag) {
      __m256i qtmp256;
      qcoeff256 = _mm256_adds_epi16(qcoeff256, round256);
      qtmp256 = _mm256_mulhi_epi16(qcoeff256, quant256);
      qcoeff256 = _mm256_sign_epi16(qtmp256, coeff256);
      store_tran_low(qcoeff256, qcoeff_ptr + n_coeffs);
      coeff256 = _mm256_mullo_epi16(qcoeff256, dequant256);
      store_tran_low(coeff256, dqcoeff_ptr + n_coeffs);
      eob256 = _mm256_max_epi16(
          eob256,
          scan_eob_256((const __m256i *)(iscan_ptr + n_coeffs), &coeff256));
    } else {
      store_zero_tran_low(qcoeff_ptr + n_coeffs);
      store_zero_tran_low(dqcoeff_ptr + n_coeffs);
    }
    n_coeffs += 8 * 2;
  }

  eob = _mm_max_epi16(_mm256_castsi256_si128(eob256),
                      _mm256_extracti128_si256(eob256, 1));

  *eob_ptr = accumulate_eob(eob);
}
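
Both quantization blocks above implement the usual fast-path quantizer: take the absolute coefficient, add the rounding term, keep the high 16 bits of the multiply by the quant factor, restore the original sign, then multiply by the dequant step for the reconstruction value; the AC loop additionally skips a whole group of 16 coefficients when every magnitude is at most half the dequant step. A scalar sketch of one coefficient under those assumptions, ignoring the saturating add (illustrative only, not part of the file):

#include <stdint.h>

/* Illustrative scalar model of the per-coefficient math above. */
static void quantize_fp_coeff_sketch(int16_t coeff, int16_t round, int16_t quant,
                                     int16_t dequant, int16_t *qcoeff,
                                     int16_t *dqcoeff) {
  const int abs_coeff = coeff < 0 ? -coeff : coeff;
  const int abs_q = ((abs_coeff + round) * quant) >> 16;        /* mulhi */
  const int q = coeff == 0 ? 0 : (coeff < 0 ? -abs_q : abs_q);  /* sign restore */
  *qcoeff = (int16_t)q;
  *dqcoeff = (int16_t)(q * dequant);                            /* mullo */
}
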
Some files were not shown because too many files have changed in this diff.