Compare commits

..

1 Commits

Author SHA1 Message Date
wangch
eea111f16a Test gerrit. 2017-12-05 18:07:21 -05:00
150 changed files with 3854 additions and 5687 deletions

View File

@@ -1,12 +1,12 @@
--- ---
Language: Cpp Language: Cpp
# BasedOnStyle: Google # BasedOnStyle: Google
# Generated with clang-format 5.0.0 # Generated with clang-format 4.0.1
AccessModifierOffset: -1 AccessModifierOffset: -1
AlignAfterOpenBracket: Align AlignAfterOpenBracket: Align
AlignConsecutiveAssignments: false AlignConsecutiveAssignments: false
AlignConsecutiveDeclarations: false AlignConsecutiveDeclarations: false
AlignEscapedNewlines: Left AlignEscapedNewlinesLeft: true
AlignOperands: true AlignOperands: true
AlignTrailingComments: true AlignTrailingComments: true
AllowAllParametersOfDeclarationOnNextLine: true AllowAllParametersOfDeclarationOnNextLine: true
@@ -33,20 +33,14 @@ BraceWrapping:
BeforeCatch: false BeforeCatch: false
BeforeElse: false BeforeElse: false
IndentBraces: false IndentBraces: false
SplitEmptyFunction: true
SplitEmptyRecord: true
SplitEmptyNamespace: true
BreakBeforeBinaryOperators: None BreakBeforeBinaryOperators: None
BreakBeforeBraces: Attach BreakBeforeBraces: Attach
BreakBeforeInheritanceComma: false
BreakBeforeTernaryOperators: true BreakBeforeTernaryOperators: true
BreakConstructorInitializersBeforeComma: false BreakConstructorInitializersBeforeComma: false
BreakConstructorInitializers: BeforeColon
BreakAfterJavaFieldAnnotations: false BreakAfterJavaFieldAnnotations: false
BreakStringLiterals: true BreakStringLiterals: true
ColumnLimit: 80 ColumnLimit: 80
CommentPragmas: '^ IWYU pragma:' CommentPragmas: '^ IWYU pragma:'
CompactNamespaces: false
ConstructorInitializerAllOnOneLineOrOnePerLine: false ConstructorInitializerAllOnOneLineOrOnePerLine: false
ConstructorInitializerIndentWidth: 4 ConstructorInitializerIndentWidth: 4
ContinuationIndentWidth: 4 ContinuationIndentWidth: 4
@@ -54,11 +48,7 @@ Cpp11BracedListStyle: false
DerivePointerAlignment: false DerivePointerAlignment: false
DisableFormat: false DisableFormat: false
ExperimentalAutoDetectBinPacking: false ExperimentalAutoDetectBinPacking: false
FixNamespaceComments: true ForEachMacros: [ foreach, Q_FOREACH, BOOST_FOREACH ]
ForEachMacros:
- foreach
- Q_FOREACH
- BOOST_FOREACH
IncludeCategories: IncludeCategories:
- Regex: '^<.*\.h>' - Regex: '^<.*\.h>'
Priority: 1 Priority: 1
@@ -80,7 +70,6 @@ NamespaceIndentation: None
ObjCBlockIndentWidth: 2 ObjCBlockIndentWidth: 2
ObjCSpaceAfterProperty: false ObjCSpaceAfterProperty: false
ObjCSpaceBeforeProtocolList: false ObjCSpaceBeforeProtocolList: false
PenaltyBreakAssignment: 2
PenaltyBreakBeforeFirstCallParameter: 1 PenaltyBreakBeforeFirstCallParameter: 1
PenaltyBreakComment: 300 PenaltyBreakComment: 300
PenaltyBreakFirstLessLess: 120 PenaltyBreakFirstLessLess: 120
@@ -90,7 +79,6 @@ PenaltyReturnTypeOnItsOwnLine: 200
PointerAlignment: Right PointerAlignment: Right
ReflowComments: true ReflowComments: true
SortIncludes: false SortIncludes: false
SortUsingDeclarations: true
SpaceAfterCStyleCast: false SpaceAfterCStyleCast: false
SpaceAfterTemplateKeyword: true SpaceAfterTemplateKeyword: true
SpaceBeforeAssignmentOperators: true SpaceBeforeAssignmentOperators: true

View File

@@ -3,7 +3,6 @@ Aex Converse <aconverse@google.com>
Aex Converse <aconverse@google.com> <alex.converse@gmail.com> Aex Converse <aconverse@google.com> <alex.converse@gmail.com>
Alexis Ballier <aballier@gentoo.org> <alexis.ballier@gmail.com> Alexis Ballier <aballier@gentoo.org> <alexis.ballier@gmail.com>
Alpha Lam <hclam@google.com> <hclam@chromium.org> Alpha Lam <hclam@google.com> <hclam@chromium.org>
Chris Cunningham <chcunningham@chromium.org>
Daniele Castagna <dcastagna@chromium.org> <dcastagna@google.com> Daniele Castagna <dcastagna@chromium.org> <dcastagna@google.com>
Deb Mukherjee <debargha@google.com> Deb Mukherjee <debargha@google.com>
Erik Niemeyer <erik.a.niemeyer@intel.com> <erik.a.niemeyer@gmail.com> Erik Niemeyer <erik.a.niemeyer@intel.com> <erik.a.niemeyer@gmail.com>
@@ -22,21 +21,18 @@ Marco Paniconi <marpan@google.com>
Marco Paniconi <marpan@google.com> <marpan@chromium.org> Marco Paniconi <marpan@google.com> <marpan@chromium.org>
Pascal Massimino <pascal.massimino@gmail.com> Pascal Massimino <pascal.massimino@gmail.com>
Paul Wilkins <paulwilkins@google.com> Paul Wilkins <paulwilkins@google.com>
Peter Boström <pbos@chromium.org> <pbos@google.com>
Peter de Rivaz <peter.derivaz@gmail.com> Peter de Rivaz <peter.derivaz@gmail.com>
Peter de Rivaz <peter.derivaz@gmail.com> <peter.derivaz@argondesign.com> Peter de Rivaz <peter.derivaz@gmail.com> <peter.derivaz@argondesign.com>
Ralph Giles <giles@xiph.org> <giles@entropywave.com> Ralph Giles <giles@xiph.org> <giles@entropywave.com>
Ralph Giles <giles@xiph.org> <giles@mozilla.com> Ralph Giles <giles@xiph.org> <giles@mozilla.com>
Ronald S. Bultje <rsbultje@gmail.com> <rbultje@google.com> Ronald S. Bultje <rsbultje@gmail.com> <rbultje@google.com>
Sami Pietilä <samipietila@google.com> Sami Pietilä <samipietila@google.com>
Shiyou Yin <yinshiyou-hf@loongson.cn>
Tamar Levy <tamar.levy@intel.com> Tamar Levy <tamar.levy@intel.com>
Tamar Levy <tamar.levy@intel.com> <levytamar82@gmail.com> Tamar Levy <tamar.levy@intel.com> <levytamar82@gmail.com>
Tero Rintaluoma <teror@google.com> <tero.rintaluoma@on2.com> Tero Rintaluoma <teror@google.com> <tero.rintaluoma@on2.com>
Timothy B. Terriberry <tterribe@xiph.org> <tterriberry@mozilla.com> Timothy B. Terriberry <tterribe@xiph.org> <tterriberry@mozilla.com>
Tom Finegan <tomfinegan@google.com> Tom Finegan <tomfinegan@google.com>
Tom Finegan <tomfinegan@google.com> <tomfinegan@chromium.org> Tom Finegan <tomfinegan@google.com> <tomfinegan@chromium.org>
Urvang Joshi <urvang@google.com> <urvang@chromium.org>
Yaowu Xu <yaowu@google.com> <adam@xuyaowu.com> Yaowu Xu <yaowu@google.com> <adam@xuyaowu.com>
Yaowu Xu <yaowu@google.com> <yaowu@xuyaowu.com> Yaowu Xu <yaowu@google.com> <yaowu@xuyaowu.com>
Yaowu Xu <yaowu@google.com> <Yaowu Xu> Yaowu Xu <yaowu@google.com> <Yaowu Xu>

16
AUTHORS
View File

@@ -3,13 +3,13 @@
Aaron Watry <awatry@gmail.com> Aaron Watry <awatry@gmail.com>
Abo Talib Mahfoodh <ab.mahfoodh@gmail.com> Abo Talib Mahfoodh <ab.mahfoodh@gmail.com>
Adam Xu <adam@xuyaowu.com>
Adrian Grange <agrange@google.com> Adrian Grange <agrange@google.com>
Aex Converse <aconverse@google.com> Aex Converse <aconverse@google.com>
Ahmad Sharif <asharif@google.com> Ahmad Sharif <asharif@google.com>
Aleksey Vasenev <margtu-fivt@ya.ru> Aleksey Vasenev <margtu-fivt@ya.ru>
Alexander Potapenko <glider@google.com> Alexander Potapenko <glider@google.com>
Alexander Voronov <avoronov@graphics.cs.msu.ru> Alexander Voronov <avoronov@graphics.cs.msu.ru>
Alexandra Hájková <alexandra.khirnova@gmail.com>
Alexis Ballier <aballier@gentoo.org> Alexis Ballier <aballier@gentoo.org>
Alok Ahuja <waveletcoeff@gmail.com> Alok Ahuja <waveletcoeff@gmail.com>
Alpha Lam <hclam@google.com> Alpha Lam <hclam@google.com>
@@ -17,7 +17,6 @@ A.Mahfoodh <ab.mahfoodh@gmail.com>
Ami Fischman <fischman@chromium.org> Ami Fischman <fischman@chromium.org>
Andoni Morales Alastruey <ylatuya@gmail.com> Andoni Morales Alastruey <ylatuya@gmail.com>
Andres Mejia <mcitadel@gmail.com> Andres Mejia <mcitadel@gmail.com>
Andrew Lewis <andrewlewis@google.com>
Andrew Russell <anrussell@google.com> Andrew Russell <anrussell@google.com>
Angie Chiang <angiebird@google.com> Angie Chiang <angiebird@google.com>
Aron Rosenberg <arosenberg@logitech.com> Aron Rosenberg <arosenberg@logitech.com>
@@ -25,9 +24,7 @@ Attila Nagy <attilanagy@google.com>
Brion Vibber <bvibber@wikimedia.org> Brion Vibber <bvibber@wikimedia.org>
changjun.yang <changjun.yang@intel.com> changjun.yang <changjun.yang@intel.com>
Charles 'Buck' Krasic <ckrasic@google.com> Charles 'Buck' Krasic <ckrasic@google.com>
Cheng Chen <chengchen@google.com>
chm <chm@rock-chips.com> chm <chm@rock-chips.com>
Chris Cunningham <chcunningham@chromium.org>
Christian Duvivier <cduvivier@google.com> Christian Duvivier <cduvivier@google.com>
Daniele Castagna <dcastagna@chromium.org> Daniele Castagna <dcastagna@chromium.org>
Daniel Kang <ddkang@google.com> Daniel Kang <ddkang@google.com>
@@ -49,12 +46,10 @@ Geza Lore <gezalore@gmail.com>
Ghislain MARY <ghislainmary2@gmail.com> Ghislain MARY <ghislainmary2@gmail.com>
Giuseppe Scrivano <gscrivano@gnu.org> Giuseppe Scrivano <gscrivano@gnu.org>
Gordana Cmiljanovic <gordana.cmiljanovic@imgtec.com> Gordana Cmiljanovic <gordana.cmiljanovic@imgtec.com>
Gregor Jasny <gjasny@gmail.com>
Guillaume Martres <gmartres@google.com> Guillaume Martres <gmartres@google.com>
Guillermo Ballester Valor <gbvalor@gmail.com> Guillermo Ballester Valor <gbvalor@gmail.com>
Hangyu Kuang <hkuang@google.com> Hangyu Kuang <hkuang@google.com>
Hanno Böck <hanno@hboeck.de> Hanno Böck <hanno@hboeck.de>
Han Shen <shenhan@google.com>
Henrik Lundin <hlundin@google.com> Henrik Lundin <hlundin@google.com>
Hui Su <huisu@google.com> Hui Su <huisu@google.com>
Ivan Krasin <krasin@chromium.org> Ivan Krasin <krasin@chromium.org>
@@ -88,7 +83,6 @@ Justin Clift <justin@salasaga.org>
Justin Lebar <justin.lebar@gmail.com> Justin Lebar <justin.lebar@gmail.com>
Kaustubh Raste <kaustubh.raste@imgtec.com> Kaustubh Raste <kaustubh.raste@imgtec.com>
KO Myung-Hun <komh@chollian.net> KO Myung-Hun <komh@chollian.net>
Kyle Siefring <kylesiefring@gmail.com>
Lawrence Velázquez <larryv@macports.org> Lawrence Velázquez <larryv@macports.org>
Linfeng Zhang <linfengz@google.com> Linfeng Zhang <linfengz@google.com>
Lou Quillio <louquillio@google.com> Lou Quillio <louquillio@google.com>
@@ -107,7 +101,6 @@ Mikhal Shemer <mikhal@google.com>
Min Chen <chenm003@gmail.com> Min Chen <chenm003@gmail.com>
Minghai Shang <minghai@google.com> Minghai Shang <minghai@google.com>
Min Ye <yeemmi@google.com> Min Ye <yeemmi@google.com>
Moriyoshi Koizumi <mozo@mozo.jp>
Morton Jonuschat <yabawock@gmail.com> Morton Jonuschat <yabawock@gmail.com>
Nathan E. Egge <negge@mozilla.com> Nathan E. Egge <negge@mozilla.com>
Nico Weber <thakis@chromium.org> Nico Weber <thakis@chromium.org>
@@ -118,15 +111,12 @@ Paul Wilkins <paulwilkins@google.com>
Pavol Rusnak <stick@gk2.sk> Pavol Rusnak <stick@gk2.sk>
Paweł Hajdan <phajdan@google.com> Paweł Hajdan <phajdan@google.com>
Pengchong Jin <pengchong@google.com> Pengchong Jin <pengchong@google.com>
Peter Boström <pbos@chromium.org> Peter Boström <pbos@google.com>
Peter Collingbourne <pcc@chromium.org>
Peter de Rivaz <peter.derivaz@gmail.com> Peter de Rivaz <peter.derivaz@gmail.com>
Philip Jägenstedt <philipj@opera.com> Philip Jägenstedt <philipj@opera.com>
Priit Laes <plaes@plaes.org> Priit Laes <plaes@plaes.org>
Rafael Ávila de Espíndola <rafael.espindola@gmail.com> Rafael Ávila de Espíndola <rafael.espindola@gmail.com>
Rafaël Carré <funman@videolan.org> Rafaël Carré <funman@videolan.org>
Rafael de Lucena Valle <rafaeldelucena@gmail.com>
Rahul Chaudhry <rahulchaudhry@google.com>
Ralph Giles <giles@xiph.org> Ralph Giles <giles@xiph.org>
Ranjit Kumar Tulabandu <ranjit.tulabandu@ittiam.com> Ranjit Kumar Tulabandu <ranjit.tulabandu@ittiam.com>
Rob Bradford <rob@linux.intel.com> Rob Bradford <rob@linux.intel.com>
@@ -145,7 +135,6 @@ Shiyou Yin <yinshiyou-hf@loongson.cn>
Shunyao Li <shunyaoli@google.com> Shunyao Li <shunyaoli@google.com>
Stefan Holmer <holmer@google.com> Stefan Holmer <holmer@google.com>
Suman Sunkara <sunkaras@google.com> Suman Sunkara <sunkaras@google.com>
Sylvestre Ledru <sylvestre@mozilla.com>
Taekhyun Kim <takim@nvidia.com> Taekhyun Kim <takim@nvidia.com>
Takanori MATSUURA <t.matsuu@gmail.com> Takanori MATSUURA <t.matsuu@gmail.com>
Tamar Levy <tamar.levy@intel.com> Tamar Levy <tamar.levy@intel.com>
@@ -158,7 +147,6 @@ Tom Finegan <tomfinegan@google.com>
Tristan Matthews <le.businessman@gmail.com> Tristan Matthews <le.businessman@gmail.com>
Urvang Joshi <urvang@google.com> Urvang Joshi <urvang@google.com>
Vignesh Venkatasubramanian <vigneshv@google.com> Vignesh Venkatasubramanian <vigneshv@google.com>
Vlad Tsyrklevich <vtsyrklevich@chromium.org>
Yaowu Xu <yaowu@google.com> Yaowu Xu <yaowu@google.com>
Yi Luo <luoyi@google.com> Yi Luo <luoyi@google.com>
Yongzhe Wang <yongzhe@google.com> Yongzhe Wang <yongzhe@google.com>

View File

@@ -1,28 +1,3 @@
2017-01-04 v1.7.0 "Mandarin Duck"
This release focused on high bit depth performance (10/12 bit) and vp9
encoding improvements.
- Upgrading:
This release is ABI incompatible due to new vp9 encoder features.
Frame parallel decoding for vp9 has been removed.
- Enhancements:
vp9 encoding supports additional threads with --row-mt. This can be greater
than the number of tiles.
Two new vp9 encoder options have been added:
--corpus-complexity
--tune-content=film
Additional tooling for respecting the vp9 "level" profiles has been added.
- Bug fixes:
A variety of fuzzing issues.
vp8 threading fix for ARM.
Codec control VP9_SET_SKIP_LOOP_FILTER fixed.
Reject invalid multi resolution configurations.
2017-01-09 v1.6.1 "Long Tailed Duck" 2017-01-09 v1.6.1 "Long Tailed Duck"
This release improves upon the VP9 encoder and speeds up the encoding and This release improves upon the VP9 encoder and speeds up the encoding and
decoding processes. decoding processes.

4
README
View File

@@ -1,4 +1,4 @@
README - 24 January 2018 README - 26 January 2017
Welcome to the WebM VP8/VP9 Codec SDK! Welcome to the WebM VP8/VP9 Codec SDK!
@@ -63,8 +63,6 @@ COMPILING THE APPLICATIONS/LIBRARIES:
armv8-linux-gcc armv8-linux-gcc
mips32-linux-gcc mips32-linux-gcc
mips64-linux-gcc mips64-linux-gcc
ppc64-linux-gcc
ppc64le-linux-gcc
sparc-solaris-gcc sparc-solaris-gcc
x86-android-gcc x86-android-gcc
x86-darwin8-gcc x86-darwin8-gcc

View File

@@ -1,13 +1,4 @@
#!/usr/bin/env perl #!/usr/bin/env perl
##
## Copyright (c) 2017 The WebM project authors. All Rights Reserved.
##
## Use of this source code is governed by a BSD-style license
## that can be found in the LICENSE file in the root of the source
## tree. An additional intellectual property rights grant can be found
## in the file PATENTS. All contributing project authors may
## be found in the AUTHORS file in the root of the source tree.
##
no strict 'refs'; no strict 'refs';
use warnings; use warnings;
@@ -209,7 +200,6 @@ sub filter {
sub common_top() { sub common_top() {
my $include_guard = uc($opts{sym})."_H_"; my $include_guard = uc($opts{sym})."_H_";
print <<EOF; print <<EOF;
// This file is generated. Do not edit.
#ifndef ${include_guard} #ifndef ${include_guard}
#define ${include_guard} #define ${include_guard}

View File

@@ -60,7 +60,6 @@ if [ ${bare} ]; then
echo "${changelog_version}${git_version_id}" > $$.tmp echo "${changelog_version}${git_version_id}" > $$.tmp
else else
cat<<EOF>$$.tmp cat<<EOF>$$.tmp
// This file is generated. Do not edit.
#define VERSION_MAJOR $major_version #define VERSION_MAJOR $major_version
#define VERSION_MINOR $minor_version #define VERSION_MINOR $minor_version
#define VERSION_PATCH $patch_version #define VERSION_PATCH $patch_version

2
configure vendored
View File

@@ -665,7 +665,7 @@ process_toolchain() {
gen_vcproj_cmd=${source_path}/build/make/gen_msvs_vcxproj.sh gen_vcproj_cmd=${source_path}/build/make/gen_msvs_vcxproj.sh
enabled werror && gen_vcproj_cmd="${gen_vcproj_cmd} --enable-werror" enabled werror && gen_vcproj_cmd="${gen_vcproj_cmd} --enable-werror"
all_targets="${all_targets} solution" all_targets="${all_targets} solution"
INLINE="__inline" INLINE="__forceinline"
;; ;;
esac esac

View File

@@ -429,8 +429,7 @@ static void set_rate_control_stats(struct RateControlStats *rc,
rc->layer_framerate[layer] = framerate / cfg->ts_rate_decimator[tl]; rc->layer_framerate[layer] = framerate / cfg->ts_rate_decimator[tl];
if (tl > 0) { if (tl > 0) {
rc->layer_pfb[layer] = rc->layer_pfb[layer] =
1000.0 * 1000.0 * (cfg->layer_target_bitrate[layer] -
(cfg->layer_target_bitrate[layer] -
cfg->layer_target_bitrate[layer - 1]) / cfg->layer_target_bitrate[layer - 1]) /
(rc->layer_framerate[layer] - rc->layer_framerate[layer - 1]); (rc->layer_framerate[layer] - rc->layer_framerate[layer - 1]);
} else { } else {
@@ -574,8 +573,8 @@ void set_frame_flags_bypass_mode(int sl, int tl, int num_spatial_layers,
} else { } else {
if (is_key_frame) { if (is_key_frame) {
ref_frame_config->frame_flags[sl] = ref_frame_config->frame_flags[sl] =
VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF | VP8_EFLAG_NO_REF_LAST | VP8_EFLAG_NO_REF_ARF |
VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_ARF; VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF;
} else { } else {
ref_frame_config->frame_flags[sl] = ref_frame_config->frame_flags[sl] =
VP8_EFLAG_NO_REF_ARF | VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF; VP8_EFLAG_NO_REF_ARF | VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF;
@@ -589,24 +588,14 @@ void set_frame_flags_bypass_mode(int sl, int tl, int num_spatial_layers,
} else { } else {
ref_frame_config->frame_flags[sl] = ref_frame_config->frame_flags[sl] =
VP8_EFLAG_NO_REF_ARF | VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_GF; VP8_EFLAG_NO_REF_ARF | VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_GF;
if (sl == num_spatial_layers - 1)
ref_frame_config->frame_flags[sl] =
VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_REF_ARF |
VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_GF;
} }
} }
if (tl == 0) { if (tl == 0) {
ref_frame_config->lst_fb_idx[sl] = sl; ref_frame_config->lst_fb_idx[sl] = sl;
if (sl) { if (sl)
if (is_key_frame) {
ref_frame_config->lst_fb_idx[sl] = sl - 1;
ref_frame_config->gld_fb_idx[sl] = sl;
} else {
ref_frame_config->gld_fb_idx[sl] = sl - 1; ref_frame_config->gld_fb_idx[sl] = sl - 1;
} else
} else {
ref_frame_config->gld_fb_idx[sl] = 0; ref_frame_config->gld_fb_idx[sl] = 0;
}
ref_frame_config->alt_fb_idx[sl] = 0; ref_frame_config->alt_fb_idx[sl] = 0;
} else if (tl == 1) { } else if (tl == 1) {
ref_frame_config->lst_fb_idx[sl] = sl; ref_frame_config->lst_fb_idx[sl] = sl;
@@ -749,8 +738,6 @@ int main(int argc, const char **argv) {
// the encode for the whole superframe. The encoder will internally loop // the encode for the whole superframe. The encoder will internally loop
// over all the spatial layers for the current superframe. // over all the spatial layers for the current superframe.
vpx_codec_control(&codec, VP9E_SET_SVC_LAYER_ID, &layer_id); vpx_codec_control(&codec, VP9E_SET_SVC_LAYER_ID, &layer_id);
// TODO(jianj): Fix the parameter passing for "is_key_frame" in
// set_frame_flags_bypass_model() for case of periodic key frames.
set_frame_flags_bypass_mode(sl, layer_id.temporal_layer_id, set_frame_flags_bypass_mode(sl, layer_id.temporal_layer_id,
svc_ctx.spatial_layers, frame_cnt == 0, svc_ctx.spatial_layers, frame_cnt == 0,
&ref_frame_config); &ref_frame_config);

View File

@@ -26,29 +26,19 @@
#include "../tools_common.h" #include "../tools_common.h"
#include "../video_writer.h" #include "../video_writer.h"
#define ROI_MAP 0 #define VP8_ROI_MAP 0
#define zero(Dest) memset(&Dest, 0, sizeof(Dest));
static const char *exec_name; static const char *exec_name;
void usage_exit(void) { exit(EXIT_FAILURE); } void usage_exit(void) { exit(EXIT_FAILURE); }
// Denoiser states for vp8, for temporal denoising. // Denoiser states, for temporal denoising.
enum denoiserStateVp8 { enum denoiserState {
kVp8DenoiserOff, kDenoiserOff,
kVp8DenoiserOnYOnly, kDenoiserOnYOnly,
kVp8DenoiserOnYUV, kDenoiserOnYUV,
kVp8DenoiserOnYUVAggressive, kDenoiserOnYUVAggressive,
kVp8DenoiserOnAdaptive kDenoiserOnAdaptive
};
// Denoiser states for vp9, for temporal denoising.
enum denoiserStateVp9 {
kVp9DenoiserOff,
kVp9DenoiserOnYOnly,
// For SVC: denoise the top two spatial layers.
kVp9DenoiserOnYTwoSpatialLayers
}; };
static int mode_to_num_layers[13] = { 1, 2, 2, 3, 3, 3, 3, 5, 2, 3, 3, 3, 3 }; static int mode_to_num_layers[13] = { 1, 2, 2, 3, 3, 3, 3, 5, 2, 3, 3, 3, 3 };
@@ -101,9 +91,8 @@ static void set_rate_control_metrics(struct RateControlMetrics *rc,
for (i = 0; i < cfg->ts_number_layers; ++i) { for (i = 0; i < cfg->ts_number_layers; ++i) {
if (i > 0) { if (i > 0) {
rc->layer_framerate[i] = framerate / cfg->ts_rate_decimator[i]; rc->layer_framerate[i] = framerate / cfg->ts_rate_decimator[i];
rc->layer_pfb[i] = rc->layer_pfb[i] = 1000.0 * (rc->layer_target_bitrate[i] -
1000.0 * rc->layer_target_bitrate[i - 1]) /
(rc->layer_target_bitrate[i] - rc->layer_target_bitrate[i - 1]) /
(rc->layer_framerate[i] - rc->layer_framerate[i - 1]); (rc->layer_framerate[i] - rc->layer_framerate[i - 1]);
} }
rc->layer_input_frames[i] = 0; rc->layer_input_frames[i] = 0;
@@ -167,60 +156,38 @@ static void printout_rate_control_summary(struct RateControlMetrics *rc,
die("Error: Number of input frames not equal to output! \n"); die("Error: Number of input frames not equal to output! \n");
} }
#if ROI_MAP #if VP8_ROI_MAP
static void set_roi_map(const char *enc_name, vpx_codec_enc_cfg_t *cfg, static void vp8_set_roi_map(vpx_codec_enc_cfg_t *cfg, vpx_roi_map_t *roi) {
vpx_roi_map_t *roi) {
unsigned int i, j; unsigned int i, j;
int block_size = 0; memset(roi, 0, sizeof(*roi));
uint8_t is_vp8 = strncmp(enc_name, "vp8", 3) == 0 ? 1 : 0;
uint8_t is_vp9 = strncmp(enc_name, "vp9", 3) == 0 ? 1 : 0;
if (!is_vp8 && !is_vp9) {
die("unsupported codec.");
}
zero(*roi);
block_size = is_vp9 && !is_vp8 ? 8 : 16;
// ROI is based on the segments (4 for vp8, 8 for vp9), smallest unit for // ROI is based on the segments (4 for vp8, 8 for vp9), smallest unit for
// segment is 16x16 for vp8, 8x8 for vp9. // segment is 16x16 for vp8, 8x8 for vp9.
roi->rows = (cfg->g_h + block_size - 1) / block_size; roi->rows = (cfg->g_h + 15) / 16;
roi->cols = (cfg->g_w + block_size - 1) / block_size; roi->cols = (cfg->g_w + 15) / 16;
// Applies delta QP on the segment blocks, varies from -63 to 63. // Applies delta QP on the segment blocks, varies from -63 to 63.
// Setting to negative means lower QP (better quality). // Setting to negative means lower QP (better quality).
// Below we set delta_q to the extreme (-63) to show strong effect. // Below we set delta_q to the extreme (-63) to show strong effect.
// VP8 uses the first 4 segments. VP9 uses all 8 segments. roi->delta_q[0] = 0;
zero(roi->delta_q);
roi->delta_q[1] = -63; roi->delta_q[1] = -63;
roi->delta_q[2] = 0;
roi->delta_q[3] = 0;
// Applies delta loopfilter strength on the segment blocks, varies from -63 to // Applies delta loopfilter strength on the segment blocks, varies from -63 to
// 63. Setting to positive means stronger loopfilter. VP8 uses the first 4 // 63. Setting to positive means stronger loopfilter.
// segments. VP9 uses all 8 segments. roi->delta_lf[0] = 0;
zero(roi->delta_lf); roi->delta_lf[1] = 0;
roi->delta_lf[2] = 0;
roi->delta_lf[3] = 0;
if (is_vp8) {
// Applies skip encoding threshold on the segment blocks, varies from 0 to // Applies skip encoding threshold on the segment blocks, varies from 0 to
// UINT_MAX. Larger value means more skipping of encoding is possible. // UINT_MAX. Larger value means more skipping of encoding is possible.
// This skip threshold only applies on delta frames. // This skip threshold only applies on delta frames.
zero(roi->static_threshold); roi->static_threshold[0] = 0;
} roi->static_threshold[1] = 0;
roi->static_threshold[2] = 0;
if (is_vp9) { roi->static_threshold[3] = 0;
// Apply skip segment. Setting to 1 means this block will be copied from
// previous frame.
zero(roi->skip);
}
if (is_vp9) {
// Apply ref frame segment.
// -1 : Do not apply this segment.
// 0 : Froce using intra.
// 1 : Force using last.
// 2 : Force using golden.
// 3 : Force using alfref but not used in non-rd pickmode for 0 lag.
memset(roi->ref_frame, -1, sizeof(roi->ref_frame));
roi->ref_frame[1] = 1;
}
// Use 2 states: 1 is center square, 0 is the rest. // Use 2 states: 1 is center square, 0 is the rest.
roi->roi_map = roi->roi_map =
@@ -588,7 +555,7 @@ int main(int argc, char **argv) {
int layering_mode = 0; int layering_mode = 0;
int layer_flags[VPX_TS_MAX_PERIODICITY] = { 0 }; int layer_flags[VPX_TS_MAX_PERIODICITY] = { 0 };
int flag_periodicity = 1; int flag_periodicity = 1;
#if ROI_MAP #if VP8_ROI_MAP
vpx_roi_map_t roi; vpx_roi_map_t roi;
#endif #endif
vpx_svc_layer_id_t layer_id = { 0, 0 }; vpx_svc_layer_id_t layer_id = { 0, 0 };
@@ -788,11 +755,11 @@ int main(int argc, char **argv) {
if (strncmp(encoder->name, "vp8", 3) == 0) { if (strncmp(encoder->name, "vp8", 3) == 0) {
vpx_codec_control(&codec, VP8E_SET_CPUUSED, -speed); vpx_codec_control(&codec, VP8E_SET_CPUUSED, -speed);
vpx_codec_control(&codec, VP8E_SET_NOISE_SENSITIVITY, kVp8DenoiserOff); vpx_codec_control(&codec, VP8E_SET_NOISE_SENSITIVITY, kDenoiserOff);
vpx_codec_control(&codec, VP8E_SET_STATIC_THRESHOLD, 1); vpx_codec_control(&codec, VP8E_SET_STATIC_THRESHOLD, 1);
vpx_codec_control(&codec, VP8E_SET_GF_CBR_BOOST_PCT, 0); vpx_codec_control(&codec, VP8E_SET_GF_CBR_BOOST_PCT, 0);
#if ROI_MAP #if VP8_ROI_MAP
set_roi_map(encoder->name, &cfg, &roi); vp8_set_roi_map(&cfg, &roi);
if (vpx_codec_control(&codec, VP8E_SET_ROI_MAP, &roi)) if (vpx_codec_control(&codec, VP8E_SET_ROI_MAP, &roi))
die_codec(&codec, "Failed to set ROI map"); die_codec(&codec, "Failed to set ROI map");
#endif #endif
@@ -805,16 +772,10 @@ int main(int argc, char **argv) {
vpx_codec_control(&codec, VP9E_SET_GF_CBR_BOOST_PCT, 0); vpx_codec_control(&codec, VP9E_SET_GF_CBR_BOOST_PCT, 0);
vpx_codec_control(&codec, VP9E_SET_FRAME_PARALLEL_DECODING, 0); vpx_codec_control(&codec, VP9E_SET_FRAME_PARALLEL_DECODING, 0);
vpx_codec_control(&codec, VP9E_SET_FRAME_PERIODIC_BOOST, 0); vpx_codec_control(&codec, VP9E_SET_FRAME_PERIODIC_BOOST, 0);
vpx_codec_control(&codec, VP9E_SET_NOISE_SENSITIVITY, kVp9DenoiserOff); vpx_codec_control(&codec, VP9E_SET_NOISE_SENSITIVITY, kDenoiserOff);
vpx_codec_control(&codec, VP8E_SET_STATIC_THRESHOLD, 1); vpx_codec_control(&codec, VP8E_SET_STATIC_THRESHOLD, 1);
vpx_codec_control(&codec, VP9E_SET_TUNE_CONTENT, 0); vpx_codec_control(&codec, VP9E_SET_TUNE_CONTENT, 0);
vpx_codec_control(&codec, VP9E_SET_TILE_COLUMNS, (cfg.g_threads >> 1)); vpx_codec_control(&codec, VP9E_SET_TILE_COLUMNS, (cfg.g_threads >> 1));
#if ROI_MAP
set_roi_map(encoder->name, &cfg, &roi);
if (vpx_codec_control(&codec, VP9E_SET_ROI_MAP, &roi))
die_codec(&codec, "Failed to set ROI map");
vpx_codec_control(&codec, VP9E_SET_AQ_MODE, 0);
#endif
// TODO(marpan/jianj): There is an issue with row-mt for low resolutons at // TODO(marpan/jianj): There is an issue with row-mt for low resolutons at
// high speed settings, disable its use for those cases for now. // high speed settings, disable its use for those cases for now.
if (cfg.g_threads > 1 && ((cfg.g_w > 320 && cfg.g_h > 240) || speed < 7)) if (cfg.g_threads > 1 && ((cfg.g_w > 320 && cfg.g_h > 240) || speed < 7))
@@ -942,8 +903,5 @@ int main(int argc, char **argv) {
for (i = 0; i < cfg.ts_number_layers; ++i) vpx_video_writer_close(outfile[i]); for (i = 0; i < cfg.ts_number_layers; ++i) vpx_video_writer_close(outfile[i]);
vpx_img_free(&raw); vpx_img_free(&raw);
#if ROI_MAP
free(roi.roi_map);
#endif
return EXIT_SUCCESS; return EXIT_SUCCESS;
} }

View File

@@ -943,6 +943,18 @@ GENERATE_XML = NO
XML_OUTPUT = xml XML_OUTPUT = xml
# The XML_SCHEMA tag can be used to specify an XML schema,
# which can be used by a validating XML parser to check the
# syntax of the XML files.
XML_SCHEMA =
# The XML_DTD tag can be used to specify an XML DTD,
# which can be used by a validating XML parser to check the
# syntax of the XML files.
XML_DTD =
# If the XML_PROGRAMLISTING tag is set to YES Doxygen will # If the XML_PROGRAMLISTING tag is set to YES Doxygen will
# dump the program listings (including syntax highlighting # dump the program listings (including syntax highlighting
# and cross-referencing information) to the XML output. Note that # and cross-referencing information) to the XML output. Note that

View File

@@ -233,8 +233,8 @@ OBJS-yes += $(LIBVPX_OBJS)
LIBS-$(if yes,$(CONFIG_STATIC)) += $(BUILD_PFX)libvpx.a $(BUILD_PFX)libvpx_g.a LIBS-$(if yes,$(CONFIG_STATIC)) += $(BUILD_PFX)libvpx.a $(BUILD_PFX)libvpx_g.a
$(BUILD_PFX)libvpx_g.a: $(LIBVPX_OBJS) $(BUILD_PFX)libvpx_g.a: $(LIBVPX_OBJS)
SO_VERSION_MAJOR := 5 SO_VERSION_MAJOR := 4
SO_VERSION_MINOR := 0 SO_VERSION_MINOR := 1
SO_VERSION_PATCH := 0 SO_VERSION_PATCH := 0
ifeq ($(filter darwin%,$(TGT_OS)),$(TGT_OS)) ifeq ($(filter darwin%,$(TGT_OS)),$(TGT_OS))
LIBVPX_SO := libvpx.$(SO_VERSION_MAJOR).dylib LIBVPX_SO := libvpx.$(SO_VERSION_MAJOR).dylib

View File

@@ -215,7 +215,7 @@ using std::tr1::make_tuple;
#if CONFIG_VP9_ENCODER #if CONFIG_VP9_ENCODER
const BlockinessParam c_vp9_tests[] = { const BlockinessParam c_vp9_tests[] = {
make_tuple(320, 240), make_tuple(318, 242), make_tuple(318, 238) make_tuple(320, 240), make_tuple(318, 242), make_tuple(318, 238),
}; };
INSTANTIATE_TEST_CASE_P(C, BlockinessVP9Test, ::testing::ValuesIn(c_vp9_tests)); INSTANTIATE_TEST_CASE_P(C, BlockinessVP9Test, ::testing::ValuesIn(c_vp9_tests));
#endif #endif

View File

@@ -205,7 +205,7 @@ using std::tr1::make_tuple;
#if CONFIG_VP9_ENCODER #if CONFIG_VP9_ENCODER
const ConsistencyParam c_vp9_tests[] = { const ConsistencyParam c_vp9_tests[] = {
make_tuple(320, 240), make_tuple(318, 242), make_tuple(318, 238) make_tuple(320, 240), make_tuple(318, 242), make_tuple(318, 238),
}; };
INSTANTIATE_TEST_CASE_P(C, ConsistencyVP9Test, INSTANTIATE_TEST_CASE_P(C, ConsistencyVP9Test,
::testing::ValuesIn(c_vp9_tests)); ::testing::ValuesIn(c_vp9_tests));

View File

@@ -539,7 +539,6 @@ class DatarateTestVP9Large
denoiser_offon_test_ = 0; denoiser_offon_test_ = 0;
denoiser_offon_period_ = -1; denoiser_offon_period_ = -1;
frame_parallel_decoding_mode_ = 1; frame_parallel_decoding_mode_ = 1;
use_roi_ = 0;
} }
// //
@@ -622,10 +621,6 @@ class DatarateTestVP9Large
encoder->Control(VP9E_SET_FRAME_PARALLEL_DECODING, encoder->Control(VP9E_SET_FRAME_PARALLEL_DECODING,
frame_parallel_decoding_mode_); frame_parallel_decoding_mode_);
if (use_roi_) {
encoder->Control(VP9E_SET_ROI_MAP, &roi_);
}
if (cfg_.ts_number_layers > 1) { if (cfg_.ts_number_layers > 1) {
if (video->frame() == 0) { if (video->frame() == 0) {
encoder->Control(VP9E_SET_SVC, 1); encoder->Control(VP9E_SET_SVC, 1);
@@ -706,8 +701,6 @@ class DatarateTestVP9Large
int denoiser_offon_test_; int denoiser_offon_test_;
int denoiser_offon_period_; int denoiser_offon_period_;
int frame_parallel_decoding_mode_; int frame_parallel_decoding_mode_;
bool use_roi_;
vpx_roi_map_t roi_;
}; };
// Check basic rate targeting for VBR mode with 0 lag. // Check basic rate targeting for VBR mode with 0 lag.
@@ -1080,68 +1073,6 @@ TEST_P(DatarateTestVP9Large, BasicRateTargeting3TemporalLayersFrameDropping) {
} }
} }
class DatarateTestVP9RealTime : public DatarateTestVP9Large {
public:
virtual ~DatarateTestVP9RealTime() {}
};
// Check VP9 region of interest feature.
TEST_P(DatarateTestVP9RealTime, RegionOfInterest) {
if (deadline_ != VPX_DL_REALTIME || set_cpu_used_ < 5) return;
cfg_.rc_buf_initial_sz = 500;
cfg_.rc_buf_optimal_sz = 500;
cfg_.rc_buf_sz = 1000;
cfg_.rc_dropframe_thresh = 0;
cfg_.rc_min_quantizer = 0;
cfg_.rc_max_quantizer = 63;
cfg_.rc_end_usage = VPX_CBR;
cfg_.g_lag_in_frames = 0;
::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
30, 1, 0, 300);
cfg_.rc_target_bitrate = 450;
cfg_.g_w = 352;
cfg_.g_h = 288;
ResetModel();
// Set ROI parameters
use_roi_ = true;
memset(&roi_, 0, sizeof(roi_));
roi_.rows = (cfg_.g_h + 7) / 8;
roi_.cols = (cfg_.g_w + 7) / 8;
roi_.delta_q[1] = -20;
roi_.delta_lf[1] = -20;
memset(roi_.ref_frame, -1, sizeof(roi_.ref_frame));
roi_.ref_frame[1] = 1;
// Use 2 states: 1 is center square, 0 is the rest.
roi_.roi_map = reinterpret_cast<uint8_t *>(
calloc(roi_.rows * roi_.cols, sizeof(*roi_.roi_map)));
ASSERT_TRUE(roi_.roi_map != NULL);
for (unsigned int i = 0; i < roi_.rows; ++i) {
for (unsigned int j = 0; j < roi_.cols; ++j) {
if (i > (roi_.rows >> 2) && i < ((roi_.rows * 3) >> 2) &&
j > (roi_.cols >> 2) && j < ((roi_.cols * 3) >> 2)) {
roi_.roi_map[i * roi_.cols + j] = 1;
}
}
}
ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
ASSERT_GE(cfg_.rc_target_bitrate, effective_datarate_[0] * 0.90)
<< " The datarate for the file exceeds the target!";
ASSERT_LE(cfg_.rc_target_bitrate, effective_datarate_[0] * 1.4)
<< " The datarate for the file missed the target!";
free(roi_.roi_map);
}
#if CONFIG_VP9_TEMPORAL_DENOISING #if CONFIG_VP9_TEMPORAL_DENOISING
class DatarateTestVP9LargeDenoiser : public DatarateTestVP9Large { class DatarateTestVP9LargeDenoiser : public DatarateTestVP9Large {
public: public:
@@ -1285,78 +1216,18 @@ class DatarateOnePassCbrSvc
} }
virtual void ResetModel() { virtual void ResetModel() {
last_pts_ = 0; last_pts_ = 0;
bits_in_buffer_model_ = cfg_.rc_target_bitrate * cfg_.rc_buf_initial_sz;
frame_number_ = 0;
first_drop_ = 0;
bits_total_ = 0;
duration_ = 0.0; duration_ = 0.0;
mismatch_psnr_ = 0.0; mismatch_psnr_ = 0.0;
mismatch_nframes_ = 0; mismatch_nframes_ = 0;
denoiser_on_ = 0; denoiser_on_ = 0;
tune_content_ = 0; tune_content_ = 0;
base_speed_setting_ = 5; base_speed_setting_ = 5;
spatial_layer_id_ = 0;
temporal_layer_id_ = 0;
update_pattern_ = 0;
memset(bits_in_buffer_model_, 0, sizeof(bits_in_buffer_model_));
memset(bits_total_, 0, sizeof(bits_total_));
memset(layer_target_avg_bandwidth_, 0, sizeof(layer_target_avg_bandwidth_));
dynamic_drop_layer_ = false;
} }
virtual void BeginPassHook(unsigned int /*pass*/) {} virtual void BeginPassHook(unsigned int /*pass*/) {}
// Example pattern for spatial layers and 2 temporal layers used in the
// bypass/flexible mode. The pattern corresponds to the pattern
// VP9E_TEMPORAL_LAYERING_MODE_0101 (temporal_layering_mode == 2) used in
// non-flexible mode, except that we disable inter-layer prediction.
void set_frame_flags_bypass_mode(
int tl, int num_spatial_layers, int is_key_frame,
vpx_svc_ref_frame_config_t *ref_frame_config) {
for (int sl = 0; sl < num_spatial_layers; ++sl) {
if (!tl) {
if (!sl) {
ref_frame_config->frame_flags[sl] =
VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF | VP8_EFLAG_NO_UPD_GF |
VP8_EFLAG_NO_UPD_ARF;
} else {
if (is_key_frame) {
ref_frame_config->frame_flags[sl] =
VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF |
VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_ARF;
} else {
ref_frame_config->frame_flags[sl] =
VP8_EFLAG_NO_REF_ARF | VP8_EFLAG_NO_UPD_GF |
VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_REF_GF;
}
}
} else if (tl == 1) {
if (!sl) {
ref_frame_config->frame_flags[sl] =
VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF |
VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_GF;
} else {
ref_frame_config->frame_flags[sl] =
VP8_EFLAG_NO_REF_ARF | VP8_EFLAG_NO_UPD_LAST |
VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_REF_GF;
}
}
if (tl == 0) {
ref_frame_config->lst_fb_idx[sl] = sl;
if (sl) {
if (is_key_frame) {
ref_frame_config->lst_fb_idx[sl] = sl - 1;
ref_frame_config->gld_fb_idx[sl] = sl;
} else {
ref_frame_config->gld_fb_idx[sl] = sl - 1;
}
} else {
ref_frame_config->gld_fb_idx[sl] = 0;
}
ref_frame_config->alt_fb_idx[sl] = 0;
} else if (tl == 1) {
ref_frame_config->lst_fb_idx[sl] = sl;
ref_frame_config->gld_fb_idx[sl] = num_spatial_layers + sl - 1;
ref_frame_config->alt_fb_idx[sl] = num_spatial_layers + sl;
}
}
}
virtual void PreEncodeFrameHook(::libvpx_test::VideoSource *video, virtual void PreEncodeFrameHook(::libvpx_test::VideoSource *video,
::libvpx_test::Encoder *encoder) { ::libvpx_test::Encoder *encoder) {
if (video->frame() == 0) { if (video->frame() == 0) {
@@ -1381,137 +1252,36 @@ class DatarateOnePassCbrSvc
encoder->Control(VP8E_SET_STATIC_THRESHOLD, 1); encoder->Control(VP8E_SET_STATIC_THRESHOLD, 1);
encoder->Control(VP9E_SET_TUNE_CONTENT, tune_content_); encoder->Control(VP9E_SET_TUNE_CONTENT, tune_content_);
} }
if (update_pattern_ && video->frame() >= 100) {
vpx_svc_layer_id_t layer_id;
if (video->frame() == 100) {
cfg_.temporal_layering_mode = VP9E_TEMPORAL_LAYERING_MODE_BYPASS;
encoder->Config(&cfg_);
}
// Set layer id since the pattern changed.
layer_id.spatial_layer_id = 0;
layer_id.temporal_layer_id = (video->frame() % 2 != 0);
encoder->Control(VP9E_SET_SVC_LAYER_ID, &layer_id);
set_frame_flags_bypass_mode(layer_id.temporal_layer_id,
number_spatial_layers_, 0, &ref_frame_config);
encoder->Control(VP9E_SET_SVC_REF_FRAME_CONFIG, &ref_frame_config);
}
if (dynamic_drop_layer_) {
if (video->frame() == 100) {
// Change layer bitrates to set top layer to 0. This will trigger skip
// encoding/dropping of top spatial layer.
cfg_.rc_target_bitrate -= cfg_.layer_target_bitrate[2];
cfg_.layer_target_bitrate[2] = 0;
encoder->Config(&cfg_);
} else if (video->frame() == 300) {
// Change layer bitrate on top layer to non-zero to start encoding it
// again.
cfg_.layer_target_bitrate[2] = 500;
cfg_.rc_target_bitrate += cfg_.layer_target_bitrate[2];
encoder->Config(&cfg_);
}
}
const vpx_rational_t tb = video->timebase(); const vpx_rational_t tb = video->timebase();
timebase_ = static_cast<double>(tb.num) / tb.den; timebase_ = static_cast<double>(tb.num) / tb.den;
duration_ = 0; duration_ = 0;
} }
virtual void PostEncodeFrameHook(::libvpx_test::Encoder *encoder) {
vpx_svc_layer_id_t layer_id;
encoder->Control(VP9E_GET_SVC_LAYER_ID, &layer_id);
spatial_layer_id_ = layer_id.spatial_layer_id;
temporal_layer_id_ = layer_id.temporal_layer_id;
// Update buffer with per-layer target frame bandwidth, this is done
// for every frame passed to the encoder (encoded or dropped).
// For temporal layers, update the cumulative buffer level.
for (int sl = 0; sl < number_spatial_layers_; ++sl) {
for (int tl = temporal_layer_id_; tl < number_temporal_layers_; ++tl) {
const int layer = sl * number_temporal_layers_ + tl;
bits_in_buffer_model_[layer] +=
static_cast<int64_t>(layer_target_avg_bandwidth_[layer]);
}
}
}
vpx_codec_err_t parse_superframe_index(const uint8_t *data, size_t data_sz,
uint32_t sizes[8], int *count) {
uint8_t marker;
marker = *(data + data_sz - 1);
*count = 0;
if ((marker & 0xe0) == 0xc0) {
const uint32_t frames = (marker & 0x7) + 1;
const uint32_t mag = ((marker >> 3) & 0x3) + 1;
const size_t index_sz = 2 + mag * frames;
// This chunk is marked as having a superframe index but doesn't have
// enough data for it, thus it's an invalid superframe index.
if (data_sz < index_sz) return VPX_CODEC_CORRUPT_FRAME;
{
const uint8_t marker2 = *(data + data_sz - index_sz);
// This chunk is marked as having a superframe index but doesn't have
// the matching marker byte at the front of the index therefore it's an
// invalid chunk.
if (marker != marker2) return VPX_CODEC_CORRUPT_FRAME;
}
{
uint32_t i, j;
const uint8_t *x = &data[data_sz - index_sz + 1];
for (i = 0; i < frames; ++i) {
uint32_t this_sz = 0;
for (j = 0; j < mag; ++j) this_sz |= (*x++) << (j * 8);
sizes[i] = this_sz;
}
*count = frames;
}
}
return VPX_CODEC_OK;
}
virtual void FramePktHook(const vpx_codec_cx_pkt_t *pkt) { virtual void FramePktHook(const vpx_codec_cx_pkt_t *pkt) {
uint32_t sizes[8] = { 0 }; vpx_codec_pts_t duration = pkt->data.frame.pts - last_pts_;
int count = 0; if (last_pts_ == 0) duration = 1;
last_pts_ = pkt->data.frame.pts; bits_in_buffer_model_ += static_cast<int64_t>(
duration * timebase_ * cfg_.rc_target_bitrate * 1000);
const bool key_frame = const bool key_frame =
(pkt->data.frame.flags & VPX_FRAME_IS_KEY) ? true : false; (pkt->data.frame.flags & VPX_FRAME_IS_KEY) ? true : false;
parse_superframe_index(static_cast<const uint8_t *>(pkt->data.frame.buf), if (!key_frame) {
pkt->data.frame.sz, sizes, &count); // TODO(marpan): This check currently fails for some of the SVC tests,
if (!dynamic_drop_layer_) ASSERT_EQ(count, number_spatial_layers_); // re-enable when issue (webm:1350) is resolved.
for (int sl = 0; sl < number_spatial_layers_; ++sl) { // ASSERT_GE(bits_in_buffer_model_, 0) << "Buffer Underrun at frame "
sizes[sl] = sizes[sl] << 3; // << pkt->data.frame.pts;
// Update the total encoded bits per layer.
// For temporal layers, update the cumulative encoded bits per layer.
for (int tl = temporal_layer_id_; tl < number_temporal_layers_; ++tl) {
const int layer = sl * number_temporal_layers_ + tl;
bits_total_[layer] += static_cast<int64_t>(sizes[sl]);
// Update the per-layer buffer level with the encoded frame size.
bits_in_buffer_model_[layer] -= static_cast<int64_t>(sizes[sl]);
// There should be no buffer underrun, except on the base
// temporal layer, since there may be key frames there.
if (!key_frame && tl > 0) {
ASSERT_GE(bits_in_buffer_model_[layer], 0)
<< "Buffer Underrun at frame " << pkt->data.frame.pts;
} }
const size_t frame_size_in_bits = pkt->data.frame.sz * 8;
bits_in_buffer_model_ -= static_cast<int64_t>(frame_size_in_bits);
bits_total_ += frame_size_in_bits;
if (!first_drop_ && duration > 1) first_drop_ = last_pts_ + 1;
last_pts_ = pkt->data.frame.pts;
bits_in_last_frame_ = frame_size_in_bits;
++frame_number_;
} }
ASSERT_EQ(pkt->data.frame.width[sl],
top_sl_width_ * svc_params_.scaling_factor_num[sl] /
svc_params_.scaling_factor_den[sl]);
ASSERT_EQ(pkt->data.frame.height[sl],
top_sl_height_ * svc_params_.scaling_factor_num[sl] /
svc_params_.scaling_factor_den[sl]);
}
}
virtual void EndPassHook(void) { virtual void EndPassHook(void) {
for (int sl = 0; sl < number_spatial_layers_; ++sl) { if (bits_total_) {
for (int tl = 0; tl < number_temporal_layers_; ++tl) { const double file_size_in_kb = bits_total_ / 1000.; // bits per kilobit
const int layer = sl * number_temporal_layers_ + tl;
const double file_size_in_kb = bits_total_[layer] / 1000.;
duration_ = (last_pts_ + 1) * timebase_; duration_ = (last_pts_ + 1) * timebase_;
file_datarate_[layer] = file_size_in_kb / duration_; file_datarate_ = file_size_in_kb / duration_;
}
} }
} }
@@ -1524,11 +1294,13 @@ class DatarateOnePassCbrSvc
unsigned int GetMismatchFrames() { return mismatch_nframes_; } unsigned int GetMismatchFrames() { return mismatch_nframes_; }
vpx_codec_pts_t last_pts_; vpx_codec_pts_t last_pts_;
int64_t bits_in_buffer_model_[VPX_MAX_LAYERS]; int64_t bits_in_buffer_model_;
double timebase_; double timebase_;
int64_t bits_total_[VPX_MAX_LAYERS]; int frame_number_;
vpx_codec_pts_t first_drop_;
int64_t bits_total_;
double duration_; double duration_;
double file_datarate_[VPX_MAX_LAYERS]; double file_datarate_;
size_t bits_in_last_frame_; size_t bits_in_last_frame_;
vpx_svc_extra_cfg_t svc_params_; vpx_svc_extra_cfg_t svc_params_;
int speed_setting_; int speed_setting_;
@@ -1537,27 +1309,14 @@ class DatarateOnePassCbrSvc
int denoiser_on_; int denoiser_on_;
int tune_content_; int tune_content_;
int base_speed_setting_; int base_speed_setting_;
int spatial_layer_id_;
int temporal_layer_id_;
int number_spatial_layers_;
int number_temporal_layers_;
int layer_target_avg_bandwidth_[VPX_MAX_LAYERS];
bool dynamic_drop_layer_;
unsigned int top_sl_width_;
unsigned int top_sl_height_;
vpx_svc_ref_frame_config_t ref_frame_config;
int update_pattern_;
}; };
static void assign_layer_bitrates(vpx_codec_enc_cfg_t *const enc_cfg, static void assign_layer_bitrates(vpx_codec_enc_cfg_t *const enc_cfg,
const vpx_svc_extra_cfg_t *svc_params, const vpx_svc_extra_cfg_t *svc_params,
int spatial_layers, int temporal_layers, int spatial_layers, int temporal_layers,
int temporal_layering_mode, int temporal_layering_mode) {
int *layer_target_avg_bandwidth,
int64_t *bits_in_buffer_model) {
int sl, spatial_layer_target; int sl, spatial_layer_target;
float total = 0; float total = 0;
float alloc_ratio[VPX_MAX_LAYERS] = { 0 }; float alloc_ratio[VPX_MAX_LAYERS] = { 0 };
float framerate = 30.0;
for (sl = 0; sl < spatial_layers; ++sl) { for (sl = 0; sl < spatial_layers; ++sl) {
if (svc_params->scaling_factor_den[sl] > 0) { if (svc_params->scaling_factor_den[sl] > 0) {
alloc_ratio[sl] = (float)(svc_params->scaling_factor_num[sl] * 1.0 / alloc_ratio[sl] = (float)(svc_params->scaling_factor_num[sl] * 1.0 /
@@ -1577,41 +1336,8 @@ static void assign_layer_bitrates(vpx_codec_enc_cfg_t *const enc_cfg,
} else if (temporal_layering_mode == 2) { } else if (temporal_layering_mode == 2) {
enc_cfg->layer_target_bitrate[index] = spatial_layer_target * 2 / 3; enc_cfg->layer_target_bitrate[index] = spatial_layer_target * 2 / 3;
enc_cfg->layer_target_bitrate[index + 1] = spatial_layer_target; enc_cfg->layer_target_bitrate[index + 1] = spatial_layer_target;
} else if (temporal_layering_mode <= 1) {
enc_cfg->layer_target_bitrate[index] = spatial_layer_target;
} }
} }
for (sl = 0; sl < spatial_layers; ++sl) {
for (int tl = 0; tl < temporal_layers; ++tl) {
const int layer = sl * temporal_layers + tl;
float layer_framerate = framerate;
if (temporal_layers == 2 && tl == 0) layer_framerate = framerate / 2;
if (temporal_layers == 3 && tl == 0) layer_framerate = framerate / 4;
if (temporal_layers == 3 && tl == 1) layer_framerate = framerate / 2;
layer_target_avg_bandwidth[layer] = static_cast<int>(
enc_cfg->layer_target_bitrate[layer] * 1000.0 / layer_framerate);
bits_in_buffer_model[layer] =
enc_cfg->layer_target_bitrate[layer] * enc_cfg->rc_buf_initial_sz;
}
}
}
static void CheckLayerRateTargeting(vpx_codec_enc_cfg_t *const cfg,
int number_spatial_layers,
int number_temporal_layers,
double *file_datarate,
double thresh_overshoot,
double thresh_undershoot) {
for (int sl = 0; sl < number_spatial_layers; ++sl)
for (int tl = 0; tl < number_temporal_layers; ++tl) {
const int layer = sl * number_temporal_layers + tl;
ASSERT_GE(cfg->layer_target_bitrate[layer],
file_datarate[layer] * thresh_overshoot)
<< " The datarate for the file exceeds the target by too much!";
ASSERT_LE(cfg->layer_target_bitrate[layer],
file_datarate[layer] * thresh_undershoot)
<< " The datarate for the file is lower than the target by too much!";
}
} }
// Check basic rate targeting for 1 pass CBR SVC: 2 spatial layers and 1 // Check basic rate targeting for 1 pass CBR SVC: 2 spatial layers and 1
@@ -1637,21 +1363,14 @@ TEST_P(DatarateOnePassCbrSvc, OnePassCbrSvc2SL1TLScreenContent1) {
svc_params_.scaling_factor_den[1] = 288; svc_params_.scaling_factor_den[1] = 288;
cfg_.rc_dropframe_thresh = 10; cfg_.rc_dropframe_thresh = 10;
cfg_.kf_max_dist = 9999; cfg_.kf_max_dist = 9999;
number_spatial_layers_ = cfg_.ss_number_layers; ::libvpx_test::Y4mVideoSource video("niklas_1280_720_30.y4m", 0, 300);
number_temporal_layers_ = cfg_.ts_number_layers;
::libvpx_test::Y4mVideoSource video("niklas_1280_720_30.y4m", 0, 60);
top_sl_width_ = 1280;
top_sl_height_ = 720;
cfg_.rc_target_bitrate = 500; cfg_.rc_target_bitrate = 500;
ResetModel(); ResetModel();
tune_content_ = 1; tune_content_ = 1;
base_speed_setting_ = speed_setting_; base_speed_setting_ = speed_setting_;
assign_layer_bitrates(&cfg_, &svc_params_, cfg_.ss_number_layers, assign_layer_bitrates(&cfg_, &svc_params_, cfg_.ss_number_layers,
cfg_.ts_number_layers, cfg_.temporal_layering_mode, cfg_.ts_number_layers, cfg_.temporal_layering_mode);
layer_target_avg_bandwidth_, bits_in_buffer_model_);
ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
CheckLayerRateTargeting(&cfg_, number_spatial_layers_,
number_temporal_layers_, file_datarate_, 0.78, 1.15);
EXPECT_EQ(static_cast<unsigned int>(0), GetMismatchFrames()); EXPECT_EQ(static_cast<unsigned int>(0), GetMismatchFrames());
} }
@@ -1679,30 +1398,26 @@ TEST_P(DatarateOnePassCbrSvc, OnePassCbrSvc2SL3TL) {
svc_params_.scaling_factor_den[1] = 288; svc_params_.scaling_factor_den[1] = 288;
cfg_.rc_dropframe_thresh = 0; cfg_.rc_dropframe_thresh = 0;
cfg_.kf_max_dist = 9999; cfg_.kf_max_dist = 9999;
number_spatial_layers_ = cfg_.ss_number_layers; ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
number_temporal_layers_ = cfg_.ts_number_layers; 30, 1, 0, 200);
::libvpx_test::I420VideoSource video("niklas_640_480_30.yuv", 640, 480, 30, 1,
0, 400);
top_sl_width_ = 640;
top_sl_height_ = 480;
// TODO(marpan): Check that effective_datarate for each layer hits the // TODO(marpan): Check that effective_datarate for each layer hits the
// layer target_bitrate. // layer target_bitrate.
for (int i = 200; i <= 800; i += 200) { for (int i = 200; i <= 800; i += 200) {
cfg_.rc_target_bitrate = i; cfg_.rc_target_bitrate = i;
ResetModel(); ResetModel();
assign_layer_bitrates(&cfg_, &svc_params_, cfg_.ss_number_layers, assign_layer_bitrates(&cfg_, &svc_params_, cfg_.ss_number_layers,
cfg_.ts_number_layers, cfg_.temporal_layering_mode, cfg_.ts_number_layers, cfg_.temporal_layering_mode);
layer_target_avg_bandwidth_, bits_in_buffer_model_);
ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
CheckLayerRateTargeting(&cfg_, number_spatial_layers_, ASSERT_GE(cfg_.rc_target_bitrate, file_datarate_ * 0.78)
number_temporal_layers_, file_datarate_, 0.78, << " The datarate for the file exceeds the target by too much!";
1.15); ASSERT_LE(cfg_.rc_target_bitrate, file_datarate_ * 1.15)
<< " The datarate for the file is lower than the target by too much!";
#if CONFIG_VP9_DECODER #if CONFIG_VP9_DECODER
// Number of temporal layers > 1, so half of the frames in this SVC pattern // Number of temporal layers > 1, so half of the frames in this SVC pattern
// will be non-reference frame and hence encoder will avoid loopfilter. // will be non-reference frame and hence encoder will avoid loopfilter.
// Since frame dropper is off, we can expect 200 (half of the sequence) // Since frame dropper is off, we can expcet 100 (half of the sequence)
// mismatched frames. // mismatched frames.
EXPECT_EQ(static_cast<unsigned int>(200), GetMismatchFrames()); EXPECT_EQ(static_cast<unsigned int>(100), GetMismatchFrames());
#endif #endif
} }
} }
@@ -1731,43 +1446,33 @@ TEST_P(DatarateOnePassCbrSvc, OnePassCbrSvc2SL3TLDenoiserOn) {
svc_params_.scaling_factor_den[1] = 288; svc_params_.scaling_factor_den[1] = 288;
cfg_.rc_dropframe_thresh = 0; cfg_.rc_dropframe_thresh = 0;
cfg_.kf_max_dist = 9999; cfg_.kf_max_dist = 9999;
number_spatial_layers_ = cfg_.ss_number_layers; ::libvpx_test::Y4mVideoSource video("niklas_1280_720_30.y4m", 0, 300);
number_temporal_layers_ = cfg_.ts_number_layers;
::libvpx_test::I420VideoSource video("niklas_640_480_30.yuv", 640, 480, 30, 1,
0, 400);
top_sl_width_ = 640;
top_sl_height_ = 480;
// TODO(marpan): Check that effective_datarate for each layer hits the // TODO(marpan): Check that effective_datarate for each layer hits the
// layer target_bitrate. // layer target_bitrate.
// For SVC, noise_sen = 1 means denoising only the top spatial layer
// noise_sen = 2 means denoising the two top spatial layers.
for (int noise_sen = 1; noise_sen <= 2; noise_sen++) {
for (int i = 600; i <= 1000; i += 200) { for (int i = 600; i <= 1000; i += 200) {
cfg_.rc_target_bitrate = i; cfg_.rc_target_bitrate = i;
ResetModel(); ResetModel();
denoiser_on_ = noise_sen; denoiser_on_ = 1;
assign_layer_bitrates(&cfg_, &svc_params_, cfg_.ss_number_layers, assign_layer_bitrates(&cfg_, &svc_params_, cfg_.ss_number_layers,
cfg_.ts_number_layers, cfg_.temporal_layering_mode, cfg_.ts_number_layers, cfg_.temporal_layering_mode);
layer_target_avg_bandwidth_, bits_in_buffer_model_);
ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
CheckLayerRateTargeting(&cfg_, number_spatial_layers_, ASSERT_GE(cfg_.rc_target_bitrate, file_datarate_ * 0.78)
number_temporal_layers_, file_datarate_, 0.78, << " The datarate for the file exceeds the target by too much!";
1.15); ASSERT_LE(cfg_.rc_target_bitrate, file_datarate_ * 1.15)
<< " The datarate for the file is lower than the target by too much!";
#if CONFIG_VP9_DECODER #if CONFIG_VP9_DECODER
// Number of temporal layers > 1, so half of the frames in this SVC // Number of temporal layers > 1, so half of the frames in this SVC pattern
// pattern
// will be non-reference frame and hence encoder will avoid loopfilter. // will be non-reference frame and hence encoder will avoid loopfilter.
// Since frame dropper is off, we can expect 200 (half of the sequence) // Since frame dropper is off, we can expcet 150 (half of the sequence)
// mismatched frames. // mismatched frames.
EXPECT_EQ(static_cast<unsigned int>(200), GetMismatchFrames()); EXPECT_EQ(static_cast<unsigned int>(150), GetMismatchFrames());
#endif #endif
} }
} }
}
// Check basic rate targeting for 1 pass CBR SVC: 2 spatial layers and 3 // Check basic rate targeting for 1 pass CBR SVC: 2 spatial layers and 3
// temporal layers. Run CIF clip with 1 thread, and few short key frame periods. // temporal layers. Run CIF clip with 1 thread, and few short key frame periods.
TEST_P(DatarateOnePassCbrSvc, OnePassCbrSvc2SL3TLSmallKf) { TEST_P(DatarateOnePassCbrSvc, DISABLED_OnePassCbrSvc2SL3TLSmallKf) {
cfg_.rc_buf_initial_sz = 500; cfg_.rc_buf_initial_sz = 500;
cfg_.rc_buf_optimal_sz = 500; cfg_.rc_buf_optimal_sz = 500;
cfg_.rc_buf_sz = 1000; cfg_.rc_buf_sz = 1000;
@@ -1788,25 +1493,21 @@ TEST_P(DatarateOnePassCbrSvc, OnePassCbrSvc2SL3TLSmallKf) {
svc_params_.scaling_factor_num[1] = 288; svc_params_.scaling_factor_num[1] = 288;
svc_params_.scaling_factor_den[1] = 288; svc_params_.scaling_factor_den[1] = 288;
cfg_.rc_dropframe_thresh = 10; cfg_.rc_dropframe_thresh = 10;
::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
30, 1, 0, 200);
cfg_.rc_target_bitrate = 400; cfg_.rc_target_bitrate = 400;
number_spatial_layers_ = cfg_.ss_number_layers;
number_temporal_layers_ = cfg_.ts_number_layers;
::libvpx_test::I420VideoSource video("niklas_640_480_30.yuv", 640, 480, 30, 1,
0, 400);
top_sl_width_ = 640;
top_sl_height_ = 480;
// For this 3 temporal layer case, pattern repeats every 4 frames, so choose // For this 3 temporal layer case, pattern repeats every 4 frames, so choose
// 4 key neighboring key frame periods (so key frame will land on 0-2-1-2). // 4 key neighboring key frame periods (so key frame will land on 0-2-1-2).
for (int j = 64; j <= 67; j++) { for (int j = 64; j <= 67; j++) {
cfg_.kf_max_dist = j; cfg_.kf_max_dist = j;
ResetModel(); ResetModel();
assign_layer_bitrates(&cfg_, &svc_params_, cfg_.ss_number_layers, assign_layer_bitrates(&cfg_, &svc_params_, cfg_.ss_number_layers,
cfg_.ts_number_layers, cfg_.temporal_layering_mode, cfg_.ts_number_layers, cfg_.temporal_layering_mode);
layer_target_avg_bandwidth_, bits_in_buffer_model_);
ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
CheckLayerRateTargeting(&cfg_, number_spatial_layers_, ASSERT_GE(cfg_.rc_target_bitrate, file_datarate_ * 0.80)
number_temporal_layers_, file_datarate_, 0.78, << " The datarate for the file exceeds the target by too much!";
1.15); ASSERT_LE(cfg_.rc_target_bitrate, file_datarate_ * 1.15)
<< " The datarate for the file is lower than the target by too much!";
} }
} }
@@ -1834,25 +1535,22 @@ TEST_P(DatarateOnePassCbrSvc, OnePassCbrSvc2SL3TL4Threads) {
svc_params_.scaling_factor_den[1] = 288; svc_params_.scaling_factor_den[1] = 288;
cfg_.rc_dropframe_thresh = 0; cfg_.rc_dropframe_thresh = 0;
cfg_.kf_max_dist = 9999; cfg_.kf_max_dist = 9999;
number_spatial_layers_ = cfg_.ss_number_layers; ::libvpx_test::Y4mVideoSource video("niklas_1280_720_30.y4m", 0, 300);
number_temporal_layers_ = cfg_.ts_number_layers;
::libvpx_test::Y4mVideoSource video("niklas_1280_720_30.y4m", 0, 60);
top_sl_width_ = 1280;
top_sl_height_ = 720;
cfg_.rc_target_bitrate = 800; cfg_.rc_target_bitrate = 800;
ResetModel(); ResetModel();
assign_layer_bitrates(&cfg_, &svc_params_, cfg_.ss_number_layers, assign_layer_bitrates(&cfg_, &svc_params_, cfg_.ss_number_layers,
cfg_.ts_number_layers, cfg_.temporal_layering_mode, cfg_.ts_number_layers, cfg_.temporal_layering_mode);
layer_target_avg_bandwidth_, bits_in_buffer_model_);
ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
CheckLayerRateTargeting(&cfg_, number_spatial_layers_, ASSERT_GE(cfg_.rc_target_bitrate, file_datarate_ * 0.78)
number_temporal_layers_, file_datarate_, 0.78, 1.15); << " The datarate for the file exceeds the target by too much!";
ASSERT_LE(cfg_.rc_target_bitrate, file_datarate_ * 1.15)
<< " The datarate for the file is lower than the target by too much!";
#if CONFIG_VP9_DECODER #if CONFIG_VP9_DECODER
// Number of temporal layers > 1, so half of the frames in this SVC pattern // Number of temporal layers > 1, so half of the frames in this SVC pattern
// will be non-reference frame and hence encoder will avoid loopfilter. // will be non-reference frame and hence encoder will avoid loopfilter.
// Since frame dropper is off, we can expect 30 (half of the sequence) // Since frame dropper is off, we can expcet 150 (half of the sequence)
// mismatched frames. // mismatched frames.
EXPECT_EQ(static_cast<unsigned int>(30), GetMismatchFrames()); EXPECT_EQ(static_cast<unsigned int>(150), GetMismatchFrames());
#endif #endif
} }
@@ -1882,126 +1580,25 @@ TEST_P(DatarateOnePassCbrSvc, OnePassCbrSvc3SL3TL) {
svc_params_.scaling_factor_den[2] = 288; svc_params_.scaling_factor_den[2] = 288;
cfg_.rc_dropframe_thresh = 0; cfg_.rc_dropframe_thresh = 0;
cfg_.kf_max_dist = 9999; cfg_.kf_max_dist = 9999;
number_spatial_layers_ = cfg_.ss_number_layers; ::libvpx_test::Y4mVideoSource video("niklas_1280_720_30.y4m", 0, 300);
number_temporal_layers_ = cfg_.ts_number_layers;
::libvpx_test::I420VideoSource video("niklas_640_480_30.yuv", 640, 480, 30, 1,
0, 400);
top_sl_width_ = 640;
top_sl_height_ = 480;
cfg_.rc_target_bitrate = 800; cfg_.rc_target_bitrate = 800;
ResetModel(); ResetModel();
assign_layer_bitrates(&cfg_, &svc_params_, cfg_.ss_number_layers, assign_layer_bitrates(&cfg_, &svc_params_, cfg_.ss_number_layers,
cfg_.ts_number_layers, cfg_.temporal_layering_mode, cfg_.ts_number_layers, cfg_.temporal_layering_mode);
layer_target_avg_bandwidth_, bits_in_buffer_model_);
ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
CheckLayerRateTargeting(&cfg_, number_spatial_layers_, ASSERT_GE(cfg_.rc_target_bitrate, file_datarate_ * 0.78)
number_temporal_layers_, file_datarate_, 0.78, 1.15); << " The datarate for the file exceeds the target by too much!";
ASSERT_LE(cfg_.rc_target_bitrate, file_datarate_ * 1.22)
<< " The datarate for the file is lower than the target by too much!";
#if CONFIG_VP9_DECODER #if CONFIG_VP9_DECODER
// Number of temporal layers > 1, so half of the frames in this SVC pattern // Number of temporal layers > 1, so half of the frames in this SVC pattern
// will be non-reference frame and hence encoder will avoid loopfilter. // will be non-reference frame and hence encoder will avoid loopfilter.
// Since frame dropper is off, we can expect 200 (half of the sequence) // Since frame dropper is off, we can expcet 150 (half of the sequence)
// mismatched frames. // mismatched frames.
EXPECT_EQ(static_cast<unsigned int>(200), GetMismatchFrames()); EXPECT_EQ(static_cast<unsigned int>(150), GetMismatchFrames());
#endif #endif
} }
// Check basic rate targeting for 1 pass CBR SVC: 3 spatial layers and
// 2 temporal layers, with a change on the fly from the fixed SVC pattern to one
// generate via SVC_SET_REF_FRAME_CONFIG. The new pattern also disables
// inter-layer prediction.
TEST_P(DatarateOnePassCbrSvc, OnePassCbrSvc3SL2TLDynamicPatternChange) {
cfg_.rc_buf_initial_sz = 500;
cfg_.rc_buf_optimal_sz = 500;
cfg_.rc_buf_sz = 1000;
cfg_.rc_min_quantizer = 0;
cfg_.rc_max_quantizer = 63;
cfg_.rc_end_usage = VPX_CBR;
cfg_.g_lag_in_frames = 0;
cfg_.ss_number_layers = 3;
cfg_.ts_number_layers = 2;
cfg_.ts_rate_decimator[0] = 2;
cfg_.ts_rate_decimator[1] = 1;
cfg_.g_error_resilient = 1;
cfg_.g_threads = 1;
cfg_.temporal_layering_mode = 2;
svc_params_.scaling_factor_num[0] = 72;
svc_params_.scaling_factor_den[0] = 288;
svc_params_.scaling_factor_num[1] = 144;
svc_params_.scaling_factor_den[1] = 288;
svc_params_.scaling_factor_num[2] = 288;
svc_params_.scaling_factor_den[2] = 288;
cfg_.rc_dropframe_thresh = 0;
cfg_.kf_max_dist = 9999;
number_spatial_layers_ = cfg_.ss_number_layers;
number_temporal_layers_ = cfg_.ts_number_layers;
// Change SVC pattern on the fly.
update_pattern_ = 1;
::libvpx_test::I420VideoSource video("niklas_640_480_30.yuv", 640, 480, 30, 1,
0, 400);
top_sl_width_ = 640;
top_sl_height_ = 480;
cfg_.rc_target_bitrate = 800;
ResetModel();
assign_layer_bitrates(&cfg_, &svc_params_, cfg_.ss_number_layers,
cfg_.ts_number_layers, cfg_.temporal_layering_mode,
layer_target_avg_bandwidth_, bits_in_buffer_model_);
ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
CheckLayerRateTargeting(&cfg_, number_spatial_layers_,
number_temporal_layers_, file_datarate_, 0.78, 1.15);
#if CONFIG_VP9_DECODER
// Number of temporal layers > 1, so half of the frames in this SVC pattern
// will be non-reference frame and hence encoder will avoid loopfilter.
// Since frame dropper is off, we can expect 200 (half of the sequence)
// mismatched frames.
EXPECT_EQ(static_cast<unsigned int>(200), GetMismatchFrames());
#endif
}
// Check basic rate targeting for 1 pass CBR SVC with 3 spatial layers and on
// the fly switching to 2 spatial layers and then back to 3. This switch is done
// by setting top spatial layer bitrate to 0, and then back to non-zero, during
// the sequence.
TEST_P(DatarateOnePassCbrSvc, OnePassCbrSvc3SL_to_2SL_dynamic) {
cfg_.rc_buf_initial_sz = 500;
cfg_.rc_buf_optimal_sz = 500;
cfg_.rc_buf_sz = 1000;
cfg_.rc_min_quantizer = 0;
cfg_.rc_max_quantizer = 63;
cfg_.rc_end_usage = VPX_CBR;
cfg_.g_lag_in_frames = 0;
cfg_.ss_number_layers = 3;
cfg_.ts_number_layers = 1;
cfg_.ts_rate_decimator[0] = 1;
cfg_.g_error_resilient = 1;
cfg_.g_threads = 1;
cfg_.temporal_layering_mode = 0;
svc_params_.scaling_factor_num[0] = 72;
svc_params_.scaling_factor_den[0] = 288;
svc_params_.scaling_factor_num[1] = 144;
svc_params_.scaling_factor_den[1] = 288;
svc_params_.scaling_factor_num[2] = 288;
svc_params_.scaling_factor_den[2] = 288;
cfg_.rc_dropframe_thresh = 0;
cfg_.kf_max_dist = 9999;
number_spatial_layers_ = cfg_.ss_number_layers;
number_temporal_layers_ = cfg_.ts_number_layers;
::libvpx_test::I420VideoSource video("niklas_640_480_30.yuv", 640, 480, 30, 1,
0, 400);
top_sl_width_ = 640;
top_sl_height_ = 480;
cfg_.rc_target_bitrate = 800;
ResetModel();
dynamic_drop_layer_ = true;
assign_layer_bitrates(&cfg_, &svc_params_, cfg_.ss_number_layers,
cfg_.ts_number_layers, cfg_.temporal_layering_mode,
layer_target_avg_bandwidth_, bits_in_buffer_model_);
ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
// Don't check rate targeting on top spatial layer since it will be skipped
// for part of the sequence.
CheckLayerRateTargeting(&cfg_, number_spatial_layers_ - 1,
number_temporal_layers_, file_datarate_, 0.78, 1.15);
}
// Check basic rate targeting for 1 pass CBR SVC: 3 spatial layers and 3 // Check basic rate targeting for 1 pass CBR SVC: 3 spatial layers and 3
// temporal layers. Run CIF clip with 1 thread, and few short key frame periods. // temporal layers. Run CIF clip with 1 thread, and few short key frame periods.
TEST_P(DatarateOnePassCbrSvc, OnePassCbrSvc3SL3TLSmallKf) { TEST_P(DatarateOnePassCbrSvc, OnePassCbrSvc3SL3TLSmallKf) {
@@ -2027,25 +1624,20 @@ TEST_P(DatarateOnePassCbrSvc, OnePassCbrSvc3SL3TLSmallKf) {
svc_params_.scaling_factor_num[2] = 288; svc_params_.scaling_factor_num[2] = 288;
svc_params_.scaling_factor_den[2] = 288; svc_params_.scaling_factor_den[2] = 288;
cfg_.rc_dropframe_thresh = 10; cfg_.rc_dropframe_thresh = 10;
::libvpx_test::Y4mVideoSource video("niklas_1280_720_30.y4m", 0, 300);
cfg_.rc_target_bitrate = 800; cfg_.rc_target_bitrate = 800;
number_spatial_layers_ = cfg_.ss_number_layers;
number_temporal_layers_ = cfg_.ts_number_layers;
::libvpx_test::I420VideoSource video("niklas_640_480_30.yuv", 640, 480, 30, 1,
0, 400);
top_sl_width_ = 640;
top_sl_height_ = 480;
// For this 3 temporal layer case, pattern repeats every 4 frames, so choose // For this 3 temporal layer case, pattern repeats every 4 frames, so choose
// 4 key neighboring key frame periods (so key frame will land on 0-2-1-2). // 4 key neighboring key frame periods (so key frame will land on 0-2-1-2).
for (int j = 32; j <= 35; j++) { for (int j = 32; j <= 35; j++) {
cfg_.kf_max_dist = j; cfg_.kf_max_dist = j;
ResetModel(); ResetModel();
assign_layer_bitrates(&cfg_, &svc_params_, cfg_.ss_number_layers, assign_layer_bitrates(&cfg_, &svc_params_, cfg_.ss_number_layers,
cfg_.ts_number_layers, cfg_.temporal_layering_mode, cfg_.ts_number_layers, cfg_.temporal_layering_mode);
layer_target_avg_bandwidth_, bits_in_buffer_model_);
ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
CheckLayerRateTargeting(&cfg_, number_spatial_layers_, ASSERT_GE(cfg_.rc_target_bitrate, file_datarate_ * 0.80)
number_temporal_layers_, file_datarate_, 0.78, << " The datarate for the file exceeds the target by too much!";
1.15); ASSERT_LE(cfg_.rc_target_bitrate, file_datarate_ * 1.30)
<< " The datarate for the file is lower than the target by too much!";
} }
} }
@@ -2075,25 +1667,22 @@ TEST_P(DatarateOnePassCbrSvc, OnePassCbrSvc3SL3TL4threads) {
svc_params_.scaling_factor_den[2] = 288; svc_params_.scaling_factor_den[2] = 288;
cfg_.rc_dropframe_thresh = 0; cfg_.rc_dropframe_thresh = 0;
cfg_.kf_max_dist = 9999; cfg_.kf_max_dist = 9999;
number_spatial_layers_ = cfg_.ss_number_layers; ::libvpx_test::Y4mVideoSource video("niklas_1280_720_30.y4m", 0, 300);
number_temporal_layers_ = cfg_.ts_number_layers;
::libvpx_test::Y4mVideoSource video("niklas_1280_720_30.y4m", 0, 60);
top_sl_width_ = 1280;
top_sl_height_ = 720;
cfg_.rc_target_bitrate = 800; cfg_.rc_target_bitrate = 800;
ResetModel(); ResetModel();
assign_layer_bitrates(&cfg_, &svc_params_, cfg_.ss_number_layers, assign_layer_bitrates(&cfg_, &svc_params_, cfg_.ss_number_layers,
cfg_.ts_number_layers, cfg_.temporal_layering_mode, cfg_.ts_number_layers, cfg_.temporal_layering_mode);
layer_target_avg_bandwidth_, bits_in_buffer_model_);
ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
CheckLayerRateTargeting(&cfg_, number_spatial_layers_, ASSERT_GE(cfg_.rc_target_bitrate, file_datarate_ * 0.78)
number_temporal_layers_, file_datarate_, 0.78, 1.15); << " The datarate for the file exceeds the target by too much!";
ASSERT_LE(cfg_.rc_target_bitrate, file_datarate_ * 1.22)
<< " The datarate for the file is lower than the target by too much!";
#if CONFIG_VP9_DECODER #if CONFIG_VP9_DECODER
// Number of temporal layers > 1, so half of the frames in this SVC pattern // Number of temporal layers > 1, so half of the frames in this SVC pattern
// will be non-reference frame and hence encoder will avoid loopfilter. // will be non-reference frame and hence encoder will avoid loopfilter.
// Since frame dropper is off, we can expect 30 (half of the sequence) // Since frame dropper is off, we can expcet 150 (half of the sequence)
// mismatched frames. // mismatched frames.
EXPECT_EQ(static_cast<unsigned int>(30), GetMismatchFrames()); EXPECT_EQ(static_cast<unsigned int>(150), GetMismatchFrames());
#endif #endif
} }
@@ -2125,21 +1714,9 @@ TEST_P(DatarateOnePassCbrSvc, OnePassCbrSvc2SL1TL5x5MultipleRuns) {
cfg_.layer_target_bitrate[0] = 300; cfg_.layer_target_bitrate[0] = 300;
cfg_.layer_target_bitrate[1] = 1400; cfg_.layer_target_bitrate[1] = 1400;
cfg_.rc_target_bitrate = 1700; cfg_.rc_target_bitrate = 1700;
number_spatial_layers_ = cfg_.ss_number_layers; ::libvpx_test::Y4mVideoSource video("niklas_1280_720_30.y4m", 0, 300);
number_temporal_layers_ = cfg_.ts_number_layers;
ResetModel(); ResetModel();
layer_target_avg_bandwidth_[0] = cfg_.layer_target_bitrate[0] * 1000 / 30;
bits_in_buffer_model_[0] =
cfg_.layer_target_bitrate[0] * cfg_.rc_buf_initial_sz;
layer_target_avg_bandwidth_[1] = cfg_.layer_target_bitrate[1] * 1000 / 30;
bits_in_buffer_model_[1] =
cfg_.layer_target_bitrate[1] * cfg_.rc_buf_initial_sz;
::libvpx_test::Y4mVideoSource video("niklas_1280_720_30.y4m", 0, 60);
top_sl_width_ = 1280;
top_sl_height_ = 720;
ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
CheckLayerRateTargeting(&cfg_, number_spatial_layers_,
number_temporal_layers_, file_datarate_, 0.78, 1.15);
EXPECT_EQ(static_cast<unsigned int>(0), GetMismatchFrames()); EXPECT_EQ(static_cast<unsigned int>(0), GetMismatchFrames());
} }
@@ -2152,9 +1729,6 @@ VP9_INSTANTIATE_TEST_CASE(DatarateTestVP9Large,
::testing::Values(::libvpx_test::kOnePassGood, ::testing::Values(::libvpx_test::kOnePassGood,
::libvpx_test::kRealTime), ::libvpx_test::kRealTime),
::testing::Range(2, 9)); ::testing::Range(2, 9));
VP9_INSTANTIATE_TEST_CASE(DatarateTestVP9RealTime,
::testing::Values(::libvpx_test::kRealTime),
::testing::Range(5, 9));
#if CONFIG_VP9_TEMPORAL_DENOISING #if CONFIG_VP9_TEMPORAL_DENOISING
VP9_INSTANTIATE_TEST_CASE(DatarateTestVP9LargeDenoiser, VP9_INSTANTIATE_TEST_CASE(DatarateTestVP9LargeDenoiser,
::testing::Values(::libvpx_test::kRealTime), ::testing::Values(::libvpx_test::kRealTime),

View File

@@ -28,8 +28,8 @@
using libvpx_test::ACMRandom; using libvpx_test::ACMRandom;
using libvpx_test::Buffer; using libvpx_test::Buffer;
using std::tr1::make_tuple;
using std::tr1::tuple; using std::tr1::tuple;
using std::tr1::make_tuple;
namespace { namespace {
typedef void (*PartialFdctFunc)(const int16_t *in, tran_low_t *out, int stride); typedef void (*PartialFdctFunc)(const int16_t *in, tran_low_t *out, int stride);

File diff suppressed because it is too large Load Diff

View File

@@ -106,90 +106,4 @@ TEST(EncodeAPI, ImageSizeSetting) {
} }
#endif #endif
// Set up 2 spatial streams with 2 temporal layers per stream, and generate
// invalid configuration by setting the temporal layer rate allocation
// (ts_target_bitrate[]) to 0 for both layers. This should fail independent of
// CONFIG_MULTI_RES_ENCODING.
TEST(EncodeAPI, MultiResEncode) {
static const vpx_codec_iface_t *kCodecs[] = {
#if CONFIG_VP8_ENCODER
&vpx_codec_vp8_cx_algo,
#endif
#if CONFIG_VP9_ENCODER
&vpx_codec_vp9_cx_algo,
#endif
};
const int width = 1280;
const int height = 720;
const int width_down = width / 2;
const int height_down = height / 2;
const int target_bitrate = 1000;
const int framerate = 30;
for (int c = 0; c < NELEMENTS(kCodecs); ++c) {
const vpx_codec_iface_t *const iface = kCodecs[c];
vpx_codec_ctx_t enc[2];
vpx_codec_enc_cfg_t cfg[2];
vpx_rational_t dsf[2] = { { 2, 1 }, { 2, 1 } };
memset(enc, 0, sizeof(enc));
for (int i = 0; i < 2; i++) {
vpx_codec_enc_config_default(iface, &cfg[i], 0);
}
/* Highest-resolution encoder settings */
cfg[0].g_w = width;
cfg[0].g_h = height;
cfg[0].rc_dropframe_thresh = 0;
cfg[0].rc_end_usage = VPX_CBR;
cfg[0].rc_resize_allowed = 0;
cfg[0].rc_min_quantizer = 2;
cfg[0].rc_max_quantizer = 56;
cfg[0].rc_undershoot_pct = 100;
cfg[0].rc_overshoot_pct = 15;
cfg[0].rc_buf_initial_sz = 500;
cfg[0].rc_buf_optimal_sz = 600;
cfg[0].rc_buf_sz = 1000;
cfg[0].g_error_resilient = 1; /* Enable error resilient mode */
cfg[0].g_lag_in_frames = 0;
cfg[0].kf_mode = VPX_KF_AUTO;
cfg[0].kf_min_dist = 3000;
cfg[0].kf_max_dist = 3000;
cfg[0].rc_target_bitrate = target_bitrate; /* Set target bitrate */
cfg[0].g_timebase.num = 1; /* Set fps */
cfg[0].g_timebase.den = framerate;
memcpy(&cfg[1], &cfg[0], sizeof(cfg[0]));
cfg[1].rc_target_bitrate = 500;
cfg[1].g_w = width_down;
cfg[1].g_h = height_down;
for (int i = 0; i < 2; i++) {
cfg[i].ts_number_layers = 2;
cfg[i].ts_periodicity = 2;
cfg[i].ts_rate_decimator[0] = 2;
cfg[i].ts_rate_decimator[1] = 1;
cfg[i].ts_layer_id[0] = 0;
cfg[i].ts_layer_id[1] = 1;
// Invalid parameters.
cfg[i].ts_target_bitrate[0] = 0;
cfg[i].ts_target_bitrate[1] = 0;
}
// VP9 should report incapable, VP8 invalid for all configurations.
const char kVP9Name[] = "WebM Project VP9";
const bool is_vp9 = strncmp(kVP9Name, vpx_codec_iface_name(iface),
sizeof(kVP9Name) - 1) == 0;
EXPECT_EQ(is_vp9 ? VPX_CODEC_INCAPABLE : VPX_CODEC_INVALID_PARAM,
vpx_codec_enc_init_multi(&enc[0], iface, &cfg[0], 2, 0, &dsf[0]));
for (int i = 0; i < 2; i++) {
vpx_codec_destroy(&enc[i]);
}
}
}
} // namespace } // namespace

View File

@@ -201,8 +201,6 @@ void EncoderTest::RunLoop(VideoSource *video) {
PreEncodeFrameHook(video, encoder.get()); PreEncodeFrameHook(video, encoder.get());
encoder->EncodeFrame(video, frame_flags_); encoder->EncodeFrame(video, frame_flags_);
PostEncodeFrameHook(encoder.get());
CxDataIterator iter = encoder->GetCxData(); CxDataIterator iter = encoder->GetCxData();
bool has_cxdata = false; bool has_cxdata = false;

View File

@@ -128,11 +128,6 @@ class Encoder {
ASSERT_EQ(VPX_CODEC_OK, res) << EncoderError(); ASSERT_EQ(VPX_CODEC_OK, res) << EncoderError();
} }
void Control(int ctrl_id, struct vpx_svc_ref_frame_config *arg) {
const vpx_codec_err_t res = vpx_codec_control_(&encoder_, ctrl_id, arg);
ASSERT_EQ(VPX_CODEC_OK, res) << EncoderError();
}
void Control(int ctrl_id, struct vpx_svc_parameters *arg) { void Control(int ctrl_id, struct vpx_svc_parameters *arg) {
const vpx_codec_err_t res = vpx_codec_control_(&encoder_, ctrl_id, arg); const vpx_codec_err_t res = vpx_codec_control_(&encoder_, ctrl_id, arg);
ASSERT_EQ(VPX_CODEC_OK, res) << EncoderError(); ASSERT_EQ(VPX_CODEC_OK, res) << EncoderError();
@@ -142,12 +137,15 @@ class Encoder {
const vpx_codec_err_t res = vpx_codec_control_(&encoder_, ctrl_id, arg); const vpx_codec_err_t res = vpx_codec_control_(&encoder_, ctrl_id, arg);
ASSERT_EQ(VPX_CODEC_OK, res) << EncoderError(); ASSERT_EQ(VPX_CODEC_OK, res) << EncoderError();
} }
#endif
#if CONFIG_VP8_ENCODER
void Control(int ctrl_id, vpx_roi_map_t *arg) { void Control(int ctrl_id, vpx_roi_map_t *arg) {
const vpx_codec_err_t res = vpx_codec_control_(&encoder_, ctrl_id, arg); const vpx_codec_err_t res = vpx_codec_control_(&encoder_, ctrl_id, arg);
ASSERT_EQ(VPX_CODEC_OK, res) << EncoderError(); ASSERT_EQ(VPX_CODEC_OK, res) << EncoderError();
} }
#endif #endif
void Config(const vpx_codec_enc_cfg_t *cfg) { void Config(const vpx_codec_enc_cfg_t *cfg) {
const vpx_codec_err_t res = vpx_codec_enc_config_set(&encoder_, cfg); const vpx_codec_err_t res = vpx_codec_enc_config_set(&encoder_, cfg);
ASSERT_EQ(VPX_CODEC_OK, res) << EncoderError(); ASSERT_EQ(VPX_CODEC_OK, res) << EncoderError();
@@ -221,8 +219,6 @@ class EncoderTest {
virtual void PreEncodeFrameHook(VideoSource * /*video*/, virtual void PreEncodeFrameHook(VideoSource * /*video*/,
Encoder * /*encoder*/) {} Encoder * /*encoder*/) {}
virtual void PostEncodeFrameHook(Encoder * /*encoder*/) {}
// Hook to be called on every compressed data packet. // Hook to be called on every compressed data packet.
virtual void FramePktHook(const vpx_codec_cx_pkt_t * /*pkt*/) {} virtual void FramePktHook(const vpx_codec_cx_pkt_t * /*pkt*/) {}

View File

@@ -675,9 +675,7 @@ INSTANTIATE_TEST_CASE_P(NEON, FwdTrans8x8DCT,
::testing::Values(make_tuple(&vpx_fdct8x8_neon, ::testing::Values(make_tuple(&vpx_fdct8x8_neon,
&vpx_idct8x8_64_add_neon, &vpx_idct8x8_64_add_neon,
0, VPX_BITS_8))); 0, VPX_BITS_8)));
// TODO(linfengz): reenable these functions once test vector failures are #if !CONFIG_VP9_HIGHBITDEPTH
// addressed.
#if 0 // !CONFIG_VP9_HIGHBITDEPTH
INSTANTIATE_TEST_CASE_P( INSTANTIATE_TEST_CASE_P(
NEON, FwdTrans8x8HT, NEON, FwdTrans8x8HT,
::testing::Values( ::testing::Values(

View File

@@ -174,4 +174,4 @@ INSTANTIATE_TEST_CASE_P(MSA, IDCTTest,
INSTANTIATE_TEST_CASE_P(MMI, IDCTTest, INSTANTIATE_TEST_CASE_P(MMI, IDCTTest,
::testing::Values(vp8_short_idct4x4llm_mmi)); ::testing::Values(vp8_short_idct4x4llm_mmi));
#endif // HAVE_MMI #endif // HAVE_MMI
} // namespace }

View File

@@ -123,7 +123,6 @@ TEST_P(InvalidFileTest, ReturnCode) { RunTest(); }
#if CONFIG_VP8_DECODER #if CONFIG_VP8_DECODER
const DecodeParam kVP8InvalidFileTests[] = { const DecodeParam kVP8InvalidFileTests[] = {
{ 1, "invalid-bug-1443.ivf" }, { 1, "invalid-bug-1443.ivf" },
{ 1, "invalid-token-partition.ivf" },
}; };
VP8_INSTANTIATE_TEST_CASE(InvalidFileTest, VP8_INSTANTIATE_TEST_CASE(InvalidFileTest,

View File

@@ -114,18 +114,6 @@ void InitInput(Pixel *s, Pixel *ref_s, ACMRandom *rnd, const uint8_t limit,
} }
} }
uint8_t GetOuterThresh(ACMRandom *rnd) {
return static_cast<uint8_t>(rnd->RandRange(3 * MAX_LOOP_FILTER + 5));
}
uint8_t GetInnerThresh(ACMRandom *rnd) {
return static_cast<uint8_t>(rnd->RandRange(MAX_LOOP_FILTER + 1));
}
uint8_t GetHevThresh(ACMRandom *rnd) {
return static_cast<uint8_t>(rnd->RandRange(MAX_LOOP_FILTER + 1) >> 4);
}
class Loop8Test6Param : public ::testing::TestWithParam<loop8_param_t> { class Loop8Test6Param : public ::testing::TestWithParam<loop8_param_t> {
public: public:
virtual ~Loop8Test6Param() {} virtual ~Loop8Test6Param() {}
@@ -174,15 +162,15 @@ TEST_P(Loop8Test6Param, OperationCheck) {
int first_failure = -1; int first_failure = -1;
for (int i = 0; i < count_test_block; ++i) { for (int i = 0; i < count_test_block; ++i) {
int err_count = 0; int err_count = 0;
uint8_t tmp = GetOuterThresh(&rnd); uint8_t tmp = static_cast<uint8_t>(rnd(3 * MAX_LOOP_FILTER + 4));
DECLARE_ALIGNED(16, const uint8_t, DECLARE_ALIGNED(16, const uint8_t,
blimit[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, blimit[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp }; tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };
tmp = GetInnerThresh(&rnd); tmp = static_cast<uint8_t>(rnd(MAX_LOOP_FILTER));
DECLARE_ALIGNED(16, const uint8_t, DECLARE_ALIGNED(16, const uint8_t,
limit[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, limit[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp }; tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };
tmp = GetHevThresh(&rnd); tmp = rnd.Rand8();
DECLARE_ALIGNED(16, const uint8_t, DECLARE_ALIGNED(16, const uint8_t,
thresh[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, thresh[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp }; tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };
@@ -233,15 +221,15 @@ TEST_P(Loop8Test6Param, ValueCheck) {
for (int i = 0; i < count_test_block; ++i) { for (int i = 0; i < count_test_block; ++i) {
int err_count = 0; int err_count = 0;
uint8_t tmp = GetOuterThresh(&rnd); uint8_t tmp = static_cast<uint8_t>(rnd(3 * MAX_LOOP_FILTER + 4));
DECLARE_ALIGNED(16, const uint8_t, DECLARE_ALIGNED(16, const uint8_t,
blimit[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, blimit[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp }; tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };
tmp = GetInnerThresh(&rnd); tmp = static_cast<uint8_t>(rnd(MAX_LOOP_FILTER));
DECLARE_ALIGNED(16, const uint8_t, DECLARE_ALIGNED(16, const uint8_t,
limit[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, limit[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp }; tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };
tmp = GetHevThresh(&rnd); tmp = rnd.Rand8();
DECLARE_ALIGNED(16, const uint8_t, DECLARE_ALIGNED(16, const uint8_t,
thresh[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, thresh[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp }; tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };
@@ -283,27 +271,27 @@ TEST_P(Loop8Test9Param, OperationCheck) {
int first_failure = -1; int first_failure = -1;
for (int i = 0; i < count_test_block; ++i) { for (int i = 0; i < count_test_block; ++i) {
int err_count = 0; int err_count = 0;
uint8_t tmp = GetOuterThresh(&rnd); uint8_t tmp = static_cast<uint8_t>(rnd(3 * MAX_LOOP_FILTER + 4));
DECLARE_ALIGNED(16, const uint8_t, DECLARE_ALIGNED(16, const uint8_t,
blimit0[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, blimit0[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp }; tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };
tmp = GetInnerThresh(&rnd); tmp = static_cast<uint8_t>(rnd(MAX_LOOP_FILTER));
DECLARE_ALIGNED(16, const uint8_t, DECLARE_ALIGNED(16, const uint8_t,
limit0[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, limit0[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp }; tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };
tmp = GetHevThresh(&rnd); tmp = rnd.Rand8();
DECLARE_ALIGNED(16, const uint8_t, DECLARE_ALIGNED(16, const uint8_t,
thresh0[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, thresh0[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp }; tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };
tmp = GetOuterThresh(&rnd); tmp = static_cast<uint8_t>(rnd(3 * MAX_LOOP_FILTER + 4));
DECLARE_ALIGNED(16, const uint8_t, DECLARE_ALIGNED(16, const uint8_t,
blimit1[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, blimit1[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp }; tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };
tmp = GetInnerThresh(&rnd); tmp = static_cast<uint8_t>(rnd(MAX_LOOP_FILTER));
DECLARE_ALIGNED(16, const uint8_t, DECLARE_ALIGNED(16, const uint8_t,
limit1[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, limit1[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp }; tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };
tmp = GetHevThresh(&rnd); tmp = rnd.Rand8();
DECLARE_ALIGNED(16, const uint8_t, DECLARE_ALIGNED(16, const uint8_t,
thresh1[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, thresh1[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp }; tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };
@@ -346,27 +334,27 @@ TEST_P(Loop8Test9Param, ValueCheck) {
int first_failure = -1; int first_failure = -1;
for (int i = 0; i < count_test_block; ++i) { for (int i = 0; i < count_test_block; ++i) {
int err_count = 0; int err_count = 0;
uint8_t tmp = GetOuterThresh(&rnd); uint8_t tmp = static_cast<uint8_t>(rnd(3 * MAX_LOOP_FILTER + 4));
DECLARE_ALIGNED(16, const uint8_t, DECLARE_ALIGNED(16, const uint8_t,
blimit0[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, blimit0[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp }; tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };
tmp = GetInnerThresh(&rnd); tmp = static_cast<uint8_t>(rnd(MAX_LOOP_FILTER));
DECLARE_ALIGNED(16, const uint8_t, DECLARE_ALIGNED(16, const uint8_t,
limit0[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, limit0[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp }; tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };
tmp = GetHevThresh(&rnd); tmp = rnd.Rand8();
DECLARE_ALIGNED(16, const uint8_t, DECLARE_ALIGNED(16, const uint8_t,
thresh0[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, thresh0[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp }; tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };
tmp = GetOuterThresh(&rnd); tmp = static_cast<uint8_t>(rnd(3 * MAX_LOOP_FILTER + 4));
DECLARE_ALIGNED(16, const uint8_t, DECLARE_ALIGNED(16, const uint8_t,
blimit1[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, blimit1[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp }; tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };
tmp = GetInnerThresh(&rnd); tmp = static_cast<uint8_t>(rnd(MAX_LOOP_FILTER));
DECLARE_ALIGNED(16, const uint8_t, DECLARE_ALIGNED(16, const uint8_t,
limit1[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, limit1[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp }; tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };
tmp = GetHevThresh(&rnd); tmp = rnd.Rand8();
DECLARE_ALIGNED(16, const uint8_t, DECLARE_ALIGNED(16, const uint8_t,
thresh1[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, thresh1[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp }; tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };

View File

@@ -277,29 +277,12 @@ class ResizeTest
SetMode(GET_PARAM(1)); SetMode(GET_PARAM(1));
} }
virtual void FramePktHook(const vpx_codec_cx_pkt_t *pkt) {
ASSERT_NE(static_cast<int>(pkt->data.frame.width[0]), 0);
ASSERT_NE(static_cast<int>(pkt->data.frame.height[0]), 0);
encode_frame_width_.push_back(pkt->data.frame.width[0]);
encode_frame_height_.push_back(pkt->data.frame.height[0]);
}
unsigned int GetFrameWidth(size_t idx) const {
return encode_frame_width_[idx];
}
unsigned int GetFrameHeight(size_t idx) const {
return encode_frame_height_[idx];
}
virtual void DecompressedFrameHook(const vpx_image_t &img, virtual void DecompressedFrameHook(const vpx_image_t &img,
vpx_codec_pts_t pts) { vpx_codec_pts_t pts) {
frame_info_list_.push_back(FrameInfo(pts, img.d_w, img.d_h)); frame_info_list_.push_back(FrameInfo(pts, img.d_w, img.d_h));
} }
std::vector<FrameInfo> frame_info_list_; std::vector<FrameInfo> frame_info_list_;
std::vector<unsigned int> encode_frame_width_;
std::vector<unsigned int> encode_frame_height_;
}; };
TEST_P(ResizeTest, TestExternalResizeWorks) { TEST_P(ResizeTest, TestExternalResizeWorks) {
@@ -313,9 +296,6 @@ TEST_P(ResizeTest, TestExternalResizeWorks) {
const unsigned int frame = static_cast<unsigned>(info->pts); const unsigned int frame = static_cast<unsigned>(info->pts);
unsigned int expected_w; unsigned int expected_w;
unsigned int expected_h; unsigned int expected_h;
const size_t idx = info - frame_info_list_.begin();
ASSERT_EQ(info->w, GetFrameWidth(idx));
ASSERT_EQ(info->h, GetFrameHeight(idx));
ScaleForFrameNumber(frame, kInitialWidth, kInitialHeight, &expected_w, ScaleForFrameNumber(frame, kInitialWidth, kInitialHeight, &expected_w,
&expected_h, 0); &expected_h, 0);
EXPECT_EQ(expected_w, info->w) EXPECT_EQ(expected_w, info->w)
@@ -484,23 +464,8 @@ class ResizeRealtimeTest
++mismatch_nframes_; ++mismatch_nframes_;
} }
virtual void FramePktHook(const vpx_codec_cx_pkt_t *pkt) {
ASSERT_NE(static_cast<int>(pkt->data.frame.width[0]), 0);
ASSERT_NE(static_cast<int>(pkt->data.frame.height[0]), 0);
encode_frame_width_.push_back(pkt->data.frame.width[0]);
encode_frame_height_.push_back(pkt->data.frame.height[0]);
}
unsigned int GetMismatchFrames() { return mismatch_nframes_; } unsigned int GetMismatchFrames() { return mismatch_nframes_; }
unsigned int GetFrameWidth(size_t idx) const {
return encode_frame_width_[idx];
}
unsigned int GetFrameHeight(size_t idx) const {
return encode_frame_height_[idx];
}
void DefaultConfig() { void DefaultConfig() {
cfg_.rc_buf_initial_sz = 500; cfg_.rc_buf_initial_sz = 500;
cfg_.rc_buf_optimal_sz = 600; cfg_.rc_buf_optimal_sz = 600;
@@ -528,8 +493,6 @@ class ResizeRealtimeTest
bool change_bitrate_; bool change_bitrate_;
double mismatch_psnr_; double mismatch_psnr_;
int mismatch_nframes_; int mismatch_nframes_;
std::vector<unsigned int> encode_frame_width_;
std::vector<unsigned int> encode_frame_height_;
}; };
TEST_P(ResizeRealtimeTest, TestExternalResizeWorks) { TEST_P(ResizeRealtimeTest, TestExternalResizeWorks) {
@@ -619,9 +582,6 @@ TEST_P(ResizeRealtimeTest, TestInternalResizeDownUpChangeBitRate) {
int resize_count = 0; int resize_count = 0;
for (std::vector<FrameInfo>::const_iterator info = frame_info_list_.begin(); for (std::vector<FrameInfo>::const_iterator info = frame_info_list_.begin();
info != frame_info_list_.end(); ++info) { info != frame_info_list_.end(); ++info) {
const size_t idx = info - frame_info_list_.begin();
ASSERT_EQ(info->w, GetFrameWidth(idx));
ASSERT_EQ(info->h, GetFrameHeight(idx));
if (info->w != last_w || info->h != last_h) { if (info->w != last_w || info->h != last_h) {
resize_count++; resize_count++;
if (resize_count == 1) { if (resize_count == 1) {

View File

@@ -112,9 +112,8 @@ INSTANTIATE_TEST_CASE_P(
#endif // HAVE_SSE2 #endif // HAVE_SSE2
#if HAVE_MSA #if HAVE_MSA
INSTANTIATE_TEST_CASE_P( INSTANTIATE_TEST_CASE_P(MSA, SumSquaresTest, ::testing::Values(make_tuple(
MSA, SumSquaresTest, &vpx_sum_squares_2d_i16_c,
::testing::Values(make_tuple(&vpx_sum_squares_2d_i16_c,
&vpx_sum_squares_2d_i16_msa))); &vpx_sum_squares_2d_i16_msa)));
#endif // HAVE_MSA #endif // HAVE_MSA
} // namespace } // namespace

View File

@@ -734,8 +734,6 @@ endif # CONFIG_VP9_HIGHBITDEPTH
# Invalid files for testing libvpx error checking. # Invalid files for testing libvpx error checking.
LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += invalid-bug-1443.ivf LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += invalid-bug-1443.ivf
LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += invalid-bug-1443.ivf.res LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += invalid-bug-1443.ivf.res
LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += invalid-token-partition.ivf
LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += invalid-token-partition.ivf.res
LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += invalid-vp80-00-comprehensive-018.ivf.2kf_0x6.ivf LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += invalid-vp80-00-comprehensive-018.ivf.2kf_0x6.ivf
LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += invalid-vp80-00-comprehensive-018.ivf.2kf_0x6.ivf.res LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += invalid-vp80-00-comprehensive-018.ivf.2kf_0x6.ivf.res
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-01-v3.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-01-v3.webm

View File

@@ -852,7 +852,5 @@ e402cbbf9e550ae017a1e9f1f73931c1d18474e8 *invalid-crbug-667044.webm
d3964f9dad9f60363c81b688324d95b4ec7c8038 *invalid-crbug-667044.webm.res d3964f9dad9f60363c81b688324d95b4ec7c8038 *invalid-crbug-667044.webm.res
fd9df7f3f6992af1d7a9dde975c9a0d6f28c053d *invalid-bug-1443.ivf fd9df7f3f6992af1d7a9dde975c9a0d6f28c053d *invalid-bug-1443.ivf
fd3020fa6e9ca5966206738654c97dec313b0a95 *invalid-bug-1443.ivf.res fd3020fa6e9ca5966206738654c97dec313b0a95 *invalid-bug-1443.ivf.res
1a0e405606939f2febab1a21b30c37cb8f2c8cb1 *invalid-token-partition.ivf
90a8a95e7024f015b87f5483a65036609b3d1b74 *invalid-token-partition.ivf.res
17696cd21e875f1d6e5d418cbf89feab02c8850a *vp90-2-22-svc_1280x720_1.webm 17696cd21e875f1d6e5d418cbf89feab02c8850a *vp90-2-22-svc_1280x720_1.webm
e2f9e1e47a791b4e939a9bdc50bf7a25b3761f77 *vp90-2-22-svc_1280x720_1.webm.md5 e2f9e1e47a791b4e939a9bdc50bf7a25b3761f77 *vp90-2-22-svc_1280x720_1.webm.md5

View File

@@ -61,6 +61,7 @@ int main(int argc, char **argv) {
#if !CONFIG_SHARED #if !CONFIG_SHARED
// Shared library builds don't support whitebox tests // Shared library builds don't support whitebox tests
// that exercise internal symbols. // that exercise internal symbols.
#if CONFIG_VP8 #if CONFIG_VP8
vp8_rtcd(); vp8_rtcd();
#endif // CONFIG_VP8 #endif // CONFIG_VP8

View File

@@ -27,8 +27,8 @@
namespace { namespace {
using libvpx_test::ACMRandom;
using std::string; using std::string;
using libvpx_test::ACMRandom;
#if CONFIG_WEBM_IO #if CONFIG_WEBM_IO

View File

@@ -59,7 +59,7 @@ const TestVideoParam kTestVectors[] = {
// Encoding modes tested // Encoding modes tested
const libvpx_test::TestMode kEncodingModeVectors[] = { const libvpx_test::TestMode kEncodingModeVectors[] = {
::libvpx_test::kTwoPassGood, ::libvpx_test::kOnePassGood, ::libvpx_test::kTwoPassGood, ::libvpx_test::kOnePassGood,
::libvpx_test::kRealTime ::libvpx_test::kRealTime,
}; };
// Speed settings tested // Speed settings tested

View File

@@ -22,7 +22,7 @@ namespace {
// Encoding modes // Encoding modes
const libvpx_test::TestMode kEncodingModeVectors[] = { const libvpx_test::TestMode kEncodingModeVectors[] = {
::libvpx_test::kTwoPassGood, ::libvpx_test::kOnePassGood, ::libvpx_test::kTwoPassGood, ::libvpx_test::kOnePassGood,
::libvpx_test::kRealTime ::libvpx_test::kRealTime,
}; };
// Encoding speeds // Encoding speeds

View File

@@ -14,9 +14,9 @@
#include "third_party/googletest/src/include/gtest/gtest.h" #include "third_party/googletest/src/include/gtest/gtest.h"
#include "./vp9_rtcd.h"
#include "./vpx_config.h" #include "./vpx_config.h"
#include "./vpx_dsp_rtcd.h" #include "./vpx_dsp_rtcd.h"
#include "./vp9_rtcd.h"
#include "test/acm_random.h" #include "test/acm_random.h"
#include "test/buffer.h" #include "test/buffer.h"
#include "test/clear_system_state.h" #include "test/clear_system_state.h"
@@ -42,7 +42,7 @@ typedef void (*QuantizeFunc)(const tran_low_t *coeff, intptr_t count,
uint16_t *eob, const int16_t *scan, uint16_t *eob, const int16_t *scan,
const int16_t *iscan); const int16_t *iscan);
typedef std::tr1::tuple<QuantizeFunc, QuantizeFunc, vpx_bit_depth_t, typedef std::tr1::tuple<QuantizeFunc, QuantizeFunc, vpx_bit_depth_t,
int /*max_size*/, bool /*is_fp*/> int /*max_size*/>
QuantizeParam; QuantizeParam;
// Wrapper for FP version which does not use zbin or quant_shift. // Wrapper for FP version which does not use zbin or quant_shift.
@@ -69,15 +69,11 @@ void QuantFPWrapper(const tran_low_t *coeff, intptr_t count, int skip_block,
class VP9QuantizeBase { class VP9QuantizeBase {
public: public:
VP9QuantizeBase(vpx_bit_depth_t bit_depth, int max_size, bool is_fp) VP9QuantizeBase(vpx_bit_depth_t bit_depth, int max_size)
: bit_depth_(bit_depth), max_size_(max_size), is_fp_(is_fp) { : bit_depth_(bit_depth), max_size_(max_size) {
max_value_ = (1 << bit_depth_) - 1; max_value_ = (1 << bit_depth_) - 1;
zbin_ptr_ = zbin_ptr_ =
reinterpret_cast<int16_t *>(vpx_memalign(16, 8 * sizeof(*zbin_ptr_))); reinterpret_cast<int16_t *>(vpx_memalign(16, 8 * sizeof(*zbin_ptr_)));
round_fp_ptr_ = reinterpret_cast<int16_t *>(
vpx_memalign(16, 8 * sizeof(*round_fp_ptr_)));
quant_fp_ptr_ = reinterpret_cast<int16_t *>(
vpx_memalign(16, 8 * sizeof(*quant_fp_ptr_)));
round_ptr_ = round_ptr_ =
reinterpret_cast<int16_t *>(vpx_memalign(16, 8 * sizeof(*round_ptr_))); reinterpret_cast<int16_t *>(vpx_memalign(16, 8 * sizeof(*round_ptr_)));
quant_ptr_ = quant_ptr_ =
@@ -90,15 +86,11 @@ class VP9QuantizeBase {
~VP9QuantizeBase() { ~VP9QuantizeBase() {
vpx_free(zbin_ptr_); vpx_free(zbin_ptr_);
vpx_free(round_fp_ptr_);
vpx_free(quant_fp_ptr_);
vpx_free(round_ptr_); vpx_free(round_ptr_);
vpx_free(quant_ptr_); vpx_free(quant_ptr_);
vpx_free(quant_shift_ptr_); vpx_free(quant_shift_ptr_);
vpx_free(dequant_ptr_); vpx_free(dequant_ptr_);
zbin_ptr_ = NULL; zbin_ptr_ = NULL;
round_fp_ptr_ = NULL;
quant_fp_ptr_ = NULL;
round_ptr_ = NULL; round_ptr_ = NULL;
quant_ptr_ = NULL; quant_ptr_ = NULL;
quant_shift_ptr_ = NULL; quant_shift_ptr_ = NULL;
@@ -108,8 +100,6 @@ class VP9QuantizeBase {
protected: protected:
int16_t *zbin_ptr_; int16_t *zbin_ptr_;
int16_t *round_fp_ptr_;
int16_t *quant_fp_ptr_;
int16_t *round_ptr_; int16_t *round_ptr_;
int16_t *quant_ptr_; int16_t *quant_ptr_;
int16_t *quant_shift_ptr_; int16_t *quant_shift_ptr_;
@@ -117,136 +107,29 @@ class VP9QuantizeBase {
const vpx_bit_depth_t bit_depth_; const vpx_bit_depth_t bit_depth_;
int max_value_; int max_value_;
const int max_size_; const int max_size_;
const bool is_fp_;
}; };
class VP9QuantizeTest : public VP9QuantizeBase, class VP9QuantizeTest : public VP9QuantizeBase,
public ::testing::TestWithParam<QuantizeParam> { public ::testing::TestWithParam<QuantizeParam> {
public: public:
VP9QuantizeTest() VP9QuantizeTest()
: VP9QuantizeBase(GET_PARAM(2), GET_PARAM(3), GET_PARAM(4)), : VP9QuantizeBase(GET_PARAM(2), GET_PARAM(3)), quantize_op_(GET_PARAM(0)),
quantize_op_(GET_PARAM(0)), ref_quantize_op_(GET_PARAM(1)) {} ref_quantize_op_(GET_PARAM(1)) {}
protected: protected:
const QuantizeFunc quantize_op_; const QuantizeFunc quantize_op_;
const QuantizeFunc ref_quantize_op_; const QuantizeFunc ref_quantize_op_;
}; };
// This quantizer compares the AC coefficients to the quantization step size to
// determine if further multiplication operations are needed.
// Based on vp9_quantize_fp_sse2().
inline void quant_fp_nz(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
                        int skip_block, const int16_t *round_ptr,
                        const int16_t *quant_ptr, tran_low_t *qcoeff_ptr,
                        tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr,
                        uint16_t *eob_ptr, const int16_t *scan,
                        const int16_t *iscan, int is_32x32) {
  int i, eob = -1;
  // Threshold below which an AC coefficient is likely to quantize to zero.
  // Halved again for 32x32 blocks to match the SIMD implementation.
  const int thr = dequant_ptr[1] >> (1 + is_32x32);
  (void)iscan;
  (void)skip_block;
  assert(!skip_block);

  // Quantization pass: process coefficients in groups of 16, mirroring the
  // row-at-a-time structure of the SSE2 code.
  for (i = 0; i < n_coeffs; i += 16) {
    int y;
    // Number of coefficients in this group with magnitude <= thr, i.e.
    // coefficients expected to quantize to zero.
    int nzflag_cnt = 0;
    int abs_coeff[16];
    int coeff_sign[16];
    // First pass over the group: record sign and magnitude, and count the
    // small coefficients.
    for (y = 0; y < 16; ++y) {
      const int rc = i + y;
      const int coeff = coeff_ptr[rc];
      coeff_sign[y] = (coeff >> 31);
      abs_coeff[y] = (coeff ^ coeff_sign[y]) - coeff_sign[y];
      // The first 16 are skipped in the sse2 code. Do the same here to match.
      if (i >= 16 && (abs_coeff[y] <= thr)) {
        nzflag_cnt++;
      }
    }
    // Second pass: quantize the group, unless every coefficient in it has
    // magnitude at most the quantization step_size/2, in which case the
    // whole group is quantized to zero.
    for (y = 0; y < 16; ++y) {
      const int rc = i + y;
      if (nzflag_cnt < 16) {
        int tmp;
        int _round;
        if (is_32x32) {
          _round = ROUND_POWER_OF_TWO(round_ptr[rc != 0], 1);
        } else {
          _round = round_ptr[rc != 0];
        }
        tmp = clamp(abs_coeff[y] + _round, INT16_MIN, INT16_MAX);
        tmp = (tmp * quant_ptr[rc != 0]) >> (16 - is_32x32);
        // Restore the sign that was stripped off above.
        qcoeff_ptr[rc] = (tmp ^ coeff_sign[y]) - coeff_sign[y];
        // Dequantize. (The original code also performed an unconditional
        // assignment here that both branches immediately overwrote; that
        // dead store has been removed.)
        if (is_32x32) {
          dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr[rc != 0] / 2;
        } else {
          dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr[rc != 0];
        }
      } else {
        qcoeff_ptr[rc] = 0;
        dqcoeff_ptr[rc] = 0;
      }
    }
  }

  // Scan for eob: index (in scan order) of the last nonzero coefficient.
  for (i = 0; i < n_coeffs; i++) {
    // Use the scan order to find the correct eob.
    const int rc = scan[i];
    if (qcoeff_ptr[rc]) {
      eob = i;
    }
  }
  *eob_ptr = eob + 1;
}
// C reference implementation of the "fp" quantizer for block sizes up to
// 16x16: a thin wrapper forwarding to quant_fp_nz() with is_32x32 = 0.
void quantize_fp_nz_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
                      int skip_block, const int16_t *round_ptr,
                      const int16_t *quant_ptr, tran_low_t *qcoeff_ptr,
                      tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr,
                      uint16_t *eob_ptr, const int16_t *scan,
                      const int16_t *iscan) {
  quant_fp_nz(coeff_ptr, n_coeffs, skip_block, round_ptr, quant_ptr, qcoeff_ptr,
              dqcoeff_ptr, dequant_ptr, eob_ptr, scan, iscan, 0);
}
// C reference implementation of the "fp" quantizer for 32x32 blocks: a thin
// wrapper forwarding to quant_fp_nz() with is_32x32 = 1 (extra rounding /
// halved dequant handled inside quant_fp_nz).
void quantize_fp_32x32_nz_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
                            int skip_block, const int16_t *round_ptr,
                            const int16_t *quant_ptr, tran_low_t *qcoeff_ptr,
                            tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr,
                            uint16_t *eob_ptr, const int16_t *scan,
                            const int16_t *iscan) {
  quant_fp_nz(coeff_ptr, n_coeffs, skip_block, round_ptr, quant_ptr, qcoeff_ptr,
              dqcoeff_ptr, dequant_ptr, eob_ptr, scan, iscan, 1);
}
void GenerateHelperArrays(ACMRandom *rnd, int16_t *zbin, int16_t *round, void GenerateHelperArrays(ACMRandom *rnd, int16_t *zbin, int16_t *round,
int16_t *quant, int16_t *quant_shift, int16_t *quant, int16_t *quant_shift,
int16_t *dequant, int16_t *round_fp, int16_t *dequant) {
int16_t *quant_fp) {
// Max when q == 0. Otherwise, it is 48 for Y and 42 for U/V.
const int max_qrounding_factor_fp = 64;
for (int j = 0; j < 2; j++) { for (int j = 0; j < 2; j++) {
// The range is 4 to 1828 in the VP9 tables.
const int qlookup = rnd->RandRange(1825) + 4;
round_fp[j] = (max_qrounding_factor_fp * qlookup) >> 7;
quant_fp[j] = (1 << 16) / qlookup;
// Values determined by deconstructing vp9_init_quantizer(). // Values determined by deconstructing vp9_init_quantizer().
// zbin may be up to 1143 for 8 and 10 bit Y values, or 1200 for 12 bit Y // zbin may be up to 1143 for 8 and 10 bit Y values, or 1200 for 12 bit Y
// values or U/V values of any bit depth. This is because y_delta is not // values or U/V values of any bit depth. This is because y_delta is not
// factored into the vp9_ac_quant() call. // factored into the vp9_ac_quant() call.
zbin[j] = rnd->RandRange(1200); zbin[j] = rnd->RandRange(1200);
// round may be up to 685 for Y values or 914 for U/V. // round may be up to 685 for Y values or 914 for U/V.
round[j] = rnd->RandRange(914); round[j] = rnd->RandRange(914);
// quant ranges from 1 to -32703 // quant ranges from 1 to -32703
@@ -258,8 +141,6 @@ void GenerateHelperArrays(ACMRandom *rnd, int16_t *zbin, int16_t *round,
} }
for (int j = 2; j < 8; j++) { for (int j = 2; j < 8; j++) {
zbin[j] = zbin[1]; zbin[j] = zbin[1];
round_fp[j] = round_fp[1];
quant_fp[j] = quant_fp[1];
round[j] = round[1]; round[j] = round[1];
quant[j] = quant[1]; quant[j] = quant[1];
quant_shift[j] = quant_shift[1]; quant_shift[j] = quant_shift[1];
@@ -298,18 +179,18 @@ TEST_P(VP9QuantizeTest, OperationCheck) {
const int count = (4 << sz) * (4 << sz); const int count = (4 << sz) * (4 << sz);
coeff.Set(&rnd, -max_value_, max_value_); coeff.Set(&rnd, -max_value_, max_value_);
GenerateHelperArrays(&rnd, zbin_ptr_, round_ptr_, quant_ptr_, GenerateHelperArrays(&rnd, zbin_ptr_, round_ptr_, quant_ptr_,
quant_shift_ptr_, dequant_ptr_, round_fp_ptr_, quant_shift_ptr_, dequant_ptr_);
quant_fp_ptr_);
int16_t *r_ptr = (is_fp_) ? round_fp_ptr_ : round_ptr_;
int16_t *q_ptr = (is_fp_) ? quant_fp_ptr_ : quant_ptr_;
ref_quantize_op_(coeff.TopLeftPixel(), count, skip_block, zbin_ptr_, r_ptr,
q_ptr, quant_shift_ptr_, ref_qcoeff.TopLeftPixel(),
ref_dqcoeff.TopLeftPixel(), dequant_ptr_, &ref_eob,
scan_order->scan, scan_order->iscan);
ASM_REGISTER_STATE_CHECK(quantize_op_( ref_quantize_op_(coeff.TopLeftPixel(), count, skip_block, zbin_ptr_,
coeff.TopLeftPixel(), count, skip_block, zbin_ptr_, r_ptr, q_ptr, round_ptr_, quant_ptr_, quant_shift_ptr_,
quant_shift_ptr_, qcoeff.TopLeftPixel(), dqcoeff.TopLeftPixel(), ref_qcoeff.TopLeftPixel(), ref_dqcoeff.TopLeftPixel(),
dequant_ptr_, &ref_eob, scan_order->scan,
scan_order->iscan);
ASM_REGISTER_STATE_CHECK(
quantize_op_(coeff.TopLeftPixel(), count, skip_block, zbin_ptr_,
round_ptr_, quant_ptr_, quant_shift_ptr_,
qcoeff.TopLeftPixel(), dqcoeff.TopLeftPixel(),
dequant_ptr_, &eob, scan_order->scan, scan_order->iscan)); dequant_ptr_, &eob, scan_order->scan, scan_order->iscan));
EXPECT_TRUE(qcoeff.CheckValues(ref_qcoeff)); EXPECT_TRUE(qcoeff.CheckValues(ref_qcoeff));
@@ -360,18 +241,18 @@ TEST_P(VP9QuantizeTest, EOBCheck) {
coeff.TopLeftPixel()[rnd(count)] = coeff.TopLeftPixel()[rnd(count)] =
static_cast<int>(rnd.RandRange(max_value_ * 2)) - max_value_; static_cast<int>(rnd.RandRange(max_value_ * 2)) - max_value_;
GenerateHelperArrays(&rnd, zbin_ptr_, round_ptr_, quant_ptr_, GenerateHelperArrays(&rnd, zbin_ptr_, round_ptr_, quant_ptr_,
quant_shift_ptr_, dequant_ptr_, round_fp_ptr_, quant_shift_ptr_, dequant_ptr_);
quant_fp_ptr_);
int16_t *r_ptr = (is_fp_) ? round_fp_ptr_ : round_ptr_;
int16_t *q_ptr = (is_fp_) ? quant_fp_ptr_ : quant_ptr_;
ref_quantize_op_(coeff.TopLeftPixel(), count, skip_block, zbin_ptr_, r_ptr,
q_ptr, quant_shift_ptr_, ref_qcoeff.TopLeftPixel(),
ref_dqcoeff.TopLeftPixel(), dequant_ptr_, &ref_eob,
scan_order->scan, scan_order->iscan);
ASM_REGISTER_STATE_CHECK(quantize_op_( ref_quantize_op_(coeff.TopLeftPixel(), count, skip_block, zbin_ptr_,
coeff.TopLeftPixel(), count, skip_block, zbin_ptr_, r_ptr, q_ptr, round_ptr_, quant_ptr_, quant_shift_ptr_,
quant_shift_ptr_, qcoeff.TopLeftPixel(), dqcoeff.TopLeftPixel(), ref_qcoeff.TopLeftPixel(), ref_dqcoeff.TopLeftPixel(),
dequant_ptr_, &ref_eob, scan_order->scan,
scan_order->iscan);
ASM_REGISTER_STATE_CHECK(
quantize_op_(coeff.TopLeftPixel(), count, skip_block, zbin_ptr_,
round_ptr_, quant_ptr_, quant_shift_ptr_,
qcoeff.TopLeftPixel(), dqcoeff.TopLeftPixel(),
dequant_ptr_, &eob, scan_order->scan, scan_order->iscan)); dequant_ptr_, &eob, scan_order->scan, scan_order->iscan));
EXPECT_TRUE(qcoeff.CheckValues(ref_qcoeff)); EXPECT_TRUE(qcoeff.CheckValues(ref_qcoeff));
@@ -418,10 +299,7 @@ TEST_P(VP9QuantizeTest, DISABLED_Speed) {
const int count = (4 << sz) * (4 << sz); const int count = (4 << sz) * (4 << sz);
GenerateHelperArrays(&rnd, zbin_ptr_, round_ptr_, quant_ptr_, GenerateHelperArrays(&rnd, zbin_ptr_, round_ptr_, quant_ptr_,
quant_shift_ptr_, dequant_ptr_, round_fp_ptr_, quant_shift_ptr_, dequant_ptr_);
quant_fp_ptr_);
int16_t *r_ptr = (is_fp_) ? round_fp_ptr_ : round_ptr_;
int16_t *q_ptr = (is_fp_) ? quant_fp_ptr_ : quant_ptr_;
if (i == 0) { if (i == 0) {
// When |coeff values| are less than zbin the results are 0. // When |coeff values| are less than zbin the results are 0.
@@ -441,10 +319,10 @@ TEST_P(VP9QuantizeTest, DISABLED_Speed) {
vpx_usec_timer timer; vpx_usec_timer timer;
vpx_usec_timer_start(&timer); vpx_usec_timer_start(&timer);
for (int j = 0; j < 100000000 / count; ++j) { for (int j = 0; j < 100000000 / count; ++j) {
quantize_op_(coeff.TopLeftPixel(), count, skip_block, zbin_ptr_, r_ptr, quantize_op_(coeff.TopLeftPixel(), count, skip_block, zbin_ptr_,
q_ptr, quant_shift_ptr_, qcoeff.TopLeftPixel(), round_ptr_, quant_ptr_, quant_shift_ptr_,
dqcoeff.TopLeftPixel(), dequant_ptr_, &eob, qcoeff.TopLeftPixel(), dqcoeff.TopLeftPixel(),
scan_order->scan, scan_order->iscan); dequant_ptr_, &eob, scan_order->scan, scan_order->iscan);
} }
vpx_usec_timer_mark(&timer); vpx_usec_timer_mark(&timer);
const int elapsed_time = static_cast<int>(vpx_usec_timer_elapsed(&timer)); const int elapsed_time = static_cast<int>(vpx_usec_timer_elapsed(&timer));
@@ -467,54 +345,50 @@ INSTANTIATE_TEST_CASE_P(
SSE2, VP9QuantizeTest, SSE2, VP9QuantizeTest,
::testing::Values( ::testing::Values(
make_tuple(&vpx_highbd_quantize_b_sse2, &vpx_highbd_quantize_b_c, make_tuple(&vpx_highbd_quantize_b_sse2, &vpx_highbd_quantize_b_c,
VPX_BITS_8, 16, false), VPX_BITS_8, 16),
make_tuple(&vpx_highbd_quantize_b_sse2, &vpx_highbd_quantize_b_c, make_tuple(&vpx_highbd_quantize_b_sse2, &vpx_highbd_quantize_b_c,
VPX_BITS_10, 16, false), VPX_BITS_10, 16),
make_tuple(&vpx_highbd_quantize_b_sse2, &vpx_highbd_quantize_b_c, make_tuple(&vpx_highbd_quantize_b_sse2, &vpx_highbd_quantize_b_c,
VPX_BITS_12, 16, false), VPX_BITS_12, 16),
make_tuple(&vpx_highbd_quantize_b_32x32_sse2, make_tuple(&vpx_highbd_quantize_b_32x32_sse2,
&vpx_highbd_quantize_b_32x32_c, VPX_BITS_8, 32, false), &vpx_highbd_quantize_b_32x32_c, VPX_BITS_8, 32),
make_tuple(&vpx_highbd_quantize_b_32x32_sse2, make_tuple(&vpx_highbd_quantize_b_32x32_sse2,
&vpx_highbd_quantize_b_32x32_c, VPX_BITS_10, 32, false), &vpx_highbd_quantize_b_32x32_c, VPX_BITS_10, 32),
make_tuple(&vpx_highbd_quantize_b_32x32_sse2, make_tuple(&vpx_highbd_quantize_b_32x32_sse2,
&vpx_highbd_quantize_b_32x32_c, VPX_BITS_12, 32, false))); &vpx_highbd_quantize_b_32x32_c, VPX_BITS_12, 32)));
#else #else
INSTANTIATE_TEST_CASE_P( INSTANTIATE_TEST_CASE_P(SSE2, VP9QuantizeTest,
SSE2, VP9QuantizeTest, ::testing::Values(make_tuple(&vpx_quantize_b_sse2,
::testing::Values(make_tuple(&vpx_quantize_b_sse2, &vpx_quantize_b_c, &vpx_quantize_b_c,
VPX_BITS_8, 16, false), VPX_BITS_8, 16)));
make_tuple(&QuantFPWrapper<vp9_quantize_fp_sse2>,
&QuantFPWrapper<quantize_fp_nz_c>, VPX_BITS_8,
16, true)));
#endif // CONFIG_VP9_HIGHBITDEPTH #endif // CONFIG_VP9_HIGHBITDEPTH
INSTANTIATE_TEST_CASE_P(
DISABLED_SSE2, VP9QuantizeTest,
::testing::Values(make_tuple(&QuantFPWrapper<vp9_quantize_fp_sse2>,
&QuantFPWrapper<vp9_quantize_fp_c>, VPX_BITS_8,
16)));
#endif // HAVE_SSE2 #endif // HAVE_SSE2
#if HAVE_SSSE3 && !CONFIG_VP9_HIGHBITDEPTH #if HAVE_SSSE3 && !CONFIG_VP9_HIGHBITDEPTH
#if ARCH_X86_64
INSTANTIATE_TEST_CASE_P(
SSSE3, VP9QuantizeTest,
::testing::Values(make_tuple(&vpx_quantize_b_ssse3, &vpx_quantize_b_c,
VPX_BITS_8, 16, false),
make_tuple(&QuantFPWrapper<vp9_quantize_fp_ssse3>,
&QuantFPWrapper<quantize_fp_nz_c>, VPX_BITS_8,
16, true),
make_tuple(&QuantFPWrapper<vp9_quantize_fp_32x32_ssse3>,
&QuantFPWrapper<quantize_fp_32x32_nz_c>,
VPX_BITS_8, 32, true)));
#else
INSTANTIATE_TEST_CASE_P(SSSE3, VP9QuantizeTest, INSTANTIATE_TEST_CASE_P(SSSE3, VP9QuantizeTest,
::testing::Values(make_tuple(&vpx_quantize_b_ssse3, ::testing::Values(make_tuple(&vpx_quantize_b_ssse3,
&vpx_quantize_b_c, &vpx_quantize_b_c,
VPX_BITS_8, 16, false))); VPX_BITS_8, 16)));
#endif
#if ARCH_X86_64 #if ARCH_X86_64
// TODO(johannkoenig): SSSE3 optimizations do not yet pass this test. // TODO(johannkoenig): SSSE3 optimizations do not yet pass this test.
INSTANTIATE_TEST_CASE_P(DISABLED_SSSE3, VP9QuantizeTest, INSTANTIATE_TEST_CASE_P(
::testing::Values(make_tuple( DISABLED_SSSE3, VP9QuantizeTest,
&vpx_quantize_b_32x32_ssse3, ::testing::Values(make_tuple(&vpx_quantize_b_32x32_ssse3,
&vpx_quantize_b_32x32_c, VPX_BITS_8, 32, false))); &vpx_quantize_b_32x32_c, VPX_BITS_8, 32),
make_tuple(&QuantFPWrapper<vp9_quantize_fp_ssse3>,
&QuantFPWrapper<vp9_quantize_fp_c>, VPX_BITS_8,
16),
make_tuple(&QuantFPWrapper<vp9_quantize_fp_32x32_ssse3>,
&QuantFPWrapper<vp9_quantize_fp_32x32_c>,
VPX_BITS_8, 32)));
#endif // ARCH_X86_64 #endif // ARCH_X86_64
#endif // HAVE_SSSE3 && !CONFIG_VP9_HIGHBITDEPTH #endif // HAVE_SSSE3 && !CONFIG_VP9_HIGHBITDEPTH
@@ -524,54 +398,36 @@ INSTANTIATE_TEST_CASE_P(DISABLED_SSSE3, VP9QuantizeTest,
INSTANTIATE_TEST_CASE_P( INSTANTIATE_TEST_CASE_P(
AVX, VP9QuantizeTest, AVX, VP9QuantizeTest,
::testing::Values(make_tuple(&vpx_quantize_b_avx, &vpx_quantize_b_c, ::testing::Values(make_tuple(&vpx_quantize_b_avx, &vpx_quantize_b_c,
VPX_BITS_8, 16, false), VPX_BITS_8, 16),
// Even though SSSE3 and AVX do not match the reference // Even though SSSE3 and AVX do not match the reference
// code, we can keep them in sync with each other. // code, we can keep them in sync with each other.
make_tuple(&vpx_quantize_b_32x32_avx, make_tuple(&vpx_quantize_b_32x32_avx,
&vpx_quantize_b_32x32_ssse3, VPX_BITS_8, 32, &vpx_quantize_b_32x32_ssse3, VPX_BITS_8, 32)));
false)));
#endif // HAVE_AVX && !CONFIG_VP9_HIGHBITDEPTH #endif // HAVE_AVX && !CONFIG_VP9_HIGHBITDEPTH
#if ARCH_X86_64 && HAVE_AVX2
INSTANTIATE_TEST_CASE_P(
AVX2, VP9QuantizeTest,
::testing::Values(make_tuple(&QuantFPWrapper<vp9_quantize_fp_avx2>,
&QuantFPWrapper<quantize_fp_nz_c>, VPX_BITS_8,
16, true)));
#endif // HAVE_AVX2 && !CONFIG_VP9_HIGHBITDEPTH
// TODO(webm:1448): dqcoeff is not handled correctly in HBD builds. // TODO(webm:1448): dqcoeff is not handled correctly in HBD builds.
#if HAVE_NEON && !CONFIG_VP9_HIGHBITDEPTH #if HAVE_NEON && !CONFIG_VP9_HIGHBITDEPTH
INSTANTIATE_TEST_CASE_P( INSTANTIATE_TEST_CASE_P(
NEON, VP9QuantizeTest, NEON, VP9QuantizeTest,
::testing::Values(make_tuple(&vpx_quantize_b_neon, &vpx_quantize_b_c, ::testing::Values(
VPX_BITS_8, 16, false), make_tuple(&vpx_quantize_b_neon, &vpx_quantize_b_c, VPX_BITS_8, 16),
make_tuple(&vpx_quantize_b_32x32_neon, make_tuple(&vpx_quantize_b_32x32_neon, &vpx_quantize_b_32x32_c,
&vpx_quantize_b_32x32_c, VPX_BITS_8, 32, VPX_BITS_8, 32),
false),
make_tuple(&QuantFPWrapper<vp9_quantize_fp_neon>, make_tuple(&QuantFPWrapper<vp9_quantize_fp_neon>,
&QuantFPWrapper<vp9_quantize_fp_c>, VPX_BITS_8, &QuantFPWrapper<vp9_quantize_fp_c>, VPX_BITS_8, 16),
16, true),
make_tuple(&QuantFPWrapper<vp9_quantize_fp_32x32_neon>, make_tuple(&QuantFPWrapper<vp9_quantize_fp_32x32_neon>,
&QuantFPWrapper<vp9_quantize_fp_32x32_c>, &QuantFPWrapper<vp9_quantize_fp_32x32_c>, VPX_BITS_8, 32)));
VPX_BITS_8, 32, true)));
#endif // HAVE_NEON && !CONFIG_VP9_HIGHBITDEPTH #endif // HAVE_NEON && !CONFIG_VP9_HIGHBITDEPTH
// Only useful to compare "Speed" test results. // Only useful to compare "Speed" test results.
INSTANTIATE_TEST_CASE_P( INSTANTIATE_TEST_CASE_P(
DISABLED_C, VP9QuantizeTest, DISABLED_C, VP9QuantizeTest,
::testing::Values( ::testing::Values(
make_tuple(&vpx_quantize_b_c, &vpx_quantize_b_c, VPX_BITS_8, 16, false), make_tuple(&vpx_quantize_b_c, &vpx_quantize_b_c, VPX_BITS_8, 16),
make_tuple(&vpx_quantize_b_32x32_c, &vpx_quantize_b_32x32_c, VPX_BITS_8, make_tuple(&vpx_quantize_b_32x32_c, &vpx_quantize_b_32x32_c, VPX_BITS_8,
32, false), 32),
make_tuple(&QuantFPWrapper<vp9_quantize_fp_c>, make_tuple(&QuantFPWrapper<vp9_quantize_fp_c>,
&QuantFPWrapper<vp9_quantize_fp_c>, VPX_BITS_8, 16, true), &QuantFPWrapper<vp9_quantize_fp_c>, VPX_BITS_8, 16),
make_tuple(&QuantFPWrapper<quantize_fp_nz_c>,
&QuantFPWrapper<quantize_fp_nz_c>, VPX_BITS_8, 16, true),
make_tuple(&QuantFPWrapper<quantize_fp_32x32_nz_c>,
&QuantFPWrapper<quantize_fp_32x32_nz_c>, VPX_BITS_8, 32,
true),
make_tuple(&QuantFPWrapper<vp9_quantize_fp_32x32_c>, make_tuple(&QuantFPWrapper<vp9_quantize_fp_32x32_c>,
&QuantFPWrapper<vp9_quantize_fp_32x32_c>, VPX_BITS_8, 32, &QuantFPWrapper<vp9_quantize_fp_32x32_c>, VPX_BITS_8, 32)));
true)));
} // namespace } // namespace

View File

@@ -47,7 +47,7 @@ class ScaleTest : public VpxScaleBase,
scale_fn_(&img_, &dst_img_, filter_type, phase_scaler)); scale_fn_(&img_, &dst_img_, filter_type, phase_scaler));
} }
void RunTest(INTERP_FILTER filter_type) { void RunTest() {
static const int kNumSizesToTest = 20; static const int kNumSizesToTest = 20;
static const int kNumScaleFactorsToTest = 4; static const int kNumScaleFactorsToTest = 4;
static const int kSizesToTest[] = { static const int kSizesToTest[] = {
@@ -55,6 +55,7 @@ class ScaleTest : public VpxScaleBase,
22, 24, 26, 28, 30, 32, 34, 68, 128, 134 22, 24, 26, 28, 30, 32, 34, 68, 128, 134
}; };
static const int kScaleFactors[] = { 1, 2, 3, 4 }; static const int kScaleFactors[] = { 1, 2, 3, 4 };
for (INTERP_FILTER filter_type = 0; filter_type < 4; ++filter_type) {
for (int phase_scaler = 0; phase_scaler < 16; ++phase_scaler) { for (int phase_scaler = 0; phase_scaler < 16; ++phase_scaler) {
for (int h = 0; h < kNumSizesToTest; ++h) { for (int h = 0; h < kNumSizesToTest; ++h) {
const int src_height = kSizesToTest[h]; const int src_height = kSizesToTest[h];
@@ -81,8 +82,8 @@ class ScaleTest : public VpxScaleBase,
if (src_width > 4 * dst_width || src_height > 4 * dst_height) { if (src_width > 4 * dst_width || src_height > 4 * dst_height) {
continue; continue;
} }
ASSERT_NO_FATAL_FAILURE(ResetScaleImages(src_width, src_height, ASSERT_NO_FATAL_FAILURE(ResetScaleImages(
dst_width, dst_height)); src_width, src_height, dst_width, dst_height));
ReferenceScaleFrame(filter_type, phase_scaler); ReferenceScaleFrame(filter_type, phase_scaler);
ScaleFrame(filter_type, phase_scaler); ScaleFrame(filter_type, phase_scaler);
if (memcmp(dst_img_.buffer_alloc, ref_img_.buffer_alloc, if (memcmp(dst_img_.buffer_alloc, ref_img_.buffer_alloc,
@@ -91,8 +92,8 @@ class ScaleTest : public VpxScaleBase,
"filter_type = %d, phase_scaler = %d, src_width = %4d, " "filter_type = %d, phase_scaler = %d, src_width = %4d, "
"src_height = %4d, dst_width = %4d, dst_height = %4d, " "src_height = %4d, dst_width = %4d, dst_height = %4d, "
"scale factor = %d:%d\n", "scale factor = %d:%d\n",
filter_type, phase_scaler, src_width, src_height, dst_width, filter_type, phase_scaler, src_width, src_height,
dst_height, sf_down, sf_up); dst_width, dst_height, sf_down, sf_up);
PrintDiff(); PrintDiff();
} }
CompareImages(dst_img_); CompareImages(dst_img_);
@@ -103,6 +104,7 @@ class ScaleTest : public VpxScaleBase,
} }
} }
} }
}
void PrintDiffComponent(const uint8_t *const ref, const uint8_t *const opt, void PrintDiffComponent(const uint8_t *const ref, const uint8_t *const opt,
const int stride, const int width, const int height, const int stride, const int width, const int height,
@@ -143,10 +145,7 @@ class ScaleTest : public VpxScaleBase,
ScaleFrameFunc scale_fn_; ScaleFrameFunc scale_fn_;
}; };
TEST_P(ScaleTest, ScaleFrame_EightTap) { RunTest(EIGHTTAP); } TEST_P(ScaleTest, ScaleFrame) { ASSERT_NO_FATAL_FAILURE(RunTest()); }
TEST_P(ScaleTest, ScaleFrame_EightTapSmooth) { RunTest(EIGHTTAP_SMOOTH); }
TEST_P(ScaleTest, ScaleFrame_EightTapSharp) { RunTest(EIGHTTAP_SHARP); }
TEST_P(ScaleTest, ScaleFrame_Bilinear) { RunTest(BILINEAR); }
TEST_P(ScaleTest, DISABLED_Speed) { TEST_P(ScaleTest, DISABLED_Speed) {
static const int kCountSpeedTestBlock = 100; static const int kCountSpeedTestBlock = 100;

View File

@@ -147,6 +147,7 @@ TEST(VPxWorkerThreadTest, TestInterfaceAPI) {
// ----------------------------------------------------------------------------- // -----------------------------------------------------------------------------
// Multi-threaded decode tests // Multi-threaded decode tests
#if CONFIG_WEBM_IO #if CONFIG_WEBM_IO
struct FileList { struct FileList {
const char *name; const char *name;

72
tools/all_builds.py Executable file
View File

@@ -0,0 +1,72 @@
#!/usr/bin/python
import getopt
import subprocess
import sys
# Long options recognized before the "--" separator on the command line.
LONG_OPTIONS = ["shard=", "shards="]
# Configure flags common to every tested build configuration.
BASE_COMMAND = "./configure --enable-internal-stats --enable-experimental"
def RunCommand(command):
  """Run |command| through the shell; abort the script on a nonzero status.

  Output is not captured and flows straight to this process's stdout/stderr.
  """
  run = subprocess.Popen(command, shell=True)
  run.communicate()  # Wait for the command to finish.
  if run.returncode:
    # print() with a single pre-formatted string works on Python 2 and 3;
    # the original "print x" statement was Python-2-only.
    print("Non-zero return code: " + str(run.returncode) + " => exiting!")
    sys.exit(1)
def list_of_experiments():
  """Parse ./configure and return the experiment names it declares.

  Reads the lines between 'EXPERIMENT_LIST="' and the closing '"', strips
  the 4-character indent, and filters out experiments known to be broken.

  Returns:
    A list of experiment name strings, in file order.
  """
  experiments = []
  currently_broken = ["csm"]  # known-broken experiments to skip
  list_start = False
  # Use a context manager so the file handle is closed deterministically
  # (the original implementation never closed it).
  with open("configure") as configure_file:
    for line in configure_file.read().split("\n"):
      if line == 'EXPERIMENT_LIST="':
        list_start = True
      elif line == '"':
        list_start = False
      elif list_start:
        experiment = line[4:]
        if experiment not in currently_broken:
          experiments.append(experiment)
  return experiments
def main(argv):
  """Entry point: build every sharded experiment configuration.

  Usage: all_builds.py [--shard=<n> --shards=<n>] -- [configure flag ...]
  Arguments after "--" are appended verbatim to every configure command.
  """
  # Parse arguments.
  options = {"--shard": 0, "--shards": 1}
  if "--" in argv:
    opt_end_index = argv.index("--")
  else:
    opt_end_index = len(argv)
  try:
    o, _ = getopt.getopt(argv[1:opt_end_index], None, LONG_OPTIONS)
  except getopt.GetoptError as err:
    # "except ... as err" is valid on Python 2.6+ and Python 3; the original
    # "except GetoptError, err" form is Python-2-only.
    print(str(err))
    print("Usage: %s [--shard=<n> --shards=<n>] -- [configure flag ...]"
          % argv[0])
    sys.exit(2)
  options.update(o)
  extra_args = argv[opt_end_index + 1:]

  # Shard the experiment list: config i belongs to shard (i % shards).
  shard = int(options["--shard"])
  shards = int(options["--shards"])
  experiments = list_of_experiments()
  base_command = " ".join([BASE_COMMAND] + extra_args)
  configs = [base_command]
  configs += ["%s --enable-%s" % (base_command, e) for e in experiments]
  my_configs = [c for i, c in enumerate(configs) if i % shards == shard]

  # Run configs for this shard.
  for config in my_configs:
    test_build(config)
def test_build(configure_command):
  """Configure and build the tree once, aborting on any failure.

  RunCommand exits the script on error, so reaching the end means success.
  """
  # Blue-on-white ANSI banner so each configuration stands out in the log.
  # print() with one pre-formatted string is valid on Python 2 and 3; the
  # original bare print statement was Python-2-only.
  print("\033[34m\033[47mTesting %s\033[0m" % (configure_command))
  RunCommand(configure_command)
  RunCommand("make clean")
  RunCommand("make")
# Allow use both as a standalone script and as an importable module.
if __name__ == "__main__":
  main(sys.argv)

15
tools/author_first_release.sh Executable file
View File

@@ -0,0 +1,15 @@
#!/bin/bash
##
## List the release each author first contributed to.
##
## Usage: author_first_release.sh [TAGS]
##
## If the TAGS arguments are unspecified, all tags reported by `git tag`
## will be considered.
##
# Tags to inspect: the command-line arguments, or every tag if none given.
tags=${@:-$(git tag)}
for tag in $tags; do
  # For each tag: list its authors (shortlog -s gives "count<TAB>author"),
  # drop the count, and prefix each author with the release number
  # (the tag with its leading "v" stripped).
  git shortlog -n -e -s $tag |
  cut -f2- |
  awk "{print \"${tag#v}\t\"\$0}"
# Sort by author and collapse duplicates, keeping one line per author —
# the earliest release they appear in.
done | sort -k2 | uniq -f2

158
tools/ftfy.sh Executable file
View File

@@ -0,0 +1,158 @@
#!/bin/sh
# Path of this script as invoked; used for the usage text and log prefix.
self="$0"
# Directory containing this script; the companion helpers
# intersect-diffs.py and wrap-commit-msg.py are looked up here.
dirname_self=$(dirname "$self")
# Print usage help to stderr, remove the temp files, and abort.
# NOTE: no comments may be added inside the heredoc — its body is the
# program's output.
usage() {
  cat <<EOF >&2
Usage: $self [option]

This script applies a whitespace transformation to the commit at HEAD. If no
options are given, then the modified files are left in the working tree.

Options:
  -h, --help     Shows this message
  -n, --dry-run  Shows a diff of the changes to be made.
  --amend        Squashes the changes into the commit at HEAD
                     This option will also reformat the commit message.
  --commit       Creates a new commit containing only the whitespace changes
  --msg-only     Reformat the commit message only, ignore the patch itself.

EOF
  rm -f ${CLEAN_FILES}
  exit 1
}
# Print a message to stderr, prefixed with this script's basename.
log() {
  echo "${self##*/}: $@" >&2
}
# Reformat each C/C++ source file argument in place with clang-format,
# using the project's .clang-format (--style=file). Non-source arguments
# are silently skipped.
vpx_style() {
  for f; do
    case "$f" in
      *.h|*.c|*.cc)
        clang-format -i --style=file "$f"
        ;;
    esac
  done
}
# Apply diff file $1 to the working tree, but only when INTERSECT_RESULT is
# nonzero — i.e. when intersect-diffs.py reported output (presumably
# "changes found"; confirm against intersect-diffs.py's exit convention).
apply() {
  [ $INTERSECT_RESULT -ne 0 ] && patch -p1 < "$1"
}
# Create a new commit containing the whitespace changes, with a Change-Id
# derived deterministically from HEAD's Change-Id. Requires HEAD to carry a
# Change-Id trailer. NOTE: the heredoc is the commit message — do not add
# comments inside it.
commit() {
  LAST_CHANGEID=$(git show | awk '/Change-Id:/{print $2}')
  if [ -z "$LAST_CHANGEID" ]; then
    log "HEAD doesn't have a Change-Id, unable to generate a new commit"
    exit 1
  fi

  # Build a deterministic Change-Id from the parent's
  NEW_CHANGEID=${LAST_CHANGEID}-styled
  NEW_CHANGEID=I$(echo $NEW_CHANGEID | git hash-object --stdin)

  # Commit, preserving authorship from the parent commit.
  git commit -a -C HEAD > /dev/null
  git commit --amend -F- << EOF
Cosmetic: Fix whitespace in change ${LAST_CHANGEID:0:9}

Change-Id: ${NEW_CHANGEID}
EOF
}
# If diff_msg found the reformatted commit message to differ, print a
# unified diff of the change (tail -n +3 drops the ---/+++ header lines).
show_commit_msg_diff() {
  if [ $DIFF_MSG_RESULT -ne 0 ]; then
    log "Modified commit message:"
    diff -u "$ORIG_COMMIT_MSG" "$NEW_COMMIT_MSG" | tail -n +3
  fi
}
# Show the reworded commit message, then amend HEAD when either the message
# (DIFF_MSG_RESULT) or the whitespace patch (INTERSECT_RESULT) changed.
amend() {
  show_commit_msg_diff
  if [ $DIFF_MSG_RESULT -ne 0 ] || [ $INTERSECT_RESULT -ne 0 ]; then
    git commit -a --amend -F "$NEW_COMMIT_MSG"
  fi
}
# Rewrap HEAD's commit message through wrap-commit-msg.py and record in
# DIFF_MSG_RESULT whether it changed (cmp exit status: nonzero => modified).
diff_msg() {
  git log -1 --format=%B > "$ORIG_COMMIT_MSG"
  "${dirname_self}"/wrap-commit-msg.py \
      < "$ORIG_COMMIT_MSG" > "$NEW_COMMIT_MSG"
  cmp -s "$ORIG_COMMIT_MSG" "$NEW_COMMIT_MSG"
  DIFF_MSG_RESULT=$?
}
# Temporary files ($$ = this shell's PID, keeping names unique per run)
ORIG_DIFF=orig.diff.$$
MODIFIED_DIFF=modified.diff.$$
FINAL_DIFF=final.diff.$$
ORIG_COMMIT_MSG=orig.commit-msg.$$
NEW_COMMIT_MSG=new.commit-msg.$$
CLEAN_FILES="${ORIG_DIFF} ${MODIFIED_DIFF} ${FINAL_DIFF}"
CLEAN_FILES="${CLEAN_FILES} ${ORIG_COMMIT_MSG} ${NEW_COMMIT_MSG}"

# Preconditions: at most one option, clang-format available, clean tree.
[ $# -lt 2 ] || usage

if ! clang-format -version >/dev/null 2>&1; then
  log "clang-format not found"
  exit 1
fi

if ! git diff --quiet HEAD; then
  log "Working tree is dirty, commit your changes first"
  exit 1
fi

# Need to be in the root
cd "$(git rev-parse --show-toplevel)"

# Collect the original diff
git show > "${ORIG_DIFF}"

# Apply the style guide on new and modified files and collect its diff
# (third_party/ sources are intentionally left untouched).
for f in $(git diff HEAD^ --name-only -M90 --diff-filter=AM); do
  case "$f" in
    third_party/*) continue;;
  esac
  vpx_style "$f"
done
git diff --no-color --no-ext-diff > "${MODIFIED_DIFF}"

# Intersect the two diffs, keeping only style fixes that touch lines the
# commit itself changed; then restore the pristine tree.
"${dirname_self}"/intersect-diffs.py \
    "${ORIG_DIFF}" "${MODIFIED_DIFF}" > "${FINAL_DIFF}"
INTERSECT_RESULT=$?
git reset --hard >/dev/null

# Fixup the commit message
diff_msg

# Handle options
if [ -n "$1" ]; then
  case "$1" in
    -h|--help) usage;;
    -n|--dry-run) cat "${FINAL_DIFF}"; show_commit_msg_diff;;
    --commit) apply "${FINAL_DIFF}"; commit;;
    --amend) apply "${FINAL_DIFF}"; amend;;
    --msg-only) amend;;
    *) usage;;
  esac
else
  # Default: leave the whitespace fixes in the working tree for review.
  apply "${FINAL_DIFF}"
  if ! git diff --quiet; then
    log "Formatting changes applied, verify and commit."
    log "See also: http://www.webmproject.org/code/contribute/conventions/"
    git diff --stat
  fi
fi

rm -f ${CLEAN_FILES}

View File

@@ -37,9 +37,7 @@ extern "C" {
#define SEGMENT_DELTADATA 0 #define SEGMENT_DELTADATA 0
#define SEGMENT_ABSDATA 1 #define SEGMENT_ABSDATA 1
typedef struct { typedef struct { int r, c; } POS;
int r, c;
} POS;
#define PLANE_TYPE_Y_NO_DC 0 #define PLANE_TYPE_Y_NO_DC 0
#define PLANE_TYPE_Y2 1 #define PLANE_TYPE_Y2 1
@@ -182,9 +180,6 @@ typedef struct {
unsigned int low_res_ref_frames[MAX_REF_FRAMES]; unsigned int low_res_ref_frames[MAX_REF_FRAMES];
// The video frame counter value for the key frame, for lowest resolution. // The video frame counter value for the key frame, for lowest resolution.
unsigned int key_frame_counter_value; unsigned int key_frame_counter_value;
// Flags to signal skipped encoding of previous and base layer stream.
unsigned int skip_encoding_prev_stream;
unsigned int skip_encoding_base_stream;
LOWER_RES_MB_INFO *mb_info; LOWER_RES_MB_INFO *mb_info;
} LOWER_RES_FRAME_INFO; } LOWER_RES_FRAME_INFO;
#endif #endif

View File

@@ -20,7 +20,8 @@ static void copy_and_extend_plane(unsigned char *s, /* source */
int et, /* extend top border */ int et, /* extend top border */
int el, /* extend left border */ int el, /* extend left border */
int eb, /* extend bottom border */ int eb, /* extend bottom border */
int er) { /* extend right border */ int er /* extend right border */
) {
int i; int i;
unsigned char *src_ptr1, *src_ptr2; unsigned char *src_ptr1, *src_ptr2;
unsigned char *dest_ptr1, *dest_ptr2; unsigned char *dest_ptr1, *dest_ptr2;

View File

@@ -12,7 +12,7 @@
#include "vpx_mem/vpx_mem.h" #include "vpx_mem/vpx_mem.h"
void vp8_dequant_idct_add_y_block_mmi(int16_t *q, int16_t *dq, uint8_t *dst, void vp8_dequant_idct_add_y_block_mmi(int16_t *q, int16_t *dq, uint8_t *dst,
int stride, char *eobs) { int stride, int8_t *eobs) {
int i, j; int i, j;
for (i = 0; i < 4; i++) { for (i = 0; i < 4; i++) {
@@ -33,7 +33,8 @@ void vp8_dequant_idct_add_y_block_mmi(int16_t *q, int16_t *dq, uint8_t *dst,
} }
void vp8_dequant_idct_add_uv_block_mmi(int16_t *q, int16_t *dq, uint8_t *dstu, void vp8_dequant_idct_add_uv_block_mmi(int16_t *q, int16_t *dq, uint8_t *dstu,
uint8_t *dstv, int stride, char *eobs) { uint8_t *dstv, int stride,
int8_t *eobs) {
int i, j; int i, j;
for (i = 0; i < 2; i++) { for (i = 0; i < 2; i++) {

View File

@@ -461,87 +461,96 @@ void vp8_loop_filter_vertical_edge_mmi(unsigned char *src_ptr,
); );
} }
/* clang-format off */
#define VP8_MBLOOP_HPSRAB \ #define VP8_MBLOOP_HPSRAB \
"punpcklbh %[ftmp10], %[ftmp10], %[ftmp0] \n\t" \ "xor %[ftmp3], %[ftmp3], %[ftmp3] \n\t" \
"punpckhbh %[ftmp11], %[ftmp11], %[ftmp0] \n\t" \ "xor %[ftmp8], %[ftmp8], %[ftmp8] \n\t" \
"psrah %[ftmp10], %[ftmp10], %[ftmp9] \n\t" \ "punpcklbh %[ftmp3], %[ftmp3], %[ftmp0] \n\t" \
"psrah %[ftmp11], %[ftmp11], %[ftmp9] \n\t" \ "punpckhbh %[ftmp8], %[ftmp8], %[ftmp0] \n\t" \
"packsshb %[ftmp0], %[ftmp10], %[ftmp11] \n\t" "psrah %[ftmp3], %[ftmp3], %[ftmp9] \n\t" \
"psrah %[ftmp8], %[ftmp8], %[ftmp9] \n\t" \
"packsshb %[ftmp0], %[ftmp3], %[ftmp8] \n\t"
#define VP8_MBLOOP_HPSRAB_PMULHH(reg1, reg2) \
"pmulhh " #reg1 ", " #reg1 ", " #reg2 " \n\t"
#define VP8_MBLOOP_HPSRAB_ADD(reg) \ #define VP8_MBLOOP_HPSRAB_ADD(reg) \
"punpcklbh %[ftmp1], %[ftmp0], %[ftmp12] \n\t" \ "xor %[ftmp3], %[ftmp3], %[ftmp3] \n\t" \
"punpckhbh %[ftmp2], %[ftmp0], %[ftmp12] \n\t" \ "xor %[ftmp8], %[ftmp8], %[ftmp8] \n\t" \
"pmulhh %[ftmp1], %[ftmp1], " #reg " \n\t" \ "punpcklbh %[ftmp3], %[ftmp3], %[ftmp2] \n\t" \
"pmulhh %[ftmp2], %[ftmp2], " #reg " \n\t" \ "punpckhbh %[ftmp8], %[ftmp8], %[ftmp2] \n\t" \
"paddh %[ftmp1], %[ftmp1], %[ff_ph_003f] \n\t" \ VP8_MBLOOP_HPSRAB_PMULHH(%[ftmp3], reg) \
"paddh %[ftmp2], %[ftmp2], %[ff_ph_003f] \n\t" \ VP8_MBLOOP_HPSRAB_PMULHH(%[ftmp8], reg) \
"psrah %[ftmp1], %[ftmp1], %[ftmp9] \n\t" \ "paddh %[ftmp3], %[ftmp3], %[ff_ph_003f] \n\t" \
"psrah %[ftmp2], %[ftmp2], %[ftmp9] \n\t" \ "paddh %[ftmp8], %[ftmp8], %[ff_ph_003f] \n\t" \
"packsshb %[ftmp1], %[ftmp1], %[ftmp2] \n\t" "psrah %[ftmp3], %[ftmp3], %[ftmp9] \n\t" \
/* clang-format on */ "psrah %[ftmp8], %[ftmp8], %[ftmp9] \n\t" \
"packsshb %[ftmp3], %[ftmp3], %[ftmp8] \n\t"
void vp8_mbloop_filter_horizontal_edge_mmi( void vp8_mbloop_filter_horizontal_edge_mmi(
unsigned char *src_ptr, int src_pixel_step, const unsigned char *blimit, unsigned char *src_ptr, int src_pixel_step, const unsigned char *blimit,
const unsigned char *limit, const unsigned char *thresh, int count) { const unsigned char *limit, const unsigned char *thresh, int count) {
uint32_t tmp[1]; uint32_t tmp[1];
double ftmp[13]; mips_reg addr[2];
DECLARE_ALIGNED(8, const uint64_t, srct[1]);
double ftmp[10];
__asm__ volatile ( __asm__ volatile (
MMI_SLL(%[tmp0], %[src_pixel_step], 0x02)
MMI_SUBU(%[src_ptr], %[src_ptr], %[tmp0])
"1: \n\t" "1: \n\t"
"gsldlc1 %[ftmp9], 0x07(%[limit]) \n\t" "gsldlc1 %[ftmp9], 0x07(%[limit]) \n\t"
"gsldrc1 %[ftmp9], 0x00(%[limit]) \n\t" "gsldrc1 %[ftmp9], 0x00(%[limit]) \n\t"
/* ftmp1: p3 */
"gsldlc1 %[ftmp1], 0x07(%[src_ptr]) \n\t"
"gsldrc1 %[ftmp1], 0x00(%[src_ptr]) \n\t"
/* ftmp3: p2 */
MMI_ADDU(%[src_ptr], %[src_ptr], %[src_pixel_step])
"gsldlc1 %[ftmp3], 0x07(%[src_ptr]) \n\t"
"gsldrc1 %[ftmp3], 0x00(%[src_ptr]) \n\t"
/* ftmp4: p1 */
MMI_ADDU(%[src_ptr], %[src_ptr], %[src_pixel_step])
"gsldlc1 %[ftmp4], 0x07(%[src_ptr]) \n\t"
"gsldrc1 %[ftmp4], 0x00(%[src_ptr]) \n\t"
/* ftmp5: p0 */
MMI_ADDU(%[src_ptr], %[src_ptr], %[src_pixel_step])
"gsldlc1 %[ftmp5], 0x07(%[src_ptr]) \n\t"
"gsldrc1 %[ftmp5], 0x00(%[src_ptr]) \n\t"
/* ftmp6: q0 */
MMI_ADDU(%[src_ptr], %[src_ptr], %[src_pixel_step])
"gsldlc1 %[ftmp6], 0x07(%[src_ptr]) \n\t"
"gsldrc1 %[ftmp6], 0x00(%[src_ptr]) \n\t"
/* ftmp7: q1 */
MMI_ADDU(%[src_ptr], %[src_ptr], %[src_pixel_step])
"gsldlc1 %[ftmp7], 0x07(%[src_ptr]) \n\t"
"gsldrc1 %[ftmp7], 0x00(%[src_ptr]) \n\t"
/* ftmp8: q2 */
MMI_ADDU(%[src_ptr], %[src_ptr], %[src_pixel_step])
"gsldlc1 %[ftmp8], 0x07(%[src_ptr]) \n\t"
"gsldrc1 %[ftmp8], 0x00(%[src_ptr]) \n\t"
/* ftmp2: q3 */
MMI_ADDU(%[src_ptr], %[src_ptr], %[src_pixel_step])
"gsldlc1 %[ftmp2], 0x07(%[src_ptr]) \n\t"
"gsldrc1 %[ftmp2], 0x00(%[src_ptr]) \n\t"
"gsldlc1 %[ftmp12], 0x07(%[blimit]) \n\t" MMI_ADDU(%[addr0], %[src_ptr], %[src_pixel_step])
"gsldrc1 %[ftmp12], 0x00(%[blimit]) \n\t"
MMI_SLL(%[tmp0], %[src_pixel_step], 0x02)
MMI_SUBU(%[addr1], %[src_ptr], %[tmp0])
"gsldlc1 %[ftmp1], 0x07(%[addr1]) \n\t"
"gsldrc1 %[ftmp1], 0x00(%[addr1]) \n\t"
MMI_SUBU(%[addr1], %[addr0], %[tmp0])
"gsldlc1 %[ftmp3], 0x07(%[addr1]) \n\t"
"gsldrc1 %[ftmp3], 0x00(%[addr1]) \n\t"
"pasubub %[ftmp0], %[ftmp1], %[ftmp3] \n\t" "pasubub %[ftmp0], %[ftmp1], %[ftmp3] \n\t"
"psubusb %[ftmp0], %[ftmp0], %[ftmp9] \n\t" "psubusb %[ftmp0], %[ftmp0], %[ftmp9] \n\t"
/* ftmp4:p1 */
MMI_SLL(%[tmp0], %[src_pixel_step], 0x01)
MMI_SUBU(%[addr1], %[src_ptr], %[tmp0])
"gsldlc1 %[ftmp4], 0x07(%[addr1]) \n\t"
"gsldrc1 %[ftmp4], 0x00(%[addr1]) \n\t"
"pasubub %[ftmp1], %[ftmp3], %[ftmp4] \n\t" "pasubub %[ftmp1], %[ftmp3], %[ftmp4] \n\t"
"psubusb %[ftmp1], %[ftmp1], %[ftmp9] \n\t" "psubusb %[ftmp1], %[ftmp1], %[ftmp9] \n\t"
"or %[ftmp0], %[ftmp0], %[ftmp1] \n\t" "or %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
"pasubub %[ftmp10], %[ftmp4], %[ftmp5] \n\t"
"psubusb %[ftmp1], %[ftmp10], %[ftmp9] \n\t" /* ftmp5:p0 */
MMI_SUBU(%[addr1], %[src_ptr], %[src_pixel_step])
"gsldlc1 %[ftmp5], 0x07(%[addr1]) \n\t"
"gsldrc1 %[ftmp5], 0x00(%[addr1]) \n\t"
"pasubub %[ftmp1], %[ftmp4], %[ftmp5] \n\t"
"sdc1 %[ftmp1], 0x00(%[srct]) \n\t"
"psubusb %[ftmp1], %[ftmp1], %[ftmp9] \n\t"
"or %[ftmp0], %[ftmp0], %[ftmp1] \n\t" "or %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
"pasubub %[ftmp11], %[ftmp7], %[ftmp6] \n\t"
"psubusb %[ftmp1], %[ftmp11], %[ftmp9] \n\t" /* ftmp6:q0 */
"gsldlc1 %[ftmp6], 0x07(%[src_ptr]) \n\t"
"gsldrc1 %[ftmp6], 0x00(%[src_ptr]) \n\t"
/* ftmp7:q1 */
"gsldlc1 %[ftmp7], 0x07(%[addr0]) \n\t"
"gsldrc1 %[ftmp7], 0x00(%[addr0]) \n\t"
"pasubub %[ftmp1], %[ftmp7], %[ftmp6] \n\t"
"sdc1 %[ftmp1], 0x08(%[srct]) \n\t"
"psubusb %[ftmp1], %[ftmp1], %[ftmp9] \n\t"
"or %[ftmp0], %[ftmp0], %[ftmp1] \n\t" "or %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
MMI_ADDU(%[addr1], %[src_ptr], %[tmp0])
"gsldlc1 %[ftmp8], 0x07(%[addr1]) \n\t"
"gsldrc1 %[ftmp8], 0x00(%[addr1]) \n\t"
"pasubub %[ftmp1], %[ftmp8], %[ftmp7] \n\t" "pasubub %[ftmp1], %[ftmp8], %[ftmp7] \n\t"
"psubusb %[ftmp1], %[ftmp1], %[ftmp9] \n\t" "psubusb %[ftmp1], %[ftmp1], %[ftmp9] \n\t"
"or %[ftmp0], %[ftmp0], %[ftmp1] \n\t" "or %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
MMI_ADDU(%[addr1], %[addr0], %[tmp0])
"gsldlc1 %[ftmp2], 0x07(%[addr1]) \n\t"
"gsldrc1 %[ftmp2], 0x00(%[addr1]) \n\t"
"pasubub %[ftmp1], %[ftmp2], %[ftmp8] \n\t" "pasubub %[ftmp1], %[ftmp2], %[ftmp8] \n\t"
"psubusb %[ftmp1], %[ftmp1], %[ftmp9] \n\t" "psubusb %[ftmp1], %[ftmp1], %[ftmp9] \n\t"
"or %[ftmp0], %[ftmp0], %[ftmp1] \n\t" "or %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
@@ -554,7 +563,9 @@ void vp8_mbloop_filter_horizontal_edge_mmi(
"mtc1 %[tmp0], %[ftmp9] \n\t" "mtc1 %[tmp0], %[ftmp9] \n\t"
"psrlh %[ftmp2], %[ftmp2], %[ftmp9] \n\t" "psrlh %[ftmp2], %[ftmp2], %[ftmp9] \n\t"
"paddusb %[ftmp1], %[ftmp1], %[ftmp2] \n\t" "paddusb %[ftmp1], %[ftmp1], %[ftmp2] \n\t"
"psubusb %[ftmp1], %[ftmp1], %[ftmp12] \n\t" "gsldlc1 %[ftmp9], 0x07(%[blimit]) \n\t"
"gsldrc1 %[ftmp9], 0x00(%[blimit]) \n\t"
"psubusb %[ftmp1], %[ftmp1], %[ftmp9] \n\t"
"or %[ftmp0], %[ftmp0], %[ftmp1] \n\t" "or %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
"xor %[ftmp9], %[ftmp9], %[ftmp9] \n\t" "xor %[ftmp9], %[ftmp9], %[ftmp9] \n\t"
/* ftmp0: mask */ /* ftmp0: mask */
@@ -562,8 +573,10 @@ void vp8_mbloop_filter_horizontal_edge_mmi(
"gsldlc1 %[ftmp9], 0x07(%[thresh]) \n\t" "gsldlc1 %[ftmp9], 0x07(%[thresh]) \n\t"
"gsldrc1 %[ftmp9], 0x00(%[thresh]) \n\t" "gsldrc1 %[ftmp9], 0x00(%[thresh]) \n\t"
"psubusb %[ftmp1], %[ftmp10], %[ftmp9] \n\t" "ldc1 %[ftmp1], 0x00(%[srct]) \n\t"
"psubusb %[ftmp2], %[ftmp11], %[ftmp9] \n\t" "psubusb %[ftmp1], %[ftmp1], %[ftmp9] \n\t"
"ldc1 %[ftmp2], 0x08(%[srct]) \n\t"
"psubusb %[ftmp2], %[ftmp2], %[ftmp9] \n\t"
"paddb %[ftmp1], %[ftmp1], %[ftmp2] \n\t" "paddb %[ftmp1], %[ftmp1], %[ftmp2] \n\t"
"xor %[ftmp2], %[ftmp2], %[ftmp2] \n\t" "xor %[ftmp2], %[ftmp2], %[ftmp2] \n\t"
"pcmpeqb %[ftmp1], %[ftmp1], %[ftmp2] \n\t" "pcmpeqb %[ftmp1], %[ftmp1], %[ftmp2] \n\t"
@@ -575,13 +588,14 @@ void vp8_mbloop_filter_horizontal_edge_mmi(
"xor %[ftmp5], %[ftmp5], %[ff_pb_80] \n\t" "xor %[ftmp5], %[ftmp5], %[ff_pb_80] \n\t"
"xor %[ftmp6], %[ftmp6], %[ff_pb_80] \n\t" "xor %[ftmp6], %[ftmp6], %[ff_pb_80] \n\t"
"xor %[ftmp7], %[ftmp7], %[ff_pb_80] \n\t" "xor %[ftmp7], %[ftmp7], %[ff_pb_80] \n\t"
"psubsb %[ftmp2], %[ftmp4], %[ftmp7] \n\t" "psubsb %[ftmp2], %[ftmp4], %[ftmp7] \n\t"
"psubsb %[ftmp9], %[ftmp6], %[ftmp5] \n\t" "psubsb %[ftmp9], %[ftmp6], %[ftmp5] \n\t"
"paddsb %[ftmp2], %[ftmp2], %[ftmp9] \n\t" "paddsb %[ftmp2], %[ftmp2], %[ftmp9] \n\t"
"paddsb %[ftmp2], %[ftmp2], %[ftmp9] \n\t" "paddsb %[ftmp2], %[ftmp2], %[ftmp9] \n\t"
"paddsb %[ftmp2], %[ftmp2], %[ftmp9] \n\t" "paddsb %[ftmp2], %[ftmp2], %[ftmp9] \n\t"
"and %[ftmp2], %[ftmp2], %[ftmp0] \n\t" "and %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
"pandn %[ftmp12], %[ftmp1], %[ftmp2] \n\t" "sdc1 %[ftmp2], 0x00(%[srct]) \n\t"
"and %[ftmp2], %[ftmp2], %[ftmp1] \n\t" "and %[ftmp2], %[ftmp2], %[ftmp1] \n\t"
"li %[tmp0], 0x0b \n\t" "li %[tmp0], 0x0b \n\t"
@@ -592,66 +606,70 @@ void vp8_mbloop_filter_horizontal_edge_mmi(
"paddsb %[ftmp0], %[ftmp2], %[ff_pb_04] \n\t" "paddsb %[ftmp0], %[ftmp2], %[ff_pb_04] \n\t"
VP8_MBLOOP_HPSRAB VP8_MBLOOP_HPSRAB
"psubsb %[ftmp6], %[ftmp6], %[ftmp0] \n\t" "psubsb %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
"ldc1 %[ftmp2], 0x00(%[srct]) \n\t"
"pandn %[ftmp2], %[ftmp1], %[ftmp2] \n\t"
"li %[tmp0], 0x07 \n\t" "li %[tmp0], 0x07 \n\t"
"mtc1 %[tmp0], %[ftmp9] \n\t" "mtc1 %[tmp0], %[ftmp9] \n\t"
"xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
VP8_MBLOOP_HPSRAB_ADD(%[ff_ph_1b00]) VP8_MBLOOP_HPSRAB_ADD(%[ff_ph_1b00])
"psubsb %[ftmp6], %[ftmp6], %[ftmp1] \n\t" "psubsb %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
"paddsb %[ftmp5], %[ftmp5], %[ftmp1] \n\t" "paddsb %[ftmp5], %[ftmp5], %[ftmp3] \n\t"
"xor %[ftmp6], %[ftmp6], %[ff_pb_80] \n\t" "xor %[ftmp6], %[ftmp6], %[ff_pb_80] \n\t"
"xor %[ftmp5], %[ftmp5], %[ff_pb_80] \n\t" "xor %[ftmp5], %[ftmp5], %[ff_pb_80] \n\t"
MMI_SLL(%[tmp0], %[src_pixel_step], 0x02)
MMI_SUBU(%[src_ptr], %[src_ptr], %[tmp0]) MMI_SUBU(%[addr1], %[src_ptr], %[src_pixel_step])
"gssdlc1 %[ftmp5], 0x07(%[src_ptr]) \n\t" "gssdlc1 %[ftmp5], 0x07(%[addr1]) \n\t"
"gssdrc1 %[ftmp5], 0x00(%[src_ptr]) \n\t" "gssdrc1 %[ftmp5], 0x00(%[addr1]) \n\t"
MMI_ADDU(%[src_ptr], %[src_ptr], %[src_pixel_step])
"gssdlc1 %[ftmp6], 0x07(%[src_ptr]) \n\t" "gssdlc1 %[ftmp6], 0x07(%[src_ptr]) \n\t"
"gssdrc1 %[ftmp6], 0x00(%[src_ptr]) \n\t" "gssdrc1 %[ftmp6], 0x00(%[src_ptr]) \n\t"
VP8_MBLOOP_HPSRAB_ADD(%[ff_ph_1200]) VP8_MBLOOP_HPSRAB_ADD(%[ff_ph_1200])
"paddsb %[ftmp4], %[ftmp4], %[ftmp1] \n\t" "paddsb %[ftmp4], %[ftmp4], %[ftmp3] \n\t"
"psubsb %[ftmp7], %[ftmp7], %[ftmp1] \n\t" "psubsb %[ftmp7], %[ftmp7], %[ftmp3] \n\t"
"xor %[ftmp4], %[ftmp4], %[ff_pb_80] \n\t" "xor %[ftmp4], %[ftmp4], %[ff_pb_80] \n\t"
"xor %[ftmp7], %[ftmp7], %[ff_pb_80] \n\t" "xor %[ftmp7], %[ftmp7], %[ff_pb_80] \n\t"
MMI_ADDU(%[src_ptr], %[src_ptr], %[src_pixel_step])
"gssdlc1 %[ftmp7], 0x07(%[src_ptr]) \n\t" "gssdlc1 %[ftmp7], 0x07(%[addr0]) \n\t"
"gssdrc1 %[ftmp7], 0x00(%[src_ptr]) \n\t" "gssdrc1 %[ftmp7], 0x00(%[addr0]) \n\t"
MMI_SUBU(%[src_ptr], %[src_ptr], %[tmp0]) MMI_SLL(%[tmp0], %[src_pixel_step], 0x01)
MMI_ADDU(%[src_ptr], %[src_ptr], %[src_pixel_step]) MMI_SUBU(%[addr1], %[src_ptr], %[tmp0])
"gssdlc1 %[ftmp4], 0x07(%[src_ptr]) \n\t" "gssdlc1 %[ftmp4], 0x07(%[addr1]) \n\t"
"gssdrc1 %[ftmp4], 0x00(%[src_ptr]) \n\t" "gssdrc1 %[ftmp4], 0x00(%[addr1]) \n\t"
VP8_MBLOOP_HPSRAB_ADD(%[ff_ph_0900]) VP8_MBLOOP_HPSRAB_ADD(%[ff_ph_0900])
"xor %[ftmp3], %[ftmp3], %[ff_pb_80] \n\t" MMI_SLL(%[tmp0], %[src_pixel_step], 0x02)
"xor %[ftmp8], %[ftmp8], %[ff_pb_80] \n\t" MMI_SUBU(%[addr1], %[addr0], %[tmp0])
"paddsb %[ftmp3], %[ftmp3], %[ftmp1] \n\t" "gsldlc1 %[ftmp4], 0x07(%[addr1]) \n\t"
"psubsb %[ftmp8], %[ftmp8], %[ftmp1] \n\t" "gsldrc1 %[ftmp4], 0x00(%[addr1]) \n\t"
"xor %[ftmp3], %[ftmp3], %[ff_pb_80] \n\t" MMI_ADDU(%[addr1], %[addr0], %[src_pixel_step])
"xor %[ftmp8], %[ftmp8], %[ff_pb_80] \n\t" "gsldlc1 %[ftmp7], 0x07(%[addr1]) \n\t"
MMI_ADDU(%[src_ptr], %[src_ptr], %[tmp0]) "gsldrc1 %[ftmp7], 0x00(%[addr1]) \n\t"
"gssdlc1 %[ftmp8], 0x07(%[src_ptr]) \n\t"
"gssdrc1 %[ftmp8], 0x00(%[src_ptr]) \n\t" "xor %[ftmp4], %[ftmp4], %[ff_pb_80] \n\t"
MMI_SUBU(%[src_ptr], %[src_ptr], %[tmp0]) "xor %[ftmp7], %[ftmp7], %[ff_pb_80] \n\t"
MMI_SUBU(%[src_ptr], %[src_ptr], %[src_pixel_step]) "paddsb %[ftmp4], %[ftmp4], %[ftmp3] \n\t"
"gssdlc1 %[ftmp3], 0x07(%[src_ptr]) \n\t" "psubsb %[ftmp7], %[ftmp7], %[ftmp3] \n\t"
"gssdrc1 %[ftmp3], 0x00(%[src_ptr]) \n\t" "xor %[ftmp4], %[ftmp4], %[ff_pb_80] \n\t"
"xor %[ftmp7], %[ftmp7], %[ff_pb_80] \n\t"
MMI_ADDU(%[addr1], %[addr0], %[src_pixel_step])
"gssdlc1 %[ftmp7], 0x07(%[addr1]) \n\t"
"gssdrc1 %[ftmp7], 0x00(%[addr1]) \n\t"
MMI_SUBU(%[addr1], %[addr0], %[tmp0])
"gssdlc1 %[ftmp4], 0x07(%[addr1]) \n\t"
"gssdrc1 %[ftmp4], 0x00(%[addr1]) \n\t"
MMI_SUBU(%[src_ptr], %[src_ptr], %[src_pixel_step])
MMI_ADDIU(%[src_ptr], %[src_ptr], 0x08)
"addiu %[count], %[count], -0x01 \n\t" "addiu %[count], %[count], -0x01 \n\t"
MMI_ADDIU(%[src_ptr], %[src_ptr], 0x08)
"bnez %[count], 1b \n\t" "bnez %[count], 1b \n\t"
: [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]), : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
[ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]), [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
[ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]), [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
[ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]), [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
[ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]), [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]),
[ftmp10]"=&f"(ftmp[10]), [ftmp11]"=&f"(ftmp[11]), [tmp0]"=&r"(tmp[0]),
[ftmp12]"=&f"(ftmp[12]), [tmp0]"=&r"(tmp[0]), [addr0]"=&r"(addr[0]), [addr1]"=&r"(addr[1]),
[src_ptr]"+&r"(src_ptr), [count]"+&r"(count) [src_ptr]"+&r"(src_ptr), [count]"+&r"(count)
: [limit]"r"(limit), [blimit]"r"(blimit), : [limit]"r"(limit), [blimit]"r"(blimit),
[thresh]"r"(thresh), [srct]"r"(srct), [thresh]"r"(thresh),
[src_pixel_step]"r"((mips_reg)src_pixel_step), [src_pixel_step]"r"((mips_reg)src_pixel_step),
[ff_pb_fe]"f"(ff_pb_fe), [ff_pb_80]"f"(ff_pb_80), [ff_pb_fe]"f"(ff_pb_fe), [ff_pb_80]"f"(ff_pb_80),
[ff_pb_04]"f"(ff_pb_04), [ff_pb_03]"f"(ff_pb_03), [ff_pb_04]"f"(ff_pb_04), [ff_pb_03]"f"(ff_pb_03),
@@ -678,60 +696,64 @@ void vp8_mbloop_filter_vertical_edge_mmi(
unsigned char *src_ptr, int src_pixel_step, const unsigned char *blimit, unsigned char *src_ptr, int src_pixel_step, const unsigned char *blimit,
const unsigned char *limit, const unsigned char *thresh, int count) { const unsigned char *limit, const unsigned char *thresh, int count) {
mips_reg tmp[1]; mips_reg tmp[1];
mips_reg addr[2];
DECLARE_ALIGNED(8, const uint64_t, srct[1]); DECLARE_ALIGNED(8, const uint64_t, srct[1]);
double ftmp[14]; double ftmp[13];
__asm__ volatile ( __asm__ volatile (
MMI_SLL(%[tmp0], %[src_pixel_step], 0x02)
MMI_ADDU(%[src_ptr], %[src_ptr], %[tmp0])
MMI_SUBU(%[src_ptr], %[src_ptr], 0x04) MMI_SUBU(%[src_ptr], %[src_ptr], 0x04)
"1: \n\t" "1: \n\t"
"gsldlc1 %[ftmp5], 0x07(%[src_ptr]) \n\t" MMI_SLL (%[tmp0], %[src_pixel_step], 0x01)
"gsldrc1 %[ftmp5], 0x00(%[src_ptr]) \n\t" MMI_ADDU(%[addr0], %[src_ptr], %[tmp0])
MMI_ADDU(%[src_ptr], %[src_ptr], %[src_pixel_step]) "gsldlc1 %[ftmp11], 0x07(%[addr0]) \n\t"
"gsldlc1 %[ftmp6], 0x07(%[src_ptr]) \n\t" "gsldrc1 %[ftmp11], 0x00(%[addr0]) \n\t"
"gsldrc1 %[ftmp6], 0x00(%[src_ptr]) \n\t" MMI_ADDU(%[addr0], %[addr0], %[src_pixel_step])
MMI_ADDU(%[src_ptr], %[src_ptr], %[src_pixel_step]) "gsldlc1 %[ftmp12], 0x07(%[addr0]) \n\t"
"gsldlc1 %[ftmp7], 0x07(%[src_ptr]) \n\t" "gsldrc1 %[ftmp12], 0x00(%[addr0]) \n\t"
"gsldrc1 %[ftmp7], 0x00(%[src_ptr]) \n\t" "punpcklbh %[ftmp1], %[ftmp11], %[ftmp12] \n\t"
MMI_ADDU(%[src_ptr], %[src_ptr], %[src_pixel_step]) "punpckhbh %[ftmp2], %[ftmp11], %[ftmp12] \n\t"
"gsldlc1 %[ftmp8], 0x07(%[src_ptr]) \n\t"
"gsldrc1 %[ftmp8], 0x00(%[src_ptr]) \n\t"
"punpcklbh %[ftmp11], %[ftmp5], %[ftmp6] \n\t" "gsldlc1 %[ftmp11], 0x07(%[src_ptr]) \n\t"
"punpckhbh %[ftmp12], %[ftmp5], %[ftmp6] \n\t" "gsldrc1 %[ftmp11], 0x00(%[src_ptr]) \n\t"
"punpcklbh %[ftmp9], %[ftmp7], %[ftmp8] \n\t" MMI_ADDU(%[addr0], %[src_ptr], %[src_pixel_step])
"punpckhbh %[ftmp10], %[ftmp7], %[ftmp8] \n\t" "gsldlc1 %[ftmp12], 0x07(%[addr0]) \n\t"
"gsldrc1 %[ftmp12], 0x00(%[addr0]) \n\t"
"punpcklbh %[ftmp3], %[ftmp11], %[ftmp12] \n\t"
"punpckhbh %[ftmp4], %[ftmp11], %[ftmp12] \n\t"
"punpcklhw %[ftmp1], %[ftmp12], %[ftmp10] \n\t" "punpcklhw %[ftmp5], %[ftmp4], %[ftmp2] \n\t"
"punpckhhw %[ftmp2], %[ftmp12], %[ftmp10] \n\t" "punpckhhw %[ftmp6], %[ftmp4], %[ftmp2] \n\t"
"punpcklhw %[ftmp3], %[ftmp11], %[ftmp9] \n\t" "punpcklhw %[ftmp7], %[ftmp3], %[ftmp1] \n\t"
"punpckhhw %[ftmp4], %[ftmp11], %[ftmp9] \n\t" "punpckhhw %[ftmp8], %[ftmp3], %[ftmp1] \n\t"
MMI_ADDU(%[src_ptr], %[src_ptr], %[src_pixel_step]) MMI_SLL(%[tmp0], %[src_pixel_step], 0x01)
"gsldlc1 %[ftmp5], 0x07(%[src_ptr]) \n\t" MMI_SUBU(%[addr0], %[src_ptr], %[tmp0])
"gsldrc1 %[ftmp5], 0x00(%[src_ptr]) \n\t" "gsldlc1 %[ftmp11], 0x07(%[addr0]) \n\t"
MMI_ADDU(%[src_ptr], %[src_ptr], %[src_pixel_step]) "gsldrc1 %[ftmp11], 0x00(%[addr0]) \n\t"
"gsldlc1 %[ftmp6], 0x07(%[src_ptr]) \n\t" MMI_SUBU(%[addr0], %[src_ptr], %[src_pixel_step])
"gsldrc1 %[ftmp6], 0x00(%[src_ptr]) \n\t" "gsldlc1 %[ftmp12], 0x07(%[addr0]) \n\t"
MMI_ADDU(%[src_ptr], %[src_ptr], %[src_pixel_step]) "gsldrc1 %[ftmp12], 0x00(%[addr0]) \n\t"
"gsldlc1 %[ftmp7], 0x07(%[src_ptr]) \n\t" "punpcklbh %[ftmp9], %[ftmp11], %[ftmp12] \n\t"
"gsldrc1 %[ftmp7], 0x00(%[src_ptr]) \n\t" "punpckhbh %[ftmp10], %[ftmp11], %[ftmp12] \n\t"
MMI_ADDU(%[src_ptr], %[src_ptr], %[src_pixel_step])
"gsldlc1 %[ftmp8], 0x07(%[src_ptr]) \n\t"
"gsldrc1 %[ftmp8], 0x00(%[src_ptr]) \n\t"
"punpcklbh %[ftmp11], %[ftmp5], %[ftmp6] \n\t" MMI_SLL(%[tmp0], %[src_pixel_step], 0x02)
"punpckhbh %[ftmp12], %[ftmp5], %[ftmp6] \n\t" MMI_SUBU(%[addr0], %[src_ptr], %[tmp0])
"punpcklbh %[ftmp9], %[ftmp7], %[ftmp8] \n\t" "gsldlc1 %[ftmp11], 0x07(%[addr0]) \n\t"
"punpckhbh %[ftmp10], %[ftmp7], %[ftmp8] \n\t" "gsldrc1 %[ftmp11], 0x00(%[addr0]) \n\t"
MMI_ADDU(%[addr0], %[addr0], %[src_pixel_step])
"gsldlc1 %[ftmp12], 0x07(%[addr0]) \n\t"
"gsldrc1 %[ftmp12], 0x00(%[addr0]) \n\t"
"punpcklbh %[ftmp0], %[ftmp11], %[ftmp12] \n\t"
"punpckhbh %[ftmp11], %[ftmp11], %[ftmp12] \n\t"
"punpcklhw %[ftmp5], %[ftmp12], %[ftmp10] \n\t" "punpcklhw %[ftmp1], %[ftmp11], %[ftmp10] \n\t"
"punpckhhw %[ftmp6], %[ftmp12], %[ftmp10] \n\t" "punpckhhw %[ftmp2], %[ftmp11], %[ftmp10] \n\t"
"punpcklhw %[ftmp7], %[ftmp11], %[ftmp9] \n\t" "punpcklhw %[ftmp3], %[ftmp0], %[ftmp9] \n\t"
"punpckhhw %[ftmp8], %[ftmp11], %[ftmp9] \n\t" "punpckhhw %[ftmp4], %[ftmp0], %[ftmp9] \n\t"
"gsldlc1 %[ftmp13], 0x07(%[limit]) \n\t"
"gsldrc1 %[ftmp13], 0x00(%[limit]) \n\t"
/* ftmp9:q0 ftmp10:q1 */ /* ftmp9:q0 ftmp10:q1 */
"punpcklwd %[ftmp9], %[ftmp1], %[ftmp5] \n\t" "punpcklwd %[ftmp9], %[ftmp1], %[ftmp5] \n\t"
"punpckhwd %[ftmp10], %[ftmp1], %[ftmp5] \n\t" "punpckhwd %[ftmp10], %[ftmp1], %[ftmp5] \n\t"
@@ -749,61 +771,60 @@ void vp8_mbloop_filter_vertical_edge_mmi(
"punpcklwd %[ftmp5], %[ftmp4], %[ftmp8] \n\t" "punpcklwd %[ftmp5], %[ftmp4], %[ftmp8] \n\t"
"punpckhwd %[ftmp6], %[ftmp4], %[ftmp8] \n\t" "punpckhwd %[ftmp6], %[ftmp4], %[ftmp8] \n\t"
"gsldlc1 %[ftmp8], 0x07(%[limit]) \n\t"
"gsldrc1 %[ftmp8], 0x00(%[limit]) \n\t"
/* abs (q3-q2) */ /* abs (q3-q2) */
"pasubub %[ftmp7], %[ftmp12], %[ftmp11] \n\t" "pasubub %[ftmp7], %[ftmp12], %[ftmp11] \n\t"
"psubusb %[ftmp0], %[ftmp7], %[ftmp13] \n\t" "psubusb %[ftmp0], %[ftmp7], %[ftmp8] \n\t"
/* abs (q2-q1) */ /* abs (q2-q1) */
"pasubub %[ftmp7], %[ftmp11], %[ftmp10] \n\t" "pasubub %[ftmp7], %[ftmp11], %[ftmp10] \n\t"
"psubusb %[ftmp7], %[ftmp7], %[ftmp13] \n\t" "psubusb %[ftmp7], %[ftmp7], %[ftmp8] \n\t"
"or %[ftmp0], %[ftmp0], %[ftmp7] \n\t" "or %[ftmp0], %[ftmp0], %[ftmp7] \n\t"
/* ftmp3: abs(q1-q0) */ /* ftmp3: abs(q1-q0) */
"pasubub %[ftmp3], %[ftmp10], %[ftmp9] \n\t" "pasubub %[ftmp3], %[ftmp10], %[ftmp9] \n\t"
"psubusb %[ftmp7], %[ftmp3], %[ftmp13] \n\t" "psubusb %[ftmp7], %[ftmp3], %[ftmp8] \n\t"
"or %[ftmp0], %[ftmp0], %[ftmp7] \n\t" "or %[ftmp0], %[ftmp0], %[ftmp7] \n\t"
/* ftmp4: abs(p1-p0) */ /* ftmp4: abs(p1-p0) */
"pasubub %[ftmp4], %[ftmp5], %[ftmp6] \n\t" "pasubub %[ftmp4], %[ftmp5], %[ftmp6] \n\t"
"psubusb %[ftmp7], %[ftmp4], %[ftmp13] \n\t" "psubusb %[ftmp7], %[ftmp4], %[ftmp8] \n\t"
"or %[ftmp0], %[ftmp0], %[ftmp7] \n\t" "or %[ftmp0], %[ftmp0], %[ftmp7] \n\t"
/* abs (p2-p1) */ /* abs (p2-p1) */
"pasubub %[ftmp7], %[ftmp2], %[ftmp5] \n\t" "pasubub %[ftmp7], %[ftmp2], %[ftmp5] \n\t"
"psubusb %[ftmp7], %[ftmp7], %[ftmp13] \n\t" "psubusb %[ftmp7], %[ftmp7], %[ftmp8] \n\t"
"or %[ftmp0], %[ftmp0], %[ftmp7] \n\t" "or %[ftmp0], %[ftmp0], %[ftmp7] \n\t"
/* abs (p3-p2) */ /* abs (p3-p2) */
"pasubub %[ftmp7], %[ftmp1], %[ftmp2] \n\t" "pasubub %[ftmp7], %[ftmp1], %[ftmp2] \n\t"
"psubusb %[ftmp7], %[ftmp7], %[ftmp13] \n\t" "psubusb %[ftmp7], %[ftmp7], %[ftmp8] \n\t"
"or %[ftmp0], %[ftmp0], %[ftmp7] \n\t" "or %[ftmp0], %[ftmp0], %[ftmp7] \n\t"
/* abs (p0-q0) */
"gsldlc1 %[ftmp13], 0x07(%[blimit]) \n\t"
"gsldrc1 %[ftmp13], 0x00(%[blimit]) \n\t"
"gsldlc1 %[ftmp7], 0x07(%[thresh]) \n\t"
"gsldrc1 %[ftmp7], 0x00(%[thresh]) \n\t"
/* abs (p0-q0) * 2 */
"pasubub %[ftmp1], %[ftmp9], %[ftmp6] \n\t" "pasubub %[ftmp1], %[ftmp9], %[ftmp6] \n\t"
"paddusb %[ftmp1], %[ftmp1], %[ftmp1] \n\t" "paddusb %[ftmp1], %[ftmp1], %[ftmp1] \n\t"
/* abs (p1-q1) / 2 */ /* abs (p1-q1) */
"pasubub %[ftmp12], %[ftmp10], %[ftmp5] \n\t" "pasubub %[ftmp12], %[ftmp10], %[ftmp5] \n\t"
"and %[ftmp12], %[ftmp12], %[ff_pb_fe] \n\t" "and %[ftmp12], %[ftmp12], %[ff_pb_fe] \n\t"
"li %[tmp0], 0x01 \n\t" "li %[tmp0], 0x01 \n\t"
"mtc1 %[tmp0], %[ftmp8] \n\t" "mtc1 %[tmp0], %[ftmp8] \n\t"
"psrlh %[ftmp12], %[ftmp12], %[ftmp8] \n\t" "psrlh %[ftmp12], %[ftmp12], %[ftmp8] \n\t"
"paddusb %[ftmp12], %[ftmp1], %[ftmp12] \n\t" "paddusb %[ftmp12], %[ftmp1], %[ftmp12] \n\t"
"psubusb %[ftmp12], %[ftmp12], %[ftmp13] \n\t"
"gsldlc1 %[ftmp8], 0x07(%[blimit]) \n\t"
"gsldrc1 %[ftmp8], 0x00(%[blimit]) \n\t"
"psubusb %[ftmp12], %[ftmp12], %[ftmp8] \n\t"
"or %[ftmp0], %[ftmp0], %[ftmp12] \n\t" "or %[ftmp0], %[ftmp0], %[ftmp12] \n\t"
"xor %[ftmp12], %[ftmp12], %[ftmp12] \n\t" "xor %[ftmp12], %[ftmp12], %[ftmp12] \n\t"
/* ftmp0: mask */
"pcmpeqb %[ftmp0], %[ftmp0], %[ftmp12] \n\t" "pcmpeqb %[ftmp0], %[ftmp0], %[ftmp12] \n\t"
/* abs(p1-p0) - thresh */ "gsldlc1 %[ftmp8], 0x07(%[thresh]) \n\t"
"psubusb %[ftmp4], %[ftmp4], %[ftmp7] \n\t" "gsldrc1 %[ftmp8], 0x00(%[thresh]) \n\t"
/* abs(q1-q0) - thresh */ /* ftmp3: abs(q1-q0) ftmp4: abs(p1-p0) */
"psubusb %[ftmp3], %[ftmp3], %[ftmp7] \n\t" "psubusb %[ftmp4], %[ftmp4], %[ftmp8] \n\t"
"psubusb %[ftmp3], %[ftmp3], %[ftmp8] \n\t"
"or %[ftmp3], %[ftmp4], %[ftmp3] \n\t" "or %[ftmp3], %[ftmp4], %[ftmp3] \n\t"
"pcmpeqb %[ftmp3], %[ftmp3], %[ftmp12] \n\t" "pcmpeqb %[ftmp3], %[ftmp3], %[ftmp12] \n\t"
"pcmpeqb %[ftmp1], %[ftmp1], %[ftmp1] \n\t" "pcmpeqb %[ftmp1], %[ftmp1], %[ftmp1] \n\t"
/* ftmp1: hev */
"xor %[ftmp1], %[ftmp3], %[ftmp1] \n\t" "xor %[ftmp1], %[ftmp3], %[ftmp1] \n\t"
/* ftmp2:ps2, ftmp5:ps1, ftmp6:ps0, ftmp9:qs0, ftmp10:qs1, ftmp11:qs2 */
"xor %[ftmp11], %[ftmp11], %[ff_pb_80] \n\t" "xor %[ftmp11], %[ftmp11], %[ff_pb_80] \n\t"
"xor %[ftmp10], %[ftmp10], %[ff_pb_80] \n\t" "xor %[ftmp10], %[ftmp10], %[ff_pb_80] \n\t"
"xor %[ftmp9], %[ftmp9], %[ff_pb_80] \n\t" "xor %[ftmp9], %[ftmp9], %[ff_pb_80] \n\t"
@@ -816,30 +837,30 @@ void vp8_mbloop_filter_vertical_edge_mmi(
"paddsb %[ftmp3], %[ftmp3], %[ftmp4] \n\t" "paddsb %[ftmp3], %[ftmp3], %[ftmp4] \n\t"
"paddsb %[ftmp3], %[ftmp3], %[ftmp4] \n\t" "paddsb %[ftmp3], %[ftmp3], %[ftmp4] \n\t"
"paddsb %[ftmp3], %[ftmp3], %[ftmp4] \n\t" "paddsb %[ftmp3], %[ftmp3], %[ftmp4] \n\t"
/* filter_value &= mask */
"and %[ftmp0], %[ftmp0], %[ftmp3] \n\t" "and %[ftmp0], %[ftmp0], %[ftmp3] \n\t"
/* Filter2 = filter_value & hev */
"and %[ftmp3], %[ftmp1], %[ftmp0] \n\t" "and %[ftmp3], %[ftmp1], %[ftmp0] \n\t"
/* filter_value &= ~hev */
"pandn %[ftmp0], %[ftmp1], %[ftmp0] \n\t" "pandn %[ftmp0], %[ftmp1], %[ftmp0] \n\t"
"paddsb %[ftmp4], %[ftmp3], %[ff_pb_04] \n\t" "paddsb %[ftmp4], %[ftmp3], %[ff_pb_04] \n\t"
"li %[tmp0], 0x0b \n\t" "li %[tmp0], 0x0b \n\t"
"mtc1 %[tmp0], %[ftmp12] \n\t" "mtc1 %[tmp0], %[ftmp12] \n\t"
"xor %[ftmp7], %[ftmp7], %[ftmp7] \n\t"
"xor %[ftmp8], %[ftmp8], %[ftmp8] \n\t"
"punpcklbh %[ftmp7], %[ftmp7], %[ftmp4] \n\t" "punpcklbh %[ftmp7], %[ftmp7], %[ftmp4] \n\t"
"punpckhbh %[ftmp8], %[ftmp8], %[ftmp4] \n\t" "punpckhbh %[ftmp8], %[ftmp8], %[ftmp4] \n\t"
"psrah %[ftmp7], %[ftmp7], %[ftmp12] \n\t" "psrah %[ftmp7], %[ftmp7], %[ftmp12] \n\t"
"psrah %[ftmp8], %[ftmp8], %[ftmp12] \n\t" "psrah %[ftmp8], %[ftmp8], %[ftmp12] \n\t"
"packsshb %[ftmp4], %[ftmp7], %[ftmp8] \n\t" "packsshb %[ftmp4], %[ftmp7], %[ftmp8] \n\t"
/* ftmp9: qs0 */
"psubsb %[ftmp9], %[ftmp9], %[ftmp4] \n\t" "psubsb %[ftmp9], %[ftmp9], %[ftmp4] \n\t"
"paddsb %[ftmp3], %[ftmp3], %[ff_pb_03] \n\t" "paddsb %[ftmp3], %[ftmp3], %[ff_pb_03] \n\t"
"xor %[ftmp7], %[ftmp7], %[ftmp7] \n\t"
"xor %[ftmp8], %[ftmp8], %[ftmp8] \n\t"
"punpcklbh %[ftmp7], %[ftmp7], %[ftmp3] \n\t" "punpcklbh %[ftmp7], %[ftmp7], %[ftmp3] \n\t"
"punpckhbh %[ftmp8], %[ftmp8], %[ftmp3] \n\t" "punpckhbh %[ftmp8], %[ftmp8], %[ftmp3] \n\t"
"psrah %[ftmp7], %[ftmp7], %[ftmp12] \n\t" "psrah %[ftmp7], %[ftmp7], %[ftmp12] \n\t"
"psrah %[ftmp8], %[ftmp8], %[ftmp12] \n\t" "psrah %[ftmp8], %[ftmp8], %[ftmp12] \n\t"
"packsshb %[ftmp3], %[ftmp7], %[ftmp8] \n\t" "packsshb %[ftmp3], %[ftmp7], %[ftmp8] \n\t"
/* ftmp6: ps0 */
"paddsb %[ftmp6], %[ftmp6], %[ftmp3] \n\t" "paddsb %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
"li %[tmp0], 0x07 \n\t" "li %[tmp0], 0x07 \n\t"
@@ -851,10 +872,8 @@ void vp8_mbloop_filter_vertical_edge_mmi(
"pmulhh %[ftmp8], %[ftmp8], %[ftmp1] \n\t" "pmulhh %[ftmp8], %[ftmp8], %[ftmp1] \n\t"
VP8_MBLOOP_VPSRAB_ADDT VP8_MBLOOP_VPSRAB_ADDT
"psubsb %[ftmp4], %[ftmp9], %[ftmp3] \n\t" "psubsb %[ftmp4], %[ftmp9], %[ftmp3] \n\t"
/* ftmp9: oq0 */
"xor %[ftmp9], %[ftmp4], %[ff_pb_80] \n\t" "xor %[ftmp9], %[ftmp4], %[ff_pb_80] \n\t"
"paddsb %[ftmp4], %[ftmp6], %[ftmp3] \n\t" "paddsb %[ftmp4], %[ftmp6], %[ftmp3] \n\t"
/* ftmp6: op0 */
"xor %[ftmp6], %[ftmp4], %[ff_pb_80] \n\t" "xor %[ftmp6], %[ftmp4], %[ff_pb_80] \n\t"
VP8_MBLOOP_VPSRAB_ADDH VP8_MBLOOP_VPSRAB_ADDH
@@ -863,10 +882,8 @@ void vp8_mbloop_filter_vertical_edge_mmi(
"pmulhh %[ftmp8], %[ftmp8], %[ftmp1] \n\t" "pmulhh %[ftmp8], %[ftmp8], %[ftmp1] \n\t"
VP8_MBLOOP_VPSRAB_ADDT VP8_MBLOOP_VPSRAB_ADDT
"psubsb %[ftmp4], %[ftmp10], %[ftmp3] \n\t" "psubsb %[ftmp4], %[ftmp10], %[ftmp3] \n\t"
/* ftmp10: oq1 */
"xor %[ftmp10], %[ftmp4], %[ff_pb_80] \n\t" "xor %[ftmp10], %[ftmp4], %[ff_pb_80] \n\t"
"paddsb %[ftmp4], %[ftmp5], %[ftmp3] \n\t" "paddsb %[ftmp4], %[ftmp5], %[ftmp3] \n\t"
/* ftmp5: op1 */
"xor %[ftmp5], %[ftmp4], %[ff_pb_80] \n\t" "xor %[ftmp5], %[ftmp4], %[ff_pb_80] \n\t"
VP8_MBLOOP_VPSRAB_ADDH VP8_MBLOOP_VPSRAB_ADDH
@@ -874,10 +891,8 @@ void vp8_mbloop_filter_vertical_edge_mmi(
"pmulhh %[ftmp8], %[ftmp8], %[ff_ph_0900] \n\t" "pmulhh %[ftmp8], %[ftmp8], %[ff_ph_0900] \n\t"
VP8_MBLOOP_VPSRAB_ADDT VP8_MBLOOP_VPSRAB_ADDT
"psubsb %[ftmp4], %[ftmp11], %[ftmp3] \n\t" "psubsb %[ftmp4], %[ftmp11], %[ftmp3] \n\t"
/* ftmp11: oq2 */
"xor %[ftmp11], %[ftmp4], %[ff_pb_80] \n\t" "xor %[ftmp11], %[ftmp4], %[ff_pb_80] \n\t"
"paddsb %[ftmp4], %[ftmp2], %[ftmp3] \n\t" "paddsb %[ftmp4], %[ftmp2], %[ftmp3] \n\t"
/* ftmp2: op2 */
"xor %[ftmp2], %[ftmp4], %[ff_pb_80] \n\t" "xor %[ftmp2], %[ftmp4], %[ff_pb_80] \n\t"
"ldc1 %[ftmp12], 0x00(%[srct]) \n\t" "ldc1 %[ftmp12], 0x00(%[srct]) \n\t"
@@ -901,40 +916,41 @@ void vp8_mbloop_filter_vertical_edge_mmi(
"punpcklhw %[ftmp10], %[ftmp1], %[ftmp3] \n\t" "punpcklhw %[ftmp10], %[ftmp1], %[ftmp3] \n\t"
"punpckhhw %[ftmp11], %[ftmp1], %[ftmp3] \n\t" "punpckhhw %[ftmp11], %[ftmp1], %[ftmp3] \n\t"
"punpcklwd %[ftmp0], %[ftmp7], %[ftmp11] \n\t"
"punpckhwd %[ftmp1], %[ftmp7], %[ftmp11] \n\t"
"gssdlc1 %[ftmp1], 0x07(%[src_ptr]) \n\t"
"gssdrc1 %[ftmp1], 0x00(%[src_ptr]) \n\t"
MMI_SUBU(%[src_ptr], %[src_ptr], %[src_pixel_step])
"gssdlc1 %[ftmp0], 0x07(%[src_ptr]) \n\t"
"gssdrc1 %[ftmp0], 0x00(%[src_ptr]) \n\t"
"punpcklwd %[ftmp0], %[ftmp6], %[ftmp10] \n\t" "punpcklwd %[ftmp0], %[ftmp6], %[ftmp10] \n\t"
"punpckhwd %[ftmp1], %[ftmp6], %[ftmp10] \n\t" "punpckhwd %[ftmp1], %[ftmp6], %[ftmp10] \n\t"
MMI_SUBU(%[src_ptr], %[src_ptr], %[src_pixel_step])
"gssdlc1 %[ftmp1], 0x07(%[src_ptr]) \n\t"
"gssdrc1 %[ftmp1], 0x00(%[src_ptr]) \n\t"
MMI_SUBU(%[src_ptr], %[src_ptr], %[src_pixel_step])
"gssdlc1 %[ftmp0], 0x07(%[src_ptr]) \n\t" "gssdlc1 %[ftmp0], 0x07(%[src_ptr]) \n\t"
"gssdrc1 %[ftmp0], 0x00(%[src_ptr]) \n\t" "gssdrc1 %[ftmp0], 0x00(%[src_ptr]) \n\t"
MMI_ADDU(%[addr0], %[src_ptr], %[src_pixel_step])
"gssdlc1 %[ftmp1], 0x07(%[addr0]) \n\t"
"gssdrc1 %[ftmp1], 0x00(%[addr0]) \n\t"
"punpcklwd %[ftmp0], %[ftmp7], %[ftmp11] \n\t"
"punpckhwd %[ftmp1], %[ftmp7], %[ftmp11] \n\t"
MMI_ADDU(%[addr0], %[addr0], %[src_pixel_step])
"gssdlc1 %[ftmp0], 0x07(%[addr0]) \n\t"
"gssdrc1 %[ftmp0], 0x00(%[addr0]) \n\t"
MMI_ADDU(%[addr0], %[addr0], %[src_pixel_step])
"gssdlc1 %[ftmp1], 0x07(%[addr0]) \n\t"
"gssdrc1 %[ftmp1], 0x00(%[addr0]) \n\t"
"punpcklwd %[ftmp1], %[ftmp5], %[ftmp9] \n\t" "punpcklwd %[ftmp1], %[ftmp5], %[ftmp9] \n\t"
"punpckhwd %[ftmp0], %[ftmp5], %[ftmp9] \n\t" "punpckhwd %[ftmp0], %[ftmp5], %[ftmp9] \n\t"
MMI_SUBU(%[src_ptr], %[src_ptr], %[src_pixel_step]) MMI_SUBU(%[addr0], %[src_ptr], %[src_pixel_step])
"gssdlc1 %[ftmp0], 0x07(%[src_ptr]) \n\t" "gssdlc1 %[ftmp0], 0x07(%[addr0]) \n\t"
"gssdrc1 %[ftmp0], 0x00(%[src_ptr]) \n\t" "gssdrc1 %[ftmp0], 0x00(%[addr0]) \n\t"
MMI_SUBU(%[src_ptr], %[src_ptr], %[src_pixel_step]) MMI_SUBU(%[addr0], %[addr0], %[src_pixel_step])
"gssdlc1 %[ftmp1], 0x07(%[src_ptr]) \n\t" "gssdlc1 %[ftmp1], 0x07(%[addr0]) \n\t"
"gssdrc1 %[ftmp1], 0x00(%[src_ptr]) \n\t" "gssdrc1 %[ftmp1], 0x00(%[addr0]) \n\t"
"punpcklwd %[ftmp1], %[ftmp4], %[ftmp8] \n\t" "punpcklwd %[ftmp1], %[ftmp4], %[ftmp8] \n\t"
"punpckhwd %[ftmp0], %[ftmp4], %[ftmp8] \n\t" "punpckhwd %[ftmp0], %[ftmp4], %[ftmp8] \n\t"
MMI_SUBU(%[src_ptr], %[src_ptr], %[src_pixel_step]) MMI_SUBU(%[addr0], %[addr0], %[src_pixel_step])
"gssdlc1 %[ftmp0], 0x07(%[src_ptr]) \n\t" "gssdlc1 %[ftmp0], 0x07(%[addr0]) \n\t"
"gssdrc1 %[ftmp0], 0x00(%[src_ptr]) \n\t" "gssdrc1 %[ftmp0], 0x00(%[addr0]) \n\t"
MMI_SUBU(%[src_ptr], %[src_ptr], %[src_pixel_step]) MMI_SUBU(%[addr0], %[addr0], %[src_pixel_step])
"gssdlc1 %[ftmp1], 0x07(%[src_ptr]) \n\t" "gssdlc1 %[ftmp1], 0x07(%[addr0]) \n\t"
"gssdrc1 %[ftmp1], 0x00(%[src_ptr]) \n\t" "gssdrc1 %[ftmp1], 0x00(%[addr0]) \n\t"
"addiu %[count], %[count], -0x01 \n\t" "addiu %[count], %[count], -0x01 \n\t"
MMI_SLL(%[tmp0], %[src_pixel_step], 0x03) MMI_SLL(%[tmp0], %[src_pixel_step], 0x03)
@@ -946,9 +962,9 @@ void vp8_mbloop_filter_vertical_edge_mmi(
[ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]), [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
[ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]), [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]),
[ftmp10]"=&f"(ftmp[10]), [ftmp11]"=&f"(ftmp[11]), [ftmp10]"=&f"(ftmp[10]), [ftmp11]"=&f"(ftmp[11]),
[ftmp12]"=&f"(ftmp[12]), [ftmp13]"=&f"(ftmp[13]), [ftmp12]"=&f"(ftmp[12]), [tmp0]"=&r"(tmp[0]),
[tmp0]"=&r"(tmp[0]), [src_ptr]"+&r"(src_ptr), [addr0]"=&r"(addr[0]),
[count]"+&r"(count) [src_ptr]"+&r"(src_ptr), [count]"+&r"(count)
: [limit]"r"(limit), [blimit]"r"(blimit), : [limit]"r"(limit), [blimit]"r"(blimit),
[srct]"r"(srct), [thresh]"r"(thresh), [srct]"r"(srct), [thresh]"r"(thresh),
[src_pixel_step]"r"((mips_reg)src_pixel_step), [src_pixel_step]"r"((mips_reg)src_pixel_step),

View File

@@ -86,7 +86,6 @@ static INLINE void vp8_filter_block1d_h6_mmi(unsigned char *src_ptr,
register double ftmp8 asm("$f18"); register double ftmp8 asm("$f18");
register double ftmp9 asm("$f20"); register double ftmp9 asm("$f20");
register double ftmp10 asm("$f22"); register double ftmp10 asm("$f22");
register double ftmp11 asm("$f24");
#else #else
register double fzero asm("$f0"); register double fzero asm("$f0");
register double ftmp0 asm("$f1"); register double ftmp0 asm("$f1");
@@ -100,7 +99,6 @@ static INLINE void vp8_filter_block1d_h6_mmi(unsigned char *src_ptr,
register double ftmp8 asm("$f9"); register double ftmp8 asm("$f9");
register double ftmp9 asm("$f10"); register double ftmp9 asm("$f10");
register double ftmp10 asm("$f11"); register double ftmp10 asm("$f11");
register double ftmp11 asm("$f12");
#endif // _MIPS_SIM == _ABIO32 #endif // _MIPS_SIM == _ABIO32
__asm__ volatile ( __asm__ volatile (
@@ -114,13 +112,11 @@ static INLINE void vp8_filter_block1d_h6_mmi(unsigned char *src_ptr,
"li %[tmp0], 0x07 \n\t" "li %[tmp0], 0x07 \n\t"
"mtc1 %[tmp0], %[ftmp7] \n\t" "mtc1 %[tmp0], %[ftmp7] \n\t"
"li %[tmp0], 0x08 \n\t" "li %[tmp0], 0x08 \n\t"
"mtc1 %[tmp0], %[ftmp11] \n\t" "mtc1 %[tmp0], %[ftmp10] \n\t"
"1: \n\t" "1: \n\t"
"gsldlc1 %[ftmp9], 0x05(%[src_ptr]) \n\t" "gsldlc1 %[ftmp9], 0x05(%[src_ptr]) \n\t"
"gsldrc1 %[ftmp9], -0x02(%[src_ptr]) \n\t" "gsldrc1 %[ftmp9], -0x02(%[src_ptr]) \n\t"
"gsldlc1 %[ftmp10], 0x06(%[src_ptr]) \n\t"
"gsldrc1 %[ftmp10], -0x01(%[src_ptr]) \n\t"
"punpcklbh %[ftmp6], %[ftmp9], %[fzero] \n\t" "punpcklbh %[ftmp6], %[ftmp9], %[fzero] \n\t"
"pmullh %[ftmp8], %[ftmp6], %[ftmp0] \n\t" "pmullh %[ftmp8], %[ftmp6], %[ftmp0] \n\t"
@@ -129,21 +125,24 @@ static INLINE void vp8_filter_block1d_h6_mmi(unsigned char *src_ptr,
"pmullh %[ftmp6], %[ftmp6], %[ftmp4] \n\t" "pmullh %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
"paddsh %[ftmp8], %[ftmp8], %[ftmp6] \n\t" "paddsh %[ftmp8], %[ftmp8], %[ftmp6] \n\t"
"punpcklbh %[ftmp6], %[ftmp10], %[fzero] \n\t" "gsldlc1 %[ftmp9], 0x06(%[src_ptr]) \n\t"
"gsldrc1 %[ftmp9], -0x01(%[src_ptr]) \n\t"
"punpcklbh %[ftmp6], %[ftmp9], %[fzero] \n\t"
"pmullh %[ftmp6], %[ftmp6], %[ftmp1] \n\t" "pmullh %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
"paddsh %[ftmp8], %[ftmp8], %[ftmp6] \n\t" "paddsh %[ftmp8], %[ftmp8], %[ftmp6] \n\t"
"punpckhbh %[ftmp6], %[ftmp10], %[fzero] \n\t" "punpckhbh %[ftmp6], %[ftmp9], %[fzero] \n\t"
"pmullh %[ftmp6], %[ftmp6], %[ftmp5] \n\t" "pmullh %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
"paddsh %[ftmp8], %[ftmp8], %[ftmp6] \n\t" "paddsh %[ftmp8], %[ftmp8], %[ftmp6] \n\t"
"dsrl %[ftmp10], %[ftmp10], %[ftmp11] \n\t" "dsrl %[ftmp9], %[ftmp9], %[ftmp10] \n\t"
"punpcklbh %[ftmp6], %[ftmp10], %[fzero] \n\t" "punpcklbh %[ftmp6], %[ftmp9], %[fzero] \n\t"
"pmullh %[ftmp6], %[ftmp6], %[ftmp2] \n\t" "pmullh %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
"paddsh %[ftmp8], %[ftmp8], %[ftmp6] \n\t" "paddsh %[ftmp8], %[ftmp8], %[ftmp6] \n\t"
"dsrl %[ftmp10], %[ftmp10], %[ftmp11] \n\t" "dsrl %[ftmp9], %[ftmp9], %[ftmp10] \n\t"
"punpcklbh %[ftmp6], %[ftmp10], %[fzero] \n\t" "punpcklbh %[ftmp6], %[ftmp9], %[fzero] \n\t"
"pmullh %[ftmp6], %[ftmp6], %[ftmp3] \n\t" "pmullh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
"paddsh %[ftmp8], %[ftmp8], %[ftmp6] \n\t" "paddsh %[ftmp8], %[ftmp8], %[ftmp6] \n\t"
@@ -164,9 +163,8 @@ static INLINE void vp8_filter_block1d_h6_mmi(unsigned char *src_ptr,
[ftmp5]"=&f"(ftmp5), [ftmp6]"=&f"(ftmp6), [ftmp5]"=&f"(ftmp5), [ftmp6]"=&f"(ftmp6),
[ftmp7]"=&f"(ftmp7), [ftmp8]"=&f"(ftmp8), [ftmp7]"=&f"(ftmp7), [ftmp8]"=&f"(ftmp8),
[ftmp9]"=&f"(ftmp9), [ftmp10]"=&f"(ftmp10), [ftmp9]"=&f"(ftmp9), [ftmp10]"=&f"(ftmp10),
[ftmp11]"=&f"(ftmp11), [tmp0]"=&r"(tmp[0]), [tmp0]"=&r"(tmp[0]), [src_ptr]"+&r"(src_ptr),
[output_ptr]"+&r"(output_ptr), [output_height]"+&r"(output_height), [output_ptr]"+&r"(output_ptr), [output_height]"+&r"(output_height)
[src_ptr]"+&r"(src_ptr)
: [src_pixels_per_line]"r"((mips_reg)src_pixels_per_line), : [src_pixels_per_line]"r"((mips_reg)src_pixels_per_line),
[vp8_filter]"r"(vp8_filter), [output_width]"r"(output_width), [vp8_filter]"r"(vp8_filter), [output_width]"r"(output_width),
[ff_ph_40]"f"(ff_ph_40) [ff_ph_40]"f"(ff_ph_40)
@@ -192,11 +190,6 @@ static INLINE void vp8_filter_block1dc_v6_mmi(
register double ftmp6 asm("$f14"); register double ftmp6 asm("$f14");
register double ftmp7 asm("$f16"); register double ftmp7 asm("$f16");
register double ftmp8 asm("$f18"); register double ftmp8 asm("$f18");
register double ftmp9 asm("$f20");
register double ftmp10 asm("$f22");
register double ftmp11 asm("$f24");
register double ftmp12 asm("$f26");
register double ftmp13 asm("$f28");
#else #else
register double fzero asm("$f0"); register double fzero asm("$f0");
register double ftmp0 asm("$f1"); register double ftmp0 asm("$f1");
@@ -208,11 +201,6 @@ static INLINE void vp8_filter_block1dc_v6_mmi(
register double ftmp6 asm("$f7"); register double ftmp6 asm("$f7");
register double ftmp7 asm("$f8"); register double ftmp7 asm("$f8");
register double ftmp8 asm("$f9"); register double ftmp8 asm("$f9");
register double ftmp9 asm("$f10");
register double ftmp10 asm("$f11");
register double ftmp11 asm("$f12");
register double ftmp12 asm("$f13");
register double ftmp13 asm("$f14");
#endif // _MIPS_SIM == _ABIO32 #endif // _MIPS_SIM == _ABIO32
__asm__ volatile ( __asm__ volatile (
@@ -222,56 +210,52 @@ static INLINE void vp8_filter_block1dc_v6_mmi(
"ldc1 %[ftmp3], 0x30(%[vp8_filter]) \n\t" "ldc1 %[ftmp3], 0x30(%[vp8_filter]) \n\t"
"ldc1 %[ftmp4], 0x40(%[vp8_filter]) \n\t" "ldc1 %[ftmp4], 0x40(%[vp8_filter]) \n\t"
"ldc1 %[ftmp5], 0x50(%[vp8_filter]) \n\t" "ldc1 %[ftmp5], 0x50(%[vp8_filter]) \n\t"
MMI_SUBU(%[src_ptr], %[src_ptr], %[pixels_per_line_x2])
"xor %[fzero], %[fzero], %[fzero] \n\t" "xor %[fzero], %[fzero], %[fzero] \n\t"
"li %[tmp0], 0x07 \n\t" "li %[tmp0], 0x07 \n\t"
"mtc1 %[tmp0], %[ftmp13] \n\t" "mtc1 %[tmp0], %[ftmp7] \n\t"
/* In order to make full use of memory load delay slot,
* Operation of memory loading and calculating has been rearranged.
*/
"1: \n\t" "1: \n\t"
"gsldlc1 %[ftmp6], 0x07(%[src_ptr]) \n\t" "gsldlc1 %[ftmp6], 0x07(%[src_ptr]) \n\t"
"gsldrc1 %[ftmp6], 0x00(%[src_ptr]) \n\t" "gsldrc1 %[ftmp6], 0x00(%[src_ptr]) \n\t"
"pmullh %[ftmp8], %[ftmp6], %[ftmp0] \n\t"
MMI_ADDU(%[addr0], %[src_ptr], %[pixels_per_line]) MMI_ADDU(%[addr0], %[src_ptr], %[pixels_per_line])
"gsldlc1 %[ftmp7], 0x07(%[addr0]) \n\t" "gsldlc1 %[ftmp6], 0x07(%[addr0]) \n\t"
"gsldrc1 %[ftmp7], 0x00(%[addr0]) \n\t" "gsldrc1 %[ftmp6], 0x00(%[addr0]) \n\t"
"pmullh %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
"paddsh %[ftmp8], %[ftmp8], %[ftmp6] \n\t"
MMI_ADDU(%[addr0], %[src_ptr], %[pixels_per_line_x2]) MMI_ADDU(%[addr0], %[src_ptr], %[pixels_per_line_x2])
"gsldlc1 %[ftmp8], 0x07(%[addr0]) \n\t" "gsldlc1 %[ftmp6], 0x07(%[addr0]) \n\t"
"gsldrc1 %[ftmp8], 0x00(%[addr0]) \n\t" "gsldrc1 %[ftmp6], 0x00(%[addr0]) \n\t"
"pmullh %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
"paddsh %[ftmp8], %[ftmp8], %[ftmp6] \n\t"
MMI_ADDU(%[addr0], %[src_ptr], %[pixels_per_line_x4]) MMI_ADDU(%[addr0], %[src_ptr], %[pixels_per_line_x4])
"gsldlc1 %[ftmp9], 0x07(%[addr0]) \n\t" "gsldlc1 %[ftmp6], 0x07(%[addr0]) \n\t"
"gsldrc1 %[ftmp9], 0x00(%[addr0]) \n\t" "gsldrc1 %[ftmp6], 0x00(%[addr0]) \n\t"
"pmullh %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
"paddsh %[ftmp8], %[ftmp8], %[ftmp6] \n\t"
MMI_ADDU(%[src_ptr], %[src_ptr], %[pixels_per_line]) MMI_ADDU(%[src_ptr], %[src_ptr], %[pixels_per_line])
MMI_ADDU(%[addr0], %[src_ptr], %[pixels_per_line_x2]) MMI_ADDU(%[addr0], %[src_ptr], %[pixels_per_line_x2])
"gsldlc1 %[ftmp10], 0x07(%[addr0]) \n\t" "gsldlc1 %[ftmp6], 0x07(%[addr0]) \n\t"
"gsldrc1 %[ftmp10], 0x00(%[addr0]) \n\t" "gsldrc1 %[ftmp6], 0x00(%[addr0]) \n\t"
"pmullh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
"paddsh %[ftmp8], %[ftmp8], %[ftmp6] \n\t"
MMI_ADDU(%[addr0], %[src_ptr], %[pixels_per_line_x4]) MMI_ADDU(%[addr0], %[src_ptr], %[pixels_per_line_x4])
"gsldlc1 %[ftmp11], 0x07(%[addr0]) \n\t" "gsldlc1 %[ftmp6], 0x07(%[addr0]) \n\t"
"gsldrc1 %[ftmp11], 0x00(%[addr0]) \n\t" "gsldrc1 %[ftmp6], 0x00(%[addr0]) \n\t"
"pmullh %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
"paddsh %[ftmp8], %[ftmp8], %[ftmp6] \n\t"
"pmullh %[ftmp12], %[ftmp6], %[ftmp0] \n\t" "paddsh %[ftmp8], %[ftmp8], %[ff_ph_40] \n\t"
"psrah %[ftmp8], %[ftmp8], %[ftmp7] \n\t"
"pmullh %[ftmp7], %[ftmp7], %[ftmp1] \n\t" "packushb %[ftmp8], %[ftmp8], %[fzero] \n\t"
"paddsh %[ftmp12], %[ftmp12], %[ftmp7] \n\t" "gsswlc1 %[ftmp8], 0x03(%[output_ptr]) \n\t"
"gsswrc1 %[ftmp8], 0x00(%[output_ptr]) \n\t"
"pmullh %[ftmp8], %[ftmp8], %[ftmp2] \n\t"
"paddsh %[ftmp12], %[ftmp12], %[ftmp8] \n\t"
"pmullh %[ftmp9], %[ftmp9], %[ftmp4] \n\t"
"paddsh %[ftmp12], %[ftmp12], %[ftmp9] \n\t"
"pmullh %[ftmp10], %[ftmp10], %[ftmp3] \n\t"
"paddsh %[ftmp12], %[ftmp12], %[ftmp10] \n\t"
"pmullh %[ftmp11], %[ftmp11], %[ftmp5] \n\t"
"paddsh %[ftmp12], %[ftmp12], %[ftmp11] \n\t"
"paddsh %[ftmp12], %[ftmp12], %[ff_ph_40] \n\t"
"psrah %[ftmp12], %[ftmp12], %[ftmp13] \n\t"
"packushb %[ftmp12], %[ftmp12], %[fzero] \n\t"
"gsswlc1 %[ftmp12], 0x03(%[output_ptr]) \n\t"
"gsswrc1 %[ftmp12], 0x00(%[output_ptr]) \n\t"
MMI_ADDIU(%[output_height], %[output_height], -0x01) MMI_ADDIU(%[output_height], %[output_height], -0x01)
MMI_ADDU(%[output_ptr], %[output_ptr], %[output_pitch]) MMI_ADDU(%[output_ptr], %[output_ptr], %[output_pitch])
@@ -281,11 +265,9 @@ static INLINE void vp8_filter_block1dc_v6_mmi(
[ftmp3]"=&f"(ftmp3), [ftmp4]"=&f"(ftmp4), [ftmp3]"=&f"(ftmp3), [ftmp4]"=&f"(ftmp4),
[ftmp5]"=&f"(ftmp5), [ftmp6]"=&f"(ftmp6), [ftmp5]"=&f"(ftmp5), [ftmp6]"=&f"(ftmp6),
[ftmp7]"=&f"(ftmp7), [ftmp8]"=&f"(ftmp8), [ftmp7]"=&f"(ftmp7), [ftmp8]"=&f"(ftmp8),
[ftmp9]"=&f"(ftmp9), [ftmp10]"=&f"(ftmp10), [tmp0]"=&r"(tmp[0]), [addr0]"=&r"(addr[0]),
[ftmp11]"=&f"(ftmp11), [ftmp12]"=&f"(ftmp12), [src_ptr]"+&r"(src_ptr), [output_ptr]"+&r"(output_ptr),
[ftmp13]"=&f"(ftmp13), [tmp0]"=&r"(tmp[0]), [output_height]"+&r"(output_height)
[addr0]"=&r"(addr[0]), [src_ptr]"+&r"(src_ptr),
[output_ptr]"+&r"(output_ptr), [output_height]"+&r"(output_height)
: [pixels_per_line]"r"((mips_reg)pixels_per_line), : [pixels_per_line]"r"((mips_reg)pixels_per_line),
[pixels_per_line_x2]"r"((mips_reg)(pixels_per_line<<1)), [pixels_per_line_x2]"r"((mips_reg)(pixels_per_line<<1)),
[pixels_per_line_x4]"r"((mips_reg)(pixels_per_line<<2)), [pixels_per_line_x4]"r"((mips_reg)(pixels_per_line<<2)),
@@ -319,7 +301,6 @@ static INLINE void vp8_filter_block1d_h6_filter0_mmi(
"1: \n\t" "1: \n\t"
"gsldlc1 %[ftmp0], 0x07(%[src_ptr]) \n\t" "gsldlc1 %[ftmp0], 0x07(%[src_ptr]) \n\t"
"gsldrc1 %[ftmp0], 0x00(%[src_ptr]) \n\t" "gsldrc1 %[ftmp0], 0x00(%[src_ptr]) \n\t"
MMI_ADDU(%[src_ptr], %[src_ptr], %[src_pixels_per_line])
"punpcklbh %[ftmp1], %[ftmp0], %[fzero] \n\t" "punpcklbh %[ftmp1], %[ftmp0], %[fzero] \n\t"
"gssdlc1 %[ftmp1], 0x07(%[output_ptr]) \n\t" "gssdlc1 %[ftmp1], 0x07(%[output_ptr]) \n\t"
@@ -327,6 +308,7 @@ static INLINE void vp8_filter_block1d_h6_filter0_mmi(
"addiu %[output_height], %[output_height], -0x01 \n\t" "addiu %[output_height], %[output_height], -0x01 \n\t"
MMI_ADDU(%[output_ptr], %[output_ptr], %[output_width]) MMI_ADDU(%[output_ptr], %[output_ptr], %[output_width])
MMI_ADDU(%[src_ptr], %[src_ptr], %[src_pixels_per_line])
"bnez %[output_height], 1b \n\t" "bnez %[output_height], 1b \n\t"
: [fzero]"=&f"(fzero), [ftmp0]"=&f"(ftmp0), : [fzero]"=&f"(fzero), [ftmp0]"=&f"(ftmp0),
[ftmp1]"=&f"(ftmp1), [src_ptr]"+&r"(src_ptr), [ftmp1]"=&f"(ftmp1), [src_ptr]"+&r"(src_ptr),
@@ -356,12 +338,12 @@ static INLINE void vp8_filter_block1dc_v6_filter0_mmi(
"1: \n\t" "1: \n\t"
"gsldlc1 %[ftmp0], 0x07(%[src_ptr]) \n\t" "gsldlc1 %[ftmp0], 0x07(%[src_ptr]) \n\t"
"gsldrc1 %[ftmp0], 0x00(%[src_ptr]) \n\t" "gsldrc1 %[ftmp0], 0x00(%[src_ptr]) \n\t"
MMI_ADDU(%[src_ptr], %[src_ptr], %[pixels_per_line])
MMI_ADDIU(%[output_height], %[output_height], -0x01)
"packushb %[ftmp1], %[ftmp0], %[fzero] \n\t" "packushb %[ftmp1], %[ftmp0], %[fzero] \n\t"
"gsswlc1 %[ftmp1], 0x03(%[output_ptr]) \n\t" "gsswlc1 %[ftmp1], 0x03(%[output_ptr]) \n\t"
"gsswrc1 %[ftmp1], 0x00(%[output_ptr]) \n\t" "gsswrc1 %[ftmp1], 0x00(%[output_ptr]) \n\t"
MMI_ADDU(%[src_ptr], %[src_ptr], %[pixels_per_line])
MMI_ADDIU(%[output_height], %[output_height], -0x01)
MMI_ADDU(%[output_ptr], %[output_ptr], %[output_pitch]) MMI_ADDU(%[output_ptr], %[output_ptr], %[output_pitch])
"bnez %[output_height], 1b \n\t" "bnez %[output_height], 1b \n\t"
: [fzero]"=&f"(fzero), [ftmp0]"=&f"(ftmp0), : [fzero]"=&f"(fzero), [ftmp0]"=&f"(ftmp0),
@@ -404,7 +386,7 @@ static INLINE void vp8_filter_block1dc_v6_filter0_mmi(
} \ } \
} else { \ } else { \
for (i = 0; i < loop; ++i) { \ for (i = 0; i < loop; ++i) { \
vp8_filter_block1dc_v6_mmi(FData2 + i * 4, dst_ptr + i * 4, m, \ vp8_filter_block1dc_v6_mmi(FData2 + n * 2 + i * 4, dst_ptr + i * 4, m, \
dst_pitch, n * 2, VFilter); \ dst_pitch, n * 2, VFilter); \
} \ } \
} \ } \

View File

@@ -11,16 +11,28 @@
#include "entropy.h" #include "entropy.h"
const int vp8_mode_contexts[6][4] = { const int vp8_mode_contexts[6][4] = {
{ /* 0 */ {
7, 1, 1, 143 }, /* 0 */
{ /* 1 */ 7, 1, 1, 143,
14, 18, 14, 107 }, },
{ /* 2 */ {
135, 64, 57, 68 }, /* 1 */
{ /* 3 */ 14, 18, 14, 107,
60, 56, 128, 65 }, },
{ /* 4 */ {
159, 134, 128, 34 }, /* 2 */
{ /* 5 */ 135, 64, 57, 68,
234, 188, 128, 28 }, },
{
/* 3 */
60, 56, 128, 65,
},
{
/* 4 */
159, 134, 128, 34,
},
{
/* 5 */
234, 188, 128, 28,
},
}; };

View File

@@ -1,13 +1,3 @@
##
## Copyright (c) 2017 The WebM project authors. All Rights Reserved.
##
## Use of this source code is governed by a BSD-style license
## that can be found in the LICENSE file in the root of the source
## tree. An additional intellectual property rights grant can be found
## in the file PATENTS. All contributing project authors may
## be found in the AUTHORS file in the root of the source tree.
##
sub vp8_common_forward_decls() { sub vp8_common_forward_decls() {
print <<EOF print <<EOF
/* /*

View File

@@ -95,7 +95,9 @@ void vp8_sixtap_predict4x4_mmx(unsigned char *src_ptr, int src_pixels_per_line,
void vp8_sixtap_predict16x16_sse2(unsigned char *src_ptr, void vp8_sixtap_predict16x16_sse2(unsigned char *src_ptr,
int src_pixels_per_line, int xoffset, int src_pixels_per_line, int xoffset,
int yoffset, unsigned char *dst_ptr, int yoffset, unsigned char *dst_ptr,
int dst_pitch) { int dst_pitch
) {
DECLARE_ALIGNED(16, unsigned short, DECLARE_ALIGNED(16, unsigned short,
FData2[24 * 24]); /* Temp data bufffer used in filtering */ FData2[24 * 24]); /* Temp data bufffer used in filtering */
@@ -234,7 +236,9 @@ extern void vp8_filter_block1d4_v6_ssse3(unsigned char *src_ptr,
void vp8_sixtap_predict16x16_ssse3(unsigned char *src_ptr, void vp8_sixtap_predict16x16_ssse3(unsigned char *src_ptr,
int src_pixels_per_line, int xoffset, int src_pixels_per_line, int xoffset,
int yoffset, unsigned char *dst_ptr, int yoffset, unsigned char *dst_ptr,
int dst_pitch) { int dst_pitch
) {
DECLARE_ALIGNED(16, unsigned char, FData2[24 * 24]); DECLARE_ALIGNED(16, unsigned char, FData2[24 * 24]);
if (xoffset) { if (xoffset) {

View File

@@ -674,7 +674,7 @@ static unsigned int read_partition_size(VP8D_COMP *pbi,
static int read_is_valid(const unsigned char *start, size_t len, static int read_is_valid(const unsigned char *start, size_t len,
const unsigned char *end) { const unsigned char *end) {
return len != 0 && end > start && len <= (size_t)(end - start); return (start + len > start && start + len <= end);
} }
static unsigned int read_available_partition_size( static unsigned int read_available_partition_size(

View File

@@ -34,9 +34,7 @@ typedef struct {
/* Structure used to hold all the overlaps of a macroblock. The overlaps of a /* Structure used to hold all the overlaps of a macroblock. The overlaps of a
* macroblock is further divided into block overlaps. * macroblock is further divided into block overlaps.
*/ */
typedef struct { typedef struct { B_OVERLAP overlaps[16]; } MB_OVERLAP;
B_OVERLAP overlaps[16];
} MB_OVERLAP;
/* Structure for keeping track of motion vectors and which reference frame they /* Structure for keeping track of motion vectors and which reference frame they
* refer to. Used for motion vector interpolation. * refer to. Used for motion vector interpolation.

View File

@@ -31,9 +31,7 @@ typedef struct {
void *ptr2; void *ptr2;
} DECODETHREAD_DATA; } DECODETHREAD_DATA;
typedef struct { typedef struct { MACROBLOCKD mbd; } MB_ROW_DEC;
MACROBLOCKD mbd;
} MB_ROW_DEC;
typedef struct { typedef struct {
int enabled; int enabled;

View File

@@ -739,21 +739,24 @@ void vp8mt_alloc_temp_buffers(VP8D_COMP *pbi, int width, int prev_mb_rows) {
/* Allocate memory for above_row buffers. */ /* Allocate memory for above_row buffers. */
CALLOC_ARRAY(pbi->mt_yabove_row, pc->mb_rows); CALLOC_ARRAY(pbi->mt_yabove_row, pc->mb_rows);
for (i = 0; i < pc->mb_rows; ++i) for (i = 0; i < pc->mb_rows; ++i)
CHECK_MEM_ERROR(pbi->mt_yabove_row[i], CHECK_MEM_ERROR(
vpx_memalign(16, sizeof(unsigned char) * pbi->mt_yabove_row[i],
(width + (VP8BORDERINPIXELS << 1)))); vpx_memalign(
16, sizeof(unsigned char) * (width + (VP8BORDERINPIXELS << 1))));
CALLOC_ARRAY(pbi->mt_uabove_row, pc->mb_rows); CALLOC_ARRAY(pbi->mt_uabove_row, pc->mb_rows);
for (i = 0; i < pc->mb_rows; ++i) for (i = 0; i < pc->mb_rows; ++i)
CHECK_MEM_ERROR(pbi->mt_uabove_row[i], CHECK_MEM_ERROR(
vpx_memalign(16, sizeof(unsigned char) * pbi->mt_uabove_row[i],
(uv_width + VP8BORDERINPIXELS))); vpx_memalign(16,
sizeof(unsigned char) * (uv_width + VP8BORDERINPIXELS)));
CALLOC_ARRAY(pbi->mt_vabove_row, pc->mb_rows); CALLOC_ARRAY(pbi->mt_vabove_row, pc->mb_rows);
for (i = 0; i < pc->mb_rows; ++i) for (i = 0; i < pc->mb_rows; ++i)
CHECK_MEM_ERROR(pbi->mt_vabove_row[i], CHECK_MEM_ERROR(
vpx_memalign(16, sizeof(unsigned char) * pbi->mt_vabove_row[i],
(uv_width + VP8BORDERINPIXELS))); vpx_memalign(16,
sizeof(unsigned char) * (uv_width + VP8BORDERINPIXELS)));
/* Allocate memory for left_col buffers. */ /* Allocate memory for left_col buffers. */
CALLOC_ARRAY(pbi->mt_yleft_col, pc->mb_rows); CALLOC_ARRAY(pbi->mt_yleft_col, pc->mb_rows);

View File

@@ -989,8 +989,8 @@ static int estimate_max_q(VP8_COMP *cpi, FIRSTPASS_STATS *fpstats,
bits_per_mb_at_this_q = bits_per_mb_at_this_q =
vp8_bits_per_mb[INTER_FRAME][Q] + overhead_bits_per_mb; vp8_bits_per_mb[INTER_FRAME][Q] + overhead_bits_per_mb;
bits_per_mb_at_this_q = bits_per_mb_at_this_q = (int)(.5 +
(int)(.5 + err_correction_factor * speed_correction * err_correction_factor * speed_correction *
cpi->twopass.est_max_qcorrection_factor * cpi->twopass.est_max_qcorrection_factor *
cpi->twopass.section_max_qfactor * cpi->twopass.section_max_qfactor *
(double)bits_per_mb_at_this_q); (double)bits_per_mb_at_this_q);
@@ -1086,7 +1086,8 @@ static int estimate_cq(VP8_COMP *cpi, FIRSTPASS_STATS *fpstats,
vp8_bits_per_mb[INTER_FRAME][Q] + overhead_bits_per_mb; vp8_bits_per_mb[INTER_FRAME][Q] + overhead_bits_per_mb;
bits_per_mb_at_this_q = bits_per_mb_at_this_q =
(int)(.5 + err_correction_factor * speed_correction * clip_iifactor * (int)(.5 +
err_correction_factor * speed_correction * clip_iifactor *
(double)bits_per_mb_at_this_q); (double)bits_per_mb_at_this_q);
/* Mode and motion overhead */ /* Mode and motion overhead */
@@ -1272,7 +1273,8 @@ void vp8_init_second_pass(VP8_COMP *cpi) {
* sum duration is not. Its calculated based on the actual durations of * sum duration is not. Its calculated based on the actual durations of
* all frames from the first pass. * all frames from the first pass.
*/ */
vp8_new_framerate(cpi, 10000000.0 * cpi->twopass.total_stats.count / vp8_new_framerate(cpi,
10000000.0 * cpi->twopass.total_stats.count /
cpi->twopass.total_stats.duration); cpi->twopass.total_stats.duration);
cpi->output_framerate = cpi->framerate; cpi->output_framerate = cpi->framerate;
@@ -1737,8 +1739,7 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) {
/* Dont break out very close to a key frame */ /* Dont break out very close to a key frame */
((cpi->twopass.frames_to_key - i) >= MIN_GF_INTERVAL) && ((cpi->twopass.frames_to_key - i) >= MIN_GF_INTERVAL) &&
((boost_score > 20.0) || (next_frame.pcnt_inter < 0.75)) && ((boost_score > 20.0) || (next_frame.pcnt_inter < 0.75)) &&
(!flash_detected) && (!flash_detected) && ((mv_ratio_accumulator > 100.0) ||
((mv_ratio_accumulator > 100.0) ||
(abs_mv_in_out_accumulator > 3.0) || (abs_mv_in_out_accumulator > 3.0) ||
(mv_in_out_accumulator < -2.0) || (mv_in_out_accumulator < -2.0) ||
((boost_score - old_boost_score) < 2.0)))) { ((boost_score - old_boost_score) < 2.0)))) {
@@ -1814,8 +1815,7 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) {
(next_frame.pcnt_inter > 0.75) && (next_frame.pcnt_inter > 0.75) &&
((mv_in_out_accumulator / (double)i > -0.2) || ((mv_in_out_accumulator / (double)i > -0.2) ||
(mv_in_out_accumulator > -2.0)) && (mv_in_out_accumulator > -2.0)) &&
(cpi->gfu_boost > 100) && (cpi->gfu_boost > 100) && (cpi->twopass.gf_decay_rate <=
(cpi->twopass.gf_decay_rate <=
(ARF_DECAY_THRESH + (cpi->gfu_boost / 200)))) (ARF_DECAY_THRESH + (cpi->gfu_boost / 200))))
#endif #endif
{ {

View File

@@ -2862,6 +2862,7 @@ void write_cx_frame_to_file(YV12_BUFFER_CONFIG *frame, int this_frame)
fclose(yframe); fclose(yframe);
} }
#endif #endif
/* return of 0 means drop frame */
#if !CONFIG_REALTIME_ONLY #if !CONFIG_REALTIME_ONLY
/* Function to test for conditions that indeicate we should loop /* Function to test for conditions that indeicate we should loop
@@ -3363,6 +3364,11 @@ static void encode_frame_to_data_rate(VP8_COMP *cpi, size_t *size,
(LOWER_RES_FRAME_INFO *)cpi->oxcf.mr_low_res_mode_info; (LOWER_RES_FRAME_INFO *)cpi->oxcf.mr_low_res_mode_info;
if (cpi->oxcf.mr_encoder_id) { if (cpi->oxcf.mr_encoder_id) {
// TODO(marpan): This constraint shouldn't be needed, as we would like
// to allow for key frame setting (forced or periodic) defined per
// spatial layer. For now, keep this in.
cm->frame_type = low_res_frame_info->frame_type;
// Check if lower resolution is available for motion vector reuse. // Check if lower resolution is available for motion vector reuse.
if (cm->frame_type != KEY_FRAME) { if (cm->frame_type != KEY_FRAME) {
cpi->mr_low_res_mv_avail = 1; cpi->mr_low_res_mv_avail = 1;
@@ -3387,16 +3393,7 @@ static void encode_frame_to_data_rate(VP8_COMP *cpi, size_t *size,
== low_res_frame_info->low_res_ref_frames[ALTREF_FRAME]); == low_res_frame_info->low_res_ref_frames[ALTREF_FRAME]);
*/ */
} }
// Disable motion vector reuse (i.e., disable any usage of the low_res)
// if the previous lower stream is skipped/disabled.
if (low_res_frame_info->skip_encoding_prev_stream) {
cpi->mr_low_res_mv_avail = 0;
} }
}
// This stream is not skipped (i.e., it's being encoded), so set this skip
// flag to 0. This is needed for the next stream (i.e., which is the next
// frame to be encoded).
low_res_frame_info->skip_encoding_prev_stream = 0;
// On a key frame: For the lowest resolution, keep track of the key frame // On a key frame: For the lowest resolution, keep track of the key frame
// counter value. For the higher resolutions, reset the current video // counter value. For the higher resolutions, reset the current video
@@ -4785,6 +4782,8 @@ static void encode_frame_to_data_rate(VP8_COMP *cpi, size_t *size,
cpi->temporal_pattern_counter++; cpi->temporal_pattern_counter++;
} }
/* reset to normal state now that we are done. */
#if 0 #if 0
{ {
char filename[512]; char filename[512];
@@ -5000,13 +4999,10 @@ int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags,
// be received for that high layer, which will yield an incorrect // be received for that high layer, which will yield an incorrect
// frame rate (from time-stamp adjustment in above calculation). // frame rate (from time-stamp adjustment in above calculation).
if (cpi->oxcf.mr_encoder_id) { if (cpi->oxcf.mr_encoder_id) {
if (!low_res_frame_info->skip_encoding_base_stream)
cpi->ref_framerate = low_res_frame_info->low_res_framerate; cpi->ref_framerate = low_res_frame_info->low_res_framerate;
} else { } else {
// Keep track of frame rate for lowest resolution. // Keep track of frame rate for lowest resolution.
low_res_frame_info->low_res_framerate = cpi->ref_framerate; low_res_frame_info->low_res_framerate = cpi->ref_framerate;
// The base stream is being encoded so set skip flag to 0.
low_res_frame_info->skip_encoding_base_stream = 0;
} }
} }
#endif #endif

View File

@@ -1052,7 +1052,8 @@ void vp8_update_rate_correction_factors(VP8_COMP *cpi, int damp_var) {
* overflow when values are large * overflow when values are large
*/ */
projected_size_based_on_q = projected_size_based_on_q =
(int)(((.5 + rate_correction_factor * (int)(((.5 +
rate_correction_factor *
vp8_bits_per_mb[cpi->common.frame_type][Q]) * vp8_bits_per_mb[cpi->common.frame_type][Q]) *
cpi->common.MBs) / cpi->common.MBs) /
(1 << BPER_MB_NORMBITS)); (1 << BPER_MB_NORMBITS));

View File

@@ -23,7 +23,6 @@
#include "modecosts.h" #include "modecosts.h"
#include "encodeintra.h" #include "encodeintra.h"
#include "pickinter.h" #include "pickinter.h"
#include "vp8/common/common.h"
#include "vp8/common/entropymode.h" #include "vp8/common/entropymode.h"
#include "vp8/common/reconinter.h" #include "vp8/common/reconinter.h"
#include "vp8/common/reconintra.h" #include "vp8/common/reconintra.h"
@@ -770,8 +769,8 @@ static void rd_pick_intra_mbuv_mode(MACROBLOCK *x, int *rate,
vp8_quantize_mbuv(x); vp8_quantize_mbuv(x);
rate_to = rd_cost_mbuv(x); rate_to = rd_cost_mbuv(x);
this_rate = this_rate = rate_to +
rate_to + x->intra_uv_mode_cost[xd->frame_type] x->intra_uv_mode_cost[xd->frame_type]
[xd->mode_info_context->mbmi.uv_mode]; [xd->mode_info_context->mbmi.uv_mode];
this_distortion = vp8_mbuverror(x) / 4; this_distortion = vp8_mbuverror(x) / 4;
@@ -960,13 +959,19 @@ static void rd_check_segment(VP8_COMP *cpi, MACROBLOCK *x, BEST_SEG_INFO *bsi,
vp8_variance_fn_ptr_t *v_fn_ptr; vp8_variance_fn_ptr_t *v_fn_ptr;
ENTROPY_CONTEXT_PLANES t_above, t_left; ENTROPY_CONTEXT_PLANES t_above, t_left;
ENTROPY_CONTEXT *ta;
ENTROPY_CONTEXT *tl;
ENTROPY_CONTEXT_PLANES t_above_b, t_left_b; ENTROPY_CONTEXT_PLANES t_above_b, t_left_b;
ENTROPY_CONTEXT *ta_b;
ENTROPY_CONTEXT *tl_b;
memcpy(&t_above, x->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES)); memcpy(&t_above, x->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES));
memcpy(&t_left, x->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES)); memcpy(&t_left, x->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES));
vp8_zero(t_above_b); ta = (ENTROPY_CONTEXT *)&t_above;
vp8_zero(t_left_b); tl = (ENTROPY_CONTEXT *)&t_left;
ta_b = (ENTROPY_CONTEXT *)&t_above_b;
tl_b = (ENTROPY_CONTEXT *)&t_left_b;
br = 0; br = 0;
bd = 0; bd = 0;
@@ -1146,13 +1151,13 @@ static void rd_check_segment(VP8_COMP *cpi, MACROBLOCK *x, BEST_SEG_INFO *bsi,
mode_selected = this_mode; mode_selected = this_mode;
best_label_rd = this_rd; best_label_rd = this_rd;
memcpy(&t_above_b, &t_above_s, sizeof(ENTROPY_CONTEXT_PLANES)); memcpy(ta_b, ta_s, sizeof(ENTROPY_CONTEXT_PLANES));
memcpy(&t_left_b, &t_left_s, sizeof(ENTROPY_CONTEXT_PLANES)); memcpy(tl_b, tl_s, sizeof(ENTROPY_CONTEXT_PLANES));
} }
} /*for each 4x4 mode*/ } /*for each 4x4 mode*/
memcpy(&t_above, &t_above_b, sizeof(ENTROPY_CONTEXT_PLANES)); memcpy(ta, ta_b, sizeof(ENTROPY_CONTEXT_PLANES));
memcpy(&t_left, &t_left_b, sizeof(ENTROPY_CONTEXT_PLANES)); memcpy(tl, tl_b, sizeof(ENTROPY_CONTEXT_PLANES));
labels2mode(x, labels, i, mode_selected, &mode_mv[mode_selected], labels2mode(x, labels, i, mode_selected, &mode_mv[mode_selected],
bsi->ref_mv, x->mvcost); bsi->ref_mv, x->mvcost);

View File

@@ -56,7 +56,8 @@ static INLINE unsigned int vp8_cost_branch(const unsigned int ct[2],
static void vp8_treed_write(vp8_writer *const w, vp8_tree t, static void vp8_treed_write(vp8_writer *const w, vp8_tree t,
const vp8_prob *const p, int v, const vp8_prob *const p, int v,
int n) { /* number of bits in v, assumed nonzero */ int n /* number of bits in v, assumed nonzero */
) {
vp8_tree_index i = 0; vp8_tree_index i = 0;
do { do {
@@ -72,7 +73,8 @@ static INLINE void vp8_write_token(vp8_writer *const w, vp8_tree t,
} }
static int vp8_treed_cost(vp8_tree t, const vp8_prob *const p, int v, static int vp8_treed_cost(vp8_tree t, const vp8_prob *const p, int v,
int n) { /* number of bits in v, assumed nonzero */ int n /* number of bits in v, assumed nonzero */
) {
int c = 0; int c = 0;
vp8_tree_index i = 0; vp8_tree_index i = 0;

View File

@@ -802,20 +802,7 @@ static vpx_codec_err_t vp8e_encode(vpx_codec_alg_priv_t *ctx,
unsigned long deadline) { unsigned long deadline) {
vpx_codec_err_t res = VPX_CODEC_OK; vpx_codec_err_t res = VPX_CODEC_OK;
if (!ctx->cfg.rc_target_bitrate) { if (!ctx->cfg.rc_target_bitrate) return res;
#if CONFIG_MULTI_RES_ENCODING
if (!ctx->cpi) return VPX_CODEC_ERROR;
if (ctx->cpi->oxcf.mr_total_resolutions > 1) {
LOWER_RES_FRAME_INFO *low_res_frame_info =
(LOWER_RES_FRAME_INFO *)ctx->cpi->oxcf.mr_low_res_mode_info;
if (!low_res_frame_info) return VPX_CODEC_ERROR;
low_res_frame_info->skip_encoding_prev_stream = 1;
if (ctx->cpi->oxcf.mr_encoder_id == 0)
low_res_frame_info->skip_encoding_base_stream = 1;
}
#endif
return res;
}
if (img) res = validate_img(ctx, img); if (img) res = validate_img(ctx, img);
@@ -915,8 +902,6 @@ static vpx_codec_err_t vp8e_encode(vpx_codec_alg_priv_t *ctx,
(unsigned long)((delta * ctx->cfg.g_timebase.den + round) / (unsigned long)((delta * ctx->cfg.g_timebase.den + round) /
ctx->cfg.g_timebase.num / 10000000); ctx->cfg.g_timebase.num / 10000000);
pkt.data.frame.flags = lib_flags << 16; pkt.data.frame.flags = lib_flags << 16;
pkt.data.frame.width[0] = cpi->common.Width;
pkt.data.frame.height[0] = cpi->common.Height;
if (lib_flags & FRAMEFLAGS_KEY) { if (lib_flags & FRAMEFLAGS_KEY) {
pkt.data.frame.flags |= VPX_FRAME_IS_KEY; pkt.data.frame.flags |= VPX_FRAME_IS_KEY;
@@ -1274,9 +1259,6 @@ CODEC_INTERFACE(vpx_codec_vp8_cx) = {
vp8e_usage_cfg_map, /* vpx_codec_enc_cfg_map_t cfg_maps; */ vp8e_usage_cfg_map, /* vpx_codec_enc_cfg_map_t cfg_maps; */
vp8e_encode, /* vpx_codec_encode_fn_t encode; */ vp8e_encode, /* vpx_codec_encode_fn_t encode; */
vp8e_get_cxdata, /* vpx_codec_get_cx_data_fn_t get_cx_data; */ vp8e_get_cxdata, /* vpx_codec_get_cx_data_fn_t get_cx_data; */
vp8e_set_config, vp8e_set_config, NULL, vp8e_get_preview, vp8e_mr_alloc_mem,
NULL,
vp8e_get_preview,
vp8e_mr_alloc_mem,
} /* encoder functions */ } /* encoder functions */
}; };

View File

@@ -1,160 +0,0 @@
/*
* Copyright (c) 2018 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include <arm_neon.h>
#include <assert.h>
#include "./vp9_rtcd.h"
#include "./vpx_config.h"
#include "vp9/common/vp9_common.h"
#include "vp9/common/arm/neon/vp9_iht_neon.h"
#include "vpx_dsp/arm/highbd_idct_neon.h"
#include "vpx_dsp/arm/idct_neon.h"
#include "vpx_dsp/arm/mem_neon.h"
#include "vpx_dsp/txfm_common.h"
static INLINE void highbd_iadst4(int32x4_t *const io) {
const int32_t sinpis[4] = { sinpi_1_9, sinpi_2_9, sinpi_3_9, sinpi_4_9 };
const int32x4_t sinpi = vld1q_s32(sinpis);
int32x4_t s[8];
s[0] = vmulq_lane_s32(io[0], vget_low_s32(sinpi), 0);
s[1] = vmulq_lane_s32(io[0], vget_low_s32(sinpi), 1);
s[2] = vmulq_lane_s32(io[1], vget_high_s32(sinpi), 0);
s[3] = vmulq_lane_s32(io[2], vget_high_s32(sinpi), 1);
s[4] = vmulq_lane_s32(io[2], vget_low_s32(sinpi), 0);
s[5] = vmulq_lane_s32(io[3], vget_low_s32(sinpi), 1);
s[6] = vmulq_lane_s32(io[3], vget_high_s32(sinpi), 1);
s[7] = vsubq_s32(io[0], io[2]);
s[7] = vaddq_s32(s[7], io[3]);
s[0] = vaddq_s32(s[0], s[3]);
s[0] = vaddq_s32(s[0], s[5]);
s[1] = vsubq_s32(s[1], s[4]);
s[1] = vsubq_s32(s[1], s[6]);
s[3] = s[2];
s[2] = vmulq_lane_s32(s[7], vget_high_s32(sinpi), 0);
io[0] = vaddq_s32(s[0], s[3]);
io[1] = vaddq_s32(s[1], s[3]);
io[2] = s[2];
io[3] = vaddq_s32(s[0], s[1]);
io[3] = vsubq_s32(io[3], s[3]);
io[0] = vrshrq_n_s32(io[0], DCT_CONST_BITS);
io[1] = vrshrq_n_s32(io[1], DCT_CONST_BITS);
io[2] = vrshrq_n_s32(io[2], DCT_CONST_BITS);
io[3] = vrshrq_n_s32(io[3], DCT_CONST_BITS);
}
void vp9_highbd_iht4x4_16_add_neon(const tran_low_t *input, uint16_t *dest,
int stride, int tx_type, int bd) {
const int16x8_t max = vdupq_n_s16((1 << bd) - 1);
int16x8_t a[2];
int32x4_t c[4];
c[0] = vld1q_s32(input);
c[1] = vld1q_s32(input + 4);
c[2] = vld1q_s32(input + 8);
c[3] = vld1q_s32(input + 12);
if (bd == 8) {
a[0] = vcombine_s16(vmovn_s32(c[0]), vmovn_s32(c[1]));
a[1] = vcombine_s16(vmovn_s32(c[2]), vmovn_s32(c[3]));
transpose_s16_4x4q(&a[0], &a[1]);
switch (tx_type) {
case DCT_DCT:
idct4x4_16_kernel_bd8(a);
a[1] = vcombine_s16(vget_high_s16(a[1]), vget_low_s16(a[1]));
transpose_s16_4x4q(&a[0], &a[1]);
idct4x4_16_kernel_bd8(a);
a[1] = vcombine_s16(vget_high_s16(a[1]), vget_low_s16(a[1]));
break;
case ADST_DCT:
idct4x4_16_kernel_bd8(a);
a[1] = vcombine_s16(vget_high_s16(a[1]), vget_low_s16(a[1]));
transpose_s16_4x4q(&a[0], &a[1]);
iadst4(a);
break;
case DCT_ADST:
iadst4(a);
transpose_s16_4x4q(&a[0], &a[1]);
idct4x4_16_kernel_bd8(a);
a[1] = vcombine_s16(vget_high_s16(a[1]), vget_low_s16(a[1]));
break;
default:
assert(tx_type == ADST_ADST);
iadst4(a);
transpose_s16_4x4q(&a[0], &a[1]);
iadst4(a);
break;
}
a[0] = vrshrq_n_s16(a[0], 4);
a[1] = vrshrq_n_s16(a[1], 4);
} else {
switch (tx_type) {
case DCT_DCT: {
const int32x4_t cospis = vld1q_s32(kCospi32);
if (bd == 10) {
idct4x4_16_kernel_bd10(cospis, c);
idct4x4_16_kernel_bd10(cospis, c);
} else {
idct4x4_16_kernel_bd12(cospis, c);
idct4x4_16_kernel_bd12(cospis, c);
}
break;
}
case ADST_DCT: {
const int32x4_t cospis = vld1q_s32(kCospi32);
if (bd == 10) {
idct4x4_16_kernel_bd10(cospis, c);
} else {
idct4x4_16_kernel_bd12(cospis, c);
}
transpose_s32_4x4(&c[0], &c[1], &c[2], &c[3]);
highbd_iadst4(c);
break;
}
case DCT_ADST: {
const int32x4_t cospis = vld1q_s32(kCospi32);
transpose_s32_4x4(&c[0], &c[1], &c[2], &c[3]);
highbd_iadst4(c);
if (bd == 10) {
idct4x4_16_kernel_bd10(cospis, c);
} else {
idct4x4_16_kernel_bd12(cospis, c);
}
break;
}
default: {
assert(tx_type == ADST_ADST);
transpose_s32_4x4(&c[0], &c[1], &c[2], &c[3]);
highbd_iadst4(c);
transpose_s32_4x4(&c[0], &c[1], &c[2], &c[3]);
highbd_iadst4(c);
break;
}
}
a[0] = vcombine_s16(vqrshrn_n_s32(c[0], 4), vqrshrn_n_s32(c[1], 4));
a[1] = vcombine_s16(vqrshrn_n_s32(c[2], 4), vqrshrn_n_s32(c[3], 4));
}
highbd_idct4x4_1_add_kernel1(&dest, stride, a[0], max);
highbd_idct4x4_1_add_kernel1(&dest, stride, a[1], max);
}

View File

@@ -14,63 +14,206 @@
#include "./vp9_rtcd.h" #include "./vp9_rtcd.h"
#include "./vpx_config.h" #include "./vpx_config.h"
#include "vp9/common/vp9_common.h" #include "vp9/common/vp9_common.h"
#include "vp9/common/arm/neon/vp9_iht_neon.h"
#include "vpx_dsp/arm/idct_neon.h"
#include "vpx_dsp/arm/mem_neon.h"
#include "vpx_dsp/txfm_common.h" #include "vpx_dsp/txfm_common.h"
static INLINE void TRANSPOSE4X4(int16x8_t *q8s16, int16x8_t *q9s16) {
int32x4_t q8s32, q9s32;
int16x4x2_t d0x2s16, d1x2s16;
int32x4x2_t q0x2s32;
d0x2s16 = vtrn_s16(vget_low_s16(*q8s16), vget_high_s16(*q8s16));
d1x2s16 = vtrn_s16(vget_low_s16(*q9s16), vget_high_s16(*q9s16));
q8s32 = vreinterpretq_s32_s16(vcombine_s16(d0x2s16.val[0], d0x2s16.val[1]));
q9s32 = vreinterpretq_s32_s16(vcombine_s16(d1x2s16.val[0], d1x2s16.val[1]));
q0x2s32 = vtrnq_s32(q8s32, q9s32);
*q8s16 = vreinterpretq_s16_s32(q0x2s32.val[0]);
*q9s16 = vreinterpretq_s16_s32(q0x2s32.val[1]);
}
static INLINE void GENERATE_COSINE_CONSTANTS(int16x4_t *d0s16, int16x4_t *d1s16,
int16x4_t *d2s16) {
*d0s16 = vdup_n_s16(cospi_8_64);
*d1s16 = vdup_n_s16(cospi_16_64);
*d2s16 = vdup_n_s16(cospi_24_64);
}
static INLINE void GENERATE_SINE_CONSTANTS(int16x4_t *d3s16, int16x4_t *d4s16,
int16x4_t *d5s16, int16x8_t *q3s16) {
*d3s16 = vdup_n_s16(sinpi_1_9);
*d4s16 = vdup_n_s16(sinpi_2_9);
*q3s16 = vdupq_n_s16(sinpi_3_9);
*d5s16 = vdup_n_s16(sinpi_4_9);
}
static INLINE void IDCT4x4_1D(int16x4_t *d0s16, int16x4_t *d1s16,
int16x4_t *d2s16, int16x8_t *q8s16,
int16x8_t *q9s16) {
int16x4_t d16s16, d17s16, d18s16, d19s16, d23s16, d24s16;
int16x4_t d26s16, d27s16, d28s16, d29s16;
int32x4_t q10s32, q13s32, q14s32, q15s32;
int16x8_t q13s16, q14s16;
d16s16 = vget_low_s16(*q8s16);
d17s16 = vget_high_s16(*q8s16);
d18s16 = vget_low_s16(*q9s16);
d19s16 = vget_high_s16(*q9s16);
d23s16 = vadd_s16(d16s16, d18s16);
d24s16 = vsub_s16(d16s16, d18s16);
q15s32 = vmull_s16(d17s16, *d2s16);
q10s32 = vmull_s16(d17s16, *d0s16);
q13s32 = vmull_s16(d23s16, *d1s16);
q14s32 = vmull_s16(d24s16, *d1s16);
q15s32 = vmlsl_s16(q15s32, d19s16, *d0s16);
q10s32 = vmlal_s16(q10s32, d19s16, *d2s16);
d26s16 = vrshrn_n_s32(q13s32, 14);
d27s16 = vrshrn_n_s32(q14s32, 14);
d29s16 = vrshrn_n_s32(q15s32, 14);
d28s16 = vrshrn_n_s32(q10s32, 14);
q13s16 = vcombine_s16(d26s16, d27s16);
q14s16 = vcombine_s16(d28s16, d29s16);
*q8s16 = vaddq_s16(q13s16, q14s16);
*q9s16 = vsubq_s16(q13s16, q14s16);
*q9s16 = vcombine_s16(vget_high_s16(*q9s16), vget_low_s16(*q9s16)); // vswp
}
// One 1-D pass of the 4x4 inverse ADST over the two packed vectors
// *q8s16 / *q9s16, in place. d3/d4/d5 carry sinpi_1_9 / sinpi_2_9 /
// sinpi_4_9 and q3 carries sinpi_3_9, as produced by
// GENERATE_SINE_CONSTANTS. Below, a0..a3 name the four input lanes
// d16s16..d19s16. NOTE(review): statement order matters here — several
// 32-bit accumulators are reused with new meanings mid-function.
static INLINE void IADST4x4_1D(int16x4_t *d3s16, int16x4_t *d4s16,
                               int16x4_t *d5s16, int16x8_t *q3s16,
                               int16x8_t *q8s16, int16x8_t *q9s16) {
  int16x4_t d6s16, d16s16, d17s16, d18s16, d19s16;
  int32x4_t q8s32, q9s32, q10s32, q11s32, q12s32, q13s32, q14s32, q15s32;

  d6s16 = vget_low_s16(*q3s16);  // sinpi_3_9 broadcast

  d16s16 = vget_low_s16(*q8s16);   // a0
  d17s16 = vget_high_s16(*q8s16);  // a1
  d18s16 = vget_low_s16(*q9s16);   // a2
  d19s16 = vget_high_s16(*q9s16);  // a3

  q10s32 = vmull_s16(*d3s16, d16s16);  // sinpi_1_9 * a0
  q11s32 = vmull_s16(*d4s16, d16s16);  // sinpi_2_9 * a0
  q12s32 = vmull_s16(d6s16, d17s16);   // sinpi_3_9 * a1
  q13s32 = vmull_s16(*d5s16, d18s16);  // sinpi_4_9 * a2
  q14s32 = vmull_s16(*d3s16, d18s16);  // sinpi_1_9 * a2
  // Widened sum a0 + a3 - a2, built across the next three statements.
  q15s32 = vmovl_s16(d16s16);
  q15s32 = vaddw_s16(q15s32, d19s16);
  q8s32 = vmull_s16(*d4s16, d19s16);   // sinpi_2_9 * a3
  q15s32 = vsubw_s16(q15s32, d18s16);
  q9s32 = vmull_s16(*d5s16, d19s16);   // sinpi_4_9 * a3

  q10s32 = vaddq_s32(q10s32, q13s32);  // += sinpi_4_9 * a2
  q10s32 = vaddq_s32(q10s32, q8s32);   // += sinpi_2_9 * a3
  q11s32 = vsubq_s32(q11s32, q14s32);  // -= sinpi_1_9 * a2
  q8s32 = vdupq_n_s32(sinpi_3_9);      // q8s32 reused: now a 32-bit scale
  q11s32 = vsubq_s32(q11s32, q9s32);   // -= sinpi_4_9 * a3
  q15s32 = vmulq_s32(q15s32, q8s32);   // sinpi_3_9 * (a0 + a3 - a2)

  q13s32 = vaddq_s32(q10s32, q12s32);  // out lane 0
  q10s32 = vaddq_s32(q10s32, q11s32);
  q14s32 = vaddq_s32(q11s32, q12s32);  // out lane 1
  q10s32 = vsubq_s32(q10s32, q12s32);  // out lane 3

  // Round-shift by DCT_CONST_BITS (14) and narrow back to 16 bits.
  d16s16 = vrshrn_n_s32(q13s32, 14);
  d17s16 = vrshrn_n_s32(q14s32, 14);
  d18s16 = vrshrn_n_s32(q15s32, 14);  // out lane 2
  d19s16 = vrshrn_n_s32(q10s32, 14);

  *q8s16 = vcombine_s16(d16s16, d17s16);
  *q9s16 = vcombine_s16(d18s16, d19s16);
}
void vp9_iht4x4_16_add_neon(const tran_low_t *input, uint8_t *dest, int stride, void vp9_iht4x4_16_add_neon(const tran_low_t *input, uint8_t *dest, int stride,
int tx_type) { int tx_type) {
int16x8_t a[2]; uint8x8_t d26u8, d27u8;
uint8x8_t s[2], d[2]; int16x4_t d0s16, d1s16, d2s16, d3s16, d4s16, d5s16;
uint16x8_t sum[2]; uint32x2_t d26u32, d27u32;
int16x8_t q3s16, q8s16, q9s16;
uint16x8_t q8u16, q9u16;
assert(!((intptr_t)dest % sizeof(uint32_t))); d26u32 = d27u32 = vdup_n_u32(0);
assert(!(stride % sizeof(uint32_t)));
a[0] = load_tran_low_to_s16q(input); q8s16 = vld1q_s16(input);
a[1] = load_tran_low_to_s16q(input + 8); q9s16 = vld1q_s16(input + 8);
transpose_s16_4x4q(&a[0], &a[1]);
TRANSPOSE4X4(&q8s16, &q9s16);
switch (tx_type) { switch (tx_type) {
case DCT_DCT: case 0: // idct_idct is not supported. Fall back to C
idct4x4_16_kernel_bd8(a); vp9_iht4x4_16_add_c(input, dest, stride, tx_type);
a[1] = vcombine_s16(vget_high_s16(a[1]), vget_low_s16(a[1])); return;
transpose_s16_4x4q(&a[0], &a[1]); case 1: // iadst_idct
idct4x4_16_kernel_bd8(a); // generate constants
a[1] = vcombine_s16(vget_high_s16(a[1]), vget_low_s16(a[1])); GENERATE_COSINE_CONSTANTS(&d0s16, &d1s16, &d2s16);
break; GENERATE_SINE_CONSTANTS(&d3s16, &d4s16, &d5s16, &q3s16);
case ADST_DCT: // first transform rows
idct4x4_16_kernel_bd8(a); IDCT4x4_1D(&d0s16, &d1s16, &d2s16, &q8s16, &q9s16);
a[1] = vcombine_s16(vget_high_s16(a[1]), vget_low_s16(a[1]));
transpose_s16_4x4q(&a[0], &a[1]);
iadst4(a);
break;
case DCT_ADST: // transpose the matrix
iadst4(a); TRANSPOSE4X4(&q8s16, &q9s16);
transpose_s16_4x4q(&a[0], &a[1]);
idct4x4_16_kernel_bd8(a);
a[1] = vcombine_s16(vget_high_s16(a[1]), vget_low_s16(a[1]));
break;
default: // then transform columns
assert(tx_type == ADST_ADST); IADST4x4_1D(&d3s16, &d4s16, &d5s16, &q3s16, &q8s16, &q9s16);
iadst4(a); break;
transpose_s16_4x4q(&a[0], &a[1]); case 2: // idct_iadst
iadst4(a); // generate constants
GENERATE_COSINE_CONSTANTS(&d0s16, &d1s16, &d2s16);
GENERATE_SINE_CONSTANTS(&d3s16, &d4s16, &d5s16, &q3s16);
// first transform rows
IADST4x4_1D(&d3s16, &d4s16, &d5s16, &q3s16, &q8s16, &q9s16);
// transpose the matrix
TRANSPOSE4X4(&q8s16, &q9s16);
// then transform columns
IDCT4x4_1D(&d0s16, &d1s16, &d2s16, &q8s16, &q9s16);
break;
case 3: // iadst_iadst
// generate constants
GENERATE_SINE_CONSTANTS(&d3s16, &d4s16, &d5s16, &q3s16);
// first transform rows
IADST4x4_1D(&d3s16, &d4s16, &d5s16, &q3s16, &q8s16, &q9s16);
// transpose the matrix
TRANSPOSE4X4(&q8s16, &q9s16);
// then transform columns
IADST4x4_1D(&d3s16, &d4s16, &d5s16, &q3s16, &q8s16, &q9s16);
break;
default: // iadst_idct
assert(0);
break; break;
} }
a[0] = vrshrq_n_s16(a[0], 4); q8s16 = vrshrq_n_s16(q8s16, 4);
a[1] = vrshrq_n_s16(a[1], 4); q9s16 = vrshrq_n_s16(q9s16, 4);
s[0] = load_u8(dest, stride);
s[1] = load_u8(dest + 2 * stride, stride); d26u32 = vld1_lane_u32((const uint32_t *)dest, d26u32, 0);
sum[0] = vaddw_u8(vreinterpretq_u16_s16(a[0]), s[0]); dest += stride;
sum[1] = vaddw_u8(vreinterpretq_u16_s16(a[1]), s[1]); d26u32 = vld1_lane_u32((const uint32_t *)dest, d26u32, 1);
d[0] = vqmovun_s16(vreinterpretq_s16_u16(sum[0])); dest += stride;
d[1] = vqmovun_s16(vreinterpretq_s16_u16(sum[1])); d27u32 = vld1_lane_u32((const uint32_t *)dest, d27u32, 0);
store_u8(dest, stride, d[0]); dest += stride;
store_u8(dest + 2 * stride, stride, d[1]); d27u32 = vld1_lane_u32((const uint32_t *)dest, d27u32, 1);
q8u16 = vaddw_u8(vreinterpretq_u16_s16(q8s16), vreinterpret_u8_u32(d26u32));
q9u16 = vaddw_u8(vreinterpretq_u16_s16(q9s16), vreinterpret_u8_u32(d27u32));
d26u8 = vqmovun_s16(vreinterpretq_s16_u16(q8u16));
d27u8 = vqmovun_s16(vreinterpretq_s16_u16(q9u16));
vst1_lane_u32((uint32_t *)dest, vreinterpret_u32_u8(d27u8), 1);
dest -= stride;
vst1_lane_u32((uint32_t *)dest, vreinterpret_u32_u8(d27u8), 0);
dest -= stride;
vst1_lane_u32((uint32_t *)dest, vreinterpret_u32_u8(d26u8), 1);
dest -= stride;
vst1_lane_u32((uint32_t *)dest, vreinterpret_u32_u8(d26u8), 0);
} }

View File

@@ -14,199 +14,527 @@
#include "./vp9_rtcd.h" #include "./vp9_rtcd.h"
#include "./vpx_config.h" #include "./vpx_config.h"
#include "vp9/common/vp9_common.h" #include "vp9/common/vp9_common.h"
#include "vpx_dsp/arm/idct_neon.h"
#include "vpx_dsp/arm/mem_neon.h"
#include "vpx_dsp/arm/transpose_neon.h" #include "vpx_dsp/arm/transpose_neon.h"
static INLINE void iadst_half_butterfly_neon(int16x8_t *const x, static int16_t cospi_2_64 = 16305;
const int16x4_t c) { static int16_t cospi_4_64 = 16069;
const int16x8_t sum = vaddq_s16(x[0], x[1]); static int16_t cospi_6_64 = 15679;
const int16x8_t sub = vsubq_s16(x[0], x[1]); static int16_t cospi_8_64 = 15137;
int32x4_t t0[2], t1[2]; static int16_t cospi_10_64 = 14449;
static int16_t cospi_12_64 = 13623;
static int16_t cospi_14_64 = 12665;
static int16_t cospi_16_64 = 11585;
static int16_t cospi_18_64 = 10394;
static int16_t cospi_20_64 = 9102;
static int16_t cospi_22_64 = 7723;
static int16_t cospi_24_64 = 6270;
static int16_t cospi_26_64 = 4756;
static int16_t cospi_28_64 = 3196;
static int16_t cospi_30_64 = 1606;
t0[0] = vmull_lane_s16(vget_low_s16(sum), c, 0); static INLINE void IDCT8x8_1D(int16x8_t *q8s16, int16x8_t *q9s16,
t0[1] = vmull_lane_s16(vget_high_s16(sum), c, 0); int16x8_t *q10s16, int16x8_t *q11s16,
t1[0] = vmull_lane_s16(vget_low_s16(sub), c, 0); int16x8_t *q12s16, int16x8_t *q13s16,
t1[1] = vmull_lane_s16(vget_high_s16(sub), c, 0); int16x8_t *q14s16, int16x8_t *q15s16) {
x[0] = dct_const_round_shift_low_8(t0); int16x4_t d0s16, d1s16, d2s16, d3s16;
x[1] = dct_const_round_shift_low_8(t1); int16x4_t d8s16, d9s16, d10s16, d11s16, d12s16, d13s16, d14s16, d15s16;
int16x4_t d16s16, d17s16, d18s16, d19s16, d20s16, d21s16, d22s16, d23s16;
int16x4_t d24s16, d25s16, d26s16, d27s16, d28s16, d29s16, d30s16, d31s16;
int16x8_t q0s16, q1s16, q2s16, q3s16, q4s16, q5s16, q6s16, q7s16;
int32x4_t q2s32, q3s32, q5s32, q6s32, q8s32, q9s32;
int32x4_t q10s32, q11s32, q12s32, q13s32, q15s32;
d0s16 = vdup_n_s16(cospi_28_64);
d1s16 = vdup_n_s16(cospi_4_64);
d2s16 = vdup_n_s16(cospi_12_64);
d3s16 = vdup_n_s16(cospi_20_64);
d16s16 = vget_low_s16(*q8s16);
d17s16 = vget_high_s16(*q8s16);
d18s16 = vget_low_s16(*q9s16);
d19s16 = vget_high_s16(*q9s16);
d20s16 = vget_low_s16(*q10s16);
d21s16 = vget_high_s16(*q10s16);
d22s16 = vget_low_s16(*q11s16);
d23s16 = vget_high_s16(*q11s16);
d24s16 = vget_low_s16(*q12s16);
d25s16 = vget_high_s16(*q12s16);
d26s16 = vget_low_s16(*q13s16);
d27s16 = vget_high_s16(*q13s16);
d28s16 = vget_low_s16(*q14s16);
d29s16 = vget_high_s16(*q14s16);
d30s16 = vget_low_s16(*q15s16);
d31s16 = vget_high_s16(*q15s16);
q2s32 = vmull_s16(d18s16, d0s16);
q3s32 = vmull_s16(d19s16, d0s16);
q5s32 = vmull_s16(d26s16, d2s16);
q6s32 = vmull_s16(d27s16, d2s16);
q2s32 = vmlsl_s16(q2s32, d30s16, d1s16);
q3s32 = vmlsl_s16(q3s32, d31s16, d1s16);
q5s32 = vmlsl_s16(q5s32, d22s16, d3s16);
q6s32 = vmlsl_s16(q6s32, d23s16, d3s16);
d8s16 = vrshrn_n_s32(q2s32, 14);
d9s16 = vrshrn_n_s32(q3s32, 14);
d10s16 = vrshrn_n_s32(q5s32, 14);
d11s16 = vrshrn_n_s32(q6s32, 14);
q4s16 = vcombine_s16(d8s16, d9s16);
q5s16 = vcombine_s16(d10s16, d11s16);
q2s32 = vmull_s16(d18s16, d1s16);
q3s32 = vmull_s16(d19s16, d1s16);
q9s32 = vmull_s16(d26s16, d3s16);
q13s32 = vmull_s16(d27s16, d3s16);
q2s32 = vmlal_s16(q2s32, d30s16, d0s16);
q3s32 = vmlal_s16(q3s32, d31s16, d0s16);
q9s32 = vmlal_s16(q9s32, d22s16, d2s16);
q13s32 = vmlal_s16(q13s32, d23s16, d2s16);
d14s16 = vrshrn_n_s32(q2s32, 14);
d15s16 = vrshrn_n_s32(q3s32, 14);
d12s16 = vrshrn_n_s32(q9s32, 14);
d13s16 = vrshrn_n_s32(q13s32, 14);
q6s16 = vcombine_s16(d12s16, d13s16);
q7s16 = vcombine_s16(d14s16, d15s16);
d0s16 = vdup_n_s16(cospi_16_64);
q2s32 = vmull_s16(d16s16, d0s16);
q3s32 = vmull_s16(d17s16, d0s16);
q13s32 = vmull_s16(d16s16, d0s16);
q15s32 = vmull_s16(d17s16, d0s16);
q2s32 = vmlal_s16(q2s32, d24s16, d0s16);
q3s32 = vmlal_s16(q3s32, d25s16, d0s16);
q13s32 = vmlsl_s16(q13s32, d24s16, d0s16);
q15s32 = vmlsl_s16(q15s32, d25s16, d0s16);
d0s16 = vdup_n_s16(cospi_24_64);
d1s16 = vdup_n_s16(cospi_8_64);
d18s16 = vrshrn_n_s32(q2s32, 14);
d19s16 = vrshrn_n_s32(q3s32, 14);
d22s16 = vrshrn_n_s32(q13s32, 14);
d23s16 = vrshrn_n_s32(q15s32, 14);
*q9s16 = vcombine_s16(d18s16, d19s16);
*q11s16 = vcombine_s16(d22s16, d23s16);
q2s32 = vmull_s16(d20s16, d0s16);
q3s32 = vmull_s16(d21s16, d0s16);
q8s32 = vmull_s16(d20s16, d1s16);
q12s32 = vmull_s16(d21s16, d1s16);
q2s32 = vmlsl_s16(q2s32, d28s16, d1s16);
q3s32 = vmlsl_s16(q3s32, d29s16, d1s16);
q8s32 = vmlal_s16(q8s32, d28s16, d0s16);
q12s32 = vmlal_s16(q12s32, d29s16, d0s16);
d26s16 = vrshrn_n_s32(q2s32, 14);
d27s16 = vrshrn_n_s32(q3s32, 14);
d30s16 = vrshrn_n_s32(q8s32, 14);
d31s16 = vrshrn_n_s32(q12s32, 14);
*q13s16 = vcombine_s16(d26s16, d27s16);
*q15s16 = vcombine_s16(d30s16, d31s16);
q0s16 = vaddq_s16(*q9s16, *q15s16);
q1s16 = vaddq_s16(*q11s16, *q13s16);
q2s16 = vsubq_s16(*q11s16, *q13s16);
q3s16 = vsubq_s16(*q9s16, *q15s16);
*q13s16 = vsubq_s16(q4s16, q5s16);
q4s16 = vaddq_s16(q4s16, q5s16);
*q14s16 = vsubq_s16(q7s16, q6s16);
q7s16 = vaddq_s16(q7s16, q6s16);
d26s16 = vget_low_s16(*q13s16);
d27s16 = vget_high_s16(*q13s16);
d28s16 = vget_low_s16(*q14s16);
d29s16 = vget_high_s16(*q14s16);
d16s16 = vdup_n_s16(cospi_16_64);
q9s32 = vmull_s16(d28s16, d16s16);
q10s32 = vmull_s16(d29s16, d16s16);
q11s32 = vmull_s16(d28s16, d16s16);
q12s32 = vmull_s16(d29s16, d16s16);
q9s32 = vmlsl_s16(q9s32, d26s16, d16s16);
q10s32 = vmlsl_s16(q10s32, d27s16, d16s16);
q11s32 = vmlal_s16(q11s32, d26s16, d16s16);
q12s32 = vmlal_s16(q12s32, d27s16, d16s16);
d10s16 = vrshrn_n_s32(q9s32, 14);
d11s16 = vrshrn_n_s32(q10s32, 14);
d12s16 = vrshrn_n_s32(q11s32, 14);
d13s16 = vrshrn_n_s32(q12s32, 14);
q5s16 = vcombine_s16(d10s16, d11s16);
q6s16 = vcombine_s16(d12s16, d13s16);
*q8s16 = vaddq_s16(q0s16, q7s16);
*q9s16 = vaddq_s16(q1s16, q6s16);
*q10s16 = vaddq_s16(q2s16, q5s16);
*q11s16 = vaddq_s16(q3s16, q4s16);
*q12s16 = vsubq_s16(q3s16, q4s16);
*q13s16 = vsubq_s16(q2s16, q5s16);
*q14s16 = vsubq_s16(q1s16, q6s16);
*q15s16 = vsubq_s16(q0s16, q7s16);
} }
static INLINE void iadst_butterfly_lane_0_1_neon(const int16x8_t in0, static INLINE void IADST8X8_1D(int16x8_t *q8s16, int16x8_t *q9s16,
const int16x8_t in1, int16x8_t *q10s16, int16x8_t *q11s16,
const int16x4_t c, int16x8_t *q12s16, int16x8_t *q13s16,
int32x4_t *const s0, int16x8_t *q14s16, int16x8_t *q15s16) {
int32x4_t *const s1) { int16x4_t d0s16, d1s16, d2s16, d3s16, d4s16, d5s16, d6s16, d7s16;
s0[0] = vmull_lane_s16(vget_low_s16(in0), c, 0); int16x4_t d8s16, d9s16, d10s16, d11s16, d12s16, d13s16, d14s16, d15s16;
s0[1] = vmull_lane_s16(vget_high_s16(in0), c, 0); int16x4_t d16s16, d17s16, d18s16, d19s16, d20s16, d21s16, d22s16, d23s16;
s1[0] = vmull_lane_s16(vget_low_s16(in0), c, 1); int16x4_t d24s16, d25s16, d26s16, d27s16, d28s16, d29s16, d30s16, d31s16;
s1[1] = vmull_lane_s16(vget_high_s16(in0), c, 1); int16x8_t q2s16, q4s16, q5s16, q6s16;
int32x4_t q0s32, q1s32, q2s32, q3s32, q4s32, q5s32, q6s32, q7s32, q8s32;
int32x4_t q9s32, q10s32, q11s32, q12s32, q13s32, q14s32, q15s32;
s0[0] = vmlal_lane_s16(s0[0], vget_low_s16(in1), c, 1); d16s16 = vget_low_s16(*q8s16);
s0[1] = vmlal_lane_s16(s0[1], vget_high_s16(in1), c, 1); d17s16 = vget_high_s16(*q8s16);
s1[0] = vmlsl_lane_s16(s1[0], vget_low_s16(in1), c, 0); d18s16 = vget_low_s16(*q9s16);
s1[1] = vmlsl_lane_s16(s1[1], vget_high_s16(in1), c, 0); d19s16 = vget_high_s16(*q9s16);
} d20s16 = vget_low_s16(*q10s16);
d21s16 = vget_high_s16(*q10s16);
d22s16 = vget_low_s16(*q11s16);
d23s16 = vget_high_s16(*q11s16);
d24s16 = vget_low_s16(*q12s16);
d25s16 = vget_high_s16(*q12s16);
d26s16 = vget_low_s16(*q13s16);
d27s16 = vget_high_s16(*q13s16);
d28s16 = vget_low_s16(*q14s16);
d29s16 = vget_high_s16(*q14s16);
d30s16 = vget_low_s16(*q15s16);
d31s16 = vget_high_s16(*q15s16);
static INLINE void iadst_butterfly_lane_2_3_neon(const int16x8_t in0, d14s16 = vdup_n_s16(cospi_2_64);
const int16x8_t in1, d15s16 = vdup_n_s16(cospi_30_64);
const int16x4_t c,
int32x4_t *const s0,
int32x4_t *const s1) {
s0[0] = vmull_lane_s16(vget_low_s16(in0), c, 2);
s0[1] = vmull_lane_s16(vget_high_s16(in0), c, 2);
s1[0] = vmull_lane_s16(vget_low_s16(in0), c, 3);
s1[1] = vmull_lane_s16(vget_high_s16(in0), c, 3);
s0[0] = vmlal_lane_s16(s0[0], vget_low_s16(in1), c, 3); q1s32 = vmull_s16(d30s16, d14s16);
s0[1] = vmlal_lane_s16(s0[1], vget_high_s16(in1), c, 3); q2s32 = vmull_s16(d31s16, d14s16);
s1[0] = vmlsl_lane_s16(s1[0], vget_low_s16(in1), c, 2); q3s32 = vmull_s16(d30s16, d15s16);
s1[1] = vmlsl_lane_s16(s1[1], vget_high_s16(in1), c, 2); q4s32 = vmull_s16(d31s16, d15s16);
}
static INLINE void iadst_butterfly_lane_3_2_neon(const int16x8_t in0, d30s16 = vdup_n_s16(cospi_18_64);
const int16x8_t in1, d31s16 = vdup_n_s16(cospi_14_64);
const int16x4_t c,
int32x4_t *const s0,
int32x4_t *const s1) {
s0[0] = vmull_lane_s16(vget_low_s16(in0), c, 3);
s0[1] = vmull_lane_s16(vget_high_s16(in0), c, 3);
s1[0] = vmull_lane_s16(vget_low_s16(in0), c, 2);
s1[1] = vmull_lane_s16(vget_high_s16(in0), c, 2);
s0[0] = vmlal_lane_s16(s0[0], vget_low_s16(in1), c, 2); q1s32 = vmlal_s16(q1s32, d16s16, d15s16);
s0[1] = vmlal_lane_s16(s0[1], vget_high_s16(in1), c, 2); q2s32 = vmlal_s16(q2s32, d17s16, d15s16);
s1[0] = vmlsl_lane_s16(s1[0], vget_low_s16(in1), c, 3); q3s32 = vmlsl_s16(q3s32, d16s16, d14s16);
s1[1] = vmlsl_lane_s16(s1[1], vget_high_s16(in1), c, 3); q4s32 = vmlsl_s16(q4s32, d17s16, d14s16);
}
static INLINE int16x8_t add_dct_const_round_shift_low_8( q5s32 = vmull_s16(d22s16, d30s16);
const int32x4_t *const in0, const int32x4_t *const in1) { q6s32 = vmull_s16(d23s16, d30s16);
int32x4_t sum[2]; q7s32 = vmull_s16(d22s16, d31s16);
q8s32 = vmull_s16(d23s16, d31s16);
sum[0] = vaddq_s32(in0[0], in1[0]); q5s32 = vmlal_s16(q5s32, d24s16, d31s16);
sum[1] = vaddq_s32(in0[1], in1[1]); q6s32 = vmlal_s16(q6s32, d25s16, d31s16);
return dct_const_round_shift_low_8(sum); q7s32 = vmlsl_s16(q7s32, d24s16, d30s16);
} q8s32 = vmlsl_s16(q8s32, d25s16, d30s16);
static INLINE int16x8_t sub_dct_const_round_shift_low_8( q11s32 = vaddq_s32(q1s32, q5s32);
const int32x4_t *const in0, const int32x4_t *const in1) { q12s32 = vaddq_s32(q2s32, q6s32);
int32x4_t sum[2]; q1s32 = vsubq_s32(q1s32, q5s32);
q2s32 = vsubq_s32(q2s32, q6s32);
sum[0] = vsubq_s32(in0[0], in1[0]); d22s16 = vrshrn_n_s32(q11s32, 14);
sum[1] = vsubq_s32(in0[1], in1[1]); d23s16 = vrshrn_n_s32(q12s32, 14);
return dct_const_round_shift_low_8(sum); *q11s16 = vcombine_s16(d22s16, d23s16);
}
static INLINE void iadst8(int16x8_t *const io) { q12s32 = vaddq_s32(q3s32, q7s32);
const int16x4_t c0 = q15s32 = vaddq_s32(q4s32, q8s32);
create_s16x4_neon(cospi_2_64, cospi_30_64, cospi_10_64, cospi_22_64); q3s32 = vsubq_s32(q3s32, q7s32);
const int16x4_t c1 = q4s32 = vsubq_s32(q4s32, q8s32);
create_s16x4_neon(cospi_18_64, cospi_14_64, cospi_26_64, cospi_6_64);
const int16x4_t c2 =
create_s16x4_neon(cospi_16_64, 0, cospi_8_64, cospi_24_64);
int16x8_t x[8], t[4];
int32x4_t s0[2], s1[2], s2[2], s3[2], s4[2], s5[2], s6[2], s7[2];
x[0] = io[7]; d2s16 = vrshrn_n_s32(q1s32, 14);
x[1] = io[0]; d3s16 = vrshrn_n_s32(q2s32, 14);
x[2] = io[5]; d24s16 = vrshrn_n_s32(q12s32, 14);
x[3] = io[2]; d25s16 = vrshrn_n_s32(q15s32, 14);
x[4] = io[3]; d6s16 = vrshrn_n_s32(q3s32, 14);
x[5] = io[4]; d7s16 = vrshrn_n_s32(q4s32, 14);
x[6] = io[1]; *q12s16 = vcombine_s16(d24s16, d25s16);
x[7] = io[6];
// stage 1 d0s16 = vdup_n_s16(cospi_10_64);
iadst_butterfly_lane_0_1_neon(x[0], x[1], c0, s0, s1); d1s16 = vdup_n_s16(cospi_22_64);
iadst_butterfly_lane_2_3_neon(x[2], x[3], c0, s2, s3); q4s32 = vmull_s16(d26s16, d0s16);
iadst_butterfly_lane_0_1_neon(x[4], x[5], c1, s4, s5); q5s32 = vmull_s16(d27s16, d0s16);
iadst_butterfly_lane_2_3_neon(x[6], x[7], c1, s6, s7); q2s32 = vmull_s16(d26s16, d1s16);
q6s32 = vmull_s16(d27s16, d1s16);
x[0] = add_dct_const_round_shift_low_8(s0, s4); d30s16 = vdup_n_s16(cospi_26_64);
x[1] = add_dct_const_round_shift_low_8(s1, s5); d31s16 = vdup_n_s16(cospi_6_64);
x[2] = add_dct_const_round_shift_low_8(s2, s6);
x[3] = add_dct_const_round_shift_low_8(s3, s7);
x[4] = sub_dct_const_round_shift_low_8(s0, s4);
x[5] = sub_dct_const_round_shift_low_8(s1, s5);
x[6] = sub_dct_const_round_shift_low_8(s2, s6);
x[7] = sub_dct_const_round_shift_low_8(s3, s7);
// stage 2 q4s32 = vmlal_s16(q4s32, d20s16, d1s16);
t[0] = x[0]; q5s32 = vmlal_s16(q5s32, d21s16, d1s16);
t[1] = x[1]; q2s32 = vmlsl_s16(q2s32, d20s16, d0s16);
t[2] = x[2]; q6s32 = vmlsl_s16(q6s32, d21s16, d0s16);
t[3] = x[3];
iadst_butterfly_lane_2_3_neon(x[4], x[5], c2, s4, s5);
iadst_butterfly_lane_3_2_neon(x[7], x[6], c2, s7, s6);
x[0] = vaddq_s16(t[0], t[2]); q0s32 = vmull_s16(d18s16, d30s16);
x[1] = vaddq_s16(t[1], t[3]); q13s32 = vmull_s16(d19s16, d30s16);
x[2] = vsubq_s16(t[0], t[2]);
x[3] = vsubq_s16(t[1], t[3]);
x[4] = add_dct_const_round_shift_low_8(s4, s6);
x[5] = add_dct_const_round_shift_low_8(s5, s7);
x[6] = sub_dct_const_round_shift_low_8(s4, s6);
x[7] = sub_dct_const_round_shift_low_8(s5, s7);
// stage 3 q0s32 = vmlal_s16(q0s32, d28s16, d31s16);
iadst_half_butterfly_neon(x + 2, c2); q13s32 = vmlal_s16(q13s32, d29s16, d31s16);
iadst_half_butterfly_neon(x + 6, c2);
io[0] = x[0]; q10s32 = vmull_s16(d18s16, d31s16);
io[1] = vnegq_s16(x[4]); q9s32 = vmull_s16(d19s16, d31s16);
io[2] = x[6];
io[3] = vnegq_s16(x[2]); q10s32 = vmlsl_s16(q10s32, d28s16, d30s16);
io[4] = x[3]; q9s32 = vmlsl_s16(q9s32, d29s16, d30s16);
io[5] = vnegq_s16(x[7]);
io[6] = x[5]; q14s32 = vaddq_s32(q2s32, q10s32);
io[7] = vnegq_s16(x[1]); q15s32 = vaddq_s32(q6s32, q9s32);
q2s32 = vsubq_s32(q2s32, q10s32);
q6s32 = vsubq_s32(q6s32, q9s32);
d28s16 = vrshrn_n_s32(q14s32, 14);
d29s16 = vrshrn_n_s32(q15s32, 14);
d4s16 = vrshrn_n_s32(q2s32, 14);
d5s16 = vrshrn_n_s32(q6s32, 14);
*q14s16 = vcombine_s16(d28s16, d29s16);
q9s32 = vaddq_s32(q4s32, q0s32);
q10s32 = vaddq_s32(q5s32, q13s32);
q4s32 = vsubq_s32(q4s32, q0s32);
q5s32 = vsubq_s32(q5s32, q13s32);
d30s16 = vdup_n_s16(cospi_8_64);
d31s16 = vdup_n_s16(cospi_24_64);
d18s16 = vrshrn_n_s32(q9s32, 14);
d19s16 = vrshrn_n_s32(q10s32, 14);
d8s16 = vrshrn_n_s32(q4s32, 14);
d9s16 = vrshrn_n_s32(q5s32, 14);
*q9s16 = vcombine_s16(d18s16, d19s16);
q5s32 = vmull_s16(d2s16, d30s16);
q6s32 = vmull_s16(d3s16, d30s16);
q7s32 = vmull_s16(d2s16, d31s16);
q0s32 = vmull_s16(d3s16, d31s16);
q5s32 = vmlal_s16(q5s32, d6s16, d31s16);
q6s32 = vmlal_s16(q6s32, d7s16, d31s16);
q7s32 = vmlsl_s16(q7s32, d6s16, d30s16);
q0s32 = vmlsl_s16(q0s32, d7s16, d30s16);
q1s32 = vmull_s16(d4s16, d30s16);
q3s32 = vmull_s16(d5s16, d30s16);
q10s32 = vmull_s16(d4s16, d31s16);
q2s32 = vmull_s16(d5s16, d31s16);
q1s32 = vmlsl_s16(q1s32, d8s16, d31s16);
q3s32 = vmlsl_s16(q3s32, d9s16, d31s16);
q10s32 = vmlal_s16(q10s32, d8s16, d30s16);
q2s32 = vmlal_s16(q2s32, d9s16, d30s16);
*q8s16 = vaddq_s16(*q11s16, *q9s16);
*q11s16 = vsubq_s16(*q11s16, *q9s16);
q4s16 = vaddq_s16(*q12s16, *q14s16);
*q12s16 = vsubq_s16(*q12s16, *q14s16);
q14s32 = vaddq_s32(q5s32, q1s32);
q15s32 = vaddq_s32(q6s32, q3s32);
q5s32 = vsubq_s32(q5s32, q1s32);
q6s32 = vsubq_s32(q6s32, q3s32);
d18s16 = vrshrn_n_s32(q14s32, 14);
d19s16 = vrshrn_n_s32(q15s32, 14);
d10s16 = vrshrn_n_s32(q5s32, 14);
d11s16 = vrshrn_n_s32(q6s32, 14);
*q9s16 = vcombine_s16(d18s16, d19s16);
q1s32 = vaddq_s32(q7s32, q10s32);
q3s32 = vaddq_s32(q0s32, q2s32);
q7s32 = vsubq_s32(q7s32, q10s32);
q0s32 = vsubq_s32(q0s32, q2s32);
d28s16 = vrshrn_n_s32(q1s32, 14);
d29s16 = vrshrn_n_s32(q3s32, 14);
d14s16 = vrshrn_n_s32(q7s32, 14);
d15s16 = vrshrn_n_s32(q0s32, 14);
*q14s16 = vcombine_s16(d28s16, d29s16);
d30s16 = vdup_n_s16(cospi_16_64);
d22s16 = vget_low_s16(*q11s16);
d23s16 = vget_high_s16(*q11s16);
q2s32 = vmull_s16(d22s16, d30s16);
q3s32 = vmull_s16(d23s16, d30s16);
q13s32 = vmull_s16(d22s16, d30s16);
q1s32 = vmull_s16(d23s16, d30s16);
d24s16 = vget_low_s16(*q12s16);
d25s16 = vget_high_s16(*q12s16);
q2s32 = vmlal_s16(q2s32, d24s16, d30s16);
q3s32 = vmlal_s16(q3s32, d25s16, d30s16);
q13s32 = vmlsl_s16(q13s32, d24s16, d30s16);
q1s32 = vmlsl_s16(q1s32, d25s16, d30s16);
d4s16 = vrshrn_n_s32(q2s32, 14);
d5s16 = vrshrn_n_s32(q3s32, 14);
d24s16 = vrshrn_n_s32(q13s32, 14);
d25s16 = vrshrn_n_s32(q1s32, 14);
q2s16 = vcombine_s16(d4s16, d5s16);
*q12s16 = vcombine_s16(d24s16, d25s16);
q13s32 = vmull_s16(d10s16, d30s16);
q1s32 = vmull_s16(d11s16, d30s16);
q11s32 = vmull_s16(d10s16, d30s16);
q0s32 = vmull_s16(d11s16, d30s16);
q13s32 = vmlal_s16(q13s32, d14s16, d30s16);
q1s32 = vmlal_s16(q1s32, d15s16, d30s16);
q11s32 = vmlsl_s16(q11s32, d14s16, d30s16);
q0s32 = vmlsl_s16(q0s32, d15s16, d30s16);
d20s16 = vrshrn_n_s32(q13s32, 14);
d21s16 = vrshrn_n_s32(q1s32, 14);
d12s16 = vrshrn_n_s32(q11s32, 14);
d13s16 = vrshrn_n_s32(q0s32, 14);
*q10s16 = vcombine_s16(d20s16, d21s16);
q6s16 = vcombine_s16(d12s16, d13s16);
q5s16 = vdupq_n_s16(0);
*q9s16 = vsubq_s16(q5s16, *q9s16);
*q11s16 = vsubq_s16(q5s16, q2s16);
*q13s16 = vsubq_s16(q5s16, q6s16);
*q15s16 = vsubq_s16(q5s16, q4s16);
} }
void vp9_iht8x8_64_add_neon(const tran_low_t *input, uint8_t *dest, int stride, void vp9_iht8x8_64_add_neon(const tran_low_t *input, uint8_t *dest, int stride,
int tx_type) { int tx_type) {
const int16x8_t cospis = vld1q_s16(kCospi); int i;
const int16x4_t cospis0 = vget_low_s16(cospis); // cospi 0, 8, 16, 24 uint8_t *d1, *d2;
const int16x4_t cospis1 = vget_high_s16(cospis); // cospi 4, 12, 20, 28 uint8x8_t d0u8, d1u8, d2u8, d3u8;
int16x8_t a[8]; uint64x1_t d0u64, d1u64, d2u64, d3u64;
int16x8_t q8s16, q9s16, q10s16, q11s16, q12s16, q13s16, q14s16, q15s16;
uint16x8_t q8u16, q9u16, q10u16, q11u16;
a[0] = load_tran_low_to_s16q(input + 0 * 8); q8s16 = vld1q_s16(input);
a[1] = load_tran_low_to_s16q(input + 1 * 8); q9s16 = vld1q_s16(input + 8);
a[2] = load_tran_low_to_s16q(input + 2 * 8); q10s16 = vld1q_s16(input + 8 * 2);
a[3] = load_tran_low_to_s16q(input + 3 * 8); q11s16 = vld1q_s16(input + 8 * 3);
a[4] = load_tran_low_to_s16q(input + 4 * 8); q12s16 = vld1q_s16(input + 8 * 4);
a[5] = load_tran_low_to_s16q(input + 5 * 8); q13s16 = vld1q_s16(input + 8 * 5);
a[6] = load_tran_low_to_s16q(input + 6 * 8); q14s16 = vld1q_s16(input + 8 * 6);
a[7] = load_tran_low_to_s16q(input + 7 * 8); q15s16 = vld1q_s16(input + 8 * 7);
transpose_s16_8x8(&a[0], &a[1], &a[2], &a[3], &a[4], &a[5], &a[6], &a[7]); transpose_s16_8x8(&q8s16, &q9s16, &q10s16, &q11s16, &q12s16, &q13s16, &q14s16,
&q15s16);
switch (tx_type) { switch (tx_type) {
case DCT_DCT: case 0: // idct_idct is not supported. Fall back to C
idct8x8_64_1d_bd8_kernel(cospis0, cospis1, a); vp9_iht8x8_64_add_c(input, dest, stride, tx_type);
transpose_s16_8x8(&a[0], &a[1], &a[2], &a[3], &a[4], &a[5], &a[6], &a[7]); return;
idct8x8_64_1d_bd8_kernel(cospis0, cospis1, a); case 1: // iadst_idct
break; // generate IDCT constants
// GENERATE_IDCT_CONSTANTS
case ADST_DCT: // first transform rows
idct8x8_64_1d_bd8_kernel(cospis0, cospis1, a); IDCT8x8_1D(&q8s16, &q9s16, &q10s16, &q11s16, &q12s16, &q13s16, &q14s16,
transpose_s16_8x8(&a[0], &a[1], &a[2], &a[3], &a[4], &a[5], &a[6], &a[7]); &q15s16);
iadst8(a);
break;
case DCT_ADST: // transpose the matrix
iadst8(a); transpose_s16_8x8(&q8s16, &q9s16, &q10s16, &q11s16, &q12s16, &q13s16,
transpose_s16_8x8(&a[0], &a[1], &a[2], &a[3], &a[4], &a[5], &a[6], &a[7]); &q14s16, &q15s16);
idct8x8_64_1d_bd8_kernel(cospis0, cospis1, a);
break;
default: // generate IADST constants
assert(tx_type == ADST_ADST); // GENERATE_IADST_CONSTANTS
iadst8(a);
transpose_s16_8x8(&a[0], &a[1], &a[2], &a[3], &a[4], &a[5], &a[6], &a[7]); // then transform columns
iadst8(a); IADST8X8_1D(&q8s16, &q9s16, &q10s16, &q11s16, &q12s16, &q13s16, &q14s16,
&q15s16);
break;
case 2: // idct_iadst
// generate IADST constants
// GENERATE_IADST_CONSTANTS
// first transform rows
IADST8X8_1D(&q8s16, &q9s16, &q10s16, &q11s16, &q12s16, &q13s16, &q14s16,
&q15s16);
// transpose the matrix
transpose_s16_8x8(&q8s16, &q9s16, &q10s16, &q11s16, &q12s16, &q13s16,
&q14s16, &q15s16);
// generate IDCT constants
// GENERATE_IDCT_CONSTANTS
// then transform columns
IDCT8x8_1D(&q8s16, &q9s16, &q10s16, &q11s16, &q12s16, &q13s16, &q14s16,
&q15s16);
break;
case 3: // iadst_iadst
// generate IADST constants
// GENERATE_IADST_CONSTANTS
// first transform rows
IADST8X8_1D(&q8s16, &q9s16, &q10s16, &q11s16, &q12s16, &q13s16, &q14s16,
&q15s16);
// transpose the matrix
transpose_s16_8x8(&q8s16, &q9s16, &q10s16, &q11s16, &q12s16, &q13s16,
&q14s16, &q15s16);
// then transform columns
IADST8X8_1D(&q8s16, &q9s16, &q10s16, &q11s16, &q12s16, &q13s16, &q14s16,
&q15s16);
break;
default: // iadst_idct
assert(0);
break; break;
} }
idct8x8_add8x8_neon(a, dest, stride); q8s16 = vrshrq_n_s16(q8s16, 5);
q9s16 = vrshrq_n_s16(q9s16, 5);
q10s16 = vrshrq_n_s16(q10s16, 5);
q11s16 = vrshrq_n_s16(q11s16, 5);
q12s16 = vrshrq_n_s16(q12s16, 5);
q13s16 = vrshrq_n_s16(q13s16, 5);
q14s16 = vrshrq_n_s16(q14s16, 5);
q15s16 = vrshrq_n_s16(q15s16, 5);
for (d1 = d2 = dest, i = 0; i < 2; i++) {
if (i != 0) {
q8s16 = q12s16;
q9s16 = q13s16;
q10s16 = q14s16;
q11s16 = q15s16;
}
d0u64 = vld1_u64((uint64_t *)d1);
d1 += stride;
d1u64 = vld1_u64((uint64_t *)d1);
d1 += stride;
d2u64 = vld1_u64((uint64_t *)d1);
d1 += stride;
d3u64 = vld1_u64((uint64_t *)d1);
d1 += stride;
q8u16 = vaddw_u8(vreinterpretq_u16_s16(q8s16), vreinterpret_u8_u64(d0u64));
q9u16 = vaddw_u8(vreinterpretq_u16_s16(q9s16), vreinterpret_u8_u64(d1u64));
q10u16 =
vaddw_u8(vreinterpretq_u16_s16(q10s16), vreinterpret_u8_u64(d2u64));
q11u16 =
vaddw_u8(vreinterpretq_u16_s16(q11s16), vreinterpret_u8_u64(d3u64));
d0u8 = vqmovun_s16(vreinterpretq_s16_u16(q8u16));
d1u8 = vqmovun_s16(vreinterpretq_s16_u16(q9u16));
d2u8 = vqmovun_s16(vreinterpretq_s16_u16(q10u16));
d3u8 = vqmovun_s16(vreinterpretq_s16_u16(q11u16));
vst1_u64((uint64_t *)d2, vreinterpret_u64_u8(d0u8));
d2 += stride;
vst1_u64((uint64_t *)d2, vreinterpret_u64_u8(d1u8));
d2 += stride;
vst1_u64((uint64_t *)d2, vreinterpret_u64_u8(d2u8));
d2 += stride;
vst1_u64((uint64_t *)d2, vreinterpret_u64_u8(d3u8));
d2 += stride;
}
} }

View File

@@ -1,60 +0,0 @@
/*
* Copyright (c) 2018 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef VP9_COMMON_ARM_NEON_VP9_IHT_NEON_H_
#define VP9_COMMON_ARM_NEON_VP9_IHT_NEON_H_
#include <arm_neon.h>
#include "./vp9_rtcd.h"
#include "./vpx_config.h"
#include "vp9/common/vp9_common.h"
#include "vpx_dsp/arm/idct_neon.h"
#include "vpx_dsp/arm/mem_neon.h"
#include "vpx_dsp/txfm_common.h"
// 4x4 inverse ADST on two packed vectors: io[0] holds lanes x0 (low) and
// x2 (high), io[1] holds x1 (low) and x3 (high). Results are written back
// into io[0]/io[1] via dct_const_round_shift_low_8_dual.
static INLINE void iadst4(int16x8_t *const io) {
  const int32x4_t c3 = vdupq_n_s32(sinpi_3_9);
  int16x4_t x[4];
  int32x4_t s[8], output[4];
  // c lanes: 0 = sinpi_1_9, 1 = sinpi_2_9, 2 = sinpi_3_9, 3 = sinpi_4_9.
  const int16x4_t c =
      create_s16x4_neon(sinpi_1_9, sinpi_2_9, sinpi_3_9, sinpi_4_9);

  x[0] = vget_low_s16(io[0]);
  x[1] = vget_low_s16(io[1]);
  x[2] = vget_high_s16(io[0]);
  x[3] = vget_high_s16(io[1]);

  s[0] = vmull_lane_s16(x[0], c, 0);  // sinpi_1_9 * x0
  s[1] = vmull_lane_s16(x[0], c, 1);  // sinpi_2_9 * x0
  s[2] = vmull_lane_s16(x[1], c, 2);  // sinpi_3_9 * x1
  s[3] = vmull_lane_s16(x[2], c, 3);  // sinpi_4_9 * x2
  s[4] = vmull_lane_s16(x[2], c, 0);  // sinpi_1_9 * x2
  s[5] = vmull_lane_s16(x[3], c, 1);  // sinpi_2_9 * x3
  s[6] = vmull_lane_s16(x[3], c, 3);  // sinpi_4_9 * x3
  // s7 = x0 + x3 - x2, widened to 32 bits.
  s[7] = vaddl_s16(x[0], x[3]);
  s[7] = vsubw_s16(s[7], x[2]);

  s[0] = vaddq_s32(s[0], s[3]);
  s[0] = vaddq_s32(s[0], s[5]);
  s[1] = vsubq_s32(s[1], s[4]);
  s[1] = vsubq_s32(s[1], s[6]);
  // Careful: s[3] takes over the old s[2] (sinpi_3_9 * x1) BEFORE s[2] is
  // overwritten with sinpi_3_9 * s7.
  s[3] = s[2];
  s[2] = vmulq_s32(c3, s[7]);

  output[0] = vaddq_s32(s[0], s[3]);
  output[1] = vaddq_s32(s[1], s[3]);
  output[2] = s[2];
  // out3 = s0 + s1 - s3.
  output[3] = vaddq_s32(s[0], s[1]);
  output[3] = vsubq_s32(output[3], s[3]);
  dct_const_round_shift_low_8_dual(output, &io[0], &io[1]);
}
#endif // VP9_COMMON_ARM_NEON_VP9_IHT_NEON_H_

View File

@@ -42,7 +42,6 @@ const vpx_prob vp9_cat6_prob_high12[] = { 255, 255, 255, 255, 254, 254,
177, 153, 140, 133, 130, 129 }; 177, 153, 140, 133, 130, 129 };
#endif #endif
/* clang-format off */
const uint8_t vp9_coefband_trans_8x8plus[1024] = { const uint8_t vp9_coefband_trans_8x8plus[1024] = {
0, 1, 1, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 0, 1, 1, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5,
// beyond MAXBAND_INDEX+1 all values are filled as 5 // beyond MAXBAND_INDEX+1 all values are filled as 5
@@ -86,7 +85,6 @@ const uint8_t vp9_coefband_trans_8x8plus[1024] = {
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
}; };
/* clang-format on */
const uint8_t vp9_coefband_trans_4x4[16] = { const uint8_t vp9_coefband_trans_4x4[16] = {
0, 1, 1, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 5, 5, 5, 0, 1, 1, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 5, 5, 5,

View File

@@ -137,6 +137,7 @@ static INLINE const uint8_t *get_band_translate(TX_SIZE tx_size) {
// 128 lists of probabilities are stored for the following ONE node probs: // 128 lists of probabilities are stored for the following ONE node probs:
// 1, 3, 5, 7, ..., 253, 255 // 1, 3, 5, 7, ..., 253, 255
// In between probabilities are interpolated linearly // In between probabilities are interpolated linearly
#define COEFF_PROB_MODELS 255 #define COEFF_PROB_MODELS 255
#define UNCONSTRAINED_NODES 3 #define UNCONSTRAINED_NODES 3

View File

@@ -186,19 +186,16 @@ const vpx_prob vp9_kf_partition_probs[PARTITION_CONTEXTS][PARTITION_TYPES - 1] =
{ 93, 24, 99 }, // a split, l not split { 93, 24, 99 }, // a split, l not split
{ 85, 119, 44 }, // l split, a not split { 85, 119, 44 }, // l split, a not split
{ 62, 59, 67 }, // a/l both split { 62, 59, 67 }, // a/l both split
// 16x16 -> 8x8 // 16x16 -> 8x8
{ 149, 53, 53 }, // a/l both not split { 149, 53, 53 }, // a/l both not split
{ 94, 20, 48 }, // a split, l not split { 94, 20, 48 }, // a split, l not split
{ 83, 53, 24 }, // l split, a not split { 83, 53, 24 }, // l split, a not split
{ 52, 18, 18 }, // a/l both split { 52, 18, 18 }, // a/l both split
// 32x32 -> 16x16 // 32x32 -> 16x16
{ 150, 40, 39 }, // a/l both not split { 150, 40, 39 }, // a/l both not split
{ 78, 12, 26 }, // a split, l not split { 78, 12, 26 }, // a split, l not split
{ 67, 33, 11 }, // l split, a not split { 67, 33, 11 }, // l split, a not split
{ 24, 7, 5 }, // a/l both split { 24, 7, 5 }, // a/l both split
// 64x64 -> 32x32 // 64x64 -> 32x32
{ 174, 35, 49 }, // a/l both not split { 174, 35, 49 }, // a/l both not split
{ 68, 11, 27 }, // a split, l not split { 68, 11, 27 }, // a split, l not split

View File

@@ -22,7 +22,9 @@ const vpx_tree_index vp9_mv_class_tree[TREE_SIZE(MV_CLASSES)] = {
18, -MV_CLASS_7, -MV_CLASS_8, -MV_CLASS_9, -MV_CLASS_10, 18, -MV_CLASS_7, -MV_CLASS_8, -MV_CLASS_9, -MV_CLASS_10,
}; };
const vpx_tree_index vp9_mv_class0_tree[TREE_SIZE(CLASS0_SIZE)] = { -0, -1 }; const vpx_tree_index vp9_mv_class0_tree[TREE_SIZE(CLASS0_SIZE)] = {
-0, -1,
};
const vpx_tree_index vp9_mv_fp_tree[TREE_SIZE(MV_FP_SIZE)] = { -0, 2, -1, const vpx_tree_index vp9_mv_fp_tree[TREE_SIZE(MV_FP_SIZE)] = { -0, 2, -1,
4, -2, -3 }; 4, -2, -3 };

View File

@@ -1174,7 +1174,7 @@ void vp9_filter_block_plane_non420(VP9_COMMON *cm,
} }
// Disable filtering on the leftmost column // Disable filtering on the leftmost column
border_mask = ~(mi_col == 0 ? 1 : 0); border_mask = ~(mi_col == 0);
#if CONFIG_VP9_HIGHBITDEPTH #if CONFIG_VP9_HIGHBITDEPTH
if (cm->use_highbitdepth) { if (cm->use_highbitdepth) {
highbd_filter_selectively_vert( highbd_filter_selectively_vert(

View File

@@ -229,7 +229,8 @@ int vp9_get_pred_context_single_ref_p2(const MACROBLOCKD *xd) {
else else
pred_context = 4 * (edge_mi->ref_frame[0] == GOLDEN_FRAME); pred_context = 4 * (edge_mi->ref_frame[0] == GOLDEN_FRAME);
} else { } else {
pred_context = 1 + 2 * (edge_mi->ref_frame[0] == GOLDEN_FRAME || pred_context = 1 +
2 * (edge_mi->ref_frame[0] == GOLDEN_FRAME ||
edge_mi->ref_frame[1] == GOLDEN_FRAME); edge_mi->ref_frame[1] == GOLDEN_FRAME);
} }
} else { // inter/inter } else { // inter/inter

View File

@@ -1,13 +1,3 @@
##
## Copyright (c) 2017 The WebM project authors. All Rights Reserved.
##
## Use of this source code is governed by a BSD-style license
## that can be found in the LICENSE file in the root of the source
## tree. An additional intellectual property rights grant can be found
## in the file PATENTS. All contributing project authors may
## be found in the AUTHORS file in the root of the source tree.
##
sub vp9_common_forward_decls() { sub vp9_common_forward_decls() {
print <<EOF print <<EOF
/* /*
@@ -67,13 +57,13 @@ add_proto qw/void vp9_iht16x16_256_add/, "const tran_low_t *input, uint8_t *outp
if (vpx_config("CONFIG_EMULATE_HARDWARE") ne "yes") { if (vpx_config("CONFIG_EMULATE_HARDWARE") ne "yes") {
# Note that there are more specializations appended when # Note that there are more specializations appended when
# CONFIG_VP9_HIGHBITDEPTH is off. # CONFIG_VP9_HIGHBITDEPTH is off.
specialize qw/vp9_iht4x4_16_add neon sse2/; specialize qw/vp9_iht4x4_16_add sse2/;
specialize qw/vp9_iht8x8_64_add sse2/; specialize qw/vp9_iht8x8_64_add sse2/;
specialize qw/vp9_iht16x16_256_add sse2/; specialize qw/vp9_iht16x16_256_add sse2/;
if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") ne "yes") { if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") ne "yes") {
# Note that these specializations are appended to the above ones. # Note that these specializations are appended to the above ones.
specialize qw/vp9_iht4x4_16_add dspr2 msa/; specialize qw/vp9_iht4x4_16_add neon dspr2 msa/;
specialize qw/vp9_iht8x8_64_add dspr2 msa/; specialize qw/vp9_iht8x8_64_add neon dspr2 msa/;
specialize qw/vp9_iht16x16_256_add dspr2 msa/; specialize qw/vp9_iht16x16_256_add dspr2 msa/;
} }
} }
@@ -101,12 +91,6 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
add_proto qw/void vp9_highbd_iht8x8_64_add/, "const tran_low_t *input, uint16_t *dest, int stride, int tx_type, int bd"; add_proto qw/void vp9_highbd_iht8x8_64_add/, "const tran_low_t *input, uint16_t *dest, int stride, int tx_type, int bd";
add_proto qw/void vp9_highbd_iht16x16_256_add/, "const tran_low_t *input, uint16_t *output, int pitch, int tx_type, int bd"; add_proto qw/void vp9_highbd_iht16x16_256_add/, "const tran_low_t *input, uint16_t *output, int pitch, int tx_type, int bd";
if (vpx_config("CONFIG_EMULATE_HARDWARE") ne "yes") {
specialize qw/vp9_highbd_iht4x4_16_add neon sse4_1/;
specialize qw/vp9_highbd_iht8x8_64_add sse4_1/;
specialize qw/vp9_highbd_iht16x16_256_add sse4_1/;
}
} }
# #
@@ -129,7 +113,7 @@ add_proto qw/int64_t vp9_block_error/, "const tran_low_t *coeff, const tran_low_
add_proto qw/int64_t vp9_block_error_fp/, "const tran_low_t *coeff, const tran_low_t *dqcoeff, int block_size"; add_proto qw/int64_t vp9_block_error_fp/, "const tran_low_t *coeff, const tran_low_t *dqcoeff, int block_size";
add_proto qw/void vp9_quantize_fp/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *round_ptr, const int16_t *quant_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; add_proto qw/void vp9_quantize_fp/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *round_ptr, const int16_t *quant_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
specialize qw/vp9_quantize_fp neon sse2 avx2/, "$ssse3_x86_64"; specialize qw/vp9_quantize_fp neon sse2/, "$ssse3_x86_64";
add_proto qw/void vp9_quantize_fp_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *round_ptr, const int16_t *quant_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; add_proto qw/void vp9_quantize_fp_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *round_ptr, const int16_t *quant_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
specialize qw/vp9_quantize_fp_32x32 neon/, "$ssse3_x86_64"; specialize qw/vp9_quantize_fp_32x32 neon/, "$ssse3_x86_64";

View File

@@ -1,419 +0,0 @@
/*
* Copyright (c) 2018 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "./vp9_rtcd.h"
#include "vp9/common/vp9_idct.h"
#include "vpx_dsp/x86/highbd_inv_txfm_sse4.h"
#include "vpx_dsp/x86/inv_txfm_sse2.h"
#include "vpx_dsp/x86/transpose_sse2.h"
#include "vpx_dsp/x86/txfm_common_sse2.h"
// Half butterfly: multiply the four 32-bit lanes of |in| by the fixed-point
// constant |c| (pre-scaled by 4, matching the project's pair_set_epi32
// convention) and leave the full 64-bit products in s[0]/s[1].
static INLINE void highbd_iadst_half_butterfly_sse4_1(const __m128i in,
                                                      const int c,
                                                      __m128i *const s) {
  const __m128i factor = pair_set_epi32(4 * c, 0);
  __m128i widened[2];

  extend_64bit(in, widened);
  s[1] = _mm_mul_epi32(factor, widened[1]);
  s[0] = _mm_mul_epi32(factor, widened[0]);
}
// Full butterfly with 64-bit intermediates:
//   s0 = in0 * c0 + in1 * c1
//   s1 = in0 * c1 - in1 * c0
// Each output is returned as a pair of registers holding the low/high 64-bit
// halves of the four products; constants are pre-scaled by 4 as required by
// pair_set_epi32.
static INLINE void highbd_iadst_butterfly_sse4_1(const __m128i in0,
                                                 const __m128i in1,
                                                 const int c0, const int c1,
                                                 __m128i *const s0,
                                                 __m128i *const s1) {
  const __m128i f0 = pair_set_epi32(4 * c0, 0);
  const __m128i f1 = pair_set_epi32(4 * c1, 0);
  __m128i a[2], b[2];
  __m128i p00[2], p01[2], p10[2], p11[2];

  extend_64bit(in0, a);
  extend_64bit(in1, b);
  p00[0] = _mm_mul_epi32(f0, a[0]);
  p00[1] = _mm_mul_epi32(f0, a[1]);
  p01[0] = _mm_mul_epi32(f0, b[0]);
  p01[1] = _mm_mul_epi32(f0, b[1]);
  p10[0] = _mm_mul_epi32(f1, a[0]);
  p10[1] = _mm_mul_epi32(f1, a[1]);
  p11[0] = _mm_mul_epi32(f1, b[0]);
  p11[1] = _mm_mul_epi32(f1, b[1]);
  s0[0] = _mm_add_epi64(p00[0], p11[0]);
  s0[1] = _mm_add_epi64(p00[1], p11[1]);
  s1[0] = _mm_sub_epi64(p10[0], p01[0]);
  s1[1] = _mm_sub_epi64(p10[1], p01[1]);
}
// In-place 16-point inverse ADST over 4 columns: io[0..15] each hold one row
// of four 32-bit coefficients.  High-bitdepth coefficients would overflow
// 32-bit intermediates, so every butterfly keeps 64-bit products
// (x*[0]/x*[1] pairs) until dct_const_round_shift_64bit() rounds them and
// pack_4() repacks the two 64-bit halves into one 32-bit vector.
static void highbd_iadst16_4col_sse4_1(__m128i *const io /*io[16]*/) {
  __m128i s0[2], s1[2], s2[2], s3[2], s4[2], s5[2], s6[2], s7[2], s8[2], s9[2],
      s10[2], s11[2], s12[2], s13[2], s14[2], s15[2];
  __m128i x0[2], x1[2], x2[2], x3[2], x4[2], x5[2], x6[2], x7[2], x8[2], x9[2],
      x10[2], x11[2], x12[2], x13[2], x14[2], x15[2];

  // stage 1: eight butterflies pairing io[15-k] with io[k] using odd cospi
  // constants, then cross-add/subtract the s0..s7 and s8..s15 halves.
  highbd_iadst_butterfly_sse4_1(io[15], io[0], cospi_1_64, cospi_31_64, s0, s1);
  highbd_iadst_butterfly_sse4_1(io[13], io[2], cospi_5_64, cospi_27_64, s2, s3);
  highbd_iadst_butterfly_sse4_1(io[11], io[4], cospi_9_64, cospi_23_64, s4, s5);
  highbd_iadst_butterfly_sse4_1(io[9], io[6], cospi_13_64, cospi_19_64, s6, s7);
  highbd_iadst_butterfly_sse4_1(io[7], io[8], cospi_17_64, cospi_15_64, s8, s9);
  highbd_iadst_butterfly_sse4_1(io[5], io[10], cospi_21_64, cospi_11_64, s10,
                                s11);
  highbd_iadst_butterfly_sse4_1(io[3], io[12], cospi_25_64, cospi_7_64, s12,
                                s13);
  highbd_iadst_butterfly_sse4_1(io[1], io[14], cospi_29_64, cospi_3_64, s14,
                                s15);
  x0[0] = _mm_add_epi64(s0[0], s8[0]);
  x0[1] = _mm_add_epi64(s0[1], s8[1]);
  x1[0] = _mm_add_epi64(s1[0], s9[0]);
  x1[1] = _mm_add_epi64(s1[1], s9[1]);
  x2[0] = _mm_add_epi64(s2[0], s10[0]);
  x2[1] = _mm_add_epi64(s2[1], s10[1]);
  x3[0] = _mm_add_epi64(s3[0], s11[0]);
  x3[1] = _mm_add_epi64(s3[1], s11[1]);
  x4[0] = _mm_add_epi64(s4[0], s12[0]);
  x4[1] = _mm_add_epi64(s4[1], s12[1]);
  x5[0] = _mm_add_epi64(s5[0], s13[0]);
  x5[1] = _mm_add_epi64(s5[1], s13[1]);
  x6[0] = _mm_add_epi64(s6[0], s14[0]);
  x6[1] = _mm_add_epi64(s6[1], s14[1]);
  x7[0] = _mm_add_epi64(s7[0], s15[0]);
  x7[1] = _mm_add_epi64(s7[1], s15[1]);
  x8[0] = _mm_sub_epi64(s0[0], s8[0]);
  x8[1] = _mm_sub_epi64(s0[1], s8[1]);
  x9[0] = _mm_sub_epi64(s1[0], s9[0]);
  x9[1] = _mm_sub_epi64(s1[1], s9[1]);
  x10[0] = _mm_sub_epi64(s2[0], s10[0]);
  x10[1] = _mm_sub_epi64(s2[1], s10[1]);
  x11[0] = _mm_sub_epi64(s3[0], s11[0]);
  x11[1] = _mm_sub_epi64(s3[1], s11[1]);
  x12[0] = _mm_sub_epi64(s4[0], s12[0]);
  x12[1] = _mm_sub_epi64(s4[1], s12[1]);
  x13[0] = _mm_sub_epi64(s5[0], s13[0]);
  x13[1] = _mm_sub_epi64(s5[1], s13[1]);
  x14[0] = _mm_sub_epi64(s6[0], s14[0]);
  x14[1] = _mm_sub_epi64(s6[1], s14[1]);
  x15[0] = _mm_sub_epi64(s7[0], s15[0]);
  x15[1] = _mm_sub_epi64(s7[1], s15[1]);
  // Round all 64-bit sums back toward 32-bit precision.
  x0[0] = dct_const_round_shift_64bit(x0[0]);
  x0[1] = dct_const_round_shift_64bit(x0[1]);
  x1[0] = dct_const_round_shift_64bit(x1[0]);
  x1[1] = dct_const_round_shift_64bit(x1[1]);
  x2[0] = dct_const_round_shift_64bit(x2[0]);
  x2[1] = dct_const_round_shift_64bit(x2[1]);
  x3[0] = dct_const_round_shift_64bit(x3[0]);
  x3[1] = dct_const_round_shift_64bit(x3[1]);
  x4[0] = dct_const_round_shift_64bit(x4[0]);
  x4[1] = dct_const_round_shift_64bit(x4[1]);
  x5[0] = dct_const_round_shift_64bit(x5[0]);
  x5[1] = dct_const_round_shift_64bit(x5[1]);
  x6[0] = dct_const_round_shift_64bit(x6[0]);
  x6[1] = dct_const_round_shift_64bit(x6[1]);
  x7[0] = dct_const_round_shift_64bit(x7[0]);
  x7[1] = dct_const_round_shift_64bit(x7[1]);
  x8[0] = dct_const_round_shift_64bit(x8[0]);
  x8[1] = dct_const_round_shift_64bit(x8[1]);
  x9[0] = dct_const_round_shift_64bit(x9[0]);
  x9[1] = dct_const_round_shift_64bit(x9[1]);
  x10[0] = dct_const_round_shift_64bit(x10[0]);
  x10[1] = dct_const_round_shift_64bit(x10[1]);
  x11[0] = dct_const_round_shift_64bit(x11[0]);
  x11[1] = dct_const_round_shift_64bit(x11[1]);
  x12[0] = dct_const_round_shift_64bit(x12[0]);
  x12[1] = dct_const_round_shift_64bit(x12[1]);
  x13[0] = dct_const_round_shift_64bit(x13[0]);
  x13[1] = dct_const_round_shift_64bit(x13[1]);
  x14[0] = dct_const_round_shift_64bit(x14[0]);
  x14[1] = dct_const_round_shift_64bit(x14[1]);
  x15[0] = dct_const_round_shift_64bit(x15[0]);
  x15[1] = dct_const_round_shift_64bit(x15[1]);
  // Repack each 64-bit pair into a single 32-bit lane vector.
  x0[0] = pack_4(x0[0], x0[1]);
  x1[0] = pack_4(x1[0], x1[1]);
  x2[0] = pack_4(x2[0], x2[1]);
  x3[0] = pack_4(x3[0], x3[1]);
  x4[0] = pack_4(x4[0], x4[1]);
  x5[0] = pack_4(x5[0], x5[1]);
  x6[0] = pack_4(x6[0], x6[1]);
  x7[0] = pack_4(x7[0], x7[1]);
  x8[0] = pack_4(x8[0], x8[1]);
  x9[0] = pack_4(x9[0], x9[1]);
  x10[0] = pack_4(x10[0], x10[1]);
  x11[0] = pack_4(x11[0], x11[1]);
  x12[0] = pack_4(x12[0], x12[1]);
  x13[0] = pack_4(x13[0], x13[1]);
  x14[0] = pack_4(x14[0], x14[1]);
  x15[0] = pack_4(x15[0], x15[1]);

  // stage 2: first half passes through (32-bit add/sub only); second half
  // goes through another set of butterflies with 64-bit intermediates.
  s0[0] = x0[0];
  s1[0] = x1[0];
  s2[0] = x2[0];
  s3[0] = x3[0];
  s4[0] = x4[0];
  s5[0] = x5[0];
  s6[0] = x6[0];
  s7[0] = x7[0];
  x0[0] = _mm_add_epi32(s0[0], s4[0]);
  x1[0] = _mm_add_epi32(s1[0], s5[0]);
  x2[0] = _mm_add_epi32(s2[0], s6[0]);
  x3[0] = _mm_add_epi32(s3[0], s7[0]);
  x4[0] = _mm_sub_epi32(s0[0], s4[0]);
  x5[0] = _mm_sub_epi32(s1[0], s5[0]);
  x6[0] = _mm_sub_epi32(s2[0], s6[0]);
  x7[0] = _mm_sub_epi32(s3[0], s7[0]);
  highbd_iadst_butterfly_sse4_1(x8[0], x9[0], cospi_4_64, cospi_28_64, s8, s9);
  highbd_iadst_butterfly_sse4_1(x10[0], x11[0], cospi_20_64, cospi_12_64, s10,
                                s11);
  highbd_iadst_butterfly_sse4_1(x13[0], x12[0], cospi_28_64, cospi_4_64, s13,
                                s12);
  highbd_iadst_butterfly_sse4_1(x15[0], x14[0], cospi_12_64, cospi_20_64, s15,
                                s14);
  x8[0] = _mm_add_epi64(s8[0], s12[0]);
  x8[1] = _mm_add_epi64(s8[1], s12[1]);
  x9[0] = _mm_add_epi64(s9[0], s13[0]);
  x9[1] = _mm_add_epi64(s9[1], s13[1]);
  x10[0] = _mm_add_epi64(s10[0], s14[0]);
  x10[1] = _mm_add_epi64(s10[1], s14[1]);
  x11[0] = _mm_add_epi64(s11[0], s15[0]);
  x11[1] = _mm_add_epi64(s11[1], s15[1]);
  x12[0] = _mm_sub_epi64(s8[0], s12[0]);
  x12[1] = _mm_sub_epi64(s8[1], s12[1]);
  x13[0] = _mm_sub_epi64(s9[0], s13[0]);
  x13[1] = _mm_sub_epi64(s9[1], s13[1]);
  x14[0] = _mm_sub_epi64(s10[0], s14[0]);
  x14[1] = _mm_sub_epi64(s10[1], s14[1]);
  x15[0] = _mm_sub_epi64(s11[0], s15[0]);
  x15[1] = _mm_sub_epi64(s11[1], s15[1]);
  x8[0] = dct_const_round_shift_64bit(x8[0]);
  x8[1] = dct_const_round_shift_64bit(x8[1]);
  x9[0] = dct_const_round_shift_64bit(x9[0]);
  x9[1] = dct_const_round_shift_64bit(x9[1]);
  x10[0] = dct_const_round_shift_64bit(x10[0]);
  x10[1] = dct_const_round_shift_64bit(x10[1]);
  x11[0] = dct_const_round_shift_64bit(x11[0]);
  x11[1] = dct_const_round_shift_64bit(x11[1]);
  x12[0] = dct_const_round_shift_64bit(x12[0]);
  x12[1] = dct_const_round_shift_64bit(x12[1]);
  x13[0] = dct_const_round_shift_64bit(x13[0]);
  x13[1] = dct_const_round_shift_64bit(x13[1]);
  x14[0] = dct_const_round_shift_64bit(x14[0]);
  x14[1] = dct_const_round_shift_64bit(x14[1]);
  x15[0] = dct_const_round_shift_64bit(x15[0]);
  x15[1] = dct_const_round_shift_64bit(x15[1]);
  x8[0] = pack_4(x8[0], x8[1]);
  x9[0] = pack_4(x9[0], x9[1]);
  x10[0] = pack_4(x10[0], x10[1]);
  x11[0] = pack_4(x11[0], x11[1]);
  x12[0] = pack_4(x12[0], x12[1]);
  x13[0] = pack_4(x13[0], x13[1]);
  x14[0] = pack_4(x14[0], x14[1]);
  x15[0] = pack_4(x15[0], x15[1]);

  // stage 3: pass-through for quarters 0 and 2; cospi_8/24 butterflies for
  // quarters 1 and 3.
  s0[0] = x0[0];
  s1[0] = x1[0];
  s2[0] = x2[0];
  s3[0] = x3[0];
  highbd_iadst_butterfly_sse4_1(x4[0], x5[0], cospi_8_64, cospi_24_64, s4, s5);
  highbd_iadst_butterfly_sse4_1(x7[0], x6[0], cospi_24_64, cospi_8_64, s7, s6);
  s8[0] = x8[0];
  s9[0] = x9[0];
  s10[0] = x10[0];
  s11[0] = x11[0];
  highbd_iadst_butterfly_sse4_1(x12[0], x13[0], cospi_8_64, cospi_24_64, s12,
                                s13);
  highbd_iadst_butterfly_sse4_1(x15[0], x14[0], cospi_24_64, cospi_8_64, s15,
                                s14);
  x0[0] = _mm_add_epi32(s0[0], s2[0]);
  x1[0] = _mm_add_epi32(s1[0], s3[0]);
  x2[0] = _mm_sub_epi32(s0[0], s2[0]);
  x3[0] = _mm_sub_epi32(s1[0], s3[0]);
  x4[0] = _mm_add_epi64(s4[0], s6[0]);
  x4[1] = _mm_add_epi64(s4[1], s6[1]);
  x5[0] = _mm_add_epi64(s5[0], s7[0]);
  x5[1] = _mm_add_epi64(s5[1], s7[1]);
  x6[0] = _mm_sub_epi64(s4[0], s6[0]);
  x6[1] = _mm_sub_epi64(s4[1], s6[1]);
  x7[0] = _mm_sub_epi64(s5[0], s7[0]);
  x7[1] = _mm_sub_epi64(s5[1], s7[1]);
  x4[0] = dct_const_round_shift_64bit(x4[0]);
  x4[1] = dct_const_round_shift_64bit(x4[1]);
  x5[0] = dct_const_round_shift_64bit(x5[0]);
  x5[1] = dct_const_round_shift_64bit(x5[1]);
  x6[0] = dct_const_round_shift_64bit(x6[0]);
  x6[1] = dct_const_round_shift_64bit(x6[1]);
  x7[0] = dct_const_round_shift_64bit(x7[0]);
  x7[1] = dct_const_round_shift_64bit(x7[1]);
  x4[0] = pack_4(x4[0], x4[1]);
  x5[0] = pack_4(x5[0], x5[1]);
  x6[0] = pack_4(x6[0], x6[1]);
  x7[0] = pack_4(x7[0], x7[1]);
  x8[0] = _mm_add_epi32(s8[0], s10[0]);
  x9[0] = _mm_add_epi32(s9[0], s11[0]);
  x10[0] = _mm_sub_epi32(s8[0], s10[0]);
  x11[0] = _mm_sub_epi32(s9[0], s11[0]);
  x12[0] = _mm_add_epi64(s12[0], s14[0]);
  x12[1] = _mm_add_epi64(s12[1], s14[1]);
  x13[0] = _mm_add_epi64(s13[0], s15[0]);
  x13[1] = _mm_add_epi64(s13[1], s15[1]);
  x14[0] = _mm_sub_epi64(s12[0], s14[0]);
  x14[1] = _mm_sub_epi64(s12[1], s14[1]);
  x15[0] = _mm_sub_epi64(s13[0], s15[0]);
  x15[1] = _mm_sub_epi64(s13[1], s15[1]);
  x12[0] = dct_const_round_shift_64bit(x12[0]);
  x12[1] = dct_const_round_shift_64bit(x12[1]);
  x13[0] = dct_const_round_shift_64bit(x13[0]);
  x13[1] = dct_const_round_shift_64bit(x13[1]);
  x14[0] = dct_const_round_shift_64bit(x14[0]);
  x14[1] = dct_const_round_shift_64bit(x14[1]);
  x15[0] = dct_const_round_shift_64bit(x15[0]);
  x15[1] = dct_const_round_shift_64bit(x15[1]);
  x12[0] = pack_4(x12[0], x12[1]);
  x13[0] = pack_4(x13[0], x13[1]);
  x14[0] = pack_4(x14[0], x14[1]);
  x15[0] = pack_4(x15[0], x15[1]);

  // stage 4: half butterflies by +/-cospi_16_64 on the remaining pairs.
  s2[0] = _mm_add_epi32(x2[0], x3[0]);
  s3[0] = _mm_sub_epi32(x2[0], x3[0]);
  s6[0] = _mm_add_epi32(x7[0], x6[0]);
  s7[0] = _mm_sub_epi32(x7[0], x6[0]);
  s10[0] = _mm_add_epi32(x11[0], x10[0]);
  s11[0] = _mm_sub_epi32(x11[0], x10[0]);
  s14[0] = _mm_add_epi32(x14[0], x15[0]);
  s15[0] = _mm_sub_epi32(x14[0], x15[0]);
  highbd_iadst_half_butterfly_sse4_1(s2[0], -cospi_16_64, s2);
  highbd_iadst_half_butterfly_sse4_1(s3[0], cospi_16_64, s3);
  highbd_iadst_half_butterfly_sse4_1(s6[0], cospi_16_64, s6);
  highbd_iadst_half_butterfly_sse4_1(s7[0], cospi_16_64, s7);
  highbd_iadst_half_butterfly_sse4_1(s10[0], cospi_16_64, s10);
  highbd_iadst_half_butterfly_sse4_1(s11[0], cospi_16_64, s11);
  highbd_iadst_half_butterfly_sse4_1(s14[0], -cospi_16_64, s14);
  highbd_iadst_half_butterfly_sse4_1(s15[0], cospi_16_64, s15);
  x2[0] = dct_const_round_shift_64bit(s2[0]);
  x2[1] = dct_const_round_shift_64bit(s2[1]);
  x3[0] = dct_const_round_shift_64bit(s3[0]);
  x3[1] = dct_const_round_shift_64bit(s3[1]);
  x6[0] = dct_const_round_shift_64bit(s6[0]);
  x6[1] = dct_const_round_shift_64bit(s6[1]);
  x7[0] = dct_const_round_shift_64bit(s7[0]);
  x7[1] = dct_const_round_shift_64bit(s7[1]);
  x10[0] = dct_const_round_shift_64bit(s10[0]);
  x10[1] = dct_const_round_shift_64bit(s10[1]);
  x11[0] = dct_const_round_shift_64bit(s11[0]);
  x11[1] = dct_const_round_shift_64bit(s11[1]);
  x14[0] = dct_const_round_shift_64bit(s14[0]);
  x14[1] = dct_const_round_shift_64bit(s14[1]);
  x15[0] = dct_const_round_shift_64bit(s15[0]);
  x15[1] = dct_const_round_shift_64bit(s15[1]);
  x2[0] = pack_4(x2[0], x2[1]);
  x3[0] = pack_4(x3[0], x3[1]);
  x6[0] = pack_4(x6[0], x6[1]);
  x7[0] = pack_4(x7[0], x7[1]);
  x10[0] = pack_4(x10[0], x10[1]);
  x11[0] = pack_4(x11[0], x11[1]);
  x14[0] = pack_4(x14[0], x14[1]);
  x15[0] = pack_4(x15[0], x15[1]);

  // Final output permutation; odd-position outputs 1, 3, 13, 15 are negated
  // (0 - x) per the iADST output sign pattern.
  io[0] = x0[0];
  io[1] = _mm_sub_epi32(_mm_setzero_si128(), x8[0]);
  io[2] = x12[0];
  io[3] = _mm_sub_epi32(_mm_setzero_si128(), x4[0]);
  io[4] = x6[0];
  io[5] = x14[0];
  io[6] = x10[0];
  io[7] = x2[0];
  io[8] = x3[0];
  io[9] = x11[0];
  io[10] = x15[0];
  io[11] = x7[0];
  io[12] = x5[0];
  io[13] = _mm_sub_epi32(_mm_setzero_si128(), x13[0]);
  io[14] = x9[0];
  io[15] = _mm_sub_epi32(_mm_setzero_si128(), x1[0]);
}
// 16x16 inverse hybrid transform (DCT/ADST per dimension, selected by
// |tx_type|) plus reconstruction: the result is added to |dest| and clipped
// to |bd| bits.  For bd == 8 the coefficients fit in 16 bits, so the faster
// SSE2 16-bit path is used; otherwise the 32-bit SSE4.1 path runs on four
// 4-column slices.  In both paths the first 1-D pass picks DCT when tx_type
// is DCT_DCT/ADST_DCT (ADST otherwise), and the second pass picks DCT when
// tx_type is DCT_DCT/DCT_ADST.
void vp9_highbd_iht16x16_256_add_sse4_1(const tran_low_t *input, uint16_t *dest,
                                        int stride, int tx_type, int bd) {
  int i;
  __m128i out[16], *in;

  if (bd == 8) {
    // 16-bit path: two 8-column halves (l = left, r = right).
    __m128i l[16], r[16];

    in = l;
    for (i = 0; i < 2; i++) {
      // Load 32-bit coefficients, pack to 16 bits and transpose for the
      // first 1-D transform pass.
      highbd_load_pack_transpose_32bit_8x8(&input[0], 16, &in[0]);
      highbd_load_pack_transpose_32bit_8x8(&input[8], 16, &in[8]);
      if (tx_type == DCT_DCT || tx_type == ADST_DCT) {
        idct16_8col(in, in);
      } else {
        vpx_iadst16_8col_sse2(in);
      }
      in = r;
      input += 128;
    }

    // Second 1-D pass over transposed data, then write out 8 columns at a
    // time.
    for (i = 0; i < 16; i += 8) {
      int j;
      transpose_16bit_8x8(l + i, out);
      transpose_16bit_8x8(r + i, out + 8);
      if (tx_type == DCT_DCT || tx_type == DCT_ADST) {
        idct16_8col(out, out);
      } else {
        vpx_iadst16_8col_sse2(out);
      }

      for (j = 0; j < 16; ++j) {
        highbd_write_buffer_8(dest + j * stride, out[j], bd);
      }
      dest += 8;
    }
  } else {
    // 32-bit path: process the 16x16 block as four 4-column slices.
    __m128i all[4][16];

    for (i = 0; i < 4; i++) {
      in = all[i];
      highbd_load_transpose_32bit_8x4(&input[0], 16, &in[0]);
      highbd_load_transpose_32bit_8x4(&input[8], 16, &in[8]);
      if (tx_type == DCT_DCT || tx_type == ADST_DCT) {
        vpx_highbd_idct16_4col_sse4_1(in);
      } else {
        highbd_iadst16_4col_sse4_1(in);
      }
      input += 4 * 16;
    }

    for (i = 0; i < 16; i += 4) {
      int j;
      transpose_32bit_4x4(all[0] + i, out + 0);
      transpose_32bit_4x4(all[1] + i, out + 4);
      transpose_32bit_4x4(all[2] + i, out + 8);
      transpose_32bit_4x4(all[3] + i, out + 12);
      if (tx_type == DCT_DCT || tx_type == DCT_ADST) {
        vpx_highbd_idct16_4col_sse4_1(out);
      } else {
        highbd_iadst16_4col_sse4_1(out);
      }

      for (j = 0; j < 16; ++j) {
        highbd_write_buffer_4(dest + j * stride, out[j], bd);
      }
      dest += 4;
    }
  }
}

View File

@@ -1,131 +0,0 @@
/*
* Copyright (c) 2018 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "./vp9_rtcd.h"
#include "vp9/common/vp9_idct.h"
#include "vpx_dsp/x86/highbd_inv_txfm_sse4.h"
#include "vpx_dsp/x86/inv_txfm_sse2.h"
#include "vpx_dsp/x86/transpose_sse2.h"
#include "vpx_dsp/x86/txfm_common_sse2.h"
// In-place 4-point inverse ADST on io[0..3] (four 32-bit coefficients per
// register) using the sinpi_{1..4}_9 constants.  All products are widened to
// 64 bits (_mm_mul_epi32 on extend_64bit halves) so high-bitdepth inputs
// cannot overflow before dct_const_round_shift_64bit()/pack_4() narrow the
// results back to 32 bits.
static INLINE void highbd_iadst4_sse4_1(__m128i *const io) {
  // Constants are pre-scaled by 4 per the pair_set_epi32 convention.
  const __m128i pair_c1 = pair_set_epi32(4 * sinpi_1_9, 0);
  const __m128i pair_c2 = pair_set_epi32(4 * sinpi_2_9, 0);
  const __m128i pair_c3 = pair_set_epi32(4 * sinpi_3_9, 0);
  const __m128i pair_c4 = pair_set_epi32(4 * sinpi_4_9, 0);
  __m128i s0[2], s1[2], s2[2], s3[2], s4[2], s5[2], s6[2], t0[2], t1[2], t2[2];
  __m128i temp[2];

  transpose_32bit_4x4(io, io);

  // Per-input products: s0/s1 from io[0], s2 from io[1], s3/s4 from io[2],
  // s5/s6 from io[3].
  extend_64bit(io[0], temp);
  s0[0] = _mm_mul_epi32(pair_c1, temp[0]);
  s0[1] = _mm_mul_epi32(pair_c1, temp[1]);
  s1[0] = _mm_mul_epi32(pair_c2, temp[0]);
  s1[1] = _mm_mul_epi32(pair_c2, temp[1]);
  extend_64bit(io[1], temp);
  s2[0] = _mm_mul_epi32(pair_c3, temp[0]);
  s2[1] = _mm_mul_epi32(pair_c3, temp[1]);
  extend_64bit(io[2], temp);
  s3[0] = _mm_mul_epi32(pair_c4, temp[0]);
  s3[1] = _mm_mul_epi32(pair_c4, temp[1]);
  s4[0] = _mm_mul_epi32(pair_c1, temp[0]);
  s4[1] = _mm_mul_epi32(pair_c1, temp[1]);
  extend_64bit(io[3], temp);
  s5[0] = _mm_mul_epi32(pair_c2, temp[0]);
  s5[1] = _mm_mul_epi32(pair_c2, temp[1]);
  s6[0] = _mm_mul_epi32(pair_c4, temp[0]);
  s6[1] = _mm_mul_epi32(pair_c4, temp[1]);

  // t0 = s0 + s3 + s5, t1 = s1 - s4 - s6 (kept in 64-bit halves).
  t0[0] = _mm_add_epi64(s0[0], s3[0]);
  t0[1] = _mm_add_epi64(s0[1], s3[1]);
  t0[0] = _mm_add_epi64(t0[0], s5[0]);
  t0[1] = _mm_add_epi64(t0[1], s5[1]);
  t1[0] = _mm_sub_epi64(s1[0], s4[0]);
  t1[1] = _mm_sub_epi64(s1[1], s4[1]);
  t1[0] = _mm_sub_epi64(t1[0], s6[0]);
  t1[1] = _mm_sub_epi64(t1[1], s6[1]);
  // t2 = sinpi_3_9 * (io[0] - io[2] + io[3]).
  temp[0] = _mm_sub_epi32(io[0], io[2]);
  temp[0] = _mm_add_epi32(temp[0], io[3]);
  extend_64bit(temp[0], temp);
  t2[0] = _mm_mul_epi32(pair_c3, temp[0]);
  t2[1] = _mm_mul_epi32(pair_c3, temp[1]);

  // Outputs: s0 = t0 + s2, s1 = t1 + s2, s3 = t0 + t1 - s2.
  s0[0] = _mm_add_epi64(t0[0], s2[0]);
  s0[1] = _mm_add_epi64(t0[1], s2[1]);
  s1[0] = _mm_add_epi64(t1[0], s2[0]);
  s1[1] = _mm_add_epi64(t1[1], s2[1]);
  s3[0] = _mm_add_epi64(t0[0], t1[0]);
  s3[1] = _mm_add_epi64(t0[1], t1[1]);
  s3[0] = _mm_sub_epi64(s3[0], s2[0]);
  s3[1] = _mm_sub_epi64(s3[1], s2[1]);
  s0[0] = dct_const_round_shift_64bit(s0[0]);
  s0[1] = dct_const_round_shift_64bit(s0[1]);
  s1[0] = dct_const_round_shift_64bit(s1[0]);
  s1[1] = dct_const_round_shift_64bit(s1[1]);
  s2[0] = dct_const_round_shift_64bit(t2[0]);
  s2[1] = dct_const_round_shift_64bit(t2[1]);
  s3[0] = dct_const_round_shift_64bit(s3[0]);
  s3[1] = dct_const_round_shift_64bit(s3[1]);
  io[0] = pack_4(s0[0], s0[1]);
  io[1] = pack_4(s1[0], s1[1]);
  io[2] = pack_4(s2[0], s2[1]);
  io[3] = pack_4(s3[0], s3[1]);
}
// 4x4 inverse hybrid transform + reconstruction.  tx_type selects DCT or
// ADST independently per dimension: the first pass is DCT for
// DCT_DCT/ADST_DCT, the second pass is DCT for DCT_DCT/DCT_ADST; ADST
// otherwise.  For bd == 8 the packed 16-bit SSE2 transforms are used and the
// result is rounded by (x + 8) >> 4; the high-bitdepth path does the same
// rounding inside wraplow_16bit_shift4().
void vp9_highbd_iht4x4_16_add_sse4_1(const tran_low_t *input, uint16_t *dest,
                                     int stride, int tx_type, int bd) {
  __m128i io[4];

  io[0] = _mm_load_si128((const __m128i *)(input + 0));
  io[1] = _mm_load_si128((const __m128i *)(input + 4));
  io[2] = _mm_load_si128((const __m128i *)(input + 8));
  io[3] = _mm_load_si128((const __m128i *)(input + 12));

  if (bd == 8) {
    __m128i io_short[2];

    // Pack the 32-bit coefficients into 16 bits for the SSE2 transforms.
    io_short[0] = _mm_packs_epi32(io[0], io[1]);
    io_short[1] = _mm_packs_epi32(io[2], io[3]);
    if (tx_type == DCT_DCT || tx_type == ADST_DCT) {
      idct4_sse2(io_short);
    } else {
      iadst4_sse2(io_short);
    }
    if (tx_type == DCT_DCT || tx_type == DCT_ADST) {
      idct4_sse2(io_short);
    } else {
      iadst4_sse2(io_short);
    }
    // Final rounding: (x + 8) >> 4.
    io_short[0] = _mm_add_epi16(io_short[0], _mm_set1_epi16(8));
    io_short[1] = _mm_add_epi16(io_short[1], _mm_set1_epi16(8));
    io[0] = _mm_srai_epi16(io_short[0], 4);
    io[1] = _mm_srai_epi16(io_short[1], 4);
  } else {
    if (tx_type == DCT_DCT || tx_type == ADST_DCT) {
      highbd_idct4_sse4_1(io);
    } else {
      highbd_iadst4_sse4_1(io);
    }
    if (tx_type == DCT_DCT || tx_type == DCT_ADST) {
      highbd_idct4_sse4_1(io);
    } else {
      highbd_iadst4_sse4_1(io);
    }
    io[0] = wraplow_16bit_shift4(io[0], io[1], _mm_set1_epi32(8));
    io[1] = wraplow_16bit_shift4(io[2], io[3], _mm_set1_epi32(8));
  }

  recon_and_store_4x4(io, dest, stride, bd);
}

View File

@@ -1,255 +0,0 @@
/*
* Copyright (c) 2018 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "./vp9_rtcd.h"
#include "vp9/common/vp9_idct.h"
#include "vpx_dsp/x86/highbd_inv_txfm_sse4.h"
#include "vpx_dsp/x86/inv_txfm_sse2.h"
#include "vpx_dsp/x86/transpose_sse2.h"
#include "vpx_dsp/x86/txfm_common_sse2.h"
// Scales every 32-bit lane of |in| by the fixed-point constant |c| (times 4,
// per pair_set_epi32's convention), keeping the full 64-bit products in
// s[0] and s[1].
static INLINE void highbd_iadst_half_butterfly_sse4_1(const __m128i in,
                                                      const int c,
                                                      __m128i *const s) {
  const __m128i scale = pair_set_epi32(4 * c, 0);
  __m128i halves[2];

  extend_64bit(in, halves);
  s[0] = _mm_mul_epi32(scale, halves[0]);
  s[1] = _mm_mul_epi32(scale, halves[1]);
}
// Butterfly with 64-bit intermediates:
//   s0 = in0 * c0 + in1 * c1
//   s1 = in0 * c1 - in1 * c0
// Outputs are 64-bit product pairs (low half in [0], high half in [1]);
// constants are pre-scaled by 4 as pair_set_epi32 expects.
static INLINE void highbd_iadst_butterfly_sse4_1(const __m128i in0,
                                                 const __m128i in1,
                                                 const int c0, const int c1,
                                                 __m128i *const s0,
                                                 __m128i *const s1) {
  const __m128i scale0 = pair_set_epi32(4 * c0, 0);
  const __m128i scale1 = pair_set_epi32(4 * c1, 0);
  __m128i lhs[2], rhs[2];
  __m128i prod00[2], prod01[2], prod10[2], prod11[2];

  extend_64bit(in0, lhs);
  extend_64bit(in1, rhs);
  prod00[0] = _mm_mul_epi32(scale0, lhs[0]);
  prod00[1] = _mm_mul_epi32(scale0, lhs[1]);
  prod01[0] = _mm_mul_epi32(scale0, rhs[0]);
  prod01[1] = _mm_mul_epi32(scale0, rhs[1]);
  prod10[0] = _mm_mul_epi32(scale1, lhs[0]);
  prod10[1] = _mm_mul_epi32(scale1, lhs[1]);
  prod11[0] = _mm_mul_epi32(scale1, rhs[0]);
  prod11[1] = _mm_mul_epi32(scale1, rhs[1]);
  s0[0] = _mm_add_epi64(prod00[0], prod11[0]);
  s0[1] = _mm_add_epi64(prod00[1], prod11[1]);
  s1[0] = _mm_sub_epi64(prod10[0], prod01[0]);
  s1[1] = _mm_sub_epi64(prod10[1], prod01[1]);
}
// In-place 8-point inverse ADST on io[0..7] (four 32-bit coefficients per
// register).  Butterfly products are kept in 64-bit halves (x*[0]/x*[1])
// until dct_const_round_shift_64bit() rounds them and pack_4() repacks them
// into a single 32-bit vector, so high-bitdepth inputs cannot overflow.
static void highbd_iadst8_sse4_1(__m128i *const io) {
  __m128i s0[2], s1[2], s2[2], s3[2], s4[2], s5[2], s6[2], s7[2];
  __m128i x0[2], x1[2], x2[2], x3[2], x4[2], x5[2], x6[2], x7[2];

  transpose_32bit_4x4x2(io, io);

  // stage 1: butterflies pairing io[7-k] with io[k] using odd cospi
  // constants, then cross-add/subtract the halves.
  highbd_iadst_butterfly_sse4_1(io[7], io[0], cospi_2_64, cospi_30_64, s0, s1);
  highbd_iadst_butterfly_sse4_1(io[3], io[4], cospi_18_64, cospi_14_64, s4, s5);
  x0[0] = _mm_add_epi64(s0[0], s4[0]);
  x0[1] = _mm_add_epi64(s0[1], s4[1]);
  x1[0] = _mm_add_epi64(s1[0], s5[0]);
  x1[1] = _mm_add_epi64(s1[1], s5[1]);
  x4[0] = _mm_sub_epi64(s0[0], s4[0]);
  x4[1] = _mm_sub_epi64(s0[1], s4[1]);
  x5[0] = _mm_sub_epi64(s1[0], s5[0]);
  x5[1] = _mm_sub_epi64(s1[1], s5[1]);
  highbd_iadst_butterfly_sse4_1(io[5], io[2], cospi_10_64, cospi_22_64, s2, s3);
  highbd_iadst_butterfly_sse4_1(io[1], io[6], cospi_26_64, cospi_6_64, s6, s7);
  x2[0] = _mm_add_epi64(s2[0], s6[0]);
  x2[1] = _mm_add_epi64(s2[1], s6[1]);
  x3[0] = _mm_add_epi64(s3[0], s7[0]);
  x3[1] = _mm_add_epi64(s3[1], s7[1]);
  x6[0] = _mm_sub_epi64(s2[0], s6[0]);
  x6[1] = _mm_sub_epi64(s2[1], s6[1]);
  x7[0] = _mm_sub_epi64(s3[0], s7[0]);
  x7[1] = _mm_sub_epi64(s3[1], s7[1]);
  x0[0] = dct_const_round_shift_64bit(x0[0]);
  x0[1] = dct_const_round_shift_64bit(x0[1]);
  x1[0] = dct_const_round_shift_64bit(x1[0]);
  x1[1] = dct_const_round_shift_64bit(x1[1]);
  x2[0] = dct_const_round_shift_64bit(x2[0]);
  x2[1] = dct_const_round_shift_64bit(x2[1]);
  x3[0] = dct_const_round_shift_64bit(x3[0]);
  x3[1] = dct_const_round_shift_64bit(x3[1]);
  x4[0] = dct_const_round_shift_64bit(x4[0]);
  x4[1] = dct_const_round_shift_64bit(x4[1]);
  x5[0] = dct_const_round_shift_64bit(x5[0]);
  x5[1] = dct_const_round_shift_64bit(x5[1]);
  x6[0] = dct_const_round_shift_64bit(x6[0]);
  x6[1] = dct_const_round_shift_64bit(x6[1]);
  x7[0] = dct_const_round_shift_64bit(x7[0]);
  x7[1] = dct_const_round_shift_64bit(x7[1]);
  s0[0] = pack_4(x0[0], x0[1]);  // s0 = x0;
  s1[0] = pack_4(x1[0], x1[1]);  // s1 = x1;
  s2[0] = pack_4(x2[0], x2[1]);  // s2 = x2;
  s3[0] = pack_4(x3[0], x3[1]);  // s3 = x3;
  x4[0] = pack_4(x4[0], x4[1]);
  x5[0] = pack_4(x5[0], x5[1]);
  x6[0] = pack_4(x6[0], x6[1]);
  x7[0] = pack_4(x7[0], x7[1]);

  // stage 2: first half is 32-bit add/sub only; second half goes through
  // cospi_8/24 butterflies with 64-bit intermediates.
  x0[0] = _mm_add_epi32(s0[0], s2[0]);
  x1[0] = _mm_add_epi32(s1[0], s3[0]);
  x2[0] = _mm_sub_epi32(s0[0], s2[0]);
  x3[0] = _mm_sub_epi32(s1[0], s3[0]);
  highbd_iadst_butterfly_sse4_1(x4[0], x5[0], cospi_8_64, cospi_24_64, s4, s5);
  highbd_iadst_butterfly_sse4_1(x7[0], x6[0], cospi_24_64, cospi_8_64, s7, s6);
  x4[0] = _mm_add_epi64(s4[0], s6[0]);
  x4[1] = _mm_add_epi64(s4[1], s6[1]);
  x5[0] = _mm_add_epi64(s5[0], s7[0]);
  x5[1] = _mm_add_epi64(s5[1], s7[1]);
  x6[0] = _mm_sub_epi64(s4[0], s6[0]);
  x6[1] = _mm_sub_epi64(s4[1], s6[1]);
  x7[0] = _mm_sub_epi64(s5[0], s7[0]);
  x7[1] = _mm_sub_epi64(s5[1], s7[1]);
  x4[0] = dct_const_round_shift_64bit(x4[0]);
  x4[1] = dct_const_round_shift_64bit(x4[1]);
  x5[0] = dct_const_round_shift_64bit(x5[0]);
  x5[1] = dct_const_round_shift_64bit(x5[1]);
  x6[0] = dct_const_round_shift_64bit(x6[0]);
  x6[1] = dct_const_round_shift_64bit(x6[1]);
  x7[0] = dct_const_round_shift_64bit(x7[0]);
  x7[1] = dct_const_round_shift_64bit(x7[1]);
  x4[0] = pack_4(x4[0], x4[1]);
  x5[0] = pack_4(x5[0], x5[1]);
  x6[0] = pack_4(x6[0], x6[1]);
  x7[0] = pack_4(x7[0], x7[1]);

  // stage 3: half butterflies by cospi_16_64 on the remaining pairs.
  s2[0] = _mm_add_epi32(x2[0], x3[0]);
  s3[0] = _mm_sub_epi32(x2[0], x3[0]);
  s6[0] = _mm_add_epi32(x6[0], x7[0]);
  s7[0] = _mm_sub_epi32(x6[0], x7[0]);
  highbd_iadst_half_butterfly_sse4_1(s2[0], cospi_16_64, s2);
  highbd_iadst_half_butterfly_sse4_1(s3[0], cospi_16_64, s3);
  highbd_iadst_half_butterfly_sse4_1(s6[0], cospi_16_64, s6);
  highbd_iadst_half_butterfly_sse4_1(s7[0], cospi_16_64, s7);
  x2[0] = dct_const_round_shift_64bit(s2[0]);
  x2[1] = dct_const_round_shift_64bit(s2[1]);
  x3[0] = dct_const_round_shift_64bit(s3[0]);
  x3[1] = dct_const_round_shift_64bit(s3[1]);
  x6[0] = dct_const_round_shift_64bit(s6[0]);
  x6[1] = dct_const_round_shift_64bit(s6[1]);
  x7[0] = dct_const_round_shift_64bit(s7[0]);
  x7[1] = dct_const_round_shift_64bit(s7[1]);
  x2[0] = pack_4(x2[0], x2[1]);
  x3[0] = pack_4(x3[0], x3[1]);
  x6[0] = pack_4(x6[0], x6[1]);
  x7[0] = pack_4(x7[0], x7[1]);

  // Final output permutation; outputs 1, 3, 5, 7 are negated (0 - x) per the
  // iADST output sign pattern.
  io[0] = x0[0];
  io[1] = _mm_sub_epi32(_mm_setzero_si128(), x4[0]);
  io[2] = x6[0];
  io[3] = _mm_sub_epi32(_mm_setzero_si128(), x2[0]);
  io[4] = x3[0];
  io[5] = _mm_sub_epi32(_mm_setzero_si128(), x7[0]);
  io[6] = x5[0];
  io[7] = _mm_sub_epi32(_mm_setzero_si128(), x1[0]);
}
// 8x8 inverse hybrid transform + reconstruction.  tx_type selects DCT or
// ADST per dimension (first pass DCT for DCT_DCT/ADST_DCT, second pass DCT
// for DCT_DCT/DCT_ADST; ADST otherwise).  For bd == 8 the coefficients are
// packed into 16 bits and the SSE2 transforms are used; otherwise the
// 32-bit SSE4.1 half-width (4-column) transforms process the block in two
// halves, juggling io[4..7]/io[8..11] through |temp| between passes.
void vp9_highbd_iht8x8_64_add_sse4_1(const tran_low_t *input, uint16_t *dest,
                                     int stride, int tx_type, int bd) {
  __m128i io[16];

  // Load the 8x8 block of 32-bit coefficients; each row occupies two
  // registers (left 4 lanes at io[r], right 4 lanes at io[r + 4] per half).
  io[0] = _mm_load_si128((const __m128i *)(input + 0 * 8 + 0));
  io[4] = _mm_load_si128((const __m128i *)(input + 0 * 8 + 4));
  io[1] = _mm_load_si128((const __m128i *)(input + 1 * 8 + 0));
  io[5] = _mm_load_si128((const __m128i *)(input + 1 * 8 + 4));
  io[2] = _mm_load_si128((const __m128i *)(input + 2 * 8 + 0));
  io[6] = _mm_load_si128((const __m128i *)(input + 2 * 8 + 4));
  io[3] = _mm_load_si128((const __m128i *)(input + 3 * 8 + 0));
  io[7] = _mm_load_si128((const __m128i *)(input + 3 * 8 + 4));
  io[8] = _mm_load_si128((const __m128i *)(input + 4 * 8 + 0));
  io[12] = _mm_load_si128((const __m128i *)(input + 4 * 8 + 4));
  io[9] = _mm_load_si128((const __m128i *)(input + 5 * 8 + 0));
  io[13] = _mm_load_si128((const __m128i *)(input + 5 * 8 + 4));
  io[10] = _mm_load_si128((const __m128i *)(input + 6 * 8 + 0));
  io[14] = _mm_load_si128((const __m128i *)(input + 6 * 8 + 4));
  io[11] = _mm_load_si128((const __m128i *)(input + 7 * 8 + 0));
  io[15] = _mm_load_si128((const __m128i *)(input + 7 * 8 + 4));

  if (bd == 8) {
    __m128i io_short[8];

    // Pack each row's two 32-bit registers into one 16-bit register.
    io_short[0] = _mm_packs_epi32(io[0], io[4]);
    io_short[1] = _mm_packs_epi32(io[1], io[5]);
    io_short[2] = _mm_packs_epi32(io[2], io[6]);
    io_short[3] = _mm_packs_epi32(io[3], io[7]);
    io_short[4] = _mm_packs_epi32(io[8], io[12]);
    io_short[5] = _mm_packs_epi32(io[9], io[13]);
    io_short[6] = _mm_packs_epi32(io[10], io[14]);
    io_short[7] = _mm_packs_epi32(io[11], io[15]);
    if (tx_type == DCT_DCT || tx_type == ADST_DCT) {
      vpx_idct8_sse2(io_short);
    } else {
      iadst8_sse2(io_short);
    }
    if (tx_type == DCT_DCT || tx_type == DCT_ADST) {
      vpx_idct8_sse2(io_short);
    } else {
      iadst8_sse2(io_short);
    }
    round_shift_8x8(io_short, io);
  } else {
    __m128i temp[4];

    // First 1-D pass on both halves of the block.
    if (tx_type == DCT_DCT || tx_type == ADST_DCT) {
      vpx_highbd_idct8x8_half1d_sse4_1(io);
      vpx_highbd_idct8x8_half1d_sse4_1(&io[8]);
    } else {
      highbd_iadst8_sse4_1(io);
      highbd_iadst8_sse4_1(&io[8]);
    }

    // Save io[4..7] and move io[8..11] into their place so the second pass
    // sees the right half; the saved registers are restored into io[8..11]
    // between the two half1d calls.
    temp[0] = io[4];
    temp[1] = io[5];
    temp[2] = io[6];
    temp[3] = io[7];
    io[4] = io[8];
    io[5] = io[9];
    io[6] = io[10];
    io[7] = io[11];

    if (tx_type == DCT_DCT || tx_type == DCT_ADST) {
      vpx_highbd_idct8x8_half1d_sse4_1(io);
      io[8] = temp[0];
      io[9] = temp[1];
      io[10] = temp[2];
      io[11] = temp[3];
      vpx_highbd_idct8x8_half1d_sse4_1(&io[8]);
    } else {
      highbd_iadst8_sse4_1(io);
      io[8] = temp[0];
      io[9] = temp[1];
      io[10] = temp[2];
      io[11] = temp[3];
      highbd_iadst8_sse4_1(&io[8]);
    }
    highbd_idct8x8_final_round(io);
  }
  recon_and_store_8x8(io, dest, stride, bd);
}

View File

@@ -10,6 +10,8 @@
#include "./vp9_rtcd.h" #include "./vp9_rtcd.h"
#include "vpx_dsp/x86/inv_txfm_sse2.h" #include "vpx_dsp/x86/inv_txfm_sse2.h"
#include "vpx_dsp/x86/txfm_common_sse2.h"
#include "vpx_ports/mem.h"
void vp9_iht4x4_16_add_sse2(const tran_low_t *input, uint8_t *dest, int stride, void vp9_iht4x4_16_add_sse2(const tran_low_t *input, uint8_t *dest, int stride,
int tx_type) { int tx_type) {
@@ -20,23 +22,23 @@ void vp9_iht4x4_16_add_sse2(const tran_low_t *input, uint8_t *dest, int stride,
in[1] = load_input_data8(input + 8); in[1] = load_input_data8(input + 8);
switch (tx_type) { switch (tx_type) {
case DCT_DCT: case 0: // DCT_DCT
idct4_sse2(in); idct4_sse2(in);
idct4_sse2(in); idct4_sse2(in);
break; break;
case ADST_DCT: case 1: // ADST_DCT
idct4_sse2(in); idct4_sse2(in);
iadst4_sse2(in); iadst4_sse2(in);
break; break;
case DCT_ADST: case 2: // DCT_ADST
iadst4_sse2(in); iadst4_sse2(in);
idct4_sse2(in); idct4_sse2(in);
break; break;
default: case 3: // ADST_ADST
assert(tx_type == ADST_ADST);
iadst4_sse2(in); iadst4_sse2(in);
iadst4_sse2(in); iadst4_sse2(in);
break; break;
default: assert(0); break;
} }
// Final round and shift // Final round and shift
@@ -65,23 +67,23 @@ void vp9_iht8x8_64_add_sse2(const tran_low_t *input, uint8_t *dest, int stride,
in[7] = load_input_data8(input + 8 * 7); in[7] = load_input_data8(input + 8 * 7);
switch (tx_type) { switch (tx_type) {
case DCT_DCT: case 0: // DCT_DCT
vpx_idct8_sse2(in); idct8_sse2(in);
vpx_idct8_sse2(in); idct8_sse2(in);
break; break;
case ADST_DCT: case 1: // ADST_DCT
vpx_idct8_sse2(in); idct8_sse2(in);
iadst8_sse2(in); iadst8_sse2(in);
break; break;
case DCT_ADST: case 2: // DCT_ADST
iadst8_sse2(in); iadst8_sse2(in);
vpx_idct8_sse2(in); idct8_sse2(in);
break; break;
default: case 3: // ADST_ADST
assert(tx_type == ADST_ADST);
iadst8_sse2(in); iadst8_sse2(in);
iadst8_sse2(in); iadst8_sse2(in);
break; break;
default: assert(0); break;
} }
// Final rounding and shift // Final rounding and shift
@@ -199,23 +201,23 @@ void vp9_iht16x16_256_add_sse2(const tran_low_t *input, uint8_t *dest,
load_buffer_8x16(input, in1); load_buffer_8x16(input, in1);
switch (tx_type) { switch (tx_type) {
case DCT_DCT: case 0: // DCT_DCT
idct16_sse2(in0, in1); idct16_sse2(in0, in1);
idct16_sse2(in0, in1); idct16_sse2(in0, in1);
break; break;
case ADST_DCT: case 1: // ADST_DCT
idct16_sse2(in0, in1); idct16_sse2(in0, in1);
iadst16_sse2(in0, in1); iadst16_sse2(in0, in1);
break; break;
case DCT_ADST: case 2: // DCT_ADST
iadst16_sse2(in0, in1); iadst16_sse2(in0, in1);
idct16_sse2(in0, in1); idct16_sse2(in0, in1);
break; break;
default: case 3: // ADST_ADST
assert(tx_type == ADST_ADST);
iadst16_sse2(in0, in1); iadst16_sse2(in0, in1);
iadst16_sse2(in0, in1); iadst16_sse2(in0, in1);
break; break;
default: assert(0); break;
} }
write_buffer_8x16(dest, in0, stride); write_buffer_8x16(dest, in0, stride);

View File

@@ -464,6 +464,10 @@ void vp9_cyclic_refresh_update_parameters(VP9_COMP *const cpi) {
cr->rate_ratio_qdelta = VPXMAX(cr->rate_ratio_qdelta, 2.5); cr->rate_ratio_qdelta = VPXMAX(cr->rate_ratio_qdelta, 2.5);
} }
} }
if (cpi->svc.spatial_layer_id > 0) {
cr->motion_thresh = 4;
cr->rate_boost_fac = 12;
}
if (cpi->oxcf.rc_mode == VPX_VBR) { if (cpi->oxcf.rc_mode == VPX_VBR) {
// To be adjusted for VBR mode, e.g., based on gf period and boost. // To be adjusted for VBR mode, e.g., based on gf period and boost.
// For now use smaller qp-delta (than CBR), no second boosted seg, and // For now use smaller qp-delta (than CBR), no second boosted seg, and

View File

@@ -12,10 +12,7 @@
#include "vp9/encoder/vp9_encoder.h" #include "vp9/encoder/vp9_encoder.h"
static const BLOCK_SIZE square[] = { static const BLOCK_SIZE square[] = {
BLOCK_8X8, BLOCK_8X8, BLOCK_16X16, BLOCK_32X32, BLOCK_64X64,
BLOCK_16X16,
BLOCK_32X32,
BLOCK_64X64,
}; };
static void alloc_mode_context(VP9_COMMON *cm, int num_4x4_blk, static void alloc_mode_context(VP9_COMMON *cm, int num_4x4_blk,

View File

@@ -189,12 +189,11 @@ static VP9_DENOISER_DECISION perform_motion_compensation(
int increase_denoising, int mi_row, int mi_col, PICK_MODE_CONTEXT *ctx, int increase_denoising, int mi_row, int mi_col, PICK_MODE_CONTEXT *ctx,
int motion_magnitude, int is_skin, int *zeromv_filter, int consec_zeromv, int motion_magnitude, int is_skin, int *zeromv_filter, int consec_zeromv,
int num_spatial_layers, int width, int lst_fb_idx, int gld_fb_idx, int num_spatial_layers, int width, int lst_fb_idx, int gld_fb_idx,
int use_svc, int spatial_layer) { int use_svc) {
const int sse_diff = (ctx->newmv_sse == UINT_MAX) const int sse_diff = (ctx->newmv_sse == UINT_MAX)
? 0 ? 0
: ((int)ctx->zeromv_sse - (int)ctx->newmv_sse); : ((int)ctx->zeromv_sse - (int)ctx->newmv_sse);
int frame; int frame;
int denoise_layer_idx = 0;
MACROBLOCKD *filter_mbd = &mb->e_mbd; MACROBLOCKD *filter_mbd = &mb->e_mbd;
MODE_INFO *mi = filter_mbd->mi[0]; MODE_INFO *mi = filter_mbd->mi[0];
MODE_INFO saved_mi; MODE_INFO saved_mi;
@@ -255,10 +254,6 @@ static VP9_DENOISER_DECISION perform_motion_compensation(
frame = lst_fb_idx + 1; frame = lst_fb_idx + 1;
else if (frame == GOLDEN_FRAME) else if (frame == GOLDEN_FRAME)
frame = gld_fb_idx + 1; frame = gld_fb_idx + 1;
// Shift for the second spatial layer.
if (num_spatial_layers - spatial_layer == 2)
frame = frame + denoiser->num_ref_frames;
denoise_layer_idx = num_spatial_layers - spatial_layer - 1;
} }
if (ctx->newmv_sse > sse_thresh(bs, increase_denoising)) { if (ctx->newmv_sse > sse_thresh(bs, increase_denoising)) {
@@ -294,21 +289,18 @@ static VP9_DENOISER_DECISION perform_motion_compensation(
denoiser->running_avg_y[frame].uv_stride, mi_row, mi_col); denoiser->running_avg_y[frame].uv_stride, mi_row, mi_col);
filter_mbd->plane[2].pre[0].stride = denoiser->running_avg_y[frame].uv_stride; filter_mbd->plane[2].pre[0].stride = denoiser->running_avg_y[frame].uv_stride;
filter_mbd->plane[0].dst.buf = block_start( filter_mbd->plane[0].dst.buf =
denoiser->mc_running_avg_y[denoise_layer_idx].y_buffer, block_start(denoiser->mc_running_avg_y.y_buffer,
denoiser->mc_running_avg_y[denoise_layer_idx].y_stride, mi_row, mi_col); denoiser->mc_running_avg_y.y_stride, mi_row, mi_col);
filter_mbd->plane[0].dst.stride = filter_mbd->plane[0].dst.stride = denoiser->mc_running_avg_y.y_stride;
denoiser->mc_running_avg_y[denoise_layer_idx].y_stride; filter_mbd->plane[1].dst.buf =
filter_mbd->plane[1].dst.buf = block_start( block_start(denoiser->mc_running_avg_y.u_buffer,
denoiser->mc_running_avg_y[denoise_layer_idx].u_buffer, denoiser->mc_running_avg_y.uv_stride, mi_row, mi_col);
denoiser->mc_running_avg_y[denoise_layer_idx].uv_stride, mi_row, mi_col); filter_mbd->plane[1].dst.stride = denoiser->mc_running_avg_y.uv_stride;
filter_mbd->plane[1].dst.stride = filter_mbd->plane[2].dst.buf =
denoiser->mc_running_avg_y[denoise_layer_idx].uv_stride; block_start(denoiser->mc_running_avg_y.v_buffer,
filter_mbd->plane[2].dst.buf = block_start( denoiser->mc_running_avg_y.uv_stride, mi_row, mi_col);
denoiser->mc_running_avg_y[denoise_layer_idx].v_buffer, filter_mbd->plane[2].dst.stride = denoiser->mc_running_avg_y.uv_stride;
denoiser->mc_running_avg_y[denoise_layer_idx].uv_stride, mi_row, mi_col);
filter_mbd->plane[2].dst.stride =
denoiser->mc_running_avg_y[denoise_layer_idx].uv_stride;
set_ref_ptrs(cm, filter_mbd, saved_frame, NONE); set_ref_ptrs(cm, filter_mbd, saved_frame, NONE);
vp9_build_inter_predictors_sby(filter_mbd, mi_row, mi_col, bs); vp9_build_inter_predictors_sby(filter_mbd, mi_row, mi_col, bs);
@@ -332,17 +324,9 @@ void vp9_denoiser_denoise(VP9_COMP *cpi, MACROBLOCK *mb, int mi_row, int mi_col,
int zeromv_filter = 0; int zeromv_filter = 0;
VP9_DENOISER *denoiser = &cpi->denoiser; VP9_DENOISER *denoiser = &cpi->denoiser;
VP9_DENOISER_DECISION decision = COPY_BLOCK; VP9_DENOISER_DECISION decision = COPY_BLOCK;
YV12_BUFFER_CONFIG avg = denoiser->running_avg_y[INTRA_FRAME];
const int shift = YV12_BUFFER_CONFIG mc_avg = denoiser->mc_running_avg_y;
cpi->svc.number_spatial_layers - cpi->svc.spatial_layer_id == 2
? denoiser->num_ref_frames
: 0;
YV12_BUFFER_CONFIG avg = denoiser->running_avg_y[INTRA_FRAME + shift];
const int denoise_layer_index =
cpi->svc.number_spatial_layers - cpi->svc.spatial_layer_id - 1;
YV12_BUFFER_CONFIG mc_avg = denoiser->mc_running_avg_y[denoise_layer_index];
uint8_t *avg_start = block_start(avg.y_buffer, avg.y_stride, mi_row, mi_col); uint8_t *avg_start = block_start(avg.y_buffer, avg.y_stride, mi_row, mi_col);
uint8_t *mc_avg_start = uint8_t *mc_avg_start =
block_start(mc_avg.y_buffer, mc_avg.y_stride, mi_row, mi_col); block_start(mc_avg.y_buffer, mc_avg.y_stride, mi_row, mi_col);
struct buf_2d src = mb->plane[0].src; struct buf_2d src = mb->plane[0].src;
@@ -397,7 +381,7 @@ void vp9_denoiser_denoise(VP9_COMP *cpi, MACROBLOCK *mb, int mi_row, int mi_col,
&cpi->common, denoiser, mb, bs, increase_denoising, mi_row, mi_col, ctx, &cpi->common, denoiser, mb, bs, increase_denoising, mi_row, mi_col, ctx,
motion_magnitude, is_skin, &zeromv_filter, consec_zeromv, motion_magnitude, is_skin, &zeromv_filter, consec_zeromv,
cpi->svc.number_spatial_layers, cpi->Source->y_width, cpi->lst_fb_idx, cpi->svc.number_spatial_layers, cpi->Source->y_width, cpi->lst_fb_idx,
cpi->gld_fb_idx, cpi->use_svc, cpi->svc.spatial_layer_id); cpi->gld_fb_idx, cpi->use_svc);
if (decision == FILTER_BLOCK) { if (decision == FILTER_BLOCK) {
decision = vp9_denoiser_filter(src.buf, src.stride, mc_avg_start, decision = vp9_denoiser_filter(src.buf, src.stride, mc_avg_start,
@@ -448,8 +432,7 @@ void vp9_denoiser_update_frame_info(
VP9_DENOISER *denoiser, YV12_BUFFER_CONFIG src, FRAME_TYPE frame_type, VP9_DENOISER *denoiser, YV12_BUFFER_CONFIG src, FRAME_TYPE frame_type,
int refresh_alt_ref_frame, int refresh_golden_frame, int refresh_last_frame, int refresh_alt_ref_frame, int refresh_golden_frame, int refresh_last_frame,
int alt_fb_idx, int gld_fb_idx, int lst_fb_idx, int resized, int alt_fb_idx, int gld_fb_idx, int lst_fb_idx, int resized,
int svc_base_is_key, int second_spatial_layer) { int svc_base_is_key) {
const int shift = second_spatial_layer ? denoiser->num_ref_frames : 0;
// Copy source into denoised reference buffers on KEY_FRAME or // Copy source into denoised reference buffers on KEY_FRAME or
// if the just encoded frame was resized. For SVC, copy source if the base // if the just encoded frame was resized. For SVC, copy source if the base
// spatial layer was key frame. // spatial layer was key frame.
@@ -458,8 +441,8 @@ void vp9_denoiser_update_frame_info(
int i; int i;
// Start at 1 so as not to overwrite the INTRA_FRAME // Start at 1 so as not to overwrite the INTRA_FRAME
for (i = 1; i < denoiser->num_ref_frames; ++i) { for (i = 1; i < denoiser->num_ref_frames; ++i) {
if (denoiser->running_avg_y[i + shift].buffer_alloc != NULL) if (denoiser->running_avg_y[i].buffer_alloc != NULL)
copy_frame(&denoiser->running_avg_y[i + shift], &src); copy_frame(&denoiser->running_avg_y[i], &src);
} }
denoiser->reset = 0; denoiser->reset = 0;
return; return;
@@ -468,29 +451,29 @@ void vp9_denoiser_update_frame_info(
// If more than one refresh occurs, must copy frame buffer. // If more than one refresh occurs, must copy frame buffer.
if ((refresh_alt_ref_frame + refresh_golden_frame + refresh_last_frame) > 1) { if ((refresh_alt_ref_frame + refresh_golden_frame + refresh_last_frame) > 1) {
if (refresh_alt_ref_frame) { if (refresh_alt_ref_frame) {
copy_frame(&denoiser->running_avg_y[alt_fb_idx + 1 + shift], copy_frame(&denoiser->running_avg_y[alt_fb_idx + 1],
&denoiser->running_avg_y[INTRA_FRAME + shift]); &denoiser->running_avg_y[INTRA_FRAME]);
} }
if (refresh_golden_frame) { if (refresh_golden_frame) {
copy_frame(&denoiser->running_avg_y[gld_fb_idx + 1 + shift], copy_frame(&denoiser->running_avg_y[gld_fb_idx + 1],
&denoiser->running_avg_y[INTRA_FRAME + shift]); &denoiser->running_avg_y[INTRA_FRAME]);
} }
if (refresh_last_frame) { if (refresh_last_frame) {
copy_frame(&denoiser->running_avg_y[lst_fb_idx + 1 + shift], copy_frame(&denoiser->running_avg_y[lst_fb_idx + 1],
&denoiser->running_avg_y[INTRA_FRAME + shift]); &denoiser->running_avg_y[INTRA_FRAME]);
} }
} else { } else {
if (refresh_alt_ref_frame) { if (refresh_alt_ref_frame) {
swap_frame_buffer(&denoiser->running_avg_y[alt_fb_idx + 1 + shift], swap_frame_buffer(&denoiser->running_avg_y[alt_fb_idx + 1],
&denoiser->running_avg_y[INTRA_FRAME + shift]); &denoiser->running_avg_y[INTRA_FRAME]);
} }
if (refresh_golden_frame) { if (refresh_golden_frame) {
swap_frame_buffer(&denoiser->running_avg_y[gld_fb_idx + 1 + shift], swap_frame_buffer(&denoiser->running_avg_y[gld_fb_idx + 1],
&denoiser->running_avg_y[INTRA_FRAME + shift]); &denoiser->running_avg_y[INTRA_FRAME]);
} }
if (refresh_last_frame) { if (refresh_last_frame) {
swap_frame_buffer(&denoiser->running_avg_y[lst_fb_idx + 1 + shift], swap_frame_buffer(&denoiser->running_avg_y[lst_fb_idx + 1],
&denoiser->running_avg_y[INTRA_FRAME + shift]); &denoiser->running_avg_y[INTRA_FRAME]);
} }
} }
} }
@@ -539,75 +522,44 @@ static int vp9_denoiser_realloc_svc_helper(VP9_COMMON *cm,
} }
int vp9_denoiser_realloc_svc(VP9_COMMON *cm, VP9_DENOISER *denoiser, int vp9_denoiser_realloc_svc(VP9_COMMON *cm, VP9_DENOISER *denoiser,
int svc_buf_shift, int refresh_alt, int refresh_alt, int refresh_gld, int refresh_lst,
int refresh_gld, int refresh_lst, int alt_fb_idx, int alt_fb_idx, int gld_fb_idx, int lst_fb_idx) {
int gld_fb_idx, int lst_fb_idx) {
int fail = 0; int fail = 0;
if (refresh_alt) { if (refresh_alt) {
// Increase the frame buffer index by 1 to map it to the buffer index in the // Increase the frame buffer index by 1 to map it to the buffer index in the
// denoiser. // denoiser.
fail = vp9_denoiser_realloc_svc_helper(cm, denoiser, fail = vp9_denoiser_realloc_svc_helper(cm, denoiser, alt_fb_idx + 1);
alt_fb_idx + 1 + svc_buf_shift);
if (fail) return 1; if (fail) return 1;
} }
if (refresh_gld) { if (refresh_gld) {
fail = vp9_denoiser_realloc_svc_helper(cm, denoiser, fail = vp9_denoiser_realloc_svc_helper(cm, denoiser, gld_fb_idx + 1);
gld_fb_idx + 1 + svc_buf_shift);
if (fail) return 1; if (fail) return 1;
} }
if (refresh_lst) { if (refresh_lst) {
fail = vp9_denoiser_realloc_svc_helper(cm, denoiser, fail = vp9_denoiser_realloc_svc_helper(cm, denoiser, lst_fb_idx + 1);
lst_fb_idx + 1 + svc_buf_shift);
if (fail) return 1; if (fail) return 1;
} }
return 0; return 0;
} }
int vp9_denoiser_alloc(VP9_COMMON *cm, struct SVC *svc, VP9_DENOISER *denoiser, int vp9_denoiser_alloc(VP9_COMMON *cm, int use_svc, VP9_DENOISER *denoiser,
int use_svc, int noise_sen, int width, int height, int width, int height, int ssx, int ssy,
int ssx, int ssy,
#if CONFIG_VP9_HIGHBITDEPTH #if CONFIG_VP9_HIGHBITDEPTH
int use_highbitdepth, int use_highbitdepth,
#endif #endif
int border) { int border) {
int i, layer, fail, init_num_ref_frames; int i, fail, init_num_ref_frames;
const int legacy_byte_alignment = 0; const int legacy_byte_alignment = 0;
int num_layers = 1;
int scaled_width = width;
int scaled_height = height;
if (use_svc) {
LAYER_CONTEXT *lc = &svc->layer_context[svc->spatial_layer_id *
svc->number_temporal_layers +
svc->temporal_layer_id];
get_layer_resolution(width, height, lc->scaling_factor_num,
lc->scaling_factor_den, &scaled_width, &scaled_height);
// For SVC: only denoise at most 2 spatial (highest) layers.
if (noise_sen >= 2)
// Denoise from one spatial layer below the top.
svc->first_layer_denoise = VPXMAX(svc->number_spatial_layers - 2, 0);
else
// Only denoise the top spatial layer.
svc->first_layer_denoise = VPXMAX(svc->number_spatial_layers - 1, 0);
num_layers = svc->number_spatial_layers - svc->first_layer_denoise;
}
assert(denoiser != NULL); assert(denoiser != NULL);
denoiser->num_ref_frames = use_svc ? SVC_REF_FRAMES : NONSVC_REF_FRAMES; denoiser->num_ref_frames = use_svc ? SVC_REF_FRAMES : NONSVC_REF_FRAMES;
init_num_ref_frames = use_svc ? MAX_REF_FRAMES : NONSVC_REF_FRAMES; init_num_ref_frames = use_svc ? MAX_REF_FRAMES : NONSVC_REF_FRAMES;
denoiser->num_layers = num_layers;
CHECK_MEM_ERROR(cm, denoiser->running_avg_y,
vpx_calloc(denoiser->num_ref_frames * num_layers,
sizeof(denoiser->running_avg_y[0])));
CHECK_MEM_ERROR( CHECK_MEM_ERROR(
cm, denoiser->mc_running_avg_y, cm, denoiser->running_avg_y,
vpx_calloc(num_layers, sizeof(denoiser->mc_running_avg_y[0]))); vpx_calloc(denoiser->num_ref_frames, sizeof(denoiser->running_avg_y[0])));
for (layer = 0; layer < num_layers; ++layer) {
const int denoise_width = (layer == 0) ? width : scaled_width;
const int denoise_height = (layer == 0) ? height : scaled_height;
for (i = 0; i < init_num_ref_frames; ++i) { for (i = 0; i < init_num_ref_frames; ++i) {
fail = vpx_alloc_frame_buffer( fail = vpx_alloc_frame_buffer(&denoiser->running_avg_y[i], width, height,
&denoiser->running_avg_y[i + denoiser->num_ref_frames * layer], ssx, ssy,
denoise_width, denoise_height, ssx, ssy,
#if CONFIG_VP9_HIGHBITDEPTH #if CONFIG_VP9_HIGHBITDEPTH
use_highbitdepth, use_highbitdepth,
#endif #endif
@@ -621,8 +573,8 @@ int vp9_denoiser_alloc(VP9_COMMON *cm, struct SVC *svc, VP9_DENOISER *denoiser,
#endif #endif
} }
fail = vpx_alloc_frame_buffer(&denoiser->mc_running_avg_y[layer], fail = vpx_alloc_frame_buffer(&denoiser->mc_running_avg_y, width, height, ssx,
denoise_width, denoise_height, ssx, ssy, ssy,
#if CONFIG_VP9_HIGHBITDEPTH #if CONFIG_VP9_HIGHBITDEPTH
use_highbitdepth, use_highbitdepth,
#endif #endif
@@ -631,10 +583,7 @@ int vp9_denoiser_alloc(VP9_COMMON *cm, struct SVC *svc, VP9_DENOISER *denoiser,
vp9_denoiser_free(denoiser); vp9_denoiser_free(denoiser);
return 1; return 1;
} }
}
// denoiser->last_source only used for noise_estimation, so only for top
// layer.
fail = vpx_alloc_frame_buffer(&denoiser->last_source, width, height, ssx, ssy, fail = vpx_alloc_frame_buffer(&denoiser->last_source, width, height, ssx, ssy,
#if CONFIG_VP9_HIGHBITDEPTH #if CONFIG_VP9_HIGHBITDEPTH
use_highbitdepth, use_highbitdepth,
@@ -660,18 +609,12 @@ void vp9_denoiser_free(VP9_DENOISER *denoiser) {
return; return;
} }
denoiser->frame_buffer_initialized = 0; denoiser->frame_buffer_initialized = 0;
for (i = 0; i < denoiser->num_ref_frames * denoiser->num_layers; ++i) { for (i = 0; i < denoiser->num_ref_frames; ++i) {
vpx_free_frame_buffer(&denoiser->running_avg_y[i]); vpx_free_frame_buffer(&denoiser->running_avg_y[i]);
} }
vpx_free(denoiser->running_avg_y); vpx_free(denoiser->running_avg_y);
denoiser->running_avg_y = NULL; denoiser->running_avg_y = NULL;
vpx_free_frame_buffer(&denoiser->mc_running_avg_y);
for (i = 0; i < denoiser->num_layers; ++i) {
vpx_free_frame_buffer(&denoiser->mc_running_avg_y[i]);
}
vpx_free(denoiser->mc_running_avg_y);
denoiser->mc_running_avg_y = NULL;
vpx_free_frame_buffer(&denoiser->last_source); vpx_free_frame_buffer(&denoiser->last_source);
} }

View File

@@ -44,12 +44,11 @@ typedef enum vp9_denoiser_level {
typedef struct vp9_denoiser { typedef struct vp9_denoiser {
YV12_BUFFER_CONFIG *running_avg_y; YV12_BUFFER_CONFIG *running_avg_y;
YV12_BUFFER_CONFIG *mc_running_avg_y; YV12_BUFFER_CONFIG mc_running_avg_y;
YV12_BUFFER_CONFIG last_source; YV12_BUFFER_CONFIG last_source;
int frame_buffer_initialized; int frame_buffer_initialized;
int reset; int reset;
int num_ref_frames; int num_ref_frames;
int num_layers;
VP9_DENOISER_LEVEL denoising_level; VP9_DENOISER_LEVEL denoising_level;
VP9_DENOISER_LEVEL prev_denoising_level; VP9_DENOISER_LEVEL prev_denoising_level;
} VP9_DENOISER; } VP9_DENOISER;
@@ -67,13 +66,12 @@ typedef struct {
} VP9_PICKMODE_CTX_DEN; } VP9_PICKMODE_CTX_DEN;
struct VP9_COMP; struct VP9_COMP;
struct SVC;
void vp9_denoiser_update_frame_info( void vp9_denoiser_update_frame_info(
VP9_DENOISER *denoiser, YV12_BUFFER_CONFIG src, FRAME_TYPE frame_type, VP9_DENOISER *denoiser, YV12_BUFFER_CONFIG src, FRAME_TYPE frame_type,
int refresh_alt_ref_frame, int refresh_golden_frame, int refresh_last_frame, int refresh_alt_ref_frame, int refresh_golden_frame, int refresh_last_frame,
int alt_fb_idx, int gld_fb_idx, int lst_fb_idx, int resized, int alt_fb_idx, int gld_fb_idx, int lst_fb_idx, int resized,
int svc_base_is_key, int second_spatial_layer); int svc_base_is_key);
void vp9_denoiser_denoise(struct VP9_COMP *cpi, MACROBLOCK *mb, int mi_row, void vp9_denoiser_denoise(struct VP9_COMP *cpi, MACROBLOCK *mb, int mi_row,
int mi_col, BLOCK_SIZE bs, PICK_MODE_CONTEXT *ctx, int mi_col, BLOCK_SIZE bs, PICK_MODE_CONTEXT *ctx,
@@ -86,13 +84,11 @@ void vp9_denoiser_update_frame_stats(MODE_INFO *mi, unsigned int sse,
PICK_MODE_CONTEXT *ctx); PICK_MODE_CONTEXT *ctx);
int vp9_denoiser_realloc_svc(VP9_COMMON *cm, VP9_DENOISER *denoiser, int vp9_denoiser_realloc_svc(VP9_COMMON *cm, VP9_DENOISER *denoiser,
int svc_buf_shift, int refresh_alt, int refresh_alt, int refresh_gld, int refresh_lst,
int refresh_gld, int refresh_lst, int alt_fb_idx, int alt_fb_idx, int gld_fb_idx, int lst_fb_idx);
int gld_fb_idx, int lst_fb_idx);
int vp9_denoiser_alloc(VP9_COMMON *cm, struct SVC *svc, VP9_DENOISER *denoiser, int vp9_denoiser_alloc(VP9_COMMON *cm, int use_svc, VP9_DENOISER *denoiser,
int use_svc, int noise_sen, int width, int height, int width, int height, int ssx, int ssy,
int ssx, int ssy,
#if CONFIG_VP9_HIGHBITDEPTH #if CONFIG_VP9_HIGHBITDEPTH
int use_highbitdepth, int use_highbitdepth,
#endif #endif

View File

@@ -1513,8 +1513,8 @@ static int choose_partitioning(VP9_COMP *cpi, const TileInfo *const tile,
} }
} }
} }
if (is_key_frame || if (is_key_frame || (low_res &&
(low_res && vt.split[i].split[j].part_variances.none.variance > vt.split[i].split[j].part_variances.none.variance >
threshold_4x4avg)) { threshold_4x4avg)) {
force_split[split_index] = 0; force_split[split_index] = 0;
// Go down to 4x4 down-sampling for variance. // Go down to 4x4 down-sampling for variance.
@@ -3403,8 +3403,7 @@ static void rd_pick_partition(VP9_COMP *cpi, ThreadData *td,
// Rate and distortion based partition search termination clause. // Rate and distortion based partition search termination clause.
if (!cpi->sf.ml_partition_search_early_termination && if (!cpi->sf.ml_partition_search_early_termination &&
!x->e_mbd.lossless && !x->e_mbd.lossless && ((best_rdc.dist < (dist_breakout_thr >> 2)) ||
((best_rdc.dist < (dist_breakout_thr >> 2)) ||
(best_rdc.dist < dist_breakout_thr && (best_rdc.dist < dist_breakout_thr &&
best_rdc.rate < rate_breakout_thr))) { best_rdc.rate < rate_breakout_thr))) {
do_rect = 0; do_rect = 0;
@@ -4621,9 +4620,8 @@ void vp9_init_tile_data(VP9_COMP *cpi) {
if (cpi->tile_data == NULL || cpi->allocated_tiles < tile_cols * tile_rows) { if (cpi->tile_data == NULL || cpi->allocated_tiles < tile_cols * tile_rows) {
if (cpi->tile_data != NULL) vpx_free(cpi->tile_data); if (cpi->tile_data != NULL) vpx_free(cpi->tile_data);
CHECK_MEM_ERROR( CHECK_MEM_ERROR(cm, cpi->tile_data, vpx_malloc(tile_cols * tile_rows *
cm, cpi->tile_data, sizeof(*cpi->tile_data)));
vpx_malloc(tile_cols * tile_rows * sizeof(*cpi->tile_data)));
cpi->allocated_tiles = tile_cols * tile_rows; cpi->allocated_tiles = tile_cols * tile_rows;
for (tile_row = 0; tile_row < tile_rows; ++tile_row) for (tile_row = 0; tile_row < tile_rows; ++tile_row)

View File

@@ -50,8 +50,7 @@ void vp9_subtract_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane) {
} }
static const int plane_rd_mult[REF_TYPES][PLANE_TYPES] = { static const int plane_rd_mult[REF_TYPES][PLANE_TYPES] = {
{ 10, 6 }, { 10, 6 }, { 8, 5 },
{ 8, 5 },
}; };
// 'num' can be negative, but 'shift' must be non-negative. // 'num' can be negative, but 'shift' must be non-negative.
@@ -201,8 +200,8 @@ int vp9_optimize_b(MACROBLOCK *mb, int plane, int block, TX_SIZE tx_size,
const int band_next = band_translate[i + 1]; const int band_next = band_translate[i + 1];
const int token_next = const int token_next =
(i + 1 != eob) ? vp9_get_token(qcoeff[scan[i + 1]]) : EOB_TOKEN; (i + 1 != eob) ? vp9_get_token(qcoeff[scan[i + 1]]) : EOB_TOKEN;
unsigned int(*const token_costs_next)[2][COEFF_CONTEXTS] unsigned int(
[ENTROPY_TOKENS] = *const token_costs_next)[2][COEFF_CONTEXTS][ENTROPY_TOKENS] =
token_costs + band_next; token_costs + band_next;
token_cache[rc] = vp9_pt_energy_class[t0]; token_cache[rc] = vp9_pt_energy_class[t0];
ctx_next = get_coef_context(nb, token_cache, i + 1); ctx_next = get_coef_context(nb, token_cache, i + 1);

View File

@@ -65,12 +65,12 @@
#define AM_SEGMENT_ID_INACTIVE 7 #define AM_SEGMENT_ID_INACTIVE 7
#define AM_SEGMENT_ID_ACTIVE 0 #define AM_SEGMENT_ID_ACTIVE 0
// Whether to use high precision mv for altref computation. #define ALTREF_HIGH_PRECISION_MV 1 // Whether to use high precision mv
#define ALTREF_HIGH_PRECISION_MV 1 // for altref computation.
#define HIGH_PRECISION_MV_QTHRESH 200 // Q threshold for high precision
// Q threshold for high precision mv. Choose a very high value for now so that // mv. Choose a very high value for
// HIGH_PRECISION is always chosen. // now so that HIGH_PRECISION is always
#define HIGH_PRECISION_MV_QTHRESH 200 // chosen.
#define FRAME_SIZE_FACTOR 128 // empirical params for context model threshold #define FRAME_SIZE_FACTOR 128 // empirical params for context model threshold
#define FRAME_RATE_FACTOR 8 #define FRAME_RATE_FACTOR 8
@@ -437,37 +437,34 @@ static int is_psnr_calc_enabled(VP9_COMP *cpi) {
/* clang-format off */ /* clang-format off */
const Vp9LevelSpec vp9_level_defs[VP9_LEVELS] = { const Vp9LevelSpec vp9_level_defs[VP9_LEVELS] = {
// sample rate size breadth bitrate cpb { LEVEL_1, 829440, 36864, 200, 400, 2, 1, 4, 8 },
{ LEVEL_1, 829440, 36864, 512, 200, 400, 2, 1, 4, 8 }, { LEVEL_1_1, 2764800, 73728, 800, 1000, 2, 1, 4, 8 },
{ LEVEL_1_1, 2764800, 73728, 768, 800, 1000, 2, 1, 4, 8 }, { LEVEL_2, 4608000, 122880, 1800, 1500, 2, 1, 4, 8 },
{ LEVEL_2, 4608000, 122880, 960, 1800, 1500, 2, 1, 4, 8 }, { LEVEL_2_1, 9216000, 245760, 3600, 2800, 2, 2, 4, 8 },
{ LEVEL_2_1, 9216000, 245760, 1344, 3600, 2800, 2, 2, 4, 8 }, { LEVEL_3, 20736000, 552960, 7200, 6000, 2, 4, 4, 8 },
{ LEVEL_3, 20736000, 552960, 2048, 7200, 6000, 2, 4, 4, 8 }, { LEVEL_3_1, 36864000, 983040, 12000, 10000, 2, 4, 4, 8 },
{ LEVEL_3_1, 36864000, 983040, 2752, 12000, 10000, 2, 4, 4, 8 }, { LEVEL_4, 83558400, 2228224, 18000, 16000, 4, 4, 4, 8 },
{ LEVEL_4, 83558400, 2228224, 4160, 18000, 16000, 4, 4, 4, 8 }, { LEVEL_4_1, 160432128, 2228224, 30000, 18000, 4, 4, 5, 6 },
{ LEVEL_4_1, 160432128, 2228224, 4160, 30000, 18000, 4, 4, 5, 6 }, { LEVEL_5, 311951360, 8912896, 60000, 36000, 6, 8, 6, 4 },
{ LEVEL_5, 311951360, 8912896, 8384, 60000, 36000, 6, 8, 6, 4 }, { LEVEL_5_1, 588251136, 8912896, 120000, 46000, 8, 8, 10, 4 },
{ LEVEL_5_1, 588251136, 8912896, 8384, 120000, 46000, 8, 8, 10, 4 },
// TODO(huisu): update max_cpb_size for level 5_2 ~ 6_2 when // TODO(huisu): update max_cpb_size for level 5_2 ~ 6_2 when
// they are finalized (currently tentative). // they are finalized (currently tentative).
{ LEVEL_5_2, 1176502272, 8912896, 8384, 180000, 90000, 8, 8, 10, 4 }, { LEVEL_5_2, 1176502272, 8912896, 180000, 90000, 8, 8, 10, 4 },
{ LEVEL_6, 1176502272, 35651584, 16832, 180000, 90000, 8, 16, 10, 4 }, { LEVEL_6, 1176502272, 35651584, 180000, 90000, 8, 16, 10, 4 },
{ LEVEL_6_1, 2353004544u, 35651584, 16832, 240000, 180000, 8, 16, 10, 4 }, { LEVEL_6_1, 2353004544u, 35651584, 240000, 180000, 8, 16, 10, 4 },
{ LEVEL_6_2, 4706009088u, 35651584, 16832, 480000, 360000, 8, 16, 10, 4 }, { LEVEL_6_2, 4706009088u, 35651584, 480000, 360000, 8, 16, 10, 4 },
}; };
/* clang-format on */ /* clang-format on */
static const char *level_fail_messages[TARGET_LEVEL_FAIL_IDS] = { static const char *level_fail_messages[TARGET_LEVEL_FAIL_IDS] =
"The average bit-rate is too high.", { "The average bit-rate is too high.",
"The picture size is too large.", "The picture size is too large.",
"The picture width/height is too large.",
"The luma sample rate is too large.", "The luma sample rate is too large.",
"The CPB size is too large.", "The CPB size is too large.",
"The compression ratio is too small", "The compression ratio is too small",
"Too many column tiles are used.", "Too many column tiles are used.",
"The alt-ref distance is too small.", "The alt-ref distance is too small.",
"Too many reference buffers are used." "Too many reference buffers are used." };
};
static INLINE void Scale2Ratio(VPX_SCALING mode, int *hr, int *hs) { static INLINE void Scale2Ratio(VPX_SCALING mode, int *hr, int *hs) {
switch (mode) { switch (mode) {
@@ -547,74 +544,6 @@ static void apply_active_map(VP9_COMP *cpi) {
} }
} }
static void apply_roi_map(VP9_COMP *cpi) {
VP9_COMMON *cm = &cpi->common;
struct segmentation *const seg = &cm->seg;
vpx_roi_map_t *roi = &cpi->roi;
const int *delta_q = roi->delta_q;
const int *delta_lf = roi->delta_lf;
const int *skip = roi->skip;
int ref_frame[8];
int internal_delta_q[MAX_SEGMENTS];
int i;
static const int flag_list[4] = { 0, VP9_LAST_FLAG, VP9_GOLD_FLAG,
VP9_ALT_FLAG };
// TODO(jianj): Investigate why ROI not working in speed < 5 or in non
// realtime mode.
if (cpi->oxcf.mode != REALTIME || cpi->oxcf.speed < 5) return;
if (!roi->enabled) return;
memcpy(&ref_frame, roi->ref_frame, sizeof(ref_frame));
vp9_enable_segmentation(seg);
vp9_clearall_segfeatures(seg);
// Select delta coding method;
seg->abs_delta = SEGMENT_DELTADATA;
memcpy(cpi->segmentation_map, roi->roi_map, (cm->mi_rows * cm->mi_cols));
for (i = 0; i < MAX_SEGMENTS; ++i) {
// Translate the external delta q values to internal values.
internal_delta_q[i] = vp9_quantizer_to_qindex(abs(delta_q[i]));
if (delta_q[i] < 0) internal_delta_q[i] = -internal_delta_q[i];
vp9_disable_segfeature(seg, i, SEG_LVL_ALT_Q);
vp9_disable_segfeature(seg, i, SEG_LVL_ALT_LF);
if (internal_delta_q[i] != 0) {
vp9_enable_segfeature(seg, i, SEG_LVL_ALT_Q);
vp9_set_segdata(seg, i, SEG_LVL_ALT_Q, internal_delta_q[i]);
}
if (delta_lf[i] != 0) {
vp9_enable_segfeature(seg, i, SEG_LVL_ALT_LF);
vp9_set_segdata(seg, i, SEG_LVL_ALT_LF, delta_lf[i]);
}
if (skip[i] != 0) {
vp9_enable_segfeature(seg, i, SEG_LVL_SKIP);
vp9_set_segdata(seg, i, SEG_LVL_SKIP, skip[i]);
}
if (ref_frame[i] >= 0) {
int valid_ref = 1;
// ALTREF is not used as reference for nonrd_pickmode with 0 lag.
if (ref_frame[i] == ALTREF_FRAME && cpi->sf.use_nonrd_pick_mode)
valid_ref = 0;
// If GOLDEN is selected, make sure it's set as reference.
if (ref_frame[i] == GOLDEN_FRAME &&
!(cpi->ref_frame_flags & flag_list[ref_frame[i]])) {
valid_ref = 0;
}
// GOLDEN was updated in previous encoded frame, so GOLDEN and LAST are
// same reference.
if (ref_frame[i] == GOLDEN_FRAME && cpi->rc.frames_since_golden == 0)
ref_frame[i] = LAST_FRAME;
if (valid_ref) {
vp9_enable_segfeature(seg, i, SEG_LVL_REF_FRAME);
vp9_set_segdata(seg, i, SEG_LVL_REF_FRAME, ref_frame[i]);
}
}
}
roi->enabled = 1;
}
static void init_level_info(Vp9LevelInfo *level_info) { static void init_level_info(Vp9LevelInfo *level_info) {
Vp9LevelStats *const level_stats = &level_info->level_stats; Vp9LevelStats *const level_stats = &level_info->level_stats;
Vp9LevelSpec *const level_spec = &level_info->level_spec; Vp9LevelSpec *const level_spec = &level_info->level_spec;
@@ -625,13 +554,6 @@ static void init_level_info(Vp9LevelInfo *level_info) {
level_spec->min_altref_distance = INT_MAX; level_spec->min_altref_distance = INT_MAX;
} }
static int check_seg_range(int seg_data[8], int range) {
return !(abs(seg_data[0]) > range || abs(seg_data[1]) > range ||
abs(seg_data[2]) > range || abs(seg_data[3]) > range ||
abs(seg_data[4]) > range || abs(seg_data[5]) > range ||
abs(seg_data[6]) > range || abs(seg_data[7]) > range);
}
VP9_LEVEL vp9_get_level(const Vp9LevelSpec *const level_spec) { VP9_LEVEL vp9_get_level(const Vp9LevelSpec *const level_spec) {
int i; int i;
const Vp9LevelSpec *this_level; const Vp9LevelSpec *this_level;
@@ -644,8 +566,6 @@ VP9_LEVEL vp9_get_level(const Vp9LevelSpec *const level_spec) {
(double)this_level->max_luma_sample_rate * (double)this_level->max_luma_sample_rate *
(1 + SAMPLE_RATE_GRACE_P) || (1 + SAMPLE_RATE_GRACE_P) ||
level_spec->max_luma_picture_size > this_level->max_luma_picture_size || level_spec->max_luma_picture_size > this_level->max_luma_picture_size ||
level_spec->max_luma_picture_breadth >
this_level->max_luma_picture_breadth ||
level_spec->average_bitrate > this_level->average_bitrate || level_spec->average_bitrate > this_level->average_bitrate ||
level_spec->max_cpb_size > this_level->max_cpb_size || level_spec->max_cpb_size > this_level->max_cpb_size ||
level_spec->compression_ratio < this_level->compression_ratio || level_spec->compression_ratio < this_level->compression_ratio ||
@@ -658,61 +578,6 @@ VP9_LEVEL vp9_get_level(const Vp9LevelSpec *const level_spec) {
return (i == VP9_LEVELS) ? LEVEL_UNKNOWN : vp9_level_defs[i].level; return (i == VP9_LEVELS) ? LEVEL_UNKNOWN : vp9_level_defs[i].level;
} }
int vp9_set_roi_map(VP9_COMP *cpi, unsigned char *map, unsigned int rows,
unsigned int cols, int delta_q[8], int delta_lf[8],
int skip[8], int ref_frame[8]) {
VP9_COMMON *cm = &cpi->common;
vpx_roi_map_t *roi = &cpi->roi;
const int range = 63;
const int ref_frame_range = 3; // Alt-ref
const int skip_range = 1;
const int frame_rows = cpi->common.mi_rows;
const int frame_cols = cpi->common.mi_cols;
// Check number of rows and columns match
if (frame_rows != (int)rows || frame_cols != (int)cols) {
return -1;
}
if (!check_seg_range(delta_q, range) || !check_seg_range(delta_lf, range) ||
!check_seg_range(ref_frame, ref_frame_range) ||
!check_seg_range(skip, skip_range))
return -1;
// Also disable segmentation if no deltas are specified.
if (!map ||
(!(delta_q[0] | delta_q[1] | delta_q[2] | delta_q[3] | delta_q[4] |
delta_q[5] | delta_q[6] | delta_q[7] | delta_lf[0] | delta_lf[1] |
delta_lf[2] | delta_lf[3] | delta_lf[4] | delta_lf[5] | delta_lf[6] |
delta_lf[7] | skip[0] | skip[1] | skip[2] | skip[3] | skip[4] |
skip[5] | skip[6] | skip[7]) &&
(ref_frame[0] == -1 && ref_frame[1] == -1 && ref_frame[2] == -1 &&
ref_frame[3] == -1 && ref_frame[4] == -1 && ref_frame[5] == -1 &&
ref_frame[6] == -1 && ref_frame[7] == -1))) {
vp9_disable_segmentation(&cm->seg);
cpi->roi.enabled = 0;
return 0;
}
if (roi->roi_map) {
vpx_free(roi->roi_map);
roi->roi_map = NULL;
}
CHECK_MEM_ERROR(cm, roi->roi_map, vpx_malloc(rows * cols));
// Copy to ROI sturcture in the compressor.
memcpy(roi->roi_map, map, rows * cols);
memcpy(&roi->delta_q, delta_q, MAX_SEGMENTS * sizeof(delta_q[0]));
memcpy(&roi->delta_lf, delta_lf, MAX_SEGMENTS * sizeof(delta_lf[0]));
memcpy(&roi->skip, skip, MAX_SEGMENTS * sizeof(skip[0]));
memcpy(&roi->ref_frame, ref_frame, MAX_SEGMENTS * sizeof(ref_frame[0]));
roi->enabled = 1;
roi->rows = rows;
roi->cols = cols;
return 0;
}
int vp9_set_active_map(VP9_COMP *cpi, unsigned char *new_map_16x16, int rows, int vp9_set_active_map(VP9_COMP *cpi, unsigned char *new_map_16x16, int rows,
int cols) { int cols) {
if (rows == cpi->common.mb_rows && cols == cpi->common.mb_cols) { if (rows == cpi->common.mb_rows && cols == cpi->common.mb_cols) {
@@ -947,9 +812,6 @@ static void dealloc_compressor_data(VP9_COMP *cpi) {
vpx_free(cpi->active_map.map); vpx_free(cpi->active_map.map);
cpi->active_map.map = NULL; cpi->active_map.map = NULL;
vpx_free(cpi->roi.roi_map);
cpi->roi.roi_map = NULL;
vpx_free(cpi->consec_zero_mv); vpx_free(cpi->consec_zero_mv);
cpi->consec_zero_mv = NULL; cpi->consec_zero_mv = NULL;
@@ -1254,9 +1116,8 @@ static void alloc_util_frame_buffers(VP9_COMP *cpi) {
// For 1 pass cbr: allocate scaled_frame that may be used as an intermediate // For 1 pass cbr: allocate scaled_frame that may be used as an intermediate
// buffer for a 2 stage down-sampling: two stages of 1:2 down-sampling for a // buffer for a 2 stage down-sampling: two stages of 1:2 down-sampling for a
// target of 1/4x1/4. number_spatial_layers must be greater than 2. // target of 1/4x1/4.
if (is_one_pass_cbr_svc(cpi) && !cpi->svc.scaled_temp_is_alloc && if (is_one_pass_cbr_svc(cpi) && !cpi->svc.scaled_temp_is_alloc) {
cpi->svc.number_spatial_layers > 2) {
cpi->svc.scaled_temp_is_alloc = 1; cpi->svc.scaled_temp_is_alloc = 1;
if (vpx_realloc_frame_buffer( if (vpx_realloc_frame_buffer(
&cpi->svc.scaled_temp, cm->width >> 1, cm->height >> 1, &cpi->svc.scaled_temp, cm->width >> 1, cm->height >> 1,
@@ -1358,8 +1219,8 @@ static void set_tile_limits(VP9_COMP *cpi) {
} }
if (cpi->oxcf.target_level == LEVEL_AUTO) { if (cpi->oxcf.target_level == LEVEL_AUTO) {
const int level_tile_cols = const uint32_t pic_size = cpi->common.width * cpi->common.height;
log_tile_cols_from_picsize_level(cpi->common.width, cpi->common.height); const int level_tile_cols = log_tile_cols_from_picsize_level(pic_size);
if (cm->log2_tile_cols > level_tile_cols) { if (cm->log2_tile_cols > level_tile_cols) {
cm->log2_tile_cols = VPXMAX(level_tile_cols, min_log2_tile_cols); cm->log2_tile_cols = VPXMAX(level_tile_cols, min_log2_tile_cols);
} }
@@ -1987,8 +1848,6 @@ void vp9_change_config(struct VP9_COMP *cpi, const VP9EncoderConfig *oxcf) {
cm->mi_rows * cm->mi_cols * sizeof(*cpi->consec_zero_mv)); cm->mi_rows * cm->mi_cols * sizeof(*cpi->consec_zero_mv));
if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ) if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ)
vp9_cyclic_refresh_reset_resize(cpi); vp9_cyclic_refresh_reset_resize(cpi);
rc->rc_1_frame = 0;
rc->rc_2_frame = 0;
} }
if ((cpi->svc.number_temporal_layers > 1 && cpi->oxcf.rc_mode == VPX_CBR) || if ((cpi->svc.number_temporal_layers > 1 && cpi->oxcf.rc_mode == VPX_CBR) ||
@@ -1999,24 +1858,6 @@ void vp9_change_config(struct VP9_COMP *cpi, const VP9EncoderConfig *oxcf) {
(int)cpi->oxcf.target_bandwidth); (int)cpi->oxcf.target_bandwidth);
} }
// Check for resetting the rc flags (rc_1_frame, rc_2_frame) if the
// configuration change has a large change in avg_frame_bandwidth.
// For SVC check for resetting based on spatial layer average bandwidth.
// Also reset buffer level to optimal level.
if (cm->current_video_frame > 0) {
if (cpi->use_svc) {
vp9_svc_check_reset_layer_rc_flag(cpi);
} else {
if (rc->avg_frame_bandwidth > (3 * rc->last_avg_frame_bandwidth >> 1) ||
rc->avg_frame_bandwidth < (rc->last_avg_frame_bandwidth >> 1)) {
rc->rc_1_frame = 0;
rc->rc_2_frame = 0;
rc->bits_off_target = rc->optimal_buffer_level;
rc->buffer_level = rc->optimal_buffer_level;
}
}
}
cpi->alt_ref_source = NULL; cpi->alt_ref_source = NULL;
rc->is_src_frame_alt_ref = 0; rc->is_src_frame_alt_ref = 0;
@@ -2151,9 +1992,8 @@ VP9_COMP *vp9_create_compressor(VP9EncoderConfig *oxcf,
realloc_segmentation_maps(cpi); realloc_segmentation_maps(cpi);
CHECK_MEM_ERROR( CHECK_MEM_ERROR(cm, cpi->skin_map, vpx_calloc(cm->mi_rows * cm->mi_cols,
cm, cpi->skin_map, sizeof(cpi->skin_map[0])));
vpx_calloc(cm->mi_rows * cm->mi_cols, sizeof(cpi->skin_map[0])));
CHECK_MEM_ERROR(cm, cpi->alt_ref_aq, vp9_alt_ref_aq_create()); CHECK_MEM_ERROR(cm, cpi->alt_ref_aq, vp9_alt_ref_aq_create());
@@ -3016,26 +2856,18 @@ void vp9_update_reference_frames(VP9_COMP *cpi) {
if (cpi->oxcf.noise_sensitivity > 0 && denoise_svc(cpi) && if (cpi->oxcf.noise_sensitivity > 0 && denoise_svc(cpi) &&
cpi->denoiser.denoising_level > kDenLowLow) { cpi->denoiser.denoising_level > kDenLowLow) {
int svc_base_is_key = 0; int svc_base_is_key = 0;
int denoise_svc_second_layer = 0;
if (cpi->use_svc) { if (cpi->use_svc) {
int realloc_fail = 0; int realloc_fail = 0;
const int svc_buf_shift =
cpi->svc.number_spatial_layers - cpi->svc.spatial_layer_id == 2
? cpi->denoiser.num_ref_frames
: 0;
int layer = LAYER_IDS_TO_IDX(cpi->svc.spatial_layer_id, int layer = LAYER_IDS_TO_IDX(cpi->svc.spatial_layer_id,
cpi->svc.temporal_layer_id, cpi->svc.temporal_layer_id,
cpi->svc.number_temporal_layers); cpi->svc.number_temporal_layers);
LAYER_CONTEXT *lc = &cpi->svc.layer_context[layer]; LAYER_CONTEXT *lc = &cpi->svc.layer_context[layer];
svc_base_is_key = lc->is_key_frame; svc_base_is_key = lc->is_key_frame;
denoise_svc_second_layer =
cpi->svc.number_spatial_layers - cpi->svc.spatial_layer_id == 2 ? 1 // Check if we need to allocate extra buffers in the denoiser for
: 0;
// Check if we need to allocate extra buffers in the denoiser
// for
// refreshed frames. // refreshed frames.
realloc_fail = vp9_denoiser_realloc_svc( realloc_fail = vp9_denoiser_realloc_svc(
cm, &cpi->denoiser, svc_buf_shift, cpi->refresh_alt_ref_frame, cm, &cpi->denoiser, cpi->refresh_alt_ref_frame,
cpi->refresh_golden_frame, cpi->refresh_last_frame, cpi->alt_fb_idx, cpi->refresh_golden_frame, cpi->refresh_last_frame, cpi->alt_fb_idx,
cpi->gld_fb_idx, cpi->lst_fb_idx); cpi->gld_fb_idx, cpi->lst_fb_idx);
if (realloc_fail) if (realloc_fail)
@@ -3046,8 +2878,7 @@ void vp9_update_reference_frames(VP9_COMP *cpi) {
&cpi->denoiser, *cpi->Source, cpi->common.frame_type, &cpi->denoiser, *cpi->Source, cpi->common.frame_type,
cpi->refresh_alt_ref_frame, cpi->refresh_golden_frame, cpi->refresh_alt_ref_frame, cpi->refresh_golden_frame,
cpi->refresh_last_frame, cpi->alt_fb_idx, cpi->gld_fb_idx, cpi->refresh_last_frame, cpi->alt_fb_idx, cpi->gld_fb_idx,
cpi->lst_fb_idx, cpi->resize_pending, svc_base_is_key, cpi->lst_fb_idx, cpi->resize_pending, svc_base_is_key);
denoise_svc_second_layer);
} }
#endif #endif
if (is_one_pass_cbr_svc(cpi)) { if (is_one_pass_cbr_svc(cpi)) {
@@ -3482,9 +3313,8 @@ static void setup_denoiser_buffer(VP9_COMP *cpi) {
VP9_COMMON *const cm = &cpi->common; VP9_COMMON *const cm = &cpi->common;
if (cpi->oxcf.noise_sensitivity > 0 && if (cpi->oxcf.noise_sensitivity > 0 &&
!cpi->denoiser.frame_buffer_initialized) { !cpi->denoiser.frame_buffer_initialized) {
if (vp9_denoiser_alloc(cm, &cpi->svc, &cpi->denoiser, cpi->use_svc, if (vp9_denoiser_alloc(cm, cpi->use_svc, &cpi->denoiser, cm->width,
cpi->oxcf.noise_sensitivity, cm->width, cm->height, cm->height, cm->subsampling_x, cm->subsampling_y,
cm->subsampling_x, cm->subsampling_y,
#if CONFIG_VP9_HIGHBITDEPTH #if CONFIG_VP9_HIGHBITDEPTH
cm->use_highbitdepth, cm->use_highbitdepth,
#endif #endif
@@ -3765,8 +3595,6 @@ static void encode_without_recode_loop(VP9_COMP *cpi, size_t *size,
// it may be pretty bad for rate-control, // it may be pretty bad for rate-control,
// and I should handle it somehow // and I should handle it somehow
vp9_alt_ref_aq_setup_map(cpi->alt_ref_aq, cpi); vp9_alt_ref_aq_setup_map(cpi->alt_ref_aq, cpi);
} else if (cpi->roi.enabled && cm->frame_type != KEY_FRAME) {
apply_roi_map(cpi);
} }
apply_active_map(cpi); apply_active_map(cpi);
@@ -4497,15 +4325,6 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, size_t *size,
struct segmentation *const seg = &cm->seg; struct segmentation *const seg = &cm->seg;
TX_SIZE t; TX_SIZE t;
// SVC: skip encoding of enhancement layer if the layer target bandwidth = 0.
if (cpi->use_svc && cpi->svc.spatial_layer_id > 0 &&
!cpi->svc.rc_drop_superframe && cpi->oxcf.target_bandwidth == 0) {
cpi->svc.skip_enhancement_layer = 1;
vp9_rc_postencode_update_drop_frame(cpi);
cpi->ext_refresh_frame_flags_pending = 0;
return;
}
set_ext_overrides(cpi); set_ext_overrides(cpi);
vpx_clear_system_state(); vpx_clear_system_state();
@@ -4597,6 +4416,7 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, size_t *size,
if (vp9_rc_drop_frame(cpi) || if (vp9_rc_drop_frame(cpi) ||
(is_one_pass_cbr_svc(cpi) && cpi->svc.rc_drop_superframe == 1)) { (is_one_pass_cbr_svc(cpi) && cpi->svc.rc_drop_superframe == 1)) {
vp9_rc_postencode_update_drop_frame(cpi); vp9_rc_postencode_update_drop_frame(cpi);
++cm->current_video_frame;
cpi->ext_refresh_frame_flags_pending = 0; cpi->ext_refresh_frame_flags_pending = 0;
cpi->svc.rc_drop_superframe = 1; cpi->svc.rc_drop_superframe = 1;
cpi->last_frame_dropped = 1; cpi->last_frame_dropped = 1;
@@ -5009,7 +4829,6 @@ static void update_level_info(VP9_COMP *cpi, size_t *size, int arf_src_index) {
int i, idx; int i, idx;
uint64_t luma_samples, dur_end; uint64_t luma_samples, dur_end;
const uint32_t luma_pic_size = cm->width * cm->height; const uint32_t luma_pic_size = cm->width * cm->height;
const uint32_t luma_pic_breadth = VPXMAX(cm->width, cm->height);
LevelConstraint *const level_constraint = &cpi->level_constraint; LevelConstraint *const level_constraint = &cpi->level_constraint;
const int8_t level_index = level_constraint->level_index; const int8_t level_index = level_constraint->level_index;
double cpb_data_size; double cpb_data_size;
@@ -5113,11 +4932,6 @@ static void update_level_info(VP9_COMP *cpi, size_t *size, int arf_src_index) {
level_spec->max_luma_picture_size = luma_pic_size; level_spec->max_luma_picture_size = luma_pic_size;
} }
// update max_luma_picture_breadth
if (luma_pic_breadth > level_spec->max_luma_picture_breadth) {
level_spec->max_luma_picture_breadth = luma_pic_breadth;
}
// update compression_ratio // update compression_ratio
level_spec->compression_ratio = (double)level_stats->total_uncompressed_size * level_spec->compression_ratio = (double)level_stats->total_uncompressed_size *
cm->bit_depth / cm->bit_depth /
@@ -5138,15 +4952,6 @@ static void update_level_info(VP9_COMP *cpi, size_t *size, int arf_src_index) {
level_fail_messages[LUMA_PIC_SIZE_TOO_LARGE]); level_fail_messages[LUMA_PIC_SIZE_TOO_LARGE]);
} }
if (level_spec->max_luma_picture_breadth >
vp9_level_defs[level_index].max_luma_picture_breadth) {
level_constraint->fail_flag |= (1 << LUMA_PIC_BREADTH_TOO_LARGE);
vpx_internal_error(&cm->error, VPX_CODEC_ERROR,
"Failed to encode to the target level %d. %s",
vp9_level_defs[level_index].level,
level_fail_messages[LUMA_PIC_BREADTH_TOO_LARGE]);
}
if ((double)level_spec->max_luma_sample_rate > if ((double)level_spec->max_luma_sample_rate >
(double)vp9_level_defs[level_index].max_luma_sample_rate * (double)vp9_level_defs[level_index].max_luma_sample_rate *
(1 + SAMPLE_RATE_GRACE_P)) { (1 + SAMPLE_RATE_GRACE_P)) {
@@ -5347,6 +5152,8 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags,
cm->intra_only = 0; cm->intra_only = 0;
// if the flags indicate intra frame, but if the current picture is for // if the flags indicate intra frame, but if the current picture is for
// non-zero spatial layer, it should not be an intra picture. // non-zero spatial layer, it should not be an intra picture.
// TODO(Won Kap): this needs to change if per-layer intra frame is
// allowed.
if ((source->flags & VPX_EFLAG_FORCE_KF) && if ((source->flags & VPX_EFLAG_FORCE_KF) &&
cpi->svc.spatial_layer_id > cpi->svc.first_spatial_layer_to_encode) { cpi->svc.spatial_layer_id > cpi->svc.first_spatial_layer_to_encode) {
source->flags &= ~(unsigned int)(VPX_EFLAG_FORCE_KF); source->flags &= ~(unsigned int)(VPX_EFLAG_FORCE_KF);
@@ -5479,6 +5286,21 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags,
} }
#endif // CONFIG_REALTIME_ONLY #endif // CONFIG_REALTIME_ONLY
#if 1
{
VP9_COMMON *const cm = &cpi->common;
TWO_PASS *const twopass = &cpi->twopass;
GF_GROUP *const gf_group = &twopass->gf_group;
printf("Frame=%d, gf_group_update_type[gf_group_index=%d]=%d, "
"show_frame=%d\n",
cm->current_video_frame, gf_group->index,
gf_group->update_type[gf_group->index],
cm->show_frame);
}
#endif // 0
if (cm->refresh_frame_context) if (cm->refresh_frame_context)
cm->frame_contexts[cm->frame_context_idx] = *cm->fc; cm->frame_contexts[cm->frame_context_idx] = *cm->fc;
@@ -5513,6 +5335,13 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags,
double samples = 0.0; double samples = 0.0;
cpi->bytes += (int)(*size); cpi->bytes += (int)(*size);
#if 1
{
printf("Frame %d: rate: %d\n",
cm->current_video_frame, (int)(*size));
}
#endif // 0
if (cm->show_frame) { if (cm->show_frame) {
uint32_t bit_depth = 8; uint32_t bit_depth = 8;
uint32_t in_bit_depth = 8; uint32_t in_bit_depth = 8;
@@ -5542,6 +5371,19 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags,
cpi->total_samples += psnr.samples[0]; cpi->total_samples += psnr.samples[0];
samples = psnr.samples[0]; samples = psnr.samples[0];
#if 1
{
const int rddiv = cpi->rd.RDDIV;
const int rdmult = cpi->rd.RDMULT;
const int64_t rdcost = RDCOST(
rdmult, rddiv, (int)(*size) * 8, psnr.sse[0]);
printf("Frame %d: distortion: %" PRIu64 " rdcost: %" PRId64 "\n",
cm->current_video_frame, psnr.sse[0], rdcost);
printf("%d %d\n", rddiv, rdmult);
}
#endif // 0
{ {
PSNR_STATS psnr2; PSNR_STATS psnr2;
double frame_ssim2 = 0, weight = 0; double frame_ssim2 = 0, weight = 0;

View File

@@ -383,7 +383,6 @@ typedef struct {
VP9_LEVEL level; VP9_LEVEL level;
uint64_t max_luma_sample_rate; uint64_t max_luma_sample_rate;
uint32_t max_luma_picture_size; uint32_t max_luma_picture_size;
uint32_t max_luma_picture_breadth;
double average_bitrate; // in kilobits per second double average_bitrate; // in kilobits per second
double max_cpb_size; // in kilobits double max_cpb_size; // in kilobits
double compression_ratio; double compression_ratio;
@@ -423,15 +422,14 @@ typedef struct {
typedef enum { typedef enum {
BITRATE_TOO_LARGE = 0, BITRATE_TOO_LARGE = 0,
LUMA_PIC_SIZE_TOO_LARGE, LUMA_PIC_SIZE_TOO_LARGE = 1,
LUMA_PIC_BREADTH_TOO_LARGE, LUMA_SAMPLE_RATE_TOO_LARGE = 2,
LUMA_SAMPLE_RATE_TOO_LARGE, CPB_TOO_LARGE = 3,
CPB_TOO_LARGE, COMPRESSION_RATIO_TOO_SMALL = 4,
COMPRESSION_RATIO_TOO_SMALL, TOO_MANY_COLUMN_TILE = 5,
TOO_MANY_COLUMN_TILE, ALTREF_DIST_TOO_SMALL = 6,
ALTREF_DIST_TOO_SMALL, TOO_MANY_REF_BUFFER = 7,
TOO_MANY_REF_BUFFER, TARGET_LEVEL_FAIL_IDS = 8
TARGET_LEVEL_FAIL_IDS
} TARGET_LEVEL_FAIL_ID; } TARGET_LEVEL_FAIL_ID;
typedef struct { typedef struct {
@@ -723,8 +721,6 @@ typedef struct VP9_COMP {
uint8_t *count_arf_frame_usage; uint8_t *count_arf_frame_usage;
uint8_t *count_lastgolden_frame_usage; uint8_t *count_lastgolden_frame_usage;
vpx_roi_map_t roi;
} VP9_COMP; } VP9_COMP;
void vp9_initialize_enc(void); void vp9_initialize_enc(void);
@@ -870,8 +866,9 @@ static INLINE int is_one_pass_cbr_svc(const struct VP9_COMP *const cpi) {
#if CONFIG_VP9_TEMPORAL_DENOISING #if CONFIG_VP9_TEMPORAL_DENOISING
static INLINE int denoise_svc(const struct VP9_COMP *const cpi) { static INLINE int denoise_svc(const struct VP9_COMP *const cpi) {
return (!cpi->use_svc || (cpi->use_svc && cpi->svc.spatial_layer_id >= return (!cpi->use_svc ||
cpi->svc.first_layer_denoise)); (cpi->use_svc &&
cpi->svc.spatial_layer_id == cpi->svc.number_spatial_layers - 1));
} }
#endif #endif
@@ -923,14 +920,10 @@ static INLINE int get_level_index(VP9_LEVEL level) {
// Return the log2 value of max column tiles corresponding to the level that // Return the log2 value of max column tiles corresponding to the level that
// the picture size fits into. // the picture size fits into.
static INLINE int log_tile_cols_from_picsize_level(uint32_t width, static INLINE int log_tile_cols_from_picsize_level(uint32_t pic_size) {
uint32_t height) {
int i; int i;
const uint32_t pic_size = width * height;
const uint32_t pic_breadth = VPXMAX(width, height);
for (i = LEVEL_1; i < LEVEL_MAX; ++i) { for (i = LEVEL_1; i < LEVEL_MAX; ++i) {
if (vp9_level_defs[i].max_luma_picture_size >= pic_size && if (vp9_level_defs[i].max_luma_picture_size > pic_size) {
vp9_level_defs[i].max_luma_picture_breadth >= pic_breadth) {
return get_msb(vp9_level_defs[i].max_col_tiles); return get_msb(vp9_level_defs[i].max_col_tiles);
} }
} }
@@ -939,10 +932,6 @@ static INLINE int log_tile_cols_from_picsize_level(uint32_t width,
VP9_LEVEL vp9_get_level(const Vp9LevelSpec *const level_spec); VP9_LEVEL vp9_get_level(const Vp9LevelSpec *const level_spec);
int vp9_set_roi_map(VP9_COMP *cpi, unsigned char *map, unsigned int rows,
unsigned int cols, int delta_q[8], int delta_lf[8],
int skip[8], int ref_frame[8]);
void vp9_new_framerate(VP9_COMP *cpi, double framerate); void vp9_new_framerate(VP9_COMP *cpi, double framerate);
void vp9_set_row_mt(VP9_COMP *cpi); void vp9_set_row_mt(VP9_COMP *cpi);

View File

@@ -66,8 +66,8 @@ static int get_max_tile_cols(VP9_COMP *cpi) {
log2_tile_cols = log2_tile_cols =
clamp(cpi->oxcf.tile_columns, min_log2_tile_cols, max_log2_tile_cols); clamp(cpi->oxcf.tile_columns, min_log2_tile_cols, max_log2_tile_cols);
if (cpi->oxcf.target_level == LEVEL_AUTO) { if (cpi->oxcf.target_level == LEVEL_AUTO) {
const int level_tile_cols = const uint32_t pic_size = cpi->common.width * cpi->common.height;
log_tile_cols_from_picsize_level(cpi->common.width, cpi->common.height); const int level_tile_cols = log_tile_cols_from_picsize_level(pic_size);
if (log2_tile_cols > level_tile_cols) { if (log2_tile_cols > level_tile_cols) {
log2_tile_cols = VPXMAX(level_tile_cols, min_log2_tile_cols); log2_tile_cols = VPXMAX(level_tile_cols, min_log2_tile_cols);
} }
@@ -390,9 +390,8 @@ void vp9_row_mt_sync_write_dummy(VP9RowMTSync *const row_mt_sync, int r, int c,
} }
#if !CONFIG_REALTIME_ONLY #if !CONFIG_REALTIME_ONLY
static int first_pass_worker_hook(void *arg1, void *arg2) { static int first_pass_worker_hook(EncWorkerData *const thread_data,
EncWorkerData *const thread_data = (EncWorkerData *)arg1; MultiThreadHandle *multi_thread_ctxt) {
MultiThreadHandle *multi_thread_ctxt = (MultiThreadHandle *)arg2;
VP9_COMP *const cpi = thread_data->cpi; VP9_COMP *const cpi = thread_data->cpi;
const VP9_COMMON *const cm = &cpi->common; const VP9_COMMON *const cm = &cpi->common;
const int tile_cols = 1 << cm->log2_tile_cols; const int tile_cols = 1 << cm->log2_tile_cols;
@@ -471,8 +470,8 @@ void vp9_encode_fp_row_mt(VP9_COMP *cpi) {
} }
} }
launch_enc_workers(cpi, first_pass_worker_hook, multi_thread_ctxt, launch_enc_workers(cpi, (VPxWorkerHook)first_pass_worker_hook,
num_workers); multi_thread_ctxt, num_workers);
first_tile_col = &cpi->tile_data[0]; first_tile_col = &cpi->tile_data[0];
for (i = 1; i < tile_cols; i++) { for (i = 1; i < tile_cols; i++) {
@@ -481,9 +480,8 @@ void vp9_encode_fp_row_mt(VP9_COMP *cpi) {
} }
} }
static int temporal_filter_worker_hook(void *arg1, void *arg2) { static int temporal_filter_worker_hook(EncWorkerData *const thread_data,
EncWorkerData *const thread_data = (EncWorkerData *)arg1; MultiThreadHandle *multi_thread_ctxt) {
MultiThreadHandle *multi_thread_ctxt = (MultiThreadHandle *)arg2;
VP9_COMP *const cpi = thread_data->cpi; VP9_COMP *const cpi = thread_data->cpi;
const VP9_COMMON *const cm = &cpi->common; const VP9_COMMON *const cm = &cpi->common;
const int tile_cols = 1 << cm->log2_tile_cols; const int tile_cols = 1 << cm->log2_tile_cols;
@@ -555,14 +553,13 @@ void vp9_temporal_filter_row_mt(VP9_COMP *cpi) {
} }
} }
launch_enc_workers(cpi, temporal_filter_worker_hook, multi_thread_ctxt, launch_enc_workers(cpi, (VPxWorkerHook)temporal_filter_worker_hook,
num_workers); multi_thread_ctxt, num_workers);
} }
#endif // !CONFIG_REALTIME_ONLY #endif // !CONFIG_REALTIME_ONLY
static int enc_row_mt_worker_hook(void *arg1, void *arg2) { static int enc_row_mt_worker_hook(EncWorkerData *const thread_data,
EncWorkerData *const thread_data = (EncWorkerData *)arg1; MultiThreadHandle *multi_thread_ctxt) {
MultiThreadHandle *multi_thread_ctxt = (MultiThreadHandle *)arg2;
VP9_COMP *const cpi = thread_data->cpi; VP9_COMP *const cpi = thread_data->cpi;
const VP9_COMMON *const cm = &cpi->common; const VP9_COMMON *const cm = &cpi->common;
const int tile_cols = 1 << cm->log2_tile_cols; const int tile_cols = 1 << cm->log2_tile_cols;
@@ -651,8 +648,8 @@ void vp9_encode_tiles_row_mt(VP9_COMP *cpi) {
} }
} }
launch_enc_workers(cpi, enc_row_mt_worker_hook, multi_thread_ctxt, launch_enc_workers(cpi, (VPxWorkerHook)enc_row_mt_worker_hook,
num_workers); multi_thread_ctxt, num_workers);
for (i = 0; i < num_workers; i++) { for (i = 0; i < num_workers; i++) {
VPxWorker *const worker = &cpi->workers[i]; VPxWorker *const worker = &cpi->workers[i];

View File

@@ -44,6 +44,7 @@
#define COMPLEXITY_STATS_OUTPUT 0 #define COMPLEXITY_STATS_OUTPUT 0
#define FIRST_PASS_Q 10.0 #define FIRST_PASS_Q 10.0
#define GF_MAX_BOOST 96.0
#define INTRA_MODE_PENALTY 1024 #define INTRA_MODE_PENALTY 1024
#define MIN_ARF_GF_BOOST 240 #define MIN_ARF_GF_BOOST 240
#define MIN_DECAY_FACTOR 0.01 #define MIN_DECAY_FACTOR 0.01
@@ -731,7 +732,8 @@ static void first_pass_stat_calc(VP9_COMP *cpi, FIRSTPASS_STATS *fps,
// Exclude any image dead zone // Exclude any image dead zone
if (fp_acc_data->image_data_start_row > 0) { if (fp_acc_data->image_data_start_row > 0) {
fp_acc_data->intra_skip_count = fp_acc_data->intra_skip_count =
VPXMAX(0, fp_acc_data->intra_skip_count - VPXMAX(0,
fp_acc_data->intra_skip_count -
(fp_acc_data->image_data_start_row * cm->mb_cols * 2)); (fp_acc_data->image_data_start_row * cm->mb_cols * 2));
} }
@@ -1947,7 +1949,6 @@ static void accumulate_frame_motion_stats(const FIRSTPASS_STATS *stats,
} }
#define BASELINE_ERR_PER_MB 12500.0 #define BASELINE_ERR_PER_MB 12500.0
#define GF_MAX_BOOST 96.0
static double calc_frame_boost(VP9_COMP *cpi, const FIRSTPASS_STATS *this_frame, static double calc_frame_boost(VP9_COMP *cpi, const FIRSTPASS_STATS *this_frame,
double this_frame_mv_in_out) { double this_frame_mv_in_out) {
double frame_boost; double frame_boost;
@@ -2237,6 +2238,9 @@ static void allocate_gf_group_bits(VP9_COMP *cpi, int64_t gf_group_bits,
} }
gf_group->arf_update_idx[0] = arf_buffer_indices[0]; gf_group->arf_update_idx[0] = arf_buffer_indices[0];
gf_group->arf_ref_idx[0] = arf_buffer_indices[0]; gf_group->arf_ref_idx[0] = arf_buffer_indices[0];
// Step over the golden frame / overlay frame
if (EOF == input_stats(twopass, &frame_stats)) return;
} }
// Deduct the boost bits for arf (or gf if it is not a key frame) // Deduct the boost bits for arf (or gf if it is not a key frame)
@@ -2281,8 +2285,7 @@ static void allocate_gf_group_bits(VP9_COMP *cpi, int64_t gf_group_bits,
// Define middle frame // Define middle frame
mid_frame_idx = frame_index + (rc->baseline_gf_interval >> 1) - 1; mid_frame_idx = frame_index + (rc->baseline_gf_interval >> 1) - 1;
normal_frames = normal_frames = (rc->baseline_gf_interval - rc->source_alt_ref_pending);
rc->baseline_gf_interval - (key_frame || rc->source_alt_ref_pending);
if (normal_frames > 1) if (normal_frames > 1)
normal_frame_bits = (int)(total_group_bits / normal_frames); normal_frame_bits = (int)(total_group_bits / normal_frames);
else else
@@ -2380,8 +2383,6 @@ static void adjust_group_arnr_filter(VP9_COMP *cpi, double section_noise,
// Analyse and define a gf/arf group. // Analyse and define a gf/arf group.
#define ARF_DECAY_BREAKOUT 0.10 #define ARF_DECAY_BREAKOUT 0.10
#define ARF_ABS_ZOOM_THRESH 4.0
static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
VP9_COMMON *const cm = &cpi->common; VP9_COMMON *const cm = &cpi->common;
RATE_CONTROL *const rc = &cpi->rc; RATE_CONTROL *const rc = &cpi->rc;
@@ -2410,6 +2411,7 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
double mv_in_out_accumulator = 0.0; double mv_in_out_accumulator = 0.0;
double abs_mv_in_out_accumulator = 0.0; double abs_mv_in_out_accumulator = 0.0;
double mv_ratio_accumulator_thresh; double mv_ratio_accumulator_thresh;
double mv_in_out_thresh;
double abs_mv_in_out_thresh; double abs_mv_in_out_thresh;
double sr_accumulator = 0.0; double sr_accumulator = 0.0;
const double av_err = get_distribution_av_err(cpi, twopass); const double av_err = get_distribution_av_err(cpi, twopass);
@@ -2455,7 +2457,8 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
// Motion breakout threshold for loop below depends on image size. // Motion breakout threshold for loop below depends on image size.
mv_ratio_accumulator_thresh = mv_ratio_accumulator_thresh =
(cpi->initial_height + cpi->initial_width) / 4.0; (cpi->initial_height + cpi->initial_width) / 4.0;
abs_mv_in_out_thresh = ARF_ABS_ZOOM_THRESH; mv_in_out_thresh = (cpi->initial_height + cpi->initial_width) / 300.0;
abs_mv_in_out_thresh = (cpi->initial_height + cpi->initial_width) / 200.0;
// Set a maximum and minimum interval for the GF group. // Set a maximum and minimum interval for the GF group.
// If the image appears almost completely static we can extend beyond this. // If the image appears almost completely static we can extend beyond this.
@@ -2540,17 +2543,14 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
// Update the accumulator for second ref error difference. // Update the accumulator for second ref error difference.
// This is intended to give an indication of how much the coded error is // This is intended to give an indication of how much the coded error is
// increasing over time. // increasing over time.
if (i == 1) {
sr_accumulator += next_frame.coded_error;
} else {
sr_accumulator += (next_frame.sr_coded_error - next_frame.coded_error); sr_accumulator += (next_frame.sr_coded_error - next_frame.coded_error);
} sr_accumulator = VPXMAX(0.0, sr_accumulator);
} }
// Break out conditions. // Break out conditions.
// Break at maximum of active_max_gf_interval unless almost totally static. if (
if (((twopass->kf_zeromotion_pct < STATIC_KF_GROUP_THRESH) && // Break at active_max_gf_interval unless almost totally static.
(i >= active_max_gf_interval) && (zero_motion_accumulator < 0.995)) || ((i >= active_max_gf_interval) && (zero_motion_accumulator < 0.995)) ||
( (
// Don't break out with a very short interval. // Don't break out with a very short interval.
(i >= active_min_gf_interval) && (i >= active_min_gf_interval) &&
@@ -2559,6 +2559,7 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
(!flash_detected) && (!flash_detected) &&
((mv_ratio_accumulator > mv_ratio_accumulator_thresh) || ((mv_ratio_accumulator > mv_ratio_accumulator_thresh) ||
(abs_mv_in_out_accumulator > abs_mv_in_out_thresh) || (abs_mv_in_out_accumulator > abs_mv_in_out_thresh) ||
(mv_in_out_accumulator < -mv_in_out_thresh) ||
(sr_accumulator > next_frame.intra_error)))) { (sr_accumulator > next_frame.intra_error)))) {
break; break;
} }
@@ -2570,8 +2571,8 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
rc->constrained_gf_group = (i >= rc->frames_to_key) ? 1 : 0; rc->constrained_gf_group = (i >= rc->frames_to_key) ? 1 : 0;
// Should we use the alternate reference frame. // Should we use the alternate reference frame.
if ((twopass->kf_zeromotion_pct < STATIC_KF_GROUP_THRESH) && allow_alt_ref && if (allow_alt_ref && (i < cpi->oxcf.lag_in_frames) &&
(i < cpi->oxcf.lag_in_frames) && (i >= rc->min_gf_interval)) { (i >= rc->min_gf_interval)) {
const int forward_frames = (rc->frames_to_key - i >= i - 1) const int forward_frames = (rc->frames_to_key - i >= i - 1)
? i - 1 ? i - 1
: VPXMAX(0, rc->frames_to_key - i); : VPXMAX(0, rc->frames_to_key - i);
@@ -2599,10 +2600,8 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
#endif #endif
// Set the interval until the next gf. // Set the interval until the next gf.
rc->baseline_gf_interval = // rc->baseline_gf_interval = 8;
(twopass->kf_zeromotion_pct < STATIC_KF_GROUP_THRESH) rc->baseline_gf_interval = i - (is_key_frame || rc->source_alt_ref_pending);
? (i - (is_key_frame || rc->source_alt_ref_pending))
: i;
// Only encode alt reference frame in temporal base layer. So // Only encode alt reference frame in temporal base layer. So
// baseline_gf_interval should be multiple of a temporal layer group // baseline_gf_interval should be multiple of a temporal layer group
@@ -2700,26 +2699,6 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
#endif #endif
} }
// Intra / Inter threshold very low
#define VERY_LOW_II 1.5
// Clean slide transitions we expect a sharp single frame spike in error.
#define ERROR_SPIKE 5.0
// Slide show transition detection.
// Tests for case where there is very low error either side of the current frame
// but much higher just for this frame. This can help detect key frames in
// slide shows even where the slides are pictures of different sizes.
// Also requires that intra and inter errors are very similar to help eliminate
// harmful false positives.
// It will not help if the transition is a fade or other multi-frame effect.
static int slide_transition(const FIRSTPASS_STATS *this_frame,
const FIRSTPASS_STATS *last_frame,
const FIRSTPASS_STATS *next_frame) {
return (this_frame->intra_error < (this_frame->coded_error * VERY_LOW_II)) &&
(this_frame->coded_error > (last_frame->coded_error * ERROR_SPIKE)) &&
(this_frame->coded_error > (next_frame->coded_error * ERROR_SPIKE));
}
// Threshold for use of the lagging second reference frame. High second ref // Threshold for use of the lagging second reference frame. High second ref
// usage may point to a transient event like a flash or occlusion rather than // usage may point to a transient event like a flash or occlusion rather than
// a real scene cut. // a real scene cut.
@@ -2764,7 +2743,6 @@ static int test_candidate_kf(TWO_PASS *twopass,
if ((this_frame->pcnt_second_ref < SECOND_REF_USEAGE_THRESH) && if ((this_frame->pcnt_second_ref < SECOND_REF_USEAGE_THRESH) &&
(next_frame->pcnt_second_ref < SECOND_REF_USEAGE_THRESH) && (next_frame->pcnt_second_ref < SECOND_REF_USEAGE_THRESH) &&
((this_frame->pcnt_inter < VERY_LOW_INTER_THRESH) || ((this_frame->pcnt_inter < VERY_LOW_INTER_THRESH) ||
(slide_transition(this_frame, last_frame, next_frame)) ||
((pcnt_intra > MIN_INTRA_LEVEL) && ((pcnt_intra > MIN_INTRA_LEVEL) &&
(pcnt_intra > (INTRA_VS_INTER_THRESH * modified_pcnt_inter)) && (pcnt_intra > (INTRA_VS_INTER_THRESH * modified_pcnt_inter)) &&
((this_frame->intra_error / ((this_frame->intra_error /
@@ -2836,7 +2814,6 @@ static int test_candidate_kf(TWO_PASS *twopass,
#define FRAMES_TO_CHECK_DECAY 8 #define FRAMES_TO_CHECK_DECAY 8
#define MIN_KF_TOT_BOOST 300 #define MIN_KF_TOT_BOOST 300
#define KF_BOOST_SCAN_MAX_FRAMES 32 #define KF_BOOST_SCAN_MAX_FRAMES 32
#define KF_ABS_ZOOM_THRESH 6.0
#ifdef AGGRESSIVE_VBR #ifdef AGGRESSIVE_VBR
#define KF_MAX_FRAME_BOOST 80.0 #define KF_MAX_FRAME_BOOST 80.0
@@ -2864,7 +2841,6 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
double kf_group_err = 0.0; double kf_group_err = 0.0;
double recent_loop_decay[FRAMES_TO_CHECK_DECAY]; double recent_loop_decay[FRAMES_TO_CHECK_DECAY];
double sr_accumulator = 0.0; double sr_accumulator = 0.0;
double abs_mv_in_out_accumulator = 0.0;
const double av_err = get_distribution_av_err(cpi, twopass); const double av_err = get_distribution_av_err(cpi, twopass);
vp9_zero(next_frame); vp9_zero(next_frame);
@@ -3029,14 +3005,8 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
double zm_factor; double zm_factor;
// Monitor for static sections. // Monitor for static sections.
// First frame in kf group the second ref indicator is invalid.
if (i > 0) {
zero_motion_accumulator = VPXMIN( zero_motion_accumulator = VPXMIN(
zero_motion_accumulator, get_zero_motion_factor(cpi, &next_frame)); zero_motion_accumulator, get_zero_motion_factor(cpi, &next_frame));
} else {
zero_motion_accumulator =
next_frame.pcnt_inter - next_frame.pcnt_motion;
}
// Factor 0.75-1.25 based on how much of frame is static. // Factor 0.75-1.25 based on how much of frame is static.
zm_factor = (0.75 + (zero_motion_accumulator / 2.0)); zm_factor = (0.75 + (zero_motion_accumulator / 2.0));
@@ -3050,14 +3020,7 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
KF_MAX_FRAME_BOOST * zm_factor); KF_MAX_FRAME_BOOST * zm_factor);
boost_score += frame_boost; boost_score += frame_boost;
if (frame_boost < 25.00) break;
// Measure of zoom. Large zoom tends to indicate reduced boost.
abs_mv_in_out_accumulator +=
fabs(next_frame.mv_in_out_count * next_frame.pcnt_motion);
if ((frame_boost < 25.00) ||
(abs_mv_in_out_accumulator > KF_ABS_ZOOM_THRESH))
break;
} else { } else {
break; break;
} }
@@ -3072,16 +3035,10 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
twopass->section_intra_rating = calculate_section_intra_ratio( twopass->section_intra_rating = calculate_section_intra_ratio(
start_position, twopass->stats_in_end, rc->frames_to_key); start_position, twopass->stats_in_end, rc->frames_to_key);
// Special case for static / slide show content but dont apply
// if the kf group is very short.
if ((zero_motion_accumulator > 0.99) && (rc->frames_to_key > 8)) {
rc->kf_boost = VPXMAX((rc->frames_to_key * 100), MAX_KF_TOT_BOOST);
} else {
// Apply various clamps for min and max boost // Apply various clamps for min and max boost
rc->kf_boost = VPXMAX((int)boost_score, (rc->frames_to_key * 3)); rc->kf_boost = VPXMAX((int)boost_score, (rc->frames_to_key * 3));
rc->kf_boost = VPXMAX(rc->kf_boost, MIN_KF_TOT_BOOST); rc->kf_boost = VPXMAX(rc->kf_boost, MIN_KF_TOT_BOOST);
rc->kf_boost = VPXMIN(rc->kf_boost, MAX_KF_TOT_BOOST); rc->kf_boost = VPXMIN(rc->kf_boost, MAX_KF_TOT_BOOST);
}
// Work out how many bits to allocate for the key frame itself. // Work out how many bits to allocate for the key frame itself.
kf_bits = calculate_boost_bits((rc->frames_to_key - 1), rc->kf_boost, kf_bits = calculate_boost_bits((rc->frames_to_key - 1), rc->kf_boost,

View File

@@ -120,12 +120,12 @@ typedef enum {
typedef struct { typedef struct {
unsigned char index; unsigned char index;
unsigned char first_inter_index; unsigned char first_inter_index;
RATE_FACTOR_LEVEL rf_level[MAX_STATIC_GF_GROUP_LENGTH + 1]; RATE_FACTOR_LEVEL rf_level[(MAX_LAG_BUFFERS * 2) + 1];
FRAME_UPDATE_TYPE update_type[MAX_STATIC_GF_GROUP_LENGTH + 1]; FRAME_UPDATE_TYPE update_type[(MAX_LAG_BUFFERS * 2) + 1];
unsigned char arf_src_offset[MAX_STATIC_GF_GROUP_LENGTH + 1]; unsigned char arf_src_offset[(MAX_LAG_BUFFERS * 2) + 1];
unsigned char arf_update_idx[MAX_STATIC_GF_GROUP_LENGTH + 1]; unsigned char arf_update_idx[(MAX_LAG_BUFFERS * 2) + 1];
unsigned char arf_ref_idx[MAX_STATIC_GF_GROUP_LENGTH + 1]; unsigned char arf_ref_idx[(MAX_LAG_BUFFERS * 2) + 1];
int bit_allocation[MAX_STATIC_GF_GROUP_LENGTH + 1]; int bit_allocation[(MAX_LAG_BUFFERS * 2) + 1];
} GF_GROUP; } GF_GROUP;
typedef struct { typedef struct {

View File

@@ -25,9 +25,7 @@ typedef struct {
} ref[MAX_REF_FRAMES]; } ref[MAX_REF_FRAMES];
} MBGRAPH_MB_STATS; } MBGRAPH_MB_STATS;
typedef struct { typedef struct { MBGRAPH_MB_STATS *mb_stats; } MBGRAPH_FRAME_STATS;
MBGRAPH_MB_STATS *mb_stats;
} MBGRAPH_FRAME_STATS;
struct VP9_COMP; struct VP9_COMP;

View File

@@ -1785,10 +1785,7 @@ static int vector_match(int16_t *ref, int16_t *src, int bwl) {
} }
static const MV search_pos[4] = { static const MV search_pos[4] = {
{ -1, 0 }, { -1, 0 }, { 0, -1 }, { 0, 1 }, { 1, 0 },
{ 0, -1 },
{ 0, 1 },
{ 1, 0 },
}; };
unsigned int vp9_int_pro_motion_estimation(const VP9_COMP *cpi, MACROBLOCK *x, unsigned int vp9_int_pro_motion_estimation(const VP9_COMP *cpi, MACROBLOCK *x,
@@ -1879,10 +1876,7 @@ unsigned int vp9_int_pro_motion_estimation(const VP9_COMP *cpi, MACROBLOCK *x,
{ {
const uint8_t *const pos[4] = { const uint8_t *const pos[4] = {
ref_buf - ref_stride, ref_buf - ref_stride, ref_buf - 1, ref_buf + 1, ref_buf + ref_stride,
ref_buf - 1,
ref_buf + 1,
ref_buf + ref_stride,
}; };
cpi->fn_ptr[bsize].sdx4df(src_buf, src_stride, pos, ref_stride, this_sad); cpi->fn_ptr[bsize].sdx4df(src_buf, src_stride, pos, ref_stride, this_sad);

View File

@@ -21,15 +21,6 @@
#include "vp9/encoder/vp9_noise_estimate.h" #include "vp9/encoder/vp9_noise_estimate.h"
#include "vp9/encoder/vp9_encoder.h" #include "vp9/encoder/vp9_encoder.h"
#if CONFIG_VP9_TEMPORAL_DENOISING
// For SVC: only do noise estimation on top spatial layer.
static INLINE int noise_est_svc(const struct VP9_COMP *const cpi) {
return (!cpi->use_svc ||
(cpi->use_svc &&
cpi->svc.spatial_layer_id == cpi->svc.number_spatial_layers - 1));
}
#endif
void vp9_noise_estimate_init(NOISE_ESTIMATE *const ne, int width, int height) { void vp9_noise_estimate_init(NOISE_ESTIMATE *const ne, int width, int height) {
ne->enabled = 0; ne->enabled = 0;
ne->level = kLowLow; ne->level = kLowLow;
@@ -54,7 +45,7 @@ static int enable_noise_estimation(VP9_COMP *const cpi) {
#endif #endif
// Enable noise estimation if denoising is on. // Enable noise estimation if denoising is on.
#if CONFIG_VP9_TEMPORAL_DENOISING #if CONFIG_VP9_TEMPORAL_DENOISING
if (cpi->oxcf.noise_sensitivity > 0 && noise_est_svc(cpi) && if (cpi->oxcf.noise_sensitivity > 0 && denoise_svc(cpi) &&
cpi->common.width >= 320 && cpi->common.height >= 180) cpi->common.width >= 320 && cpi->common.height >= 180)
return 1; return 1;
#endif #endif
@@ -120,7 +111,7 @@ void vp9_update_noise_estimate(VP9_COMP *const cpi) {
// Estimate is between current source and last source. // Estimate is between current source and last source.
YV12_BUFFER_CONFIG *last_source = cpi->Last_Source; YV12_BUFFER_CONFIG *last_source = cpi->Last_Source;
#if CONFIG_VP9_TEMPORAL_DENOISING #if CONFIG_VP9_TEMPORAL_DENOISING
if (cpi->oxcf.noise_sensitivity > 0 && noise_est_svc(cpi)) { if (cpi->oxcf.noise_sensitivity > 0 && denoise_svc(cpi)) {
last_source = &cpi->denoiser.last_source; last_source = &cpi->denoiser.last_source;
// Tune these thresholds for different resolutions when denoising is // Tune these thresholds for different resolutions when denoising is
// enabled. // enabled.
@@ -140,7 +131,7 @@ void vp9_update_noise_estimate(VP9_COMP *const cpi) {
(cpi->svc.number_spatial_layers == 1 && (cpi->svc.number_spatial_layers == 1 &&
(ne->last_w != cm->width || ne->last_h != cm->height))) { (ne->last_w != cm->width || ne->last_h != cm->height))) {
#if CONFIG_VP9_TEMPORAL_DENOISING #if CONFIG_VP9_TEMPORAL_DENOISING
if (cpi->oxcf.noise_sensitivity > 0 && noise_est_svc(cpi)) if (cpi->oxcf.noise_sensitivity > 0 && denoise_svc(cpi))
copy_frame(&cpi->denoiser.last_source, cpi->Source); copy_frame(&cpi->denoiser.last_source, cpi->Source);
#endif #endif
if (last_source != NULL) { if (last_source != NULL) {
@@ -155,7 +146,7 @@ void vp9_update_noise_estimate(VP9_COMP *const cpi) {
ne->count = 0; ne->count = 0;
ne->num_frames_estimate = 10; ne->num_frames_estimate = 10;
#if CONFIG_VP9_TEMPORAL_DENOISING #if CONFIG_VP9_TEMPORAL_DENOISING
if (cpi->oxcf.noise_sensitivity > 0 && noise_est_svc(cpi) && if (cpi->oxcf.noise_sensitivity > 0 && denoise_svc(cpi) &&
cpi->svc.current_superframe > 1) { cpi->svc.current_superframe > 1) {
vp9_denoiser_set_noise_level(&cpi->denoiser, ne->level); vp9_denoiser_set_noise_level(&cpi->denoiser, ne->level);
copy_frame(&cpi->denoiser.last_source, cpi->Source); copy_frame(&cpi->denoiser.last_source, cpi->Source);
@@ -258,7 +249,7 @@ void vp9_update_noise_estimate(VP9_COMP *const cpi) {
// Normalize. // Normalize.
avg_est = avg_est / num_samples; avg_est = avg_est / num_samples;
// Update noise estimate. // Update noise estimate.
ne->value = (int)((3 * ne->value + avg_est) >> 2); ne->value = (int)((15 * ne->value + avg_est) >> 4);
ne->count++; ne->count++;
if (ne->count == ne->num_frames_estimate) { if (ne->count == ne->num_frames_estimate) {
// Reset counter and check noise level condition. // Reset counter and check noise level condition.
@@ -266,14 +257,14 @@ void vp9_update_noise_estimate(VP9_COMP *const cpi) {
ne->count = 0; ne->count = 0;
ne->level = vp9_noise_estimate_extract_level(ne); ne->level = vp9_noise_estimate_extract_level(ne);
#if CONFIG_VP9_TEMPORAL_DENOISING #if CONFIG_VP9_TEMPORAL_DENOISING
if (cpi->oxcf.noise_sensitivity > 0 && noise_est_svc(cpi)) if (cpi->oxcf.noise_sensitivity > 0 && denoise_svc(cpi))
vp9_denoiser_set_noise_level(&cpi->denoiser, ne->level); vp9_denoiser_set_noise_level(&cpi->denoiser, ne->level);
#endif #endif
} }
} }
} }
#if CONFIG_VP9_TEMPORAL_DENOISING #if CONFIG_VP9_TEMPORAL_DENOISING
if (cpi->oxcf.noise_sensitivity > 0 && noise_est_svc(cpi)) if (cpi->oxcf.noise_sensitivity > 0 && denoise_svc(cpi))
copy_frame(&cpi->denoiser.last_source, cpi->Source); copy_frame(&cpi->denoiser.last_source, cpi->Source);
#endif #endif
} }

View File

@@ -1488,6 +1488,7 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data,
int skip_ref_find_pred[4] = { 0 }; int skip_ref_find_pred[4] = { 0 };
unsigned int sse_zeromv_normalized = UINT_MAX; unsigned int sse_zeromv_normalized = UINT_MAX;
unsigned int best_sse_sofar = UINT_MAX; unsigned int best_sse_sofar = UINT_MAX;
unsigned int thresh_svc_skip_golden = 500;
#if CONFIG_VP9_TEMPORAL_DENOISING #if CONFIG_VP9_TEMPORAL_DENOISING
VP9_PICKMODE_CTX_DEN ctx_den; VP9_PICKMODE_CTX_DEN ctx_den;
int64_t zero_last_cost_orig = INT64_MAX; int64_t zero_last_cost_orig = INT64_MAX;
@@ -1495,23 +1496,8 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data,
#endif #endif
INTERP_FILTER filter_gf_svc = EIGHTTAP; INTERP_FILTER filter_gf_svc = EIGHTTAP;
MV_REFERENCE_FRAME best_second_ref_frame = NONE; MV_REFERENCE_FRAME best_second_ref_frame = NONE;
const struct segmentation *const seg = &cm->seg;
int comp_modes = 0; int comp_modes = 0;
int num_inter_modes = (cpi->use_svc) ? RT_INTER_MODES_SVC : RT_INTER_MODES; int num_inter_modes = (cpi->use_svc) ? RT_INTER_MODES_SVC : RT_INTER_MODES;
int flag_svc_subpel = 0;
int svc_mv_col = 0;
int svc_mv_row = 0;
unsigned int thresh_svc_skip_golden = 500;
// Lower the skip threshold if lower spatial layer is better quality relative
// to current layer.
if (cpi->svc.spatial_layer_id > 0 && cm->base_qindex > 150 &&
cm->base_qindex > cpi->svc.lower_layer_qindex + 15)
thresh_svc_skip_golden = 100;
// Increase skip threshold if lower spatial layer is lower quality relative
// to current layer.
else if (cpi->svc.spatial_layer_id > 0 && cm->base_qindex < 140 &&
cm->base_qindex < cpi->svc.lower_layer_qindex - 20)
thresh_svc_skip_golden = 1000;
init_ref_frame_cost(cm, xd, ref_frame_cost); init_ref_frame_cost(cm, xd, ref_frame_cost);
@@ -1649,16 +1635,6 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data,
cpi->sf.use_compound_nonrd_pickmode && usable_ref_frame == ALTREF_FRAME) cpi->sf.use_compound_nonrd_pickmode && usable_ref_frame == ALTREF_FRAME)
comp_modes = 2; comp_modes = 2;
// If the segment reference frame feature is enabled and it's set to GOLDEN
// reference, then make sure we don't skip checking GOLDEN, this is to
// prevent possibility of not picking any mode.
if (segfeature_active(seg, mi->segment_id, SEG_LVL_REF_FRAME) &&
get_segdata(seg, mi->segment_id, SEG_LVL_REF_FRAME) == GOLDEN_FRAME) {
usable_ref_frame = GOLDEN_FRAME;
skip_ref_find_pred[GOLDEN_FRAME] = 0;
thresh_svc_skip_golden = 0;
}
for (ref_frame = LAST_FRAME; ref_frame <= usable_ref_frame; ++ref_frame) { for (ref_frame = LAST_FRAME; ref_frame <= usable_ref_frame; ++ref_frame) {
if (!skip_ref_find_pred[ref_frame]) { if (!skip_ref_find_pred[ref_frame]) {
find_predictors(cpi, x, ref_frame, frame_mv, const_motion, find_predictors(cpi, x, ref_frame, frame_mv, const_motion,
@@ -1671,18 +1647,6 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data,
if (cpi->use_svc || cpi->oxcf.speed <= 7 || bsize < BLOCK_32X32) if (cpi->use_svc || cpi->oxcf.speed <= 7 || bsize < BLOCK_32X32)
x->sb_use_mv_part = 0; x->sb_use_mv_part = 0;
// Set the flag_svc_subpel to 1 for SVC if the lower spatial layer used
// an averaging filter for downsampling (phase = 8). If so, we will test
// a nonzero motion mode on the spatial (goldeen) reference.
// The nonzero motion is half pixel shifted to left and top (-4, -4).
if (cpi->use_svc && cpi->svc.spatial_layer_id > 0 &&
svc_force_zero_mode[GOLDEN_FRAME - 1] &&
cpi->svc.downsample_filter_phase[cpi->svc.spatial_layer_id - 1] == 8) {
svc_mv_col = -4;
svc_mv_row = -4;
flag_svc_subpel = 1;
}
for (idx = 0; idx < num_inter_modes + comp_modes; ++idx) { for (idx = 0; idx < num_inter_modes + comp_modes; ++idx) {
int rate_mv = 0; int rate_mv = 0;
int mode_rd_thresh; int mode_rd_thresh;
@@ -1696,7 +1660,6 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data,
int inter_mv_mode = 0; int inter_mv_mode = 0;
int skip_this_mv = 0; int skip_this_mv = 0;
int comp_pred = 0; int comp_pred = 0;
int force_gf_mv = 0;
PREDICTION_MODE this_mode; PREDICTION_MODE this_mode;
second_ref_frame = NONE; second_ref_frame = NONE;
@@ -1717,29 +1680,8 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data,
comp_pred = 1; comp_pred = 1;
} }
if (ref_frame > usable_ref_frame) continue;
if (skip_ref_find_pred[ref_frame]) continue;
// If the segment reference frame feature is enabled then do nothing if the
// current ref frame is not allowed.
if (segfeature_active(seg, mi->segment_id, SEG_LVL_REF_FRAME) &&
get_segdata(seg, mi->segment_id, SEG_LVL_REF_FRAME) != (int)ref_frame)
continue;
if (flag_svc_subpel && ref_frame == GOLDEN_FRAME) {
force_gf_mv = 1;
// Only test mode if NEARESTMV/NEARMV is (svc_mv_col, svc_mv_row),
// otherwise set NEWMV to (svc_mv_col, svc_mv_row).
if (this_mode == NEWMV) {
frame_mv[this_mode][ref_frame].as_mv.col = svc_mv_col;
frame_mv[this_mode][ref_frame].as_mv.row = svc_mv_row;
} else if (frame_mv[this_mode][ref_frame].as_mv.col != svc_mv_col ||
frame_mv[this_mode][ref_frame].as_mv.row != svc_mv_row) {
continue;
}
}
if (comp_pred) { if (comp_pred) {
const struct segmentation *const seg = &cm->seg;
if (!cpi->allow_comp_inter_inter) continue; if (!cpi->allow_comp_inter_inter) continue;
// Skip compound inter modes if ARF is not available. // Skip compound inter modes if ARF is not available.
if (!(cpi->ref_frame_flags & flag_list[second_ref_frame])) continue; if (!(cpi->ref_frame_flags & flag_list[second_ref_frame])) continue;
@@ -1748,6 +1690,9 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data,
if (segfeature_active(seg, mi->segment_id, SEG_LVL_REF_FRAME)) continue; if (segfeature_active(seg, mi->segment_id, SEG_LVL_REF_FRAME)) continue;
} }
if (ref_frame > usable_ref_frame) continue;
if (skip_ref_find_pred[ref_frame]) continue;
// For SVC, skip the golden (spatial) reference search if sse of zeromv_last // For SVC, skip the golden (spatial) reference search if sse of zeromv_last
// is below threshold. // is below threshold.
if (cpi->use_svc && ref_frame == GOLDEN_FRAME && if (cpi->use_svc && ref_frame == GOLDEN_FRAME &&
@@ -1792,7 +1737,7 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data,
// Skip non-zeromv mode search for golden frame if force_skip_low_temp_var // Skip non-zeromv mode search for golden frame if force_skip_low_temp_var
// is set. If nearestmv for golden frame is 0, zeromv mode will be skipped // is set. If nearestmv for golden frame is 0, zeromv mode will be skipped
// later. // later.
if (!force_gf_mv && force_skip_low_temp_var && ref_frame == GOLDEN_FRAME && if (force_skip_low_temp_var && ref_frame == GOLDEN_FRAME &&
frame_mv[this_mode][ref_frame].as_int != 0) { frame_mv[this_mode][ref_frame].as_int != 0) {
continue; continue;
} }
@@ -1806,15 +1751,11 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data,
} }
if (cpi->use_svc) { if (cpi->use_svc) {
if (!force_gf_mv && svc_force_zero_mode[ref_frame - 1] && if (svc_force_zero_mode[ref_frame - 1] &&
frame_mv[this_mode][ref_frame].as_int != 0) frame_mv[this_mode][ref_frame].as_int != 0)
continue; continue;
} }
// Disable this drop out case if the ref frame segment level feature is
// enabled for this segment. This is to prevent the possibility that we end
// up unable to pick any mode.
if (!segfeature_active(seg, mi->segment_id, SEG_LVL_REF_FRAME)) {
if (sf->reference_masking && if (sf->reference_masking &&
!(frame_mv[this_mode][ref_frame].as_int == 0 && !(frame_mv[this_mode][ref_frame].as_int == 0 &&
ref_frame == LAST_FRAME)) { ref_frame == LAST_FRAME)) {
@@ -1838,7 +1779,6 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data,
} }
} }
if (ref_frame_skip_mask & (1 << ref_frame)) continue; if (ref_frame_skip_mask & (1 << ref_frame)) continue;
}
// Select prediction reference frames. // Select prediction reference frames.
for (i = 0; i < MAX_MB_PLANE; i++) { for (i = 0; i < MAX_MB_PLANE; i++) {
@@ -1868,7 +1808,7 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data,
&rd_thresh_freq_fact[mode_index]))) &rd_thresh_freq_fact[mode_index])))
continue; continue;
if (this_mode == NEWMV && !force_gf_mv) { if (this_mode == NEWMV) {
if (ref_frame > LAST_FRAME && !cpi->use_svc && if (ref_frame > LAST_FRAME && !cpi->use_svc &&
cpi->oxcf.rc_mode == VPX_CBR) { cpi->oxcf.rc_mode == VPX_CBR) {
int tmp_sad; int tmp_sad;
@@ -2009,7 +1949,7 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data,
if ((this_mode == NEWMV || filter_ref == SWITCHABLE) && if ((this_mode == NEWMV || filter_ref == SWITCHABLE) &&
pred_filter_search && pred_filter_search &&
(ref_frame == LAST_FRAME || (ref_frame == LAST_FRAME ||
(ref_frame == GOLDEN_FRAME && !force_gf_mv && (ref_frame == GOLDEN_FRAME &&
(cpi->use_svc || cpi->oxcf.rc_mode == VPX_VBR))) && (cpi->use_svc || cpi->oxcf.rc_mode == VPX_VBR))) &&
(((mi->mv[0].as_mv.row | mi->mv[0].as_mv.col) & 0x07) != 0)) { (((mi->mv[0].as_mv.row | mi->mv[0].as_mv.col) & 0x07) != 0)) {
int pf_rate[3]; int pf_rate[3];
@@ -2233,11 +2173,9 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data,
// For spatial enhancemanent layer: perform intra prediction only if base // For spatial enhancemanent layer: perform intra prediction only if base
// layer is chosen as the reference. Always perform intra prediction if // layer is chosen as the reference. Always perform intra prediction if
// LAST is the only reference, or is_key_frame is set, or on base // LAST is the only reference or is_key_frame is set.
// temporal layer.
if (cpi->svc.spatial_layer_id) { if (cpi->svc.spatial_layer_id) {
perform_intra_pred = perform_intra_pred =
cpi->svc.temporal_layer_id == 0 ||
cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame || cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame ||
!(cpi->ref_frame_flags & flag_list[GOLDEN_FRAME]) || !(cpi->ref_frame_flags & flag_list[GOLDEN_FRAME]) ||
(!cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame && (!cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame &&
@@ -2247,13 +2185,6 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data,
if (cpi->oxcf.lag_in_frames > 0 && cpi->oxcf.rc_mode == VPX_VBR && if (cpi->oxcf.lag_in_frames > 0 && cpi->oxcf.rc_mode == VPX_VBR &&
cpi->rc.is_src_frame_alt_ref) cpi->rc.is_src_frame_alt_ref)
perform_intra_pred = 0; perform_intra_pred = 0;
// If the segment reference frame feature is enabled and set then
// skip the intra prediction.
if (segfeature_active(seg, mi->segment_id, SEG_LVL_REF_FRAME) &&
get_segdata(seg, mi->segment_id, SEG_LVL_REF_FRAME) > 0)
perform_intra_pred = 0;
// Perform intra prediction search, if the best SAD is above a certain // Perform intra prediction search, if the best SAD is above a certain
// threshold. // threshold.
if (best_rdc.rdcost == INT64_MAX || if (best_rdc.rdcost == INT64_MAX ||

View File

@@ -31,13 +31,10 @@
#include "vp9/encoder/vp9_encodemv.h" #include "vp9/encoder/vp9_encodemv.h"
#include "vp9/encoder/vp9_ratectrl.h" #include "vp9/encoder/vp9_ratectrl.h"
// Max rate per frame for 1080P and below encodes if no level requirement given. // Max rate target for 1080P and below encodes under normal circumstances
// For larger formats limit to MAX_MB_RATE bits per MB // (1920 * 1080 / (16 * 16)) * MAX_MB_RATE bits per MB
// 4Mbits is derived from the level requirement for level 4 (1080P 30) which
// requires that HW can sustain a rate of 16Mbits over a 4 frame group.
// If a lower level requirement is specified then this may over ride this value.
#define MAX_MB_RATE 250 #define MAX_MB_RATE 250
#define MAXRATE_1080P 4000000 #define MAXRATE_1080P 2025000
#define DEFAULT_KF_BOOST 2000 #define DEFAULT_KF_BOOST 2000
#define DEFAULT_GF_BOOST 2000 #define DEFAULT_GF_BOOST 2000
@@ -1103,9 +1100,6 @@ static int rc_pick_q_and_bounds_two_pass(const VP9_COMP *cpi, int *bottom_index,
// Baseline value derived from cpi->active_worst_quality and kf boost. // Baseline value derived from cpi->active_worst_quality and kf boost.
active_best_quality = active_best_quality =
get_kf_active_quality(rc, active_worst_quality, cm->bit_depth); get_kf_active_quality(rc, active_worst_quality, cm->bit_depth);
if (cpi->twopass.kf_zeromotion_pct >= STATIC_KF_GROUP_THRESH) {
active_best_quality /= 4;
}
// Allow somewhat lower kf minq with small image formats. // Allow somewhat lower kf minq with small image formats.
if ((cm->width * cm->height) <= (352 * 288)) { if ((cm->width * cm->height) <= (352 * 288)) {
@@ -1494,22 +1488,15 @@ void vp9_rc_postencode_update(VP9_COMP *cpi, uint64_t bytes_used) {
cpi->rc.last_frame_is_src_altref = cpi->rc.is_src_frame_alt_ref; cpi->rc.last_frame_is_src_altref = cpi->rc.is_src_frame_alt_ref;
} }
if (cm->frame_type != KEY_FRAME) rc->reset_high_source_sad = 0; if (cm->frame_type != KEY_FRAME) rc->reset_high_source_sad = 0;
rc->last_avg_frame_bandwidth = rc->avg_frame_bandwidth;
if (cpi->use_svc &&
cpi->svc.spatial_layer_id < cpi->svc.number_spatial_layers - 1)
cpi->svc.lower_layer_qindex = cm->base_qindex;
} }
void vp9_rc_postencode_update_drop_frame(VP9_COMP *cpi) { void vp9_rc_postencode_update_drop_frame(VP9_COMP *cpi) {
// Update buffer level with zero size, update frame counters, and return. // Update buffer level with zero size, update frame counters, and return.
update_buffer_level(cpi, 0); update_buffer_level(cpi, 0);
cpi->common.current_video_frame++;
cpi->rc.frames_since_key++; cpi->rc.frames_since_key++;
cpi->rc.frames_to_key--; cpi->rc.frames_to_key--;
cpi->rc.rc_2_frame = 0; cpi->rc.rc_2_frame = 0;
cpi->rc.rc_1_frame = 0; cpi->rc.rc_1_frame = 0;
cpi->rc.last_avg_frame_bandwidth = cpi->rc.avg_frame_bandwidth;
} }
static int calc_pframe_target_size_one_pass_vbr(const VP9_COMP *const cpi) { static int calc_pframe_target_size_one_pass_vbr(const VP9_COMP *const cpi) {
@@ -1593,7 +1580,8 @@ void vp9_rc_get_one_pass_vbr_params(VP9_COMP *cpi) {
// Adjust boost and af_ratio based on avg_frame_low_motion, which varies // Adjust boost and af_ratio based on avg_frame_low_motion, which varies
// between 0 and 100 (stationary, 100% zero/small motion). // between 0 and 100 (stationary, 100% zero/small motion).
rc->gfu_boost = rc->gfu_boost =
VPXMAX(500, DEFAULT_GF_BOOST * (rc->avg_frame_low_motion << 1) / VPXMAX(500,
DEFAULT_GF_BOOST * (rc->avg_frame_low_motion << 1) /
(rc->avg_frame_low_motion + 100)); (rc->avg_frame_low_motion + 100));
rc->af_ratio_onepass_vbr = VPXMIN(15, VPXMAX(5, 3 * rc->gfu_boost / 400)); rc->af_ratio_onepass_vbr = VPXMIN(15, VPXMAX(5, 3 * rc->gfu_boost / 400));
} }
@@ -1869,8 +1857,13 @@ void vp9_rc_set_gf_interval_range(const VP9_COMP *const cpi,
rc->max_gf_interval = vp9_rc_get_default_max_gf_interval( rc->max_gf_interval = vp9_rc_get_default_max_gf_interval(
cpi->framerate, rc->min_gf_interval); cpi->framerate, rc->min_gf_interval);
// Extended max interval for genuinely static scenes like slide shows. // Extended interval for genuinely static scenes
rc->static_scene_max_gf_interval = MAX_STATIC_GF_GROUP_LENGTH; rc->static_scene_max_gf_interval = MAX_LAG_BUFFERS * 2;
if (is_altref_enabled(cpi)) {
if (rc->static_scene_max_gf_interval > oxcf->lag_in_frames - 1)
rc->static_scene_max_gf_interval = oxcf->lag_in_frames - 1;
}
if (rc->max_gf_interval > rc->static_scene_max_gf_interval) if (rc->max_gf_interval > rc->static_scene_max_gf_interval)
rc->max_gf_interval = rc->static_scene_max_gf_interval; rc->max_gf_interval = rc->static_scene_max_gf_interval;
@@ -1880,12 +1873,9 @@ void vp9_rc_set_gf_interval_range(const VP9_COMP *const cpi,
if (oxcf->target_level == LEVEL_AUTO) { if (oxcf->target_level == LEVEL_AUTO) {
const uint32_t pic_size = cpi->common.width * cpi->common.height; const uint32_t pic_size = cpi->common.width * cpi->common.height;
const uint32_t pic_breadth =
VPXMAX(cpi->common.width, cpi->common.height);
int i; int i;
for (i = LEVEL_1; i < LEVEL_MAX; ++i) { for (i = LEVEL_1; i < LEVEL_MAX; ++i) {
if (vp9_level_defs[i].max_luma_picture_size >= pic_size && if (vp9_level_defs[i].max_luma_picture_size > pic_size) {
vp9_level_defs[i].max_luma_picture_breadth >= pic_breadth) {
if (rc->min_gf_interval <= if (rc->min_gf_interval <=
(int)vp9_level_defs[i].min_altref_distance) { (int)vp9_level_defs[i].min_altref_distance) {
rc->min_gf_interval = rc->min_gf_interval =
@@ -1914,12 +1904,12 @@ void vp9_rc_update_framerate(VP9_COMP *cpi) {
VPXMAX(rc->min_frame_bandwidth, FRAME_OVERHEAD_BITS); VPXMAX(rc->min_frame_bandwidth, FRAME_OVERHEAD_BITS);
// A maximum bitrate for a frame is defined. // A maximum bitrate for a frame is defined.
// However this limit is extended if a very high rate is given on the command // The baseline for this aligns with HW implementations that
// line or the the rate cannnot be acheived because of a user specificed max q // can support decode of 1080P content up to a bitrate of MAX_MB_RATE bits
// (e.g. when the user specifies lossless encode). // per 16x16 MB (averaged over a frame). However this limit is extended if
// // a very high rate is given on the command line or the the rate cannnot
// If a level is specified that requires a lower maximum rate then the level // be acheived because of a user specificed max q (e.g. when the user
// value take precedence. // specifies lossless encode.
vbr_max_bits = vbr_max_bits =
(int)(((int64_t)rc->avg_frame_bandwidth * oxcf->two_pass_vbrmax_section) / (int)(((int64_t)rc->avg_frame_bandwidth * oxcf->two_pass_vbrmax_section) /
100); 100);

View File

@@ -34,14 +34,6 @@ extern "C" {
#define FRAME_OVERHEAD_BITS 200 #define FRAME_OVERHEAD_BITS 200
// Threshold used to define a KF group as static (e.g. a slide show).
// Essentially this means that no frame in the group has more than 1% of MBs
// that are not marked as coded with 0,0 motion in the first pass.
#define STATIC_KF_GROUP_THRESH 99
// The maximum duration of a GF group that is static (for example a slide show).
#define MAX_STATIC_GF_GROUP_LENGTH 250
typedef enum { typedef enum {
INTER_NORMAL = 0, INTER_NORMAL = 0,
INTER_HIGH = 1, INTER_HIGH = 1,
@@ -160,8 +152,6 @@ typedef struct {
int rc_2_frame; int rc_2_frame;
int q_1_frame; int q_1_frame;
int q_2_frame; int q_2_frame;
// Keep track of the last target average frame bandwidth.
int last_avg_frame_bandwidth;
// Auto frame-scaling variables. // Auto frame-scaling variables.
FRAME_SCALE_LEVEL frame_size_selector; FRAME_SCALE_LEVEL frame_size_selector;

View File

@@ -59,9 +59,7 @@ typedef struct {
MV_REFERENCE_FRAME ref_frame[2]; MV_REFERENCE_FRAME ref_frame[2];
} MODE_DEFINITION; } MODE_DEFINITION;
typedef struct { typedef struct { MV_REFERENCE_FRAME ref_frame[2]; } REF_DEFINITION;
MV_REFERENCE_FRAME ref_frame[2];
} REF_DEFINITION;
struct rdcost_block_args { struct rdcost_block_args {
const VP9_COMP *cpi; const VP9_COMP *cpi;

View File

@@ -37,16 +37,14 @@ void vp9_init_layer_context(VP9_COMP *const cpi) {
svc->scaled_one_half = 0; svc->scaled_one_half = 0;
svc->current_superframe = 0; svc->current_superframe = 0;
svc->non_reference_frame = 0; svc->non_reference_frame = 0;
svc->skip_enhancement_layer = 0;
for (i = 0; i < REF_FRAMES; ++i) svc->ref_frame_index[i] = -1; for (i = 0; i < REF_FRAMES; ++i) svc->ref_frame_index[i] = -1;
for (sl = 0; sl < oxcf->ss_number_layers; ++sl) { for (sl = 0; sl < oxcf->ss_number_layers; ++sl) {
svc->ext_frame_flags[sl] = 0; svc->ext_frame_flags[sl] = 0;
svc->ext_lst_fb_idx[sl] = 0; svc->ext_lst_fb_idx[sl] = 0;
svc->ext_gld_fb_idx[sl] = 1; svc->ext_gld_fb_idx[sl] = 1;
svc->ext_alt_fb_idx[sl] = 2; svc->ext_alt_fb_idx[sl] = 2;
svc->downsample_filter_type[sl] = BILINEAR; svc->downsample_filter_type[sl] = EIGHTTAP;
svc->downsample_filter_phase[sl] = 8; // Set to 8 for averaging filter. svc->downsample_filter_phase[sl] = 0; // Set to 8 for averaging filter.
} }
if (cpi->oxcf.error_resilient_mode == 0 && cpi->oxcf.pass == 2) { if (cpi->oxcf.error_resilient_mode == 0 && cpi->oxcf.pass == 2) {
@@ -155,8 +153,6 @@ void vp9_update_layer_context_change_config(VP9_COMP *const cpi,
int sl, tl, layer = 0, spatial_layer_target; int sl, tl, layer = 0, spatial_layer_target;
float bitrate_alloc = 1.0; float bitrate_alloc = 1.0;
cpi->svc.temporal_layering_mode = oxcf->temporal_layering_mode;
if (svc->temporal_layering_mode != VP9E_TEMPORAL_LAYERING_MODE_NOLAYERING) { if (svc->temporal_layering_mode != VP9E_TEMPORAL_LAYERING_MODE_NOLAYERING) {
for (sl = 0; sl < oxcf->ss_number_layers; ++sl) { for (sl = 0; sl < oxcf->ss_number_layers; ++sl) {
for (tl = 0; tl < oxcf->ts_number_layers; ++tl) { for (tl = 0; tl < oxcf->ts_number_layers; ++tl) {
@@ -393,7 +389,7 @@ int vp9_is_upper_layer_key_frame(const VP9_COMP *const cpi) {
.is_key_frame; .is_key_frame;
} }
void get_layer_resolution(const int width_org, const int height_org, static void get_layer_resolution(const int width_org, const int height_org,
const int num, const int den, int *width_out, const int num, const int den, int *width_out,
int *height_out) { int *height_out) {
int w, h; int w, h;
@@ -549,8 +545,6 @@ static void set_flags_and_fb_idx_for_temporal_mode2(VP9_COMP *const cpi) {
if (!spatial_id) { if (!spatial_id) {
cpi->ref_frame_flags = VP9_LAST_FLAG; cpi->ref_frame_flags = VP9_LAST_FLAG;
} else { } else {
if (spatial_id == cpi->svc.number_spatial_layers - 1)
cpi->ext_refresh_alt_ref_frame = 0;
cpi->ref_frame_flags = VP9_LAST_FLAG | VP9_GOLD_FLAG; cpi->ref_frame_flags = VP9_LAST_FLAG | VP9_GOLD_FLAG;
} }
} }
@@ -610,7 +604,6 @@ static void set_flags_and_fb_idx_for_temporal_mode_noLayering(
int vp9_one_pass_cbr_svc_start_layer(VP9_COMP *const cpi) { int vp9_one_pass_cbr_svc_start_layer(VP9_COMP *const cpi) {
int width = 0, height = 0; int width = 0, height = 0;
LAYER_CONTEXT *lc = NULL; LAYER_CONTEXT *lc = NULL;
cpi->svc.skip_enhancement_layer = 0;
if (cpi->svc.number_spatial_layers > 1) cpi->svc.use_base_mv = 1; if (cpi->svc.number_spatial_layers > 1) cpi->svc.use_base_mv = 1;
cpi->svc.force_zero_mode_spatial_ref = 1; cpi->svc.force_zero_mode_spatial_ref = 1;
cpi->svc.mi_stride[cpi->svc.spatial_layer_id] = cpi->common.mi_stride; cpi->svc.mi_stride[cpi->svc.spatial_layer_id] = cpi->common.mi_stride;
@@ -663,14 +656,10 @@ int vp9_one_pass_cbr_svc_start_layer(VP9_COMP *const cpi) {
lc->scaling_factor_num, lc->scaling_factor_den, &width, lc->scaling_factor_num, lc->scaling_factor_den, &width,
&height); &height);
// For resolutions <= VGA: set phase of the filter = 8 (for symmetric // For resolutions <= QVGA: set phase of the filter = 8 (for symmetric
// averaging filter), use bilinear for now. // averaging filter), use bilinear for now.
if (width * height <= 640 * 480) { if (width * height <= 320 * 240) {
cpi->svc.downsample_filter_type[cpi->svc.spatial_layer_id] = BILINEAR; cpi->svc.downsample_filter_type[cpi->svc.spatial_layer_id] = BILINEAR;
// Use Eightap_smooth for low resolutions.
if (width * height <= 320 * 240)
cpi->svc.downsample_filter_type[cpi->svc.spatial_layer_id] =
EIGHTTAP_SMOOTH;
cpi->svc.downsample_filter_phase[cpi->svc.spatial_layer_id] = 8; cpi->svc.downsample_filter_phase[cpi->svc.spatial_layer_id] = 8;
} }
@@ -872,28 +861,3 @@ void vp9_svc_reset_key_frame(VP9_COMP *const cpi) {
vp9_update_temporal_layer_framerate(cpi); vp9_update_temporal_layer_framerate(cpi);
vp9_restore_layer_context(cpi); vp9_restore_layer_context(cpi);
} }
void vp9_svc_check_reset_layer_rc_flag(VP9_COMP *const cpi) {
SVC *svc = &cpi->svc;
int sl, tl;
for (sl = 0; sl < svc->number_spatial_layers; ++sl) {
// Check for reset based on avg_frame_bandwidth for spatial layer sl.
int layer = LAYER_IDS_TO_IDX(sl, svc->number_temporal_layers - 1,
svc->number_temporal_layers);
LAYER_CONTEXT *lc = &svc->layer_context[layer];
RATE_CONTROL *lrc = &lc->rc;
if (lrc->avg_frame_bandwidth > (3 * lrc->last_avg_frame_bandwidth >> 1) ||
lrc->avg_frame_bandwidth < (lrc->last_avg_frame_bandwidth >> 1)) {
// Reset for all temporal layers with spatial layer sl.
for (tl = 0; tl < svc->number_temporal_layers; ++tl) {
int layer = LAYER_IDS_TO_IDX(sl, tl, svc->number_temporal_layers);
LAYER_CONTEXT *lc = &svc->layer_context[layer];
RATE_CONTROL *lrc = &lc->rc;
lrc->rc_1_frame = 0;
lrc->rc_2_frame = 0;
lrc->bits_off_target = lrc->optimal_buffer_level;
lrc->buffer_level = lrc->optimal_buffer_level;
}
}
}
}

View File

@@ -49,7 +49,7 @@ typedef struct {
uint8_t speed; uint8_t speed;
} LAYER_CONTEXT; } LAYER_CONTEXT;
typedef struct SVC { typedef struct {
int spatial_layer_id; int spatial_layer_id;
int temporal_layer_id; int temporal_layer_id;
int number_spatial_layers; int number_spatial_layers;
@@ -99,12 +99,6 @@ typedef struct SVC {
BLOCK_SIZE *prev_partition_svc; BLOCK_SIZE *prev_partition_svc;
int mi_stride[VPX_MAX_LAYERS]; int mi_stride[VPX_MAX_LAYERS];
int first_layer_denoise;
int skip_enhancement_layer;
int lower_layer_qindex;
} SVC; } SVC;
struct VP9_COMP; struct VP9_COMP;
@@ -134,10 +128,6 @@ void vp9_save_layer_context(struct VP9_COMP *const cpi);
// Initialize second pass rc for spatial svc. // Initialize second pass rc for spatial svc.
void vp9_init_second_pass_spatial_svc(struct VP9_COMP *cpi); void vp9_init_second_pass_spatial_svc(struct VP9_COMP *cpi);
void get_layer_resolution(const int width_org, const int height_org,
const int num, const int den, int *width_out,
int *height_out);
// Increment number of video frames in layer // Increment number of video frames in layer
void vp9_inc_frame_in_layer(struct VP9_COMP *const cpi); void vp9_inc_frame_in_layer(struct VP9_COMP *const cpi);
@@ -158,8 +148,6 @@ void vp9_free_svc_cyclic_refresh(struct VP9_COMP *const cpi);
void vp9_svc_reset_key_frame(struct VP9_COMP *const cpi); void vp9_svc_reset_key_frame(struct VP9_COMP *const cpi);
void vp9_svc_check_reset_layer_rc_flag(struct VP9_COMP *const cpi);
#ifdef __cplusplus #ifdef __cplusplus
} // extern "C" } // extern "C"
#endif #endif

View File

@@ -170,13 +170,13 @@ void vp9_fht4x4_sse2(const int16_t *input, tran_low_t *output, int stride,
fadst4_sse2(in); fadst4_sse2(in);
write_buffer_4x4(output, in); write_buffer_4x4(output, in);
break; break;
default: case ADST_ADST:
assert(tx_type == ADST_ADST);
load_buffer_4x4(input, in, stride); load_buffer_4x4(input, in, stride);
fadst4_sse2(in); fadst4_sse2(in);
fadst4_sse2(in); fadst4_sse2(in);
write_buffer_4x4(output, in); write_buffer_4x4(output, in);
break; break;
default: assert(0); break;
} }
} }
@@ -1097,14 +1097,14 @@ void vp9_fht8x8_sse2(const int16_t *input, tran_low_t *output, int stride,
right_shift_8x8(in, 1); right_shift_8x8(in, 1);
write_buffer_8x8(output, in, 8); write_buffer_8x8(output, in, 8);
break; break;
default: case ADST_ADST:
assert(tx_type == ADST_ADST);
load_buffer_8x8(input, in, stride); load_buffer_8x8(input, in, stride);
fadst8_sse2(in); fadst8_sse2(in);
fadst8_sse2(in); fadst8_sse2(in);
right_shift_8x8(in, 1); right_shift_8x8(in, 1);
write_buffer_8x8(output, in, 8); write_buffer_8x8(output, in, 8);
break; break;
default: assert(0); break;
} }
} }
@@ -1963,13 +1963,13 @@ void vp9_fht16x16_sse2(const int16_t *input, tran_low_t *output, int stride,
fadst16_sse2(in0, in1); fadst16_sse2(in0, in1);
write_buffer_16x16(output, in0, in1, 16); write_buffer_16x16(output, in0, in1, 16);
break; break;
default: case ADST_ADST:
assert(tx_type == ADST_ADST);
load_buffer_16x16(input, in0, in1, stride); load_buffer_16x16(input, in0, in1, stride);
fadst16_sse2(in0, in1); fadst16_sse2(in0, in1);
right_shift_16x16(in0, in1); right_shift_16x16(in0, in1);
fadst16_sse2(in0, in1); fadst16_sse2(in0, in1);
write_buffer_16x16(output, in0, in1, 16); write_buffer_16x16(output, in0, in1, 16);
break; break;
default: assert(0); break;
} }
} }

View File

@@ -1,7 +1,7 @@
/* /*
* Copyright (c) 2014 The WebM project authors. All Rights Reserved. * Copyright (c) 2014 The WebM project authors. All Rights Reserved.
* *
* Use of this source code is governed by a BSD-style license * Usee of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source * that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found * tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may * in the file PATENTS. All contributing project authors may

View File

@@ -1,140 +0,0 @@
/*
* Copyright (c) 2017 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include <assert.h>
#include <immintrin.h> // AVX2
#include "./vp9_rtcd.h"
#include "vpx/vpx_integer.h"
#include "vpx_dsp/vpx_dsp_common.h"
#include "vpx_dsp/x86/bitdepth_conversion_avx2.h"
#include "vpx_dsp/x86/quantize_x86.h"
// Zero fill 8 positions in the output buffer.
static INLINE void store_zero_tran_low(tran_low_t *a) {
const __m256i zero = _mm256_setzero_si256();
#if CONFIG_VP9_HIGHBITDEPTH
_mm256_storeu_si256((__m256i *)(a), zero);
_mm256_storeu_si256((__m256i *)(a + 8), zero);
#else
_mm256_storeu_si256((__m256i *)(a), zero);
#endif
}
static INLINE __m256i scan_eob_256(const __m256i *iscan_ptr,
__m256i *coeff256) {
const __m256i iscan = _mm256_loadu_si256(iscan_ptr);
const __m256i zero256 = _mm256_setzero_si256();
#if CONFIG_VP9_HIGHBITDEPTH
// The _mm256_packs_epi32() in load_tran_low() packs the 64 bit coeff as
// B1 A1 B0 A0. Shuffle to B1 B0 A1 A0 in order to scan eob correctly.
const __m256i _coeff256 = _mm256_permute4x64_epi64(*coeff256, 0xd8);
const __m256i zero_coeff0 = _mm256_cmpeq_epi16(_coeff256, zero256);
#else
const __m256i zero_coeff0 = _mm256_cmpeq_epi16(*coeff256, zero256);
#endif
const __m256i nzero_coeff0 = _mm256_cmpeq_epi16(zero_coeff0, zero256);
// Add one to convert from indices to counts
const __m256i iscan_plus_one = _mm256_sub_epi16(iscan, nzero_coeff0);
return _mm256_and_si256(iscan_plus_one, nzero_coeff0);
}
void vp9_quantize_fp_avx2(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
int skip_block, const int16_t *round_ptr,
const int16_t *quant_ptr, tran_low_t *qcoeff_ptr,
tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr,
uint16_t *eob_ptr, const int16_t *scan_ptr,
const int16_t *iscan_ptr) {
__m128i eob;
__m256i round256, quant256, dequant256;
__m256i eob256, thr256;
(void)scan_ptr;
(void)skip_block;
assert(!skip_block);
coeff_ptr += n_coeffs;
iscan_ptr += n_coeffs;
qcoeff_ptr += n_coeffs;
dqcoeff_ptr += n_coeffs;
n_coeffs = -n_coeffs;
{
__m256i coeff256;
// Setup global values
{
const __m128i round = _mm_load_si128((const __m128i *)round_ptr);
const __m128i quant = _mm_load_si128((const __m128i *)quant_ptr);
const __m128i dequant = _mm_load_si128((const __m128i *)dequant_ptr);
round256 = _mm256_castsi128_si256(round);
round256 = _mm256_permute4x64_epi64(round256, 0x54);
quant256 = _mm256_castsi128_si256(quant);
quant256 = _mm256_permute4x64_epi64(quant256, 0x54);
dequant256 = _mm256_castsi128_si256(dequant);
dequant256 = _mm256_permute4x64_epi64(dequant256, 0x54);
}
{
__m256i qcoeff256;
__m256i qtmp256;
coeff256 = load_tran_low(coeff_ptr + n_coeffs);
qcoeff256 = _mm256_abs_epi16(coeff256);
qcoeff256 = _mm256_adds_epi16(qcoeff256, round256);
qtmp256 = _mm256_mulhi_epi16(qcoeff256, quant256);
qcoeff256 = _mm256_sign_epi16(qtmp256, coeff256);
store_tran_low(qcoeff256, qcoeff_ptr + n_coeffs);
coeff256 = _mm256_mullo_epi16(qcoeff256, dequant256);
store_tran_low(coeff256, dqcoeff_ptr + n_coeffs);
}
eob256 = scan_eob_256((const __m256i *)(iscan_ptr + n_coeffs), &coeff256);
n_coeffs += 8 * 2;
}
// remove dc constants
dequant256 = _mm256_permute2x128_si256(dequant256, dequant256, 0x31);
quant256 = _mm256_permute2x128_si256(quant256, quant256, 0x31);
round256 = _mm256_permute2x128_si256(round256, round256, 0x31);
thr256 = _mm256_srai_epi16(dequant256, 1);
// AC only loop
while (n_coeffs < 0) {
__m256i coeff256 = load_tran_low(coeff_ptr + n_coeffs);
__m256i qcoeff256 = _mm256_abs_epi16(coeff256);
int32_t nzflag =
_mm256_movemask_epi8(_mm256_cmpgt_epi16(qcoeff256, thr256));
if (nzflag) {
__m256i qtmp256;
qcoeff256 = _mm256_adds_epi16(qcoeff256, round256);
qtmp256 = _mm256_mulhi_epi16(qcoeff256, quant256);
qcoeff256 = _mm256_sign_epi16(qtmp256, coeff256);
store_tran_low(qcoeff256, qcoeff_ptr + n_coeffs);
coeff256 = _mm256_mullo_epi16(qcoeff256, dequant256);
store_tran_low(coeff256, dqcoeff_ptr + n_coeffs);
eob256 = _mm256_max_epi16(
eob256,
scan_eob_256((const __m256i *)(iscan_ptr + n_coeffs), &coeff256));
} else {
store_zero_tran_low(qcoeff_ptr + n_coeffs);
store_zero_tran_low(dqcoeff_ptr + n_coeffs);
}
n_coeffs += 8 * 2;
}
eob = _mm_max_epi16(_mm256_castsi256_si128(eob256),
_mm256_extracti128_si256(eob256, 1));
*eob_ptr = accumulate_eob(eob);
}

View File

@@ -169,7 +169,7 @@ cglobal quantize_%1, 0, %2, 15, coeff, ncoeff, skip, round, quant, \
pshuflw m7, m8, 0x1 pshuflw m7, m8, 0x1
pmaxsw m8, m7 pmaxsw m8, m7
pextrw r6, m8, 0 pextrw r6, m8, 0
mov [r2], r6w mov [r2], r6
RET RET
%endmacro %endmacro

View File

@@ -63,17 +63,7 @@ VP9_COMMON_SRCS-$(CONFIG_VP9_POSTPROC) += common/vp9_postproc.h
VP9_COMMON_SRCS-$(CONFIG_VP9_POSTPROC) += common/vp9_postproc.c VP9_COMMON_SRCS-$(CONFIG_VP9_POSTPROC) += common/vp9_postproc.c
VP9_COMMON_SRCS-$(CONFIG_VP9_POSTPROC) += common/vp9_mfqe.h VP9_COMMON_SRCS-$(CONFIG_VP9_POSTPROC) += common/vp9_mfqe.h
VP9_COMMON_SRCS-$(CONFIG_VP9_POSTPROC) += common/vp9_mfqe.c VP9_COMMON_SRCS-$(CONFIG_VP9_POSTPROC) += common/vp9_mfqe.c
VP9_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp9_idct4x4_msa.c
VP9_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp9_idct8x8_msa.c
VP9_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp9_idct16x16_msa.c
VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_idct_intrin_sse2.c
VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_iht4x4_add_neon.c
VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_iht8x8_add_neon.c
VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_iht_neon.h
ifeq ($(CONFIG_VP9_POSTPROC),yes) ifeq ($(CONFIG_VP9_POSTPROC),yes)
VP9_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp9_mfqe_msa.c
VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_mfqe_sse2.asm VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_mfqe_sse2.asm
endif endif
@@ -81,11 +71,22 @@ ifneq ($(CONFIG_VP9_HIGHBITDEPTH),yes)
VP9_COMMON_SRCS-$(HAVE_DSPR2) += common/mips/dspr2/vp9_itrans4_dspr2.c VP9_COMMON_SRCS-$(HAVE_DSPR2) += common/mips/dspr2/vp9_itrans4_dspr2.c
VP9_COMMON_SRCS-$(HAVE_DSPR2) += common/mips/dspr2/vp9_itrans8_dspr2.c VP9_COMMON_SRCS-$(HAVE_DSPR2) += common/mips/dspr2/vp9_itrans8_dspr2.c
VP9_COMMON_SRCS-$(HAVE_DSPR2) += common/mips/dspr2/vp9_itrans16_dspr2.c VP9_COMMON_SRCS-$(HAVE_DSPR2) += common/mips/dspr2/vp9_itrans16_dspr2.c
else endif
VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_highbd_iht4x4_add_neon.c
VP9_COMMON_SRCS-$(HAVE_SSE4_1) += common/x86/vp9_highbd_iht4x4_add_sse4.c # common (msa)
VP9_COMMON_SRCS-$(HAVE_SSE4_1) += common/x86/vp9_highbd_iht8x8_add_sse4.c VP9_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp9_idct4x4_msa.c
VP9_COMMON_SRCS-$(HAVE_SSE4_1) += common/x86/vp9_highbd_iht16x16_add_sse4.c VP9_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp9_idct8x8_msa.c
VP9_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp9_idct16x16_msa.c
ifeq ($(CONFIG_VP9_POSTPROC),yes)
VP9_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp9_mfqe_msa.c
endif
VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_idct_intrin_sse2.c
ifneq ($(CONFIG_VP9_HIGHBITDEPTH),yes)
VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_iht4x4_add_neon.c
VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_iht8x8_add_neon.c
endif endif
$(eval $(call rtcd_h_template,vp9_rtcd,vp9/common/vp9_rtcd_defs.pl)) $(eval $(call rtcd_h_template,vp9_rtcd,vp9/common/vp9_rtcd_defs.pl))

View File

@@ -1067,7 +1067,8 @@ static vpx_codec_frame_flags_t get_frame_pkt_flags(const VP9_COMP *cpi,
vpx_codec_frame_flags_t flags = lib_flags << 16; vpx_codec_frame_flags_t flags = lib_flags << 16;
if (lib_flags & FRAMEFLAGS_KEY || if (lib_flags & FRAMEFLAGS_KEY ||
(cpi->use_svc && cpi->svc (cpi->use_svc &&
cpi->svc
.layer_context[cpi->svc.spatial_layer_id * .layer_context[cpi->svc.spatial_layer_id *
cpi->svc.number_temporal_layers + cpi->svc.number_temporal_layers +
cpi->svc.temporal_layer_id] cpi->svc.temporal_layer_id]
@@ -1212,7 +1213,7 @@ static vpx_codec_err_t encoder_encode(vpx_codec_alg_priv_t *ctx,
-1 != vp9_get_compressed_data(cpi, &lib_flags, &size, cx_data, -1 != vp9_get_compressed_data(cpi, &lib_flags, &size, cx_data,
&dst_time_stamp, &dst_end_time_stamp, &dst_time_stamp, &dst_end_time_stamp,
!img)) { !img)) {
if (size || (cpi->use_svc && cpi->svc.skip_enhancement_layer)) { if (size) {
vpx_codec_cx_pkt_t pkt; vpx_codec_cx_pkt_t pkt;
#if CONFIG_SPATIAL_SVC #if CONFIG_SPATIAL_SVC
@@ -1233,8 +1234,6 @@ static vpx_codec_err_t encoder_encode(vpx_codec_alg_priv_t *ctx,
ctx->pending_frame_magnitude |= size; ctx->pending_frame_magnitude |= size;
cx_data += size; cx_data += size;
cx_data_sz -= size; cx_data_sz -= size;
pkt.data.frame.width[cpi->svc.spatial_layer_id] = cpi->common.width;
pkt.data.frame.height[cpi->svc.spatial_layer_id] = cpi->common.height;
if (ctx->output_cx_pkt_cb.output_cx_pkt) { if (ctx->output_cx_pkt_cb.output_cx_pkt) {
pkt.kind = VPX_CODEC_CX_FRAME_PKT; pkt.kind = VPX_CODEC_CX_FRAME_PKT;
@@ -1261,11 +1260,9 @@ static vpx_codec_err_t encoder_encode(vpx_codec_alg_priv_t *ctx,
pkt.data.frame.duration = (unsigned long)ticks_to_timebase_units( pkt.data.frame.duration = (unsigned long)ticks_to_timebase_units(
timebase, dst_end_time_stamp - dst_time_stamp); timebase, dst_end_time_stamp - dst_time_stamp);
pkt.data.frame.flags = get_frame_pkt_flags(cpi, lib_flags); pkt.data.frame.flags = get_frame_pkt_flags(cpi, lib_flags);
pkt.data.frame.width[cpi->svc.spatial_layer_id] = cpi->common.width;
pkt.data.frame.height[cpi->svc.spatial_layer_id] = cpi->common.height;
if (ctx->pending_cx_data) { if (ctx->pending_cx_data) {
if (size) ctx->pending_frame_sizes[ctx->pending_frame_count++] = size; ctx->pending_frame_sizes[ctx->pending_frame_count++] = size;
ctx->pending_frame_magnitude |= size; ctx->pending_frame_magnitude |= size;
ctx->pending_cx_data_sz += size; ctx->pending_cx_data_sz += size;
// write the superframe only for the case when // write the superframe only for the case when
@@ -1415,22 +1412,12 @@ static vpx_image_t *encoder_get_preview(vpx_codec_alg_priv_t *ctx) {
static vpx_codec_err_t ctrl_set_roi_map(vpx_codec_alg_priv_t *ctx, static vpx_codec_err_t ctrl_set_roi_map(vpx_codec_alg_priv_t *ctx,
va_list args) { va_list args) {
vpx_roi_map_t *data = va_arg(args, vpx_roi_map_t *); (void)ctx;
(void)args;
if (data) { // TODO(yaowu): Need to re-implement and test for VP9.
vpx_roi_map_t *roi = (vpx_roi_map_t *)data;
if (!vp9_set_roi_map(ctx->cpi, roi->roi_map, roi->rows, roi->cols,
roi->delta_q, roi->delta_lf, roi->skip,
roi->ref_frame)) {
return VPX_CODEC_OK;
} else {
return VPX_CODEC_INVALID_PARAM; return VPX_CODEC_INVALID_PARAM;
} }
} else {
return VPX_CODEC_INVALID_PARAM;
}
}
static vpx_codec_err_t ctrl_set_active_map(vpx_codec_alg_priv_t *ctx, static vpx_codec_err_t ctrl_set_active_map(vpx_codec_alg_priv_t *ctx,
va_list args) { va_list args) {
@@ -1619,7 +1606,7 @@ static vpx_codec_ctrl_fn_map_t encoder_ctrl_maps[] = {
// Setters // Setters
{ VP8_SET_REFERENCE, ctrl_set_reference }, { VP8_SET_REFERENCE, ctrl_set_reference },
{ VP8_SET_POSTPROC, ctrl_set_previewpp }, { VP8_SET_POSTPROC, ctrl_set_previewpp },
{ VP9E_SET_ROI_MAP, ctrl_set_roi_map }, { VP8E_SET_ROI_MAP, ctrl_set_roi_map },
{ VP8E_SET_ACTIVEMAP, ctrl_set_active_map }, { VP8E_SET_ACTIVEMAP, ctrl_set_active_map },
{ VP8E_SET_SCALEMODE, ctrl_set_scale_mode }, { VP8E_SET_SCALEMODE, ctrl_set_scale_mode },
{ VP8E_SET_CPUUSED, ctrl_set_cpuused }, { VP8E_SET_CPUUSED, ctrl_set_cpuused },

View File

@@ -103,7 +103,6 @@ VP9_CX_SRCS-yes += encoder/vp9_mbgraph.h
VP9_CX_SRCS-$(HAVE_SSE4_1) += encoder/x86/temporal_filter_sse4.c VP9_CX_SRCS-$(HAVE_SSE4_1) += encoder/x86/temporal_filter_sse4.c
VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_quantize_sse2.c VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_quantize_sse2.c
VP9_CX_SRCS-$(HAVE_AVX2) += encoder/x86/vp9_quantize_avx2.c
VP9_CX_SRCS-$(HAVE_AVX) += encoder/x86/vp9_diamond_search_sad_avx.c VP9_CX_SRCS-$(HAVE_AVX) += encoder/x86/vp9_diamond_search_sad_avx.c
ifeq ($(CONFIG_VP9_HIGHBITDEPTH),yes) ifeq ($(CONFIG_VP9_HIGHBITDEPTH),yes)
VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_highbd_block_error_intrin_sse2.c VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_highbd_block_error_intrin_sse2.c

View File

@@ -12,11 +12,8 @@
* \brief Provides the high level interface to wrap encoder algorithms. * \brief Provides the high level interface to wrap encoder algorithms.
* *
*/ */
#include <assert.h>
#include <limits.h> #include <limits.h>
#include <stdlib.h>
#include <string.h> #include <string.h>
#include "vp8/common/blockd.h"
#include "vpx_config.h" #include "vpx_config.h"
#include "vpx/internal/vpx_codec_internal.h" #include "vpx/internal/vpx_codec_internal.h"
@@ -84,8 +81,6 @@ vpx_codec_err_t vpx_codec_enc_init_multi_ver(
int i; int i;
void *mem_loc = NULL; void *mem_loc = NULL;
if (iface->enc.mr_get_mem_loc == NULL) return VPX_CODEC_INCAPABLE;
if (!(res = iface->enc.mr_get_mem_loc(cfg, &mem_loc))) { if (!(res = iface->enc.mr_get_mem_loc(cfg, &mem_loc))) {
for (i = 0; i < num_enc; i++) { for (i = 0; i < num_enc; i++) {
vpx_codec_priv_enc_mr_cfg_t mr_cfg; vpx_codec_priv_enc_mr_cfg_t mr_cfg;
@@ -94,7 +89,9 @@ vpx_codec_err_t vpx_codec_enc_init_multi_ver(
if (dsf->num < 1 || dsf->num > 4096 || dsf->den < 1 || if (dsf->num < 1 || dsf->num > 4096 || dsf->den < 1 ||
dsf->den > dsf->num) { dsf->den > dsf->num) {
res = VPX_CODEC_INVALID_PARAM; res = VPX_CODEC_INVALID_PARAM;
} else { break;
}
mr_cfg.mr_low_res_mode_info = mem_loc; mr_cfg.mr_low_res_mode_info = mem_loc;
mr_cfg.mr_total_resolutions = num_enc; mr_cfg.mr_total_resolutions = num_enc;
mr_cfg.mr_encoder_id = num_enc - 1 - i; mr_cfg.mr_encoder_id = num_enc - 1 - i;
@@ -113,7 +110,6 @@ vpx_codec_err_t vpx_codec_enc_init_multi_ver(
ctx->init_flags = flags; ctx->init_flags = flags;
ctx->config.enc = cfg; ctx->config.enc = cfg;
res = ctx->iface->init(ctx, &mr_cfg); res = ctx->iface->init(ctx, &mr_cfg);
}
if (res) { if (res) {
const char *error_detail = ctx->priv ? ctx->priv->err_detail : NULL; const char *error_detail = ctx->priv ? ctx->priv->err_detail : NULL;
@@ -128,14 +124,10 @@ vpx_codec_err_t vpx_codec_enc_init_multi_ver(
vpx_codec_destroy(ctx); vpx_codec_destroy(ctx);
i--; i--;
} }
#if CONFIG_MULTI_RES_ENCODING
assert(mem_loc);
free(((LOWER_RES_FRAME_INFO *)mem_loc)->mb_info);
free(mem_loc);
#endif
return SAVE_STATUS(ctx, res);
} }
if (res) break;
ctx++; ctx++;
cfg++; cfg++;
dsf++; dsf++;

View File

@@ -125,7 +125,7 @@ extern vpx_codec_iface_t *vpx_codec_vp9_cx(void);
enum vp8e_enc_control_id { enum vp8e_enc_control_id {
/*!\brief Codec control function to pass an ROI map to encoder. /*!\brief Codec control function to pass an ROI map to encoder.
* *
* Supported in codecs: VP8 * Supported in codecs: VP8, VP9
*/ */
VP8E_SET_ROI_MAP = 8, VP8E_SET_ROI_MAP = 8,
@@ -408,7 +408,7 @@ enum vp8e_enc_control_id {
/*!\brief Codec control function to set noise sensitivity. /*!\brief Codec control function to set noise sensitivity.
* *
* 0: off, 1: On(YOnly), 2: For SVC only, on top two spatial layers(YOnly) * 0: off, 1: On(YOnly)
* *
* Supported in codecs: VP9 * Supported in codecs: VP9
*/ */
@@ -423,12 +423,6 @@ enum vp8e_enc_control_id {
*/ */
VP9E_SET_SVC, VP9E_SET_SVC,
/*!\brief Codec control function to pass an ROI map to encoder.
*
* Supported in codecs: VP9
*/
VP9E_SET_ROI_MAP,
/*!\brief Codec control function to set parameters for SVC. /*!\brief Codec control function to set parameters for SVC.
* \note Parameters contain min_q, max_q, scaling factor for each of the * \note Parameters contain min_q, max_q, scaling factor for each of the
* SVC layers. * SVC layers.
@@ -649,20 +643,16 @@ typedef enum vp9e_temporal_layering_mode {
*/ */
typedef struct vpx_roi_map { typedef struct vpx_roi_map {
/*! If ROI is enabled. */ /*! An id between 0 and 3 for each 16x16 region within a frame. */
uint8_t enabled;
/*! An id between 0-3 (0-7 for vp9) for each 16x16 (8x8 for VP9)
* region within a frame. */
unsigned char *roi_map; unsigned char *roi_map;
unsigned int rows; /**< Number of rows. */ unsigned int rows; /**< Number of rows. */
unsigned int cols; /**< Number of columns. */ unsigned int cols; /**< Number of columns. */
/*! VP8 only uses the first 4 segments. VP9 uses 8 segments. */ // TODO(paulwilkins): broken for VP9 which has 8 segments
int delta_q[8]; /**< Quantizer deltas. */ // q and loop filter deltas for each segment
int delta_lf[8]; /**< Loop filter deltas. */ // (see MAX_MB_SEGMENTS)
/*! skip and ref frame segment is only used in VP9. */ int delta_q[4]; /**< Quantizer deltas. */
int skip[8]; /**< Skip this block. */ int delta_lf[4]; /**< Loop filter deltas. */
int ref_frame[8]; /**< Reference frame for this block. */ /*! Static breakout threshold for each segment. */
/*! Static breakout threshold for each segment. Only used in VP8. */
unsigned int static_threshold[4]; unsigned int static_threshold[4];
} vpx_roi_map_t; } vpx_roi_map_t;
@@ -759,8 +749,6 @@ VPX_CTRL_USE_TYPE(VP8E_SET_TEMPORAL_LAYER_ID, int)
#define VPX_CTRL_VP8E_SET_TEMPORAL_LAYER_ID #define VPX_CTRL_VP8E_SET_TEMPORAL_LAYER_ID
VPX_CTRL_USE_TYPE(VP8E_SET_ROI_MAP, vpx_roi_map_t *) VPX_CTRL_USE_TYPE(VP8E_SET_ROI_MAP, vpx_roi_map_t *)
#define VPX_CTRL_VP8E_SET_ROI_MAP #define VPX_CTRL_VP8E_SET_ROI_MAP
VPX_CTRL_USE_TYPE(VP9E_SET_ROI_MAP, vpx_roi_map_t *)
#define VPX_CTRL_VP9E_SET_ROI_MAP
VPX_CTRL_USE_TYPE(VP8E_SET_ACTIVEMAP, vpx_active_map_t *) VPX_CTRL_USE_TYPE(VP8E_SET_ACTIVEMAP, vpx_active_map_t *)
#define VPX_CTRL_VP8E_SET_ACTIVEMAP #define VPX_CTRL_VP8E_SET_ACTIVEMAP
VPX_CTRL_USE_TYPE(VP8E_SET_SCALEMODE, vpx_scaling_mode_t *) VPX_CTRL_USE_TYPE(VP8E_SET_SCALEMODE, vpx_scaling_mode_t *)

Some files were not shown because too many files have changed in this diff Show More