Compare commits


1 Commit

Author  SHA1        Message       Date
wangch  eea111f16a  Test gerrit.  2017-12-05 18:07:21 -05:00

150 changed files with 3854 additions and 5687 deletions


@@ -1,12 +1,12 @@
---
Language: Cpp
# BasedOnStyle: Google
# Generated with clang-format 5.0.0
# Generated with clang-format 4.0.1
AccessModifierOffset: -1
AlignAfterOpenBracket: Align
AlignConsecutiveAssignments: false
AlignConsecutiveDeclarations: false
AlignEscapedNewlines: Left
AlignEscapedNewlinesLeft: true
AlignOperands: true
AlignTrailingComments: true
AllowAllParametersOfDeclarationOnNextLine: true
@@ -33,20 +33,14 @@ BraceWrapping:
BeforeCatch: false
BeforeElse: false
IndentBraces: false
SplitEmptyFunction: true
SplitEmptyRecord: true
SplitEmptyNamespace: true
BreakBeforeBinaryOperators: None
BreakBeforeBraces: Attach
BreakBeforeInheritanceComma: false
BreakBeforeTernaryOperators: true
BreakConstructorInitializersBeforeComma: false
BreakConstructorInitializers: BeforeColon
BreakAfterJavaFieldAnnotations: false
BreakStringLiterals: true
ColumnLimit: 80
CommentPragmas: '^ IWYU pragma:'
CompactNamespaces: false
ConstructorInitializerAllOnOneLineOrOnePerLine: false
ConstructorInitializerIndentWidth: 4
ContinuationIndentWidth: 4
@@ -54,11 +48,7 @@ Cpp11BracedListStyle: false
DerivePointerAlignment: false
DisableFormat: false
ExperimentalAutoDetectBinPacking: false
FixNamespaceComments: true
ForEachMacros:
- foreach
- Q_FOREACH
- BOOST_FOREACH
ForEachMacros: [ foreach, Q_FOREACH, BOOST_FOREACH ]
IncludeCategories:
- Regex: '^<.*\.h>'
Priority: 1
@@ -80,7 +70,6 @@ NamespaceIndentation: None
ObjCBlockIndentWidth: 2
ObjCSpaceAfterProperty: false
ObjCSpaceBeforeProtocolList: false
PenaltyBreakAssignment: 2
PenaltyBreakBeforeFirstCallParameter: 1
PenaltyBreakComment: 300
PenaltyBreakFirstLessLess: 120
@@ -90,7 +79,6 @@ PenaltyReturnTypeOnItsOwnLine: 200
PointerAlignment: Right
ReflowComments: true
SortIncludes: false
SortUsingDeclarations: true
SpaceAfterCStyleCast: false
SpaceAfterTemplateKeyword: true
SpaceBeforeAssignmentOperators: true


@@ -3,7 +3,6 @@ Aex Converse <aconverse@google.com>
Aex Converse <aconverse@google.com> <alex.converse@gmail.com>
Alexis Ballier <aballier@gentoo.org> <alexis.ballier@gmail.com>
Alpha Lam <hclam@google.com> <hclam@chromium.org>
Chris Cunningham <chcunningham@chromium.org>
Daniele Castagna <dcastagna@chromium.org> <dcastagna@google.com>
Deb Mukherjee <debargha@google.com>
Erik Niemeyer <erik.a.niemeyer@intel.com> <erik.a.niemeyer@gmail.com>
@@ -22,21 +21,18 @@ Marco Paniconi <marpan@google.com>
Marco Paniconi <marpan@google.com> <marpan@chromium.org>
Pascal Massimino <pascal.massimino@gmail.com>
Paul Wilkins <paulwilkins@google.com>
Peter Boström <pbos@chromium.org> <pbos@google.com>
Peter de Rivaz <peter.derivaz@gmail.com>
Peter de Rivaz <peter.derivaz@gmail.com> <peter.derivaz@argondesign.com>
Ralph Giles <giles@xiph.org> <giles@entropywave.com>
Ralph Giles <giles@xiph.org> <giles@mozilla.com>
Ronald S. Bultje <rsbultje@gmail.com> <rbultje@google.com>
Sami Pietilä <samipietila@google.com>
Shiyou Yin <yinshiyou-hf@loongson.cn>
Tamar Levy <tamar.levy@intel.com>
Tamar Levy <tamar.levy@intel.com> <levytamar82@gmail.com>
Tero Rintaluoma <teror@google.com> <tero.rintaluoma@on2.com>
Timothy B. Terriberry <tterribe@xiph.org> <tterriberry@mozilla.com>
Tom Finegan <tomfinegan@google.com>
Tom Finegan <tomfinegan@google.com> <tomfinegan@chromium.org>
Urvang Joshi <urvang@google.com> <urvang@chromium.org>
Yaowu Xu <yaowu@google.com> <adam@xuyaowu.com>
Yaowu Xu <yaowu@google.com> <yaowu@xuyaowu.com>
Yaowu Xu <yaowu@google.com> <Yaowu Xu>

AUTHORS

@@ -3,13 +3,13 @@
Aaron Watry <awatry@gmail.com>
Abo Talib Mahfoodh <ab.mahfoodh@gmail.com>
Adam Xu <adam@xuyaowu.com>
Adrian Grange <agrange@google.com>
Aex Converse <aconverse@google.com>
Ahmad Sharif <asharif@google.com>
Aleksey Vasenev <margtu-fivt@ya.ru>
Alexander Potapenko <glider@google.com>
Alexander Voronov <avoronov@graphics.cs.msu.ru>
Alexandra Hájková <alexandra.khirnova@gmail.com>
Alexis Ballier <aballier@gentoo.org>
Alok Ahuja <waveletcoeff@gmail.com>
Alpha Lam <hclam@google.com>
@@ -17,7 +17,6 @@ A.Mahfoodh <ab.mahfoodh@gmail.com>
Ami Fischman <fischman@chromium.org>
Andoni Morales Alastruey <ylatuya@gmail.com>
Andres Mejia <mcitadel@gmail.com>
Andrew Lewis <andrewlewis@google.com>
Andrew Russell <anrussell@google.com>
Angie Chiang <angiebird@google.com>
Aron Rosenberg <arosenberg@logitech.com>
@@ -25,9 +24,7 @@ Attila Nagy <attilanagy@google.com>
Brion Vibber <bvibber@wikimedia.org>
changjun.yang <changjun.yang@intel.com>
Charles 'Buck' Krasic <ckrasic@google.com>
Cheng Chen <chengchen@google.com>
chm <chm@rock-chips.com>
Chris Cunningham <chcunningham@chromium.org>
Christian Duvivier <cduvivier@google.com>
Daniele Castagna <dcastagna@chromium.org>
Daniel Kang <ddkang@google.com>
@@ -49,12 +46,10 @@ Geza Lore <gezalore@gmail.com>
Ghislain MARY <ghislainmary2@gmail.com>
Giuseppe Scrivano <gscrivano@gnu.org>
Gordana Cmiljanovic <gordana.cmiljanovic@imgtec.com>
Gregor Jasny <gjasny@gmail.com>
Guillaume Martres <gmartres@google.com>
Guillermo Ballester Valor <gbvalor@gmail.com>
Hangyu Kuang <hkuang@google.com>
Hanno Böck <hanno@hboeck.de>
Han Shen <shenhan@google.com>
Henrik Lundin <hlundin@google.com>
Hui Su <huisu@google.com>
Ivan Krasin <krasin@chromium.org>
@@ -88,7 +83,6 @@ Justin Clift <justin@salasaga.org>
Justin Lebar <justin.lebar@gmail.com>
Kaustubh Raste <kaustubh.raste@imgtec.com>
KO Myung-Hun <komh@chollian.net>
Kyle Siefring <kylesiefring@gmail.com>
Lawrence Velázquez <larryv@macports.org>
Linfeng Zhang <linfengz@google.com>
Lou Quillio <louquillio@google.com>
@@ -107,7 +101,6 @@ Mikhal Shemer <mikhal@google.com>
Min Chen <chenm003@gmail.com>
Minghai Shang <minghai@google.com>
Min Ye <yeemmi@google.com>
Moriyoshi Koizumi <mozo@mozo.jp>
Morton Jonuschat <yabawock@gmail.com>
Nathan E. Egge <negge@mozilla.com>
Nico Weber <thakis@chromium.org>
@@ -118,15 +111,12 @@ Paul Wilkins <paulwilkins@google.com>
Pavol Rusnak <stick@gk2.sk>
Paweł Hajdan <phajdan@google.com>
Pengchong Jin <pengchong@google.com>
Peter Boström <pbos@chromium.org>
Peter Collingbourne <pcc@chromium.org>
Peter Boström <pbos@google.com>
Peter de Rivaz <peter.derivaz@gmail.com>
Philip Jägenstedt <philipj@opera.com>
Priit Laes <plaes@plaes.org>
Rafael Ávila de Espíndola <rafael.espindola@gmail.com>
Rafaël Carré <funman@videolan.org>
Rafael de Lucena Valle <rafaeldelucena@gmail.com>
Rahul Chaudhry <rahulchaudhry@google.com>
Ralph Giles <giles@xiph.org>
Ranjit Kumar Tulabandu <ranjit.tulabandu@ittiam.com>
Rob Bradford <rob@linux.intel.com>
@@ -145,7 +135,6 @@ Shiyou Yin <yinshiyou-hf@loongson.cn>
Shunyao Li <shunyaoli@google.com>
Stefan Holmer <holmer@google.com>
Suman Sunkara <sunkaras@google.com>
Sylvestre Ledru <sylvestre@mozilla.com>
Taekhyun Kim <takim@nvidia.com>
Takanori MATSUURA <t.matsuu@gmail.com>
Tamar Levy <tamar.levy@intel.com>
@@ -158,7 +147,6 @@ Tom Finegan <tomfinegan@google.com>
Tristan Matthews <le.businessman@gmail.com>
Urvang Joshi <urvang@google.com>
Vignesh Venkatasubramanian <vigneshv@google.com>
Vlad Tsyrklevich <vtsyrklevich@chromium.org>
Yaowu Xu <yaowu@google.com>
Yi Luo <luoyi@google.com>
Yongzhe Wang <yongzhe@google.com>


@@ -1,28 +1,3 @@
2017-01-04 v1.7.0 "Mandarin Duck"
This release focused on high bit depth performance (10/12 bit) and vp9
encoding improvements.
- Upgrading:
This release is ABI incompatible due to new vp9 encoder features.
Frame parallel decoding for vp9 has been removed.
- Enhancements:
vp9 encoding supports additional threads with --row-mt. This can be greater
than the number of tiles.
Two new vp9 encoder options have been added:
--corpus-complexity
--tune-content=film
Additional tooling for respecting the vp9 "level" profiles has been added.
- Bug fixes:
A variety of fuzzing issues.
vp8 threading fix for ARM.
Codec control VP9_SET_SKIP_LOOP_FILTER fixed.
Reject invalid multi resolution configurations.
2017-01-09 v1.6.1 "Long Tailed Duck"
This release improves upon the VP9 encoder and speeds up the encoding and
decoding processes.

README

@@ -1,4 +1,4 @@
README - 24 January 2018
README - 26 January 2017
Welcome to the WebM VP8/VP9 Codec SDK!
@@ -63,8 +63,6 @@ COMPILING THE APPLICATIONS/LIBRARIES:
armv8-linux-gcc
mips32-linux-gcc
mips64-linux-gcc
ppc64-linux-gcc
ppc64le-linux-gcc
sparc-solaris-gcc
x86-android-gcc
x86-darwin8-gcc


@@ -1,13 +1,4 @@
#!/usr/bin/env perl
##
## Copyright (c) 2017 The WebM project authors. All Rights Reserved.
##
## Use of this source code is governed by a BSD-style license
## that can be found in the LICENSE file in the root of the source
## tree. An additional intellectual property rights grant can be found
## in the file PATENTS. All contributing project authors may
## be found in the AUTHORS file in the root of the source tree.
##
no strict 'refs';
use warnings;
@@ -209,7 +200,6 @@ sub filter {
sub common_top() {
my $include_guard = uc($opts{sym})."_H_";
print <<EOF;
// This file is generated. Do not edit.
#ifndef ${include_guard}
#define ${include_guard}


@@ -60,7 +60,6 @@ if [ ${bare} ]; then
echo "${changelog_version}${git_version_id}" > $$.tmp
else
cat<<EOF>$$.tmp
// This file is generated. Do not edit.
#define VERSION_MAJOR $major_version
#define VERSION_MINOR $minor_version
#define VERSION_PATCH $patch_version

configure

@@ -665,7 +665,7 @@ process_toolchain() {
gen_vcproj_cmd=${source_path}/build/make/gen_msvs_vcxproj.sh
enabled werror && gen_vcproj_cmd="${gen_vcproj_cmd} --enable-werror"
all_targets="${all_targets} solution"
INLINE="__inline"
INLINE="__forceinline"
;;
esac


@@ -429,9 +429,8 @@ static void set_rate_control_stats(struct RateControlStats *rc,
rc->layer_framerate[layer] = framerate / cfg->ts_rate_decimator[tl];
if (tl > 0) {
rc->layer_pfb[layer] =
1000.0 *
(cfg->layer_target_bitrate[layer] -
cfg->layer_target_bitrate[layer - 1]) /
1000.0 * (cfg->layer_target_bitrate[layer] -
cfg->layer_target_bitrate[layer - 1]) /
(rc->layer_framerate[layer] - rc->layer_framerate[layer - 1]);
} else {
rc->layer_pfb[layer] = 1000.0 * cfg->layer_target_bitrate[layer] /
@@ -574,8 +573,8 @@ void set_frame_flags_bypass_mode(int sl, int tl, int num_spatial_layers,
} else {
if (is_key_frame) {
ref_frame_config->frame_flags[sl] =
VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF |
VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_ARF;
VP8_EFLAG_NO_REF_LAST | VP8_EFLAG_NO_REF_ARF |
VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF;
} else {
ref_frame_config->frame_flags[sl] =
VP8_EFLAG_NO_REF_ARF | VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF;
@@ -589,24 +588,14 @@ void set_frame_flags_bypass_mode(int sl, int tl, int num_spatial_layers,
} else {
ref_frame_config->frame_flags[sl] =
VP8_EFLAG_NO_REF_ARF | VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_GF;
if (sl == num_spatial_layers - 1)
ref_frame_config->frame_flags[sl] =
VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_REF_ARF |
VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_GF;
}
}
if (tl == 0) {
ref_frame_config->lst_fb_idx[sl] = sl;
if (sl) {
if (is_key_frame) {
ref_frame_config->lst_fb_idx[sl] = sl - 1;
ref_frame_config->gld_fb_idx[sl] = sl;
} else {
ref_frame_config->gld_fb_idx[sl] = sl - 1;
}
} else {
if (sl)
ref_frame_config->gld_fb_idx[sl] = sl - 1;
else
ref_frame_config->gld_fb_idx[sl] = 0;
}
ref_frame_config->alt_fb_idx[sl] = 0;
} else if (tl == 1) {
ref_frame_config->lst_fb_idx[sl] = sl;
@@ -749,8 +738,6 @@ int main(int argc, const char **argv) {
// the encode for the whole superframe. The encoder will internally loop
// over all the spatial layers for the current superframe.
vpx_codec_control(&codec, VP9E_SET_SVC_LAYER_ID, &layer_id);
// TODO(jianj): Fix the parameter passing for "is_key_frame" in
// set_frame_flags_bypass_model() for case of periodic key frames.
set_frame_flags_bypass_mode(sl, layer_id.temporal_layer_id,
svc_ctx.spatial_layers, frame_cnt == 0,
&ref_frame_config);


@@ -26,29 +26,19 @@
#include "../tools_common.h"
#include "../video_writer.h"
#define ROI_MAP 0
#define zero(Dest) memset(&Dest, 0, sizeof(Dest));
#define VP8_ROI_MAP 0
static const char *exec_name;
void usage_exit(void) { exit(EXIT_FAILURE); }
// Denoiser states for vp8, for temporal denoising.
enum denoiserStateVp8 {
kVp8DenoiserOff,
kVp8DenoiserOnYOnly,
kVp8DenoiserOnYUV,
kVp8DenoiserOnYUVAggressive,
kVp8DenoiserOnAdaptive
};
// Denoiser states for vp9, for temporal denoising.
enum denoiserStateVp9 {
kVp9DenoiserOff,
kVp9DenoiserOnYOnly,
// For SVC: denoise the top two spatial layers.
kVp9DenoiserOnYTwoSpatialLayers
// Denoiser states, for temporal denoising.
enum denoiserState {
kDenoiserOff,
kDenoiserOnYOnly,
kDenoiserOnYUV,
kDenoiserOnYUVAggressive,
kDenoiserOnAdaptive
};
static int mode_to_num_layers[13] = { 1, 2, 2, 3, 3, 3, 3, 5, 2, 3, 3, 3, 3 };
@@ -101,10 +91,9 @@ static void set_rate_control_metrics(struct RateControlMetrics *rc,
for (i = 0; i < cfg->ts_number_layers; ++i) {
if (i > 0) {
rc->layer_framerate[i] = framerate / cfg->ts_rate_decimator[i];
rc->layer_pfb[i] =
1000.0 *
(rc->layer_target_bitrate[i] - rc->layer_target_bitrate[i - 1]) /
(rc->layer_framerate[i] - rc->layer_framerate[i - 1]);
rc->layer_pfb[i] = 1000.0 * (rc->layer_target_bitrate[i] -
rc->layer_target_bitrate[i - 1]) /
(rc->layer_framerate[i] - rc->layer_framerate[i - 1]);
}
rc->layer_input_frames[i] = 0;
rc->layer_enc_frames[i] = 0;
@@ -167,60 +156,38 @@ static void printout_rate_control_summary(struct RateControlMetrics *rc,
die("Error: Number of input frames not equal to output! \n");
}
#if ROI_MAP
static void set_roi_map(const char *enc_name, vpx_codec_enc_cfg_t *cfg,
vpx_roi_map_t *roi) {
#if VP8_ROI_MAP
static void vp8_set_roi_map(vpx_codec_enc_cfg_t *cfg, vpx_roi_map_t *roi) {
unsigned int i, j;
int block_size = 0;
uint8_t is_vp8 = strncmp(enc_name, "vp8", 3) == 0 ? 1 : 0;
uint8_t is_vp9 = strncmp(enc_name, "vp9", 3) == 0 ? 1 : 0;
if (!is_vp8 && !is_vp9) {
die("unsupported codec.");
}
zero(*roi);
block_size = is_vp9 && !is_vp8 ? 8 : 16;
memset(roi, 0, sizeof(*roi));
// ROI is based on the segments (4 for vp8, 8 for vp9), smallest unit for
// segment is 16x16 for vp8, 8x8 for vp9.
roi->rows = (cfg->g_h + block_size - 1) / block_size;
roi->cols = (cfg->g_w + block_size - 1) / block_size;
roi->rows = (cfg->g_h + 15) / 16;
roi->cols = (cfg->g_w + 15) / 16;
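// Worked example (illustrative note, not part of the patch): for a
// 352x288 CIF input, the vp8 16x16 grid gives (288 + 15) / 16 = 18 rows
// and (352 + 15) / 16 = 22 cols; the vp9 8x8 grid would give 36 rows
// and 44 cols.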
// Applies delta QP on the segment blocks, varies from -63 to 63.
// Setting to negative means lower QP (better quality).
// Below we set delta_q to the extreme (-63) to show strong effect.
// VP8 uses the first 4 segments. VP9 uses all 8 segments.
zero(roi->delta_q);
roi->delta_q[0] = 0;
roi->delta_q[1] = -63;
roi->delta_q[2] = 0;
roi->delta_q[3] = 0;
// Applies delta loopfilter strength on the segment blocks, varies from -63 to
// 63. Setting to positive means stronger loopfilter. VP8 uses the first 4
// segments. VP9 uses all 8 segments.
zero(roi->delta_lf);
// 63. Setting to positive means stronger loopfilter.
roi->delta_lf[0] = 0;
roi->delta_lf[1] = 0;
roi->delta_lf[2] = 0;
roi->delta_lf[3] = 0;
if (is_vp8) {
// Applies skip encoding threshold on the segment blocks, varies from 0 to
// UINT_MAX. Larger value means more skipping of encoding is possible.
// This skip threshold only applies on delta frames.
zero(roi->static_threshold);
}
if (is_vp9) {
// Apply skip segment. Setting to 1 means this block will be copied from
// previous frame.
zero(roi->skip);
}
if (is_vp9) {
// Apply ref frame segment.
// -1 : Do not apply this segment.
// 0 : Force using intra.
// 1 : Force using last.
// 2 : Force using golden.
// 3 : Force using altref but not used in non-rd pickmode for 0 lag.
memset(roi->ref_frame, -1, sizeof(roi->ref_frame));
roi->ref_frame[1] = 1;
}
// Applies skip encoding threshold on the segment blocks, varies from 0 to
// UINT_MAX. Larger value means more skipping of encoding is possible.
// This skip threshold only applies on delta frames.
roi->static_threshold[0] = 0;
roi->static_threshold[1] = 0;
roi->static_threshold[2] = 0;
roi->static_threshold[3] = 0;
// Use 2 states: 1 is center square, 0 is the rest.
roi->roi_map =
@@ -588,7 +555,7 @@ int main(int argc, char **argv) {
int layering_mode = 0;
int layer_flags[VPX_TS_MAX_PERIODICITY] = { 0 };
int flag_periodicity = 1;
#if ROI_MAP
#if VP8_ROI_MAP
vpx_roi_map_t roi;
#endif
vpx_svc_layer_id_t layer_id = { 0, 0 };
@@ -788,11 +755,11 @@ int main(int argc, char **argv) {
if (strncmp(encoder->name, "vp8", 3) == 0) {
vpx_codec_control(&codec, VP8E_SET_CPUUSED, -speed);
vpx_codec_control(&codec, VP8E_SET_NOISE_SENSITIVITY, kVp8DenoiserOff);
vpx_codec_control(&codec, VP8E_SET_NOISE_SENSITIVITY, kDenoiserOff);
vpx_codec_control(&codec, VP8E_SET_STATIC_THRESHOLD, 1);
vpx_codec_control(&codec, VP8E_SET_GF_CBR_BOOST_PCT, 0);
#if ROI_MAP
set_roi_map(encoder->name, &cfg, &roi);
#if VP8_ROI_MAP
vp8_set_roi_map(&cfg, &roi);
if (vpx_codec_control(&codec, VP8E_SET_ROI_MAP, &roi))
die_codec(&codec, "Failed to set ROI map");
#endif
@@ -805,16 +772,10 @@ int main(int argc, char **argv) {
vpx_codec_control(&codec, VP9E_SET_GF_CBR_BOOST_PCT, 0);
vpx_codec_control(&codec, VP9E_SET_FRAME_PARALLEL_DECODING, 0);
vpx_codec_control(&codec, VP9E_SET_FRAME_PERIODIC_BOOST, 0);
vpx_codec_control(&codec, VP9E_SET_NOISE_SENSITIVITY, kVp9DenoiserOff);
vpx_codec_control(&codec, VP9E_SET_NOISE_SENSITIVITY, kDenoiserOff);
vpx_codec_control(&codec, VP8E_SET_STATIC_THRESHOLD, 1);
vpx_codec_control(&codec, VP9E_SET_TUNE_CONTENT, 0);
vpx_codec_control(&codec, VP9E_SET_TILE_COLUMNS, (cfg.g_threads >> 1));
#if ROI_MAP
set_roi_map(encoder->name, &cfg, &roi);
if (vpx_codec_control(&codec, VP9E_SET_ROI_MAP, &roi))
die_codec(&codec, "Failed to set ROI map");
vpx_codec_control(&codec, VP9E_SET_AQ_MODE, 0);
#endif
// TODO(marpan/jianj): There is an issue with row-mt for low resolutions at
// high speed settings, disable its use for those cases for now.
if (cfg.g_threads > 1 && ((cfg.g_w > 320 && cfg.g_h > 240) || speed < 7))
@@ -942,8 +903,5 @@ int main(int argc, char **argv) {
for (i = 0; i < cfg.ts_number_layers; ++i) vpx_video_writer_close(outfile[i]);
vpx_img_free(&raw);
#if ROI_MAP
free(roi.roi_map);
#endif
return EXIT_SUCCESS;
}


@@ -943,6 +943,18 @@ GENERATE_XML = NO
XML_OUTPUT = xml
# The XML_SCHEMA tag can be used to specify an XML schema,
# which can be used by a validating XML parser to check the
# syntax of the XML files.
XML_SCHEMA =
# The XML_DTD tag can be used to specify an XML DTD,
# which can be used by a validating XML parser to check the
# syntax of the XML files.
XML_DTD =
# If the XML_PROGRAMLISTING tag is set to YES Doxygen will
# dump the program listings (including syntax highlighting
# and cross-referencing information) to the XML output. Note that


@@ -233,8 +233,8 @@ OBJS-yes += $(LIBVPX_OBJS)
LIBS-$(if yes,$(CONFIG_STATIC)) += $(BUILD_PFX)libvpx.a $(BUILD_PFX)libvpx_g.a
$(BUILD_PFX)libvpx_g.a: $(LIBVPX_OBJS)
SO_VERSION_MAJOR := 5
SO_VERSION_MINOR := 0
SO_VERSION_MAJOR := 4
SO_VERSION_MINOR := 1
SO_VERSION_PATCH := 0
ifeq ($(filter darwin%,$(TGT_OS)),$(TGT_OS))
LIBVPX_SO := libvpx.$(SO_VERSION_MAJOR).dylib


@@ -215,7 +215,7 @@ using std::tr1::make_tuple;
#if CONFIG_VP9_ENCODER
const BlockinessParam c_vp9_tests[] = {
make_tuple(320, 240), make_tuple(318, 242), make_tuple(318, 238)
make_tuple(320, 240), make_tuple(318, 242), make_tuple(318, 238),
};
INSTANTIATE_TEST_CASE_P(C, BlockinessVP9Test, ::testing::ValuesIn(c_vp9_tests));
#endif


@@ -205,7 +205,7 @@ using std::tr1::make_tuple;
#if CONFIG_VP9_ENCODER
const ConsistencyParam c_vp9_tests[] = {
make_tuple(320, 240), make_tuple(318, 242), make_tuple(318, 238)
make_tuple(320, 240), make_tuple(318, 242), make_tuple(318, 238),
};
INSTANTIATE_TEST_CASE_P(C, ConsistencyVP9Test,
::testing::ValuesIn(c_vp9_tests));


@@ -539,7 +539,6 @@ class DatarateTestVP9Large
denoiser_offon_test_ = 0;
denoiser_offon_period_ = -1;
frame_parallel_decoding_mode_ = 1;
use_roi_ = 0;
}
//
@@ -622,10 +621,6 @@ class DatarateTestVP9Large
encoder->Control(VP9E_SET_FRAME_PARALLEL_DECODING,
frame_parallel_decoding_mode_);
if (use_roi_) {
encoder->Control(VP9E_SET_ROI_MAP, &roi_);
}
if (cfg_.ts_number_layers > 1) {
if (video->frame() == 0) {
encoder->Control(VP9E_SET_SVC, 1);
@@ -706,8 +701,6 @@ class DatarateTestVP9Large
int denoiser_offon_test_;
int denoiser_offon_period_;
int frame_parallel_decoding_mode_;
bool use_roi_;
vpx_roi_map_t roi_;
};
// Check basic rate targeting for VBR mode with 0 lag.
@@ -1080,68 +1073,6 @@ TEST_P(DatarateTestVP9Large, BasicRateTargeting3TemporalLayersFrameDropping) {
}
}
class DatarateTestVP9RealTime : public DatarateTestVP9Large {
public:
virtual ~DatarateTestVP9RealTime() {}
};
// Check VP9 region of interest feature.
TEST_P(DatarateTestVP9RealTime, RegionOfInterest) {
if (deadline_ != VPX_DL_REALTIME || set_cpu_used_ < 5) return;
cfg_.rc_buf_initial_sz = 500;
cfg_.rc_buf_optimal_sz = 500;
cfg_.rc_buf_sz = 1000;
cfg_.rc_dropframe_thresh = 0;
cfg_.rc_min_quantizer = 0;
cfg_.rc_max_quantizer = 63;
cfg_.rc_end_usage = VPX_CBR;
cfg_.g_lag_in_frames = 0;
::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
30, 1, 0, 300);
cfg_.rc_target_bitrate = 450;
cfg_.g_w = 352;
cfg_.g_h = 288;
ResetModel();
// Set ROI parameters
use_roi_ = true;
memset(&roi_, 0, sizeof(roi_));
roi_.rows = (cfg_.g_h + 7) / 8;
roi_.cols = (cfg_.g_w + 7) / 8;
roi_.delta_q[1] = -20;
roi_.delta_lf[1] = -20;
memset(roi_.ref_frame, -1, sizeof(roi_.ref_frame));
roi_.ref_frame[1] = 1;
// Use 2 states: 1 is center square, 0 is the rest.
roi_.roi_map = reinterpret_cast<uint8_t *>(
calloc(roi_.rows * roi_.cols, sizeof(*roi_.roi_map)));
ASSERT_TRUE(roi_.roi_map != NULL);
for (unsigned int i = 0; i < roi_.rows; ++i) {
for (unsigned int j = 0; j < roi_.cols; ++j) {
if (i > (roi_.rows >> 2) && i < ((roi_.rows * 3) >> 2) &&
j > (roi_.cols >> 2) && j < ((roi_.cols * 3) >> 2)) {
roi_.roi_map[i * roi_.cols + j] = 1;
}
}
}
ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
ASSERT_GE(cfg_.rc_target_bitrate, effective_datarate_[0] * 0.90)
<< " The datarate for the file exceeds the target!";
ASSERT_LE(cfg_.rc_target_bitrate, effective_datarate_[0] * 1.4)
<< " The datarate for the file missed the target!";
free(roi_.roi_map);
}
#if CONFIG_VP9_TEMPORAL_DENOISING
class DatarateTestVP9LargeDenoiser : public DatarateTestVP9Large {
public:
@@ -1285,78 +1216,18 @@ class DatarateOnePassCbrSvc
}
virtual void ResetModel() {
last_pts_ = 0;
bits_in_buffer_model_ = cfg_.rc_target_bitrate * cfg_.rc_buf_initial_sz;
frame_number_ = 0;
first_drop_ = 0;
bits_total_ = 0;
duration_ = 0.0;
mismatch_psnr_ = 0.0;
mismatch_nframes_ = 0;
denoiser_on_ = 0;
tune_content_ = 0;
base_speed_setting_ = 5;
spatial_layer_id_ = 0;
temporal_layer_id_ = 0;
update_pattern_ = 0;
memset(bits_in_buffer_model_, 0, sizeof(bits_in_buffer_model_));
memset(bits_total_, 0, sizeof(bits_total_));
memset(layer_target_avg_bandwidth_, 0, sizeof(layer_target_avg_bandwidth_));
dynamic_drop_layer_ = false;
}
virtual void BeginPassHook(unsigned int /*pass*/) {}
// Example pattern for spatial layers and 2 temporal layers used in the
// bypass/flexible mode. The pattern corresponds to the pattern
// VP9E_TEMPORAL_LAYERING_MODE_0101 (temporal_layering_mode == 2) used in
// non-flexible mode, except that we disable inter-layer prediction.
void set_frame_flags_bypass_mode(
int tl, int num_spatial_layers, int is_key_frame,
vpx_svc_ref_frame_config_t *ref_frame_config) {
for (int sl = 0; sl < num_spatial_layers; ++sl) {
if (!tl) {
if (!sl) {
ref_frame_config->frame_flags[sl] =
VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF | VP8_EFLAG_NO_UPD_GF |
VP8_EFLAG_NO_UPD_ARF;
} else {
if (is_key_frame) {
ref_frame_config->frame_flags[sl] =
VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF |
VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_ARF;
} else {
ref_frame_config->frame_flags[sl] =
VP8_EFLAG_NO_REF_ARF | VP8_EFLAG_NO_UPD_GF |
VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_REF_GF;
}
}
} else if (tl == 1) {
if (!sl) {
ref_frame_config->frame_flags[sl] =
VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF |
VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_GF;
} else {
ref_frame_config->frame_flags[sl] =
VP8_EFLAG_NO_REF_ARF | VP8_EFLAG_NO_UPD_LAST |
VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_REF_GF;
}
}
if (tl == 0) {
ref_frame_config->lst_fb_idx[sl] = sl;
if (sl) {
if (is_key_frame) {
ref_frame_config->lst_fb_idx[sl] = sl - 1;
ref_frame_config->gld_fb_idx[sl] = sl;
} else {
ref_frame_config->gld_fb_idx[sl] = sl - 1;
}
} else {
ref_frame_config->gld_fb_idx[sl] = 0;
}
ref_frame_config->alt_fb_idx[sl] = 0;
} else if (tl == 1) {
ref_frame_config->lst_fb_idx[sl] = sl;
ref_frame_config->gld_fb_idx[sl] = num_spatial_layers + sl - 1;
ref_frame_config->alt_fb_idx[sl] = num_spatial_layers + sl;
}
}
}
virtual void PreEncodeFrameHook(::libvpx_test::VideoSource *video,
::libvpx_test::Encoder *encoder) {
if (video->frame() == 0) {
@@ -1381,137 +1252,36 @@ class DatarateOnePassCbrSvc
encoder->Control(VP8E_SET_STATIC_THRESHOLD, 1);
encoder->Control(VP9E_SET_TUNE_CONTENT, tune_content_);
}
if (update_pattern_ && video->frame() >= 100) {
vpx_svc_layer_id_t layer_id;
if (video->frame() == 100) {
cfg_.temporal_layering_mode = VP9E_TEMPORAL_LAYERING_MODE_BYPASS;
encoder->Config(&cfg_);
}
// Set layer id since the pattern changed.
layer_id.spatial_layer_id = 0;
layer_id.temporal_layer_id = (video->frame() % 2 != 0);
encoder->Control(VP9E_SET_SVC_LAYER_ID, &layer_id);
set_frame_flags_bypass_mode(layer_id.temporal_layer_id,
number_spatial_layers_, 0, &ref_frame_config);
encoder->Control(VP9E_SET_SVC_REF_FRAME_CONFIG, &ref_frame_config);
}
if (dynamic_drop_layer_) {
if (video->frame() == 100) {
// Change layer bitrates to set top layer to 0. This will trigger skip
// encoding/dropping of top spatial layer.
cfg_.rc_target_bitrate -= cfg_.layer_target_bitrate[2];
cfg_.layer_target_bitrate[2] = 0;
encoder->Config(&cfg_);
} else if (video->frame() == 300) {
// Change layer bitrate on top layer to non-zero to start encoding it
// again.
cfg_.layer_target_bitrate[2] = 500;
cfg_.rc_target_bitrate += cfg_.layer_target_bitrate[2];
encoder->Config(&cfg_);
}
}
const vpx_rational_t tb = video->timebase();
timebase_ = static_cast<double>(tb.num) / tb.den;
duration_ = 0;
}
virtual void PostEncodeFrameHook(::libvpx_test::Encoder *encoder) {
vpx_svc_layer_id_t layer_id;
encoder->Control(VP9E_GET_SVC_LAYER_ID, &layer_id);
spatial_layer_id_ = layer_id.spatial_layer_id;
temporal_layer_id_ = layer_id.temporal_layer_id;
// Update buffer with per-layer target frame bandwidth, this is done
// for every frame passed to the encoder (encoded or dropped).
// For temporal layers, update the cumulative buffer level.
for (int sl = 0; sl < number_spatial_layers_; ++sl) {
for (int tl = temporal_layer_id_; tl < number_temporal_layers_; ++tl) {
const int layer = sl * number_temporal_layers_ + tl;
bits_in_buffer_model_[layer] +=
static_cast<int64_t>(layer_target_avg_bandwidth_[layer]);
}
}
}
vpx_codec_err_t parse_superframe_index(const uint8_t *data, size_t data_sz,
uint32_t sizes[8], int *count) {
uint8_t marker;
marker = *(data + data_sz - 1);
*count = 0;
if ((marker & 0xe0) == 0xc0) {
const uint32_t frames = (marker & 0x7) + 1;
const uint32_t mag = ((marker >> 3) & 0x3) + 1;
const size_t index_sz = 2 + mag * frames;
// This chunk is marked as having a superframe index but doesn't have
// enough data for it, thus it's an invalid superframe index.
if (data_sz < index_sz) return VPX_CODEC_CORRUPT_FRAME;
{
const uint8_t marker2 = *(data + data_sz - index_sz);
// This chunk is marked as having a superframe index but doesn't have
// the matching marker byte at the front of the index therefore it's an
// invalid chunk.
if (marker != marker2) return VPX_CODEC_CORRUPT_FRAME;
}
{
uint32_t i, j;
const uint8_t *x = &data[data_sz - index_sz + 1];
for (i = 0; i < frames; ++i) {
uint32_t this_sz = 0;
for (j = 0; j < mag; ++j) this_sz |= (*x++) << (j * 8);
sizes[i] = this_sz;
}
*count = frames;
}
}
return VPX_CODEC_OK;
}
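// Worked example (illustrative note, not part of the patch): a trailing
// marker byte of 0xca (0b11001010) passes the (marker & 0xe0) == 0xc0
// check and encodes frames = (0xca & 0x7) + 1 = 3 and
// mag = ((0xca >> 3) & 0x3) + 1 = 2, so index_sz = 2 + 2 * 3 = 8 bytes:
// the marker, three 2-byte little-endian frame sizes, then the marker
// repeated at the end of the chunk.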
virtual void FramePktHook(const vpx_codec_cx_pkt_t *pkt) {
uint32_t sizes[8] = { 0 };
int count = 0;
last_pts_ = pkt->data.frame.pts;
vpx_codec_pts_t duration = pkt->data.frame.pts - last_pts_;
if (last_pts_ == 0) duration = 1;
bits_in_buffer_model_ += static_cast<int64_t>(
duration * timebase_ * cfg_.rc_target_bitrate * 1000);
const bool key_frame =
(pkt->data.frame.flags & VPX_FRAME_IS_KEY) ? true : false;
parse_superframe_index(static_cast<const uint8_t *>(pkt->data.frame.buf),
pkt->data.frame.sz, sizes, &count);
if (!dynamic_drop_layer_) ASSERT_EQ(count, number_spatial_layers_);
for (int sl = 0; sl < number_spatial_layers_; ++sl) {
sizes[sl] = sizes[sl] << 3;
// Update the total encoded bits per layer.
// For temporal layers, update the cumulative encoded bits per layer.
for (int tl = temporal_layer_id_; tl < number_temporal_layers_; ++tl) {
const int layer = sl * number_temporal_layers_ + tl;
bits_total_[layer] += static_cast<int64_t>(sizes[sl]);
// Update the per-layer buffer level with the encoded frame size.
bits_in_buffer_model_[layer] -= static_cast<int64_t>(sizes[sl]);
// There should be no buffer underrun, except on the base
// temporal layer, since there may be key frames there.
if (!key_frame && tl > 0) {
ASSERT_GE(bits_in_buffer_model_[layer], 0)
<< "Buffer Underrun at frame " << pkt->data.frame.pts;
}
}
ASSERT_EQ(pkt->data.frame.width[sl],
top_sl_width_ * svc_params_.scaling_factor_num[sl] /
svc_params_.scaling_factor_den[sl]);
ASSERT_EQ(pkt->data.frame.height[sl],
top_sl_height_ * svc_params_.scaling_factor_num[sl] /
svc_params_.scaling_factor_den[sl]);
if (!key_frame) {
// TODO(marpan): This check currently fails for some of the SVC tests,
// re-enable when issue (webm:1350) is resolved.
// ASSERT_GE(bits_in_buffer_model_, 0) << "Buffer Underrun at frame "
// << pkt->data.frame.pts;
}
const size_t frame_size_in_bits = pkt->data.frame.sz * 8;
bits_in_buffer_model_ -= static_cast<int64_t>(frame_size_in_bits);
bits_total_ += frame_size_in_bits;
if (!first_drop_ && duration > 1) first_drop_ = last_pts_ + 1;
last_pts_ = pkt->data.frame.pts;
bits_in_last_frame_ = frame_size_in_bits;
++frame_number_;
}
virtual void EndPassHook(void) {
for (int sl = 0; sl < number_spatial_layers_; ++sl) {
for (int tl = 0; tl < number_temporal_layers_; ++tl) {
const int layer = sl * number_temporal_layers_ + tl;
const double file_size_in_kb = bits_total_[layer] / 1000.;
duration_ = (last_pts_ + 1) * timebase_;
file_datarate_[layer] = file_size_in_kb / duration_;
}
if (bits_total_) {
const double file_size_in_kb = bits_total_ / 1000.; // bits per kilobit
duration_ = (last_pts_ + 1) * timebase_;
file_datarate_ = file_size_in_kb / duration_;
}
}
@@ -1524,11 +1294,13 @@ class DatarateOnePassCbrSvc
unsigned int GetMismatchFrames() { return mismatch_nframes_; }
vpx_codec_pts_t last_pts_;
int64_t bits_in_buffer_model_[VPX_MAX_LAYERS];
int64_t bits_in_buffer_model_;
double timebase_;
int64_t bits_total_[VPX_MAX_LAYERS];
int frame_number_;
vpx_codec_pts_t first_drop_;
int64_t bits_total_;
double duration_;
double file_datarate_[VPX_MAX_LAYERS];
double file_datarate_;
size_t bits_in_last_frame_;
vpx_svc_extra_cfg_t svc_params_;
int speed_setting_;
@@ -1537,27 +1309,14 @@ class DatarateOnePassCbrSvc
int denoiser_on_;
int tune_content_;
int base_speed_setting_;
int spatial_layer_id_;
int temporal_layer_id_;
int number_spatial_layers_;
int number_temporal_layers_;
int layer_target_avg_bandwidth_[VPX_MAX_LAYERS];
bool dynamic_drop_layer_;
unsigned int top_sl_width_;
unsigned int top_sl_height_;
vpx_svc_ref_frame_config_t ref_frame_config;
int update_pattern_;
};
static void assign_layer_bitrates(vpx_codec_enc_cfg_t *const enc_cfg,
const vpx_svc_extra_cfg_t *svc_params,
int spatial_layers, int temporal_layers,
int temporal_layering_mode,
int *layer_target_avg_bandwidth,
int64_t *bits_in_buffer_model) {
int temporal_layering_mode) {
int sl, spatial_layer_target;
float total = 0;
float alloc_ratio[VPX_MAX_LAYERS] = { 0 };
float framerate = 30.0;
for (sl = 0; sl < spatial_layers; ++sl) {
if (svc_params->scaling_factor_den[sl] > 0) {
alloc_ratio[sl] = (float)(svc_params->scaling_factor_num[sl] * 1.0 /
@@ -1577,41 +1336,8 @@ static void assign_layer_bitrates(vpx_codec_enc_cfg_t *const enc_cfg,
} else if (temporal_layering_mode == 2) {
enc_cfg->layer_target_bitrate[index] = spatial_layer_target * 2 / 3;
enc_cfg->layer_target_bitrate[index + 1] = spatial_layer_target;
} else if (temporal_layering_mode <= 1) {
enc_cfg->layer_target_bitrate[index] = spatial_layer_target;
}
}
for (sl = 0; sl < spatial_layers; ++sl) {
for (int tl = 0; tl < temporal_layers; ++tl) {
const int layer = sl * temporal_layers + tl;
float layer_framerate = framerate;
if (temporal_layers == 2 && tl == 0) layer_framerate = framerate / 2;
if (temporal_layers == 3 && tl == 0) layer_framerate = framerate / 4;
if (temporal_layers == 3 && tl == 1) layer_framerate = framerate / 2;
layer_target_avg_bandwidth[layer] = static_cast<int>(
enc_cfg->layer_target_bitrate[layer] * 1000.0 / layer_framerate);
bits_in_buffer_model[layer] =
enc_cfg->layer_target_bitrate[layer] * enc_cfg->rc_buf_initial_sz;
}
}
}
static void CheckLayerRateTargeting(vpx_codec_enc_cfg_t *const cfg,
int number_spatial_layers,
int number_temporal_layers,
double *file_datarate,
double thresh_overshoot,
double thresh_undershoot) {
for (int sl = 0; sl < number_spatial_layers; ++sl)
for (int tl = 0; tl < number_temporal_layers; ++tl) {
const int layer = sl * number_temporal_layers + tl;
ASSERT_GE(cfg->layer_target_bitrate[layer],
file_datarate[layer] * thresh_overshoot)
<< " The datarate for the file exceeds the target by too much!";
ASSERT_LE(cfg->layer_target_bitrate[layer],
file_datarate[layer] * thresh_undershoot)
<< " The datarate for the file is lower than the target by too much!";
}
}
// Check basic rate targeting for 1 pass CBR SVC: 2 spatial layers and 1
@@ -1637,21 +1363,14 @@ TEST_P(DatarateOnePassCbrSvc, OnePassCbrSvc2SL1TLScreenContent1) {
svc_params_.scaling_factor_den[1] = 288;
cfg_.rc_dropframe_thresh = 10;
cfg_.kf_max_dist = 9999;
number_spatial_layers_ = cfg_.ss_number_layers;
number_temporal_layers_ = cfg_.ts_number_layers;
::libvpx_test::Y4mVideoSource video("niklas_1280_720_30.y4m", 0, 60);
top_sl_width_ = 1280;
top_sl_height_ = 720;
::libvpx_test::Y4mVideoSource video("niklas_1280_720_30.y4m", 0, 300);
cfg_.rc_target_bitrate = 500;
ResetModel();
tune_content_ = 1;
base_speed_setting_ = speed_setting_;
assign_layer_bitrates(&cfg_, &svc_params_, cfg_.ss_number_layers,
cfg_.ts_number_layers, cfg_.temporal_layering_mode,
layer_target_avg_bandwidth_, bits_in_buffer_model_);
cfg_.ts_number_layers, cfg_.temporal_layering_mode);
ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
CheckLayerRateTargeting(&cfg_, number_spatial_layers_,
number_temporal_layers_, file_datarate_, 0.78, 1.15);
EXPECT_EQ(static_cast<unsigned int>(0), GetMismatchFrames());
}
@@ -1679,30 +1398,26 @@ TEST_P(DatarateOnePassCbrSvc, OnePassCbrSvc2SL3TL) {
svc_params_.scaling_factor_den[1] = 288;
cfg_.rc_dropframe_thresh = 0;
cfg_.kf_max_dist = 9999;
number_spatial_layers_ = cfg_.ss_number_layers;
number_temporal_layers_ = cfg_.ts_number_layers;
::libvpx_test::I420VideoSource video("niklas_640_480_30.yuv", 640, 480, 30, 1,
0, 400);
top_sl_width_ = 640;
top_sl_height_ = 480;
::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
30, 1, 0, 200);
// TODO(marpan): Check that effective_datarate for each layer hits the
// layer target_bitrate.
for (int i = 200; i <= 800; i += 200) {
cfg_.rc_target_bitrate = i;
ResetModel();
assign_layer_bitrates(&cfg_, &svc_params_, cfg_.ss_number_layers,
cfg_.ts_number_layers, cfg_.temporal_layering_mode,
layer_target_avg_bandwidth_, bits_in_buffer_model_);
cfg_.ts_number_layers, cfg_.temporal_layering_mode);
ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
CheckLayerRateTargeting(&cfg_, number_spatial_layers_,
number_temporal_layers_, file_datarate_, 0.78,
1.15);
ASSERT_GE(cfg_.rc_target_bitrate, file_datarate_ * 0.78)
<< " The datarate for the file exceeds the target by too much!";
ASSERT_LE(cfg_.rc_target_bitrate, file_datarate_ * 1.15)
<< " The datarate for the file is lower than the target by too much!";
#if CONFIG_VP9_DECODER
// Number of temporal layers > 1, so half of the frames in this SVC pattern
// will be non-reference frame and hence encoder will avoid loopfilter.
// Since frame dropper is off, we can expect 200 (half of the sequence)
// Since frame dropper is off, we can expect 100 (half of the sequence)
// mismatched frames.
EXPECT_EQ(static_cast<unsigned int>(200), GetMismatchFrames());
EXPECT_EQ(static_cast<unsigned int>(100), GetMismatchFrames());
#endif
}
}
@@ -1731,43 +1446,33 @@ TEST_P(DatarateOnePassCbrSvc, OnePassCbrSvc2SL3TLDenoiserOn) {
svc_params_.scaling_factor_den[1] = 288;
cfg_.rc_dropframe_thresh = 0;
cfg_.kf_max_dist = 9999;
number_spatial_layers_ = cfg_.ss_number_layers;
number_temporal_layers_ = cfg_.ts_number_layers;
::libvpx_test::I420VideoSource video("niklas_640_480_30.yuv", 640, 480, 30, 1,
0, 400);
top_sl_width_ = 640;
top_sl_height_ = 480;
::libvpx_test::Y4mVideoSource video("niklas_1280_720_30.y4m", 0, 300);
// TODO(marpan): Check that effective_datarate for each layer hits the
// layer target_bitrate.
// For SVC, noise_sen = 1 means denoising only the top spatial layer
// noise_sen = 2 means denoising the two top spatial layers.
for (int noise_sen = 1; noise_sen <= 2; noise_sen++) {
for (int i = 600; i <= 1000; i += 200) {
cfg_.rc_target_bitrate = i;
ResetModel();
denoiser_on_ = noise_sen;
assign_layer_bitrates(&cfg_, &svc_params_, cfg_.ss_number_layers,
cfg_.ts_number_layers, cfg_.temporal_layering_mode,
layer_target_avg_bandwidth_, bits_in_buffer_model_);
ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
CheckLayerRateTargeting(&cfg_, number_spatial_layers_,
number_temporal_layers_, file_datarate_, 0.78,
1.15);
for (int i = 600; i <= 1000; i += 200) {
cfg_.rc_target_bitrate = i;
ResetModel();
denoiser_on_ = 1;
assign_layer_bitrates(&cfg_, &svc_params_, cfg_.ss_number_layers,
cfg_.ts_number_layers, cfg_.temporal_layering_mode);
ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
ASSERT_GE(cfg_.rc_target_bitrate, file_datarate_ * 0.78)
<< " The datarate for the file exceeds the target by too much!";
ASSERT_LE(cfg_.rc_target_bitrate, file_datarate_ * 1.15)
<< " The datarate for the file is lower than the target by too much!";
#if CONFIG_VP9_DECODER
// Number of temporal layers > 1, so half of the frames in this SVC
// pattern
// will be non-reference frame and hence encoder will avoid loopfilter.
// Since frame dropper is off, we can expect 200 (half of the sequence)
// mismatched frames.
EXPECT_EQ(static_cast<unsigned int>(200), GetMismatchFrames());
// Number of temporal layers > 1, so half of the frames in this SVC pattern
// will be non-reference frame and hence encoder will avoid loopfilter.
// Since frame dropper is off, we can expect 150 (half of the sequence)
// mismatched frames.
EXPECT_EQ(static_cast<unsigned int>(150), GetMismatchFrames());
#endif
}
}
}
// Check basic rate targeting for 1 pass CBR SVC: 2 spatial layers and 3
// temporal layers. Run CIF clip with 1 thread, and few short key frame periods.
TEST_P(DatarateOnePassCbrSvc, OnePassCbrSvc2SL3TLSmallKf) {
TEST_P(DatarateOnePassCbrSvc, DISABLED_OnePassCbrSvc2SL3TLSmallKf) {
cfg_.rc_buf_initial_sz = 500;
cfg_.rc_buf_optimal_sz = 500;
cfg_.rc_buf_sz = 1000;
@@ -1788,25 +1493,21 @@ TEST_P(DatarateOnePassCbrSvc, OnePassCbrSvc2SL3TLSmallKf) {
svc_params_.scaling_factor_num[1] = 288;
svc_params_.scaling_factor_den[1] = 288;
cfg_.rc_dropframe_thresh = 10;
::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
30, 1, 0, 200);
cfg_.rc_target_bitrate = 400;
number_spatial_layers_ = cfg_.ss_number_layers;
number_temporal_layers_ = cfg_.ts_number_layers;
::libvpx_test::I420VideoSource video("niklas_640_480_30.yuv", 640, 480, 30, 1,
0, 400);
top_sl_width_ = 640;
top_sl_height_ = 480;
// For this 3 temporal layer case, pattern repeats every 4 frames, so choose
// 4 key neighboring key frame periods (so key frame will land on 0-2-1-2).
for (int j = 64; j <= 67; j++) {
cfg_.kf_max_dist = j;
ResetModel();
assign_layer_bitrates(&cfg_, &svc_params_, cfg_.ss_number_layers,
cfg_.ts_number_layers, cfg_.temporal_layering_mode,
layer_target_avg_bandwidth_, bits_in_buffer_model_);
cfg_.ts_number_layers, cfg_.temporal_layering_mode);
ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
CheckLayerRateTargeting(&cfg_, number_spatial_layers_,
number_temporal_layers_, file_datarate_, 0.78,
1.15);
ASSERT_GE(cfg_.rc_target_bitrate, file_datarate_ * 0.80)
<< " The datarate for the file exceeds the target by too much!";
ASSERT_LE(cfg_.rc_target_bitrate, file_datarate_ * 1.15)
<< " The datarate for the file is lower than the target by too much!";
}
}
@@ -1834,25 +1535,22 @@ TEST_P(DatarateOnePassCbrSvc, OnePassCbrSvc2SL3TL4Threads) {
svc_params_.scaling_factor_den[1] = 288;
cfg_.rc_dropframe_thresh = 0;
cfg_.kf_max_dist = 9999;
number_spatial_layers_ = cfg_.ss_number_layers;
number_temporal_layers_ = cfg_.ts_number_layers;
::libvpx_test::Y4mVideoSource video("niklas_1280_720_30.y4m", 0, 60);
top_sl_width_ = 1280;
top_sl_height_ = 720;
::libvpx_test::Y4mVideoSource video("niklas_1280_720_30.y4m", 0, 300);
cfg_.rc_target_bitrate = 800;
ResetModel();
assign_layer_bitrates(&cfg_, &svc_params_, cfg_.ss_number_layers,
cfg_.ts_number_layers, cfg_.temporal_layering_mode,
layer_target_avg_bandwidth_, bits_in_buffer_model_);
cfg_.ts_number_layers, cfg_.temporal_layering_mode);
ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
CheckLayerRateTargeting(&cfg_, number_spatial_layers_,
number_temporal_layers_, file_datarate_, 0.78, 1.15);
ASSERT_GE(cfg_.rc_target_bitrate, file_datarate_ * 0.78)
<< " The datarate for the file exceeds the target by too much!";
ASSERT_LE(cfg_.rc_target_bitrate, file_datarate_ * 1.15)
<< " The datarate for the file is lower than the target by too much!";
#if CONFIG_VP9_DECODER
// Number of temporal layers > 1, so half of the frames in this SVC pattern
// will be non-reference frame and hence encoder will avoid loopfilter.
// Since frame dropper is off, we can expect 30 (half of the sequence)
// Since frame dropper is off, we can expect 150 (half of the sequence)
// mismatched frames.
EXPECT_EQ(static_cast<unsigned int>(30), GetMismatchFrames());
EXPECT_EQ(static_cast<unsigned int>(150), GetMismatchFrames());
#endif
}
@@ -1882,126 +1580,25 @@ TEST_P(DatarateOnePassCbrSvc, OnePassCbrSvc3SL3TL) {
svc_params_.scaling_factor_den[2] = 288;
cfg_.rc_dropframe_thresh = 0;
cfg_.kf_max_dist = 9999;
number_spatial_layers_ = cfg_.ss_number_layers;
number_temporal_layers_ = cfg_.ts_number_layers;
::libvpx_test::I420VideoSource video("niklas_640_480_30.yuv", 640, 480, 30, 1,
0, 400);
top_sl_width_ = 640;
top_sl_height_ = 480;
::libvpx_test::Y4mVideoSource video("niklas_1280_720_30.y4m", 0, 300);
cfg_.rc_target_bitrate = 800;
ResetModel();
assign_layer_bitrates(&cfg_, &svc_params_, cfg_.ss_number_layers,
cfg_.ts_number_layers, cfg_.temporal_layering_mode,
layer_target_avg_bandwidth_, bits_in_buffer_model_);
cfg_.ts_number_layers, cfg_.temporal_layering_mode);
ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
CheckLayerRateTargeting(&cfg_, number_spatial_layers_,
number_temporal_layers_, file_datarate_, 0.78, 1.15);
ASSERT_GE(cfg_.rc_target_bitrate, file_datarate_ * 0.78)
<< " The datarate for the file exceeds the target by too much!";
ASSERT_LE(cfg_.rc_target_bitrate, file_datarate_ * 1.22)
<< " The datarate for the file is lower than the target by too much!";
#if CONFIG_VP9_DECODER
// Number of temporal layers > 1, so half of the frames in this SVC pattern
// will be non-reference frame and hence encoder will avoid loopfilter.
// Since frame dropper is off, we can expect 200 (half of the sequence)
// Since frame dropper is off, we can expect 150 (half of the sequence)
// mismatched frames.
EXPECT_EQ(static_cast<unsigned int>(200), GetMismatchFrames());
EXPECT_EQ(static_cast<unsigned int>(150), GetMismatchFrames());
#endif
}
// Check basic rate targeting for 1 pass CBR SVC: 3 spatial layers and
// 2 temporal layers, with a change on the fly from the fixed SVC pattern to one
// generate via SVC_SET_REF_FRAME_CONFIG. The new pattern also disables
// inter-layer prediction.
TEST_P(DatarateOnePassCbrSvc, OnePassCbrSvc3SL2TLDynamicPatternChange) {
cfg_.rc_buf_initial_sz = 500;
cfg_.rc_buf_optimal_sz = 500;
cfg_.rc_buf_sz = 1000;
cfg_.rc_min_quantizer = 0;
cfg_.rc_max_quantizer = 63;
cfg_.rc_end_usage = VPX_CBR;
cfg_.g_lag_in_frames = 0;
cfg_.ss_number_layers = 3;
cfg_.ts_number_layers = 2;
cfg_.ts_rate_decimator[0] = 2;
cfg_.ts_rate_decimator[1] = 1;
cfg_.g_error_resilient = 1;
cfg_.g_threads = 1;
cfg_.temporal_layering_mode = 2;
svc_params_.scaling_factor_num[0] = 72;
svc_params_.scaling_factor_den[0] = 288;
svc_params_.scaling_factor_num[1] = 144;
svc_params_.scaling_factor_den[1] = 288;
svc_params_.scaling_factor_num[2] = 288;
svc_params_.scaling_factor_den[2] = 288;
cfg_.rc_dropframe_thresh = 0;
cfg_.kf_max_dist = 9999;
number_spatial_layers_ = cfg_.ss_number_layers;
number_temporal_layers_ = cfg_.ts_number_layers;
// Change SVC pattern on the fly.
update_pattern_ = 1;
::libvpx_test::I420VideoSource video("niklas_640_480_30.yuv", 640, 480, 30, 1,
0, 400);
top_sl_width_ = 640;
top_sl_height_ = 480;
cfg_.rc_target_bitrate = 800;
ResetModel();
assign_layer_bitrates(&cfg_, &svc_params_, cfg_.ss_number_layers,
cfg_.ts_number_layers, cfg_.temporal_layering_mode,
layer_target_avg_bandwidth_, bits_in_buffer_model_);
ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
CheckLayerRateTargeting(&cfg_, number_spatial_layers_,
number_temporal_layers_, file_datarate_, 0.78, 1.15);
#if CONFIG_VP9_DECODER
// Number of temporal layers > 1, so half of the frames in this SVC pattern
// will be non-reference frame and hence encoder will avoid loopfilter.
// Since frame dropper is off, we can expect 200 (half of the sequence)
// mismatched frames.
EXPECT_EQ(static_cast<unsigned int>(200), GetMismatchFrames());
#endif
}
// Check basic rate targeting for 1 pass CBR SVC with 3 spatial layers and on
// the fly switching to 2 spatial layers and then back to 3. This switch is done
// by setting top spatial layer bitrate to 0, and then back to non-zero, during
// the sequence.
TEST_P(DatarateOnePassCbrSvc, OnePassCbrSvc3SL_to_2SL_dynamic) {
cfg_.rc_buf_initial_sz = 500;
cfg_.rc_buf_optimal_sz = 500;
cfg_.rc_buf_sz = 1000;
cfg_.rc_min_quantizer = 0;
cfg_.rc_max_quantizer = 63;
cfg_.rc_end_usage = VPX_CBR;
cfg_.g_lag_in_frames = 0;
cfg_.ss_number_layers = 3;
cfg_.ts_number_layers = 1;
cfg_.ts_rate_decimator[0] = 1;
cfg_.g_error_resilient = 1;
cfg_.g_threads = 1;
cfg_.temporal_layering_mode = 0;
svc_params_.scaling_factor_num[0] = 72;
svc_params_.scaling_factor_den[0] = 288;
svc_params_.scaling_factor_num[1] = 144;
svc_params_.scaling_factor_den[1] = 288;
svc_params_.scaling_factor_num[2] = 288;
svc_params_.scaling_factor_den[2] = 288;
cfg_.rc_dropframe_thresh = 0;
cfg_.kf_max_dist = 9999;
number_spatial_layers_ = cfg_.ss_number_layers;
number_temporal_layers_ = cfg_.ts_number_layers;
::libvpx_test::I420VideoSource video("niklas_640_480_30.yuv", 640, 480, 30, 1,
0, 400);
top_sl_width_ = 640;
top_sl_height_ = 480;
cfg_.rc_target_bitrate = 800;
ResetModel();
dynamic_drop_layer_ = true;
assign_layer_bitrates(&cfg_, &svc_params_, cfg_.ss_number_layers,
cfg_.ts_number_layers, cfg_.temporal_layering_mode,
layer_target_avg_bandwidth_, bits_in_buffer_model_);
ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
// Don't check rate targeting on top spatial layer since it will be skipped
// for part of the sequence.
CheckLayerRateTargeting(&cfg_, number_spatial_layers_ - 1,
number_temporal_layers_, file_datarate_, 0.78, 1.15);
}
// Check basic rate targeting for 1 pass CBR SVC: 3 spatial layers and 3
// temporal layers. Run CIF clip with 1 thread, and few short key frame periods.
TEST_P(DatarateOnePassCbrSvc, OnePassCbrSvc3SL3TLSmallKf) {
@@ -2027,25 +1624,20 @@ TEST_P(DatarateOnePassCbrSvc, OnePassCbrSvc3SL3TLSmallKf) {
svc_params_.scaling_factor_num[2] = 288;
svc_params_.scaling_factor_den[2] = 288;
cfg_.rc_dropframe_thresh = 10;
::libvpx_test::Y4mVideoSource video("niklas_1280_720_30.y4m", 0, 300);
cfg_.rc_target_bitrate = 800;
number_spatial_layers_ = cfg_.ss_number_layers;
number_temporal_layers_ = cfg_.ts_number_layers;
::libvpx_test::I420VideoSource video("niklas_640_480_30.yuv", 640, 480, 30, 1,
0, 400);
top_sl_width_ = 640;
top_sl_height_ = 480;
// For this 3 temporal layer case, pattern repeats every 4 frames, so choose
// 4 key neighboring key frame periods (so key frame will land on 0-2-1-2).
for (int j = 32; j <= 35; j++) {
cfg_.kf_max_dist = j;
ResetModel();
assign_layer_bitrates(&cfg_, &svc_params_, cfg_.ss_number_layers,
cfg_.ts_number_layers, cfg_.temporal_layering_mode,
layer_target_avg_bandwidth_, bits_in_buffer_model_);
cfg_.ts_number_layers, cfg_.temporal_layering_mode);
ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
CheckLayerRateTargeting(&cfg_, number_spatial_layers_,
number_temporal_layers_, file_datarate_, 0.78,
1.15);
ASSERT_GE(cfg_.rc_target_bitrate, file_datarate_ * 0.80)
<< " The datarate for the file exceeds the target by too much!";
ASSERT_LE(cfg_.rc_target_bitrate, file_datarate_ * 1.30)
<< " The datarate for the file is lower than the target by too much!";
}
}
@@ -2075,25 +1667,22 @@ TEST_P(DatarateOnePassCbrSvc, OnePassCbrSvc3SL3TL4threads) {
svc_params_.scaling_factor_den[2] = 288;
cfg_.rc_dropframe_thresh = 0;
cfg_.kf_max_dist = 9999;
number_spatial_layers_ = cfg_.ss_number_layers;
number_temporal_layers_ = cfg_.ts_number_layers;
::libvpx_test::Y4mVideoSource video("niklas_1280_720_30.y4m", 0, 60);
top_sl_width_ = 1280;
top_sl_height_ = 720;
::libvpx_test::Y4mVideoSource video("niklas_1280_720_30.y4m", 0, 300);
cfg_.rc_target_bitrate = 800;
ResetModel();
assign_layer_bitrates(&cfg_, &svc_params_, cfg_.ss_number_layers,
cfg_.ts_number_layers, cfg_.temporal_layering_mode,
layer_target_avg_bandwidth_, bits_in_buffer_model_);
cfg_.ts_number_layers, cfg_.temporal_layering_mode);
ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
CheckLayerRateTargeting(&cfg_, number_spatial_layers_,
number_temporal_layers_, file_datarate_, 0.78, 1.15);
ASSERT_GE(cfg_.rc_target_bitrate, file_datarate_ * 0.78)
<< " The datarate for the file exceeds the target by too much!";
ASSERT_LE(cfg_.rc_target_bitrate, file_datarate_ * 1.22)
<< " The datarate for the file is lower than the target by too much!";
#if CONFIG_VP9_DECODER
// Number of temporal layers > 1, so half of the frames in this SVC pattern
// will be non-reference frame and hence encoder will avoid loopfilter.
// Since frame dropper is off, we can expect 30 (half of the sequence)
// Since frame dropper is off, we can expect 150 (half of the sequence)
// mismatched frames.
EXPECT_EQ(static_cast<unsigned int>(30), GetMismatchFrames());
EXPECT_EQ(static_cast<unsigned int>(150), GetMismatchFrames());
#endif
}
@@ -2125,21 +1714,9 @@ TEST_P(DatarateOnePassCbrSvc, OnePassCbrSvc2SL1TL5x5MultipleRuns) {
cfg_.layer_target_bitrate[0] = 300;
cfg_.layer_target_bitrate[1] = 1400;
cfg_.rc_target_bitrate = 1700;
number_spatial_layers_ = cfg_.ss_number_layers;
number_temporal_layers_ = cfg_.ts_number_layers;
::libvpx_test::Y4mVideoSource video("niklas_1280_720_30.y4m", 0, 300);
ResetModel();
layer_target_avg_bandwidth_[0] = cfg_.layer_target_bitrate[0] * 1000 / 30;
bits_in_buffer_model_[0] =
cfg_.layer_target_bitrate[0] * cfg_.rc_buf_initial_sz;
layer_target_avg_bandwidth_[1] = cfg_.layer_target_bitrate[1] * 1000 / 30;
bits_in_buffer_model_[1] =
cfg_.layer_target_bitrate[1] * cfg_.rc_buf_initial_sz;
::libvpx_test::Y4mVideoSource video("niklas_1280_720_30.y4m", 0, 60);
top_sl_width_ = 1280;
top_sl_height_ = 720;
ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
CheckLayerRateTargeting(&cfg_, number_spatial_layers_,
number_temporal_layers_, file_datarate_, 0.78, 1.15);
EXPECT_EQ(static_cast<unsigned int>(0), GetMismatchFrames());
}
@@ -2152,9 +1729,6 @@ VP9_INSTANTIATE_TEST_CASE(DatarateTestVP9Large,
::testing::Values(::libvpx_test::kOnePassGood,
::libvpx_test::kRealTime),
::testing::Range(2, 9));
VP9_INSTANTIATE_TEST_CASE(DatarateTestVP9RealTime,
::testing::Values(::libvpx_test::kRealTime),
::testing::Range(5, 9));
#if CONFIG_VP9_TEMPORAL_DENOISING
VP9_INSTANTIATE_TEST_CASE(DatarateTestVP9LargeDenoiser,
::testing::Values(::libvpx_test::kRealTime),


@@ -28,8 +28,8 @@
using libvpx_test::ACMRandom;
using libvpx_test::Buffer;
using std::tr1::make_tuple;
using std::tr1::tuple;
using std::tr1::make_tuple;
namespace {
typedef void (*PartialFdctFunc)(const int16_t *in, tran_low_t *out, int stride);

File diff suppressed because it is too large.


@@ -106,90 +106,4 @@ TEST(EncodeAPI, ImageSizeSetting) {
}
#endif
// Set up 2 spatial streams with 2 temporal layers per stream, and generate
// invalid configuration by setting the temporal layer rate allocation
// (ts_target_bitrate[]) to 0 for both layers. This should fail independent of
// CONFIG_MULTI_RES_ENCODING.
TEST(EncodeAPI, MultiResEncode) {
static const vpx_codec_iface_t *kCodecs[] = {
#if CONFIG_VP8_ENCODER
&vpx_codec_vp8_cx_algo,
#endif
#if CONFIG_VP9_ENCODER
&vpx_codec_vp9_cx_algo,
#endif
};
const int width = 1280;
const int height = 720;
const int width_down = width / 2;
const int height_down = height / 2;
const int target_bitrate = 1000;
const int framerate = 30;
for (int c = 0; c < NELEMENTS(kCodecs); ++c) {
const vpx_codec_iface_t *const iface = kCodecs[c];
vpx_codec_ctx_t enc[2];
vpx_codec_enc_cfg_t cfg[2];
vpx_rational_t dsf[2] = { { 2, 1 }, { 2, 1 } };
memset(enc, 0, sizeof(enc));
for (int i = 0; i < 2; i++) {
vpx_codec_enc_config_default(iface, &cfg[i], 0);
}
/* Highest-resolution encoder settings */
cfg[0].g_w = width;
cfg[0].g_h = height;
cfg[0].rc_dropframe_thresh = 0;
cfg[0].rc_end_usage = VPX_CBR;
cfg[0].rc_resize_allowed = 0;
cfg[0].rc_min_quantizer = 2;
cfg[0].rc_max_quantizer = 56;
cfg[0].rc_undershoot_pct = 100;
cfg[0].rc_overshoot_pct = 15;
cfg[0].rc_buf_initial_sz = 500;
cfg[0].rc_buf_optimal_sz = 600;
cfg[0].rc_buf_sz = 1000;
cfg[0].g_error_resilient = 1; /* Enable error resilient mode */
cfg[0].g_lag_in_frames = 0;
cfg[0].kf_mode = VPX_KF_AUTO;
cfg[0].kf_min_dist = 3000;
cfg[0].kf_max_dist = 3000;
cfg[0].rc_target_bitrate = target_bitrate; /* Set target bitrate */
cfg[0].g_timebase.num = 1; /* Set fps */
cfg[0].g_timebase.den = framerate;
memcpy(&cfg[1], &cfg[0], sizeof(cfg[0]));
cfg[1].rc_target_bitrate = 500;
cfg[1].g_w = width_down;
cfg[1].g_h = height_down;
for (int i = 0; i < 2; i++) {
cfg[i].ts_number_layers = 2;
cfg[i].ts_periodicity = 2;
cfg[i].ts_rate_decimator[0] = 2;
cfg[i].ts_rate_decimator[1] = 1;
cfg[i].ts_layer_id[0] = 0;
cfg[i].ts_layer_id[1] = 1;
// Invalid parameters.
cfg[i].ts_target_bitrate[0] = 0;
cfg[i].ts_target_bitrate[1] = 0;
}
// VP9 should report incapable, VP8 invalid for all configurations.
const char kVP9Name[] = "WebM Project VP9";
const bool is_vp9 = strncmp(kVP9Name, vpx_codec_iface_name(iface),
sizeof(kVP9Name) - 1) == 0;
EXPECT_EQ(is_vp9 ? VPX_CODEC_INCAPABLE : VPX_CODEC_INVALID_PARAM,
vpx_codec_enc_init_multi(&enc[0], iface, &cfg[0], 2, 0, &dsf[0]));
for (int i = 0; i < 2; i++) {
vpx_codec_destroy(&enc[i]);
}
}
}
} // namespace


@@ -201,8 +201,6 @@ void EncoderTest::RunLoop(VideoSource *video) {
PreEncodeFrameHook(video, encoder.get());
encoder->EncodeFrame(video, frame_flags_);
PostEncodeFrameHook(encoder.get());
CxDataIterator iter = encoder->GetCxData();
bool has_cxdata = false;


@@ -128,11 +128,6 @@ class Encoder {
ASSERT_EQ(VPX_CODEC_OK, res) << EncoderError();
}
void Control(int ctrl_id, struct vpx_svc_ref_frame_config *arg) {
const vpx_codec_err_t res = vpx_codec_control_(&encoder_, ctrl_id, arg);
ASSERT_EQ(VPX_CODEC_OK, res) << EncoderError();
}
void Control(int ctrl_id, struct vpx_svc_parameters *arg) {
const vpx_codec_err_t res = vpx_codec_control_(&encoder_, ctrl_id, arg);
ASSERT_EQ(VPX_CODEC_OK, res) << EncoderError();
@@ -142,12 +137,15 @@ class Encoder {
const vpx_codec_err_t res = vpx_codec_control_(&encoder_, ctrl_id, arg);
ASSERT_EQ(VPX_CODEC_OK, res) << EncoderError();
}
#endif
#if CONFIG_VP8_ENCODER
void Control(int ctrl_id, vpx_roi_map_t *arg) {
const vpx_codec_err_t res = vpx_codec_control_(&encoder_, ctrl_id, arg);
ASSERT_EQ(VPX_CODEC_OK, res) << EncoderError();
}
#endif
void Config(const vpx_codec_enc_cfg_t *cfg) {
const vpx_codec_err_t res = vpx_codec_enc_config_set(&encoder_, cfg);
ASSERT_EQ(VPX_CODEC_OK, res) << EncoderError();
@@ -221,8 +219,6 @@ class EncoderTest {
virtual void PreEncodeFrameHook(VideoSource * /*video*/,
Encoder * /*encoder*/) {}
virtual void PostEncodeFrameHook(Encoder * /*encoder*/) {}
// Hook to be called on every compressed data packet.
virtual void FramePktHook(const vpx_codec_cx_pkt_t * /*pkt*/) {}


@@ -675,9 +675,7 @@ INSTANTIATE_TEST_CASE_P(NEON, FwdTrans8x8DCT,
::testing::Values(make_tuple(&vpx_fdct8x8_neon,
&vpx_idct8x8_64_add_neon,
0, VPX_BITS_8)));
// TODO(linfengz): reenable these functions once test vector failures are
// addressed.
#if 0 // !CONFIG_VP9_HIGHBITDEPTH
#if !CONFIG_VP9_HIGHBITDEPTH
INSTANTIATE_TEST_CASE_P(
NEON, FwdTrans8x8HT,
::testing::Values(

View File

@@ -174,4 +174,4 @@ INSTANTIATE_TEST_CASE_P(MSA, IDCTTest,
INSTANTIATE_TEST_CASE_P(MMI, IDCTTest,
::testing::Values(vp8_short_idct4x4llm_mmi));
#endif // HAVE_MMI
} // namespace
}

View File

@@ -123,7 +123,6 @@ TEST_P(InvalidFileTest, ReturnCode) { RunTest(); }
#if CONFIG_VP8_DECODER
const DecodeParam kVP8InvalidFileTests[] = {
{ 1, "invalid-bug-1443.ivf" },
{ 1, "invalid-token-partition.ivf" },
};
VP8_INSTANTIATE_TEST_CASE(InvalidFileTest,

View File

@@ -114,18 +114,6 @@ void InitInput(Pixel *s, Pixel *ref_s, ACMRandom *rnd, const uint8_t limit,
}
}
uint8_t GetOuterThresh(ACMRandom *rnd) {
return static_cast<uint8_t>(rnd->RandRange(3 * MAX_LOOP_FILTER + 5));
}
uint8_t GetInnerThresh(ACMRandom *rnd) {
return static_cast<uint8_t>(rnd->RandRange(MAX_LOOP_FILTER + 1));
}
uint8_t GetHevThresh(ACMRandom *rnd) {
return static_cast<uint8_t>(rnd->RandRange(MAX_LOOP_FILTER + 1) >> 4);
}
class Loop8Test6Param : public ::testing::TestWithParam<loop8_param_t> {
public:
virtual ~Loop8Test6Param() {}
@@ -174,15 +162,15 @@ TEST_P(Loop8Test6Param, OperationCheck) {
int first_failure = -1;
for (int i = 0; i < count_test_block; ++i) {
int err_count = 0;
uint8_t tmp = GetOuterThresh(&rnd);
uint8_t tmp = static_cast<uint8_t>(rnd(3 * MAX_LOOP_FILTER + 4));
DECLARE_ALIGNED(16, const uint8_t,
blimit[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };
tmp = GetInnerThresh(&rnd);
tmp = static_cast<uint8_t>(rnd(MAX_LOOP_FILTER));
DECLARE_ALIGNED(16, const uint8_t,
limit[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };
tmp = GetHevThresh(&rnd);
tmp = rnd.Rand8();
DECLARE_ALIGNED(16, const uint8_t,
thresh[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };
@@ -233,15 +221,15 @@ TEST_P(Loop8Test6Param, ValueCheck) {
for (int i = 0; i < count_test_block; ++i) {
int err_count = 0;
uint8_t tmp = GetOuterThresh(&rnd);
uint8_t tmp = static_cast<uint8_t>(rnd(3 * MAX_LOOP_FILTER + 4));
DECLARE_ALIGNED(16, const uint8_t,
blimit[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };
tmp = GetInnerThresh(&rnd);
tmp = static_cast<uint8_t>(rnd(MAX_LOOP_FILTER));
DECLARE_ALIGNED(16, const uint8_t,
limit[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };
tmp = GetHevThresh(&rnd);
tmp = rnd.Rand8();
DECLARE_ALIGNED(16, const uint8_t,
thresh[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };
@@ -283,27 +271,27 @@ TEST_P(Loop8Test9Param, OperationCheck) {
int first_failure = -1;
for (int i = 0; i < count_test_block; ++i) {
int err_count = 0;
uint8_t tmp = GetOuterThresh(&rnd);
uint8_t tmp = static_cast<uint8_t>(rnd(3 * MAX_LOOP_FILTER + 4));
DECLARE_ALIGNED(16, const uint8_t,
blimit0[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };
tmp = GetInnerThresh(&rnd);
tmp = static_cast<uint8_t>(rnd(MAX_LOOP_FILTER));
DECLARE_ALIGNED(16, const uint8_t,
limit0[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };
tmp = GetHevThresh(&rnd);
tmp = rnd.Rand8();
DECLARE_ALIGNED(16, const uint8_t,
thresh0[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };
tmp = GetOuterThresh(&rnd);
tmp = static_cast<uint8_t>(rnd(3 * MAX_LOOP_FILTER + 4));
DECLARE_ALIGNED(16, const uint8_t,
blimit1[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };
tmp = GetInnerThresh(&rnd);
tmp = static_cast<uint8_t>(rnd(MAX_LOOP_FILTER));
DECLARE_ALIGNED(16, const uint8_t,
limit1[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };
tmp = GetHevThresh(&rnd);
tmp = rnd.Rand8();
DECLARE_ALIGNED(16, const uint8_t,
thresh1[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };
@@ -346,27 +334,27 @@ TEST_P(Loop8Test9Param, ValueCheck) {
int first_failure = -1;
for (int i = 0; i < count_test_block; ++i) {
int err_count = 0;
uint8_t tmp = GetOuterThresh(&rnd);
uint8_t tmp = static_cast<uint8_t>(rnd(3 * MAX_LOOP_FILTER + 4));
DECLARE_ALIGNED(16, const uint8_t,
blimit0[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };
tmp = GetInnerThresh(&rnd);
tmp = static_cast<uint8_t>(rnd(MAX_LOOP_FILTER));
DECLARE_ALIGNED(16, const uint8_t,
limit0[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };
tmp = GetHevThresh(&rnd);
tmp = rnd.Rand8();
DECLARE_ALIGNED(16, const uint8_t,
thresh0[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };
tmp = GetOuterThresh(&rnd);
tmp = static_cast<uint8_t>(rnd(3 * MAX_LOOP_FILTER + 4));
DECLARE_ALIGNED(16, const uint8_t,
blimit1[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };
tmp = GetInnerThresh(&rnd);
tmp = static_cast<uint8_t>(rnd(MAX_LOOP_FILTER));
DECLARE_ALIGNED(16, const uint8_t,
limit1[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };
tmp = GetHevThresh(&rnd);
tmp = rnd.Rand8();
DECLARE_ALIGNED(16, const uint8_t,
thresh1[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };
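The GetOuterThresh()/GetInnerThresh()/GetHevThresh() helpers introduced above centralize the sampling ranges for the three loop-filter parameters. Assuming ACMRandom::RandRange(n) draws uniformly from [0, n - 1] and MAX_LOOP_FILTER is 63 (its value in the library), the inclusive maxima work out as below; note the inline rnd(3 * MAX_LOOP_FILTER + 4) calls being replaced could never produce the top blimit value:

    #include <stdio.h>

    #define MAX_LOOP_FILTER 63 /* assumed, matching the library */

    int main(void) {
      /* GetOuterThresh: RandRange(3 * MAX_LOOP_FILTER + 5) -> [0, 193] */
      printf("blimit max: %d\n", 3 * MAX_LOOP_FILTER + 4);
      /* GetInnerThresh: RandRange(MAX_LOOP_FILTER + 1) -> [0, 63] */
      printf("limit max:  %d\n", MAX_LOOP_FILTER);
      /* GetHevThresh: RandRange(MAX_LOOP_FILTER + 1) >> 4 -> [0, 3] */
      printf("thresh max: %d\n", MAX_LOOP_FILTER >> 4);
      return 0;
    }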

View File

@@ -277,29 +277,12 @@ class ResizeTest
SetMode(GET_PARAM(1));
}
virtual void FramePktHook(const vpx_codec_cx_pkt_t *pkt) {
ASSERT_NE(static_cast<int>(pkt->data.frame.width[0]), 0);
ASSERT_NE(static_cast<int>(pkt->data.frame.height[0]), 0);
encode_frame_width_.push_back(pkt->data.frame.width[0]);
encode_frame_height_.push_back(pkt->data.frame.height[0]);
}
unsigned int GetFrameWidth(size_t idx) const {
return encode_frame_width_[idx];
}
unsigned int GetFrameHeight(size_t idx) const {
return encode_frame_height_[idx];
}
virtual void DecompressedFrameHook(const vpx_image_t &img,
vpx_codec_pts_t pts) {
frame_info_list_.push_back(FrameInfo(pts, img.d_w, img.d_h));
}
std::vector<FrameInfo> frame_info_list_;
std::vector<unsigned int> encode_frame_width_;
std::vector<unsigned int> encode_frame_height_;
};
TEST_P(ResizeTest, TestExternalResizeWorks) {
@@ -313,9 +296,6 @@ TEST_P(ResizeTest, TestExternalResizeWorks) {
const unsigned int frame = static_cast<unsigned>(info->pts);
unsigned int expected_w;
unsigned int expected_h;
const size_t idx = info - frame_info_list_.begin();
ASSERT_EQ(info->w, GetFrameWidth(idx));
ASSERT_EQ(info->h, GetFrameHeight(idx));
ScaleForFrameNumber(frame, kInitialWidth, kInitialHeight, &expected_w,
&expected_h, 0);
EXPECT_EQ(expected_w, info->w)
@@ -484,23 +464,8 @@ class ResizeRealtimeTest
++mismatch_nframes_;
}
virtual void FramePktHook(const vpx_codec_cx_pkt_t *pkt) {
ASSERT_NE(static_cast<int>(pkt->data.frame.width[0]), 0);
ASSERT_NE(static_cast<int>(pkt->data.frame.height[0]), 0);
encode_frame_width_.push_back(pkt->data.frame.width[0]);
encode_frame_height_.push_back(pkt->data.frame.height[0]);
}
unsigned int GetMismatchFrames() { return mismatch_nframes_; }
unsigned int GetFrameWidth(size_t idx) const {
return encode_frame_width_[idx];
}
unsigned int GetFrameHeight(size_t idx) const {
return encode_frame_height_[idx];
}
void DefaultConfig() {
cfg_.rc_buf_initial_sz = 500;
cfg_.rc_buf_optimal_sz = 600;
@@ -528,8 +493,6 @@ class ResizeRealtimeTest
bool change_bitrate_;
double mismatch_psnr_;
int mismatch_nframes_;
std::vector<unsigned int> encode_frame_width_;
std::vector<unsigned int> encode_frame_height_;
};
TEST_P(ResizeRealtimeTest, TestExternalResizeWorks) {
@@ -619,9 +582,6 @@ TEST_P(ResizeRealtimeTest, TestInternalResizeDownUpChangeBitRate) {
int resize_count = 0;
for (std::vector<FrameInfo>::const_iterator info = frame_info_list_.begin();
info != frame_info_list_.end(); ++info) {
const size_t idx = info - frame_info_list_.begin();
ASSERT_EQ(info->w, GetFrameWidth(idx));
ASSERT_EQ(info->h, GetFrameHeight(idx));
if (info->w != last_w || info->h != last_h) {
resize_count++;
if (resize_count == 1) {

View File

@@ -112,9 +112,8 @@ INSTANTIATE_TEST_CASE_P(
#endif // HAVE_SSE2
#if HAVE_MSA
INSTANTIATE_TEST_CASE_P(
MSA, SumSquaresTest,
::testing::Values(make_tuple(&vpx_sum_squares_2d_i16_c,
&vpx_sum_squares_2d_i16_msa)));
INSTANTIATE_TEST_CASE_P(MSA, SumSquaresTest, ::testing::Values(make_tuple(
&vpx_sum_squares_2d_i16_c,
&vpx_sum_squares_2d_i16_msa)));
#endif // HAVE_MSA
} // namespace

View File

@@ -734,8 +734,6 @@ endif # CONFIG_VP9_HIGHBITDEPTH
# Invalid files for testing libvpx error checking.
LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += invalid-bug-1443.ivf
LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += invalid-bug-1443.ivf.res
LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += invalid-token-partition.ivf
LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += invalid-token-partition.ivf.res
LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += invalid-vp80-00-comprehensive-018.ivf.2kf_0x6.ivf
LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += invalid-vp80-00-comprehensive-018.ivf.2kf_0x6.ivf.res
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-01-v3.webm

View File

@@ -852,7 +852,5 @@ e402cbbf9e550ae017a1e9f1f73931c1d18474e8 *invalid-crbug-667044.webm
d3964f9dad9f60363c81b688324d95b4ec7c8038 *invalid-crbug-667044.webm.res
fd9df7f3f6992af1d7a9dde975c9a0d6f28c053d *invalid-bug-1443.ivf
fd3020fa6e9ca5966206738654c97dec313b0a95 *invalid-bug-1443.ivf.res
1a0e405606939f2febab1a21b30c37cb8f2c8cb1 *invalid-token-partition.ivf
90a8a95e7024f015b87f5483a65036609b3d1b74 *invalid-token-partition.ivf.res
17696cd21e875f1d6e5d418cbf89feab02c8850a *vp90-2-22-svc_1280x720_1.webm
e2f9e1e47a791b4e939a9bdc50bf7a25b3761f77 *vp90-2-22-svc_1280x720_1.webm.md5

View File

@@ -61,6 +61,7 @@ int main(int argc, char **argv) {
#if !CONFIG_SHARED
// Shared library builds don't support whitebox tests
// that exercise internal symbols.
#if CONFIG_VP8
vp8_rtcd();
#endif // CONFIG_VP8

View File

@@ -27,8 +27,8 @@
namespace {
using libvpx_test::ACMRandom;
using std::string;
using libvpx_test::ACMRandom;
#if CONFIG_WEBM_IO

View File

@@ -59,7 +59,7 @@ const TestVideoParam kTestVectors[] = {
// Encoding modes tested
const libvpx_test::TestMode kEncodingModeVectors[] = {
::libvpx_test::kTwoPassGood, ::libvpx_test::kOnePassGood,
::libvpx_test::kRealTime
::libvpx_test::kRealTime,
};
// Speed settings tested

View File

@@ -22,7 +22,7 @@ namespace {
// Encoding modes
const libvpx_test::TestMode kEncodingModeVectors[] = {
::libvpx_test::kTwoPassGood, ::libvpx_test::kOnePassGood,
::libvpx_test::kRealTime
::libvpx_test::kRealTime,
};
// Encoding speeds

View File

@@ -14,9 +14,9 @@
#include "third_party/googletest/src/include/gtest/gtest.h"
#include "./vp9_rtcd.h"
#include "./vpx_config.h"
#include "./vpx_dsp_rtcd.h"
#include "./vp9_rtcd.h"
#include "test/acm_random.h"
#include "test/buffer.h"
#include "test/clear_system_state.h"
@@ -42,7 +42,7 @@ typedef void (*QuantizeFunc)(const tran_low_t *coeff, intptr_t count,
uint16_t *eob, const int16_t *scan,
const int16_t *iscan);
typedef std::tr1::tuple<QuantizeFunc, QuantizeFunc, vpx_bit_depth_t,
int /*max_size*/, bool /*is_fp*/>
int /*max_size*/>
QuantizeParam;
// Wrapper for FP version which does not use zbin or quant_shift.
@@ -69,15 +69,11 @@ void QuantFPWrapper(const tran_low_t *coeff, intptr_t count, int skip_block,
class VP9QuantizeBase {
public:
VP9QuantizeBase(vpx_bit_depth_t bit_depth, int max_size, bool is_fp)
: bit_depth_(bit_depth), max_size_(max_size), is_fp_(is_fp) {
VP9QuantizeBase(vpx_bit_depth_t bit_depth, int max_size)
: bit_depth_(bit_depth), max_size_(max_size) {
max_value_ = (1 << bit_depth_) - 1;
zbin_ptr_ =
reinterpret_cast<int16_t *>(vpx_memalign(16, 8 * sizeof(*zbin_ptr_)));
round_fp_ptr_ = reinterpret_cast<int16_t *>(
vpx_memalign(16, 8 * sizeof(*round_fp_ptr_)));
quant_fp_ptr_ = reinterpret_cast<int16_t *>(
vpx_memalign(16, 8 * sizeof(*quant_fp_ptr_)));
round_ptr_ =
reinterpret_cast<int16_t *>(vpx_memalign(16, 8 * sizeof(*round_ptr_)));
quant_ptr_ =
@@ -90,15 +86,11 @@ class VP9QuantizeBase {
~VP9QuantizeBase() {
vpx_free(zbin_ptr_);
vpx_free(round_fp_ptr_);
vpx_free(quant_fp_ptr_);
vpx_free(round_ptr_);
vpx_free(quant_ptr_);
vpx_free(quant_shift_ptr_);
vpx_free(dequant_ptr_);
zbin_ptr_ = NULL;
round_fp_ptr_ = NULL;
quant_fp_ptr_ = NULL;
round_ptr_ = NULL;
quant_ptr_ = NULL;
quant_shift_ptr_ = NULL;
@@ -108,8 +100,6 @@ class VP9QuantizeBase {
protected:
int16_t *zbin_ptr_;
int16_t *round_fp_ptr_;
int16_t *quant_fp_ptr_;
int16_t *round_ptr_;
int16_t *quant_ptr_;
int16_t *quant_shift_ptr_;
@@ -117,136 +107,29 @@ class VP9QuantizeBase {
const vpx_bit_depth_t bit_depth_;
int max_value_;
const int max_size_;
const bool is_fp_;
};
class VP9QuantizeTest : public VP9QuantizeBase,
public ::testing::TestWithParam<QuantizeParam> {
public:
VP9QuantizeTest()
: VP9QuantizeBase(GET_PARAM(2), GET_PARAM(3), GET_PARAM(4)),
quantize_op_(GET_PARAM(0)), ref_quantize_op_(GET_PARAM(1)) {}
: VP9QuantizeBase(GET_PARAM(2), GET_PARAM(3)), quantize_op_(GET_PARAM(0)),
ref_quantize_op_(GET_PARAM(1)) {}
protected:
const QuantizeFunc quantize_op_;
const QuantizeFunc ref_quantize_op_;
};
// This quantizer compares the AC coefficients to the quantization step size to
// determine if further multiplication operations are needed.
// Based on vp9_quantize_fp_sse2().
inline void quant_fp_nz(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
int skip_block, const int16_t *round_ptr,
const int16_t *quant_ptr, tran_low_t *qcoeff_ptr,
tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr,
uint16_t *eob_ptr, const int16_t *scan,
const int16_t *iscan, int is_32x32) {
int i, eob = -1;
const int thr = dequant_ptr[1] >> (1 + is_32x32);
(void)iscan;
(void)skip_block;
assert(!skip_block);
// Quantization pass: All coefficients with index >= zero_flag are
// skippable. Note: zero_flag can be zero.
for (i = 0; i < n_coeffs; i += 16) {
int y;
int nzflag_cnt = 0;
int abs_coeff[16];
int coeff_sign[16];
// count nzflag for each row (16 tran_low_t)
for (y = 0; y < 16; ++y) {
const int rc = i + y;
const int coeff = coeff_ptr[rc];
coeff_sign[y] = (coeff >> 31);
abs_coeff[y] = (coeff ^ coeff_sign[y]) - coeff_sign[y];
// The first 16 are skipped in the sse2 code. Do the same here to match.
if (i >= 16 && (abs_coeff[y] <= thr)) {
nzflag_cnt++;
}
}
for (y = 0; y < 16; ++y) {
const int rc = i + y;
// If all of the AC coeffs in a row have magnitude less than the
// quantization step_size/2, quantize to zero.
if (nzflag_cnt < 16) {
int tmp;
int _round;
if (is_32x32) {
_round = ROUND_POWER_OF_TWO(round_ptr[rc != 0], 1);
} else {
_round = round_ptr[rc != 0];
}
tmp = clamp(abs_coeff[y] + _round, INT16_MIN, INT16_MAX);
tmp = (tmp * quant_ptr[rc != 0]) >> (16 - is_32x32);
qcoeff_ptr[rc] = (tmp ^ coeff_sign[y]) - coeff_sign[y];
dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr[rc != 0];
if (is_32x32) {
dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr[rc != 0] / 2;
} else {
dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr[rc != 0];
}
} else {
qcoeff_ptr[rc] = 0;
dqcoeff_ptr[rc] = 0;
}
}
}
// Scan for eob.
for (i = 0; i < n_coeffs; i++) {
// Use the scan order to find the correct eob.
const int rc = scan[i];
if (qcoeff_ptr[rc]) {
eob = i;
}
}
*eob_ptr = eob + 1;
}
void quantize_fp_nz_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
int skip_block, const int16_t *round_ptr,
const int16_t *quant_ptr, tran_low_t *qcoeff_ptr,
tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr,
uint16_t *eob_ptr, const int16_t *scan,
const int16_t *iscan) {
quant_fp_nz(coeff_ptr, n_coeffs, skip_block, round_ptr, quant_ptr, qcoeff_ptr,
dqcoeff_ptr, dequant_ptr, eob_ptr, scan, iscan, 0);
}
void quantize_fp_32x32_nz_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
int skip_block, const int16_t *round_ptr,
const int16_t *quant_ptr, tran_low_t *qcoeff_ptr,
tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr,
uint16_t *eob_ptr, const int16_t *scan,
const int16_t *iscan) {
quant_fp_nz(coeff_ptr, n_coeffs, skip_block, round_ptr, quant_ptr, qcoeff_ptr,
dqcoeff_ptr, dequant_ptr, eob_ptr, scan, iscan, 1);
}
void GenerateHelperArrays(ACMRandom *rnd, int16_t *zbin, int16_t *round,
int16_t *quant, int16_t *quant_shift,
int16_t *dequant, int16_t *round_fp,
int16_t *quant_fp) {
// Max when q == 0. Otherwise, it is 48 for Y and 42 for U/V.
const int max_qrounding_factor_fp = 64;
int16_t *dequant) {
for (int j = 0; j < 2; j++) {
// The range is 4 to 1828 in the VP9 tables.
const int qlookup = rnd->RandRange(1825) + 4;
round_fp[j] = (max_qrounding_factor_fp * qlookup) >> 7;
quant_fp[j] = (1 << 16) / qlookup;
// Values determined by deconstructing vp9_init_quantizer().
// zbin may be up to 1143 for 8 and 10 bit Y values, or 1200 for 12 bit Y
// values or U/V values of any bit depth. This is because y_delta is not
// factored into the vp9_ac_quant() call.
zbin[j] = rnd->RandRange(1200);
// round may be up to 685 for Y values or 914 for U/V.
round[j] = rnd->RandRange(914);
// quant ranges from 1 to -32703
@@ -258,8 +141,6 @@ void GenerateHelperArrays(ACMRandom *rnd, int16_t *zbin, int16_t *round,
}
for (int j = 2; j < 8; j++) {
zbin[j] = zbin[1];
round_fp[j] = round_fp[1];
quant_fp[j] = quant_fp[1];
round[j] = round[1];
quant[j] = quant[1];
quant_shift[j] = quant_shift[1];
@@ -298,19 +179,19 @@ TEST_P(VP9QuantizeTest, OperationCheck) {
const int count = (4 << sz) * (4 << sz);
coeff.Set(&rnd, -max_value_, max_value_);
GenerateHelperArrays(&rnd, zbin_ptr_, round_ptr_, quant_ptr_,
quant_shift_ptr_, dequant_ptr_, round_fp_ptr_,
quant_fp_ptr_);
int16_t *r_ptr = (is_fp_) ? round_fp_ptr_ : round_ptr_;
int16_t *q_ptr = (is_fp_) ? quant_fp_ptr_ : quant_ptr_;
ref_quantize_op_(coeff.TopLeftPixel(), count, skip_block, zbin_ptr_, r_ptr,
q_ptr, quant_shift_ptr_, ref_qcoeff.TopLeftPixel(),
ref_dqcoeff.TopLeftPixel(), dequant_ptr_, &ref_eob,
scan_order->scan, scan_order->iscan);
quant_shift_ptr_, dequant_ptr_);
ASM_REGISTER_STATE_CHECK(quantize_op_(
coeff.TopLeftPixel(), count, skip_block, zbin_ptr_, r_ptr, q_ptr,
quant_shift_ptr_, qcoeff.TopLeftPixel(), dqcoeff.TopLeftPixel(),
dequant_ptr_, &eob, scan_order->scan, scan_order->iscan));
ref_quantize_op_(coeff.TopLeftPixel(), count, skip_block, zbin_ptr_,
round_ptr_, quant_ptr_, quant_shift_ptr_,
ref_qcoeff.TopLeftPixel(), ref_dqcoeff.TopLeftPixel(),
dequant_ptr_, &ref_eob, scan_order->scan,
scan_order->iscan);
ASM_REGISTER_STATE_CHECK(
quantize_op_(coeff.TopLeftPixel(), count, skip_block, zbin_ptr_,
round_ptr_, quant_ptr_, quant_shift_ptr_,
qcoeff.TopLeftPixel(), dqcoeff.TopLeftPixel(),
dequant_ptr_, &eob, scan_order->scan, scan_order->iscan));
EXPECT_TRUE(qcoeff.CheckValues(ref_qcoeff));
EXPECT_TRUE(dqcoeff.CheckValues(ref_dqcoeff));
@@ -360,19 +241,19 @@ TEST_P(VP9QuantizeTest, EOBCheck) {
coeff.TopLeftPixel()[rnd(count)] =
static_cast<int>(rnd.RandRange(max_value_ * 2)) - max_value_;
GenerateHelperArrays(&rnd, zbin_ptr_, round_ptr_, quant_ptr_,
quant_shift_ptr_, dequant_ptr_, round_fp_ptr_,
quant_fp_ptr_);
int16_t *r_ptr = (is_fp_) ? round_fp_ptr_ : round_ptr_;
int16_t *q_ptr = (is_fp_) ? quant_fp_ptr_ : quant_ptr_;
ref_quantize_op_(coeff.TopLeftPixel(), count, skip_block, zbin_ptr_, r_ptr,
q_ptr, quant_shift_ptr_, ref_qcoeff.TopLeftPixel(),
ref_dqcoeff.TopLeftPixel(), dequant_ptr_, &ref_eob,
scan_order->scan, scan_order->iscan);
quant_shift_ptr_, dequant_ptr_);
ASM_REGISTER_STATE_CHECK(quantize_op_(
coeff.TopLeftPixel(), count, skip_block, zbin_ptr_, r_ptr, q_ptr,
quant_shift_ptr_, qcoeff.TopLeftPixel(), dqcoeff.TopLeftPixel(),
dequant_ptr_, &eob, scan_order->scan, scan_order->iscan));
ref_quantize_op_(coeff.TopLeftPixel(), count, skip_block, zbin_ptr_,
round_ptr_, quant_ptr_, quant_shift_ptr_,
ref_qcoeff.TopLeftPixel(), ref_dqcoeff.TopLeftPixel(),
dequant_ptr_, &ref_eob, scan_order->scan,
scan_order->iscan);
ASM_REGISTER_STATE_CHECK(
quantize_op_(coeff.TopLeftPixel(), count, skip_block, zbin_ptr_,
round_ptr_, quant_ptr_, quant_shift_ptr_,
qcoeff.TopLeftPixel(), dqcoeff.TopLeftPixel(),
dequant_ptr_, &eob, scan_order->scan, scan_order->iscan));
EXPECT_TRUE(qcoeff.CheckValues(ref_qcoeff));
EXPECT_TRUE(dqcoeff.CheckValues(ref_dqcoeff));
@@ -418,10 +299,7 @@ TEST_P(VP9QuantizeTest, DISABLED_Speed) {
const int count = (4 << sz) * (4 << sz);
GenerateHelperArrays(&rnd, zbin_ptr_, round_ptr_, quant_ptr_,
quant_shift_ptr_, dequant_ptr_, round_fp_ptr_,
quant_fp_ptr_);
int16_t *r_ptr = (is_fp_) ? round_fp_ptr_ : round_ptr_;
int16_t *q_ptr = (is_fp_) ? quant_fp_ptr_ : quant_ptr_;
quant_shift_ptr_, dequant_ptr_);
if (i == 0) {
// When |coeff values| are less than zbin the results are 0.
@@ -441,10 +319,10 @@ TEST_P(VP9QuantizeTest, DISABLED_Speed) {
vpx_usec_timer timer;
vpx_usec_timer_start(&timer);
for (int j = 0; j < 100000000 / count; ++j) {
quantize_op_(coeff.TopLeftPixel(), count, skip_block, zbin_ptr_, r_ptr,
q_ptr, quant_shift_ptr_, qcoeff.TopLeftPixel(),
dqcoeff.TopLeftPixel(), dequant_ptr_, &eob,
scan_order->scan, scan_order->iscan);
quantize_op_(coeff.TopLeftPixel(), count, skip_block, zbin_ptr_,
round_ptr_, quant_ptr_, quant_shift_ptr_,
qcoeff.TopLeftPixel(), dqcoeff.TopLeftPixel(),
dequant_ptr_, &eob, scan_order->scan, scan_order->iscan);
}
vpx_usec_timer_mark(&timer);
const int elapsed_time = static_cast<int>(vpx_usec_timer_elapsed(&timer));
@@ -467,54 +345,50 @@ INSTANTIATE_TEST_CASE_P(
SSE2, VP9QuantizeTest,
::testing::Values(
make_tuple(&vpx_highbd_quantize_b_sse2, &vpx_highbd_quantize_b_c,
VPX_BITS_8, 16, false),
VPX_BITS_8, 16),
make_tuple(&vpx_highbd_quantize_b_sse2, &vpx_highbd_quantize_b_c,
VPX_BITS_10, 16, false),
VPX_BITS_10, 16),
make_tuple(&vpx_highbd_quantize_b_sse2, &vpx_highbd_quantize_b_c,
VPX_BITS_12, 16, false),
VPX_BITS_12, 16),
make_tuple(&vpx_highbd_quantize_b_32x32_sse2,
&vpx_highbd_quantize_b_32x32_c, VPX_BITS_8, 32, false),
&vpx_highbd_quantize_b_32x32_c, VPX_BITS_8, 32),
make_tuple(&vpx_highbd_quantize_b_32x32_sse2,
&vpx_highbd_quantize_b_32x32_c, VPX_BITS_10, 32, false),
&vpx_highbd_quantize_b_32x32_c, VPX_BITS_10, 32),
make_tuple(&vpx_highbd_quantize_b_32x32_sse2,
&vpx_highbd_quantize_b_32x32_c, VPX_BITS_12, 32, false)));
&vpx_highbd_quantize_b_32x32_c, VPX_BITS_12, 32)));
#else
INSTANTIATE_TEST_CASE_P(
SSE2, VP9QuantizeTest,
::testing::Values(make_tuple(&vpx_quantize_b_sse2, &vpx_quantize_b_c,
VPX_BITS_8, 16, false),
make_tuple(&QuantFPWrapper<vp9_quantize_fp_sse2>,
&QuantFPWrapper<quantize_fp_nz_c>, VPX_BITS_8,
16, true)));
INSTANTIATE_TEST_CASE_P(SSE2, VP9QuantizeTest,
::testing::Values(make_tuple(&vpx_quantize_b_sse2,
&vpx_quantize_b_c,
VPX_BITS_8, 16)));
#endif // CONFIG_VP9_HIGHBITDEPTH
INSTANTIATE_TEST_CASE_P(
DISABLED_SSE2, VP9QuantizeTest,
::testing::Values(make_tuple(&QuantFPWrapper<vp9_quantize_fp_sse2>,
&QuantFPWrapper<vp9_quantize_fp_c>, VPX_BITS_8,
16)));
#endif // HAVE_SSE2
#if HAVE_SSSE3 && !CONFIG_VP9_HIGHBITDEPTH
#if ARCH_X86_64
INSTANTIATE_TEST_CASE_P(
SSSE3, VP9QuantizeTest,
::testing::Values(make_tuple(&vpx_quantize_b_ssse3, &vpx_quantize_b_c,
VPX_BITS_8, 16, false),
make_tuple(&QuantFPWrapper<vp9_quantize_fp_ssse3>,
&QuantFPWrapper<quantize_fp_nz_c>, VPX_BITS_8,
16, true),
make_tuple(&QuantFPWrapper<vp9_quantize_fp_32x32_ssse3>,
&QuantFPWrapper<quantize_fp_32x32_nz_c>,
VPX_BITS_8, 32, true)));
#else
INSTANTIATE_TEST_CASE_P(SSSE3, VP9QuantizeTest,
::testing::Values(make_tuple(&vpx_quantize_b_ssse3,
&vpx_quantize_b_c,
VPX_BITS_8, 16, false)));
#endif
VPX_BITS_8, 16)));
#if ARCH_X86_64
// TODO(johannkoenig): SSSE3 optimizations do not yet pass this test.
INSTANTIATE_TEST_CASE_P(DISABLED_SSSE3, VP9QuantizeTest,
::testing::Values(make_tuple(
&vpx_quantize_b_32x32_ssse3,
&vpx_quantize_b_32x32_c, VPX_BITS_8, 32, false)));
INSTANTIATE_TEST_CASE_P(
DISABLED_SSSE3, VP9QuantizeTest,
::testing::Values(make_tuple(&vpx_quantize_b_32x32_ssse3,
&vpx_quantize_b_32x32_c, VPX_BITS_8, 32),
make_tuple(&QuantFPWrapper<vp9_quantize_fp_ssse3>,
&QuantFPWrapper<vp9_quantize_fp_c>, VPX_BITS_8,
16),
make_tuple(&QuantFPWrapper<vp9_quantize_fp_32x32_ssse3>,
&QuantFPWrapper<vp9_quantize_fp_32x32_c>,
VPX_BITS_8, 32)));
#endif // ARCH_X86_64
#endif // HAVE_SSSE3 && !CONFIG_VP9_HIGHBITDEPTH
@@ -524,54 +398,36 @@ INSTANTIATE_TEST_CASE_P(DISABLED_SSSE3, VP9QuantizeTest,
INSTANTIATE_TEST_CASE_P(
AVX, VP9QuantizeTest,
::testing::Values(make_tuple(&vpx_quantize_b_avx, &vpx_quantize_b_c,
VPX_BITS_8, 16, false),
VPX_BITS_8, 16),
// Even though SSSE3 and AVX do not match the reference
// code, we can keep them in sync with each other.
make_tuple(&vpx_quantize_b_32x32_avx,
&vpx_quantize_b_32x32_ssse3, VPX_BITS_8, 32,
false)));
&vpx_quantize_b_32x32_ssse3, VPX_BITS_8, 32)));
#endif // HAVE_AVX && !CONFIG_VP9_HIGHBITDEPTH
#if ARCH_X86_64 && HAVE_AVX2
INSTANTIATE_TEST_CASE_P(
AVX2, VP9QuantizeTest,
::testing::Values(make_tuple(&QuantFPWrapper<vp9_quantize_fp_avx2>,
&QuantFPWrapper<quantize_fp_nz_c>, VPX_BITS_8,
16, true)));
#endif // HAVE_AVX2 && !CONFIG_VP9_HIGHBITDEPTH
// TODO(webm:1448): dqcoeff is not handled correctly in HBD builds.
#if HAVE_NEON && !CONFIG_VP9_HIGHBITDEPTH
INSTANTIATE_TEST_CASE_P(
NEON, VP9QuantizeTest,
::testing::Values(make_tuple(&vpx_quantize_b_neon, &vpx_quantize_b_c,
VPX_BITS_8, 16, false),
make_tuple(&vpx_quantize_b_32x32_neon,
&vpx_quantize_b_32x32_c, VPX_BITS_8, 32,
false),
make_tuple(&QuantFPWrapper<vp9_quantize_fp_neon>,
&QuantFPWrapper<vp9_quantize_fp_c>, VPX_BITS_8,
16, true),
make_tuple(&QuantFPWrapper<vp9_quantize_fp_32x32_neon>,
&QuantFPWrapper<vp9_quantize_fp_32x32_c>,
VPX_BITS_8, 32, true)));
::testing::Values(
make_tuple(&vpx_quantize_b_neon, &vpx_quantize_b_c, VPX_BITS_8, 16),
make_tuple(&vpx_quantize_b_32x32_neon, &vpx_quantize_b_32x32_c,
VPX_BITS_8, 32),
make_tuple(&QuantFPWrapper<vp9_quantize_fp_neon>,
&QuantFPWrapper<vp9_quantize_fp_c>, VPX_BITS_8, 16),
make_tuple(&QuantFPWrapper<vp9_quantize_fp_32x32_neon>,
&QuantFPWrapper<vp9_quantize_fp_32x32_c>, VPX_BITS_8, 32)));
#endif // HAVE_NEON && !CONFIG_VP9_HIGHBITDEPTH
// Only useful to compare "Speed" test results.
INSTANTIATE_TEST_CASE_P(
DISABLED_C, VP9QuantizeTest,
::testing::Values(
make_tuple(&vpx_quantize_b_c, &vpx_quantize_b_c, VPX_BITS_8, 16, false),
make_tuple(&vpx_quantize_b_c, &vpx_quantize_b_c, VPX_BITS_8, 16),
make_tuple(&vpx_quantize_b_32x32_c, &vpx_quantize_b_32x32_c, VPX_BITS_8,
32, false),
32),
make_tuple(&QuantFPWrapper<vp9_quantize_fp_c>,
&QuantFPWrapper<vp9_quantize_fp_c>, VPX_BITS_8, 16, true),
make_tuple(&QuantFPWrapper<quantize_fp_nz_c>,
&QuantFPWrapper<quantize_fp_nz_c>, VPX_BITS_8, 16, true),
make_tuple(&QuantFPWrapper<quantize_fp_32x32_nz_c>,
&QuantFPWrapper<quantize_fp_32x32_nz_c>, VPX_BITS_8, 32,
true),
&QuantFPWrapper<vp9_quantize_fp_c>, VPX_BITS_8, 16),
make_tuple(&QuantFPWrapper<vp9_quantize_fp_32x32_c>,
&QuantFPWrapper<vp9_quantize_fp_32x32_c>, VPX_BITS_8, 32,
true)));
&QuantFPWrapper<vp9_quantize_fp_32x32_c>, VPX_BITS_8, 32)));
} // namespace
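A worked sketch of the arithmetic the fp path above exercises. quant_fp_nz() skips a 16-coefficient row when every AC magnitude is at most thr = dequant[1] >> (1 + is_32x32), and GenerateHelperArrays() derives round_fp/quant_fp from a table value qlookup. For a hypothetical qlookup of 64 (and assuming dequant[1] tracks the same step size):

    #include <stdio.h>

    int main(void) {
      const int max_qrounding_factor_fp = 64; /* from GenerateHelperArrays() */
      const int qlookup = 64;                 /* hypothetical table value */
      const int round_fp = (max_qrounding_factor_fp * qlookup) >> 7; /* 32 */
      const int quant_fp = (1 << 16) / qlookup;                      /* 1024 */
      const int dequant_ac = qlookup; /* assumption: dequant[1] == qlookup */
      printf("round_fp=%d quant_fp=%d thr(16x16)=%d thr(32x32)=%d\n",
             round_fp, quant_fp, dequant_ac >> 1, dequant_ac >> 2);
      return 0;
    }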

View File

@@ -47,7 +47,7 @@ class ScaleTest : public VpxScaleBase,
scale_fn_(&img_, &dst_img_, filter_type, phase_scaler));
}
void RunTest(INTERP_FILTER filter_type) {
void RunTest() {
static const int kNumSizesToTest = 20;
static const int kNumScaleFactorsToTest = 4;
static const int kSizesToTest[] = {
@@ -55,48 +55,50 @@ class ScaleTest : public VpxScaleBase,
22, 24, 26, 28, 30, 32, 34, 68, 128, 134
};
static const int kScaleFactors[] = { 1, 2, 3, 4 };
for (int phase_scaler = 0; phase_scaler < 16; ++phase_scaler) {
for (int h = 0; h < kNumSizesToTest; ++h) {
const int src_height = kSizesToTest[h];
for (int w = 0; w < kNumSizesToTest; ++w) {
const int src_width = kSizesToTest[w];
for (int sf_up_idx = 0; sf_up_idx < kNumScaleFactorsToTest;
++sf_up_idx) {
const int sf_up = kScaleFactors[sf_up_idx];
for (int sf_down_idx = 0; sf_down_idx < kNumScaleFactorsToTest;
++sf_down_idx) {
const int sf_down = kScaleFactors[sf_down_idx];
const int dst_width = src_width * sf_up / sf_down;
const int dst_height = src_height * sf_up / sf_down;
if (sf_up == sf_down && sf_up != 1) {
continue;
for (INTERP_FILTER filter_type = 0; filter_type < 4; ++filter_type) {
for (int phase_scaler = 0; phase_scaler < 16; ++phase_scaler) {
for (int h = 0; h < kNumSizesToTest; ++h) {
const int src_height = kSizesToTest[h];
for (int w = 0; w < kNumSizesToTest; ++w) {
const int src_width = kSizesToTest[w];
for (int sf_up_idx = 0; sf_up_idx < kNumScaleFactorsToTest;
++sf_up_idx) {
const int sf_up = kScaleFactors[sf_up_idx];
for (int sf_down_idx = 0; sf_down_idx < kNumScaleFactorsToTest;
++sf_down_idx) {
const int sf_down = kScaleFactors[sf_down_idx];
const int dst_width = src_width * sf_up / sf_down;
const int dst_height = src_height * sf_up / sf_down;
if (sf_up == sf_down && sf_up != 1) {
continue;
}
// I420 frame width and height must be even.
if (!dst_width || !dst_height || dst_width & 1 ||
dst_height & 1) {
continue;
}
// vpx_convolve8_c() has a restriction on the step, which cannot
// exceed 64 (ratio 1 to 4).
if (src_width > 4 * dst_width || src_height > 4 * dst_height) {
continue;
}
ASSERT_NO_FATAL_FAILURE(ResetScaleImages(
src_width, src_height, dst_width, dst_height));
ReferenceScaleFrame(filter_type, phase_scaler);
ScaleFrame(filter_type, phase_scaler);
if (memcmp(dst_img_.buffer_alloc, ref_img_.buffer_alloc,
ref_img_.frame_size)) {
printf(
"filter_type = %d, phase_scaler = %d, src_width = %4d, "
"src_height = %4d, dst_width = %4d, dst_height = %4d, "
"scale factor = %d:%d\n",
filter_type, phase_scaler, src_width, src_height,
dst_width, dst_height, sf_down, sf_up);
PrintDiff();
}
CompareImages(dst_img_);
DeallocScaleImages();
}
// I420 frame width and height must be even.
if (!dst_width || !dst_height || dst_width & 1 ||
dst_height & 1) {
continue;
}
// vpx_convolve8_c() has a restriction on the step, which cannot
// exceed 64 (ratio 1 to 4).
if (src_width > 4 * dst_width || src_height > 4 * dst_height) {
continue;
}
ASSERT_NO_FATAL_FAILURE(ResetScaleImages(src_width, src_height,
dst_width, dst_height));
ReferenceScaleFrame(filter_type, phase_scaler);
ScaleFrame(filter_type, phase_scaler);
if (memcmp(dst_img_.buffer_alloc, ref_img_.buffer_alloc,
ref_img_.frame_size)) {
printf(
"filter_type = %d, phase_scaler = %d, src_width = %4d, "
"src_height = %4d, dst_width = %4d, dst_height = %4d, "
"scale factor = %d:%d\n",
filter_type, phase_scaler, src_width, src_height, dst_width,
dst_height, sf_down, sf_up);
PrintDiff();
}
CompareImages(dst_img_);
DeallocScaleImages();
}
}
}
@@ -143,10 +145,7 @@ class ScaleTest : public VpxScaleBase,
ScaleFrameFunc scale_fn_;
};
TEST_P(ScaleTest, ScaleFrame_EightTap) { RunTest(EIGHTTAP); }
TEST_P(ScaleTest, ScaleFrame_EightTapSmooth) { RunTest(EIGHTTAP_SMOOTH); }
TEST_P(ScaleTest, ScaleFrame_EightTapSharp) { RunTest(EIGHTTAP_SHARP); }
TEST_P(ScaleTest, ScaleFrame_Bilinear) { RunTest(BILINEAR); }
TEST_P(ScaleTest, ScaleFrame) { ASSERT_NO_FATAL_FAILURE(RunTest()); }
TEST_P(ScaleTest, DISABLED_Speed) {
static const int kCountSpeedTestBlock = 100;
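The folded RunTest() above iterates every (filter_type, phase_scaler, size, ratio) combination and skips the ones the scaler cannot handle. The three skip conditions, extracted here into a standalone predicate for readability (same logic as the loop body):

    /* Sketch of the combination filter inside ScaleTest::RunTest(). */
    static int IsTestableScale(int src_w, int src_h, int sf_up, int sf_down) {
      const int dst_w = src_w * sf_up / sf_down;
      const int dst_h = src_h * sf_up / sf_down;
      /* 2:2, 3:3 and 4:4 duplicate the 1:1 case. */
      if (sf_up == sf_down && sf_up != 1) return 0;
      /* I420 frame dimensions must be non-zero and even. */
      if (!dst_w || !dst_h || (dst_w & 1) || (dst_h & 1)) return 0;
      /* vpx_convolve8_c() limits the step to 64, i.e. at most 4x down. */
      if (src_w > 4 * dst_w || src_h > 4 * dst_h) return 0;
      return 1;
    }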

View File

@@ -147,6 +147,7 @@ TEST(VPxWorkerThreadTest, TestInterfaceAPI) {
// -----------------------------------------------------------------------------
// Multi-threaded decode tests
#if CONFIG_WEBM_IO
struct FileList {
const char *name;

tools/all_builds.py (new executable file, 72 lines)
View File

@@ -0,0 +1,72 @@
#!/usr/bin/python
import getopt
import subprocess
import sys
LONG_OPTIONS = ["shard=", "shards="]
BASE_COMMAND = "./configure --enable-internal-stats --enable-experimental"
def RunCommand(command):
run = subprocess.Popen(command, shell=True)
output = run.communicate()
if run.returncode:
print "Non-zero return code: " + str(run.returncode) + " => exiting!"
sys.exit(1)
def list_of_experiments():
experiments = []
configure_file = open("configure")
list_start = False
for line in configure_file.read().split("\n"):
if line == 'EXPERIMENT_LIST="':
list_start = True
elif line == '"':
list_start = False
elif list_start:
currently_broken = ["csm"]
experiment = line[4:]
if experiment not in currently_broken:
experiments.append(experiment)
return experiments
def main(argv):
# Parse arguments
options = {"--shard": 0, "--shards": 1}
if "--" in argv:
opt_end_index = argv.index("--")
else:
opt_end_index = len(argv)
try:
o, _ = getopt.getopt(argv[1:opt_end_index], None, LONG_OPTIONS)
except getopt.GetoptError, err:
print str(err)
print "Usage: %s [--shard=<n> --shards=<n>] -- [configure flag ...]"%argv[0]
sys.exit(2)
options.update(o)
extra_args = argv[opt_end_index + 1:]
# Shard experiment list
shard = int(options["--shard"])
shards = int(options["--shards"])
experiments = list_of_experiments()
base_command = " ".join([BASE_COMMAND] + extra_args)
configs = [base_command]
configs += ["%s --enable-%s" % (base_command, e) for e in experiments]
my_configs = zip(configs, range(len(configs)))
my_configs = filter(lambda x: x[1] % shards == shard, my_configs)
my_configs = [e[0] for e in my_configs]
# Run configs for this shard
for config in my_configs:
test_build(config)
def test_build(configure_command):
print "\033[34m\033[47mTesting %s\033[0m" % (configure_command)
RunCommand(configure_command)
RunCommand("make clean")
RunCommand("make")
if __name__ == "__main__":
main(sys.argv)
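The script assigns configuration i to the shard where i % shards == shard, and everything after "--" is appended verbatim to each ./configure invocation. A plausible invocation for the first of four shards (the extra flag is only an example):

    tools/all_builds.py --shard=0 --shards=4 -- --disable-unit-tests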

tools/author_first_release.sh (new executable file, 15 lines)
View File

@@ -0,0 +1,15 @@
#!/bin/bash
##
## List the release each author first contributed to.
##
## Usage: author_first_release.sh [TAGS]
##
## If the TAGS arguments are unspecified, all tags reported by `git tag`
## will be considered.
##
tags=${@:-$(git tag)}
for tag in $tags; do
git shortlog -n -e -s $tag |
cut -f2- |
awk "{print \"${tag#v}\t\"\$0}"
done | sort -k2 | uniq -f2

tools/ftfy.sh (new executable file, 158 lines)
View File

@@ -0,0 +1,158 @@
#!/bin/sh
self="$0"
dirname_self=$(dirname "$self")
usage() {
cat <<EOF >&2
Usage: $self [option]
This script applies a whitespace transformation to the commit at HEAD. If no
options are given, then the modified files are left in the working tree.
Options:
-h, --help Shows this message
-n, --dry-run Shows a diff of the changes to be made.
--amend Squashes the changes into the commit at HEAD
This option will also reformat the commit message.
--commit Creates a new commit containing only the whitespace changes
--msg-only Reformat the commit message only, ignore the patch itself.
EOF
rm -f ${CLEAN_FILES}
exit 1
}
log() {
echo "${self##*/}: $@" >&2
}
vpx_style() {
for f; do
case "$f" in
*.h|*.c|*.cc)
clang-format -i --style=file "$f"
;;
esac
done
}
apply() {
[ $INTERSECT_RESULT -ne 0 ] && patch -p1 < "$1"
}
commit() {
LAST_CHANGEID=$(git show | awk '/Change-Id:/{print $2}')
if [ -z "$LAST_CHANGEID" ]; then
log "HEAD doesn't have a Change-Id, unable to generate a new commit"
exit 1
fi
# Build a deterministic Change-Id from the parent's
NEW_CHANGEID=${LAST_CHANGEID}-styled
NEW_CHANGEID=I$(echo $NEW_CHANGEID | git hash-object --stdin)
# Commit, preserving authorship from the parent commit.
git commit -a -C HEAD > /dev/null
git commit --amend -F- << EOF
Cosmetic: Fix whitespace in change ${LAST_CHANGEID:0:9}
Change-Id: ${NEW_CHANGEID}
EOF
}
show_commit_msg_diff() {
if [ $DIFF_MSG_RESULT -ne 0 ]; then
log "Modified commit message:"
diff -u "$ORIG_COMMIT_MSG" "$NEW_COMMIT_MSG" | tail -n +3
fi
}
amend() {
show_commit_msg_diff
if [ $DIFF_MSG_RESULT -ne 0 ] || [ $INTERSECT_RESULT -ne 0 ]; then
git commit -a --amend -F "$NEW_COMMIT_MSG"
fi
}
diff_msg() {
git log -1 --format=%B > "$ORIG_COMMIT_MSG"
"${dirname_self}"/wrap-commit-msg.py \
< "$ORIG_COMMIT_MSG" > "$NEW_COMMIT_MSG"
cmp -s "$ORIG_COMMIT_MSG" "$NEW_COMMIT_MSG"
DIFF_MSG_RESULT=$?
}
# Temporary files
ORIG_DIFF=orig.diff.$$
MODIFIED_DIFF=modified.diff.$$
FINAL_DIFF=final.diff.$$
ORIG_COMMIT_MSG=orig.commit-msg.$$
NEW_COMMIT_MSG=new.commit-msg.$$
CLEAN_FILES="${ORIG_DIFF} ${MODIFIED_DIFF} ${FINAL_DIFF}"
CLEAN_FILES="${CLEAN_FILES} ${ORIG_COMMIT_MSG} ${NEW_COMMIT_MSG}"
# Preconditions
[ $# -lt 2 ] || usage
if ! clang-format -version >/dev/null 2>&1; then
log "clang-format not found"
exit 1
fi
if ! git diff --quiet HEAD; then
log "Working tree is dirty, commit your changes first"
exit 1
fi
# Need to be in the root
cd "$(git rev-parse --show-toplevel)"
# Collect the original diff
git show > "${ORIG_DIFF}"
# Apply the style guide on new and modified files and collect its diff
for f in $(git diff HEAD^ --name-only -M90 --diff-filter=AM); do
case "$f" in
third_party/*) continue;;
esac
vpx_style "$f"
done
git diff --no-color --no-ext-diff > "${MODIFIED_DIFF}"
# Intersect the two diffs
"${dirname_self}"/intersect-diffs.py \
"${ORIG_DIFF}" "${MODIFIED_DIFF}" > "${FINAL_DIFF}"
INTERSECT_RESULT=$?
git reset --hard >/dev/null
# Fixup the commit message
diff_msg
# Handle options
if [ -n "$1" ]; then
case "$1" in
-h|--help) usage;;
-n|--dry-run) cat "${FINAL_DIFF}"; show_commit_msg_diff;;
--commit) apply "${FINAL_DIFF}"; commit;;
--amend) apply "${FINAL_DIFF}"; amend;;
--msg-only) amend;;
*) usage;;
esac
else
apply "${FINAL_DIFF}"
if ! git diff --quiet; then
log "Formatting changes applied, verify and commit."
log "See also: http://www.webmproject.org/code/contribute/conventions/"
git diff --stat
fi
fi
rm -f ${CLEAN_FILES}

View File

@@ -37,9 +37,7 @@ extern "C" {
#define SEGMENT_DELTADATA 0
#define SEGMENT_ABSDATA 1
typedef struct {
int r, c;
} POS;
typedef struct { int r, c; } POS;
#define PLANE_TYPE_Y_NO_DC 0
#define PLANE_TYPE_Y2 1
@@ -182,9 +180,6 @@ typedef struct {
unsigned int low_res_ref_frames[MAX_REF_FRAMES];
// The video frame counter value for the key frame, for lowest resolution.
unsigned int key_frame_counter_value;
// Flags to signal skipped encoding of previous and base layer stream.
unsigned int skip_encoding_prev_stream;
unsigned int skip_encoding_base_stream;
LOWER_RES_MB_INFO *mb_info;
} LOWER_RES_FRAME_INFO;
#endif

View File

@@ -6,7 +6,7 @@
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
*/
#ifndef VP8_COMMON_DEFAULT_COEF_PROBS_H_
#define VP8_COMMON_DEFAULT_COEF_PROBS_H_

View File

@@ -20,7 +20,8 @@ static void copy_and_extend_plane(unsigned char *s, /* source */
int et, /* extend top border */
int el, /* extend left border */
int eb, /* extend bottom border */
int er) { /* extend right border */
int er /* extend right border */
) {
int i;
unsigned char *src_ptr1, *src_ptr2;
unsigned char *dest_ptr1, *dest_ptr2;

View File

@@ -934,8 +934,8 @@ void vp8_loop_filter_uvvertical_edge_mips(unsigned char *s, int p,
s4 = s3 + p;
/* load quad-byte vectors
* memory is 4 byte aligned
*/
* memory is 4 byte aligned
*/
p2 = *((uint32_t *)(s1 - 4));
p6 = *((uint32_t *)(s1));
p1 = *((uint32_t *)(s2 - 4));
@@ -990,8 +990,8 @@ void vp8_loop_filter_uvvertical_edge_mips(unsigned char *s, int p,
:);
/* if (p1 - p4 == 0) and (p2 - p3 == 0)
* mask will be zero and filtering is not needed
*/
* mask will be zero and filtering is not needed
*/
if (!(((p1 - p4) == 0) && ((p2 - p3) == 0))) {
vp8_filter_mask_vec_mips(limit, flimit, p1, p2, pm1, p0, p3, p4, p5, p6,
thresh, &hev, &mask);
@@ -2102,8 +2102,8 @@ void vp8_mbloop_filter_uvvertical_edge_mips(unsigned char *s, int p,
s4 = s3 + p;
/* load quad-byte vectors
* memory is 4 byte aligned
*/
* memory is 4 byte aligned
*/
p2 = *((uint32_t *)(s1 - 4));
p6 = *((uint32_t *)(s1));
p1 = *((uint32_t *)(s2 - 4));

View File

@@ -12,7 +12,7 @@
#include "vpx_mem/vpx_mem.h"
void vp8_dequant_idct_add_y_block_mmi(int16_t *q, int16_t *dq, uint8_t *dst,
int stride, char *eobs) {
int stride, int8_t *eobs) {
int i, j;
for (i = 0; i < 4; i++) {
@@ -33,7 +33,8 @@ void vp8_dequant_idct_add_y_block_mmi(int16_t *q, int16_t *dq, uint8_t *dst,
}
void vp8_dequant_idct_add_uv_block_mmi(int16_t *q, int16_t *dq, uint8_t *dstu,
uint8_t *dstv, int stride, char *eobs) {
uint8_t *dstv, int stride,
int8_t *eobs) {
int i, j;
for (i = 0; i < 2; i++) {

View File

@@ -461,87 +461,96 @@ void vp8_loop_filter_vertical_edge_mmi(unsigned char *src_ptr,
);
}
/* clang-format off */
#define VP8_MBLOOP_HPSRAB \
"punpcklbh %[ftmp10], %[ftmp10], %[ftmp0] \n\t" \
"punpckhbh %[ftmp11], %[ftmp11], %[ftmp0] \n\t" \
"psrah %[ftmp10], %[ftmp10], %[ftmp9] \n\t" \
"psrah %[ftmp11], %[ftmp11], %[ftmp9] \n\t" \
"packsshb %[ftmp0], %[ftmp10], %[ftmp11] \n\t"
"xor %[ftmp3], %[ftmp3], %[ftmp3] \n\t" \
"xor %[ftmp8], %[ftmp8], %[ftmp8] \n\t" \
"punpcklbh %[ftmp3], %[ftmp3], %[ftmp0] \n\t" \
"punpckhbh %[ftmp8], %[ftmp8], %[ftmp0] \n\t" \
"psrah %[ftmp3], %[ftmp3], %[ftmp9] \n\t" \
"psrah %[ftmp8], %[ftmp8], %[ftmp9] \n\t" \
"packsshb %[ftmp0], %[ftmp3], %[ftmp8] \n\t"
#define VP8_MBLOOP_HPSRAB_ADD(reg) \
"punpcklbh %[ftmp1], %[ftmp0], %[ftmp12] \n\t" \
"punpckhbh %[ftmp2], %[ftmp0], %[ftmp12] \n\t" \
"pmulhh %[ftmp1], %[ftmp1], " #reg " \n\t" \
"pmulhh %[ftmp2], %[ftmp2], " #reg " \n\t" \
"paddh %[ftmp1], %[ftmp1], %[ff_ph_003f] \n\t" \
"paddh %[ftmp2], %[ftmp2], %[ff_ph_003f] \n\t" \
"psrah %[ftmp1], %[ftmp1], %[ftmp9] \n\t" \
"psrah %[ftmp2], %[ftmp2], %[ftmp9] \n\t" \
"packsshb %[ftmp1], %[ftmp1], %[ftmp2] \n\t"
/* clang-format on */
#define VP8_MBLOOP_HPSRAB_PMULHH(reg1, reg2) \
"pmulhh " #reg1 ", " #reg1 ", " #reg2 " \n\t"
#define VP8_MBLOOP_HPSRAB_ADD(reg) \
"xor %[ftmp3], %[ftmp3], %[ftmp3] \n\t" \
"xor %[ftmp8], %[ftmp8], %[ftmp8] \n\t" \
"punpcklbh %[ftmp3], %[ftmp3], %[ftmp2] \n\t" \
"punpckhbh %[ftmp8], %[ftmp8], %[ftmp2] \n\t" \
VP8_MBLOOP_HPSRAB_PMULHH(%[ftmp3], reg) \
VP8_MBLOOP_HPSRAB_PMULHH(%[ftmp8], reg) \
"paddh %[ftmp3], %[ftmp3], %[ff_ph_003f] \n\t" \
"paddh %[ftmp8], %[ftmp8], %[ff_ph_003f] \n\t" \
"psrah %[ftmp3], %[ftmp3], %[ftmp9] \n\t" \
"psrah %[ftmp8], %[ftmp8], %[ftmp9] \n\t" \
"packsshb %[ftmp3], %[ftmp3], %[ftmp8] \n\t"
void vp8_mbloop_filter_horizontal_edge_mmi(
unsigned char *src_ptr, int src_pixel_step, const unsigned char *blimit,
const unsigned char *limit, const unsigned char *thresh, int count) {
uint32_t tmp[1];
double ftmp[13];
mips_reg addr[2];
DECLARE_ALIGNED(8, const uint64_t, srct[1]);
double ftmp[10];
__asm__ volatile (
MMI_SLL(%[tmp0], %[src_pixel_step], 0x02)
MMI_SUBU(%[src_ptr], %[src_ptr], %[tmp0])
"1: \n\t"
"gsldlc1 %[ftmp9], 0x07(%[limit]) \n\t"
"gsldrc1 %[ftmp9], 0x00(%[limit]) \n\t"
/* ftmp1: p3 */
"gsldlc1 %[ftmp1], 0x07(%[src_ptr]) \n\t"
"gsldrc1 %[ftmp1], 0x00(%[src_ptr]) \n\t"
/* ftmp3: p2 */
MMI_ADDU(%[src_ptr], %[src_ptr], %[src_pixel_step])
"gsldlc1 %[ftmp3], 0x07(%[src_ptr]) \n\t"
"gsldrc1 %[ftmp3], 0x00(%[src_ptr]) \n\t"
/* ftmp4: p1 */
MMI_ADDU(%[src_ptr], %[src_ptr], %[src_pixel_step])
"gsldlc1 %[ftmp4], 0x07(%[src_ptr]) \n\t"
"gsldrc1 %[ftmp4], 0x00(%[src_ptr]) \n\t"
/* ftmp5: p0 */
MMI_ADDU(%[src_ptr], %[src_ptr], %[src_pixel_step])
"gsldlc1 %[ftmp5], 0x07(%[src_ptr]) \n\t"
"gsldrc1 %[ftmp5], 0x00(%[src_ptr]) \n\t"
/* ftmp6: q0 */
MMI_ADDU(%[src_ptr], %[src_ptr], %[src_pixel_step])
"gsldlc1 %[ftmp6], 0x07(%[src_ptr]) \n\t"
"gsldrc1 %[ftmp6], 0x00(%[src_ptr]) \n\t"
/* ftmp7: q1 */
MMI_ADDU(%[src_ptr], %[src_ptr], %[src_pixel_step])
"gsldlc1 %[ftmp7], 0x07(%[src_ptr]) \n\t"
"gsldrc1 %[ftmp7], 0x00(%[src_ptr]) \n\t"
/* ftmp8: q2 */
MMI_ADDU(%[src_ptr], %[src_ptr], %[src_pixel_step])
"gsldlc1 %[ftmp8], 0x07(%[src_ptr]) \n\t"
"gsldrc1 %[ftmp8], 0x00(%[src_ptr]) \n\t"
/* ftmp2: q3 */
MMI_ADDU(%[src_ptr], %[src_ptr], %[src_pixel_step])
"gsldlc1 %[ftmp2], 0x07(%[src_ptr]) \n\t"
"gsldrc1 %[ftmp2], 0x00(%[src_ptr]) \n\t"
"gsldlc1 %[ftmp12], 0x07(%[blimit]) \n\t"
"gsldrc1 %[ftmp12], 0x00(%[blimit]) \n\t"
MMI_ADDU(%[addr0], %[src_ptr], %[src_pixel_step])
MMI_SLL(%[tmp0], %[src_pixel_step], 0x02)
MMI_SUBU(%[addr1], %[src_ptr], %[tmp0])
"gsldlc1 %[ftmp1], 0x07(%[addr1]) \n\t"
"gsldrc1 %[ftmp1], 0x00(%[addr1]) \n\t"
MMI_SUBU(%[addr1], %[addr0], %[tmp0])
"gsldlc1 %[ftmp3], 0x07(%[addr1]) \n\t"
"gsldrc1 %[ftmp3], 0x00(%[addr1]) \n\t"
"pasubub %[ftmp0], %[ftmp1], %[ftmp3] \n\t"
"psubusb %[ftmp0], %[ftmp0], %[ftmp9] \n\t"
/* ftmp4:p1 */
MMI_SLL(%[tmp0], %[src_pixel_step], 0x01)
MMI_SUBU(%[addr1], %[src_ptr], %[tmp0])
"gsldlc1 %[ftmp4], 0x07(%[addr1]) \n\t"
"gsldrc1 %[ftmp4], 0x00(%[addr1]) \n\t"
"pasubub %[ftmp1], %[ftmp3], %[ftmp4] \n\t"
"psubusb %[ftmp1], %[ftmp1], %[ftmp9] \n\t"
"or %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
"pasubub %[ftmp10], %[ftmp4], %[ftmp5] \n\t"
"psubusb %[ftmp1], %[ftmp10], %[ftmp9] \n\t"
/* ftmp5:p0 */
MMI_SUBU(%[addr1], %[src_ptr], %[src_pixel_step])
"gsldlc1 %[ftmp5], 0x07(%[addr1]) \n\t"
"gsldrc1 %[ftmp5], 0x00(%[addr1]) \n\t"
"pasubub %[ftmp1], %[ftmp4], %[ftmp5] \n\t"
"sdc1 %[ftmp1], 0x00(%[srct]) \n\t"
"psubusb %[ftmp1], %[ftmp1], %[ftmp9] \n\t"
"or %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
"pasubub %[ftmp11], %[ftmp7], %[ftmp6] \n\t"
"psubusb %[ftmp1], %[ftmp11], %[ftmp9] \n\t"
/* ftmp6:q0 */
"gsldlc1 %[ftmp6], 0x07(%[src_ptr]) \n\t"
"gsldrc1 %[ftmp6], 0x00(%[src_ptr]) \n\t"
/* ftmp7:q1 */
"gsldlc1 %[ftmp7], 0x07(%[addr0]) \n\t"
"gsldrc1 %[ftmp7], 0x00(%[addr0]) \n\t"
"pasubub %[ftmp1], %[ftmp7], %[ftmp6] \n\t"
"sdc1 %[ftmp1], 0x08(%[srct]) \n\t"
"psubusb %[ftmp1], %[ftmp1], %[ftmp9] \n\t"
"or %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
MMI_ADDU(%[addr1], %[src_ptr], %[tmp0])
"gsldlc1 %[ftmp8], 0x07(%[addr1]) \n\t"
"gsldrc1 %[ftmp8], 0x00(%[addr1]) \n\t"
"pasubub %[ftmp1], %[ftmp8], %[ftmp7] \n\t"
"psubusb %[ftmp1], %[ftmp1], %[ftmp9] \n\t"
"or %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
MMI_ADDU(%[addr1], %[addr0], %[tmp0])
"gsldlc1 %[ftmp2], 0x07(%[addr1]) \n\t"
"gsldrc1 %[ftmp2], 0x00(%[addr1]) \n\t"
"pasubub %[ftmp1], %[ftmp2], %[ftmp8] \n\t"
"psubusb %[ftmp1], %[ftmp1], %[ftmp9] \n\t"
"or %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
@@ -554,7 +563,9 @@ void vp8_mbloop_filter_horizontal_edge_mmi(
"mtc1 %[tmp0], %[ftmp9] \n\t"
"psrlh %[ftmp2], %[ftmp2], %[ftmp9] \n\t"
"paddusb %[ftmp1], %[ftmp1], %[ftmp2] \n\t"
"psubusb %[ftmp1], %[ftmp1], %[ftmp12] \n\t"
"gsldlc1 %[ftmp9], 0x07(%[blimit]) \n\t"
"gsldrc1 %[ftmp9], 0x00(%[blimit]) \n\t"
"psubusb %[ftmp1], %[ftmp1], %[ftmp9] \n\t"
"or %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
"xor %[ftmp9], %[ftmp9], %[ftmp9] \n\t"
/* ftmp0: mask */
@@ -562,26 +573,29 @@ void vp8_mbloop_filter_horizontal_edge_mmi(
"gsldlc1 %[ftmp9], 0x07(%[thresh]) \n\t"
"gsldrc1 %[ftmp9], 0x00(%[thresh]) \n\t"
"psubusb %[ftmp1], %[ftmp10], %[ftmp9] \n\t"
"psubusb %[ftmp2], %[ftmp11], %[ftmp9] \n\t"
"ldc1 %[ftmp1], 0x00(%[srct]) \n\t"
"psubusb %[ftmp1], %[ftmp1], %[ftmp9] \n\t"
"ldc1 %[ftmp2], 0x08(%[srct]) \n\t"
"psubusb %[ftmp2], %[ftmp2], %[ftmp9] \n\t"
"paddb %[ftmp1], %[ftmp1], %[ftmp2] \n\t"
"xor %[ftmp2], %[ftmp2], %[ftmp2] \n\t"
"pcmpeqb %[ftmp1], %[ftmp1], %[ftmp2] \n\t"
"pcmpeqb %[ftmp2], %[ftmp2], %[ftmp2] \n\t"
/* ftmp1: hev */
/* ftmp1:hev*/
"xor %[ftmp1], %[ftmp1], %[ftmp2] \n\t"
"xor %[ftmp4], %[ftmp4], %[ff_pb_80] \n\t"
"xor %[ftmp5], %[ftmp5], %[ff_pb_80] \n\t"
"xor %[ftmp6], %[ftmp6], %[ff_pb_80] \n\t"
"xor %[ftmp7], %[ftmp7], %[ff_pb_80] \n\t"
"psubsb %[ftmp2], %[ftmp4], %[ftmp7] \n\t"
"psubsb %[ftmp9], %[ftmp6], %[ftmp5] \n\t"
"paddsb %[ftmp2], %[ftmp2], %[ftmp9] \n\t"
"paddsb %[ftmp2], %[ftmp2], %[ftmp9] \n\t"
"paddsb %[ftmp2], %[ftmp2], %[ftmp9] \n\t"
"and %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
"pandn %[ftmp12], %[ftmp1], %[ftmp2] \n\t"
"sdc1 %[ftmp2], 0x00(%[srct]) \n\t"
"and %[ftmp2], %[ftmp2], %[ftmp1] \n\t"
"li %[tmp0], 0x0b \n\t"
@@ -592,71 +606,75 @@ void vp8_mbloop_filter_horizontal_edge_mmi(
"paddsb %[ftmp0], %[ftmp2], %[ff_pb_04] \n\t"
VP8_MBLOOP_HPSRAB
"psubsb %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
"ldc1 %[ftmp2], 0x00(%[srct]) \n\t"
"pandn %[ftmp2], %[ftmp1], %[ftmp2] \n\t"
"li %[tmp0], 0x07 \n\t"
"mtc1 %[tmp0], %[ftmp9] \n\t"
"xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
VP8_MBLOOP_HPSRAB_ADD(%[ff_ph_1b00])
"psubsb %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
"paddsb %[ftmp5], %[ftmp5], %[ftmp1] \n\t"
"psubsb %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
"paddsb %[ftmp5], %[ftmp5], %[ftmp3] \n\t"
"xor %[ftmp6], %[ftmp6], %[ff_pb_80] \n\t"
"xor %[ftmp5], %[ftmp5], %[ff_pb_80] \n\t"
MMI_SLL(%[tmp0], %[src_pixel_step], 0x02)
MMI_SUBU(%[src_ptr], %[src_ptr], %[tmp0])
"gssdlc1 %[ftmp5], 0x07(%[src_ptr]) \n\t"
"gssdrc1 %[ftmp5], 0x00(%[src_ptr]) \n\t"
MMI_ADDU(%[src_ptr], %[src_ptr], %[src_pixel_step])
MMI_SUBU(%[addr1], %[src_ptr], %[src_pixel_step])
"gssdlc1 %[ftmp5], 0x07(%[addr1]) \n\t"
"gssdrc1 %[ftmp5], 0x00(%[addr1]) \n\t"
"gssdlc1 %[ftmp6], 0x07(%[src_ptr]) \n\t"
"gssdrc1 %[ftmp6], 0x00(%[src_ptr]) \n\t"
VP8_MBLOOP_HPSRAB_ADD(%[ff_ph_1200])
"paddsb %[ftmp4], %[ftmp4], %[ftmp1] \n\t"
"psubsb %[ftmp7], %[ftmp7], %[ftmp1] \n\t"
"paddsb %[ftmp4], %[ftmp4], %[ftmp3] \n\t"
"psubsb %[ftmp7], %[ftmp7], %[ftmp3] \n\t"
"xor %[ftmp4], %[ftmp4], %[ff_pb_80] \n\t"
"xor %[ftmp7], %[ftmp7], %[ff_pb_80] \n\t"
MMI_ADDU(%[src_ptr], %[src_ptr], %[src_pixel_step])
"gssdlc1 %[ftmp7], 0x07(%[src_ptr]) \n\t"
"gssdrc1 %[ftmp7], 0x00(%[src_ptr]) \n\t"
MMI_SUBU(%[src_ptr], %[src_ptr], %[tmp0])
MMI_ADDU(%[src_ptr], %[src_ptr], %[src_pixel_step])
"gssdlc1 %[ftmp4], 0x07(%[src_ptr]) \n\t"
"gssdrc1 %[ftmp4], 0x00(%[src_ptr]) \n\t"
"gssdlc1 %[ftmp7], 0x07(%[addr0]) \n\t"
"gssdrc1 %[ftmp7], 0x00(%[addr0]) \n\t"
MMI_SLL(%[tmp0], %[src_pixel_step], 0x01)
MMI_SUBU(%[addr1], %[src_ptr], %[tmp0])
"gssdlc1 %[ftmp4], 0x07(%[addr1]) \n\t"
"gssdrc1 %[ftmp4], 0x00(%[addr1]) \n\t"
VP8_MBLOOP_HPSRAB_ADD(%[ff_ph_0900])
"xor %[ftmp3], %[ftmp3], %[ff_pb_80] \n\t"
"xor %[ftmp8], %[ftmp8], %[ff_pb_80] \n\t"
"paddsb %[ftmp3], %[ftmp3], %[ftmp1] \n\t"
"psubsb %[ftmp8], %[ftmp8], %[ftmp1] \n\t"
"xor %[ftmp3], %[ftmp3], %[ff_pb_80] \n\t"
"xor %[ftmp8], %[ftmp8], %[ff_pb_80] \n\t"
MMI_ADDU(%[src_ptr], %[src_ptr], %[tmp0])
"gssdlc1 %[ftmp8], 0x07(%[src_ptr]) \n\t"
"gssdrc1 %[ftmp8], 0x00(%[src_ptr]) \n\t"
MMI_SUBU(%[src_ptr], %[src_ptr], %[tmp0])
MMI_SUBU(%[src_ptr], %[src_ptr], %[src_pixel_step])
"gssdlc1 %[ftmp3], 0x07(%[src_ptr]) \n\t"
"gssdrc1 %[ftmp3], 0x00(%[src_ptr]) \n\t"
MMI_SLL(%[tmp0], %[src_pixel_step], 0x02)
MMI_SUBU(%[addr1], %[addr0], %[tmp0])
"gsldlc1 %[ftmp4], 0x07(%[addr1]) \n\t"
"gsldrc1 %[ftmp4], 0x00(%[addr1]) \n\t"
MMI_ADDU(%[addr1], %[addr0], %[src_pixel_step])
"gsldlc1 %[ftmp7], 0x07(%[addr1]) \n\t"
"gsldrc1 %[ftmp7], 0x00(%[addr1]) \n\t"
"xor %[ftmp4], %[ftmp4], %[ff_pb_80] \n\t"
"xor %[ftmp7], %[ftmp7], %[ff_pb_80] \n\t"
"paddsb %[ftmp4], %[ftmp4], %[ftmp3] \n\t"
"psubsb %[ftmp7], %[ftmp7], %[ftmp3] \n\t"
"xor %[ftmp4], %[ftmp4], %[ff_pb_80] \n\t"
"xor %[ftmp7], %[ftmp7], %[ff_pb_80] \n\t"
MMI_ADDU(%[addr1], %[addr0], %[src_pixel_step])
"gssdlc1 %[ftmp7], 0x07(%[addr1]) \n\t"
"gssdrc1 %[ftmp7], 0x00(%[addr1]) \n\t"
MMI_SUBU(%[addr1], %[addr0], %[tmp0])
"gssdlc1 %[ftmp4], 0x07(%[addr1]) \n\t"
"gssdrc1 %[ftmp4], 0x00(%[addr1]) \n\t"
MMI_SUBU(%[src_ptr], %[src_ptr], %[src_pixel_step])
MMI_ADDIU(%[src_ptr], %[src_ptr], 0x08)
"addiu %[count], %[count], -0x01 \n\t"
MMI_ADDIU(%[src_ptr], %[src_ptr], 0x08)
"bnez %[count], 1b \n\t"
: [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
[ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
[ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
[ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
[ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]),
[ftmp10]"=&f"(ftmp[10]), [ftmp11]"=&f"(ftmp[11]),
[ftmp12]"=&f"(ftmp[12]), [tmp0]"=&r"(tmp[0]),
[src_ptr]"+&r"(src_ptr), [count]"+&r"(count)
: [limit]"r"(limit), [blimit]"r"(blimit),
[thresh]"r"(thresh),
[tmp0]"=&r"(tmp[0]),
[addr0]"=&r"(addr[0]), [addr1]"=&r"(addr[1]),
[src_ptr]"+&r"(src_ptr), [count]"+&r"(count)
: [limit]"r"(limit), [blimit]"r"(blimit),
[srct]"r"(srct), [thresh]"r"(thresh),
[src_pixel_step]"r"((mips_reg)src_pixel_step),
[ff_pb_fe]"f"(ff_pb_fe), [ff_pb_80]"f"(ff_pb_80),
[ff_pb_04]"f"(ff_pb_04), [ff_pb_03]"f"(ff_pb_03),
[ff_ph_0900]"f"(ff_ph_0900), [ff_ph_1b00]"f"(ff_ph_1b00),
[ff_ph_1200]"f"(ff_ph_1200), [ff_ph_003f]"f"(ff_ph_003f)
[ff_pb_fe]"f"(ff_pb_fe), [ff_pb_80]"f"(ff_pb_80),
[ff_pb_04]"f"(ff_pb_04), [ff_pb_03]"f"(ff_pb_03),
[ff_ph_0900]"f"(ff_ph_0900), [ff_ph_1b00]"f"(ff_ph_1b00),
[ff_ph_1200]"f"(ff_ph_1200), [ff_ph_003f]"f"(ff_ph_003f)
: "memory"
);
}
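The psubusb/or chains above are the SIMD form of the scalar filter-mask test: a saturating subtract against limit is non-zero exactly when a neighbor difference exceeds it, and the combined p0/q0, p1/q1 term is checked against blimit. A scalar sketch of the same computation (modeled on the C reference in vp8/common/loopfilter_filters.c; the helper name here is illustrative):

    #include <stdlib.h>

    typedef unsigned char uc;

    /* Returns all-ones (-1) when the edge should be filtered, 0 otherwise. */
    static signed char filter_mask(uc limit, uc blimit, uc p3, uc p2, uc p1,
                                   uc p0, uc q0, uc q1, uc q2, uc q3) {
      signed char mask = 0;
      mask |= (abs(p3 - p2) > limit);
      mask |= (abs(p2 - p1) > limit);
      mask |= (abs(p1 - p0) > limit);
      mask |= (abs(q1 - q0) > limit);
      mask |= (abs(q2 - q1) > limit);
      mask |= (abs(q3 - q2) > limit);
      /* abs(p0-q0)*2 + abs(p1-q1)/2: the blimit term built with
         paddusb/psrlh in the MMI code above. */
      mask |= (abs(p0 - q0) * 2 + abs(p1 - q1) / 2 > blimit);
      return mask - 1;
    }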
@@ -678,60 +696,64 @@ void vp8_mbloop_filter_vertical_edge_mmi(
unsigned char *src_ptr, int src_pixel_step, const unsigned char *blimit,
const unsigned char *limit, const unsigned char *thresh, int count) {
mips_reg tmp[1];
mips_reg addr[2];
DECLARE_ALIGNED(8, const uint64_t, srct[1]);
double ftmp[14];
double ftmp[13];
__asm__ volatile (
MMI_SLL(%[tmp0], %[src_pixel_step], 0x02)
MMI_ADDU(%[src_ptr], %[src_ptr], %[tmp0])
MMI_SUBU(%[src_ptr], %[src_ptr], 0x04)
"1: \n\t"
"gsldlc1 %[ftmp5], 0x07(%[src_ptr]) \n\t"
"gsldrc1 %[ftmp5], 0x00(%[src_ptr]) \n\t"
MMI_ADDU(%[src_ptr], %[src_ptr], %[src_pixel_step])
"gsldlc1 %[ftmp6], 0x07(%[src_ptr]) \n\t"
"gsldrc1 %[ftmp6], 0x00(%[src_ptr]) \n\t"
MMI_ADDU(%[src_ptr], %[src_ptr], %[src_pixel_step])
"gsldlc1 %[ftmp7], 0x07(%[src_ptr]) \n\t"
"gsldrc1 %[ftmp7], 0x00(%[src_ptr]) \n\t"
MMI_ADDU(%[src_ptr], %[src_ptr], %[src_pixel_step])
"gsldlc1 %[ftmp8], 0x07(%[src_ptr]) \n\t"
"gsldrc1 %[ftmp8], 0x00(%[src_ptr]) \n\t"
MMI_SLL (%[tmp0], %[src_pixel_step], 0x01)
MMI_ADDU(%[addr0], %[src_ptr], %[tmp0])
"gsldlc1 %[ftmp11], 0x07(%[addr0]) \n\t"
"gsldrc1 %[ftmp11], 0x00(%[addr0]) \n\t"
MMI_ADDU(%[addr0], %[addr0], %[src_pixel_step])
"gsldlc1 %[ftmp12], 0x07(%[addr0]) \n\t"
"gsldrc1 %[ftmp12], 0x00(%[addr0]) \n\t"
"punpcklbh %[ftmp1], %[ftmp11], %[ftmp12] \n\t"
"punpckhbh %[ftmp2], %[ftmp11], %[ftmp12] \n\t"
"punpcklbh %[ftmp11], %[ftmp5], %[ftmp6] \n\t"
"punpckhbh %[ftmp12], %[ftmp5], %[ftmp6] \n\t"
"punpcklbh %[ftmp9], %[ftmp7], %[ftmp8] \n\t"
"punpckhbh %[ftmp10], %[ftmp7], %[ftmp8] \n\t"
"gsldlc1 %[ftmp11], 0x07(%[src_ptr]) \n\t"
"gsldrc1 %[ftmp11], 0x00(%[src_ptr]) \n\t"
MMI_ADDU(%[addr0], %[src_ptr], %[src_pixel_step])
"gsldlc1 %[ftmp12], 0x07(%[addr0]) \n\t"
"gsldrc1 %[ftmp12], 0x00(%[addr0]) \n\t"
"punpcklbh %[ftmp3], %[ftmp11], %[ftmp12] \n\t"
"punpckhbh %[ftmp4], %[ftmp11], %[ftmp12] \n\t"
"punpcklhw %[ftmp1], %[ftmp12], %[ftmp10] \n\t"
"punpckhhw %[ftmp2], %[ftmp12], %[ftmp10] \n\t"
"punpcklhw %[ftmp3], %[ftmp11], %[ftmp9] \n\t"
"punpckhhw %[ftmp4], %[ftmp11], %[ftmp9] \n\t"
"punpcklhw %[ftmp5], %[ftmp4], %[ftmp2] \n\t"
"punpckhhw %[ftmp6], %[ftmp4], %[ftmp2] \n\t"
"punpcklhw %[ftmp7], %[ftmp3], %[ftmp1] \n\t"
"punpckhhw %[ftmp8], %[ftmp3], %[ftmp1] \n\t"
MMI_ADDU(%[src_ptr], %[src_ptr], %[src_pixel_step])
"gsldlc1 %[ftmp5], 0x07(%[src_ptr]) \n\t"
"gsldrc1 %[ftmp5], 0x00(%[src_ptr]) \n\t"
MMI_ADDU(%[src_ptr], %[src_ptr], %[src_pixel_step])
"gsldlc1 %[ftmp6], 0x07(%[src_ptr]) \n\t"
"gsldrc1 %[ftmp6], 0x00(%[src_ptr]) \n\t"
MMI_ADDU(%[src_ptr], %[src_ptr], %[src_pixel_step])
"gsldlc1 %[ftmp7], 0x07(%[src_ptr]) \n\t"
"gsldrc1 %[ftmp7], 0x00(%[src_ptr]) \n\t"
MMI_ADDU(%[src_ptr], %[src_ptr], %[src_pixel_step])
"gsldlc1 %[ftmp8], 0x07(%[src_ptr]) \n\t"
"gsldrc1 %[ftmp8], 0x00(%[src_ptr]) \n\t"
MMI_SLL(%[tmp0], %[src_pixel_step], 0x01)
MMI_SUBU(%[addr0], %[src_ptr], %[tmp0])
"gsldlc1 %[ftmp11], 0x07(%[addr0]) \n\t"
"gsldrc1 %[ftmp11], 0x00(%[addr0]) \n\t"
MMI_SUBU(%[addr0], %[src_ptr], %[src_pixel_step])
"gsldlc1 %[ftmp12], 0x07(%[addr0]) \n\t"
"gsldrc1 %[ftmp12], 0x00(%[addr0]) \n\t"
"punpcklbh %[ftmp9], %[ftmp11], %[ftmp12] \n\t"
"punpckhbh %[ftmp10], %[ftmp11], %[ftmp12] \n\t"
"punpcklbh %[ftmp11], %[ftmp5], %[ftmp6] \n\t"
"punpckhbh %[ftmp12], %[ftmp5], %[ftmp6] \n\t"
"punpcklbh %[ftmp9], %[ftmp7], %[ftmp8] \n\t"
"punpckhbh %[ftmp10], %[ftmp7], %[ftmp8] \n\t"
MMI_SLL(%[tmp0], %[src_pixel_step], 0x02)
MMI_SUBU(%[addr0], %[src_ptr], %[tmp0])
"gsldlc1 %[ftmp11], 0x07(%[addr0]) \n\t"
"gsldrc1 %[ftmp11], 0x00(%[addr0]) \n\t"
MMI_ADDU(%[addr0], %[addr0], %[src_pixel_step])
"gsldlc1 %[ftmp12], 0x07(%[addr0]) \n\t"
"gsldrc1 %[ftmp12], 0x00(%[addr0]) \n\t"
"punpcklbh %[ftmp0], %[ftmp11], %[ftmp12] \n\t"
"punpckhbh %[ftmp11], %[ftmp11], %[ftmp12] \n\t"
"punpcklhw %[ftmp5], %[ftmp12], %[ftmp10] \n\t"
"punpckhhw %[ftmp6], %[ftmp12], %[ftmp10] \n\t"
"punpcklhw %[ftmp7], %[ftmp11], %[ftmp9] \n\t"
"punpckhhw %[ftmp8], %[ftmp11], %[ftmp9] \n\t"
"punpcklhw %[ftmp1], %[ftmp11], %[ftmp10] \n\t"
"punpckhhw %[ftmp2], %[ftmp11], %[ftmp10] \n\t"
"punpcklhw %[ftmp3], %[ftmp0], %[ftmp9] \n\t"
"punpckhhw %[ftmp4], %[ftmp0], %[ftmp9] \n\t"
"gsldlc1 %[ftmp13], 0x07(%[limit]) \n\t"
"gsldrc1 %[ftmp13], 0x00(%[limit]) \n\t"
/* ftmp9:q0 ftmp10:q1 */
"punpcklwd %[ftmp9], %[ftmp1], %[ftmp5] \n\t"
"punpckhwd %[ftmp10], %[ftmp1], %[ftmp5] \n\t"
@@ -749,61 +771,60 @@ void vp8_mbloop_filter_vertical_edge_mmi(
"punpcklwd %[ftmp5], %[ftmp4], %[ftmp8] \n\t"
"punpckhwd %[ftmp6], %[ftmp4], %[ftmp8] \n\t"
"gsldlc1 %[ftmp8], 0x07(%[limit]) \n\t"
"gsldrc1 %[ftmp8], 0x00(%[limit]) \n\t"
/* abs (q3-q2) */
"pasubub %[ftmp7], %[ftmp12], %[ftmp11] \n\t"
"psubusb %[ftmp0], %[ftmp7], %[ftmp13] \n\t"
"psubusb %[ftmp0], %[ftmp7], %[ftmp8] \n\t"
/* abs (q2-q1) */
"pasubub %[ftmp7], %[ftmp11], %[ftmp10] \n\t"
"psubusb %[ftmp7], %[ftmp7], %[ftmp13] \n\t"
"psubusb %[ftmp7], %[ftmp7], %[ftmp8] \n\t"
"or %[ftmp0], %[ftmp0], %[ftmp7] \n\t"
/* ftmp3: abs(q1-q0) */
"pasubub %[ftmp3], %[ftmp10], %[ftmp9] \n\t"
"psubusb %[ftmp7], %[ftmp3], %[ftmp13] \n\t"
"psubusb %[ftmp7], %[ftmp3], %[ftmp8] \n\t"
"or %[ftmp0], %[ftmp0], %[ftmp7] \n\t"
/* ftmp4: abs(p1-p0) */
"pasubub %[ftmp4], %[ftmp5], %[ftmp6] \n\t"
"psubusb %[ftmp7], %[ftmp4], %[ftmp13] \n\t"
"psubusb %[ftmp7], %[ftmp4], %[ftmp8] \n\t"
"or %[ftmp0], %[ftmp0], %[ftmp7] \n\t"
/* abs (p2-p1) */
"pasubub %[ftmp7], %[ftmp2], %[ftmp5] \n\t"
"psubusb %[ftmp7], %[ftmp7], %[ftmp13] \n\t"
"psubusb %[ftmp7], %[ftmp7], %[ftmp8] \n\t"
"or %[ftmp0], %[ftmp0], %[ftmp7] \n\t"
/* abs (p3-p2) */
"pasubub %[ftmp7], %[ftmp1], %[ftmp2] \n\t"
"psubusb %[ftmp7], %[ftmp7], %[ftmp13] \n\t"
"psubusb %[ftmp7], %[ftmp7], %[ftmp8] \n\t"
"or %[ftmp0], %[ftmp0], %[ftmp7] \n\t"
"gsldlc1 %[ftmp13], 0x07(%[blimit]) \n\t"
"gsldrc1 %[ftmp13], 0x00(%[blimit]) \n\t"
"gsldlc1 %[ftmp7], 0x07(%[thresh]) \n\t"
"gsldrc1 %[ftmp7], 0x00(%[thresh]) \n\t"
/* abs (p0-q0) * 2 */
/* abs (p0-q0) */
"pasubub %[ftmp1], %[ftmp9], %[ftmp6] \n\t"
"paddusb %[ftmp1], %[ftmp1], %[ftmp1] \n\t"
/* abs (p1-q1) / 2 */
/* abs (p1-q1) */
"pasubub %[ftmp12], %[ftmp10], %[ftmp5] \n\t"
"and %[ftmp12], %[ftmp12], %[ff_pb_fe] \n\t"
"li %[tmp0], 0x01 \n\t"
"mtc1 %[tmp0], %[ftmp8] \n\t"
"psrlh %[ftmp12], %[ftmp12], %[ftmp8] \n\t"
"paddusb %[ftmp12], %[ftmp1], %[ftmp12] \n\t"
"psubusb %[ftmp12], %[ftmp12], %[ftmp13] \n\t"
"gsldlc1 %[ftmp8], 0x07(%[blimit]) \n\t"
"gsldrc1 %[ftmp8], 0x00(%[blimit]) \n\t"
"psubusb %[ftmp12], %[ftmp12], %[ftmp8] \n\t"
"or %[ftmp0], %[ftmp0], %[ftmp12] \n\t"
"xor %[ftmp12], %[ftmp12], %[ftmp12] \n\t"
/* ftmp0: mask */
"pcmpeqb %[ftmp0], %[ftmp0], %[ftmp12] \n\t"
/* abs(p1-p0) - thresh */
"psubusb %[ftmp4], %[ftmp4], %[ftmp7] \n\t"
/* abs(q1-q0) - thresh */
"psubusb %[ftmp3], %[ftmp3], %[ftmp7] \n\t"
"gsldlc1 %[ftmp8], 0x07(%[thresh]) \n\t"
"gsldrc1 %[ftmp8], 0x00(%[thresh]) \n\t"
/* ftmp3: abs(q1-q0) ftmp4: abs(p1-p0) */
"psubusb %[ftmp4], %[ftmp4], %[ftmp8] \n\t"
"psubusb %[ftmp3], %[ftmp3], %[ftmp8] \n\t"
"or %[ftmp3], %[ftmp4], %[ftmp3] \n\t"
"pcmpeqb %[ftmp3], %[ftmp3], %[ftmp12] \n\t"
"pcmpeqb %[ftmp1], %[ftmp1], %[ftmp1] \n\t"
/* ftmp1: hev */
"xor %[ftmp1], %[ftmp3], %[ftmp1] \n\t"
/* ftmp2:ps2, ftmp5:ps1, ftmp6:ps0, ftmp9:qs0, ftmp10:qs1, ftmp11:qs2 */
"xor %[ftmp11], %[ftmp11], %[ff_pb_80] \n\t"
"xor %[ftmp10], %[ftmp10], %[ff_pb_80] \n\t"
"xor %[ftmp9], %[ftmp9], %[ff_pb_80] \n\t"
@@ -816,30 +837,30 @@ void vp8_mbloop_filter_vertical_edge_mmi(
"paddsb %[ftmp3], %[ftmp3], %[ftmp4] \n\t"
"paddsb %[ftmp3], %[ftmp3], %[ftmp4] \n\t"
"paddsb %[ftmp3], %[ftmp3], %[ftmp4] \n\t"
/* filter_value &= mask */
"and %[ftmp0], %[ftmp0], %[ftmp3] \n\t"
/* Filter2 = filter_value & hev */
"and %[ftmp3], %[ftmp1], %[ftmp0] \n\t"
/* filter_value &= ~hev */
"pandn %[ftmp0], %[ftmp1], %[ftmp0] \n\t"
"paddsb %[ftmp4], %[ftmp3], %[ff_pb_04] \n\t"
"li %[tmp0], 0x0b \n\t"
"mtc1 %[tmp0], %[ftmp12] \n\t"
"xor %[ftmp7], %[ftmp7], %[ftmp7] \n\t"
"xor %[ftmp8], %[ftmp8], %[ftmp8] \n\t"
"punpcklbh %[ftmp7], %[ftmp7], %[ftmp4] \n\t"
"punpckhbh %[ftmp8], %[ftmp8], %[ftmp4] \n\t"
"psrah %[ftmp7], %[ftmp7], %[ftmp12] \n\t"
"psrah %[ftmp8], %[ftmp8], %[ftmp12] \n\t"
"packsshb %[ftmp4], %[ftmp7], %[ftmp8] \n\t"
/* ftmp9: qs0 */
"psubsb %[ftmp9], %[ftmp9], %[ftmp4] \n\t"
"paddsb %[ftmp3], %[ftmp3], %[ff_pb_03] \n\t"
"xor %[ftmp7], %[ftmp7], %[ftmp7] \n\t"
"xor %[ftmp8], %[ftmp8], %[ftmp8] \n\t"
"punpcklbh %[ftmp7], %[ftmp7], %[ftmp3] \n\t"
"punpckhbh %[ftmp8], %[ftmp8], %[ftmp3] \n\t"
"psrah %[ftmp7], %[ftmp7], %[ftmp12] \n\t"
"psrah %[ftmp8], %[ftmp8], %[ftmp12] \n\t"
"packsshb %[ftmp3], %[ftmp7], %[ftmp8] \n\t"
/* ftmp6: ps0 */
"paddsb %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
"li %[tmp0], 0x07 \n\t"
@@ -851,10 +872,8 @@ void vp8_mbloop_filter_vertical_edge_mmi(
"pmulhh %[ftmp8], %[ftmp8], %[ftmp1] \n\t"
VP8_MBLOOP_VPSRAB_ADDT
"psubsb %[ftmp4], %[ftmp9], %[ftmp3] \n\t"
/* ftmp9: oq0 */
"xor %[ftmp9], %[ftmp4], %[ff_pb_80] \n\t"
"paddsb %[ftmp4], %[ftmp6], %[ftmp3] \n\t"
/* ftmp6: op0 */
"xor %[ftmp6], %[ftmp4], %[ff_pb_80] \n\t"
VP8_MBLOOP_VPSRAB_ADDH
@@ -863,10 +882,8 @@ void vp8_mbloop_filter_vertical_edge_mmi(
"pmulhh %[ftmp8], %[ftmp8], %[ftmp1] \n\t"
VP8_MBLOOP_VPSRAB_ADDT
"psubsb %[ftmp4], %[ftmp10], %[ftmp3] \n\t"
/* ftmp10: oq1 */
"xor %[ftmp10], %[ftmp4], %[ff_pb_80] \n\t"
"paddsb %[ftmp4], %[ftmp5], %[ftmp3] \n\t"
/* ftmp5: op1 */
"xor %[ftmp5], %[ftmp4], %[ff_pb_80] \n\t"
VP8_MBLOOP_VPSRAB_ADDH
@@ -874,10 +891,8 @@ void vp8_mbloop_filter_vertical_edge_mmi(
"pmulhh %[ftmp8], %[ftmp8], %[ff_ph_0900] \n\t"
VP8_MBLOOP_VPSRAB_ADDT
"psubsb %[ftmp4], %[ftmp11], %[ftmp3] \n\t"
/* ftmp11: oq2 */
"xor %[ftmp11], %[ftmp4], %[ff_pb_80] \n\t"
"paddsb %[ftmp4], %[ftmp2], %[ftmp3] \n\t"
/* ftmp2: op2 */
"xor %[ftmp2], %[ftmp4], %[ff_pb_80] \n\t"
"ldc1 %[ftmp12], 0x00(%[srct]) \n\t"
@@ -901,40 +916,41 @@ void vp8_mbloop_filter_vertical_edge_mmi(
"punpcklhw %[ftmp10], %[ftmp1], %[ftmp3] \n\t"
"punpckhhw %[ftmp11], %[ftmp1], %[ftmp3] \n\t"
"punpcklwd %[ftmp0], %[ftmp7], %[ftmp11] \n\t"
"punpckhwd %[ftmp1], %[ftmp7], %[ftmp11] \n\t"
"gssdlc1 %[ftmp1], 0x07(%[src_ptr]) \n\t"
"gssdrc1 %[ftmp1], 0x00(%[src_ptr]) \n\t"
MMI_SUBU(%[src_ptr], %[src_ptr], %[src_pixel_step])
"gssdlc1 %[ftmp0], 0x07(%[src_ptr]) \n\t"
"gssdrc1 %[ftmp0], 0x00(%[src_ptr]) \n\t"
"punpcklwd %[ftmp0], %[ftmp6], %[ftmp10] \n\t"
"punpckhwd %[ftmp1], %[ftmp6], %[ftmp10] \n\t"
MMI_SUBU(%[src_ptr], %[src_ptr], %[src_pixel_step])
"gssdlc1 %[ftmp1], 0x07(%[src_ptr]) \n\t"
"gssdrc1 %[ftmp1], 0x00(%[src_ptr]) \n\t"
MMI_SUBU(%[src_ptr], %[src_ptr], %[src_pixel_step])
"gssdlc1 %[ftmp0], 0x07(%[src_ptr]) \n\t"
"gssdrc1 %[ftmp0], 0x00(%[src_ptr]) \n\t"
MMI_ADDU(%[addr0], %[src_ptr], %[src_pixel_step])
"gssdlc1 %[ftmp1], 0x07(%[addr0]) \n\t"
"gssdrc1 %[ftmp1], 0x00(%[addr0]) \n\t"
"punpcklwd %[ftmp0], %[ftmp7], %[ftmp11] \n\t"
"punpckhwd %[ftmp1], %[ftmp7], %[ftmp11] \n\t"
MMI_ADDU(%[addr0], %[addr0], %[src_pixel_step])
"gssdlc1 %[ftmp0], 0x07(%[addr0]) \n\t"
"gssdrc1 %[ftmp0], 0x00(%[addr0]) \n\t"
MMI_ADDU(%[addr0], %[addr0], %[src_pixel_step])
"gssdlc1 %[ftmp1], 0x07(%[addr0]) \n\t"
"gssdrc1 %[ftmp1], 0x00(%[addr0]) \n\t"
"punpcklwd %[ftmp1], %[ftmp5], %[ftmp9] \n\t"
"punpckhwd %[ftmp0], %[ftmp5], %[ftmp9] \n\t"
MMI_SUBU(%[src_ptr], %[src_ptr], %[src_pixel_step])
"gssdlc1 %[ftmp0], 0x07(%[src_ptr]) \n\t"
"gssdrc1 %[ftmp0], 0x00(%[src_ptr]) \n\t"
MMI_SUBU(%[src_ptr], %[src_ptr], %[src_pixel_step])
"gssdlc1 %[ftmp1], 0x07(%[src_ptr]) \n\t"
"gssdrc1 %[ftmp1], 0x00(%[src_ptr]) \n\t"
MMI_SUBU(%[addr0], %[src_ptr], %[src_pixel_step])
"gssdlc1 %[ftmp0], 0x07(%[addr0]) \n\t"
"gssdrc1 %[ftmp0], 0x00(%[addr0]) \n\t"
MMI_SUBU(%[addr0], %[addr0], %[src_pixel_step])
"gssdlc1 %[ftmp1], 0x07(%[addr0]) \n\t"
"gssdrc1 %[ftmp1], 0x00(%[addr0]) \n\t"
"punpcklwd %[ftmp1], %[ftmp4], %[ftmp8] \n\t"
"punpckhwd %[ftmp0], %[ftmp4], %[ftmp8] \n\t"
MMI_SUBU(%[src_ptr], %[src_ptr], %[src_pixel_step])
"gssdlc1 %[ftmp0], 0x07(%[src_ptr]) \n\t"
"gssdrc1 %[ftmp0], 0x00(%[src_ptr]) \n\t"
MMI_SUBU(%[src_ptr], %[src_ptr], %[src_pixel_step])
"gssdlc1 %[ftmp1], 0x07(%[src_ptr]) \n\t"
"gssdrc1 %[ftmp1], 0x00(%[src_ptr]) \n\t"
MMI_SUBU(%[addr0], %[addr0], %[src_pixel_step])
"gssdlc1 %[ftmp0], 0x07(%[addr0]) \n\t"
"gssdrc1 %[ftmp0], 0x00(%[addr0]) \n\t"
MMI_SUBU(%[addr0], %[addr0], %[src_pixel_step])
"gssdlc1 %[ftmp1], 0x07(%[addr0]) \n\t"
"gssdrc1 %[ftmp1], 0x00(%[addr0]) \n\t"
"addiu %[count], %[count], -0x01 \n\t"
MMI_SLL(%[tmp0], %[src_pixel_step], 0x03)
@@ -946,9 +962,9 @@ void vp8_mbloop_filter_vertical_edge_mmi(
[ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
[ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]),
[ftmp10]"=&f"(ftmp[10]), [ftmp11]"=&f"(ftmp[11]),
[ftmp12]"=&f"(ftmp[12]), [ftmp13]"=&f"(ftmp[13]),
[tmp0]"=&r"(tmp[0]), [src_ptr]"+&r"(src_ptr),
[count]"+&r"(count)
[ftmp12]"=&f"(ftmp[12]), [tmp0]"=&r"(tmp[0]),
[addr0]"=&r"(addr[0]),
[src_ptr]"+&r"(src_ptr), [count]"+&r"(count)
: [limit]"r"(limit), [blimit]"r"(blimit),
[srct]"r"(srct), [thresh]"r"(thresh),
[src_pixel_step]"r"((mips_reg)src_pixel_step),

View File

@@ -86,7 +86,6 @@ static INLINE void vp8_filter_block1d_h6_mmi(unsigned char *src_ptr,
register double ftmp8 asm("$f18");
register double ftmp9 asm("$f20");
register double ftmp10 asm("$f22");
register double ftmp11 asm("$f24");
#else
register double fzero asm("$f0");
register double ftmp0 asm("$f1");
@@ -100,7 +99,6 @@ static INLINE void vp8_filter_block1d_h6_mmi(unsigned char *src_ptr,
register double ftmp8 asm("$f9");
register double ftmp9 asm("$f10");
register double ftmp10 asm("$f11");
register double ftmp11 asm("$f12");
#endif // _MIPS_SIM == _ABIO32
__asm__ volatile (
@@ -114,13 +112,11 @@ static INLINE void vp8_filter_block1d_h6_mmi(unsigned char *src_ptr,
"li %[tmp0], 0x07 \n\t"
"mtc1 %[tmp0], %[ftmp7] \n\t"
"li %[tmp0], 0x08 \n\t"
"mtc1 %[tmp0], %[ftmp11] \n\t"
"mtc1 %[tmp0], %[ftmp10] \n\t"
"1: \n\t"
"gsldlc1 %[ftmp9], 0x05(%[src_ptr]) \n\t"
"gsldrc1 %[ftmp9], -0x02(%[src_ptr]) \n\t"
"gsldlc1 %[ftmp10], 0x06(%[src_ptr]) \n\t"
"gsldrc1 %[ftmp10], -0x01(%[src_ptr]) \n\t"
"gsldrc1 %[ftmp9], -0x02(%[src_ptr]) \n\t"
"punpcklbh %[ftmp6], %[ftmp9], %[fzero] \n\t"
"pmullh %[ftmp8], %[ftmp6], %[ftmp0] \n\t"
@@ -129,21 +125,24 @@ static INLINE void vp8_filter_block1d_h6_mmi(unsigned char *src_ptr,
"pmullh %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
"paddsh %[ftmp8], %[ftmp8], %[ftmp6] \n\t"
"punpcklbh %[ftmp6], %[ftmp10], %[fzero] \n\t"
"gsldlc1 %[ftmp9], 0x06(%[src_ptr]) \n\t"
"gsldrc1 %[ftmp9], -0x01(%[src_ptr]) \n\t"
"punpcklbh %[ftmp6], %[ftmp9], %[fzero] \n\t"
"pmullh %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
"paddsh %[ftmp8], %[ftmp8], %[ftmp6] \n\t"
"punpckhbh %[ftmp6], %[ftmp10], %[fzero] \n\t"
"punpckhbh %[ftmp6], %[ftmp9], %[fzero] \n\t"
"pmullh %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
"paddsh %[ftmp8], %[ftmp8], %[ftmp6] \n\t"
"dsrl %[ftmp10], %[ftmp10], %[ftmp11] \n\t"
"punpcklbh %[ftmp6], %[ftmp10], %[fzero] \n\t"
"dsrl %[ftmp9], %[ftmp9], %[ftmp10] \n\t"
"punpcklbh %[ftmp6], %[ftmp9], %[fzero] \n\t"
"pmullh %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
"paddsh %[ftmp8], %[ftmp8], %[ftmp6] \n\t"
"dsrl %[ftmp10], %[ftmp10], %[ftmp11] \n\t"
"punpcklbh %[ftmp6], %[ftmp10], %[fzero] \n\t"
"dsrl %[ftmp9], %[ftmp9], %[ftmp10] \n\t"
"punpcklbh %[ftmp6], %[ftmp9], %[fzero] \n\t"
"pmullh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
"paddsh %[ftmp8], %[ftmp8], %[ftmp6] \n\t"
@@ -164,9 +163,8 @@ static INLINE void vp8_filter_block1d_h6_mmi(unsigned char *src_ptr,
[ftmp5]"=&f"(ftmp5), [ftmp6]"=&f"(ftmp6),
[ftmp7]"=&f"(ftmp7), [ftmp8]"=&f"(ftmp8),
[ftmp9]"=&f"(ftmp9), [ftmp10]"=&f"(ftmp10),
[ftmp11]"=&f"(ftmp11), [tmp0]"=&r"(tmp[0]),
[output_ptr]"+&r"(output_ptr), [output_height]"+&r"(output_height),
[src_ptr]"+&r"(src_ptr)
[tmp0]"=&r"(tmp[0]), [src_ptr]"+&r"(src_ptr),
[output_ptr]"+&r"(output_ptr), [output_height]"+&r"(output_height)
: [src_pixels_per_line]"r"((mips_reg)src_pixels_per_line),
[vp8_filter]"r"(vp8_filter), [output_width]"r"(output_width),
[ff_ph_40]"f"(ff_ph_40)
@@ -192,11 +190,6 @@ static INLINE void vp8_filter_block1dc_v6_mmi(
register double ftmp6 asm("$f14");
register double ftmp7 asm("$f16");
register double ftmp8 asm("$f18");
register double ftmp9 asm("$f20");
register double ftmp10 asm("$f22");
register double ftmp11 asm("$f24");
register double ftmp12 asm("$f26");
register double ftmp13 asm("$f28");
#else
register double fzero asm("$f0");
register double ftmp0 asm("$f1");
@@ -208,11 +201,6 @@ static INLINE void vp8_filter_block1dc_v6_mmi(
register double ftmp6 asm("$f7");
register double ftmp7 asm("$f8");
register double ftmp8 asm("$f9");
register double ftmp9 asm("$f10");
register double ftmp10 asm("$f11");
register double ftmp11 asm("$f12");
register double ftmp12 asm("$f13");
register double ftmp13 asm("$f14");
#endif // _MIPS_SIM == _ABIO32
__asm__ volatile (
@@ -222,56 +210,52 @@ static INLINE void vp8_filter_block1dc_v6_mmi(
"ldc1 %[ftmp3], 0x30(%[vp8_filter]) \n\t"
"ldc1 %[ftmp4], 0x40(%[vp8_filter]) \n\t"
"ldc1 %[ftmp5], 0x50(%[vp8_filter]) \n\t"
MMI_SUBU(%[src_ptr], %[src_ptr], %[pixels_per_line_x2])
"xor %[fzero], %[fzero], %[fzero] \n\t"
"li %[tmp0], 0x07 \n\t"
"mtc1 %[tmp0], %[ftmp13] \n\t"
"mtc1 %[tmp0], %[ftmp7] \n\t"
/* In order to make full use of the memory load delay slot, the memory
 * loads and the arithmetic have been rearranged.
 */
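/* Concretely, in the rearranged version below: the six gsldlc1/gsldrc1
 * row loads into ftmp6..ftmp11 are all issued first, interleaved only
 * with address arithmetic, and the pmullh/paddsh filter chain runs
 * afterwards, so the load latency is hidden behind useful work instead
 * of stalling each multiply. */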
"1: \n\t"
"gsldlc1 %[ftmp6], 0x07(%[src_ptr]) \n\t"
"gsldrc1 %[ftmp6], 0x00(%[src_ptr]) \n\t"
"pmullh %[ftmp8], %[ftmp6], %[ftmp0] \n\t"
MMI_ADDU(%[addr0], %[src_ptr], %[pixels_per_line])
"gsldlc1 %[ftmp7], 0x07(%[addr0]) \n\t"
"gsldrc1 %[ftmp7], 0x00(%[addr0]) \n\t"
"gsldlc1 %[ftmp6], 0x07(%[addr0]) \n\t"
"gsldrc1 %[ftmp6], 0x00(%[addr0]) \n\t"
"pmullh %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
"paddsh %[ftmp8], %[ftmp8], %[ftmp6] \n\t"
MMI_ADDU(%[addr0], %[src_ptr], %[pixels_per_line_x2])
"gsldlc1 %[ftmp8], 0x07(%[addr0]) \n\t"
"gsldrc1 %[ftmp8], 0x00(%[addr0]) \n\t"
"gsldlc1 %[ftmp6], 0x07(%[addr0]) \n\t"
"gsldrc1 %[ftmp6], 0x00(%[addr0]) \n\t"
"pmullh %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
"paddsh %[ftmp8], %[ftmp8], %[ftmp6] \n\t"
MMI_ADDU(%[addr0], %[src_ptr], %[pixels_per_line_x4])
"gsldlc1 %[ftmp9], 0x07(%[addr0]) \n\t"
"gsldrc1 %[ftmp9], 0x00(%[addr0]) \n\t"
"gsldlc1 %[ftmp6], 0x07(%[addr0]) \n\t"
"gsldrc1 %[ftmp6], 0x00(%[addr0]) \n\t"
"pmullh %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
"paddsh %[ftmp8], %[ftmp8], %[ftmp6] \n\t"
MMI_ADDU(%[src_ptr], %[src_ptr], %[pixels_per_line])
MMI_ADDU(%[addr0], %[src_ptr], %[pixels_per_line_x2])
"gsldlc1 %[ftmp10], 0x07(%[addr0]) \n\t"
"gsldrc1 %[ftmp10], 0x00(%[addr0]) \n\t"
"gsldlc1 %[ftmp6], 0x07(%[addr0]) \n\t"
"gsldrc1 %[ftmp6], 0x00(%[addr0]) \n\t"
"pmullh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
"paddsh %[ftmp8], %[ftmp8], %[ftmp6] \n\t"
MMI_ADDU(%[addr0], %[src_ptr], %[pixels_per_line_x4])
"gsldlc1 %[ftmp11], 0x07(%[addr0]) \n\t"
"gsldrc1 %[ftmp11], 0x00(%[addr0]) \n\t"
"gsldlc1 %[ftmp6], 0x07(%[addr0]) \n\t"
"gsldrc1 %[ftmp6], 0x00(%[addr0]) \n\t"
"pmullh %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
"paddsh %[ftmp8], %[ftmp8], %[ftmp6] \n\t"
"pmullh %[ftmp12], %[ftmp6], %[ftmp0] \n\t"
"pmullh %[ftmp7], %[ftmp7], %[ftmp1] \n\t"
"paddsh %[ftmp12], %[ftmp12], %[ftmp7] \n\t"
"pmullh %[ftmp8], %[ftmp8], %[ftmp2] \n\t"
"paddsh %[ftmp12], %[ftmp12], %[ftmp8] \n\t"
"pmullh %[ftmp9], %[ftmp9], %[ftmp4] \n\t"
"paddsh %[ftmp12], %[ftmp12], %[ftmp9] \n\t"
"pmullh %[ftmp10], %[ftmp10], %[ftmp3] \n\t"
"paddsh %[ftmp12], %[ftmp12], %[ftmp10] \n\t"
"pmullh %[ftmp11], %[ftmp11], %[ftmp5] \n\t"
"paddsh %[ftmp12], %[ftmp12], %[ftmp11] \n\t"
"paddsh %[ftmp12], %[ftmp12], %[ff_ph_40] \n\t"
"psrah %[ftmp12], %[ftmp12], %[ftmp13] \n\t"
"packushb %[ftmp12], %[ftmp12], %[fzero] \n\t"
"gsswlc1 %[ftmp12], 0x03(%[output_ptr]) \n\t"
"gsswrc1 %[ftmp12], 0x00(%[output_ptr]) \n\t"
"paddsh %[ftmp8], %[ftmp8], %[ff_ph_40] \n\t"
"psrah %[ftmp8], %[ftmp8], %[ftmp7] \n\t"
"packushb %[ftmp8], %[ftmp8], %[fzero] \n\t"
"gsswlc1 %[ftmp8], 0x03(%[output_ptr]) \n\t"
"gsswrc1 %[ftmp8], 0x00(%[output_ptr]) \n\t"
MMI_ADDIU(%[output_height], %[output_height], -0x01)
MMI_ADDU(%[output_ptr], %[output_ptr], %[output_pitch])
@@ -281,11 +265,9 @@ static INLINE void vp8_filter_block1dc_v6_mmi(
[ftmp3]"=&f"(ftmp3), [ftmp4]"=&f"(ftmp4),
[ftmp5]"=&f"(ftmp5), [ftmp6]"=&f"(ftmp6),
[ftmp7]"=&f"(ftmp7), [ftmp8]"=&f"(ftmp8),
[ftmp9]"=&f"(ftmp9), [ftmp10]"=&f"(ftmp10),
[ftmp11]"=&f"(ftmp11), [ftmp12]"=&f"(ftmp12),
[ftmp13]"=&f"(ftmp13), [tmp0]"=&r"(tmp[0]),
[addr0]"=&r"(addr[0]), [src_ptr]"+&r"(src_ptr),
[output_ptr]"+&r"(output_ptr), [output_height]"+&r"(output_height)
[tmp0]"=&r"(tmp[0]), [addr0]"=&r"(addr[0]),
[src_ptr]"+&r"(src_ptr), [output_ptr]"+&r"(output_ptr),
[output_height]"+&r"(output_height)
: [pixels_per_line]"r"((mips_reg)pixels_per_line),
[pixels_per_line_x2]"r"((mips_reg)(pixels_per_line<<1)),
[pixels_per_line_x4]"r"((mips_reg)(pixels_per_line<<2)),
@@ -319,7 +301,6 @@ static INLINE void vp8_filter_block1d_h6_filter0_mmi(
"1: \n\t"
"gsldlc1 %[ftmp0], 0x07(%[src_ptr]) \n\t"
"gsldrc1 %[ftmp0], 0x00(%[src_ptr]) \n\t"
MMI_ADDU(%[src_ptr], %[src_ptr], %[src_pixels_per_line])
"punpcklbh %[ftmp1], %[ftmp0], %[fzero] \n\t"
"gssdlc1 %[ftmp1], 0x07(%[output_ptr]) \n\t"
@@ -327,6 +308,7 @@ static INLINE void vp8_filter_block1d_h6_filter0_mmi(
"addiu %[output_height], %[output_height], -0x01 \n\t"
MMI_ADDU(%[output_ptr], %[output_ptr], %[output_width])
MMI_ADDU(%[src_ptr], %[src_ptr], %[src_pixels_per_line])
"bnez %[output_height], 1b \n\t"
: [fzero]"=&f"(fzero), [ftmp0]"=&f"(ftmp0),
[ftmp1]"=&f"(ftmp1), [src_ptr]"+&r"(src_ptr),
@@ -356,12 +338,12 @@ static INLINE void vp8_filter_block1dc_v6_filter0_mmi(
"1: \n\t"
"gsldlc1 %[ftmp0], 0x07(%[src_ptr]) \n\t"
"gsldrc1 %[ftmp0], 0x00(%[src_ptr]) \n\t"
MMI_ADDU(%[src_ptr], %[src_ptr], %[pixels_per_line])
MMI_ADDIU(%[output_height], %[output_height], -0x01)
"packushb %[ftmp1], %[ftmp0], %[fzero] \n\t"
"gsswlc1 %[ftmp1], 0x03(%[output_ptr]) \n\t"
"gsswrc1 %[ftmp1], 0x00(%[output_ptr]) \n\t"
MMI_ADDU(%[src_ptr], %[src_ptr], %[pixels_per_line])
MMI_ADDIU(%[output_height], %[output_height], -0x01)
MMI_ADDU(%[output_ptr], %[output_ptr], %[output_pitch])
"bnez %[output_height], 1b \n\t"
: [fzero]"=&f"(fzero), [ftmp0]"=&f"(ftmp0),
@@ -404,7 +386,7 @@ static INLINE void vp8_filter_block1dc_v6_filter0_mmi(
} \
} else { \
for (i = 0; i < loop; ++i) { \
vp8_filter_block1dc_v6_mmi(FData2 + i * 4, dst_ptr + i * 4, m, \
vp8_filter_block1dc_v6_mmi(FData2 + n * 2 + i * 4, dst_ptr + i * 4, m, \
dst_pitch, n * 2, VFilter); \
} \
} \
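A note on the two call forms in the hunk above: one offsets the source pointer at the call site (FData2 + n * 2), the other rewinds two rows inside the kernel (the MMI_SUBU by pixels_per_line_x2 in vp8_filter_block1dc_v6_mmi). Since one row of the intermediate buffer holds n unsigned short elements (n * 2 bytes, the value passed as pixels_per_line), both reach the same first source row. A standalone sketch of the arithmetic, with first_source_row a hypothetical helper:

/* One row = n shorts, so +n * 2 elements is +2 rows, and rewinding 2 rows
 * (pixels_per_line_x2 == n * 4 bytes) returns to fdata2. The vertical
 * 6-tap filter needs exactly those two rows of context above its first
 * output row. */
static unsigned short *first_source_row(unsigned short *fdata2, int n) {
  unsigned short *at_call_site = fdata2 + n * 2; /* +2 rows */
  return at_call_site - 2 * n;                   /* -2 rows == fdata2 */
}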

View File

@@ -11,16 +11,28 @@
#include "entropy.h"
const int vp8_mode_contexts[6][4] = {
{ /* 0 */
7, 1, 1, 143 },
{ /* 1 */
14, 18, 14, 107 },
{ /* 2 */
135, 64, 57, 68 },
{ /* 3 */
60, 56, 128, 65 },
{ /* 4 */
159, 134, 128, 34 },
{ /* 5 */
234, 188, 128, 28 },
{
/* 0 */
7, 1, 1, 143,
},
{
/* 1 */
14, 18, 14, 107,
},
{
/* 2 */
135, 64, 57, 68,
},
{
/* 3 */
60, 56, 128, 65,
},
{
/* 4 */
159, 134, 128, 34,
},
{
/* 5 */
234, 188, 128, 28,
},
};

View File

@@ -1,13 +1,3 @@
##
## Copyright (c) 2017 The WebM project authors. All Rights Reserved.
##
## Use of this source code is governed by a BSD-style license
## that can be found in the LICENSE file in the root of the source
## tree. An additional intellectual property rights grant can be found
## in the file PATENTS. All contributing project authors may
## be found in the AUTHORS file in the root of the source tree.
##
sub vp8_common_forward_decls() {
print <<EOF
/*

View File

@@ -6,7 +6,7 @@
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef VP8_COMMON_VP8_ENTROPYMODEDATA_H_
#define VP8_COMMON_VP8_ENTROPYMODEDATA_H_

View File

@@ -95,7 +95,9 @@ void vp8_sixtap_predict4x4_mmx(unsigned char *src_ptr, int src_pixels_per_line,
void vp8_sixtap_predict16x16_sse2(unsigned char *src_ptr,
int src_pixels_per_line, int xoffset,
int yoffset, unsigned char *dst_ptr,
int dst_pitch) {
int dst_pitch
) {
DECLARE_ALIGNED(16, unsigned short,
FData2[24 * 24]); /* Temp data buffer used in filtering */
@@ -234,7 +236,9 @@ extern void vp8_filter_block1d4_v6_ssse3(unsigned char *src_ptr,
void vp8_sixtap_predict16x16_ssse3(unsigned char *src_ptr,
int src_pixels_per_line, int xoffset,
int yoffset, unsigned char *dst_ptr,
int dst_pitch) {
int dst_pitch
) {
DECLARE_ALIGNED(16, unsigned char, FData2[24 * 24]);
if (xoffset) {
@@ -347,8 +351,8 @@ void vp8_sixtap_predict4x4_ssse3(unsigned char *src_ptr,
yoffset);
} else {
/* ssse3 second-pass only function couldn't handle (xoffset==0 &&
* yoffset==0) case correctly. Add copy function here to guarantee
* six-tap function handles all possible offsets. */
int r;
for (r = 0; r < 4; ++r) {

View File

@@ -674,7 +674,7 @@ static unsigned int read_partition_size(VP8D_COMP *pbi,
static int read_is_valid(const unsigned char *start, size_t len,
const unsigned char *end) {
return len != 0 && end > start && len <= (size_t)(end - start);
return (start + len > start && start + len <= end);
}
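The two return statements above are the before/after of this change: one computes start + len, which can wrap around the address space (pointer overflow is undefined behavior in C) and let the check pass for an invalid span; the other compares len against the pointer difference, which keeps every operand in range. A standalone sketch of the safe form:

#include <stddef.h>

/* Overflow-safe span check (sketch; assumes start and end point into the
 * same buffer, so that end - start is well-defined). */
static int span_is_valid(const unsigned char *start, size_t len,
                         const unsigned char *end) {
  return len != 0 && end > start && len <= (size_t)(end - start);
}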
static unsigned int read_available_partition_size(

View File

@@ -34,9 +34,7 @@ typedef struct {
/* Structure used to hold all the overlaps of a macroblock. The overlaps of a
* macroblock are further divided into block overlaps.
*/
typedef struct {
B_OVERLAP overlaps[16];
} MB_OVERLAP;
typedef struct { B_OVERLAP overlaps[16]; } MB_OVERLAP;
/* Structure for keeping track of motion vectors and which reference frame they
* refer to. Used for motion vector interpolation.

View File

@@ -31,9 +31,7 @@ typedef struct {
void *ptr2;
} DECODETHREAD_DATA;
typedef struct {
MACROBLOCKD mbd;
} MB_ROW_DEC;
typedef struct { MACROBLOCKD mbd; } MB_ROW_DEC;
typedef struct {
int enabled;

View File

@@ -739,21 +739,24 @@ void vp8mt_alloc_temp_buffers(VP8D_COMP *pbi, int width, int prev_mb_rows) {
/* Allocate memory for above_row buffers. */
CALLOC_ARRAY(pbi->mt_yabove_row, pc->mb_rows);
for (i = 0; i < pc->mb_rows; ++i)
CHECK_MEM_ERROR(pbi->mt_yabove_row[i],
vpx_memalign(16, sizeof(unsigned char) *
(width + (VP8BORDERINPIXELS << 1))));
CHECK_MEM_ERROR(
pbi->mt_yabove_row[i],
vpx_memalign(
16, sizeof(unsigned char) * (width + (VP8BORDERINPIXELS << 1))));
CALLOC_ARRAY(pbi->mt_uabove_row, pc->mb_rows);
for (i = 0; i < pc->mb_rows; ++i)
CHECK_MEM_ERROR(pbi->mt_uabove_row[i],
vpx_memalign(16, sizeof(unsigned char) *
(uv_width + VP8BORDERINPIXELS)));
CHECK_MEM_ERROR(
pbi->mt_uabove_row[i],
vpx_memalign(16,
sizeof(unsigned char) * (uv_width + VP8BORDERINPIXELS)));
CALLOC_ARRAY(pbi->mt_vabove_row, pc->mb_rows);
for (i = 0; i < pc->mb_rows; ++i)
CHECK_MEM_ERROR(pbi->mt_vabove_row[i],
vpx_memalign(16, sizeof(unsigned char) *
(uv_width + VP8BORDERINPIXELS)));
CHECK_MEM_ERROR(
pbi->mt_vabove_row[i],
vpx_memalign(16,
sizeof(unsigned char) * (uv_width + VP8BORDERINPIXELS)));
/* Allocate memory for left_col buffers. */
CALLOC_ARRAY(pbi->mt_yleft_col, pc->mb_rows);

View File

@@ -9,12 +9,12 @@
*/
/****************************************************************************
*
* Module Title : boolhuff.h
*
* Description : Bool Coder header file.
*
****************************************************************************/
#ifndef VP8_ENCODER_BOOLHUFF_H_
#define VP8_ENCODER_BOOLHUFF_H_

View File

@@ -989,11 +989,11 @@ static int estimate_max_q(VP8_COMP *cpi, FIRSTPASS_STATS *fpstats,
bits_per_mb_at_this_q =
vp8_bits_per_mb[INTER_FRAME][Q] + overhead_bits_per_mb;
bits_per_mb_at_this_q =
(int)(.5 + err_correction_factor * speed_correction *
cpi->twopass.est_max_qcorrection_factor *
cpi->twopass.section_max_qfactor *
(double)bits_per_mb_at_this_q);
bits_per_mb_at_this_q = (int)(.5 +
err_correction_factor * speed_correction *
cpi->twopass.est_max_qcorrection_factor *
cpi->twopass.section_max_qfactor *
(double)bits_per_mb_at_this_q);
/* Mode and motion overhead */
/* As Q rises in real encode loop rd code will force overhead down
@@ -1086,8 +1086,9 @@ static int estimate_cq(VP8_COMP *cpi, FIRSTPASS_STATS *fpstats,
vp8_bits_per_mb[INTER_FRAME][Q] + overhead_bits_per_mb;
bits_per_mb_at_this_q =
(int)(.5 + err_correction_factor * speed_correction * clip_iifactor *
(double)bits_per_mb_at_this_q);
(int)(.5 +
err_correction_factor * speed_correction * clip_iifactor *
(double)bits_per_mb_at_this_q);
/* Mode and motion overhead */
/* As Q rises in real encode loop rd code will force overhead down
@@ -1272,8 +1273,9 @@ void vp8_init_second_pass(VP8_COMP *cpi) {
* sum duration is not. Its calculated based on the actual durations of
* all frames from the first pass.
*/
vp8_new_framerate(cpi, 10000000.0 * cpi->twopass.total_stats.count /
cpi->twopass.total_stats.duration);
vp8_new_framerate(cpi,
10000000.0 * cpi->twopass.total_stats.count /
cpi->twopass.total_stats.duration);
cpi->output_framerate = cpi->framerate;
cpi->twopass.bits_left = (int64_t)(cpi->twopass.total_stats.duration *
@@ -1737,11 +1739,10 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) {
/* Don't break out very close to a key frame */
((cpi->twopass.frames_to_key - i) >= MIN_GF_INTERVAL) &&
((boost_score > 20.0) || (next_frame.pcnt_inter < 0.75)) &&
(!flash_detected) &&
((mv_ratio_accumulator > 100.0) ||
(abs_mv_in_out_accumulator > 3.0) ||
(mv_in_out_accumulator < -2.0) ||
((boost_score - old_boost_score) < 2.0)))) {
(!flash_detected) && ((mv_ratio_accumulator > 100.0) ||
(abs_mv_in_out_accumulator > 3.0) ||
(mv_in_out_accumulator < -2.0) ||
((boost_score - old_boost_score) < 2.0)))) {
boost_score = old_boost_score;
break;
}
@@ -1814,9 +1815,8 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) {
(next_frame.pcnt_inter > 0.75) &&
((mv_in_out_accumulator / (double)i > -0.2) ||
(mv_in_out_accumulator > -2.0)) &&
(cpi->gfu_boost > 100) &&
(cpi->twopass.gf_decay_rate <=
(ARF_DECAY_THRESH + (cpi->gfu_boost / 200))))
(cpi->gfu_boost > 100) && (cpi->twopass.gf_decay_rate <=
(ARF_DECAY_THRESH + (cpi->gfu_boost / 200))))
#endif
{
int Boost;

View File

@@ -2862,6 +2862,7 @@ void write_cx_frame_to_file(YV12_BUFFER_CONFIG *frame, int this_frame)
fclose(yframe);
}
#endif
/* return of 0 means drop frame */
#if !CONFIG_REALTIME_ONLY
/* Function to test for conditions that indicate we should loop
@@ -3363,6 +3364,11 @@ static void encode_frame_to_data_rate(VP8_COMP *cpi, size_t *size,
(LOWER_RES_FRAME_INFO *)cpi->oxcf.mr_low_res_mode_info;
if (cpi->oxcf.mr_encoder_id) {
// TODO(marpan): This constraint shouldn't be needed, as we would like
// to allow for key frame setting (forced or periodic) defined per
// spatial layer. For now, keep this in.
cm->frame_type = low_res_frame_info->frame_type;
// Check if lower resolution is available for motion vector reuse.
if (cm->frame_type != KEY_FRAME) {
cpi->mr_low_res_mv_avail = 1;
@@ -3387,16 +3393,7 @@ static void encode_frame_to_data_rate(VP8_COMP *cpi, size_t *size,
== low_res_frame_info->low_res_ref_frames[ALTREF_FRAME]);
*/
}
// Disable motion vector reuse (i.e., disable any usage of the low_res)
// if the previous lower stream is skipped/disabled.
if (low_res_frame_info->skip_encoding_prev_stream) {
cpi->mr_low_res_mv_avail = 0;
}
}
// This stream is not skipped (i.e., it's being encoded), so set this skip
// flag to 0. This is needed for the next stream (i.e., which is the next
// frame to be encoded).
low_res_frame_info->skip_encoding_prev_stream = 0;
// On a key frame: For the lowest resolution, keep track of the key frame
// counter value. For the higher resolutions, reset the current video
@@ -3802,7 +3799,7 @@ static void encode_frame_to_data_rate(VP8_COMP *cpi, size_t *size,
/* Setup background Q adjustment for error resilient mode.
* For multi-layer encodes only enable this for the base layer.
*/
if (cpi->cyclic_refresh_mode_enabled) {
// Special case for screen_content_mode with golden frame updates.
int disable_cr_gf =
@@ -4785,6 +4782,8 @@ static void encode_frame_to_data_rate(VP8_COMP *cpi, size_t *size,
cpi->temporal_pattern_counter++;
}
/* reset to normal state now that we are done. */
#if 0
{
char filename[512];
@@ -5000,13 +4999,10 @@ int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags,
// be received for that high layer, which will yield an incorrect
// frame rate (from time-stamp adjustment in above calculation).
if (cpi->oxcf.mr_encoder_id) {
if (!low_res_frame_info->skip_encoding_base_stream)
cpi->ref_framerate = low_res_frame_info->low_res_framerate;
cpi->ref_framerate = low_res_frame_info->low_res_framerate;
} else {
// Keep track of frame rate for lowest resolution.
low_res_frame_info->low_res_framerate = cpi->ref_framerate;
// The base stream is being encoded so set skip flag to 0.
low_res_frame_info->skip_encoding_base_stream = 0;
}
}
#endif

View File

@@ -741,10 +741,10 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
x->e_mbd.mode_info_context->mbmi.ref_frame = INTRA_FRAME;
/* If the frame has big static background and current MB is in low
* motion area, its mode decision is biased to ZEROMV mode.
* No adjustment if cpu_used is <= -12 (i.e., cpi->Speed >= 12).
* At such speed settings, ZEROMV is already heavily favored.
*/
if (cpi->Speed < 12) {
calculate_zeromv_rd_adjustment(cpi, x, &rd_adjustment);
}

View File

@@ -996,7 +996,7 @@ static void calc_pframe_target_size(VP8_COMP *cpi) {
* bits on this frame even if it is a constructed arf.
* The active maximum quantizer ensures that an appropriate
* number of bits will be spent if needed for constructed ARFs.
*/
cpi->this_frame_target = 0;
}
@@ -1052,8 +1052,9 @@ void vp8_update_rate_correction_factors(VP8_COMP *cpi, int damp_var) {
* overflow when values are large
*/
projected_size_based_on_q =
(int)(((.5 + rate_correction_factor *
vp8_bits_per_mb[cpi->common.frame_type][Q]) *
(int)(((.5 +
rate_correction_factor *
vp8_bits_per_mb[cpi->common.frame_type][Q]) *
cpi->common.MBs) /
(1 << BPER_MB_NORMBITS));

View File

@@ -23,7 +23,6 @@
#include "modecosts.h"
#include "encodeintra.h"
#include "pickinter.h"
#include "vp8/common/common.h"
#include "vp8/common/entropymode.h"
#include "vp8/common/reconinter.h"
#include "vp8/common/reconintra.h"
@@ -770,9 +769,9 @@ static void rd_pick_intra_mbuv_mode(MACROBLOCK *x, int *rate,
vp8_quantize_mbuv(x);
rate_to = rd_cost_mbuv(x);
this_rate =
rate_to + x->intra_uv_mode_cost[xd->frame_type]
[xd->mode_info_context->mbmi.uv_mode];
this_rate = rate_to +
x->intra_uv_mode_cost[xd->frame_type]
[xd->mode_info_context->mbmi.uv_mode];
this_distortion = vp8_mbuverror(x) / 4;
@@ -960,13 +959,19 @@ static void rd_check_segment(VP8_COMP *cpi, MACROBLOCK *x, BEST_SEG_INFO *bsi,
vp8_variance_fn_ptr_t *v_fn_ptr;
ENTROPY_CONTEXT_PLANES t_above, t_left;
ENTROPY_CONTEXT *ta;
ENTROPY_CONTEXT *tl;
ENTROPY_CONTEXT_PLANES t_above_b, t_left_b;
ENTROPY_CONTEXT *ta_b;
ENTROPY_CONTEXT *tl_b;
memcpy(&t_above, x->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES));
memcpy(&t_left, x->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES));
vp8_zero(t_above_b);
vp8_zero(t_left_b);
ta = (ENTROPY_CONTEXT *)&t_above;
tl = (ENTROPY_CONTEXT *)&t_left;
ta_b = (ENTROPY_CONTEXT *)&t_above_b;
tl_b = (ENTROPY_CONTEXT *)&t_left_b;
br = 0;
bd = 0;
@@ -1146,13 +1151,13 @@ static void rd_check_segment(VP8_COMP *cpi, MACROBLOCK *x, BEST_SEG_INFO *bsi,
mode_selected = this_mode;
best_label_rd = this_rd;
memcpy(&t_above_b, &t_above_s, sizeof(ENTROPY_CONTEXT_PLANES));
memcpy(&t_left_b, &t_left_s, sizeof(ENTROPY_CONTEXT_PLANES));
memcpy(ta_b, ta_s, sizeof(ENTROPY_CONTEXT_PLANES));
memcpy(tl_b, tl_s, sizeof(ENTROPY_CONTEXT_PLANES));
}
} /*for each 4x4 mode*/
memcpy(&t_above, &t_above_b, sizeof(ENTROPY_CONTEXT_PLANES));
memcpy(&t_left, &t_left_b, sizeof(ENTROPY_CONTEXT_PLANES));
memcpy(ta, ta_b, sizeof(ENTROPY_CONTEXT_PLANES));
memcpy(tl, tl_b, sizeof(ENTROPY_CONTEXT_PLANES));
labels2mode(x, labels, i, mode_selected, &mode_mv[mode_selected],
bsi->ref_mv, x->mvcost);

View File

@@ -56,7 +56,8 @@ static INLINE unsigned int vp8_cost_branch(const unsigned int ct[2],
static void vp8_treed_write(vp8_writer *const w, vp8_tree t,
const vp8_prob *const p, int v,
int n) { /* number of bits in v, assumed nonzero */
int n /* number of bits in v, assumed nonzero */
) {
vp8_tree_index i = 0;
do {
@@ -72,7 +73,8 @@ static INLINE void vp8_write_token(vp8_writer *const w, vp8_tree t,
}
static int vp8_treed_cost(vp8_tree t, const vp8_prob *const p, int v,
int n) { /* number of bits in v, assumed nonzero */
int n /* number of bits in v, assumed nonzero */
) {
int c = 0;
vp8_tree_index i = 0;

View File

@@ -802,20 +802,7 @@ static vpx_codec_err_t vp8e_encode(vpx_codec_alg_priv_t *ctx,
unsigned long deadline) {
vpx_codec_err_t res = VPX_CODEC_OK;
if (!ctx->cfg.rc_target_bitrate) {
#if CONFIG_MULTI_RES_ENCODING
if (!ctx->cpi) return VPX_CODEC_ERROR;
if (ctx->cpi->oxcf.mr_total_resolutions > 1) {
LOWER_RES_FRAME_INFO *low_res_frame_info =
(LOWER_RES_FRAME_INFO *)ctx->cpi->oxcf.mr_low_res_mode_info;
if (!low_res_frame_info) return VPX_CODEC_ERROR;
low_res_frame_info->skip_encoding_prev_stream = 1;
if (ctx->cpi->oxcf.mr_encoder_id == 0)
low_res_frame_info->skip_encoding_base_stream = 1;
}
#endif
return res;
}
if (!ctx->cfg.rc_target_bitrate) return res;
if (img) res = validate_img(ctx, img);
@@ -915,8 +902,6 @@ static vpx_codec_err_t vp8e_encode(vpx_codec_alg_priv_t *ctx,
(unsigned long)((delta * ctx->cfg.g_timebase.den + round) /
ctx->cfg.g_timebase.num / 10000000);
pkt.data.frame.flags = lib_flags << 16;
pkt.data.frame.width[0] = cpi->common.Width;
pkt.data.frame.height[0] = cpi->common.Height;
if (lib_flags & FRAMEFLAGS_KEY) {
pkt.data.frame.flags |= VPX_FRAME_IS_KEY;
@@ -1274,9 +1259,6 @@ CODEC_INTERFACE(vpx_codec_vp8_cx) = {
vp8e_usage_cfg_map, /* vpx_codec_enc_cfg_map_t cfg_maps; */
vp8e_encode, /* vpx_codec_encode_fn_t encode; */
vp8e_get_cxdata, /* vpx_codec_get_cx_data_fn_t get_cx_data; */
vp8e_set_config,
NULL,
vp8e_get_preview,
vp8e_mr_alloc_mem,
vp8e_set_config, NULL, vp8e_get_preview, vp8e_mr_alloc_mem,
} /* encoder functions */
};

View File

@@ -200,9 +200,9 @@ static vpx_codec_err_t update_error_state(
static void yuvconfig2image(vpx_image_t *img, const YV12_BUFFER_CONFIG *yv12,
void *user_priv) {
/** vpx_img_wrap() doesn't allow specifying independent strides for
* the Y, U, and V planes, nor other alignment adjustments that
* might be representable by a YV12_BUFFER_CONFIG, so we just
* initialize all the fields.*/
img->fmt = VPX_IMG_FMT_I420;
img->w = yv12->y_stride;
img->h = (yv12->y_height + 2 * VP8BORDERINPIXELS + 15) & ~15;

View File

@@ -1,160 +0,0 @@
/*
* Copyright (c) 2018 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include <arm_neon.h>
#include <assert.h>
#include "./vp9_rtcd.h"
#include "./vpx_config.h"
#include "vp9/common/vp9_common.h"
#include "vp9/common/arm/neon/vp9_iht_neon.h"
#include "vpx_dsp/arm/highbd_idct_neon.h"
#include "vpx_dsp/arm/idct_neon.h"
#include "vpx_dsp/arm/mem_neon.h"
#include "vpx_dsp/txfm_common.h"
static INLINE void highbd_iadst4(int32x4_t *const io) {
const int32_t sinpis[4] = { sinpi_1_9, sinpi_2_9, sinpi_3_9, sinpi_4_9 };
const int32x4_t sinpi = vld1q_s32(sinpis);
int32x4_t s[8];
s[0] = vmulq_lane_s32(io[0], vget_low_s32(sinpi), 0);
s[1] = vmulq_lane_s32(io[0], vget_low_s32(sinpi), 1);
s[2] = vmulq_lane_s32(io[1], vget_high_s32(sinpi), 0);
s[3] = vmulq_lane_s32(io[2], vget_high_s32(sinpi), 1);
s[4] = vmulq_lane_s32(io[2], vget_low_s32(sinpi), 0);
s[5] = vmulq_lane_s32(io[3], vget_low_s32(sinpi), 1);
s[6] = vmulq_lane_s32(io[3], vget_high_s32(sinpi), 1);
s[7] = vsubq_s32(io[0], io[2]);
s[7] = vaddq_s32(s[7], io[3]);
s[0] = vaddq_s32(s[0], s[3]);
s[0] = vaddq_s32(s[0], s[5]);
s[1] = vsubq_s32(s[1], s[4]);
s[1] = vsubq_s32(s[1], s[6]);
s[3] = s[2];
s[2] = vmulq_lane_s32(s[7], vget_high_s32(sinpi), 0);
io[0] = vaddq_s32(s[0], s[3]);
io[1] = vaddq_s32(s[1], s[3]);
io[2] = s[2];
io[3] = vaddq_s32(s[0], s[1]);
io[3] = vsubq_s32(io[3], s[3]);
io[0] = vrshrq_n_s32(io[0], DCT_CONST_BITS);
io[1] = vrshrq_n_s32(io[1], DCT_CONST_BITS);
io[2] = vrshrq_n_s32(io[2], DCT_CONST_BITS);
io[3] = vrshrq_n_s32(io[3], DCT_CONST_BITS);
}
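For reference, highbd_iadst4 above vectorizes the scalar 4-point inverse ADST. A sketch of the same dataflow, with the sinpi_* values and the 14-bit rounding shift from vpx_dsp/txfm_common.h written out:

#include <stdint.h>

/* Scalar 4-point inverse ADST matching the dataflow above (sketch). */
static void iadst4_scalar(const int32_t in[4], int32_t out[4]) {
  const int64_t sinpi_1_9 = 5283, sinpi_2_9 = 9929;
  const int64_t sinpi_3_9 = 13377, sinpi_4_9 = 15212;
  const int64_t s0 = sinpi_1_9 * in[0] + sinpi_4_9 * in[2] + sinpi_2_9 * in[3];
  const int64_t s1 = sinpi_2_9 * in[0] - sinpi_1_9 * in[2] - sinpi_4_9 * in[3];
  const int64_t s3 = sinpi_3_9 * in[1];
  const int64_t s2 = sinpi_3_9 * (in[0] - in[2] + in[3]);
  /* vrshrq_n_s32(x, DCT_CONST_BITS): rounding shift by 14 */
  out[0] = (int32_t)((s0 + s3 + (1 << 13)) >> 14);
  out[1] = (int32_t)((s1 + s3 + (1 << 13)) >> 14);
  out[2] = (int32_t)((s2 + (1 << 13)) >> 14);
  out[3] = (int32_t)((s0 + s1 - s3 + (1 << 13)) >> 14);
}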
void vp9_highbd_iht4x4_16_add_neon(const tran_low_t *input, uint16_t *dest,
int stride, int tx_type, int bd) {
const int16x8_t max = vdupq_n_s16((1 << bd) - 1);
int16x8_t a[2];
int32x4_t c[4];
c[0] = vld1q_s32(input);
c[1] = vld1q_s32(input + 4);
c[2] = vld1q_s32(input + 8);
c[3] = vld1q_s32(input + 12);
if (bd == 8) {
a[0] = vcombine_s16(vmovn_s32(c[0]), vmovn_s32(c[1]));
a[1] = vcombine_s16(vmovn_s32(c[2]), vmovn_s32(c[3]));
transpose_s16_4x4q(&a[0], &a[1]);
switch (tx_type) {
case DCT_DCT:
idct4x4_16_kernel_bd8(a);
a[1] = vcombine_s16(vget_high_s16(a[1]), vget_low_s16(a[1]));
transpose_s16_4x4q(&a[0], &a[1]);
idct4x4_16_kernel_bd8(a);
a[1] = vcombine_s16(vget_high_s16(a[1]), vget_low_s16(a[1]));
break;
case ADST_DCT:
idct4x4_16_kernel_bd8(a);
a[1] = vcombine_s16(vget_high_s16(a[1]), vget_low_s16(a[1]));
transpose_s16_4x4q(&a[0], &a[1]);
iadst4(a);
break;
case DCT_ADST:
iadst4(a);
transpose_s16_4x4q(&a[0], &a[1]);
idct4x4_16_kernel_bd8(a);
a[1] = vcombine_s16(vget_high_s16(a[1]), vget_low_s16(a[1]));
break;
default:
assert(tx_type == ADST_ADST);
iadst4(a);
transpose_s16_4x4q(&a[0], &a[1]);
iadst4(a);
break;
}
a[0] = vrshrq_n_s16(a[0], 4);
a[1] = vrshrq_n_s16(a[1], 4);
} else {
switch (tx_type) {
case DCT_DCT: {
const int32x4_t cospis = vld1q_s32(kCospi32);
if (bd == 10) {
idct4x4_16_kernel_bd10(cospis, c);
idct4x4_16_kernel_bd10(cospis, c);
} else {
idct4x4_16_kernel_bd12(cospis, c);
idct4x4_16_kernel_bd12(cospis, c);
}
break;
}
case ADST_DCT: {
const int32x4_t cospis = vld1q_s32(kCospi32);
if (bd == 10) {
idct4x4_16_kernel_bd10(cospis, c);
} else {
idct4x4_16_kernel_bd12(cospis, c);
}
transpose_s32_4x4(&c[0], &c[1], &c[2], &c[3]);
highbd_iadst4(c);
break;
}
case DCT_ADST: {
const int32x4_t cospis = vld1q_s32(kCospi32);
transpose_s32_4x4(&c[0], &c[1], &c[2], &c[3]);
highbd_iadst4(c);
if (bd == 10) {
idct4x4_16_kernel_bd10(cospis, c);
} else {
idct4x4_16_kernel_bd12(cospis, c);
}
break;
}
default: {
assert(tx_type == ADST_ADST);
transpose_s32_4x4(&c[0], &c[1], &c[2], &c[3]);
highbd_iadst4(c);
transpose_s32_4x4(&c[0], &c[1], &c[2], &c[3]);
highbd_iadst4(c);
break;
}
}
a[0] = vcombine_s16(vqrshrn_n_s32(c[0], 4), vqrshrn_n_s32(c[1], 4));
a[1] = vcombine_s16(vqrshrn_n_s32(c[2], 4), vqrshrn_n_s32(c[3], 4));
}
highbd_idct4x4_1_add_kernel1(&dest, stride, a[0], max);
highbd_idct4x4_1_add_kernel1(&dest, stride, a[1], max);
}

View File

@@ -14,63 +14,206 @@
#include "./vp9_rtcd.h"
#include "./vpx_config.h"
#include "vp9/common/vp9_common.h"
#include "vp9/common/arm/neon/vp9_iht_neon.h"
#include "vpx_dsp/arm/idct_neon.h"
#include "vpx_dsp/arm/mem_neon.h"
#include "vpx_dsp/txfm_common.h"
static INLINE void TRANSPOSE4X4(int16x8_t *q8s16, int16x8_t *q9s16) {
int32x4_t q8s32, q9s32;
int16x4x2_t d0x2s16, d1x2s16;
int32x4x2_t q0x2s32;
d0x2s16 = vtrn_s16(vget_low_s16(*q8s16), vget_high_s16(*q8s16));
d1x2s16 = vtrn_s16(vget_low_s16(*q9s16), vget_high_s16(*q9s16));
q8s32 = vreinterpretq_s32_s16(vcombine_s16(d0x2s16.val[0], d0x2s16.val[1]));
q9s32 = vreinterpretq_s32_s16(vcombine_s16(d1x2s16.val[0], d1x2s16.val[1]));
q0x2s32 = vtrnq_s32(q8s32, q9s32);
*q8s16 = vreinterpretq_s16_s32(q0x2s32.val[0]);
*q9s16 = vreinterpretq_s16_s32(q0x2s32.val[1]);
}
static INLINE void GENERATE_COSINE_CONSTANTS(int16x4_t *d0s16, int16x4_t *d1s16,
int16x4_t *d2s16) {
*d0s16 = vdup_n_s16(cospi_8_64);
*d1s16 = vdup_n_s16(cospi_16_64);
*d2s16 = vdup_n_s16(cospi_24_64);
}
static INLINE void GENERATE_SINE_CONSTANTS(int16x4_t *d3s16, int16x4_t *d4s16,
int16x4_t *d5s16, int16x8_t *q3s16) {
*d3s16 = vdup_n_s16(sinpi_1_9);
*d4s16 = vdup_n_s16(sinpi_2_9);
*q3s16 = vdupq_n_s16(sinpi_3_9);
*d5s16 = vdup_n_s16(sinpi_4_9);
}
static INLINE void IDCT4x4_1D(int16x4_t *d0s16, int16x4_t *d1s16,
int16x4_t *d2s16, int16x8_t *q8s16,
int16x8_t *q9s16) {
int16x4_t d16s16, d17s16, d18s16, d19s16, d23s16, d24s16;
int16x4_t d26s16, d27s16, d28s16, d29s16;
int32x4_t q10s32, q13s32, q14s32, q15s32;
int16x8_t q13s16, q14s16;
d16s16 = vget_low_s16(*q8s16);
d17s16 = vget_high_s16(*q8s16);
d18s16 = vget_low_s16(*q9s16);
d19s16 = vget_high_s16(*q9s16);
d23s16 = vadd_s16(d16s16, d18s16);
d24s16 = vsub_s16(d16s16, d18s16);
q15s32 = vmull_s16(d17s16, *d2s16);
q10s32 = vmull_s16(d17s16, *d0s16);
q13s32 = vmull_s16(d23s16, *d1s16);
q14s32 = vmull_s16(d24s16, *d1s16);
q15s32 = vmlsl_s16(q15s32, d19s16, *d0s16);
q10s32 = vmlal_s16(q10s32, d19s16, *d2s16);
d26s16 = vrshrn_n_s32(q13s32, 14);
d27s16 = vrshrn_n_s32(q14s32, 14);
d29s16 = vrshrn_n_s32(q15s32, 14);
d28s16 = vrshrn_n_s32(q10s32, 14);
q13s16 = vcombine_s16(d26s16, d27s16);
q14s16 = vcombine_s16(d28s16, d29s16);
*q8s16 = vaddq_s16(q13s16, q14s16);
*q9s16 = vsubq_s16(q13s16, q14s16);
*q9s16 = vcombine_s16(vget_high_s16(*q9s16), vget_low_s16(*q9s16)); // vswp
}
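IDCT4x4_1D above is the standard 4-point inverse DCT butterfly: two cospi_16_64 terms for the even half and a cospi_8_64/cospi_24_64 rotation for the odd half. The same arithmetic in scalar form, as a sketch with the constant values written out:

#include <stdint.h>

/* Scalar 4-point inverse DCT matching IDCT4x4_1D (sketch). */
static void idct4_scalar(const int16_t in[4], int16_t out[4]) {
  const int cospi_8_64 = 15137, cospi_16_64 = 11585, cospi_24_64 = 6270;
  const int step0 = ((in[0] + in[2]) * cospi_16_64 + (1 << 13)) >> 14;
  const int step1 = ((in[0] - in[2]) * cospi_16_64 + (1 << 13)) >> 14;
  const int step2 = (in[1] * cospi_24_64 - in[3] * cospi_8_64 + (1 << 13)) >> 14;
  const int step3 = (in[1] * cospi_8_64 + in[3] * cospi_24_64 + (1 << 13)) >> 14;
  out[0] = (int16_t)(step0 + step3); /* q8, low half  */
  out[1] = (int16_t)(step1 + step2); /* q8, high half */
  out[2] = (int16_t)(step1 - step2); /* q9, halves swapped by the final vswp */
  out[3] = (int16_t)(step0 - step3);
}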
static INLINE void IADST4x4_1D(int16x4_t *d3s16, int16x4_t *d4s16,
int16x4_t *d5s16, int16x8_t *q3s16,
int16x8_t *q8s16, int16x8_t *q9s16) {
int16x4_t d6s16, d16s16, d17s16, d18s16, d19s16;
int32x4_t q8s32, q9s32, q10s32, q11s32, q12s32, q13s32, q14s32, q15s32;
d6s16 = vget_low_s16(*q3s16);
d16s16 = vget_low_s16(*q8s16);
d17s16 = vget_high_s16(*q8s16);
d18s16 = vget_low_s16(*q9s16);
d19s16 = vget_high_s16(*q9s16);
q10s32 = vmull_s16(*d3s16, d16s16);
q11s32 = vmull_s16(*d4s16, d16s16);
q12s32 = vmull_s16(d6s16, d17s16);
q13s32 = vmull_s16(*d5s16, d18s16);
q14s32 = vmull_s16(*d3s16, d18s16);
q15s32 = vmovl_s16(d16s16);
q15s32 = vaddw_s16(q15s32, d19s16);
q8s32 = vmull_s16(*d4s16, d19s16);
q15s32 = vsubw_s16(q15s32, d18s16);
q9s32 = vmull_s16(*d5s16, d19s16);
q10s32 = vaddq_s32(q10s32, q13s32);
q10s32 = vaddq_s32(q10s32, q8s32);
q11s32 = vsubq_s32(q11s32, q14s32);
q8s32 = vdupq_n_s32(sinpi_3_9);
q11s32 = vsubq_s32(q11s32, q9s32);
q15s32 = vmulq_s32(q15s32, q8s32);
q13s32 = vaddq_s32(q10s32, q12s32);
q10s32 = vaddq_s32(q10s32, q11s32);
q14s32 = vaddq_s32(q11s32, q12s32);
q10s32 = vsubq_s32(q10s32, q12s32);
d16s16 = vrshrn_n_s32(q13s32, 14);
d17s16 = vrshrn_n_s32(q14s32, 14);
d18s16 = vrshrn_n_s32(q15s32, 14);
d19s16 = vrshrn_n_s32(q10s32, 14);
*q8s16 = vcombine_s16(d16s16, d17s16);
*q9s16 = vcombine_s16(d18s16, d19s16);
}
void vp9_iht4x4_16_add_neon(const tran_low_t *input, uint8_t *dest, int stride,
int tx_type) {
int16x8_t a[2];
uint8x8_t s[2], d[2];
uint16x8_t sum[2];
uint8x8_t d26u8, d27u8;
int16x4_t d0s16, d1s16, d2s16, d3s16, d4s16, d5s16;
uint32x2_t d26u32, d27u32;
int16x8_t q3s16, q8s16, q9s16;
uint16x8_t q8u16, q9u16;
assert(!((intptr_t)dest % sizeof(uint32_t)));
assert(!(stride % sizeof(uint32_t)));
d26u32 = d27u32 = vdup_n_u32(0);
a[0] = load_tran_low_to_s16q(input);
a[1] = load_tran_low_to_s16q(input + 8);
transpose_s16_4x4q(&a[0], &a[1]);
q8s16 = vld1q_s16(input);
q9s16 = vld1q_s16(input + 8);
TRANSPOSE4X4(&q8s16, &q9s16);
switch (tx_type) {
case DCT_DCT:
idct4x4_16_kernel_bd8(a);
a[1] = vcombine_s16(vget_high_s16(a[1]), vget_low_s16(a[1]));
transpose_s16_4x4q(&a[0], &a[1]);
idct4x4_16_kernel_bd8(a);
a[1] = vcombine_s16(vget_high_s16(a[1]), vget_low_s16(a[1]));
break;
case 0: // idct_idct is not supported. Fall back to C
vp9_iht4x4_16_add_c(input, dest, stride, tx_type);
return;
case 1: // iadst_idct
// generate constants
GENERATE_COSINE_CONSTANTS(&d0s16, &d1s16, &d2s16);
GENERATE_SINE_CONSTANTS(&d3s16, &d4s16, &d5s16, &q3s16);
case ADST_DCT:
idct4x4_16_kernel_bd8(a);
a[1] = vcombine_s16(vget_high_s16(a[1]), vget_low_s16(a[1]));
transpose_s16_4x4q(&a[0], &a[1]);
iadst4(a);
break;
// first transform rows
IDCT4x4_1D(&d0s16, &d1s16, &d2s16, &q8s16, &q9s16);
case DCT_ADST:
iadst4(a);
transpose_s16_4x4q(&a[0], &a[1]);
idct4x4_16_kernel_bd8(a);
a[1] = vcombine_s16(vget_high_s16(a[1]), vget_low_s16(a[1]));
break;
// transpose the matrix
TRANSPOSE4X4(&q8s16, &q9s16);
default:
assert(tx_type == ADST_ADST);
iadst4(a);
transpose_s16_4x4q(&a[0], &a[1]);
iadst4(a);
// then transform columns
IADST4x4_1D(&d3s16, &d4s16, &d5s16, &q3s16, &q8s16, &q9s16);
break;
case 2: // idct_iadst
// generate constants
GENERATE_COSINE_CONSTANTS(&d0s16, &d1s16, &d2s16);
GENERATE_SINE_CONSTANTS(&d3s16, &d4s16, &d5s16, &q3s16);
// first transform rows
IADST4x4_1D(&d3s16, &d4s16, &d5s16, &q3s16, &q8s16, &q9s16);
// transpose the matrix
TRANSPOSE4X4(&q8s16, &q9s16);
// then transform columns
IDCT4x4_1D(&d0s16, &d1s16, &d2s16, &q8s16, &q9s16);
break;
case 3: // iadst_iadst
// generate constants
GENERATE_SINE_CONSTANTS(&d3s16, &d4s16, &d5s16, &q3s16);
// first transform rows
IADST4x4_1D(&d3s16, &d4s16, &d5s16, &q3s16, &q8s16, &q9s16);
// transpose the matrix
TRANSPOSE4X4(&q8s16, &q9s16);
// then transform columns
IADST4x4_1D(&d3s16, &d4s16, &d5s16, &q3s16, &q8s16, &q9s16);
break;
default: // not reached: all four tx_types handled above
assert(0);
break;
}
a[0] = vrshrq_n_s16(a[0], 4);
a[1] = vrshrq_n_s16(a[1], 4);
s[0] = load_u8(dest, stride);
s[1] = load_u8(dest + 2 * stride, stride);
sum[0] = vaddw_u8(vreinterpretq_u16_s16(a[0]), s[0]);
sum[1] = vaddw_u8(vreinterpretq_u16_s16(a[1]), s[1]);
d[0] = vqmovun_s16(vreinterpretq_s16_u16(sum[0]));
d[1] = vqmovun_s16(vreinterpretq_s16_u16(sum[1]));
store_u8(dest, stride, d[0]);
store_u8(dest + 2 * stride, stride, d[1]);
q8s16 = vrshrq_n_s16(q8s16, 4);
q9s16 = vrshrq_n_s16(q9s16, 4);
d26u32 = vld1_lane_u32((const uint32_t *)dest, d26u32, 0);
dest += stride;
d26u32 = vld1_lane_u32((const uint32_t *)dest, d26u32, 1);
dest += stride;
d27u32 = vld1_lane_u32((const uint32_t *)dest, d27u32, 0);
dest += stride;
d27u32 = vld1_lane_u32((const uint32_t *)dest, d27u32, 1);
q8u16 = vaddw_u8(vreinterpretq_u16_s16(q8s16), vreinterpret_u8_u32(d26u32));
q9u16 = vaddw_u8(vreinterpretq_u16_s16(q9s16), vreinterpret_u8_u32(d27u32));
d26u8 = vqmovun_s16(vreinterpretq_s16_u16(q8u16));
d27u8 = vqmovun_s16(vreinterpretq_s16_u16(q9u16));
vst1_lane_u32((uint32_t *)dest, vreinterpret_u32_u8(d27u8), 1);
dest -= stride;
vst1_lane_u32((uint32_t *)dest, vreinterpret_u32_u8(d27u8), 0);
dest -= stride;
vst1_lane_u32((uint32_t *)dest, vreinterpret_u32_u8(d26u8), 1);
dest -= stride;
vst1_lane_u32((uint32_t *)dest, vreinterpret_u32_u8(d26u8), 0);
}
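Old and new versions of the function finish identically: the 1-D results are rounded by four bits, added to the predictor, and saturated to eight bits. A scalar sketch of that final stage:

#include <stdint.h>

/* Scalar equivalent of the final add/clip stage (sketch):
 * vrshrq_n_s16(x, 4) == (x + 8) >> 4, and vqmovun saturates to [0, 255]. */
static void add_residual_4x4(const int16_t res[16], uint8_t *dest,
                             int stride) {
  int r, c;
  for (r = 0; r < 4; ++r) {
    for (c = 0; c < 4; ++c) {
      const int v = dest[r * stride + c] + ((res[r * 4 + c] + 8) >> 4);
      dest[r * stride + c] = (uint8_t)(v < 0 ? 0 : (v > 255 ? 255 : v));
    }
  }
}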

View File

@@ -14,199 +14,527 @@
#include "./vp9_rtcd.h"
#include "./vpx_config.h"
#include "vp9/common/vp9_common.h"
#include "vpx_dsp/arm/idct_neon.h"
#include "vpx_dsp/arm/mem_neon.h"
#include "vpx_dsp/arm/transpose_neon.h"
static INLINE void iadst_half_butterfly_neon(int16x8_t *const x,
const int16x4_t c) {
const int16x8_t sum = vaddq_s16(x[0], x[1]);
const int16x8_t sub = vsubq_s16(x[0], x[1]);
int32x4_t t0[2], t1[2];
static int16_t cospi_2_64 = 16305;
static int16_t cospi_4_64 = 16069;
static int16_t cospi_6_64 = 15679;
static int16_t cospi_8_64 = 15137;
static int16_t cospi_10_64 = 14449;
static int16_t cospi_12_64 = 13623;
static int16_t cospi_14_64 = 12665;
static int16_t cospi_16_64 = 11585;
static int16_t cospi_18_64 = 10394;
static int16_t cospi_20_64 = 9102;
static int16_t cospi_22_64 = 7723;
static int16_t cospi_24_64 = 6270;
static int16_t cospi_26_64 = 4756;
static int16_t cospi_28_64 = 3196;
static int16_t cospi_30_64 = 1606;
t0[0] = vmull_lane_s16(vget_low_s16(sum), c, 0);
t0[1] = vmull_lane_s16(vget_high_s16(sum), c, 0);
t1[0] = vmull_lane_s16(vget_low_s16(sub), c, 0);
t1[1] = vmull_lane_s16(vget_high_s16(sub), c, 0);
x[0] = dct_const_round_shift_low_8(t0);
x[1] = dct_const_round_shift_low_8(t1);
static INLINE void IDCT8x8_1D(int16x8_t *q8s16, int16x8_t *q9s16,
int16x8_t *q10s16, int16x8_t *q11s16,
int16x8_t *q12s16, int16x8_t *q13s16,
int16x8_t *q14s16, int16x8_t *q15s16) {
int16x4_t d0s16, d1s16, d2s16, d3s16;
int16x4_t d8s16, d9s16, d10s16, d11s16, d12s16, d13s16, d14s16, d15s16;
int16x4_t d16s16, d17s16, d18s16, d19s16, d20s16, d21s16, d22s16, d23s16;
int16x4_t d24s16, d25s16, d26s16, d27s16, d28s16, d29s16, d30s16, d31s16;
int16x8_t q0s16, q1s16, q2s16, q3s16, q4s16, q5s16, q6s16, q7s16;
int32x4_t q2s32, q3s32, q5s32, q6s32, q8s32, q9s32;
int32x4_t q10s32, q11s32, q12s32, q13s32, q15s32;
d0s16 = vdup_n_s16(cospi_28_64);
d1s16 = vdup_n_s16(cospi_4_64);
d2s16 = vdup_n_s16(cospi_12_64);
d3s16 = vdup_n_s16(cospi_20_64);
d16s16 = vget_low_s16(*q8s16);
d17s16 = vget_high_s16(*q8s16);
d18s16 = vget_low_s16(*q9s16);
d19s16 = vget_high_s16(*q9s16);
d20s16 = vget_low_s16(*q10s16);
d21s16 = vget_high_s16(*q10s16);
d22s16 = vget_low_s16(*q11s16);
d23s16 = vget_high_s16(*q11s16);
d24s16 = vget_low_s16(*q12s16);
d25s16 = vget_high_s16(*q12s16);
d26s16 = vget_low_s16(*q13s16);
d27s16 = vget_high_s16(*q13s16);
d28s16 = vget_low_s16(*q14s16);
d29s16 = vget_high_s16(*q14s16);
d30s16 = vget_low_s16(*q15s16);
d31s16 = vget_high_s16(*q15s16);
q2s32 = vmull_s16(d18s16, d0s16);
q3s32 = vmull_s16(d19s16, d0s16);
q5s32 = vmull_s16(d26s16, d2s16);
q6s32 = vmull_s16(d27s16, d2s16);
q2s32 = vmlsl_s16(q2s32, d30s16, d1s16);
q3s32 = vmlsl_s16(q3s32, d31s16, d1s16);
q5s32 = vmlsl_s16(q5s32, d22s16, d3s16);
q6s32 = vmlsl_s16(q6s32, d23s16, d3s16);
d8s16 = vrshrn_n_s32(q2s32, 14);
d9s16 = vrshrn_n_s32(q3s32, 14);
d10s16 = vrshrn_n_s32(q5s32, 14);
d11s16 = vrshrn_n_s32(q6s32, 14);
q4s16 = vcombine_s16(d8s16, d9s16);
q5s16 = vcombine_s16(d10s16, d11s16);
q2s32 = vmull_s16(d18s16, d1s16);
q3s32 = vmull_s16(d19s16, d1s16);
q9s32 = vmull_s16(d26s16, d3s16);
q13s32 = vmull_s16(d27s16, d3s16);
q2s32 = vmlal_s16(q2s32, d30s16, d0s16);
q3s32 = vmlal_s16(q3s32, d31s16, d0s16);
q9s32 = vmlal_s16(q9s32, d22s16, d2s16);
q13s32 = vmlal_s16(q13s32, d23s16, d2s16);
d14s16 = vrshrn_n_s32(q2s32, 14);
d15s16 = vrshrn_n_s32(q3s32, 14);
d12s16 = vrshrn_n_s32(q9s32, 14);
d13s16 = vrshrn_n_s32(q13s32, 14);
q6s16 = vcombine_s16(d12s16, d13s16);
q7s16 = vcombine_s16(d14s16, d15s16);
d0s16 = vdup_n_s16(cospi_16_64);
q2s32 = vmull_s16(d16s16, d0s16);
q3s32 = vmull_s16(d17s16, d0s16);
q13s32 = vmull_s16(d16s16, d0s16);
q15s32 = vmull_s16(d17s16, d0s16);
q2s32 = vmlal_s16(q2s32, d24s16, d0s16);
q3s32 = vmlal_s16(q3s32, d25s16, d0s16);
q13s32 = vmlsl_s16(q13s32, d24s16, d0s16);
q15s32 = vmlsl_s16(q15s32, d25s16, d0s16);
d0s16 = vdup_n_s16(cospi_24_64);
d1s16 = vdup_n_s16(cospi_8_64);
d18s16 = vrshrn_n_s32(q2s32, 14);
d19s16 = vrshrn_n_s32(q3s32, 14);
d22s16 = vrshrn_n_s32(q13s32, 14);
d23s16 = vrshrn_n_s32(q15s32, 14);
*q9s16 = vcombine_s16(d18s16, d19s16);
*q11s16 = vcombine_s16(d22s16, d23s16);
q2s32 = vmull_s16(d20s16, d0s16);
q3s32 = vmull_s16(d21s16, d0s16);
q8s32 = vmull_s16(d20s16, d1s16);
q12s32 = vmull_s16(d21s16, d1s16);
q2s32 = vmlsl_s16(q2s32, d28s16, d1s16);
q3s32 = vmlsl_s16(q3s32, d29s16, d1s16);
q8s32 = vmlal_s16(q8s32, d28s16, d0s16);
q12s32 = vmlal_s16(q12s32, d29s16, d0s16);
d26s16 = vrshrn_n_s32(q2s32, 14);
d27s16 = vrshrn_n_s32(q3s32, 14);
d30s16 = vrshrn_n_s32(q8s32, 14);
d31s16 = vrshrn_n_s32(q12s32, 14);
*q13s16 = vcombine_s16(d26s16, d27s16);
*q15s16 = vcombine_s16(d30s16, d31s16);
q0s16 = vaddq_s16(*q9s16, *q15s16);
q1s16 = vaddq_s16(*q11s16, *q13s16);
q2s16 = vsubq_s16(*q11s16, *q13s16);
q3s16 = vsubq_s16(*q9s16, *q15s16);
*q13s16 = vsubq_s16(q4s16, q5s16);
q4s16 = vaddq_s16(q4s16, q5s16);
*q14s16 = vsubq_s16(q7s16, q6s16);
q7s16 = vaddq_s16(q7s16, q6s16);
d26s16 = vget_low_s16(*q13s16);
d27s16 = vget_high_s16(*q13s16);
d28s16 = vget_low_s16(*q14s16);
d29s16 = vget_high_s16(*q14s16);
d16s16 = vdup_n_s16(cospi_16_64);
q9s32 = vmull_s16(d28s16, d16s16);
q10s32 = vmull_s16(d29s16, d16s16);
q11s32 = vmull_s16(d28s16, d16s16);
q12s32 = vmull_s16(d29s16, d16s16);
q9s32 = vmlsl_s16(q9s32, d26s16, d16s16);
q10s32 = vmlsl_s16(q10s32, d27s16, d16s16);
q11s32 = vmlal_s16(q11s32, d26s16, d16s16);
q12s32 = vmlal_s16(q12s32, d27s16, d16s16);
d10s16 = vrshrn_n_s32(q9s32, 14);
d11s16 = vrshrn_n_s32(q10s32, 14);
d12s16 = vrshrn_n_s32(q11s32, 14);
d13s16 = vrshrn_n_s32(q12s32, 14);
q5s16 = vcombine_s16(d10s16, d11s16);
q6s16 = vcombine_s16(d12s16, d13s16);
*q8s16 = vaddq_s16(q0s16, q7s16);
*q9s16 = vaddq_s16(q1s16, q6s16);
*q10s16 = vaddq_s16(q2s16, q5s16);
*q11s16 = vaddq_s16(q3s16, q4s16);
*q12s16 = vsubq_s16(q3s16, q4s16);
*q13s16 = vsubq_s16(q2s16, q5s16);
*q14s16 = vsubq_s16(q1s16, q6s16);
*q15s16 = vsubq_s16(q0s16, q7s16);
}
static INLINE void iadst_butterfly_lane_0_1_neon(const int16x8_t in0,
const int16x8_t in1,
const int16x4_t c,
int32x4_t *const s0,
int32x4_t *const s1) {
s0[0] = vmull_lane_s16(vget_low_s16(in0), c, 0);
s0[1] = vmull_lane_s16(vget_high_s16(in0), c, 0);
s1[0] = vmull_lane_s16(vget_low_s16(in0), c, 1);
s1[1] = vmull_lane_s16(vget_high_s16(in0), c, 1);
static INLINE void IADST8X8_1D(int16x8_t *q8s16, int16x8_t *q9s16,
int16x8_t *q10s16, int16x8_t *q11s16,
int16x8_t *q12s16, int16x8_t *q13s16,
int16x8_t *q14s16, int16x8_t *q15s16) {
int16x4_t d0s16, d1s16, d2s16, d3s16, d4s16, d5s16, d6s16, d7s16;
int16x4_t d8s16, d9s16, d10s16, d11s16, d12s16, d13s16, d14s16, d15s16;
int16x4_t d16s16, d17s16, d18s16, d19s16, d20s16, d21s16, d22s16, d23s16;
int16x4_t d24s16, d25s16, d26s16, d27s16, d28s16, d29s16, d30s16, d31s16;
int16x8_t q2s16, q4s16, q5s16, q6s16;
int32x4_t q0s32, q1s32, q2s32, q3s32, q4s32, q5s32, q6s32, q7s32, q8s32;
int32x4_t q9s32, q10s32, q11s32, q12s32, q13s32, q14s32, q15s32;
s0[0] = vmlal_lane_s16(s0[0], vget_low_s16(in1), c, 1);
s0[1] = vmlal_lane_s16(s0[1], vget_high_s16(in1), c, 1);
s1[0] = vmlsl_lane_s16(s1[0], vget_low_s16(in1), c, 0);
s1[1] = vmlsl_lane_s16(s1[1], vget_high_s16(in1), c, 0);
}
d16s16 = vget_low_s16(*q8s16);
d17s16 = vget_high_s16(*q8s16);
d18s16 = vget_low_s16(*q9s16);
d19s16 = vget_high_s16(*q9s16);
d20s16 = vget_low_s16(*q10s16);
d21s16 = vget_high_s16(*q10s16);
d22s16 = vget_low_s16(*q11s16);
d23s16 = vget_high_s16(*q11s16);
d24s16 = vget_low_s16(*q12s16);
d25s16 = vget_high_s16(*q12s16);
d26s16 = vget_low_s16(*q13s16);
d27s16 = vget_high_s16(*q13s16);
d28s16 = vget_low_s16(*q14s16);
d29s16 = vget_high_s16(*q14s16);
d30s16 = vget_low_s16(*q15s16);
d31s16 = vget_high_s16(*q15s16);
static INLINE void iadst_butterfly_lane_2_3_neon(const int16x8_t in0,
const int16x8_t in1,
const int16x4_t c,
int32x4_t *const s0,
int32x4_t *const s1) {
s0[0] = vmull_lane_s16(vget_low_s16(in0), c, 2);
s0[1] = vmull_lane_s16(vget_high_s16(in0), c, 2);
s1[0] = vmull_lane_s16(vget_low_s16(in0), c, 3);
s1[1] = vmull_lane_s16(vget_high_s16(in0), c, 3);
d14s16 = vdup_n_s16(cospi_2_64);
d15s16 = vdup_n_s16(cospi_30_64);
s0[0] = vmlal_lane_s16(s0[0], vget_low_s16(in1), c, 3);
s0[1] = vmlal_lane_s16(s0[1], vget_high_s16(in1), c, 3);
s1[0] = vmlsl_lane_s16(s1[0], vget_low_s16(in1), c, 2);
s1[1] = vmlsl_lane_s16(s1[1], vget_high_s16(in1), c, 2);
}
q1s32 = vmull_s16(d30s16, d14s16);
q2s32 = vmull_s16(d31s16, d14s16);
q3s32 = vmull_s16(d30s16, d15s16);
q4s32 = vmull_s16(d31s16, d15s16);
static INLINE void iadst_butterfly_lane_3_2_neon(const int16x8_t in0,
const int16x8_t in1,
const int16x4_t c,
int32x4_t *const s0,
int32x4_t *const s1) {
s0[0] = vmull_lane_s16(vget_low_s16(in0), c, 3);
s0[1] = vmull_lane_s16(vget_high_s16(in0), c, 3);
s1[0] = vmull_lane_s16(vget_low_s16(in0), c, 2);
s1[1] = vmull_lane_s16(vget_high_s16(in0), c, 2);
d30s16 = vdup_n_s16(cospi_18_64);
d31s16 = vdup_n_s16(cospi_14_64);
s0[0] = vmlal_lane_s16(s0[0], vget_low_s16(in1), c, 2);
s0[1] = vmlal_lane_s16(s0[1], vget_high_s16(in1), c, 2);
s1[0] = vmlsl_lane_s16(s1[0], vget_low_s16(in1), c, 3);
s1[1] = vmlsl_lane_s16(s1[1], vget_high_s16(in1), c, 3);
}
q1s32 = vmlal_s16(q1s32, d16s16, d15s16);
q2s32 = vmlal_s16(q2s32, d17s16, d15s16);
q3s32 = vmlsl_s16(q3s32, d16s16, d14s16);
q4s32 = vmlsl_s16(q4s32, d17s16, d14s16);
static INLINE int16x8_t add_dct_const_round_shift_low_8(
const int32x4_t *const in0, const int32x4_t *const in1) {
int32x4_t sum[2];
q5s32 = vmull_s16(d22s16, d30s16);
q6s32 = vmull_s16(d23s16, d30s16);
q7s32 = vmull_s16(d22s16, d31s16);
q8s32 = vmull_s16(d23s16, d31s16);
sum[0] = vaddq_s32(in0[0], in1[0]);
sum[1] = vaddq_s32(in0[1], in1[1]);
return dct_const_round_shift_low_8(sum);
}
q5s32 = vmlal_s16(q5s32, d24s16, d31s16);
q6s32 = vmlal_s16(q6s32, d25s16, d31s16);
q7s32 = vmlsl_s16(q7s32, d24s16, d30s16);
q8s32 = vmlsl_s16(q8s32, d25s16, d30s16);
static INLINE int16x8_t sub_dct_const_round_shift_low_8(
const int32x4_t *const in0, const int32x4_t *const in1) {
int32x4_t sum[2];
q11s32 = vaddq_s32(q1s32, q5s32);
q12s32 = vaddq_s32(q2s32, q6s32);
q1s32 = vsubq_s32(q1s32, q5s32);
q2s32 = vsubq_s32(q2s32, q6s32);
sum[0] = vsubq_s32(in0[0], in1[0]);
sum[1] = vsubq_s32(in0[1], in1[1]);
return dct_const_round_shift_low_8(sum);
}
d22s16 = vrshrn_n_s32(q11s32, 14);
d23s16 = vrshrn_n_s32(q12s32, 14);
*q11s16 = vcombine_s16(d22s16, d23s16);
static INLINE void iadst8(int16x8_t *const io) {
const int16x4_t c0 =
create_s16x4_neon(cospi_2_64, cospi_30_64, cospi_10_64, cospi_22_64);
const int16x4_t c1 =
create_s16x4_neon(cospi_18_64, cospi_14_64, cospi_26_64, cospi_6_64);
const int16x4_t c2 =
create_s16x4_neon(cospi_16_64, 0, cospi_8_64, cospi_24_64);
int16x8_t x[8], t[4];
int32x4_t s0[2], s1[2], s2[2], s3[2], s4[2], s5[2], s6[2], s7[2];
q12s32 = vaddq_s32(q3s32, q7s32);
q15s32 = vaddq_s32(q4s32, q8s32);
q3s32 = vsubq_s32(q3s32, q7s32);
q4s32 = vsubq_s32(q4s32, q8s32);
x[0] = io[7];
x[1] = io[0];
x[2] = io[5];
x[3] = io[2];
x[4] = io[3];
x[5] = io[4];
x[6] = io[1];
x[7] = io[6];
d2s16 = vrshrn_n_s32(q1s32, 14);
d3s16 = vrshrn_n_s32(q2s32, 14);
d24s16 = vrshrn_n_s32(q12s32, 14);
d25s16 = vrshrn_n_s32(q15s32, 14);
d6s16 = vrshrn_n_s32(q3s32, 14);
d7s16 = vrshrn_n_s32(q4s32, 14);
*q12s16 = vcombine_s16(d24s16, d25s16);
// stage 1
iadst_butterfly_lane_0_1_neon(x[0], x[1], c0, s0, s1);
iadst_butterfly_lane_2_3_neon(x[2], x[3], c0, s2, s3);
iadst_butterfly_lane_0_1_neon(x[4], x[5], c1, s4, s5);
iadst_butterfly_lane_2_3_neon(x[6], x[7], c1, s6, s7);
d0s16 = vdup_n_s16(cospi_10_64);
d1s16 = vdup_n_s16(cospi_22_64);
q4s32 = vmull_s16(d26s16, d0s16);
q5s32 = vmull_s16(d27s16, d0s16);
q2s32 = vmull_s16(d26s16, d1s16);
q6s32 = vmull_s16(d27s16, d1s16);
x[0] = add_dct_const_round_shift_low_8(s0, s4);
x[1] = add_dct_const_round_shift_low_8(s1, s5);
x[2] = add_dct_const_round_shift_low_8(s2, s6);
x[3] = add_dct_const_round_shift_low_8(s3, s7);
x[4] = sub_dct_const_round_shift_low_8(s0, s4);
x[5] = sub_dct_const_round_shift_low_8(s1, s5);
x[6] = sub_dct_const_round_shift_low_8(s2, s6);
x[7] = sub_dct_const_round_shift_low_8(s3, s7);
d30s16 = vdup_n_s16(cospi_26_64);
d31s16 = vdup_n_s16(cospi_6_64);
// stage 2
t[0] = x[0];
t[1] = x[1];
t[2] = x[2];
t[3] = x[3];
iadst_butterfly_lane_2_3_neon(x[4], x[5], c2, s4, s5);
iadst_butterfly_lane_3_2_neon(x[7], x[6], c2, s7, s6);
q4s32 = vmlal_s16(q4s32, d20s16, d1s16);
q5s32 = vmlal_s16(q5s32, d21s16, d1s16);
q2s32 = vmlsl_s16(q2s32, d20s16, d0s16);
q6s32 = vmlsl_s16(q6s32, d21s16, d0s16);
x[0] = vaddq_s16(t[0], t[2]);
x[1] = vaddq_s16(t[1], t[3]);
x[2] = vsubq_s16(t[0], t[2]);
x[3] = vsubq_s16(t[1], t[3]);
x[4] = add_dct_const_round_shift_low_8(s4, s6);
x[5] = add_dct_const_round_shift_low_8(s5, s7);
x[6] = sub_dct_const_round_shift_low_8(s4, s6);
x[7] = sub_dct_const_round_shift_low_8(s5, s7);
q0s32 = vmull_s16(d18s16, d30s16);
q13s32 = vmull_s16(d19s16, d30s16);
// stage 3
iadst_half_butterfly_neon(x + 2, c2);
iadst_half_butterfly_neon(x + 6, c2);
q0s32 = vmlal_s16(q0s32, d28s16, d31s16);
q13s32 = vmlal_s16(q13s32, d29s16, d31s16);
io[0] = x[0];
io[1] = vnegq_s16(x[4]);
io[2] = x[6];
io[3] = vnegq_s16(x[2]);
io[4] = x[3];
io[5] = vnegq_s16(x[7]);
io[6] = x[5];
io[7] = vnegq_s16(x[1]);
q10s32 = vmull_s16(d18s16, d31s16);
q9s32 = vmull_s16(d19s16, d31s16);
q10s32 = vmlsl_s16(q10s32, d28s16, d30s16);
q9s32 = vmlsl_s16(q9s32, d29s16, d30s16);
q14s32 = vaddq_s32(q2s32, q10s32);
q15s32 = vaddq_s32(q6s32, q9s32);
q2s32 = vsubq_s32(q2s32, q10s32);
q6s32 = vsubq_s32(q6s32, q9s32);
d28s16 = vrshrn_n_s32(q14s32, 14);
d29s16 = vrshrn_n_s32(q15s32, 14);
d4s16 = vrshrn_n_s32(q2s32, 14);
d5s16 = vrshrn_n_s32(q6s32, 14);
*q14s16 = vcombine_s16(d28s16, d29s16);
q9s32 = vaddq_s32(q4s32, q0s32);
q10s32 = vaddq_s32(q5s32, q13s32);
q4s32 = vsubq_s32(q4s32, q0s32);
q5s32 = vsubq_s32(q5s32, q13s32);
d30s16 = vdup_n_s16(cospi_8_64);
d31s16 = vdup_n_s16(cospi_24_64);
d18s16 = vrshrn_n_s32(q9s32, 14);
d19s16 = vrshrn_n_s32(q10s32, 14);
d8s16 = vrshrn_n_s32(q4s32, 14);
d9s16 = vrshrn_n_s32(q5s32, 14);
*q9s16 = vcombine_s16(d18s16, d19s16);
q5s32 = vmull_s16(d2s16, d30s16);
q6s32 = vmull_s16(d3s16, d30s16);
q7s32 = vmull_s16(d2s16, d31s16);
q0s32 = vmull_s16(d3s16, d31s16);
q5s32 = vmlal_s16(q5s32, d6s16, d31s16);
q6s32 = vmlal_s16(q6s32, d7s16, d31s16);
q7s32 = vmlsl_s16(q7s32, d6s16, d30s16);
q0s32 = vmlsl_s16(q0s32, d7s16, d30s16);
q1s32 = vmull_s16(d4s16, d30s16);
q3s32 = vmull_s16(d5s16, d30s16);
q10s32 = vmull_s16(d4s16, d31s16);
q2s32 = vmull_s16(d5s16, d31s16);
q1s32 = vmlsl_s16(q1s32, d8s16, d31s16);
q3s32 = vmlsl_s16(q3s32, d9s16, d31s16);
q10s32 = vmlal_s16(q10s32, d8s16, d30s16);
q2s32 = vmlal_s16(q2s32, d9s16, d30s16);
*q8s16 = vaddq_s16(*q11s16, *q9s16);
*q11s16 = vsubq_s16(*q11s16, *q9s16);
q4s16 = vaddq_s16(*q12s16, *q14s16);
*q12s16 = vsubq_s16(*q12s16, *q14s16);
q14s32 = vaddq_s32(q5s32, q1s32);
q15s32 = vaddq_s32(q6s32, q3s32);
q5s32 = vsubq_s32(q5s32, q1s32);
q6s32 = vsubq_s32(q6s32, q3s32);
d18s16 = vrshrn_n_s32(q14s32, 14);
d19s16 = vrshrn_n_s32(q15s32, 14);
d10s16 = vrshrn_n_s32(q5s32, 14);
d11s16 = vrshrn_n_s32(q6s32, 14);
*q9s16 = vcombine_s16(d18s16, d19s16);
q1s32 = vaddq_s32(q7s32, q10s32);
q3s32 = vaddq_s32(q0s32, q2s32);
q7s32 = vsubq_s32(q7s32, q10s32);
q0s32 = vsubq_s32(q0s32, q2s32);
d28s16 = vrshrn_n_s32(q1s32, 14);
d29s16 = vrshrn_n_s32(q3s32, 14);
d14s16 = vrshrn_n_s32(q7s32, 14);
d15s16 = vrshrn_n_s32(q0s32, 14);
*q14s16 = vcombine_s16(d28s16, d29s16);
d30s16 = vdup_n_s16(cospi_16_64);
d22s16 = vget_low_s16(*q11s16);
d23s16 = vget_high_s16(*q11s16);
q2s32 = vmull_s16(d22s16, d30s16);
q3s32 = vmull_s16(d23s16, d30s16);
q13s32 = vmull_s16(d22s16, d30s16);
q1s32 = vmull_s16(d23s16, d30s16);
d24s16 = vget_low_s16(*q12s16);
d25s16 = vget_high_s16(*q12s16);
q2s32 = vmlal_s16(q2s32, d24s16, d30s16);
q3s32 = vmlal_s16(q3s32, d25s16, d30s16);
q13s32 = vmlsl_s16(q13s32, d24s16, d30s16);
q1s32 = vmlsl_s16(q1s32, d25s16, d30s16);
d4s16 = vrshrn_n_s32(q2s32, 14);
d5s16 = vrshrn_n_s32(q3s32, 14);
d24s16 = vrshrn_n_s32(q13s32, 14);
d25s16 = vrshrn_n_s32(q1s32, 14);
q2s16 = vcombine_s16(d4s16, d5s16);
*q12s16 = vcombine_s16(d24s16, d25s16);
q13s32 = vmull_s16(d10s16, d30s16);
q1s32 = vmull_s16(d11s16, d30s16);
q11s32 = vmull_s16(d10s16, d30s16);
q0s32 = vmull_s16(d11s16, d30s16);
q13s32 = vmlal_s16(q13s32, d14s16, d30s16);
q1s32 = vmlal_s16(q1s32, d15s16, d30s16);
q11s32 = vmlsl_s16(q11s32, d14s16, d30s16);
q0s32 = vmlsl_s16(q0s32, d15s16, d30s16);
d20s16 = vrshrn_n_s32(q13s32, 14);
d21s16 = vrshrn_n_s32(q1s32, 14);
d12s16 = vrshrn_n_s32(q11s32, 14);
d13s16 = vrshrn_n_s32(q0s32, 14);
*q10s16 = vcombine_s16(d20s16, d21s16);
q6s16 = vcombine_s16(d12s16, d13s16);
q5s16 = vdupq_n_s16(0);
*q9s16 = vsubq_s16(q5s16, *q9s16);
*q11s16 = vsubq_s16(q5s16, q2s16);
*q13s16 = vsubq_s16(q5s16, q6s16);
*q15s16 = vsubq_s16(q5s16, q4s16);
}
void vp9_iht8x8_64_add_neon(const tran_low_t *input, uint8_t *dest, int stride,
int tx_type) {
const int16x8_t cospis = vld1q_s16(kCospi);
const int16x4_t cospis0 = vget_low_s16(cospis); // cospi 0, 8, 16, 24
const int16x4_t cospis1 = vget_high_s16(cospis); // cospi 4, 12, 20, 28
int16x8_t a[8];
int i;
uint8_t *d1, *d2;
uint8x8_t d0u8, d1u8, d2u8, d3u8;
uint64x1_t d0u64, d1u64, d2u64, d3u64;
int16x8_t q8s16, q9s16, q10s16, q11s16, q12s16, q13s16, q14s16, q15s16;
uint16x8_t q8u16, q9u16, q10u16, q11u16;
a[0] = load_tran_low_to_s16q(input + 0 * 8);
a[1] = load_tran_low_to_s16q(input + 1 * 8);
a[2] = load_tran_low_to_s16q(input + 2 * 8);
a[3] = load_tran_low_to_s16q(input + 3 * 8);
a[4] = load_tran_low_to_s16q(input + 4 * 8);
a[5] = load_tran_low_to_s16q(input + 5 * 8);
a[6] = load_tran_low_to_s16q(input + 6 * 8);
a[7] = load_tran_low_to_s16q(input + 7 * 8);
q8s16 = vld1q_s16(input);
q9s16 = vld1q_s16(input + 8);
q10s16 = vld1q_s16(input + 8 * 2);
q11s16 = vld1q_s16(input + 8 * 3);
q12s16 = vld1q_s16(input + 8 * 4);
q13s16 = vld1q_s16(input + 8 * 5);
q14s16 = vld1q_s16(input + 8 * 6);
q15s16 = vld1q_s16(input + 8 * 7);
transpose_s16_8x8(&a[0], &a[1], &a[2], &a[3], &a[4], &a[5], &a[6], &a[7]);
transpose_s16_8x8(&q8s16, &q9s16, &q10s16, &q11s16, &q12s16, &q13s16, &q14s16,
&q15s16);
switch (tx_type) {
case DCT_DCT:
idct8x8_64_1d_bd8_kernel(cospis0, cospis1, a);
transpose_s16_8x8(&a[0], &a[1], &a[2], &a[3], &a[4], &a[5], &a[6], &a[7]);
idct8x8_64_1d_bd8_kernel(cospis0, cospis1, a);
break;
case 0: // idct_idct is not supported. Fall back to C
vp9_iht8x8_64_add_c(input, dest, stride, tx_type);
return;
case 1: // iadst_idct
// generate IDCT constants
// GENERATE_IDCT_CONSTANTS
case ADST_DCT:
idct8x8_64_1d_bd8_kernel(cospis0, cospis1, a);
transpose_s16_8x8(&a[0], &a[1], &a[2], &a[3], &a[4], &a[5], &a[6], &a[7]);
iadst8(a);
break;
// first transform rows
IDCT8x8_1D(&q8s16, &q9s16, &q10s16, &q11s16, &q12s16, &q13s16, &q14s16,
&q15s16);
case DCT_ADST:
iadst8(a);
transpose_s16_8x8(&a[0], &a[1], &a[2], &a[3], &a[4], &a[5], &a[6], &a[7]);
idct8x8_64_1d_bd8_kernel(cospis0, cospis1, a);
break;
// transpose the matrix
transpose_s16_8x8(&q8s16, &q9s16, &q10s16, &q11s16, &q12s16, &q13s16,
&q14s16, &q15s16);
default:
assert(tx_type == ADST_ADST);
iadst8(a);
transpose_s16_8x8(&a[0], &a[1], &a[2], &a[3], &a[4], &a[5], &a[6], &a[7]);
iadst8(a);
// generate IADST constants
// GENERATE_IADST_CONSTANTS
// then transform columns
IADST8X8_1D(&q8s16, &q9s16, &q10s16, &q11s16, &q12s16, &q13s16, &q14s16,
&q15s16);
break;
case 2: // idct_iadst
// generate IADST constants
// GENERATE_IADST_CONSTANTS
// first transform rows
IADST8X8_1D(&q8s16, &q9s16, &q10s16, &q11s16, &q12s16, &q13s16, &q14s16,
&q15s16);
// transpose the matrix
transpose_s16_8x8(&q8s16, &q9s16, &q10s16, &q11s16, &q12s16, &q13s16,
&q14s16, &q15s16);
// generate IDCT constants
// GENERATE_IDCT_CONSTANTS
// then transform columns
IDCT8x8_1D(&q8s16, &q9s16, &q10s16, &q11s16, &q12s16, &q13s16, &q14s16,
&q15s16);
break;
case 3: // iadst_iadst
// generate IADST constants
// GENERATE_IADST_CONSTANTS
// first transform rows
IADST8X8_1D(&q8s16, &q9s16, &q10s16, &q11s16, &q12s16, &q13s16, &q14s16,
&q15s16);
// transpose the matrix
transpose_s16_8x8(&q8s16, &q9s16, &q10s16, &q11s16, &q12s16, &q13s16,
&q14s16, &q15s16);
// then transform columns
IADST8X8_1D(&q8s16, &q9s16, &q10s16, &q11s16, &q12s16, &q13s16, &q14s16,
&q15s16);
break;
default: // iadst_idct
assert(0);
break;
}
idct8x8_add8x8_neon(a, dest, stride);
q8s16 = vrshrq_n_s16(q8s16, 5);
q9s16 = vrshrq_n_s16(q9s16, 5);
q10s16 = vrshrq_n_s16(q10s16, 5);
q11s16 = vrshrq_n_s16(q11s16, 5);
q12s16 = vrshrq_n_s16(q12s16, 5);
q13s16 = vrshrq_n_s16(q13s16, 5);
q14s16 = vrshrq_n_s16(q14s16, 5);
q15s16 = vrshrq_n_s16(q15s16, 5);
for (d1 = d2 = dest, i = 0; i < 2; i++) {
if (i != 0) {
q8s16 = q12s16;
q9s16 = q13s16;
q10s16 = q14s16;
q11s16 = q15s16;
}
d0u64 = vld1_u64((uint64_t *)d1);
d1 += stride;
d1u64 = vld1_u64((uint64_t *)d1);
d1 += stride;
d2u64 = vld1_u64((uint64_t *)d1);
d1 += stride;
d3u64 = vld1_u64((uint64_t *)d1);
d1 += stride;
q8u16 = vaddw_u8(vreinterpretq_u16_s16(q8s16), vreinterpret_u8_u64(d0u64));
q9u16 = vaddw_u8(vreinterpretq_u16_s16(q9s16), vreinterpret_u8_u64(d1u64));
q10u16 =
vaddw_u8(vreinterpretq_u16_s16(q10s16), vreinterpret_u8_u64(d2u64));
q11u16 =
vaddw_u8(vreinterpretq_u16_s16(q11s16), vreinterpret_u8_u64(d3u64));
d0u8 = vqmovun_s16(vreinterpretq_s16_u16(q8u16));
d1u8 = vqmovun_s16(vreinterpretq_s16_u16(q9u16));
d2u8 = vqmovun_s16(vreinterpretq_s16_u16(q10u16));
d3u8 = vqmovun_s16(vreinterpretq_s16_u16(q11u16));
vst1_u64((uint64_t *)d2, vreinterpret_u64_u8(d0u8));
d2 += stride;
vst1_u64((uint64_t *)d2, vreinterpret_u64_u8(d1u8));
d2 += stride;
vst1_u64((uint64_t *)d2, vreinterpret_u64_u8(d2u8));
d2 += stride;
vst1_u64((uint64_t *)d2, vreinterpret_u64_u8(d3u8));
d2 += stride;
}
}
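/* The vrshrq_n_s16(x, 5) calls above are the final per-size output shift
 * (4 for 4x4, 5 for 8x8, 6 for 16x16 in this codebase); the widened add and
 * vqmovun_s16 then clip the sum to a pixel. A scalar sketch of the same
 * reconstruction step for one 8x8 value (illustrative helper name): */
static INLINE uint8_t add_residual_8x8(uint8_t pred, int16_t residual) {
  const int v = pred + ((residual + 16) >> 5);       /* rounding shift by 5 */
  return (uint8_t)(v < 0 ? 0 : (v > 255 ? 255 : v)); /* clip_pixel()        */
}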

@@ -1,60 +0,0 @@
/*
* Copyright (c) 2018 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef VP9_COMMON_ARM_NEON_VP9_IHT_NEON_H_
#define VP9_COMMON_ARM_NEON_VP9_IHT_NEON_H_
#include <arm_neon.h>
#include "./vp9_rtcd.h"
#include "./vpx_config.h"
#include "vp9/common/vp9_common.h"
#include "vpx_dsp/arm/idct_neon.h"
#include "vpx_dsp/arm/mem_neon.h"
#include "vpx_dsp/txfm_common.h"
static INLINE void iadst4(int16x8_t *const io) {
const int32x4_t c3 = vdupq_n_s32(sinpi_3_9);
int16x4_t x[4];
int32x4_t s[8], output[4];
const int16x4_t c =
create_s16x4_neon(sinpi_1_9, sinpi_2_9, sinpi_3_9, sinpi_4_9);
x[0] = vget_low_s16(io[0]);
x[1] = vget_low_s16(io[1]);
x[2] = vget_high_s16(io[0]);
x[3] = vget_high_s16(io[1]);
s[0] = vmull_lane_s16(x[0], c, 0);
s[1] = vmull_lane_s16(x[0], c, 1);
s[2] = vmull_lane_s16(x[1], c, 2);
s[3] = vmull_lane_s16(x[2], c, 3);
s[4] = vmull_lane_s16(x[2], c, 0);
s[5] = vmull_lane_s16(x[3], c, 1);
s[6] = vmull_lane_s16(x[3], c, 3);
s[7] = vaddl_s16(x[0], x[3]);
s[7] = vsubw_s16(s[7], x[2]);
s[0] = vaddq_s32(s[0], s[3]);
s[0] = vaddq_s32(s[0], s[5]);
s[1] = vsubq_s32(s[1], s[4]);
s[1] = vsubq_s32(s[1], s[6]);
s[3] = s[2];
s[2] = vmulq_s32(c3, s[7]);
output[0] = vaddq_s32(s[0], s[3]);
output[1] = vaddq_s32(s[1], s[3]);
output[2] = s[2];
output[3] = vaddq_s32(s[0], s[1]);
output[3] = vsubq_s32(output[3], s[3]);
dct_const_round_shift_low_8_dual(output, &io[0], &io[1]);
}
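/* For reference, the same 4-point inverse ADST in scalar form, a sketch
 * following iadst4_c in vp9/common/vp9_idct.c (WRAPLOW omitted for brevity;
 * dct_const_round_shift() is the usual (x + (1 << 13)) >> 14): */
static void iadst4_scalar(const int16_t *in, int16_t *out) {
  const int64_t s0 = (int64_t)sinpi_1_9 * in[0] + (int64_t)sinpi_4_9 * in[2] +
                     (int64_t)sinpi_2_9 * in[3];
  const int64_t s1 = (int64_t)sinpi_2_9 * in[0] - (int64_t)sinpi_1_9 * in[2] -
                     (int64_t)sinpi_4_9 * in[3];
  const int64_t s2 = (int64_t)sinpi_3_9 * (in[0] - in[2] + in[3]);
  const int64_t s3 = (int64_t)sinpi_3_9 * in[1];
  out[0] = (int16_t)dct_const_round_shift(s0 + s3);
  out[1] = (int16_t)dct_const_round_shift(s1 + s3);
  out[2] = (int16_t)dct_const_round_shift(s2);
  out[3] = (int16_t)dct_const_round_shift(s0 + s1 - s3);
}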
#endif // VP9_COMMON_ARM_NEON_VP9_IHT_NEON_H_

@@ -42,7 +42,6 @@ const vpx_prob vp9_cat6_prob_high12[] = { 255, 255, 255, 255, 254, 254,
177, 153, 140, 133, 130, 129 };
#endif
/* clang-format off */
const uint8_t vp9_coefband_trans_8x8plus[1024] = {
0, 1, 1, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5,
// beyond MAXBAND_INDEX+1 all values are filled as 5
@@ -86,7 +85,6 @@ const uint8_t vp9_coefband_trans_8x8plus[1024] = {
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
};
/* clang-format on */
const uint8_t vp9_coefband_trans_4x4[16] = {
0, 1, 1, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 5, 5, 5,

@@ -137,6 +137,7 @@ static INLINE const uint8_t *get_band_translate(TX_SIZE tx_size) {
// 128 lists of probabilities are stored for the following ONE node probs:
// 1, 3, 5, 7, ..., 253, 255
// In between probabilities are interpolated linearly
#define COEFF_PROB_MODELS 255
#define UNCONSTRAINED_NODES 3
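/* A hypothetical sketch of the interpolation described above: lists are
 * stored for every odd node probability, and an even probability takes the
 * midpoint of its two stored neighbours. The helper name and signature are
 * illustrative, not the library's: */
static uint8_t interp_node_prob(const uint8_t model[128], int p /* 1..255 */) {
  const int i = (p - 1) >> 1;  /* model[i] holds the list for prob 2*i + 1 */
  if (p & 1) return model[i];  /* odd probabilities are stored exactly     */
  return (uint8_t)((model[i] + model[i + 1] + 1) >> 1); /* linear midpoint */
}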

@@ -186,19 +186,16 @@ const vpx_prob vp9_kf_partition_probs[PARTITION_CONTEXTS][PARTITION_TYPES - 1] =
{ 93, 24, 99 }, // a split, l not split
{ 85, 119, 44 }, // l split, a not split
{ 62, 59, 67 }, // a/l both split
// 16x16 -> 8x8
{ 149, 53, 53 }, // a/l both not split
{ 94, 20, 48 }, // a split, l not split
{ 83, 53, 24 }, // l split, a not split
{ 52, 18, 18 }, // a/l both split
// 32x32 -> 16x16
{ 150, 40, 39 }, // a/l both not split
{ 78, 12, 26 }, // a split, l not split
{ 67, 33, 11 }, // l split, a not split
{ 24, 7, 5 }, // a/l both split
// 64x64 -> 32x32
{ 174, 35, 49 }, // a/l both not split
{ 68, 11, 27 }, // a split, l not split

@@ -22,7 +22,9 @@ const vpx_tree_index vp9_mv_class_tree[TREE_SIZE(MV_CLASSES)] = {
18, -MV_CLASS_7, -MV_CLASS_8, -MV_CLASS_9, -MV_CLASS_10,
};
const vpx_tree_index vp9_mv_class0_tree[TREE_SIZE(CLASS0_SIZE)] = { -0, -1 };
const vpx_tree_index vp9_mv_class0_tree[TREE_SIZE(CLASS0_SIZE)] = {
-0, -1,
};
const vpx_tree_index vp9_mv_fp_tree[TREE_SIZE(MV_FP_SIZE)] = { -0, 2, -1,
4, -2, -3 };
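/* The tree arrays above pack two entries per node: a non-negative value
 * indexes the next node pair, a negative value is a leaf storing -token.
 * Decoding walks the array with one binary read per node, essentially
 * vpx_read_tree() in vpx_dsp/bitreader.h; this is a sketch, not a verbatim
 * copy: */
static INLINE int read_tree(vpx_reader *r, const vpx_tree_index *tree,
                            const vpx_prob *probs) {
  vpx_tree_index i = 0;
  while ((i = tree[i + vpx_read(r, probs[i >> 1])]) > 0) continue;
  return -i; /* leaf values are negated tokens */
}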

@@ -1174,7 +1174,7 @@ void vp9_filter_block_plane_non420(VP9_COMMON *cm,
}
// Disable filtering on the leftmost column
border_mask = ~(mi_col == 0 ? 1 : 0);
border_mask = ~(mi_col == 0);
#if CONFIG_VP9_HIGHBITDEPTH
if (cm->use_highbitdepth) {
highbd_filter_selectively_vert(
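/* Worked example: the two forms of the assignment compute the same mask.
 * When mi_col == 0 the operand is 1, so border_mask == ~1 (all bits set
 * except bit 0) and the leftmost 8-pixel column is excluded from vertical
 * filtering; for any other mi_col, border_mask == ~0 and nothing is
 * masked. */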

@@ -229,8 +229,9 @@ int vp9_get_pred_context_single_ref_p2(const MACROBLOCKD *xd) {
else
pred_context = 4 * (edge_mi->ref_frame[0] == GOLDEN_FRAME);
} else {
pred_context = 1 + 2 * (edge_mi->ref_frame[0] == GOLDEN_FRAME ||
edge_mi->ref_frame[1] == GOLDEN_FRAME);
pred_context = 1 +
2 * (edge_mi->ref_frame[0] == GOLDEN_FRAME ||
edge_mi->ref_frame[1] == GOLDEN_FRAME);
}
} else { // inter/inter
const int above_has_second = has_second_ref(above_mi);

@@ -1,13 +1,3 @@
##
## Copyright (c) 2017 The WebM project authors. All Rights Reserved.
##
## Use of this source code is governed by a BSD-style license
## that can be found in the LICENSE file in the root of the source
## tree. An additional intellectual property rights grant can be found
## in the file PATENTS. All contributing project authors may
## be found in the AUTHORS file in the root of the source tree.
##
sub vp9_common_forward_decls() {
print <<EOF
/*
@@ -67,13 +57,13 @@ add_proto qw/void vp9_iht16x16_256_add/, "const tran_low_t *input, uint8_t *outp
if (vpx_config("CONFIG_EMULATE_HARDWARE") ne "yes") {
# Note that there are more specializations appended when
# CONFIG_VP9_HIGHBITDEPTH is off.
specialize qw/vp9_iht4x4_16_add neon sse2/;
specialize qw/vp9_iht4x4_16_add sse2/;
specialize qw/vp9_iht8x8_64_add sse2/;
specialize qw/vp9_iht16x16_256_add sse2/;
if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") ne "yes") {
# Note that these specializations are appended to the above ones.
specialize qw/vp9_iht4x4_16_add dspr2 msa/;
specialize qw/vp9_iht8x8_64_add dspr2 msa/;
specialize qw/vp9_iht4x4_16_add neon dspr2 msa/;
specialize qw/vp9_iht8x8_64_add neon dspr2 msa/;
specialize qw/vp9_iht16x16_256_add dspr2 msa/;
}
}
@@ -101,12 +91,6 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
add_proto qw/void vp9_highbd_iht8x8_64_add/, "const tran_low_t *input, uint16_t *dest, int stride, int tx_type, int bd";
add_proto qw/void vp9_highbd_iht16x16_256_add/, "const tran_low_t *input, uint16_t *output, int pitch, int tx_type, int bd";
if (vpx_config("CONFIG_EMULATE_HARDWARE") ne "yes") {
specialize qw/vp9_highbd_iht4x4_16_add neon sse4_1/;
specialize qw/vp9_highbd_iht8x8_64_add sse4_1/;
specialize qw/vp9_highbd_iht16x16_256_add sse4_1/;
}
}
#
@@ -129,7 +113,7 @@ add_proto qw/int64_t vp9_block_error/, "const tran_low_t *coeff, const tran_low_
add_proto qw/int64_t vp9_block_error_fp/, "const tran_low_t *coeff, const tran_low_t *dqcoeff, int block_size";
add_proto qw/void vp9_quantize_fp/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *round_ptr, const int16_t *quant_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
specialize qw/vp9_quantize_fp neon sse2 avx2/, "$ssse3_x86_64";
specialize qw/vp9_quantize_fp neon sse2/, "$ssse3_x86_64";
add_proto qw/void vp9_quantize_fp_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *round_ptr, const int16_t *quant_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
specialize qw/vp9_quantize_fp_32x32 neon/, "$ssse3_x86_64";

@@ -1,419 +0,0 @@
/*
* Copyright (c) 2018 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "./vp9_rtcd.h"
#include "vp9/common/vp9_idct.h"
#include "vpx_dsp/x86/highbd_inv_txfm_sse4.h"
#include "vpx_dsp/x86/inv_txfm_sse2.h"
#include "vpx_dsp/x86/transpose_sse2.h"
#include "vpx_dsp/x86/txfm_common_sse2.h"
static INLINE void highbd_iadst_half_butterfly_sse4_1(const __m128i in,
const int c,
__m128i *const s) {
const __m128i pair_c = pair_set_epi32(4 * c, 0);
__m128i x[2];
extend_64bit(in, x);
s[0] = _mm_mul_epi32(pair_c, x[0]);
s[1] = _mm_mul_epi32(pair_c, x[1]);
}
static INLINE void highbd_iadst_butterfly_sse4_1(const __m128i in0,
const __m128i in1,
const int c0, const int c1,
__m128i *const s0,
__m128i *const s1) {
const __m128i pair_c0 = pair_set_epi32(4 * c0, 0);
const __m128i pair_c1 = pair_set_epi32(4 * c1, 0);
__m128i t00[2], t01[2], t10[2], t11[2];
__m128i x0[2], x1[2];
extend_64bit(in0, x0);
extend_64bit(in1, x1);
t00[0] = _mm_mul_epi32(pair_c0, x0[0]);
t00[1] = _mm_mul_epi32(pair_c0, x0[1]);
t01[0] = _mm_mul_epi32(pair_c0, x1[0]);
t01[1] = _mm_mul_epi32(pair_c0, x1[1]);
t10[0] = _mm_mul_epi32(pair_c1, x0[0]);
t10[1] = _mm_mul_epi32(pair_c1, x0[1]);
t11[0] = _mm_mul_epi32(pair_c1, x1[0]);
t11[1] = _mm_mul_epi32(pair_c1, x1[1]);
s0[0] = _mm_add_epi64(t00[0], t11[0]);
s0[1] = _mm_add_epi64(t00[1], t11[1]);
s1[0] = _mm_sub_epi64(t10[0], t01[0]);
s1[1] = _mm_sub_epi64(t10[1], t01[1]);
}
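/* Scalar sketch of the butterfly above, computed in 64-bit lanes:
 *   s0 = (int64_t)c0 * in0 + (int64_t)c1 * in1
 *   s1 = (int64_t)c1 * in0 - (int64_t)c0 * in1
 * The 4 * c pre-scale in pair_set_epi32() appears to let the later
 * dct_const_round_shift_64bit() round and shift by 16 bits (DCT_CONST_BITS
 * plus 2) with a cheap byte shift; the results are then narrowed back to
 * 32-bit lanes by pack_4(). */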
static void highbd_iadst16_4col_sse4_1(__m128i *const io /*io[16]*/) {
__m128i s0[2], s1[2], s2[2], s3[2], s4[2], s5[2], s6[2], s7[2], s8[2], s9[2],
s10[2], s11[2], s12[2], s13[2], s14[2], s15[2];
__m128i x0[2], x1[2], x2[2], x3[2], x4[2], x5[2], x6[2], x7[2], x8[2], x9[2],
x10[2], x11[2], x12[2], x13[2], x14[2], x15[2];
// stage 1
highbd_iadst_butterfly_sse4_1(io[15], io[0], cospi_1_64, cospi_31_64, s0, s1);
highbd_iadst_butterfly_sse4_1(io[13], io[2], cospi_5_64, cospi_27_64, s2, s3);
highbd_iadst_butterfly_sse4_1(io[11], io[4], cospi_9_64, cospi_23_64, s4, s5);
highbd_iadst_butterfly_sse4_1(io[9], io[6], cospi_13_64, cospi_19_64, s6, s7);
highbd_iadst_butterfly_sse4_1(io[7], io[8], cospi_17_64, cospi_15_64, s8, s9);
highbd_iadst_butterfly_sse4_1(io[5], io[10], cospi_21_64, cospi_11_64, s10,
s11);
highbd_iadst_butterfly_sse4_1(io[3], io[12], cospi_25_64, cospi_7_64, s12,
s13);
highbd_iadst_butterfly_sse4_1(io[1], io[14], cospi_29_64, cospi_3_64, s14,
s15);
x0[0] = _mm_add_epi64(s0[0], s8[0]);
x0[1] = _mm_add_epi64(s0[1], s8[1]);
x1[0] = _mm_add_epi64(s1[0], s9[0]);
x1[1] = _mm_add_epi64(s1[1], s9[1]);
x2[0] = _mm_add_epi64(s2[0], s10[0]);
x2[1] = _mm_add_epi64(s2[1], s10[1]);
x3[0] = _mm_add_epi64(s3[0], s11[0]);
x3[1] = _mm_add_epi64(s3[1], s11[1]);
x4[0] = _mm_add_epi64(s4[0], s12[0]);
x4[1] = _mm_add_epi64(s4[1], s12[1]);
x5[0] = _mm_add_epi64(s5[0], s13[0]);
x5[1] = _mm_add_epi64(s5[1], s13[1]);
x6[0] = _mm_add_epi64(s6[0], s14[0]);
x6[1] = _mm_add_epi64(s6[1], s14[1]);
x7[0] = _mm_add_epi64(s7[0], s15[0]);
x7[1] = _mm_add_epi64(s7[1], s15[1]);
x8[0] = _mm_sub_epi64(s0[0], s8[0]);
x8[1] = _mm_sub_epi64(s0[1], s8[1]);
x9[0] = _mm_sub_epi64(s1[0], s9[0]);
x9[1] = _mm_sub_epi64(s1[1], s9[1]);
x10[0] = _mm_sub_epi64(s2[0], s10[0]);
x10[1] = _mm_sub_epi64(s2[1], s10[1]);
x11[0] = _mm_sub_epi64(s3[0], s11[0]);
x11[1] = _mm_sub_epi64(s3[1], s11[1]);
x12[0] = _mm_sub_epi64(s4[0], s12[0]);
x12[1] = _mm_sub_epi64(s4[1], s12[1]);
x13[0] = _mm_sub_epi64(s5[0], s13[0]);
x13[1] = _mm_sub_epi64(s5[1], s13[1]);
x14[0] = _mm_sub_epi64(s6[0], s14[0]);
x14[1] = _mm_sub_epi64(s6[1], s14[1]);
x15[0] = _mm_sub_epi64(s7[0], s15[0]);
x15[1] = _mm_sub_epi64(s7[1], s15[1]);
x0[0] = dct_const_round_shift_64bit(x0[0]);
x0[1] = dct_const_round_shift_64bit(x0[1]);
x1[0] = dct_const_round_shift_64bit(x1[0]);
x1[1] = dct_const_round_shift_64bit(x1[1]);
x2[0] = dct_const_round_shift_64bit(x2[0]);
x2[1] = dct_const_round_shift_64bit(x2[1]);
x3[0] = dct_const_round_shift_64bit(x3[0]);
x3[1] = dct_const_round_shift_64bit(x3[1]);
x4[0] = dct_const_round_shift_64bit(x4[0]);
x4[1] = dct_const_round_shift_64bit(x4[1]);
x5[0] = dct_const_round_shift_64bit(x5[0]);
x5[1] = dct_const_round_shift_64bit(x5[1]);
x6[0] = dct_const_round_shift_64bit(x6[0]);
x6[1] = dct_const_round_shift_64bit(x6[1]);
x7[0] = dct_const_round_shift_64bit(x7[0]);
x7[1] = dct_const_round_shift_64bit(x7[1]);
x8[0] = dct_const_round_shift_64bit(x8[0]);
x8[1] = dct_const_round_shift_64bit(x8[1]);
x9[0] = dct_const_round_shift_64bit(x9[0]);
x9[1] = dct_const_round_shift_64bit(x9[1]);
x10[0] = dct_const_round_shift_64bit(x10[0]);
x10[1] = dct_const_round_shift_64bit(x10[1]);
x11[0] = dct_const_round_shift_64bit(x11[0]);
x11[1] = dct_const_round_shift_64bit(x11[1]);
x12[0] = dct_const_round_shift_64bit(x12[0]);
x12[1] = dct_const_round_shift_64bit(x12[1]);
x13[0] = dct_const_round_shift_64bit(x13[0]);
x13[1] = dct_const_round_shift_64bit(x13[1]);
x14[0] = dct_const_round_shift_64bit(x14[0]);
x14[1] = dct_const_round_shift_64bit(x14[1]);
x15[0] = dct_const_round_shift_64bit(x15[0]);
x15[1] = dct_const_round_shift_64bit(x15[1]);
x0[0] = pack_4(x0[0], x0[1]);
x1[0] = pack_4(x1[0], x1[1]);
x2[0] = pack_4(x2[0], x2[1]);
x3[0] = pack_4(x3[0], x3[1]);
x4[0] = pack_4(x4[0], x4[1]);
x5[0] = pack_4(x5[0], x5[1]);
x6[0] = pack_4(x6[0], x6[1]);
x7[0] = pack_4(x7[0], x7[1]);
x8[0] = pack_4(x8[0], x8[1]);
x9[0] = pack_4(x9[0], x9[1]);
x10[0] = pack_4(x10[0], x10[1]);
x11[0] = pack_4(x11[0], x11[1]);
x12[0] = pack_4(x12[0], x12[1]);
x13[0] = pack_4(x13[0], x13[1]);
x14[0] = pack_4(x14[0], x14[1]);
x15[0] = pack_4(x15[0], x15[1]);
// stage 2
s0[0] = x0[0];
s1[0] = x1[0];
s2[0] = x2[0];
s3[0] = x3[0];
s4[0] = x4[0];
s5[0] = x5[0];
s6[0] = x6[0];
s7[0] = x7[0];
x0[0] = _mm_add_epi32(s0[0], s4[0]);
x1[0] = _mm_add_epi32(s1[0], s5[0]);
x2[0] = _mm_add_epi32(s2[0], s6[0]);
x3[0] = _mm_add_epi32(s3[0], s7[0]);
x4[0] = _mm_sub_epi32(s0[0], s4[0]);
x5[0] = _mm_sub_epi32(s1[0], s5[0]);
x6[0] = _mm_sub_epi32(s2[0], s6[0]);
x7[0] = _mm_sub_epi32(s3[0], s7[0]);
highbd_iadst_butterfly_sse4_1(x8[0], x9[0], cospi_4_64, cospi_28_64, s8, s9);
highbd_iadst_butterfly_sse4_1(x10[0], x11[0], cospi_20_64, cospi_12_64, s10,
s11);
highbd_iadst_butterfly_sse4_1(x13[0], x12[0], cospi_28_64, cospi_4_64, s13,
s12);
highbd_iadst_butterfly_sse4_1(x15[0], x14[0], cospi_12_64, cospi_20_64, s15,
s14);
x8[0] = _mm_add_epi64(s8[0], s12[0]);
x8[1] = _mm_add_epi64(s8[1], s12[1]);
x9[0] = _mm_add_epi64(s9[0], s13[0]);
x9[1] = _mm_add_epi64(s9[1], s13[1]);
x10[0] = _mm_add_epi64(s10[0], s14[0]);
x10[1] = _mm_add_epi64(s10[1], s14[1]);
x11[0] = _mm_add_epi64(s11[0], s15[0]);
x11[1] = _mm_add_epi64(s11[1], s15[1]);
x12[0] = _mm_sub_epi64(s8[0], s12[0]);
x12[1] = _mm_sub_epi64(s8[1], s12[1]);
x13[0] = _mm_sub_epi64(s9[0], s13[0]);
x13[1] = _mm_sub_epi64(s9[1], s13[1]);
x14[0] = _mm_sub_epi64(s10[0], s14[0]);
x14[1] = _mm_sub_epi64(s10[1], s14[1]);
x15[0] = _mm_sub_epi64(s11[0], s15[0]);
x15[1] = _mm_sub_epi64(s11[1], s15[1]);
x8[0] = dct_const_round_shift_64bit(x8[0]);
x8[1] = dct_const_round_shift_64bit(x8[1]);
x9[0] = dct_const_round_shift_64bit(x9[0]);
x9[1] = dct_const_round_shift_64bit(x9[1]);
x10[0] = dct_const_round_shift_64bit(x10[0]);
x10[1] = dct_const_round_shift_64bit(x10[1]);
x11[0] = dct_const_round_shift_64bit(x11[0]);
x11[1] = dct_const_round_shift_64bit(x11[1]);
x12[0] = dct_const_round_shift_64bit(x12[0]);
x12[1] = dct_const_round_shift_64bit(x12[1]);
x13[0] = dct_const_round_shift_64bit(x13[0]);
x13[1] = dct_const_round_shift_64bit(x13[1]);
x14[0] = dct_const_round_shift_64bit(x14[0]);
x14[1] = dct_const_round_shift_64bit(x14[1]);
x15[0] = dct_const_round_shift_64bit(x15[0]);
x15[1] = dct_const_round_shift_64bit(x15[1]);
x8[0] = pack_4(x8[0], x8[1]);
x9[0] = pack_4(x9[0], x9[1]);
x10[0] = pack_4(x10[0], x10[1]);
x11[0] = pack_4(x11[0], x11[1]);
x12[0] = pack_4(x12[0], x12[1]);
x13[0] = pack_4(x13[0], x13[1]);
x14[0] = pack_4(x14[0], x14[1]);
x15[0] = pack_4(x15[0], x15[1]);
// stage 3
s0[0] = x0[0];
s1[0] = x1[0];
s2[0] = x2[0];
s3[0] = x3[0];
highbd_iadst_butterfly_sse4_1(x4[0], x5[0], cospi_8_64, cospi_24_64, s4, s5);
highbd_iadst_butterfly_sse4_1(x7[0], x6[0], cospi_24_64, cospi_8_64, s7, s6);
s8[0] = x8[0];
s9[0] = x9[0];
s10[0] = x10[0];
s11[0] = x11[0];
highbd_iadst_butterfly_sse4_1(x12[0], x13[0], cospi_8_64, cospi_24_64, s12,
s13);
highbd_iadst_butterfly_sse4_1(x15[0], x14[0], cospi_24_64, cospi_8_64, s15,
s14);
x0[0] = _mm_add_epi32(s0[0], s2[0]);
x1[0] = _mm_add_epi32(s1[0], s3[0]);
x2[0] = _mm_sub_epi32(s0[0], s2[0]);
x3[0] = _mm_sub_epi32(s1[0], s3[0]);
x4[0] = _mm_add_epi64(s4[0], s6[0]);
x4[1] = _mm_add_epi64(s4[1], s6[1]);
x5[0] = _mm_add_epi64(s5[0], s7[0]);
x5[1] = _mm_add_epi64(s5[1], s7[1]);
x6[0] = _mm_sub_epi64(s4[0], s6[0]);
x6[1] = _mm_sub_epi64(s4[1], s6[1]);
x7[0] = _mm_sub_epi64(s5[0], s7[0]);
x7[1] = _mm_sub_epi64(s5[1], s7[1]);
x4[0] = dct_const_round_shift_64bit(x4[0]);
x4[1] = dct_const_round_shift_64bit(x4[1]);
x5[0] = dct_const_round_shift_64bit(x5[0]);
x5[1] = dct_const_round_shift_64bit(x5[1]);
x6[0] = dct_const_round_shift_64bit(x6[0]);
x6[1] = dct_const_round_shift_64bit(x6[1]);
x7[0] = dct_const_round_shift_64bit(x7[0]);
x7[1] = dct_const_round_shift_64bit(x7[1]);
x4[0] = pack_4(x4[0], x4[1]);
x5[0] = pack_4(x5[0], x5[1]);
x6[0] = pack_4(x6[0], x6[1]);
x7[0] = pack_4(x7[0], x7[1]);
x8[0] = _mm_add_epi32(s8[0], s10[0]);
x9[0] = _mm_add_epi32(s9[0], s11[0]);
x10[0] = _mm_sub_epi32(s8[0], s10[0]);
x11[0] = _mm_sub_epi32(s9[0], s11[0]);
x12[0] = _mm_add_epi64(s12[0], s14[0]);
x12[1] = _mm_add_epi64(s12[1], s14[1]);
x13[0] = _mm_add_epi64(s13[0], s15[0]);
x13[1] = _mm_add_epi64(s13[1], s15[1]);
x14[0] = _mm_sub_epi64(s12[0], s14[0]);
x14[1] = _mm_sub_epi64(s12[1], s14[1]);
x15[0] = _mm_sub_epi64(s13[0], s15[0]);
x15[1] = _mm_sub_epi64(s13[1], s15[1]);
x12[0] = dct_const_round_shift_64bit(x12[0]);
x12[1] = dct_const_round_shift_64bit(x12[1]);
x13[0] = dct_const_round_shift_64bit(x13[0]);
x13[1] = dct_const_round_shift_64bit(x13[1]);
x14[0] = dct_const_round_shift_64bit(x14[0]);
x14[1] = dct_const_round_shift_64bit(x14[1]);
x15[0] = dct_const_round_shift_64bit(x15[0]);
x15[1] = dct_const_round_shift_64bit(x15[1]);
x12[0] = pack_4(x12[0], x12[1]);
x13[0] = pack_4(x13[0], x13[1]);
x14[0] = pack_4(x14[0], x14[1]);
x15[0] = pack_4(x15[0], x15[1]);
// stage 4
s2[0] = _mm_add_epi32(x2[0], x3[0]);
s3[0] = _mm_sub_epi32(x2[0], x3[0]);
s6[0] = _mm_add_epi32(x7[0], x6[0]);
s7[0] = _mm_sub_epi32(x7[0], x6[0]);
s10[0] = _mm_add_epi32(x11[0], x10[0]);
s11[0] = _mm_sub_epi32(x11[0], x10[0]);
s14[0] = _mm_add_epi32(x14[0], x15[0]);
s15[0] = _mm_sub_epi32(x14[0], x15[0]);
highbd_iadst_half_butterfly_sse4_1(s2[0], -cospi_16_64, s2);
highbd_iadst_half_butterfly_sse4_1(s3[0], cospi_16_64, s3);
highbd_iadst_half_butterfly_sse4_1(s6[0], cospi_16_64, s6);
highbd_iadst_half_butterfly_sse4_1(s7[0], cospi_16_64, s7);
highbd_iadst_half_butterfly_sse4_1(s10[0], cospi_16_64, s10);
highbd_iadst_half_butterfly_sse4_1(s11[0], cospi_16_64, s11);
highbd_iadst_half_butterfly_sse4_1(s14[0], -cospi_16_64, s14);
highbd_iadst_half_butterfly_sse4_1(s15[0], cospi_16_64, s15);
x2[0] = dct_const_round_shift_64bit(s2[0]);
x2[1] = dct_const_round_shift_64bit(s2[1]);
x3[0] = dct_const_round_shift_64bit(s3[0]);
x3[1] = dct_const_round_shift_64bit(s3[1]);
x6[0] = dct_const_round_shift_64bit(s6[0]);
x6[1] = dct_const_round_shift_64bit(s6[1]);
x7[0] = dct_const_round_shift_64bit(s7[0]);
x7[1] = dct_const_round_shift_64bit(s7[1]);
x10[0] = dct_const_round_shift_64bit(s10[0]);
x10[1] = dct_const_round_shift_64bit(s10[1]);
x11[0] = dct_const_round_shift_64bit(s11[0]);
x11[1] = dct_const_round_shift_64bit(s11[1]);
x14[0] = dct_const_round_shift_64bit(s14[0]);
x14[1] = dct_const_round_shift_64bit(s14[1]);
x15[0] = dct_const_round_shift_64bit(s15[0]);
x15[1] = dct_const_round_shift_64bit(s15[1]);
x2[0] = pack_4(x2[0], x2[1]);
x3[0] = pack_4(x3[0], x3[1]);
x6[0] = pack_4(x6[0], x6[1]);
x7[0] = pack_4(x7[0], x7[1]);
x10[0] = pack_4(x10[0], x10[1]);
x11[0] = pack_4(x11[0], x11[1]);
x14[0] = pack_4(x14[0], x14[1]);
x15[0] = pack_4(x15[0], x15[1]);
io[0] = x0[0];
io[1] = _mm_sub_epi32(_mm_setzero_si128(), x8[0]);
io[2] = x12[0];
io[3] = _mm_sub_epi32(_mm_setzero_si128(), x4[0]);
io[4] = x6[0];
io[5] = x14[0];
io[6] = x10[0];
io[7] = x2[0];
io[8] = x3[0];
io[9] = x11[0];
io[10] = x15[0];
io[11] = x7[0];
io[12] = x5[0];
io[13] = _mm_sub_epi32(_mm_setzero_si128(), x13[0]);
io[14] = x9[0];
io[15] = _mm_sub_epi32(_mm_setzero_si128(), x1[0]);
}
void vp9_highbd_iht16x16_256_add_sse4_1(const tran_low_t *input, uint16_t *dest,
int stride, int tx_type, int bd) {
int i;
__m128i out[16], *in;
if (bd == 8) {
__m128i l[16], r[16];
in = l;
for (i = 0; i < 2; i++) {
highbd_load_pack_transpose_32bit_8x8(&input[0], 16, &in[0]);
highbd_load_pack_transpose_32bit_8x8(&input[8], 16, &in[8]);
if (tx_type == DCT_DCT || tx_type == ADST_DCT) {
idct16_8col(in, in);
} else {
vpx_iadst16_8col_sse2(in);
}
in = r;
input += 128;
}
for (i = 0; i < 16; i += 8) {
int j;
transpose_16bit_8x8(l + i, out);
transpose_16bit_8x8(r + i, out + 8);
if (tx_type == DCT_DCT || tx_type == DCT_ADST) {
idct16_8col(out, out);
} else {
vpx_iadst16_8col_sse2(out);
}
for (j = 0; j < 16; ++j) {
highbd_write_buffer_8(dest + j * stride, out[j], bd);
}
dest += 8;
}
} else {
__m128i all[4][16];
for (i = 0; i < 4; i++) {
in = all[i];
highbd_load_transpose_32bit_8x4(&input[0], 16, &in[0]);
highbd_load_transpose_32bit_8x4(&input[8], 16, &in[8]);
if (tx_type == DCT_DCT || tx_type == ADST_DCT) {
vpx_highbd_idct16_4col_sse4_1(in);
} else {
highbd_iadst16_4col_sse4_1(in);
}
input += 4 * 16;
}
for (i = 0; i < 16; i += 4) {
int j;
transpose_32bit_4x4(all[0] + i, out + 0);
transpose_32bit_4x4(all[1] + i, out + 4);
transpose_32bit_4x4(all[2] + i, out + 8);
transpose_32bit_4x4(all[3] + i, out + 12);
if (tx_type == DCT_DCT || tx_type == DCT_ADST) {
vpx_highbd_idct16_4col_sse4_1(out);
} else {
highbd_iadst16_4col_sse4_1(out);
}
for (j = 0; j < 16; ++j) {
highbd_write_buffer_4(dest + j * stride, out[j], bd);
}
dest += 4;
}
}
}

@@ -1,131 +0,0 @@
/*
* Copyright (c) 2018 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "./vp9_rtcd.h"
#include "vp9/common/vp9_idct.h"
#include "vpx_dsp/x86/highbd_inv_txfm_sse4.h"
#include "vpx_dsp/x86/inv_txfm_sse2.h"
#include "vpx_dsp/x86/transpose_sse2.h"
#include "vpx_dsp/x86/txfm_common_sse2.h"
static INLINE void highbd_iadst4_sse4_1(__m128i *const io) {
const __m128i pair_c1 = pair_set_epi32(4 * sinpi_1_9, 0);
const __m128i pair_c2 = pair_set_epi32(4 * sinpi_2_9, 0);
const __m128i pair_c3 = pair_set_epi32(4 * sinpi_3_9, 0);
const __m128i pair_c4 = pair_set_epi32(4 * sinpi_4_9, 0);
__m128i s0[2], s1[2], s2[2], s3[2], s4[2], s5[2], s6[2], t0[2], t1[2], t2[2];
__m128i temp[2];
transpose_32bit_4x4(io, io);
extend_64bit(io[0], temp);
s0[0] = _mm_mul_epi32(pair_c1, temp[0]);
s0[1] = _mm_mul_epi32(pair_c1, temp[1]);
s1[0] = _mm_mul_epi32(pair_c2, temp[0]);
s1[1] = _mm_mul_epi32(pair_c2, temp[1]);
extend_64bit(io[1], temp);
s2[0] = _mm_mul_epi32(pair_c3, temp[0]);
s2[1] = _mm_mul_epi32(pair_c3, temp[1]);
extend_64bit(io[2], temp);
s3[0] = _mm_mul_epi32(pair_c4, temp[0]);
s3[1] = _mm_mul_epi32(pair_c4, temp[1]);
s4[0] = _mm_mul_epi32(pair_c1, temp[0]);
s4[1] = _mm_mul_epi32(pair_c1, temp[1]);
extend_64bit(io[3], temp);
s5[0] = _mm_mul_epi32(pair_c2, temp[0]);
s5[1] = _mm_mul_epi32(pair_c2, temp[1]);
s6[0] = _mm_mul_epi32(pair_c4, temp[0]);
s6[1] = _mm_mul_epi32(pair_c4, temp[1]);
t0[0] = _mm_add_epi64(s0[0], s3[0]);
t0[1] = _mm_add_epi64(s0[1], s3[1]);
t0[0] = _mm_add_epi64(t0[0], s5[0]);
t0[1] = _mm_add_epi64(t0[1], s5[1]);
t1[0] = _mm_sub_epi64(s1[0], s4[0]);
t1[1] = _mm_sub_epi64(s1[1], s4[1]);
t1[0] = _mm_sub_epi64(t1[0], s6[0]);
t1[1] = _mm_sub_epi64(t1[1], s6[1]);
temp[0] = _mm_sub_epi32(io[0], io[2]);
temp[0] = _mm_add_epi32(temp[0], io[3]);
extend_64bit(temp[0], temp);
t2[0] = _mm_mul_epi32(pair_c3, temp[0]);
t2[1] = _mm_mul_epi32(pair_c3, temp[1]);
s0[0] = _mm_add_epi64(t0[0], s2[0]);
s0[1] = _mm_add_epi64(t0[1], s2[1]);
s1[0] = _mm_add_epi64(t1[0], s2[0]);
s1[1] = _mm_add_epi64(t1[1], s2[1]);
s3[0] = _mm_add_epi64(t0[0], t1[0]);
s3[1] = _mm_add_epi64(t0[1], t1[1]);
s3[0] = _mm_sub_epi64(s3[0], s2[0]);
s3[1] = _mm_sub_epi64(s3[1], s2[1]);
s0[0] = dct_const_round_shift_64bit(s0[0]);
s0[1] = dct_const_round_shift_64bit(s0[1]);
s1[0] = dct_const_round_shift_64bit(s1[0]);
s1[1] = dct_const_round_shift_64bit(s1[1]);
s2[0] = dct_const_round_shift_64bit(t2[0]);
s2[1] = dct_const_round_shift_64bit(t2[1]);
s3[0] = dct_const_round_shift_64bit(s3[0]);
s3[1] = dct_const_round_shift_64bit(s3[1]);
io[0] = pack_4(s0[0], s0[1]);
io[1] = pack_4(s1[0], s1[1]);
io[2] = pack_4(s2[0], s2[1]);
io[3] = pack_4(s3[0], s3[1]);
}
void vp9_highbd_iht4x4_16_add_sse4_1(const tran_low_t *input, uint16_t *dest,
int stride, int tx_type, int bd) {
__m128i io[4];
io[0] = _mm_load_si128((const __m128i *)(input + 0));
io[1] = _mm_load_si128((const __m128i *)(input + 4));
io[2] = _mm_load_si128((const __m128i *)(input + 8));
io[3] = _mm_load_si128((const __m128i *)(input + 12));
if (bd == 8) {
__m128i io_short[2];
io_short[0] = _mm_packs_epi32(io[0], io[1]);
io_short[1] = _mm_packs_epi32(io[2], io[3]);
if (tx_type == DCT_DCT || tx_type == ADST_DCT) {
idct4_sse2(io_short);
} else {
iadst4_sse2(io_short);
}
if (tx_type == DCT_DCT || tx_type == DCT_ADST) {
idct4_sse2(io_short);
} else {
iadst4_sse2(io_short);
}
io_short[0] = _mm_add_epi16(io_short[0], _mm_set1_epi16(8));
io_short[1] = _mm_add_epi16(io_short[1], _mm_set1_epi16(8));
io[0] = _mm_srai_epi16(io_short[0], 4);
io[1] = _mm_srai_epi16(io_short[1], 4);
} else {
if (tx_type == DCT_DCT || tx_type == ADST_DCT) {
highbd_idct4_sse4_1(io);
} else {
highbd_iadst4_sse4_1(io);
}
if (tx_type == DCT_DCT || tx_type == DCT_ADST) {
highbd_idct4_sse4_1(io);
} else {
highbd_iadst4_sse4_1(io);
}
io[0] = wraplow_16bit_shift4(io[0], io[1], _mm_set1_epi32(8));
io[1] = wraplow_16bit_shift4(io[2], io[3], _mm_set1_epi32(8));
}
recon_and_store_4x4(io, dest, stride, bd);
}

@@ -1,255 +0,0 @@
/*
* Copyright (c) 2018 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "./vp9_rtcd.h"
#include "vp9/common/vp9_idct.h"
#include "vpx_dsp/x86/highbd_inv_txfm_sse4.h"
#include "vpx_dsp/x86/inv_txfm_sse2.h"
#include "vpx_dsp/x86/transpose_sse2.h"
#include "vpx_dsp/x86/txfm_common_sse2.h"
static INLINE void highbd_iadst_half_butterfly_sse4_1(const __m128i in,
const int c,
__m128i *const s) {
const __m128i pair_c = pair_set_epi32(4 * c, 0);
__m128i x[2];
extend_64bit(in, x);
s[0] = _mm_mul_epi32(pair_c, x[0]);
s[1] = _mm_mul_epi32(pair_c, x[1]);
}
static INLINE void highbd_iadst_butterfly_sse4_1(const __m128i in0,
const __m128i in1,
const int c0, const int c1,
__m128i *const s0,
__m128i *const s1) {
const __m128i pair_c0 = pair_set_epi32(4 * c0, 0);
const __m128i pair_c1 = pair_set_epi32(4 * c1, 0);
__m128i t00[2], t01[2], t10[2], t11[2];
__m128i x0[2], x1[2];
extend_64bit(in0, x0);
extend_64bit(in1, x1);
t00[0] = _mm_mul_epi32(pair_c0, x0[0]);
t00[1] = _mm_mul_epi32(pair_c0, x0[1]);
t01[0] = _mm_mul_epi32(pair_c0, x1[0]);
t01[1] = _mm_mul_epi32(pair_c0, x1[1]);
t10[0] = _mm_mul_epi32(pair_c1, x0[0]);
t10[1] = _mm_mul_epi32(pair_c1, x0[1]);
t11[0] = _mm_mul_epi32(pair_c1, x1[0]);
t11[1] = _mm_mul_epi32(pair_c1, x1[1]);
s0[0] = _mm_add_epi64(t00[0], t11[0]);
s0[1] = _mm_add_epi64(t00[1], t11[1]);
s1[0] = _mm_sub_epi64(t10[0], t01[0]);
s1[1] = _mm_sub_epi64(t10[1], t01[1]);
}
static void highbd_iadst8_sse4_1(__m128i *const io) {
__m128i s0[2], s1[2], s2[2], s3[2], s4[2], s5[2], s6[2], s7[2];
__m128i x0[2], x1[2], x2[2], x3[2], x4[2], x5[2], x6[2], x7[2];
transpose_32bit_4x4x2(io, io);
// stage 1
highbd_iadst_butterfly_sse4_1(io[7], io[0], cospi_2_64, cospi_30_64, s0, s1);
highbd_iadst_butterfly_sse4_1(io[3], io[4], cospi_18_64, cospi_14_64, s4, s5);
x0[0] = _mm_add_epi64(s0[0], s4[0]);
x0[1] = _mm_add_epi64(s0[1], s4[1]);
x1[0] = _mm_add_epi64(s1[0], s5[0]);
x1[1] = _mm_add_epi64(s1[1], s5[1]);
x4[0] = _mm_sub_epi64(s0[0], s4[0]);
x4[1] = _mm_sub_epi64(s0[1], s4[1]);
x5[0] = _mm_sub_epi64(s1[0], s5[0]);
x5[1] = _mm_sub_epi64(s1[1], s5[1]);
highbd_iadst_butterfly_sse4_1(io[5], io[2], cospi_10_64, cospi_22_64, s2, s3);
highbd_iadst_butterfly_sse4_1(io[1], io[6], cospi_26_64, cospi_6_64, s6, s7);
x2[0] = _mm_add_epi64(s2[0], s6[0]);
x2[1] = _mm_add_epi64(s2[1], s6[1]);
x3[0] = _mm_add_epi64(s3[0], s7[0]);
x3[1] = _mm_add_epi64(s3[1], s7[1]);
x6[0] = _mm_sub_epi64(s2[0], s6[0]);
x6[1] = _mm_sub_epi64(s2[1], s6[1]);
x7[0] = _mm_sub_epi64(s3[0], s7[0]);
x7[1] = _mm_sub_epi64(s3[1], s7[1]);
x0[0] = dct_const_round_shift_64bit(x0[0]);
x0[1] = dct_const_round_shift_64bit(x0[1]);
x1[0] = dct_const_round_shift_64bit(x1[0]);
x1[1] = dct_const_round_shift_64bit(x1[1]);
x2[0] = dct_const_round_shift_64bit(x2[0]);
x2[1] = dct_const_round_shift_64bit(x2[1]);
x3[0] = dct_const_round_shift_64bit(x3[0]);
x3[1] = dct_const_round_shift_64bit(x3[1]);
x4[0] = dct_const_round_shift_64bit(x4[0]);
x4[1] = dct_const_round_shift_64bit(x4[1]);
x5[0] = dct_const_round_shift_64bit(x5[0]);
x5[1] = dct_const_round_shift_64bit(x5[1]);
x6[0] = dct_const_round_shift_64bit(x6[0]);
x6[1] = dct_const_round_shift_64bit(x6[1]);
x7[0] = dct_const_round_shift_64bit(x7[0]);
x7[1] = dct_const_round_shift_64bit(x7[1]);
s0[0] = pack_4(x0[0], x0[1]); // s0 = x0;
s1[0] = pack_4(x1[0], x1[1]); // s1 = x1;
s2[0] = pack_4(x2[0], x2[1]); // s2 = x2;
s3[0] = pack_4(x3[0], x3[1]); // s3 = x3;
x4[0] = pack_4(x4[0], x4[1]);
x5[0] = pack_4(x5[0], x5[1]);
x6[0] = pack_4(x6[0], x6[1]);
x7[0] = pack_4(x7[0], x7[1]);
// stage 2
x0[0] = _mm_add_epi32(s0[0], s2[0]);
x1[0] = _mm_add_epi32(s1[0], s3[0]);
x2[0] = _mm_sub_epi32(s0[0], s2[0]);
x3[0] = _mm_sub_epi32(s1[0], s3[0]);
highbd_iadst_butterfly_sse4_1(x4[0], x5[0], cospi_8_64, cospi_24_64, s4, s5);
highbd_iadst_butterfly_sse4_1(x7[0], x6[0], cospi_24_64, cospi_8_64, s7, s6);
x4[0] = _mm_add_epi64(s4[0], s6[0]);
x4[1] = _mm_add_epi64(s4[1], s6[1]);
x5[0] = _mm_add_epi64(s5[0], s7[0]);
x5[1] = _mm_add_epi64(s5[1], s7[1]);
x6[0] = _mm_sub_epi64(s4[0], s6[0]);
x6[1] = _mm_sub_epi64(s4[1], s6[1]);
x7[0] = _mm_sub_epi64(s5[0], s7[0]);
x7[1] = _mm_sub_epi64(s5[1], s7[1]);
x4[0] = dct_const_round_shift_64bit(x4[0]);
x4[1] = dct_const_round_shift_64bit(x4[1]);
x5[0] = dct_const_round_shift_64bit(x5[0]);
x5[1] = dct_const_round_shift_64bit(x5[1]);
x6[0] = dct_const_round_shift_64bit(x6[0]);
x6[1] = dct_const_round_shift_64bit(x6[1]);
x7[0] = dct_const_round_shift_64bit(x7[0]);
x7[1] = dct_const_round_shift_64bit(x7[1]);
x4[0] = pack_4(x4[0], x4[1]);
x5[0] = pack_4(x5[0], x5[1]);
x6[0] = pack_4(x6[0], x6[1]);
x7[0] = pack_4(x7[0], x7[1]);
// stage 3
s2[0] = _mm_add_epi32(x2[0], x3[0]);
s3[0] = _mm_sub_epi32(x2[0], x3[0]);
s6[0] = _mm_add_epi32(x6[0], x7[0]);
s7[0] = _mm_sub_epi32(x6[0], x7[0]);
highbd_iadst_half_butterfly_sse4_1(s2[0], cospi_16_64, s2);
highbd_iadst_half_butterfly_sse4_1(s3[0], cospi_16_64, s3);
highbd_iadst_half_butterfly_sse4_1(s6[0], cospi_16_64, s6);
highbd_iadst_half_butterfly_sse4_1(s7[0], cospi_16_64, s7);
x2[0] = dct_const_round_shift_64bit(s2[0]);
x2[1] = dct_const_round_shift_64bit(s2[1]);
x3[0] = dct_const_round_shift_64bit(s3[0]);
x3[1] = dct_const_round_shift_64bit(s3[1]);
x6[0] = dct_const_round_shift_64bit(s6[0]);
x6[1] = dct_const_round_shift_64bit(s6[1]);
x7[0] = dct_const_round_shift_64bit(s7[0]);
x7[1] = dct_const_round_shift_64bit(s7[1]);
x2[0] = pack_4(x2[0], x2[1]);
x3[0] = pack_4(x3[0], x3[1]);
x6[0] = pack_4(x6[0], x6[1]);
x7[0] = pack_4(x7[0], x7[1]);
io[0] = x0[0];
io[1] = _mm_sub_epi32(_mm_setzero_si128(), x4[0]);
io[2] = x6[0];
io[3] = _mm_sub_epi32(_mm_setzero_si128(), x2[0]);
io[4] = x3[0];
io[5] = _mm_sub_epi32(_mm_setzero_si128(), x7[0]);
io[6] = x5[0];
io[7] = _mm_sub_epi32(_mm_setzero_si128(), x1[0]);
}
void vp9_highbd_iht8x8_64_add_sse4_1(const tran_low_t *input, uint16_t *dest,
int stride, int tx_type, int bd) {
__m128i io[16];
io[0] = _mm_load_si128((const __m128i *)(input + 0 * 8 + 0));
io[4] = _mm_load_si128((const __m128i *)(input + 0 * 8 + 4));
io[1] = _mm_load_si128((const __m128i *)(input + 1 * 8 + 0));
io[5] = _mm_load_si128((const __m128i *)(input + 1 * 8 + 4));
io[2] = _mm_load_si128((const __m128i *)(input + 2 * 8 + 0));
io[6] = _mm_load_si128((const __m128i *)(input + 2 * 8 + 4));
io[3] = _mm_load_si128((const __m128i *)(input + 3 * 8 + 0));
io[7] = _mm_load_si128((const __m128i *)(input + 3 * 8 + 4));
io[8] = _mm_load_si128((const __m128i *)(input + 4 * 8 + 0));
io[12] = _mm_load_si128((const __m128i *)(input + 4 * 8 + 4));
io[9] = _mm_load_si128((const __m128i *)(input + 5 * 8 + 0));
io[13] = _mm_load_si128((const __m128i *)(input + 5 * 8 + 4));
io[10] = _mm_load_si128((const __m128i *)(input + 6 * 8 + 0));
io[14] = _mm_load_si128((const __m128i *)(input + 6 * 8 + 4));
io[11] = _mm_load_si128((const __m128i *)(input + 7 * 8 + 0));
io[15] = _mm_load_si128((const __m128i *)(input + 7 * 8 + 4));
if (bd == 8) {
__m128i io_short[8];
io_short[0] = _mm_packs_epi32(io[0], io[4]);
io_short[1] = _mm_packs_epi32(io[1], io[5]);
io_short[2] = _mm_packs_epi32(io[2], io[6]);
io_short[3] = _mm_packs_epi32(io[3], io[7]);
io_short[4] = _mm_packs_epi32(io[8], io[12]);
io_short[5] = _mm_packs_epi32(io[9], io[13]);
io_short[6] = _mm_packs_epi32(io[10], io[14]);
io_short[7] = _mm_packs_epi32(io[11], io[15]);
if (tx_type == DCT_DCT || tx_type == ADST_DCT) {
vpx_idct8_sse2(io_short);
} else {
iadst8_sse2(io_short);
}
if (tx_type == DCT_DCT || tx_type == DCT_ADST) {
vpx_idct8_sse2(io_short);
} else {
iadst8_sse2(io_short);
}
round_shift_8x8(io_short, io);
} else {
__m128i temp[4];
if (tx_type == DCT_DCT || tx_type == ADST_DCT) {
vpx_highbd_idct8x8_half1d_sse4_1(io);
vpx_highbd_idct8x8_half1d_sse4_1(&io[8]);
} else {
highbd_iadst8_sse4_1(io);
highbd_iadst8_sse4_1(&io[8]);
}
temp[0] = io[4];
temp[1] = io[5];
temp[2] = io[6];
temp[3] = io[7];
io[4] = io[8];
io[5] = io[9];
io[6] = io[10];
io[7] = io[11];
if (tx_type == DCT_DCT || tx_type == DCT_ADST) {
vpx_highbd_idct8x8_half1d_sse4_1(io);
io[8] = temp[0];
io[9] = temp[1];
io[10] = temp[2];
io[11] = temp[3];
vpx_highbd_idct8x8_half1d_sse4_1(&io[8]);
} else {
highbd_iadst8_sse4_1(io);
io[8] = temp[0];
io[9] = temp[1];
io[10] = temp[2];
io[11] = temp[3];
highbd_iadst8_sse4_1(&io[8]);
}
highbd_idct8x8_final_round(io);
}
recon_and_store_8x8(io, dest, stride, bd);
}

@@ -10,6 +10,8 @@
#include "./vp9_rtcd.h"
#include "vpx_dsp/x86/inv_txfm_sse2.h"
#include "vpx_dsp/x86/txfm_common_sse2.h"
#include "vpx_ports/mem.h"
void vp9_iht4x4_16_add_sse2(const tran_low_t *input, uint8_t *dest, int stride,
int tx_type) {
@@ -20,23 +22,23 @@ void vp9_iht4x4_16_add_sse2(const tran_low_t *input, uint8_t *dest, int stride,
in[1] = load_input_data8(input + 8);
switch (tx_type) {
case DCT_DCT:
case 0: // DCT_DCT
idct4_sse2(in);
idct4_sse2(in);
break;
case ADST_DCT:
case 1: // ADST_DCT
idct4_sse2(in);
iadst4_sse2(in);
break;
case DCT_ADST:
case 2: // DCT_ADST
iadst4_sse2(in);
idct4_sse2(in);
break;
default:
assert(tx_type == ADST_ADST);
case 3: // ADST_ADST
iadst4_sse2(in);
iadst4_sse2(in);
break;
default: assert(0); break;
}
// Final round and shift
@@ -65,23 +67,23 @@ void vp9_iht8x8_64_add_sse2(const tran_low_t *input, uint8_t *dest, int stride,
in[7] = load_input_data8(input + 8 * 7);
switch (tx_type) {
case DCT_DCT:
vpx_idct8_sse2(in);
vpx_idct8_sse2(in);
case 0: // DCT_DCT
idct8_sse2(in);
idct8_sse2(in);
break;
case ADST_DCT:
vpx_idct8_sse2(in);
case 1: // ADST_DCT
idct8_sse2(in);
iadst8_sse2(in);
break;
case DCT_ADST:
case 2: // DCT_ADST
iadst8_sse2(in);
vpx_idct8_sse2(in);
idct8_sse2(in);
break;
default:
assert(tx_type == ADST_ADST);
case 3: // ADST_ADST
iadst8_sse2(in);
iadst8_sse2(in);
break;
default: assert(0); break;
}
// Final rounding and shift
@@ -199,23 +201,23 @@ void vp9_iht16x16_256_add_sse2(const tran_low_t *input, uint8_t *dest,
load_buffer_8x16(input, in1);
switch (tx_type) {
case DCT_DCT:
case 0: // DCT_DCT
idct16_sse2(in0, in1);
idct16_sse2(in0, in1);
break;
case ADST_DCT:
case 1: // ADST_DCT
idct16_sse2(in0, in1);
iadst16_sse2(in0, in1);
break;
case DCT_ADST:
case 2: // DCT_ADST
iadst16_sse2(in0, in1);
idct16_sse2(in0, in1);
break;
default:
assert(tx_type == ADST_ADST);
case 3: // ADST_ADST
iadst16_sse2(in0, in1);
iadst16_sse2(in0, in1);
break;
default: assert(0); break;
}
write_buffer_8x16(dest, in0, stride);
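/* All three dispatchers above use the same tx_type mapping, running the row
 * pass first and the column pass second:
 *   DCT_DCT   (0): idct rows,  idct cols
 *   ADST_DCT  (1): idct rows,  iadst cols
 *   DCT_ADST  (2): iadst rows, idct cols
 *   ADST_ADST (3): iadst rows, iadst cols
 * That is, the first half of the enum name is the vertical (column)
 * transform and the second half the horizontal (row) transform. */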

@@ -464,6 +464,10 @@ void vp9_cyclic_refresh_update_parameters(VP9_COMP *const cpi) {
cr->rate_ratio_qdelta = VPXMAX(cr->rate_ratio_qdelta, 2.5);
}
}
if (cpi->svc.spatial_layer_id > 0) {
cr->motion_thresh = 4;
cr->rate_boost_fac = 12;
}
if (cpi->oxcf.rc_mode == VPX_VBR) {
// To be adjusted for VBR mode, e.g., based on gf period and boost.
// For now use smaller qp-delta (than CBR), no second boosted seg, and

@@ -12,10 +12,7 @@
#include "vp9/encoder/vp9_encoder.h"
static const BLOCK_SIZE square[] = {
BLOCK_8X8,
BLOCK_16X16,
BLOCK_32X32,
BLOCK_64X64,
BLOCK_8X8, BLOCK_16X16, BLOCK_32X32, BLOCK_64X64,
};
static void alloc_mode_context(VP9_COMMON *cm, int num_4x4_blk,

@@ -189,12 +189,11 @@ static VP9_DENOISER_DECISION perform_motion_compensation(
int increase_denoising, int mi_row, int mi_col, PICK_MODE_CONTEXT *ctx,
int motion_magnitude, int is_skin, int *zeromv_filter, int consec_zeromv,
int num_spatial_layers, int width, int lst_fb_idx, int gld_fb_idx,
int use_svc, int spatial_layer) {
int use_svc) {
const int sse_diff = (ctx->newmv_sse == UINT_MAX)
? 0
: ((int)ctx->zeromv_sse - (int)ctx->newmv_sse);
int frame;
int denoise_layer_idx = 0;
MACROBLOCKD *filter_mbd = &mb->e_mbd;
MODE_INFO *mi = filter_mbd->mi[0];
MODE_INFO saved_mi;
@@ -255,10 +254,6 @@ static VP9_DENOISER_DECISION perform_motion_compensation(
frame = lst_fb_idx + 1;
else if (frame == GOLDEN_FRAME)
frame = gld_fb_idx + 1;
// Shift for the second spatial layer.
if (num_spatial_layers - spatial_layer == 2)
frame = frame + denoiser->num_ref_frames;
denoise_layer_idx = num_spatial_layers - spatial_layer - 1;
}
if (ctx->newmv_sse > sse_thresh(bs, increase_denoising)) {
@@ -294,21 +289,18 @@ static VP9_DENOISER_DECISION perform_motion_compensation(
denoiser->running_avg_y[frame].uv_stride, mi_row, mi_col);
filter_mbd->plane[2].pre[0].stride = denoiser->running_avg_y[frame].uv_stride;
filter_mbd->plane[0].dst.buf = block_start(
denoiser->mc_running_avg_y[denoise_layer_idx].y_buffer,
denoiser->mc_running_avg_y[denoise_layer_idx].y_stride, mi_row, mi_col);
filter_mbd->plane[0].dst.stride =
denoiser->mc_running_avg_y[denoise_layer_idx].y_stride;
filter_mbd->plane[1].dst.buf = block_start(
denoiser->mc_running_avg_y[denoise_layer_idx].u_buffer,
denoiser->mc_running_avg_y[denoise_layer_idx].uv_stride, mi_row, mi_col);
filter_mbd->plane[1].dst.stride =
denoiser->mc_running_avg_y[denoise_layer_idx].uv_stride;
filter_mbd->plane[2].dst.buf = block_start(
denoiser->mc_running_avg_y[denoise_layer_idx].v_buffer,
denoiser->mc_running_avg_y[denoise_layer_idx].uv_stride, mi_row, mi_col);
filter_mbd->plane[2].dst.stride =
denoiser->mc_running_avg_y[denoise_layer_idx].uv_stride;
filter_mbd->plane[0].dst.buf =
block_start(denoiser->mc_running_avg_y.y_buffer,
denoiser->mc_running_avg_y.y_stride, mi_row, mi_col);
filter_mbd->plane[0].dst.stride = denoiser->mc_running_avg_y.y_stride;
filter_mbd->plane[1].dst.buf =
block_start(denoiser->mc_running_avg_y.u_buffer,
denoiser->mc_running_avg_y.uv_stride, mi_row, mi_col);
filter_mbd->plane[1].dst.stride = denoiser->mc_running_avg_y.uv_stride;
filter_mbd->plane[2].dst.buf =
block_start(denoiser->mc_running_avg_y.v_buffer,
denoiser->mc_running_avg_y.uv_stride, mi_row, mi_col);
filter_mbd->plane[2].dst.stride = denoiser->mc_running_avg_y.uv_stride;
set_ref_ptrs(cm, filter_mbd, saved_frame, NONE);
vp9_build_inter_predictors_sby(filter_mbd, mi_row, mi_col, bs);
@@ -332,17 +324,9 @@ void vp9_denoiser_denoise(VP9_COMP *cpi, MACROBLOCK *mb, int mi_row, int mi_col,
int zeromv_filter = 0;
VP9_DENOISER *denoiser = &cpi->denoiser;
VP9_DENOISER_DECISION decision = COPY_BLOCK;
const int shift =
cpi->svc.number_spatial_layers - cpi->svc.spatial_layer_id == 2
? denoiser->num_ref_frames
: 0;
YV12_BUFFER_CONFIG avg = denoiser->running_avg_y[INTRA_FRAME + shift];
const int denoise_layer_index =
cpi->svc.number_spatial_layers - cpi->svc.spatial_layer_id - 1;
YV12_BUFFER_CONFIG mc_avg = denoiser->mc_running_avg_y[denoise_layer_index];
YV12_BUFFER_CONFIG avg = denoiser->running_avg_y[INTRA_FRAME];
YV12_BUFFER_CONFIG mc_avg = denoiser->mc_running_avg_y;
uint8_t *avg_start = block_start(avg.y_buffer, avg.y_stride, mi_row, mi_col);
uint8_t *mc_avg_start =
block_start(mc_avg.y_buffer, mc_avg.y_stride, mi_row, mi_col);
struct buf_2d src = mb->plane[0].src;
@@ -397,7 +381,7 @@ void vp9_denoiser_denoise(VP9_COMP *cpi, MACROBLOCK *mb, int mi_row, int mi_col,
&cpi->common, denoiser, mb, bs, increase_denoising, mi_row, mi_col, ctx,
motion_magnitude, is_skin, &zeromv_filter, consec_zeromv,
cpi->svc.number_spatial_layers, cpi->Source->y_width, cpi->lst_fb_idx,
cpi->gld_fb_idx, cpi->use_svc, cpi->svc.spatial_layer_id);
cpi->gld_fb_idx, cpi->use_svc);
if (decision == FILTER_BLOCK) {
decision = vp9_denoiser_filter(src.buf, src.stride, mc_avg_start,
@@ -448,8 +432,7 @@ void vp9_denoiser_update_frame_info(
VP9_DENOISER *denoiser, YV12_BUFFER_CONFIG src, FRAME_TYPE frame_type,
int refresh_alt_ref_frame, int refresh_golden_frame, int refresh_last_frame,
int alt_fb_idx, int gld_fb_idx, int lst_fb_idx, int resized,
int svc_base_is_key, int second_spatial_layer) {
const int shift = second_spatial_layer ? denoiser->num_ref_frames : 0;
int svc_base_is_key) {
// Copy the source into the denoised reference buffers on a KEY_FRAME or
// if the just-encoded frame was resized. For SVC, copy the source if the
// base spatial layer was a key frame.
@@ -458,8 +441,8 @@ void vp9_denoiser_update_frame_info(
int i;
// Start at 1 so as not to overwrite the INTRA_FRAME
for (i = 1; i < denoiser->num_ref_frames; ++i) {
if (denoiser->running_avg_y[i + shift].buffer_alloc != NULL)
copy_frame(&denoiser->running_avg_y[i + shift], &src);
if (denoiser->running_avg_y[i].buffer_alloc != NULL)
copy_frame(&denoiser->running_avg_y[i], &src);
}
denoiser->reset = 0;
return;
@@ -468,29 +451,29 @@ void vp9_denoiser_update_frame_info(
// If more than one refresh occurs, must copy frame buffer.
if ((refresh_alt_ref_frame + refresh_golden_frame + refresh_last_frame) > 1) {
if (refresh_alt_ref_frame) {
copy_frame(&denoiser->running_avg_y[alt_fb_idx + 1 + shift],
&denoiser->running_avg_y[INTRA_FRAME + shift]);
copy_frame(&denoiser->running_avg_y[alt_fb_idx + 1],
&denoiser->running_avg_y[INTRA_FRAME]);
}
if (refresh_golden_frame) {
copy_frame(&denoiser->running_avg_y[gld_fb_idx + 1 + shift],
&denoiser->running_avg_y[INTRA_FRAME + shift]);
copy_frame(&denoiser->running_avg_y[gld_fb_idx + 1],
&denoiser->running_avg_y[INTRA_FRAME]);
}
if (refresh_last_frame) {
copy_frame(&denoiser->running_avg_y[lst_fb_idx + 1 + shift],
&denoiser->running_avg_y[INTRA_FRAME + shift]);
copy_frame(&denoiser->running_avg_y[lst_fb_idx + 1],
&denoiser->running_avg_y[INTRA_FRAME]);
}
} else {
if (refresh_alt_ref_frame) {
swap_frame_buffer(&denoiser->running_avg_y[alt_fb_idx + 1 + shift],
&denoiser->running_avg_y[INTRA_FRAME + shift]);
swap_frame_buffer(&denoiser->running_avg_y[alt_fb_idx + 1],
&denoiser->running_avg_y[INTRA_FRAME]);
}
if (refresh_golden_frame) {
swap_frame_buffer(&denoiser->running_avg_y[gld_fb_idx + 1 + shift],
&denoiser->running_avg_y[INTRA_FRAME + shift]);
swap_frame_buffer(&denoiser->running_avg_y[gld_fb_idx + 1],
&denoiser->running_avg_y[INTRA_FRAME]);
}
if (refresh_last_frame) {
swap_frame_buffer(&denoiser->running_avg_y[lst_fb_idx + 1 + shift],
&denoiser->running_avg_y[INTRA_FRAME + shift]);
swap_frame_buffer(&denoiser->running_avg_y[lst_fb_idx + 1],
&denoiser->running_avg_y[INTRA_FRAME]);
}
}
}
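/* The copy-versus-swap split above is a cost optimization: with a single
 * refresh, the INTRA_FRAME running average can simply trade places with the
 * one refreshed slot, while multiple refreshes need real copies because one
 * source must land in several slots. A minimal sketch, assuming
 * swap_frame_buffer() just exchanges the two YV12 descriptors: */
static void swap_yv12(YV12_BUFFER_CONFIG *const a,
                      YV12_BUFFER_CONFIG *const b) {
  const YV12_BUFFER_CONFIG t = *a; /* O(1): trades pointers and strides, */
  *a = *b;                         /* no pixel data is copied            */
  *b = t;
}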
@@ -539,90 +522,44 @@ static int vp9_denoiser_realloc_svc_helper(VP9_COMMON *cm,
}
int vp9_denoiser_realloc_svc(VP9_COMMON *cm, VP9_DENOISER *denoiser,
int svc_buf_shift, int refresh_alt,
int refresh_gld, int refresh_lst, int alt_fb_idx,
int gld_fb_idx, int lst_fb_idx) {
int refresh_alt, int refresh_gld, int refresh_lst,
int alt_fb_idx, int gld_fb_idx, int lst_fb_idx) {
int fail = 0;
if (refresh_alt) {
// Increase the frame buffer index by 1 to map it to the buffer index in the
// denoiser.
fail = vp9_denoiser_realloc_svc_helper(cm, denoiser,
alt_fb_idx + 1 + svc_buf_shift);
fail = vp9_denoiser_realloc_svc_helper(cm, denoiser, alt_fb_idx + 1);
if (fail) return 1;
}
if (refresh_gld) {
fail = vp9_denoiser_realloc_svc_helper(cm, denoiser,
gld_fb_idx + 1 + svc_buf_shift);
fail = vp9_denoiser_realloc_svc_helper(cm, denoiser, gld_fb_idx + 1);
if (fail) return 1;
}
if (refresh_lst) {
fail = vp9_denoiser_realloc_svc_helper(cm, denoiser,
lst_fb_idx + 1 + svc_buf_shift);
fail = vp9_denoiser_realloc_svc_helper(cm, denoiser, lst_fb_idx + 1);
if (fail) return 1;
}
return 0;
}
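/* The recurring fb_idx + 1 above encodes the denoiser's buffer layout:
 * running_avg_y[0] shadows INTRA_FRAME, and codec frame buffer i lives in
 * slot i + 1. A hypothetical helper making that mapping explicit: */
static INLINE int denoiser_slot(int fb_idx) { return fb_idx + 1; }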
int vp9_denoiser_alloc(VP9_COMMON *cm, struct SVC *svc, VP9_DENOISER *denoiser,
int use_svc, int noise_sen, int width, int height,
int ssx, int ssy,
int vp9_denoiser_alloc(VP9_COMMON *cm, int use_svc, VP9_DENOISER *denoiser,
int width, int height, int ssx, int ssy,
#if CONFIG_VP9_HIGHBITDEPTH
int use_highbitdepth,
#endif
int border) {
int i, layer, fail, init_num_ref_frames;
int i, fail, init_num_ref_frames;
const int legacy_byte_alignment = 0;
int num_layers = 1;
int scaled_width = width;
int scaled_height = height;
if (use_svc) {
LAYER_CONTEXT *lc = &svc->layer_context[svc->spatial_layer_id *
svc->number_temporal_layers +
svc->temporal_layer_id];
get_layer_resolution(width, height, lc->scaling_factor_num,
lc->scaling_factor_den, &scaled_width, &scaled_height);
// For SVC: only denoise at most 2 spatial (highest) layers.
if (noise_sen >= 2)
// Denoise from one spatial layer below the top.
svc->first_layer_denoise = VPXMAX(svc->number_spatial_layers - 2, 0);
else
// Only denoise the top spatial layer.
svc->first_layer_denoise = VPXMAX(svc->number_spatial_layers - 1, 0);
num_layers = svc->number_spatial_layers - svc->first_layer_denoise;
}
assert(denoiser != NULL);
denoiser->num_ref_frames = use_svc ? SVC_REF_FRAMES : NONSVC_REF_FRAMES;
init_num_ref_frames = use_svc ? MAX_REF_FRAMES : NONSVC_REF_FRAMES;
denoiser->num_layers = num_layers;
CHECK_MEM_ERROR(cm, denoiser->running_avg_y,
vpx_calloc(denoiser->num_ref_frames * num_layers,
sizeof(denoiser->running_avg_y[0])));
CHECK_MEM_ERROR(
cm, denoiser->mc_running_avg_y,
vpx_calloc(num_layers, sizeof(denoiser->mc_running_avg_y[0])));
for (layer = 0; layer < num_layers; ++layer) {
const int denoise_width = (layer == 0) ? width : scaled_width;
const int denoise_height = (layer == 0) ? height : scaled_height;
for (i = 0; i < init_num_ref_frames; ++i) {
fail = vpx_alloc_frame_buffer(
&denoiser->running_avg_y[i + denoiser->num_ref_frames * layer],
denoise_width, denoise_height, ssx, ssy,
#if CONFIG_VP9_HIGHBITDEPTH
use_highbitdepth,
#endif
border, legacy_byte_alignment);
if (fail) {
vp9_denoiser_free(denoiser);
return 1;
}
#ifdef OUTPUT_YUV_DENOISED
make_grayscale(&denoiser->running_avg_y[i]);
#endif
}
fail = vpx_alloc_frame_buffer(&denoiser->mc_running_avg_y[layer],
denoise_width, denoise_height, ssx, ssy,
cm, denoiser->running_avg_y,
vpx_calloc(denoiser->num_ref_frames, sizeof(denoiser->running_avg_y[0])));
for (i = 0; i < init_num_ref_frames; ++i) {
fail = vpx_alloc_frame_buffer(&denoiser->running_avg_y[i], width, height,
ssx, ssy,
#if CONFIG_VP9_HIGHBITDEPTH
use_highbitdepth,
#endif
@@ -631,10 +568,22 @@ int vp9_denoiser_alloc(VP9_COMMON *cm, struct SVC *svc, VP9_DENOISER *denoiser,
vp9_denoiser_free(denoiser);
return 1;
}
#ifdef OUTPUT_YUV_DENOISED
make_grayscale(&denoiser->running_avg_y[i]);
#endif
}
fail = vpx_alloc_frame_buffer(&denoiser->mc_running_avg_y, width, height, ssx,
ssy,
#if CONFIG_VP9_HIGHBITDEPTH
use_highbitdepth,
#endif
border, legacy_byte_alignment);
if (fail) {
vp9_denoiser_free(denoiser);
return 1;
}
// denoiser->last_source is only used for noise estimation, so allocate it
// only for the top layer.
fail = vpx_alloc_frame_buffer(&denoiser->last_source, width, height, ssx, ssy,
#if CONFIG_VP9_HIGHBITDEPTH
use_highbitdepth,
@@ -660,18 +609,12 @@ void vp9_denoiser_free(VP9_DENOISER *denoiser) {
return;
}
denoiser->frame_buffer_initialized = 0;
for (i = 0; i < denoiser->num_ref_frames * denoiser->num_layers; ++i) {
for (i = 0; i < denoiser->num_ref_frames; ++i) {
vpx_free_frame_buffer(&denoiser->running_avg_y[i]);
}
vpx_free(denoiser->running_avg_y);
denoiser->running_avg_y = NULL;
for (i = 0; i < denoiser->num_layers; ++i) {
vpx_free_frame_buffer(&denoiser->mc_running_avg_y[i]);
}
vpx_free(denoiser->mc_running_avg_y);
denoiser->mc_running_avg_y = NULL;
vpx_free_frame_buffer(&denoiser->mc_running_avg_y);
vpx_free_frame_buffer(&denoiser->last_source);
}


@@ -44,12 +44,11 @@ typedef enum vp9_denoiser_level {
typedef struct vp9_denoiser {
YV12_BUFFER_CONFIG *running_avg_y;
YV12_BUFFER_CONFIG *mc_running_avg_y;
YV12_BUFFER_CONFIG mc_running_avg_y;
YV12_BUFFER_CONFIG last_source;
int frame_buffer_initialized;
int reset;
int num_ref_frames;
int num_layers;
VP9_DENOISER_LEVEL denoising_level;
VP9_DENOISER_LEVEL prev_denoising_level;
} VP9_DENOISER;
@@ -67,13 +66,12 @@ typedef struct {
} VP9_PICKMODE_CTX_DEN;
struct VP9_COMP;
struct SVC;
void vp9_denoiser_update_frame_info(
VP9_DENOISER *denoiser, YV12_BUFFER_CONFIG src, FRAME_TYPE frame_type,
int refresh_alt_ref_frame, int refresh_golden_frame, int refresh_last_frame,
int alt_fb_idx, int gld_fb_idx, int lst_fb_idx, int resized,
int svc_base_is_key, int second_spatial_layer);
int svc_base_is_key);
void vp9_denoiser_denoise(struct VP9_COMP *cpi, MACROBLOCK *mb, int mi_row,
int mi_col, BLOCK_SIZE bs, PICK_MODE_CONTEXT *ctx,
@@ -86,13 +84,11 @@ void vp9_denoiser_update_frame_stats(MODE_INFO *mi, unsigned int sse,
PICK_MODE_CONTEXT *ctx);
int vp9_denoiser_realloc_svc(VP9_COMMON *cm, VP9_DENOISER *denoiser,
int svc_buf_shift, int refresh_alt,
int refresh_gld, int refresh_lst, int alt_fb_idx,
int gld_fb_idx, int lst_fb_idx);
int refresh_alt, int refresh_gld, int refresh_lst,
int alt_fb_idx, int gld_fb_idx, int lst_fb_idx);
int vp9_denoiser_alloc(VP9_COMMON *cm, struct SVC *svc, VP9_DENOISER *denoiser,
int use_svc, int noise_sen, int width, int height,
int ssx, int ssy,
int vp9_denoiser_alloc(VP9_COMMON *cm, int use_svc, VP9_DENOISER *denoiser,
int width, int height, int ssx, int ssy,
#if CONFIG_VP9_HIGHBITDEPTH
int use_highbitdepth,
#endif


@@ -1513,9 +1513,9 @@ static int choose_partitioning(VP9_COMP *cpi, const TileInfo *const tile,
}
}
}
if (is_key_frame ||
(low_res && vt.split[i].split[j].part_variances.none.variance >
threshold_4x4avg)) {
if (is_key_frame || (low_res &&
vt.split[i].split[j].part_variances.none.variance >
threshold_4x4avg)) {
force_split[split_index] = 0;
// Go down to 4x4 down-sampling for variance.
variance4x4downsample[i2 + j] = 1;
@@ -3403,10 +3403,9 @@ static void rd_pick_partition(VP9_COMP *cpi, ThreadData *td,
// Rate and distortion based partition search termination clause.
if (!cpi->sf.ml_partition_search_early_termination &&
!x->e_mbd.lossless &&
((best_rdc.dist < (dist_breakout_thr >> 2)) ||
(best_rdc.dist < dist_breakout_thr &&
best_rdc.rate < rate_breakout_thr))) {
!x->e_mbd.lossless && ((best_rdc.dist < (dist_breakout_thr >> 2)) ||
(best_rdc.dist < dist_breakout_thr &&
best_rdc.rate < rate_breakout_thr))) {
do_rect = 0;
}
}
@@ -4621,9 +4620,8 @@ void vp9_init_tile_data(VP9_COMP *cpi) {
if (cpi->tile_data == NULL || cpi->allocated_tiles < tile_cols * tile_rows) {
if (cpi->tile_data != NULL) vpx_free(cpi->tile_data);
CHECK_MEM_ERROR(
cm, cpi->tile_data,
vpx_malloc(tile_cols * tile_rows * sizeof(*cpi->tile_data)));
CHECK_MEM_ERROR(cm, cpi->tile_data, vpx_malloc(tile_cols * tile_rows *
sizeof(*cpi->tile_data)));
cpi->allocated_tiles = tile_cols * tile_rows;
for (tile_row = 0; tile_row < tile_rows; ++tile_row)


@@ -50,8 +50,7 @@ void vp9_subtract_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane) {
}
static const int plane_rd_mult[REF_TYPES][PLANE_TYPES] = {
{ 10, 6 },
{ 8, 5 },
{ 10, 6 }, { 8, 5 },
};
// 'num' can be negative, but 'shift' must be non-negative.
@@ -201,9 +200,9 @@ int vp9_optimize_b(MACROBLOCK *mb, int plane, int block, TX_SIZE tx_size,
const int band_next = band_translate[i + 1];
const int token_next =
(i + 1 != eob) ? vp9_get_token(qcoeff[scan[i + 1]]) : EOB_TOKEN;
unsigned int(*const token_costs_next)[2][COEFF_CONTEXTS]
[ENTROPY_TOKENS] =
token_costs + band_next;
unsigned int(
*const token_costs_next)[2][COEFF_CONTEXTS][ENTROPY_TOKENS] =
token_costs + band_next;
token_cache[rc] = vp9_pt_energy_class[t0];
ctx_next = get_coef_context(nb, token_cache, i + 1);
token_tree_sel_next = (x == 0);


@@ -65,12 +65,12 @@
#define AM_SEGMENT_ID_INACTIVE 7
#define AM_SEGMENT_ID_ACTIVE 0
// Whether to use high precision mv for altref computation.
#define ALTREF_HIGH_PRECISION_MV 1
// Q threshold for high precision mv. Choose a very high value for now so that
// HIGH_PRECISION is always chosen.
#define HIGH_PRECISION_MV_QTHRESH 200
#define ALTREF_HIGH_PRECISION_MV 1 // Whether to use high precision mv
// for altref computation.
#define HIGH_PRECISION_MV_QTHRESH 200 // Q threshold for high precision
// mv. Choose a very high value for
// now so that HIGH_PRECISION is always
// chosen.
#define FRAME_SIZE_FACTOR 128 // empirical params for context model threshold
#define FRAME_RATE_FACTOR 8
@@ -437,37 +437,34 @@ static int is_psnr_calc_enabled(VP9_COMP *cpi) {
/* clang-format off */
const Vp9LevelSpec vp9_level_defs[VP9_LEVELS] = {
// sample rate size breadth bitrate cpb
{ LEVEL_1, 829440, 36864, 512, 200, 400, 2, 1, 4, 8 },
{ LEVEL_1_1, 2764800, 73728, 768, 800, 1000, 2, 1, 4, 8 },
{ LEVEL_2, 4608000, 122880, 960, 1800, 1500, 2, 1, 4, 8 },
{ LEVEL_2_1, 9216000, 245760, 1344, 3600, 2800, 2, 2, 4, 8 },
{ LEVEL_3, 20736000, 552960, 2048, 7200, 6000, 2, 4, 4, 8 },
{ LEVEL_3_1, 36864000, 983040, 2752, 12000, 10000, 2, 4, 4, 8 },
{ LEVEL_4, 83558400, 2228224, 4160, 18000, 16000, 4, 4, 4, 8 },
{ LEVEL_4_1, 160432128, 2228224, 4160, 30000, 18000, 4, 4, 5, 6 },
{ LEVEL_5, 311951360, 8912896, 8384, 60000, 36000, 6, 8, 6, 4 },
{ LEVEL_5_1, 588251136, 8912896, 8384, 120000, 46000, 8, 8, 10, 4 },
{ LEVEL_1, 829440, 36864, 200, 400, 2, 1, 4, 8 },
{ LEVEL_1_1, 2764800, 73728, 800, 1000, 2, 1, 4, 8 },
{ LEVEL_2, 4608000, 122880, 1800, 1500, 2, 1, 4, 8 },
{ LEVEL_2_1, 9216000, 245760, 3600, 2800, 2, 2, 4, 8 },
{ LEVEL_3, 20736000, 552960, 7200, 6000, 2, 4, 4, 8 },
{ LEVEL_3_1, 36864000, 983040, 12000, 10000, 2, 4, 4, 8 },
{ LEVEL_4, 83558400, 2228224, 18000, 16000, 4, 4, 4, 8 },
{ LEVEL_4_1, 160432128, 2228224, 30000, 18000, 4, 4, 5, 6 },
{ LEVEL_5, 311951360, 8912896, 60000, 36000, 6, 8, 6, 4 },
{ LEVEL_5_1, 588251136, 8912896, 120000, 46000, 8, 8, 10, 4 },
// TODO(huisu): update max_cpb_size for level 5_2 ~ 6_2 when
// they are finalized (currently tentative).
{ LEVEL_5_2, 1176502272, 8912896, 8384, 180000, 90000, 8, 8, 10, 4 },
{ LEVEL_6, 1176502272, 35651584, 16832, 180000, 90000, 8, 16, 10, 4 },
{ LEVEL_6_1, 2353004544u, 35651584, 16832, 240000, 180000, 8, 16, 10, 4 },
{ LEVEL_6_2, 4706009088u, 35651584, 16832, 480000, 360000, 8, 16, 10, 4 },
{ LEVEL_5_2, 1176502272, 8912896, 180000, 90000, 8, 8, 10, 4 },
{ LEVEL_6, 1176502272, 35651584, 180000, 90000, 8, 16, 10, 4 },
{ LEVEL_6_1, 2353004544u, 35651584, 240000, 180000, 8, 16, 10, 4 },
{ LEVEL_6_2, 4706009088u, 35651584, 480000, 360000, 8, 16, 10, 4 },
};
/* clang-format on */
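Reading the rows above against the Vp9LevelSpec struct shown later in this diff, the columns are: level, max luma sample rate, max luma picture size, (on one side only) max luma picture breadth, average bitrate, max CPB size, compression ratio, and then the limits used elsewhere in this diff for column tiles, altref distance, and reference buffers. For example, the LEVEL_4 row caps pictures at 2228224 luma samples and allows at most 4 tile columns.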
static const char *level_fail_messages[TARGET_LEVEL_FAIL_IDS] = {
"The average bit-rate is too high.",
"The picture size is too large.",
"The picture width/height is too large.",
"The luma sample rate is too large.",
"The CPB size is too large.",
"The compression ratio is too small",
"Too many column tiles are used.",
"The alt-ref distance is too small.",
"Too many reference buffers are used."
};
static const char *level_fail_messages[TARGET_LEVEL_FAIL_IDS] =
{ "The average bit-rate is too high.",
"The picture size is too large.",
"The luma sample rate is too large.",
"The CPB size is too large.",
"The compression ratio is too small",
"Too many column tiles are used.",
"The alt-ref distance is too small.",
"Too many reference buffers are used." };
static INLINE void Scale2Ratio(VPX_SCALING mode, int *hr, int *hs) {
switch (mode) {
@@ -547,74 +544,6 @@ static void apply_active_map(VP9_COMP *cpi) {
}
}
static void apply_roi_map(VP9_COMP *cpi) {
VP9_COMMON *cm = &cpi->common;
struct segmentation *const seg = &cm->seg;
vpx_roi_map_t *roi = &cpi->roi;
const int *delta_q = roi->delta_q;
const int *delta_lf = roi->delta_lf;
const int *skip = roi->skip;
int ref_frame[8];
int internal_delta_q[MAX_SEGMENTS];
int i;
static const int flag_list[4] = { 0, VP9_LAST_FLAG, VP9_GOLD_FLAG,
VP9_ALT_FLAG };
// TODO(jianj): Investigate why ROI is not working at speed < 5 or in
// non-realtime mode.
if (cpi->oxcf.mode != REALTIME || cpi->oxcf.speed < 5) return;
if (!roi->enabled) return;
memcpy(&ref_frame, roi->ref_frame, sizeof(ref_frame));
vp9_enable_segmentation(seg);
vp9_clearall_segfeatures(seg);
// Select delta coding method;
seg->abs_delta = SEGMENT_DELTADATA;
memcpy(cpi->segmentation_map, roi->roi_map, (cm->mi_rows * cm->mi_cols));
for (i = 0; i < MAX_SEGMENTS; ++i) {
// Translate the external delta q values to internal values.
internal_delta_q[i] = vp9_quantizer_to_qindex(abs(delta_q[i]));
if (delta_q[i] < 0) internal_delta_q[i] = -internal_delta_q[i];
vp9_disable_segfeature(seg, i, SEG_LVL_ALT_Q);
vp9_disable_segfeature(seg, i, SEG_LVL_ALT_LF);
if (internal_delta_q[i] != 0) {
vp9_enable_segfeature(seg, i, SEG_LVL_ALT_Q);
vp9_set_segdata(seg, i, SEG_LVL_ALT_Q, internal_delta_q[i]);
}
if (delta_lf[i] != 0) {
vp9_enable_segfeature(seg, i, SEG_LVL_ALT_LF);
vp9_set_segdata(seg, i, SEG_LVL_ALT_LF, delta_lf[i]);
}
if (skip[i] != 0) {
vp9_enable_segfeature(seg, i, SEG_LVL_SKIP);
vp9_set_segdata(seg, i, SEG_LVL_SKIP, skip[i]);
}
if (ref_frame[i] >= 0) {
int valid_ref = 1;
// ALTREF is not used as reference for nonrd_pickmode with 0 lag.
if (ref_frame[i] == ALTREF_FRAME && cpi->sf.use_nonrd_pick_mode)
valid_ref = 0;
// If GOLDEN is selected, make sure it's set as reference.
if (ref_frame[i] == GOLDEN_FRAME &&
!(cpi->ref_frame_flags & flag_list[ref_frame[i]])) {
valid_ref = 0;
}
// GOLDEN was updated in the previous encoded frame, so GOLDEN and LAST are
// the same reference.
if (ref_frame[i] == GOLDEN_FRAME && cpi->rc.frames_since_golden == 0)
ref_frame[i] = LAST_FRAME;
if (valid_ref) {
vp9_enable_segfeature(seg, i, SEG_LVL_REF_FRAME);
vp9_set_segdata(seg, i, SEG_LVL_REF_FRAME, ref_frame[i]);
}
}
}
roi->enabled = 1;
}
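The delta-q translation above preserves sign while converting the external 0-63 quantizer scale to the internal 0-255 qindex scale. A minimal sketch of just that step, reusing vp9_quantizer_to_qindex exactly as the function does:

static int roi_internal_delta_q(int external_delta_q) {
  /* Map the magnitude to a qindex, then restore the sign. */
  const int q = vp9_quantizer_to_qindex(abs(external_delta_q));
  return external_delta_q < 0 ? -q : q;
}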
static void init_level_info(Vp9LevelInfo *level_info) {
Vp9LevelStats *const level_stats = &level_info->level_stats;
Vp9LevelSpec *const level_spec = &level_info->level_spec;
@@ -625,13 +554,6 @@ static void init_level_info(Vp9LevelInfo *level_info) {
level_spec->min_altref_distance = INT_MAX;
}
static int check_seg_range(int seg_data[8], int range) {
return !(abs(seg_data[0]) > range || abs(seg_data[1]) > range ||
abs(seg_data[2]) > range || abs(seg_data[3]) > range ||
abs(seg_data[4]) > range || abs(seg_data[5]) > range ||
abs(seg_data[6]) > range || abs(seg_data[7]) > range);
}
VP9_LEVEL vp9_get_level(const Vp9LevelSpec *const level_spec) {
int i;
const Vp9LevelSpec *this_level;
@@ -644,8 +566,6 @@ VP9_LEVEL vp9_get_level(const Vp9LevelSpec *const level_spec) {
(double)this_level->max_luma_sample_rate *
(1 + SAMPLE_RATE_GRACE_P) ||
level_spec->max_luma_picture_size > this_level->max_luma_picture_size ||
level_spec->max_luma_picture_breadth >
this_level->max_luma_picture_breadth ||
level_spec->average_bitrate > this_level->average_bitrate ||
level_spec->max_cpb_size > this_level->max_cpb_size ||
level_spec->compression_ratio < this_level->compression_ratio ||
@@ -658,61 +578,6 @@ VP9_LEVEL vp9_get_level(const Vp9LevelSpec *const level_spec) {
return (i == VP9_LEVELS) ? LEVEL_UNKNOWN : vp9_level_defs[i].level;
}
int vp9_set_roi_map(VP9_COMP *cpi, unsigned char *map, unsigned int rows,
unsigned int cols, int delta_q[8], int delta_lf[8],
int skip[8], int ref_frame[8]) {
VP9_COMMON *cm = &cpi->common;
vpx_roi_map_t *roi = &cpi->roi;
const int range = 63;
const int ref_frame_range = 3; // Alt-ref
const int skip_range = 1;
const int frame_rows = cpi->common.mi_rows;
const int frame_cols = cpi->common.mi_cols;
// Check that the number of rows and columns match.
if (frame_rows != (int)rows || frame_cols != (int)cols) {
return -1;
}
if (!check_seg_range(delta_q, range) || !check_seg_range(delta_lf, range) ||
!check_seg_range(ref_frame, ref_frame_range) ||
!check_seg_range(skip, skip_range))
return -1;
// Also disable segmentation if no deltas are specified.
if (!map ||
(!(delta_q[0] | delta_q[1] | delta_q[2] | delta_q[3] | delta_q[4] |
delta_q[5] | delta_q[6] | delta_q[7] | delta_lf[0] | delta_lf[1] |
delta_lf[2] | delta_lf[3] | delta_lf[4] | delta_lf[5] | delta_lf[6] |
delta_lf[7] | skip[0] | skip[1] | skip[2] | skip[3] | skip[4] |
skip[5] | skip[6] | skip[7]) &&
(ref_frame[0] == -1 && ref_frame[1] == -1 && ref_frame[2] == -1 &&
ref_frame[3] == -1 && ref_frame[4] == -1 && ref_frame[5] == -1 &&
ref_frame[6] == -1 && ref_frame[7] == -1))) {
vp9_disable_segmentation(&cm->seg);
cpi->roi.enabled = 0;
return 0;
}
if (roi->roi_map) {
vpx_free(roi->roi_map);
roi->roi_map = NULL;
}
CHECK_MEM_ERROR(cm, roi->roi_map, vpx_malloc(rows * cols));
// Copy to the ROI structure in the compressor.
memcpy(roi->roi_map, map, rows * cols);
memcpy(&roi->delta_q, delta_q, MAX_SEGMENTS * sizeof(delta_q[0]));
memcpy(&roi->delta_lf, delta_lf, MAX_SEGMENTS * sizeof(delta_lf[0]));
memcpy(&roi->skip, skip, MAX_SEGMENTS * sizeof(skip[0]));
memcpy(&roi->ref_frame, ref_frame, MAX_SEGMENTS * sizeof(ref_frame[0]));
roi->enabled = 1;
roi->rows = rows;
roi->cols = cols;
return 0;
}
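A hedged usage sketch for the vp9_set_roi_map API shown here; the segment values are illustrative, and map must hold mi_rows * mi_cols segment ids (0..7) to pass the dimension check:

static int set_simple_roi(VP9_COMP *cpi, unsigned char *map) {
  int delta_q[8] = { 0, -10, 0, 0, 0, 0, 0, 0 };        /* within +/-63 */
  int delta_lf[8] = { 0 };                               /* within +/-63 */
  int skip[8] = { 0 };                                   /* 0 or 1 */
  int ref_frame[8] = { -1, -1, -1, -1, -1, -1, -1, -1 }; /* -1 = unset */
  return vp9_set_roi_map(cpi, map, cpi->common.mi_rows, cpi->common.mi_cols,
                         delta_q, delta_lf, skip, ref_frame);
}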
int vp9_set_active_map(VP9_COMP *cpi, unsigned char *new_map_16x16, int rows,
int cols) {
if (rows == cpi->common.mb_rows && cols == cpi->common.mb_cols) {
@@ -947,9 +812,6 @@ static void dealloc_compressor_data(VP9_COMP *cpi) {
vpx_free(cpi->active_map.map);
cpi->active_map.map = NULL;
vpx_free(cpi->roi.roi_map);
cpi->roi.roi_map = NULL;
vpx_free(cpi->consec_zero_mv);
cpi->consec_zero_mv = NULL;
@@ -1254,9 +1116,8 @@ static void alloc_util_frame_buffers(VP9_COMP *cpi) {
// For 1 pass cbr: allocate scaled_frame that may be used as an intermediate
// buffer for a 2 stage down-sampling: two stages of 1:2 down-sampling for a
// target of 1/4x1/4. number_spatial_layers must be greater than 2.
if (is_one_pass_cbr_svc(cpi) && !cpi->svc.scaled_temp_is_alloc &&
cpi->svc.number_spatial_layers > 2) {
// target of 1/4x1/4.
if (is_one_pass_cbr_svc(cpi) && !cpi->svc.scaled_temp_is_alloc) {
cpi->svc.scaled_temp_is_alloc = 1;
if (vpx_realloc_frame_buffer(
&cpi->svc.scaled_temp, cm->width >> 1, cm->height >> 1,
@@ -1358,8 +1219,8 @@ static void set_tile_limits(VP9_COMP *cpi) {
}
if (cpi->oxcf.target_level == LEVEL_AUTO) {
const int level_tile_cols =
log_tile_cols_from_picsize_level(cpi->common.width, cpi->common.height);
const uint32_t pic_size = cpi->common.width * cpi->common.height;
const int level_tile_cols = log_tile_cols_from_picsize_level(pic_size);
if (cm->log2_tile_cols > level_tile_cols) {
cm->log2_tile_cols = VPXMAX(level_tile_cols, min_log2_tile_cols);
}
@@ -1987,8 +1848,6 @@ void vp9_change_config(struct VP9_COMP *cpi, const VP9EncoderConfig *oxcf) {
cm->mi_rows * cm->mi_cols * sizeof(*cpi->consec_zero_mv));
if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ)
vp9_cyclic_refresh_reset_resize(cpi);
rc->rc_1_frame = 0;
rc->rc_2_frame = 0;
}
if ((cpi->svc.number_temporal_layers > 1 && cpi->oxcf.rc_mode == VPX_CBR) ||
@@ -1999,24 +1858,6 @@ void vp9_change_config(struct VP9_COMP *cpi, const VP9EncoderConfig *oxcf) {
(int)cpi->oxcf.target_bandwidth);
}
// Check for resetting the rc flags (rc_1_frame, rc_2_frame) if the
// configuration change causes a large change in avg_frame_bandwidth.
// For SVC check for resetting based on spatial layer average bandwidth.
// Also reset buffer level to optimal level.
if (cm->current_video_frame > 0) {
if (cpi->use_svc) {
vp9_svc_check_reset_layer_rc_flag(cpi);
} else {
if (rc->avg_frame_bandwidth > (3 * rc->last_avg_frame_bandwidth >> 1) ||
rc->avg_frame_bandwidth < (rc->last_avg_frame_bandwidth >> 1)) {
rc->rc_1_frame = 0;
rc->rc_2_frame = 0;
rc->bits_off_target = rc->optimal_buffer_level;
rc->buffer_level = rc->optimal_buffer_level;
}
}
}
cpi->alt_ref_source = NULL;
rc->is_src_frame_alt_ref = 0;
@@ -2151,9 +1992,8 @@ VP9_COMP *vp9_create_compressor(VP9EncoderConfig *oxcf,
realloc_segmentation_maps(cpi);
CHECK_MEM_ERROR(
cm, cpi->skin_map,
vpx_calloc(cm->mi_rows * cm->mi_cols, sizeof(cpi->skin_map[0])));
CHECK_MEM_ERROR(cm, cpi->skin_map, vpx_calloc(cm->mi_rows * cm->mi_cols,
sizeof(cpi->skin_map[0])));
CHECK_MEM_ERROR(cm, cpi->alt_ref_aq, vp9_alt_ref_aq_create());
@@ -3016,26 +2856,18 @@ void vp9_update_reference_frames(VP9_COMP *cpi) {
if (cpi->oxcf.noise_sensitivity > 0 && denoise_svc(cpi) &&
cpi->denoiser.denoising_level > kDenLowLow) {
int svc_base_is_key = 0;
int denoise_svc_second_layer = 0;
if (cpi->use_svc) {
int realloc_fail = 0;
const int svc_buf_shift =
cpi->svc.number_spatial_layers - cpi->svc.spatial_layer_id == 2
? cpi->denoiser.num_ref_frames
: 0;
int layer = LAYER_IDS_TO_IDX(cpi->svc.spatial_layer_id,
cpi->svc.temporal_layer_id,
cpi->svc.number_temporal_layers);
LAYER_CONTEXT *lc = &cpi->svc.layer_context[layer];
svc_base_is_key = lc->is_key_frame;
denoise_svc_second_layer =
cpi->svc.number_spatial_layers - cpi->svc.spatial_layer_id == 2 ? 1
: 0;
// Check if we need to allocate extra buffers in the denoiser
// for
// Check if we need to allocate extra buffers in the denoiser for
// refreshed frames.
realloc_fail = vp9_denoiser_realloc_svc(
cm, &cpi->denoiser, svc_buf_shift, cpi->refresh_alt_ref_frame,
cm, &cpi->denoiser, cpi->refresh_alt_ref_frame,
cpi->refresh_golden_frame, cpi->refresh_last_frame, cpi->alt_fb_idx,
cpi->gld_fb_idx, cpi->lst_fb_idx);
if (realloc_fail)
@@ -3046,8 +2878,7 @@ void vp9_update_reference_frames(VP9_COMP *cpi) {
&cpi->denoiser, *cpi->Source, cpi->common.frame_type,
cpi->refresh_alt_ref_frame, cpi->refresh_golden_frame,
cpi->refresh_last_frame, cpi->alt_fb_idx, cpi->gld_fb_idx,
cpi->lst_fb_idx, cpi->resize_pending, svc_base_is_key,
denoise_svc_second_layer);
cpi->lst_fb_idx, cpi->resize_pending, svc_base_is_key);
}
#endif
if (is_one_pass_cbr_svc(cpi)) {
@@ -3482,9 +3313,8 @@ static void setup_denoiser_buffer(VP9_COMP *cpi) {
VP9_COMMON *const cm = &cpi->common;
if (cpi->oxcf.noise_sensitivity > 0 &&
!cpi->denoiser.frame_buffer_initialized) {
if (vp9_denoiser_alloc(cm, &cpi->svc, &cpi->denoiser, cpi->use_svc,
cpi->oxcf.noise_sensitivity, cm->width, cm->height,
cm->subsampling_x, cm->subsampling_y,
if (vp9_denoiser_alloc(cm, cpi->use_svc, &cpi->denoiser, cm->width,
cm->height, cm->subsampling_x, cm->subsampling_y,
#if CONFIG_VP9_HIGHBITDEPTH
cm->use_highbitdepth,
#endif
@@ -3765,8 +3595,6 @@ static void encode_without_recode_loop(VP9_COMP *cpi, size_t *size,
// it may be pretty bad for rate-control,
// and I should handle it somehow
vp9_alt_ref_aq_setup_map(cpi->alt_ref_aq, cpi);
} else if (cpi->roi.enabled && cm->frame_type != KEY_FRAME) {
apply_roi_map(cpi);
}
apply_active_map(cpi);
@@ -4497,15 +4325,6 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, size_t *size,
struct segmentation *const seg = &cm->seg;
TX_SIZE t;
// SVC: skip encoding of enhancement layer if the layer target bandwidth = 0.
if (cpi->use_svc && cpi->svc.spatial_layer_id > 0 &&
!cpi->svc.rc_drop_superframe && cpi->oxcf.target_bandwidth == 0) {
cpi->svc.skip_enhancement_layer = 1;
vp9_rc_postencode_update_drop_frame(cpi);
cpi->ext_refresh_frame_flags_pending = 0;
return;
}
set_ext_overrides(cpi);
vpx_clear_system_state();
@@ -4597,6 +4416,7 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, size_t *size,
if (vp9_rc_drop_frame(cpi) ||
(is_one_pass_cbr_svc(cpi) && cpi->svc.rc_drop_superframe == 1)) {
vp9_rc_postencode_update_drop_frame(cpi);
++cm->current_video_frame;
cpi->ext_refresh_frame_flags_pending = 0;
cpi->svc.rc_drop_superframe = 1;
cpi->last_frame_dropped = 1;
@@ -5009,7 +4829,6 @@ static void update_level_info(VP9_COMP *cpi, size_t *size, int arf_src_index) {
int i, idx;
uint64_t luma_samples, dur_end;
const uint32_t luma_pic_size = cm->width * cm->height;
const uint32_t luma_pic_breadth = VPXMAX(cm->width, cm->height);
LevelConstraint *const level_constraint = &cpi->level_constraint;
const int8_t level_index = level_constraint->level_index;
double cpb_data_size;
@@ -5113,11 +4932,6 @@ static void update_level_info(VP9_COMP *cpi, size_t *size, int arf_src_index) {
level_spec->max_luma_picture_size = luma_pic_size;
}
// update max_luma_picture_breadth
if (luma_pic_breadth > level_spec->max_luma_picture_breadth) {
level_spec->max_luma_picture_breadth = luma_pic_breadth;
}
// update compression_ratio
level_spec->compression_ratio = (double)level_stats->total_uncompressed_size *
cm->bit_depth /
@@ -5138,15 +4952,6 @@ static void update_level_info(VP9_COMP *cpi, size_t *size, int arf_src_index) {
level_fail_messages[LUMA_PIC_SIZE_TOO_LARGE]);
}
if (level_spec->max_luma_picture_breadth >
vp9_level_defs[level_index].max_luma_picture_breadth) {
level_constraint->fail_flag |= (1 << LUMA_PIC_BREADTH_TOO_LARGE);
vpx_internal_error(&cm->error, VPX_CODEC_ERROR,
"Failed to encode to the target level %d. %s",
vp9_level_defs[level_index].level,
level_fail_messages[LUMA_PIC_BREADTH_TOO_LARGE]);
}
if ((double)level_spec->max_luma_sample_rate >
(double)vp9_level_defs[level_index].max_luma_sample_rate *
(1 + SAMPLE_RATE_GRACE_P)) {
@@ -5347,6 +5152,8 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags,
cm->intra_only = 0;
// If the flags indicate an intra frame but the current picture is for a
// non-zero spatial layer, it should not be an intra picture.
// TODO(Won Kap): this needs to change if per-layer intra frame is
// allowed.
if ((source->flags & VPX_EFLAG_FORCE_KF) &&
cpi->svc.spatial_layer_id > cpi->svc.first_spatial_layer_to_encode) {
source->flags &= ~(unsigned int)(VPX_EFLAG_FORCE_KF);
@@ -5479,6 +5286,21 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags,
}
#endif // CONFIG_REALTIME_ONLY
#if 1
{
VP9_COMMON *const cm = &cpi->common;
TWO_PASS *const twopass = &cpi->twopass;
GF_GROUP *const gf_group = &twopass->gf_group;
printf("Frame=%d, gf_group_update_type[gf_group_index=%d]=%d, "
"show_frame=%d\n",
cm->current_video_frame, gf_group->index,
gf_group->update_type[gf_group->index],
cm->show_frame);
}
#endif // 1
if (cm->refresh_frame_context)
cm->frame_contexts[cm->frame_context_idx] = *cm->fc;
@@ -5512,6 +5334,13 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags,
if (oxcf->pass != 1) {
double samples = 0.0;
cpi->bytes += (int)(*size);
#if 1
{
printf("Frame %d: rate: %d\n",
cm->current_video_frame, (int)(*size));
}
#endif // 1
if (cm->show_frame) {
uint32_t bit_depth = 8;
@@ -5541,6 +5370,19 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags,
cpi->total_sq_error += psnr.sse[0];
cpi->total_samples += psnr.samples[0];
samples = psnr.samples[0];
#if 1
{
const int rddiv = cpi->rd.RDDIV;
const int rdmult = cpi->rd.RDMULT;
const int64_t rdcost = RDCOST(
rdmult, rddiv, (int)(*size) * 8, psnr.sse[0]);
printf("Frame %d: distortion: %" PRIu64 " rdcost: %" PRId64 "\n",
cm->current_video_frame, psnr.sse[0], rdcost);
printf("%d %d\n", rddiv, rdmult);
}
#endif // 1
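The debug RDCOST call above prices the frame with the usual Lagrangian trade-off J = D + lambda * R. A conceptual sketch only, deliberately ignoring the fixed-point scaling that rdmult and rddiv carry in vp9_rd.h:

static double rd_cost_concept(double lambda, double rate_bits,
                              double distortion_sse) {
  /* Lower J wins: distortion traded against rate at slope lambda. */
  return distortion_sse + lambda * rate_bits;
}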
{
PSNR_STATS psnr2;


@@ -383,7 +383,6 @@ typedef struct {
VP9_LEVEL level;
uint64_t max_luma_sample_rate;
uint32_t max_luma_picture_size;
uint32_t max_luma_picture_breadth;
double average_bitrate; // in kilobits per second
double max_cpb_size; // in kilobits
double compression_ratio;
@@ -423,15 +422,14 @@ typedef struct {
typedef enum {
BITRATE_TOO_LARGE = 0,
LUMA_PIC_SIZE_TOO_LARGE,
LUMA_PIC_BREADTH_TOO_LARGE,
LUMA_SAMPLE_RATE_TOO_LARGE,
CPB_TOO_LARGE,
COMPRESSION_RATIO_TOO_SMALL,
TOO_MANY_COLUMN_TILE,
ALTREF_DIST_TOO_SMALL,
TOO_MANY_REF_BUFFER,
TARGET_LEVEL_FAIL_IDS
LUMA_PIC_SIZE_TOO_LARGE = 1,
LUMA_SAMPLE_RATE_TOO_LARGE = 2,
CPB_TOO_LARGE = 3,
COMPRESSION_RATIO_TOO_SMALL = 4,
TOO_MANY_COLUMN_TILE = 5,
ALTREF_DIST_TOO_SMALL = 6,
TOO_MANY_REF_BUFFER = 7,
TARGET_LEVEL_FAIL_IDS = 8
} TARGET_LEVEL_FAIL_ID;
typedef struct {
@@ -723,8 +721,6 @@ typedef struct VP9_COMP {
uint8_t *count_arf_frame_usage;
uint8_t *count_lastgolden_frame_usage;
vpx_roi_map_t roi;
} VP9_COMP;
void vp9_initialize_enc(void);
@@ -870,8 +866,9 @@ static INLINE int is_one_pass_cbr_svc(const struct VP9_COMP *const cpi) {
#if CONFIG_VP9_TEMPORAL_DENOISING
static INLINE int denoise_svc(const struct VP9_COMP *const cpi) {
return (!cpi->use_svc || (cpi->use_svc && cpi->svc.spatial_layer_id >=
cpi->svc.first_layer_denoise));
return (!cpi->use_svc ||
(cpi->use_svc &&
cpi->svc.spatial_layer_id == cpi->svc.number_spatial_layers - 1));
}
#endif
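Worked example of the gate above: with three spatial layers and noise sensitivity >= 2, vp9_denoiser_alloc (earlier in this diff) sets first_layer_denoise to 1, so one form of denoise_svc() is true for layers 1 and 2, while the other form restricts denoising to the single top layer.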
@@ -923,14 +920,10 @@ static INLINE int get_level_index(VP9_LEVEL level) {
// Return the log2 value of max column tiles corresponding to the level that
// the picture size fits into.
static INLINE int log_tile_cols_from_picsize_level(uint32_t width,
uint32_t height) {
static INLINE int log_tile_cols_from_picsize_level(uint32_t pic_size) {
int i;
const uint32_t pic_size = width * height;
const uint32_t pic_breadth = VPXMAX(width, height);
for (i = LEVEL_1; i < LEVEL_MAX; ++i) {
if (vp9_level_defs[i].max_luma_picture_size >= pic_size &&
vp9_level_defs[i].max_luma_picture_breadth >= pic_breadth) {
if (vp9_level_defs[i].max_luma_picture_size > pic_size) {
return get_msb(vp9_level_defs[i].max_col_tiles);
}
}
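Worked example against the vp9_level_defs table earlier in this diff: a 1920x1080 picture gives pic_size = 2073600, and the first row whose max_luma_picture_size covers it (and whose breadth covers 1920, on the side that checks breadth) is LEVEL_4 with 2228224 and max_col_tiles 4, so the function returns get_msb(4) = 2, i.e. up to four tile columns.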
@@ -939,10 +932,6 @@ static INLINE int log_tile_cols_from_picsize_level(uint32_t width,
VP9_LEVEL vp9_get_level(const Vp9LevelSpec *const level_spec);
int vp9_set_roi_map(VP9_COMP *cpi, unsigned char *map, unsigned int rows,
unsigned int cols, int delta_q[8], int delta_lf[8],
int skip[8], int ref_frame[8]);
void vp9_new_framerate(VP9_COMP *cpi, double framerate);
void vp9_set_row_mt(VP9_COMP *cpi);


@@ -66,8 +66,8 @@ static int get_max_tile_cols(VP9_COMP *cpi) {
log2_tile_cols =
clamp(cpi->oxcf.tile_columns, min_log2_tile_cols, max_log2_tile_cols);
if (cpi->oxcf.target_level == LEVEL_AUTO) {
const int level_tile_cols =
log_tile_cols_from_picsize_level(cpi->common.width, cpi->common.height);
const uint32_t pic_size = cpi->common.width * cpi->common.height;
const int level_tile_cols = log_tile_cols_from_picsize_level(pic_size);
if (log2_tile_cols > level_tile_cols) {
log2_tile_cols = VPXMAX(level_tile_cols, min_log2_tile_cols);
}
@@ -390,9 +390,8 @@ void vp9_row_mt_sync_write_dummy(VP9RowMTSync *const row_mt_sync, int r, int c,
}
#if !CONFIG_REALTIME_ONLY
static int first_pass_worker_hook(void *arg1, void *arg2) {
EncWorkerData *const thread_data = (EncWorkerData *)arg1;
MultiThreadHandle *multi_thread_ctxt = (MultiThreadHandle *)arg2;
static int first_pass_worker_hook(EncWorkerData *const thread_data,
MultiThreadHandle *multi_thread_ctxt) {
VP9_COMP *const cpi = thread_data->cpi;
const VP9_COMMON *const cm = &cpi->common;
const int tile_cols = 1 << cm->log2_tile_cols;
@@ -471,8 +470,8 @@ void vp9_encode_fp_row_mt(VP9_COMP *cpi) {
}
}
launch_enc_workers(cpi, first_pass_worker_hook, multi_thread_ctxt,
num_workers);
launch_enc_workers(cpi, (VPxWorkerHook)first_pass_worker_hook,
multi_thread_ctxt, num_workers);
first_tile_col = &cpi->tile_data[0];
for (i = 1; i < tile_cols; i++) {
@@ -481,9 +480,8 @@ void vp9_encode_fp_row_mt(VP9_COMP *cpi) {
}
}
static int temporal_filter_worker_hook(void *arg1, void *arg2) {
EncWorkerData *const thread_data = (EncWorkerData *)arg1;
MultiThreadHandle *multi_thread_ctxt = (MultiThreadHandle *)arg2;
static int temporal_filter_worker_hook(EncWorkerData *const thread_data,
MultiThreadHandle *multi_thread_ctxt) {
VP9_COMP *const cpi = thread_data->cpi;
const VP9_COMMON *const cm = &cpi->common;
const int tile_cols = 1 << cm->log2_tile_cols;
@@ -555,14 +553,13 @@ void vp9_temporal_filter_row_mt(VP9_COMP *cpi) {
}
}
launch_enc_workers(cpi, temporal_filter_worker_hook, multi_thread_ctxt,
num_workers);
launch_enc_workers(cpi, (VPxWorkerHook)temporal_filter_worker_hook,
multi_thread_ctxt, num_workers);
}
#endif // !CONFIG_REALTIME_ONLY
static int enc_row_mt_worker_hook(void *arg1, void *arg2) {
EncWorkerData *const thread_data = (EncWorkerData *)arg1;
MultiThreadHandle *multi_thread_ctxt = (MultiThreadHandle *)arg2;
static int enc_row_mt_worker_hook(EncWorkerData *const thread_data,
MultiThreadHandle *multi_thread_ctxt) {
VP9_COMP *const cpi = thread_data->cpi;
const VP9_COMMON *const cm = &cpi->common;
const int tile_cols = 1 << cm->log2_tile_cols;
@@ -651,8 +648,8 @@ void vp9_encode_tiles_row_mt(VP9_COMP *cpi) {
}
}
launch_enc_workers(cpi, enc_row_mt_worker_hook, multi_thread_ctxt,
num_workers);
launch_enc_workers(cpi, (VPxWorkerHook)enc_row_mt_worker_hook,
multi_thread_ctxt, num_workers);
for (i = 0; i < num_workers; i++) {
VPxWorker *const worker = &cpi->workers[i];
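For context on the hook-signature change in this file: the void*-pair form matches a generic VPxWorkerHook, with the casts recovering the typed contexts. A minimal sketch, assuming the VPxWorker convention that a nonzero return means success:

static int example_worker_hook(void *arg1, void *arg2) {
  EncWorkerData *const thread_data = (EncWorkerData *)arg1;
  MultiThreadHandle *multi_thread_ctxt = (MultiThreadHandle *)arg2;
  (void)multi_thread_ctxt; /* unused in this sketch */
  return thread_data != NULL; /* nonzero = success */
}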


@@ -44,6 +44,7 @@
#define COMPLEXITY_STATS_OUTPUT 0
#define FIRST_PASS_Q 10.0
#define GF_MAX_BOOST 96.0
#define INTRA_MODE_PENALTY 1024
#define MIN_ARF_GF_BOOST 240
#define MIN_DECAY_FACTOR 0.01
@@ -731,8 +732,9 @@ static void first_pass_stat_calc(VP9_COMP *cpi, FIRSTPASS_STATS *fps,
// Exclude any image dead zone
if (fp_acc_data->image_data_start_row > 0) {
fp_acc_data->intra_skip_count =
VPXMAX(0, fp_acc_data->intra_skip_count -
(fp_acc_data->image_data_start_row * cm->mb_cols * 2));
VPXMAX(0,
fp_acc_data->intra_skip_count -
(fp_acc_data->image_data_start_row * cm->mb_cols * 2));
}
fp_acc_data->intra_factor = fp_acc_data->intra_factor / (double)num_mbs;
@@ -1947,7 +1949,6 @@ static void accumulate_frame_motion_stats(const FIRSTPASS_STATS *stats,
}
#define BASELINE_ERR_PER_MB 12500.0
#define GF_MAX_BOOST 96.0
static double calc_frame_boost(VP9_COMP *cpi, const FIRSTPASS_STATS *this_frame,
double this_frame_mv_in_out) {
double frame_boost;
@@ -2237,6 +2238,9 @@ static void allocate_gf_group_bits(VP9_COMP *cpi, int64_t gf_group_bits,
}
gf_group->arf_update_idx[0] = arf_buffer_indices[0];
gf_group->arf_ref_idx[0] = arf_buffer_indices[0];
// Step over the golden frame / overlay frame
if (EOF == input_stats(twopass, &frame_stats)) return;
}
// Deduct the boost bits for arf (or gf if it is not a key frame)
@@ -2281,8 +2285,7 @@ static void allocate_gf_group_bits(VP9_COMP *cpi, int64_t gf_group_bits,
// Define middle frame
mid_frame_idx = frame_index + (rc->baseline_gf_interval >> 1) - 1;
normal_frames =
rc->baseline_gf_interval - (key_frame || rc->source_alt_ref_pending);
normal_frames = (rc->baseline_gf_interval - rc->source_alt_ref_pending);
if (normal_frames > 1)
normal_frame_bits = (int)(total_group_bits / normal_frames);
else
@@ -2380,8 +2383,6 @@ static void adjust_group_arnr_filter(VP9_COMP *cpi, double section_noise,
// Analyse and define a gf/arf group.
#define ARF_DECAY_BREAKOUT 0.10
#define ARF_ABS_ZOOM_THRESH 4.0
static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
VP9_COMMON *const cm = &cpi->common;
RATE_CONTROL *const rc = &cpi->rc;
@@ -2410,6 +2411,7 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
double mv_in_out_accumulator = 0.0;
double abs_mv_in_out_accumulator = 0.0;
double mv_ratio_accumulator_thresh;
double mv_in_out_thresh;
double abs_mv_in_out_thresh;
double sr_accumulator = 0.0;
const double av_err = get_distribution_av_err(cpi, twopass);
@@ -2455,7 +2457,8 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
// Motion breakout threshold for loop below depends on image size.
mv_ratio_accumulator_thresh =
(cpi->initial_height + cpi->initial_width) / 4.0;
abs_mv_in_out_thresh = ARF_ABS_ZOOM_THRESH;
mv_in_out_thresh = (cpi->initial_height + cpi->initial_width) / 300.0;
abs_mv_in_out_thresh = (cpi->initial_height + cpi->initial_width) / 200.0;
// Set a maximum and minimum interval for the GF group.
// If the image appears almost completely static we can extend beyond this.
@@ -2540,17 +2543,14 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
// Update the accumulator for second ref error difference.
// This is intended to give an indication of how much the coded error is
// increasing over time.
if (i == 1) {
sr_accumulator += next_frame.coded_error;
} else {
sr_accumulator += (next_frame.sr_coded_error - next_frame.coded_error);
}
sr_accumulator += (next_frame.sr_coded_error - next_frame.coded_error);
sr_accumulator = VPXMAX(0.0, sr_accumulator);
}
// Break out conditions.
// Break at maximum of active_max_gf_interval unless almost totally static.
if (((twopass->kf_zeromotion_pct < STATIC_KF_GROUP_THRESH) &&
(i >= active_max_gf_interval) && (zero_motion_accumulator < 0.995)) ||
if (
// Break at active_max_gf_interval unless almost totally static.
((i >= active_max_gf_interval) && (zero_motion_accumulator < 0.995)) ||
(
// Don't break out with a very short interval.
(i >= active_min_gf_interval) &&
@@ -2559,6 +2559,7 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
(!flash_detected) &&
((mv_ratio_accumulator > mv_ratio_accumulator_thresh) ||
(abs_mv_in_out_accumulator > abs_mv_in_out_thresh) ||
(mv_in_out_accumulator < -mv_in_out_thresh) ||
(sr_accumulator > next_frame.intra_error)))) {
break;
}
@@ -2570,8 +2571,8 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
rc->constrained_gf_group = (i >= rc->frames_to_key) ? 1 : 0;
// Should we use the alternate reference frame.
if ((twopass->kf_zeromotion_pct < STATIC_KF_GROUP_THRESH) && allow_alt_ref &&
(i < cpi->oxcf.lag_in_frames) && (i >= rc->min_gf_interval)) {
if (allow_alt_ref && (i < cpi->oxcf.lag_in_frames) &&
(i >= rc->min_gf_interval)) {
const int forward_frames = (rc->frames_to_key - i >= i - 1)
? i - 1
: VPXMAX(0, rc->frames_to_key - i);
@@ -2599,10 +2600,8 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
#endif
// Set the interval until the next gf.
rc->baseline_gf_interval =
(twopass->kf_zeromotion_pct < STATIC_KF_GROUP_THRESH)
? (i - (is_key_frame || rc->source_alt_ref_pending))
: i;
// rc->baseline_gf_interval = 8;
rc->baseline_gf_interval = i - (is_key_frame || rc->source_alt_ref_pending);
// Only encode the alt reference frame in the temporal base layer, so
// baseline_gf_interval should be a multiple of a temporal layer group
@@ -2700,26 +2699,6 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
#endif
}
// Intra / Inter threshold very low
#define VERY_LOW_II 1.5
// For clean slide transitions we expect a sharp single frame spike in error.
#define ERROR_SPIKE 5.0
// Slide show transition detection.
// Tests for the case where there is very low error on either side of the
// current frame but much higher just for this frame. This can help detect
// key frames in slide shows, even where the slides are pictures of
// different sizes.
// Also requires that intra and inter errors are very similar to help eliminate
// harmful false positives.
// It will not help if the transition is a fade or other multi-frame effect.
static int slide_transition(const FIRSTPASS_STATS *this_frame,
const FIRSTPASS_STATS *last_frame,
const FIRSTPASS_STATS *next_frame) {
return (this_frame->intra_error < (this_frame->coded_error * VERY_LOW_II)) &&
(this_frame->coded_error > (last_frame->coded_error * ERROR_SPIKE)) &&
(this_frame->coded_error > (next_frame->coded_error * ERROR_SPIKE));
}
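Worked example of the thresholds: with VERY_LOW_II = 1.5 and ERROR_SPIKE = 5.0, a frame with intra_error = 120 and coded_error = 100, flanked by frames whose coded_error is 15, is flagged as a slide transition (120 < 150, and 100 > 75 against both neighbours).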
// Threshold for use of the lagging second reference frame. High second ref
// usage may point to a transient event like a flash or occlusion rather than
// a real scene cut.
@@ -2764,7 +2743,6 @@ static int test_candidate_kf(TWO_PASS *twopass,
if ((this_frame->pcnt_second_ref < SECOND_REF_USEAGE_THRESH) &&
(next_frame->pcnt_second_ref < SECOND_REF_USEAGE_THRESH) &&
((this_frame->pcnt_inter < VERY_LOW_INTER_THRESH) ||
(slide_transition(this_frame, last_frame, next_frame)) ||
((pcnt_intra > MIN_INTRA_LEVEL) &&
(pcnt_intra > (INTRA_VS_INTER_THRESH * modified_pcnt_inter)) &&
((this_frame->intra_error /
@@ -2836,7 +2814,6 @@ static int test_candidate_kf(TWO_PASS *twopass,
#define FRAMES_TO_CHECK_DECAY 8
#define MIN_KF_TOT_BOOST 300
#define KF_BOOST_SCAN_MAX_FRAMES 32
#define KF_ABS_ZOOM_THRESH 6.0
#ifdef AGGRESSIVE_VBR
#define KF_MAX_FRAME_BOOST 80.0
@@ -2864,7 +2841,6 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
double kf_group_err = 0.0;
double recent_loop_decay[FRAMES_TO_CHECK_DECAY];
double sr_accumulator = 0.0;
double abs_mv_in_out_accumulator = 0.0;
const double av_err = get_distribution_av_err(cpi, twopass);
vp9_zero(next_frame);
@@ -3029,14 +3005,8 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
double zm_factor;
// Monitor for static sections.
// First frame in kf group the second ref indicator is invalid.
if (i > 0) {
zero_motion_accumulator = VPXMIN(
zero_motion_accumulator, get_zero_motion_factor(cpi, &next_frame));
} else {
zero_motion_accumulator =
next_frame.pcnt_inter - next_frame.pcnt_motion;
}
zero_motion_accumulator = VPXMIN(
zero_motion_accumulator, get_zero_motion_factor(cpi, &next_frame));
// Factor 0.75-1.25 based on how much of frame is static.
zm_factor = (0.75 + (zero_motion_accumulator / 2.0));
@@ -3050,14 +3020,7 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
KF_MAX_FRAME_BOOST * zm_factor);
boost_score += frame_boost;
// Measure of zoom. Large zoom tends to indicate reduced boost.
abs_mv_in_out_accumulator +=
fabs(next_frame.mv_in_out_count * next_frame.pcnt_motion);
if ((frame_boost < 25.00) ||
(abs_mv_in_out_accumulator > KF_ABS_ZOOM_THRESH))
break;
if (frame_boost < 25.00) break;
} else {
break;
}
@@ -3072,16 +3035,10 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
twopass->section_intra_rating = calculate_section_intra_ratio(
start_position, twopass->stats_in_end, rc->frames_to_key);
// Special case for static / slide show content but don't apply
// if the kf group is very short.
if ((zero_motion_accumulator > 0.99) && (rc->frames_to_key > 8)) {
rc->kf_boost = VPXMAX((rc->frames_to_key * 100), MAX_KF_TOT_BOOST);
} else {
// Apply various clamps for min and max boost
rc->kf_boost = VPXMAX((int)boost_score, (rc->frames_to_key * 3));
rc->kf_boost = VPXMAX(rc->kf_boost, MIN_KF_TOT_BOOST);
rc->kf_boost = VPXMIN(rc->kf_boost, MAX_KF_TOT_BOOST);
}
// Apply various clamps for min and max boost
rc->kf_boost = VPXMAX((int)boost_score, (rc->frames_to_key * 3));
rc->kf_boost = VPXMAX(rc->kf_boost, MIN_KF_TOT_BOOST);
rc->kf_boost = VPXMIN(rc->kf_boost, MAX_KF_TOT_BOOST);
// Work out how many bits to allocate for the key frame itself.
kf_bits = calculate_boost_bits((rc->frames_to_key - 1), rc->kf_boost,


@@ -120,12 +120,12 @@ typedef enum {
typedef struct {
unsigned char index;
unsigned char first_inter_index;
RATE_FACTOR_LEVEL rf_level[MAX_STATIC_GF_GROUP_LENGTH + 1];
FRAME_UPDATE_TYPE update_type[MAX_STATIC_GF_GROUP_LENGTH + 1];
unsigned char arf_src_offset[MAX_STATIC_GF_GROUP_LENGTH + 1];
unsigned char arf_update_idx[MAX_STATIC_GF_GROUP_LENGTH + 1];
unsigned char arf_ref_idx[MAX_STATIC_GF_GROUP_LENGTH + 1];
int bit_allocation[MAX_STATIC_GF_GROUP_LENGTH + 1];
RATE_FACTOR_LEVEL rf_level[(MAX_LAG_BUFFERS * 2) + 1];
FRAME_UPDATE_TYPE update_type[(MAX_LAG_BUFFERS * 2) + 1];
unsigned char arf_src_offset[(MAX_LAG_BUFFERS * 2) + 1];
unsigned char arf_update_idx[(MAX_LAG_BUFFERS * 2) + 1];
unsigned char arf_ref_idx[(MAX_LAG_BUFFERS * 2) + 1];
int bit_allocation[(MAX_LAG_BUFFERS * 2) + 1];
} GF_GROUP;
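For scale: assuming MAX_LAG_BUFFERS is 25 (its usual libvpx value), (MAX_LAG_BUFFERS * 2) + 1 sizes each array at 51 entries, while MAX_STATIC_GF_GROUP_LENGTH + 1 (250 + 1, per the definition in vp9_ratectrl.h later in this diff) sizes them at 251, the cost of representing static GF groups up to 250 frames long.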
typedef struct {


@@ -25,9 +25,7 @@ typedef struct {
} ref[MAX_REF_FRAMES];
} MBGRAPH_MB_STATS;
typedef struct {
MBGRAPH_MB_STATS *mb_stats;
} MBGRAPH_FRAME_STATS;
typedef struct { MBGRAPH_MB_STATS *mb_stats; } MBGRAPH_FRAME_STATS;
struct VP9_COMP;


@@ -1785,10 +1785,7 @@ static int vector_match(int16_t *ref, int16_t *src, int bwl) {
}
static const MV search_pos[4] = {
{ -1, 0 },
{ 0, -1 },
{ 0, 1 },
{ 1, 0 },
{ -1, 0 }, { 0, -1 }, { 0, 1 }, { 1, 0 },
};
unsigned int vp9_int_pro_motion_estimation(const VP9_COMP *cpi, MACROBLOCK *x,
@@ -1879,10 +1876,7 @@ unsigned int vp9_int_pro_motion_estimation(const VP9_COMP *cpi, MACROBLOCK *x,
{
const uint8_t *const pos[4] = {
ref_buf - ref_stride,
ref_buf - 1,
ref_buf + 1,
ref_buf + ref_stride,
ref_buf - ref_stride, ref_buf - 1, ref_buf + 1, ref_buf + ref_stride,
};
cpi->fn_ptr[bsize].sdx4df(src_buf, src_stride, pos, ref_stride, this_sad);
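The search_pos and pos arrays above enumerate the four one-pel cross neighbours so that a single sdx4df call can score them all at once. A hedged sketch of the selection step that typically follows such a batch SAD call (helper name is illustrative):

/* Pick the best of four neighbour SADs; returns the winning index,
 * or -1 if the current centre is still best. */
static int pick_best_neighbour(const unsigned int sad4[4],
                               unsigned int centre_sad) {
  int i, best = -1;
  for (i = 0; i < 4; ++i) {
    if (sad4[i] < centre_sad) {
      centre_sad = sad4[i];
      best = i;
    }
  }
  return best;
}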


@@ -21,15 +21,6 @@
#include "vp9/encoder/vp9_noise_estimate.h"
#include "vp9/encoder/vp9_encoder.h"
#if CONFIG_VP9_TEMPORAL_DENOISING
// For SVC: only do noise estimation on the top spatial layer.
static INLINE int noise_est_svc(const struct VP9_COMP *const cpi) {
return (!cpi->use_svc ||
(cpi->use_svc &&
cpi->svc.spatial_layer_id == cpi->svc.number_spatial_layers - 1));
}
#endif
void vp9_noise_estimate_init(NOISE_ESTIMATE *const ne, int width, int height) {
ne->enabled = 0;
ne->level = kLowLow;
@@ -54,7 +45,7 @@ static int enable_noise_estimation(VP9_COMP *const cpi) {
#endif
// Enable noise estimation if denoising is on.
#if CONFIG_VP9_TEMPORAL_DENOISING
if (cpi->oxcf.noise_sensitivity > 0 && noise_est_svc(cpi) &&
if (cpi->oxcf.noise_sensitivity > 0 && denoise_svc(cpi) &&
cpi->common.width >= 320 && cpi->common.height >= 180)
return 1;
#endif
@@ -120,7 +111,7 @@ void vp9_update_noise_estimate(VP9_COMP *const cpi) {
// Estimate is between current source and last source.
YV12_BUFFER_CONFIG *last_source = cpi->Last_Source;
#if CONFIG_VP9_TEMPORAL_DENOISING
if (cpi->oxcf.noise_sensitivity > 0 && noise_est_svc(cpi)) {
if (cpi->oxcf.noise_sensitivity > 0 && denoise_svc(cpi)) {
last_source = &cpi->denoiser.last_source;
// Tune these thresholds for different resolutions when denoising is
// enabled.
@@ -140,7 +131,7 @@ void vp9_update_noise_estimate(VP9_COMP *const cpi) {
(cpi->svc.number_spatial_layers == 1 &&
(ne->last_w != cm->width || ne->last_h != cm->height))) {
#if CONFIG_VP9_TEMPORAL_DENOISING
if (cpi->oxcf.noise_sensitivity > 0 && noise_est_svc(cpi))
if (cpi->oxcf.noise_sensitivity > 0 && denoise_svc(cpi))
copy_frame(&cpi->denoiser.last_source, cpi->Source);
#endif
if (last_source != NULL) {
@@ -155,7 +146,7 @@ void vp9_update_noise_estimate(VP9_COMP *const cpi) {
ne->count = 0;
ne->num_frames_estimate = 10;
#if CONFIG_VP9_TEMPORAL_DENOISING
if (cpi->oxcf.noise_sensitivity > 0 && noise_est_svc(cpi) &&
if (cpi->oxcf.noise_sensitivity > 0 && denoise_svc(cpi) &&
cpi->svc.current_superframe > 1) {
vp9_denoiser_set_noise_level(&cpi->denoiser, ne->level);
copy_frame(&cpi->denoiser.last_source, cpi->Source);
@@ -258,7 +249,7 @@ void vp9_update_noise_estimate(VP9_COMP *const cpi) {
// Normalize.
avg_est = avg_est / num_samples;
// Update noise estimate.
ne->value = (int)((3 * ne->value + avg_est) >> 2);
ne->value = (int)((15 * ne->value + avg_est) >> 4);
ne->count++;
if (ne->count == ne->num_frames_estimate) {
// Reset counter and check noise level condition.
@@ -266,14 +257,14 @@ void vp9_update_noise_estimate(VP9_COMP *const cpi) {
ne->count = 0;
ne->level = vp9_noise_estimate_extract_level(ne);
#if CONFIG_VP9_TEMPORAL_DENOISING
if (cpi->oxcf.noise_sensitivity > 0 && noise_est_svc(cpi))
if (cpi->oxcf.noise_sensitivity > 0 && denoise_svc(cpi))
vp9_denoiser_set_noise_level(&cpi->denoiser, ne->level);
#endif
}
}
}
#if CONFIG_VP9_TEMPORAL_DENOISING
if (cpi->oxcf.noise_sensitivity > 0 && noise_est_svc(cpi))
if (cpi->oxcf.noise_sensitivity > 0 && denoise_svc(cpi))
copy_frame(&cpi->denoiser.last_source, cpi->Source);
#endif
}
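Both forms of the ne->value update above are fixed-point exponential moving averages; the change only moves the smoothing factor between 1/4 and 1/16 (smaller adapts more slowly but is less noisy). A minimal sketch of the shared shape, with alpha = 2^-alpha_shift:

static int ema_update(int value, int sample, int alpha_shift) {
  /* ((2^k - 1) * value + sample) >> k, i.e. value + (sample - value) / 2^k:
   * k = 2 gives (3 * v + s) >> 2, k = 4 gives (15 * v + s) >> 4. */
  return (int)((((int64_t)value << alpha_shift) - value + sample) >>
               alpha_shift);
}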


@@ -1488,6 +1488,7 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data,
int skip_ref_find_pred[4] = { 0 };
unsigned int sse_zeromv_normalized = UINT_MAX;
unsigned int best_sse_sofar = UINT_MAX;
unsigned int thresh_svc_skip_golden = 500;
#if CONFIG_VP9_TEMPORAL_DENOISING
VP9_PICKMODE_CTX_DEN ctx_den;
int64_t zero_last_cost_orig = INT64_MAX;
@@ -1495,23 +1496,8 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data,
#endif
INTERP_FILTER filter_gf_svc = EIGHTTAP;
MV_REFERENCE_FRAME best_second_ref_frame = NONE;
const struct segmentation *const seg = &cm->seg;
int comp_modes = 0;
int num_inter_modes = (cpi->use_svc) ? RT_INTER_MODES_SVC : RT_INTER_MODES;
int flag_svc_subpel = 0;
int svc_mv_col = 0;
int svc_mv_row = 0;
unsigned int thresh_svc_skip_golden = 500;
// Lower the skip threshold if the lower spatial layer is better quality
// relative to the current layer.
if (cpi->svc.spatial_layer_id > 0 && cm->base_qindex > 150 &&
cm->base_qindex > cpi->svc.lower_layer_qindex + 15)
thresh_svc_skip_golden = 100;
// Increase the skip threshold if the lower spatial layer is lower quality
// relative to the current layer.
else if (cpi->svc.spatial_layer_id > 0 && cm->base_qindex < 140 &&
cm->base_qindex < cpi->svc.lower_layer_qindex - 20)
thresh_svc_skip_golden = 1000;
init_ref_frame_cost(cm, xd, ref_frame_cost);
@@ -1649,16 +1635,6 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data,
cpi->sf.use_compound_nonrd_pickmode && usable_ref_frame == ALTREF_FRAME)
comp_modes = 2;
// If the segment reference frame feature is enabled and it's set to GOLDEN
// reference, then make sure we don't skip checking GOLDEN. This is to
// prevent the possibility of not picking any mode.
if (segfeature_active(seg, mi->segment_id, SEG_LVL_REF_FRAME) &&
get_segdata(seg, mi->segment_id, SEG_LVL_REF_FRAME) == GOLDEN_FRAME) {
usable_ref_frame = GOLDEN_FRAME;
skip_ref_find_pred[GOLDEN_FRAME] = 0;
thresh_svc_skip_golden = 0;
}
for (ref_frame = LAST_FRAME; ref_frame <= usable_ref_frame; ++ref_frame) {
if (!skip_ref_find_pred[ref_frame]) {
find_predictors(cpi, x, ref_frame, frame_mv, const_motion,
@@ -1671,18 +1647,6 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data,
if (cpi->use_svc || cpi->oxcf.speed <= 7 || bsize < BLOCK_32X32)
x->sb_use_mv_part = 0;
// Set the flag_svc_subpel to 1 for SVC if the lower spatial layer used
// an averaging filter for downsampling (phase = 8). If so, we will test
// a nonzero motion mode on the spatial (golden) reference.
// The nonzero motion is half a pixel shifted to the left and top (-4, -4).
if (cpi->use_svc && cpi->svc.spatial_layer_id > 0 &&
svc_force_zero_mode[GOLDEN_FRAME - 1] &&
cpi->svc.downsample_filter_phase[cpi->svc.spatial_layer_id - 1] == 8) {
svc_mv_col = -4;
svc_mv_row = -4;
flag_svc_subpel = 1;
}
for (idx = 0; idx < num_inter_modes + comp_modes; ++idx) {
int rate_mv = 0;
int mode_rd_thresh;
@@ -1696,7 +1660,6 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data,
int inter_mv_mode = 0;
int skip_this_mv = 0;
int comp_pred = 0;
int force_gf_mv = 0;
PREDICTION_MODE this_mode;
second_ref_frame = NONE;
@@ -1717,29 +1680,8 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data,
comp_pred = 1;
}
if (ref_frame > usable_ref_frame) continue;
if (skip_ref_find_pred[ref_frame]) continue;
// If the segment reference frame feature is enabled then do nothing if the
// current ref frame is not allowed.
if (segfeature_active(seg, mi->segment_id, SEG_LVL_REF_FRAME) &&
get_segdata(seg, mi->segment_id, SEG_LVL_REF_FRAME) != (int)ref_frame)
continue;
if (flag_svc_subpel && ref_frame == GOLDEN_FRAME) {
force_gf_mv = 1;
// Only test mode if NEARESTMV/NEARMV is (svc_mv_col, svc_mv_row),
// otherwise set NEWMV to (svc_mv_col, svc_mv_row).
if (this_mode == NEWMV) {
frame_mv[this_mode][ref_frame].as_mv.col = svc_mv_col;
frame_mv[this_mode][ref_frame].as_mv.row = svc_mv_row;
} else if (frame_mv[this_mode][ref_frame].as_mv.col != svc_mv_col ||
frame_mv[this_mode][ref_frame].as_mv.row != svc_mv_row) {
continue;
}
}
if (comp_pred) {
const struct segmentation *const seg = &cm->seg;
if (!cpi->allow_comp_inter_inter) continue;
// Skip compound inter modes if ARF is not available.
if (!(cpi->ref_frame_flags & flag_list[second_ref_frame])) continue;
@@ -1748,6 +1690,9 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data,
if (segfeature_active(seg, mi->segment_id, SEG_LVL_REF_FRAME)) continue;
}
if (ref_frame > usable_ref_frame) continue;
if (skip_ref_find_pred[ref_frame]) continue;
// For SVC, skip the golden (spatial) reference search if sse of zeromv_last
// is below threshold.
if (cpi->use_svc && ref_frame == GOLDEN_FRAME &&
@@ -1792,7 +1737,7 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data,
// Skip non-zeromv mode search for golden frame if force_skip_low_temp_var
// is set. If nearestmv for golden frame is 0, zeromv mode will be skipped
// later.
if (!force_gf_mv && force_skip_low_temp_var && ref_frame == GOLDEN_FRAME &&
if (force_skip_low_temp_var && ref_frame == GOLDEN_FRAME &&
frame_mv[this_mode][ref_frame].as_int != 0) {
continue;
}
@@ -1806,39 +1751,34 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data,
}
if (cpi->use_svc) {
if (!force_gf_mv && svc_force_zero_mode[ref_frame - 1] &&
if (svc_force_zero_mode[ref_frame - 1] &&
frame_mv[this_mode][ref_frame].as_int != 0)
continue;
}
// Disable this drop out case if the ref frame segment level feature is
// enabled for this segment. This is to prevent the possibility that we end
// up unable to pick any mode.
if (!segfeature_active(seg, mi->segment_id, SEG_LVL_REF_FRAME)) {
if (sf->reference_masking &&
!(frame_mv[this_mode][ref_frame].as_int == 0 &&
ref_frame == LAST_FRAME)) {
if (usable_ref_frame < ALTREF_FRAME) {
if (!force_skip_low_temp_var && usable_ref_frame > LAST_FRAME) {
i = (ref_frame == LAST_FRAME) ? GOLDEN_FRAME : LAST_FRAME;
if ((cpi->ref_frame_flags & flag_list[i]))
if (x->pred_mv_sad[ref_frame] > (x->pred_mv_sad[i] << 1))
ref_frame_skip_mask |= (1 << ref_frame);
}
} else if (!cpi->rc.is_src_frame_alt_ref &&
!(frame_mv[this_mode][ref_frame].as_int == 0 &&
ref_frame == ALTREF_FRAME)) {
int ref1 = (ref_frame == GOLDEN_FRAME) ? LAST_FRAME : GOLDEN_FRAME;
int ref2 = (ref_frame == ALTREF_FRAME) ? LAST_FRAME : ALTREF_FRAME;
if (((cpi->ref_frame_flags & flag_list[ref1]) &&
(x->pred_mv_sad[ref_frame] > (x->pred_mv_sad[ref1] << 1))) ||
((cpi->ref_frame_flags & flag_list[ref2]) &&
(x->pred_mv_sad[ref_frame] > (x->pred_mv_sad[ref2] << 1))))
ref_frame_skip_mask |= (1 << ref_frame);
if (sf->reference_masking &&
!(frame_mv[this_mode][ref_frame].as_int == 0 &&
ref_frame == LAST_FRAME)) {
if (usable_ref_frame < ALTREF_FRAME) {
if (!force_skip_low_temp_var && usable_ref_frame > LAST_FRAME) {
i = (ref_frame == LAST_FRAME) ? GOLDEN_FRAME : LAST_FRAME;
if ((cpi->ref_frame_flags & flag_list[i]))
if (x->pred_mv_sad[ref_frame] > (x->pred_mv_sad[i] << 1))
ref_frame_skip_mask |= (1 << ref_frame);
}
} else if (!cpi->rc.is_src_frame_alt_ref &&
!(frame_mv[this_mode][ref_frame].as_int == 0 &&
ref_frame == ALTREF_FRAME)) {
int ref1 = (ref_frame == GOLDEN_FRAME) ? LAST_FRAME : GOLDEN_FRAME;
int ref2 = (ref_frame == ALTREF_FRAME) ? LAST_FRAME : ALTREF_FRAME;
if (((cpi->ref_frame_flags & flag_list[ref1]) &&
(x->pred_mv_sad[ref_frame] > (x->pred_mv_sad[ref1] << 1))) ||
((cpi->ref_frame_flags & flag_list[ref2]) &&
(x->pred_mv_sad[ref_frame] > (x->pred_mv_sad[ref2] << 1))))
ref_frame_skip_mask |= (1 << ref_frame);
}
if (ref_frame_skip_mask & (1 << ref_frame)) continue;
}
if (ref_frame_skip_mask & (1 << ref_frame)) continue;
// Select prediction reference frames.
for (i = 0; i < MAX_MB_PLANE; i++) {
@@ -1868,7 +1808,7 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data,
&rd_thresh_freq_fact[mode_index])))
continue;
if (this_mode == NEWMV && !force_gf_mv) {
if (this_mode == NEWMV) {
if (ref_frame > LAST_FRAME && !cpi->use_svc &&
cpi->oxcf.rc_mode == VPX_CBR) {
int tmp_sad;
@@ -2009,7 +1949,7 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data,
if ((this_mode == NEWMV || filter_ref == SWITCHABLE) &&
pred_filter_search &&
(ref_frame == LAST_FRAME ||
(ref_frame == GOLDEN_FRAME && !force_gf_mv &&
(ref_frame == GOLDEN_FRAME &&
(cpi->use_svc || cpi->oxcf.rc_mode == VPX_VBR))) &&
(((mi->mv[0].as_mv.row | mi->mv[0].as_mv.col) & 0x07) != 0)) {
int pf_rate[3];
@@ -2233,11 +2173,9 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data,
// For a spatial enhancement layer: perform intra prediction only if base
// layer is chosen as the reference. Always perform intra prediction if
// LAST is the only reference, or is_key_frame is set, or on base
// temporal layer.
// LAST is the only reference or is_key_frame is set.
if (cpi->svc.spatial_layer_id) {
perform_intra_pred =
cpi->svc.temporal_layer_id == 0 ||
cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame ||
!(cpi->ref_frame_flags & flag_list[GOLDEN_FRAME]) ||
(!cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame &&
@@ -2247,13 +2185,6 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data,
if (cpi->oxcf.lag_in_frames > 0 && cpi->oxcf.rc_mode == VPX_VBR &&
cpi->rc.is_src_frame_alt_ref)
perform_intra_pred = 0;
// If the segment reference frame feature is enabled and set then
// skip the intra prediction.
if (segfeature_active(seg, mi->segment_id, SEG_LVL_REF_FRAME) &&
get_segdata(seg, mi->segment_id, SEG_LVL_REF_FRAME) > 0)
perform_intra_pred = 0;
// Perform intra prediction search, if the best SAD is above a certain
// threshold.
if (best_rdc.rdcost == INT64_MAX ||


@@ -31,13 +31,10 @@
#include "vp9/encoder/vp9_encodemv.h"
#include "vp9/encoder/vp9_ratectrl.h"
// Max rate per frame for 1080P and below encodes if no level requirement is
// given. For larger formats, limit to MAX_MB_RATE bits per MB.
// 4Mbits is derived from the level requirement for level 4 (1080P 30), which
// requires that HW can sustain a rate of 16Mbits over a 4 frame group.
// If a lower level requirement is specified then it may override this value.
// Max rate target for 1080P and below encodes under normal circumstances
// (1920 * 1080 / (16 * 16)) * MAX_MB_RATE bits per MB
#define MAX_MB_RATE 250
#define MAXRATE_1080P 4000000
#define MAXRATE_1080P 2025000
#define DEFAULT_KF_BOOST 2000
#define DEFAULT_GF_BOOST 2000
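Each MAXRATE_1080P value follows from its comment: 1920 * 1080 / (16 * 16) = 8100 macroblocks, and 8100 * MAX_MB_RATE (250) = 2025000 on one side, while the other side's 4000000 comes from the level 4 requirement of 16 Mbits sustained over a 4-frame group, i.e. 4 Mbits per frame.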
@@ -1103,9 +1100,6 @@ static int rc_pick_q_and_bounds_two_pass(const VP9_COMP *cpi, int *bottom_index,
// Baseline value derived from cpi->active_worst_quality and kf boost.
active_best_quality =
get_kf_active_quality(rc, active_worst_quality, cm->bit_depth);
if (cpi->twopass.kf_zeromotion_pct >= STATIC_KF_GROUP_THRESH) {
active_best_quality /= 4;
}
// Allow somewhat lower kf minq with small image formats.
if ((cm->width * cm->height) <= (352 * 288)) {
@@ -1494,22 +1488,15 @@ void vp9_rc_postencode_update(VP9_COMP *cpi, uint64_t bytes_used) {
cpi->rc.last_frame_is_src_altref = cpi->rc.is_src_frame_alt_ref;
}
if (cm->frame_type != KEY_FRAME) rc->reset_high_source_sad = 0;
rc->last_avg_frame_bandwidth = rc->avg_frame_bandwidth;
if (cpi->use_svc &&
cpi->svc.spatial_layer_id < cpi->svc.number_spatial_layers - 1)
cpi->svc.lower_layer_qindex = cm->base_qindex;
}
void vp9_rc_postencode_update_drop_frame(VP9_COMP *cpi) {
// Update buffer level with zero size, update frame counters, and return.
update_buffer_level(cpi, 0);
cpi->common.current_video_frame++;
cpi->rc.frames_since_key++;
cpi->rc.frames_to_key--;
cpi->rc.rc_2_frame = 0;
cpi->rc.rc_1_frame = 0;
cpi->rc.last_avg_frame_bandwidth = cpi->rc.avg_frame_bandwidth;
}
static int calc_pframe_target_size_one_pass_vbr(const VP9_COMP *const cpi) {
@@ -1593,8 +1580,9 @@ void vp9_rc_get_one_pass_vbr_params(VP9_COMP *cpi) {
// Adjust boost and af_ratio based on avg_frame_low_motion, which varies
// between 0 and 100 (stationary, 100% zero/small motion).
rc->gfu_boost =
VPXMAX(500, DEFAULT_GF_BOOST * (rc->avg_frame_low_motion << 1) /
(rc->avg_frame_low_motion + 100));
VPXMAX(500,
DEFAULT_GF_BOOST * (rc->avg_frame_low_motion << 1) /
(rc->avg_frame_low_motion + 100));
rc->af_ratio_onepass_vbr = VPXMIN(15, VPXMAX(5, 3 * rc->gfu_boost / 400));
}
adjust_gfint_frame_constraint(cpi, rc->frames_to_key);
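Worked values for the boost formula above, with m = avg_frame_low_motion (illustrative arithmetic only):
/*   m = 100: 2000 * 200 / 200 = 2000 -> af_ratio = VPXMIN(15, 3 * 2000 / 400) = 15
 *   m =  50: 2000 * 100 / 150 = 1333 -> af_ratio = 3 * 1333 / 400 = 9
 *   m =   0: VPXMAX(500, 0)   =  500 -> af_ratio = VPXMAX(5, 3 * 500 / 400) = 5 */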
@@ -1869,8 +1857,13 @@ void vp9_rc_set_gf_interval_range(const VP9_COMP *const cpi,
rc->max_gf_interval = vp9_rc_get_default_max_gf_interval(
cpi->framerate, rc->min_gf_interval);
// Extended max interval for genuinely static scenes like slide shows.
rc->static_scene_max_gf_interval = MAX_STATIC_GF_GROUP_LENGTH;
// Extended interval for genuinely static scenes.
rc->static_scene_max_gf_interval = MAX_LAG_BUFFERS * 2;
if (is_altref_enabled(cpi)) {
if (rc->static_scene_max_gf_interval > oxcf->lag_in_frames - 1)
rc->static_scene_max_gf_interval = oxcf->lag_in_frames - 1;
}
if (rc->max_gf_interval > rc->static_scene_max_gf_interval)
rc->max_gf_interval = rc->static_scene_max_gf_interval;
@@ -1880,12 +1873,9 @@ void vp9_rc_set_gf_interval_range(const VP9_COMP *const cpi,
if (oxcf->target_level == LEVEL_AUTO) {
const uint32_t pic_size = cpi->common.width * cpi->common.height;
const uint32_t pic_breadth =
VPXMAX(cpi->common.width, cpi->common.height);
int i;
for (i = LEVEL_1; i < LEVEL_MAX; ++i) {
if (vp9_level_defs[i].max_luma_picture_size >= pic_size &&
vp9_level_defs[i].max_luma_picture_breadth >= pic_breadth) {
if (vp9_level_defs[i].max_luma_picture_size > pic_size) {
if (rc->min_gf_interval <=
(int)vp9_level_defs[i].min_altref_distance) {
rc->min_gf_interval =
@@ -1914,12 +1904,12 @@ void vp9_rc_update_framerate(VP9_COMP *cpi) {
VPXMAX(rc->min_frame_bandwidth, FRAME_OVERHEAD_BITS);
// A maximum bitrate for a frame is defined.
// However this limit is extended if a very high rate is given on the command
// line or the rate cannot be achieved because of a user-specified max q
// (e.g. when the user specifies lossless encode).
//
// If a level is specified that requires a lower maximum rate then the level
// value takes precedence.
// The baseline for this aligns with HW implementations that
// can support decode of 1080P content up to a bitrate of MAX_MB_RATE bits
// per 16x16 MB (averaged over a frame). However this limit is extended if
// a very high rate is given on the command line or the rate cannot
// be achieved because of a user-specified max q (e.g. when the user
// specifies lossless encode).
vbr_max_bits =
(int)(((int64_t)rc->avg_frame_bandwidth * oxcf->two_pass_vbrmax_section) /
100);
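Illustrative numbers for the cap just computed: with avg_frame_bandwidth = 100000 bits per frame and two_pass_vbrmax_section = 300 (a 300% per-frame allowance), vbr_max_bits = (100000 * 300) / 100 = 300000 bits, before the MAX_MB_RATE / level clamp described above is applied.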

View File

@@ -34,14 +34,6 @@ extern "C" {
#define FRAME_OVERHEAD_BITS 200
// Threshold used to define a KF group as static (e.g. a slide show).
// Essentially this means that no frame in the group has more than 1% of MBs
// that are not marked as coded with 0,0 motion in the first pass.
#define STATIC_KF_GROUP_THRESH 99
// The maximum duration of a GF group that is static (for example a slide show).
#define MAX_STATIC_GF_GROUP_LENGTH 250
typedef enum {
INTER_NORMAL = 0,
INTER_HIGH = 1,
@@ -160,8 +152,6 @@ typedef struct {
int rc_2_frame;
int q_1_frame;
int q_2_frame;
// Keep track of the last target average frame bandwidth.
int last_avg_frame_bandwidth;
// Auto frame-scaling variables.
FRAME_SCALE_LEVEL frame_size_selector;

View File

@@ -59,9 +59,7 @@ typedef struct {
MV_REFERENCE_FRAME ref_frame[2];
} MODE_DEFINITION;
typedef struct {
MV_REFERENCE_FRAME ref_frame[2];
} REF_DEFINITION;
typedef struct { MV_REFERENCE_FRAME ref_frame[2]; } REF_DEFINITION;
struct rdcost_block_args {
const VP9_COMP *cpi;

View File

@@ -37,16 +37,14 @@ void vp9_init_layer_context(VP9_COMP *const cpi) {
svc->scaled_one_half = 0;
svc->current_superframe = 0;
svc->non_reference_frame = 0;
svc->skip_enhancement_layer = 0;
for (i = 0; i < REF_FRAMES; ++i) svc->ref_frame_index[i] = -1;
for (sl = 0; sl < oxcf->ss_number_layers; ++sl) {
svc->ext_frame_flags[sl] = 0;
svc->ext_lst_fb_idx[sl] = 0;
svc->ext_gld_fb_idx[sl] = 1;
svc->ext_alt_fb_idx[sl] = 2;
svc->downsample_filter_type[sl] = BILINEAR;
svc->downsample_filter_phase[sl] = 8; // Set to 8 for averaging filter.
svc->downsample_filter_type[sl] = EIGHTTAP;
svc->downsample_filter_phase[sl] = 0; // Phase 8 would select the averaging filter.
}
if (cpi->oxcf.error_resilient_mode == 0 && cpi->oxcf.pass == 2) {
@@ -155,8 +153,6 @@ void vp9_update_layer_context_change_config(VP9_COMP *const cpi,
int sl, tl, layer = 0, spatial_layer_target;
float bitrate_alloc = 1.0;
cpi->svc.temporal_layering_mode = oxcf->temporal_layering_mode;
if (svc->temporal_layering_mode != VP9E_TEMPORAL_LAYERING_MODE_NOLAYERING) {
for (sl = 0; sl < oxcf->ss_number_layers; ++sl) {
for (tl = 0; tl < oxcf->ts_number_layers; ++tl) {
@@ -393,9 +389,9 @@ int vp9_is_upper_layer_key_frame(const VP9_COMP *const cpi) {
.is_key_frame;
}
void get_layer_resolution(const int width_org, const int height_org,
const int num, const int den, int *width_out,
int *height_out) {
static void get_layer_resolution(const int width_org, const int height_org,
const int num, const int den, int *width_out,
int *height_out) {
int w, h;
if (width_out == NULL || height_out == NULL || den == 0) return;
@@ -549,8 +545,6 @@ static void set_flags_and_fb_idx_for_temporal_mode2(VP9_COMP *const cpi) {
if (!spatial_id) {
cpi->ref_frame_flags = VP9_LAST_FLAG;
} else {
if (spatial_id == cpi->svc.number_spatial_layers - 1)
cpi->ext_refresh_alt_ref_frame = 0;
cpi->ref_frame_flags = VP9_LAST_FLAG | VP9_GOLD_FLAG;
}
}
@@ -610,7 +604,6 @@ static void set_flags_and_fb_idx_for_temporal_mode_noLayering(
int vp9_one_pass_cbr_svc_start_layer(VP9_COMP *const cpi) {
int width = 0, height = 0;
LAYER_CONTEXT *lc = NULL;
cpi->svc.skip_enhancement_layer = 0;
if (cpi->svc.number_spatial_layers > 1) cpi->svc.use_base_mv = 1;
cpi->svc.force_zero_mode_spatial_ref = 1;
cpi->svc.mi_stride[cpi->svc.spatial_layer_id] = cpi->common.mi_stride;
@@ -663,14 +656,10 @@ int vp9_one_pass_cbr_svc_start_layer(VP9_COMP *const cpi) {
lc->scaling_factor_num, lc->scaling_factor_den, &width,
&height);
// For resolutions <= VGA: set phase of the filter = 8 (for symmetric
// For resolutions <= QVGA: set phase of the filter = 8 (for symmetric
// averaging filter), use bilinear for now.
if (width * height <= 640 * 480) {
if (width * height <= 320 * 240) {
cpi->svc.downsample_filter_type[cpi->svc.spatial_layer_id] = BILINEAR;
// Use EIGHTTAP_SMOOTH for low resolutions.
if (width * height <= 320 * 240)
cpi->svc.downsample_filter_type[cpi->svc.spatial_layer_id] =
EIGHTTAP_SMOOTH;
cpi->svc.downsample_filter_phase[cpi->svc.spatial_layer_id] = 8;
}
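For reference, the area thresholds in this hunk work out to 640 * 480 = 307200 pixels (VGA) and 320 * 240 = 76800 pixels (QVGA); both sides of the change switch to the symmetric averaging filter (phase 8) at their respective cutoff.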
@@ -872,28 +861,3 @@ void vp9_svc_reset_key_frame(VP9_COMP *const cpi) {
vp9_update_temporal_layer_framerate(cpi);
vp9_restore_layer_context(cpi);
}
void vp9_svc_check_reset_layer_rc_flag(VP9_COMP *const cpi) {
SVC *svc = &cpi->svc;
int sl, tl;
for (sl = 0; sl < svc->number_spatial_layers; ++sl) {
// Check for reset based on avg_frame_bandwidth for spatial layer sl.
int layer = LAYER_IDS_TO_IDX(sl, svc->number_temporal_layers - 1,
svc->number_temporal_layers);
LAYER_CONTEXT *lc = &svc->layer_context[layer];
RATE_CONTROL *lrc = &lc->rc;
if (lrc->avg_frame_bandwidth > (3 * lrc->last_avg_frame_bandwidth >> 1) ||
lrc->avg_frame_bandwidth < (lrc->last_avg_frame_bandwidth >> 1)) {
// Reset for all temporal layers with spatial layer sl.
for (tl = 0; tl < svc->number_temporal_layers; ++tl) {
int layer = LAYER_IDS_TO_IDX(sl, tl, svc->number_temporal_layers);
LAYER_CONTEXT *lc = &svc->layer_context[layer];
RATE_CONTROL *lrc = &lc->rc;
lrc->rc_1_frame = 0;
lrc->rc_2_frame = 0;
lrc->bits_off_target = lrc->optimal_buffer_level;
lrc->buffer_level = lrc->optimal_buffer_level;
}
}
}
}
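The reset test above uses shifts for the ratio thresholds; spelled out with last_avg_frame_bandwidth = 100000 (illustrative):
/* (3 * 100000) >> 1 = 150000  -> reset if the new average exceeds 1.5x
 *  100000 >> 1      =  50000  -> reset if it falls below 0.5x */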

View File

@@ -49,7 +49,7 @@ typedef struct {
uint8_t speed;
} LAYER_CONTEXT;
typedef struct SVC {
typedef struct {
int spatial_layer_id;
int temporal_layer_id;
int number_spatial_layers;
@@ -99,12 +99,6 @@ typedef struct SVC {
BLOCK_SIZE *prev_partition_svc;
int mi_stride[VPX_MAX_LAYERS];
int first_layer_denoise;
int skip_enhancement_layer;
int lower_layer_qindex;
} SVC;
struct VP9_COMP;
@@ -134,10 +128,6 @@ void vp9_save_layer_context(struct VP9_COMP *const cpi);
// Initialize second pass rc for spatial svc.
void vp9_init_second_pass_spatial_svc(struct VP9_COMP *cpi);
void get_layer_resolution(const int width_org, const int height_org,
const int num, const int den, int *width_out,
int *height_out);
// Increment number of video frames in layer
void vp9_inc_frame_in_layer(struct VP9_COMP *const cpi);
@@ -158,8 +148,6 @@ void vp9_free_svc_cyclic_refresh(struct VP9_COMP *const cpi);
void vp9_svc_reset_key_frame(struct VP9_COMP *const cpi);
void vp9_svc_check_reset_layer_rc_flag(struct VP9_COMP *const cpi);
#ifdef __cplusplus
} // extern "C"
#endif

View File

@@ -170,13 +170,13 @@ void vp9_fht4x4_sse2(const int16_t *input, tran_low_t *output, int stride,
fadst4_sse2(in);
write_buffer_4x4(output, in);
break;
default:
assert(tx_type == ADST_ADST);
case ADST_ADST:
load_buffer_4x4(input, in, stride);
fadst4_sse2(in);
fadst4_sse2(in);
write_buffer_4x4(output, in);
break;
default: assert(0); break;
}
}
@@ -1097,14 +1097,14 @@ void vp9_fht8x8_sse2(const int16_t *input, tran_low_t *output, int stride,
right_shift_8x8(in, 1);
write_buffer_8x8(output, in, 8);
break;
default:
assert(tx_type == ADST_ADST);
case ADST_ADST:
load_buffer_8x8(input, in, stride);
fadst8_sse2(in);
fadst8_sse2(in);
right_shift_8x8(in, 1);
write_buffer_8x8(output, in, 8);
break;
default: assert(0); break;
}
}
@@ -1963,13 +1963,13 @@ void vp9_fht16x16_sse2(const int16_t *input, tran_low_t *output, int stride,
fadst16_sse2(in0, in1);
write_buffer_16x16(output, in0, in1, 16);
break;
default:
assert(tx_type == ADST_ADST);
case ADST_ADST:
load_buffer_16x16(input, in0, in1, stride);
fadst16_sse2(in0, in1);
right_shift_16x16(in0, in1);
fadst16_sse2(in0, in1);
write_buffer_16x16(output, in0, in1, 16);
break;
default: assert(0); break;
}
}
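For context on these switches: the 2-D FHT is computed as two 1-D passes, and tx_type selects the 1-D kernel used in each direction, which is why the ADST_ADST case simply runs the fadst pass twice. A sketch of the mapping (the exact row/column assignment is an implementation detail, hedged here):
/* DCT_DCT   : fdct,  fdct
 * ADST_DCT  : fadst, fdct
 * DCT_ADST  : fdct,  fadst
 * ADST_ADST : fadst, fadst  -- e.g. fadst16_sse2(in0, in1) twice above */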

View File

@@ -1,7 +1,7 @@
/*
* Copyright (c) 2014 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may

View File

@@ -1,140 +0,0 @@
/*
* Copyright (c) 2017 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include <assert.h>
#include <immintrin.h> // AVX2
#include "./vp9_rtcd.h"
#include "vpx/vpx_integer.h"
#include "vpx_dsp/vpx_dsp_common.h"
#include "vpx_dsp/x86/bitdepth_conversion_avx2.h"
#include "vpx_dsp/x86/quantize_x86.h"
// Zero fill 16 positions in the output buffer.
static INLINE void store_zero_tran_low(tran_low_t *a) {
const __m256i zero = _mm256_setzero_si256();
#if CONFIG_VP9_HIGHBITDEPTH
_mm256_storeu_si256((__m256i *)(a), zero);
_mm256_storeu_si256((__m256i *)(a + 8), zero);
#else
_mm256_storeu_si256((__m256i *)(a), zero);
#endif
}
static INLINE __m256i scan_eob_256(const __m256i *iscan_ptr,
__m256i *coeff256) {
const __m256i iscan = _mm256_loadu_si256(iscan_ptr);
const __m256i zero256 = _mm256_setzero_si256();
#if CONFIG_VP9_HIGHBITDEPTH
// The _mm256_packs_epi32() in load_tran_low() packs the 64 bit coeff as
// B1 A1 B0 A0. Shuffle to B1 B0 A1 A0 in order to scan eob correctly.
const __m256i _coeff256 = _mm256_permute4x64_epi64(*coeff256, 0xd8);
const __m256i zero_coeff0 = _mm256_cmpeq_epi16(_coeff256, zero256);
#else
const __m256i zero_coeff0 = _mm256_cmpeq_epi16(*coeff256, zero256);
#endif
const __m256i nzero_coeff0 = _mm256_cmpeq_epi16(zero_coeff0, zero256);
// Add one to convert from indices to counts
const __m256i iscan_plus_one = _mm256_sub_epi16(iscan, nzero_coeff0);
return _mm256_and_si256(iscan_plus_one, nzero_coeff0);
}
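Lane by lane, scan_eob_256() turns scan indices into "index + 1 where nonzero" counts; a worked example on four 16-bit lanes (illustrative):
/* coeff           :  0   7   0  -3
 * nzero_coeff0    :  0  -1   0  -1   (-1 == 0xFFFF for nonzero lanes)
 * iscan           :  0   1   2   3   (scan position of each coefficient)
 * iscan - nzmask  :  0   2   2   4   (subtracting -1 adds one)
 * & nzmask        :  0   2   0   4   (zero lanes drop out)
 * The running max over all lanes (taken by the caller) is the eob. */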
void vp9_quantize_fp_avx2(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
int skip_block, const int16_t *round_ptr,
const int16_t *quant_ptr, tran_low_t *qcoeff_ptr,
tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr,
uint16_t *eob_ptr, const int16_t *scan_ptr,
const int16_t *iscan_ptr) {
__m128i eob;
__m256i round256, quant256, dequant256;
__m256i eob256, thr256;
(void)scan_ptr;
(void)skip_block;
assert(!skip_block);
coeff_ptr += n_coeffs;
iscan_ptr += n_coeffs;
qcoeff_ptr += n_coeffs;
dqcoeff_ptr += n_coeffs;
n_coeffs = -n_coeffs;
{
__m256i coeff256;
// Setup global values
{
const __m128i round = _mm_load_si128((const __m128i *)round_ptr);
const __m128i quant = _mm_load_si128((const __m128i *)quant_ptr);
const __m128i dequant = _mm_load_si128((const __m128i *)dequant_ptr);
round256 = _mm256_castsi128_si256(round);
round256 = _mm256_permute4x64_epi64(round256, 0x54);
quant256 = _mm256_castsi128_si256(quant);
quant256 = _mm256_permute4x64_epi64(quant256, 0x54);
dequant256 = _mm256_castsi128_si256(dequant);
dequant256 = _mm256_permute4x64_epi64(dequant256, 0x54);
}
{
__m256i qcoeff256;
__m256i qtmp256;
coeff256 = load_tran_low(coeff_ptr + n_coeffs);
qcoeff256 = _mm256_abs_epi16(coeff256);
qcoeff256 = _mm256_adds_epi16(qcoeff256, round256);
qtmp256 = _mm256_mulhi_epi16(qcoeff256, quant256);
qcoeff256 = _mm256_sign_epi16(qtmp256, coeff256);
store_tran_low(qcoeff256, qcoeff_ptr + n_coeffs);
coeff256 = _mm256_mullo_epi16(qcoeff256, dequant256);
store_tran_low(coeff256, dqcoeff_ptr + n_coeffs);
}
eob256 = scan_eob_256((const __m256i *)(iscan_ptr + n_coeffs), &coeff256);
n_coeffs += 8 * 2;
}
// Drop the DC constants: broadcast the AC half of each constant vector.
dequant256 = _mm256_permute2x128_si256(dequant256, dequant256, 0x31);
quant256 = _mm256_permute2x128_si256(quant256, quant256, 0x31);
round256 = _mm256_permute2x128_si256(round256, round256, 0x31);
thr256 = _mm256_srai_epi16(dequant256, 1);
// AC only loop
while (n_coeffs < 0) {
__m256i coeff256 = load_tran_low(coeff_ptr + n_coeffs);
__m256i qcoeff256 = _mm256_abs_epi16(coeff256);
int32_t nzflag =
_mm256_movemask_epi8(_mm256_cmpgt_epi16(qcoeff256, thr256));
if (nzflag) {
__m256i qtmp256;
qcoeff256 = _mm256_adds_epi16(qcoeff256, round256);
qtmp256 = _mm256_mulhi_epi16(qcoeff256, quant256);
qcoeff256 = _mm256_sign_epi16(qtmp256, coeff256);
store_tran_low(qcoeff256, qcoeff_ptr + n_coeffs);
coeff256 = _mm256_mullo_epi16(qcoeff256, dequant256);
store_tran_low(coeff256, dqcoeff_ptr + n_coeffs);
eob256 = _mm256_max_epi16(
eob256,
scan_eob_256((const __m256i *)(iscan_ptr + n_coeffs), &coeff256));
} else {
store_zero_tran_low(qcoeff_ptr + n_coeffs);
store_zero_tran_low(dqcoeff_ptr + n_coeffs);
}
n_coeffs += 8 * 2;
}
eob = _mm_max_epi16(_mm256_castsi256_si128(eob256),
_mm256_extracti128_si256(eob256, 1));
*eob_ptr = accumulate_eob(eob);
}
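A scalar sketch of the math this kernel implements (reference only; the production scalar path is vp9_quantize_fp_c, and this sketch ignores the saturating add and the AC skip threshold used above):
#include <stdint.h>
#include <stdlib.h>
static void quantize_fp_sketch(const int16_t *coeff, int n,
                               const int16_t round[2], const int16_t quant[2],
                               const int16_t dequant[2], int16_t *qcoeff,
                               int16_t *dqcoeff, const int16_t *iscan,
                               uint16_t *eob) {
  int i;
  *eob = 0;
  for (i = 0; i < n; ++i) {
    const int k = i != 0;  /* index 0 = DC constants, others = AC */
    int q = ((abs(coeff[i]) + round[k]) * quant[k]) >> 16;  /* mulhi */
    if (coeff[i] < 0) q = -q;               /* _mm256_sign_epi16 */
    qcoeff[i] = (int16_t)q;
    dqcoeff[i] = (int16_t)(q * dequant[k]); /* _mm256_mullo_epi16 */
    if (q != 0 && iscan[i] + 1 > *eob) *eob = (uint16_t)(iscan[i] + 1);
  }
}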

Some files were not shown because too many files have changed in this diff.