Compare commits

..

1 Commits

Author SHA1 Message Date
wangch
eea111f16a Test gerrit. 2017-12-05 18:07:21 -05:00
43 changed files with 934 additions and 1302 deletions

View File

@@ -3,7 +3,6 @@ Aex Converse <aconverse@google.com>
Aex Converse <aconverse@google.com> <alex.converse@gmail.com>
Alexis Ballier <aballier@gentoo.org> <alexis.ballier@gmail.com>
Alpha Lam <hclam@google.com> <hclam@chromium.org>
Chris Cunningham <chcunningham@chromium.org>
Daniele Castagna <dcastagna@chromium.org> <dcastagna@google.com>
Deb Mukherjee <debargha@google.com>
Erik Niemeyer <erik.a.niemeyer@intel.com> <erik.a.niemeyer@gmail.com>
@@ -22,21 +21,18 @@ Marco Paniconi <marpan@google.com>
Marco Paniconi <marpan@google.com> <marpan@chromium.org>
Pascal Massimino <pascal.massimino@gmail.com>
Paul Wilkins <paulwilkins@google.com>
Peter Boström <pbos@chromium.org> <pbos@google.com>
Peter de Rivaz <peter.derivaz@gmail.com>
Peter de Rivaz <peter.derivaz@gmail.com> <peter.derivaz@argondesign.com>
Ralph Giles <giles@xiph.org> <giles@entropywave.com>
Ralph Giles <giles@xiph.org> <giles@mozilla.com>
Ronald S. Bultje <rsbultje@gmail.com> <rbultje@google.com>
Sami Pietilä <samipietila@google.com>
Shiyou Yin <yinshiyou-hf@loongson.cn>
Tamar Levy <tamar.levy@intel.com>
Tamar Levy <tamar.levy@intel.com> <levytamar82@gmail.com>
Tero Rintaluoma <teror@google.com> <tero.rintaluoma@on2.com>
Timothy B. Terriberry <tterribe@xiph.org> <tterriberry@mozilla.com>
Tom Finegan <tomfinegan@google.com>
Tom Finegan <tomfinegan@google.com> <tomfinegan@chromium.org>
Urvang Joshi <urvang@google.com> <urvang@chromium.org>
Yaowu Xu <yaowu@google.com> <adam@xuyaowu.com>
Yaowu Xu <yaowu@google.com> <yaowu@xuyaowu.com>
Yaowu Xu <yaowu@google.com> <Yaowu Xu>

16
AUTHORS
View File

@@ -3,13 +3,13 @@
Aaron Watry <awatry@gmail.com>
Abo Talib Mahfoodh <ab.mahfoodh@gmail.com>
Adam Xu <adam@xuyaowu.com>
Adrian Grange <agrange@google.com>
Aex Converse <aconverse@google.com>
Ahmad Sharif <asharif@google.com>
Aleksey Vasenev <margtu-fivt@ya.ru>
Alexander Potapenko <glider@google.com>
Alexander Voronov <avoronov@graphics.cs.msu.ru>
Alexandra Hájková <alexandra.khirnova@gmail.com>
Alexis Ballier <aballier@gentoo.org>
Alok Ahuja <waveletcoeff@gmail.com>
Alpha Lam <hclam@google.com>
@@ -17,7 +17,6 @@ A.Mahfoodh <ab.mahfoodh@gmail.com>
Ami Fischman <fischman@chromium.org>
Andoni Morales Alastruey <ylatuya@gmail.com>
Andres Mejia <mcitadel@gmail.com>
Andrew Lewis <andrewlewis@google.com>
Andrew Russell <anrussell@google.com>
Angie Chiang <angiebird@google.com>
Aron Rosenberg <arosenberg@logitech.com>
@@ -25,9 +24,7 @@ Attila Nagy <attilanagy@google.com>
Brion Vibber <bvibber@wikimedia.org>
changjun.yang <changjun.yang@intel.com>
Charles 'Buck' Krasic <ckrasic@google.com>
Cheng Chen <chengchen@google.com>
chm <chm@rock-chips.com>
Chris Cunningham <chcunningham@chromium.org>
Christian Duvivier <cduvivier@google.com>
Daniele Castagna <dcastagna@chromium.org>
Daniel Kang <ddkang@google.com>
@@ -49,12 +46,10 @@ Geza Lore <gezalore@gmail.com>
Ghislain MARY <ghislainmary2@gmail.com>
Giuseppe Scrivano <gscrivano@gnu.org>
Gordana Cmiljanovic <gordana.cmiljanovic@imgtec.com>
Gregor Jasny <gjasny@gmail.com>
Guillaume Martres <gmartres@google.com>
Guillermo Ballester Valor <gbvalor@gmail.com>
Hangyu Kuang <hkuang@google.com>
Hanno Böck <hanno@hboeck.de>
Han Shen <shenhan@google.com>
Henrik Lundin <hlundin@google.com>
Hui Su <huisu@google.com>
Ivan Krasin <krasin@chromium.org>
@@ -88,7 +83,6 @@ Justin Clift <justin@salasaga.org>
Justin Lebar <justin.lebar@gmail.com>
Kaustubh Raste <kaustubh.raste@imgtec.com>
KO Myung-Hun <komh@chollian.net>
Kyle Siefring <kylesiefring@gmail.com>
Lawrence Velázquez <larryv@macports.org>
Linfeng Zhang <linfengz@google.com>
Lou Quillio <louquillio@google.com>
@@ -107,7 +101,6 @@ Mikhal Shemer <mikhal@google.com>
Min Chen <chenm003@gmail.com>
Minghai Shang <minghai@google.com>
Min Ye <yeemmi@google.com>
Moriyoshi Koizumi <mozo@mozo.jp>
Morton Jonuschat <yabawock@gmail.com>
Nathan E. Egge <negge@mozilla.com>
Nico Weber <thakis@chromium.org>
@@ -118,15 +111,12 @@ Paul Wilkins <paulwilkins@google.com>
Pavol Rusnak <stick@gk2.sk>
Paweł Hajdan <phajdan@google.com>
Pengchong Jin <pengchong@google.com>
Peter Boström <pbos@chromium.org>
Peter Collingbourne <pcc@chromium.org>
Peter Boström <pbos@google.com>
Peter de Rivaz <peter.derivaz@gmail.com>
Philip Jägenstedt <philipj@opera.com>
Priit Laes <plaes@plaes.org>
Rafael Ávila de Espíndola <rafael.espindola@gmail.com>
Rafaël Carré <funman@videolan.org>
Rafael de Lucena Valle <rafaeldelucena@gmail.com>
Rahul Chaudhry <rahulchaudhry@google.com>
Ralph Giles <giles@xiph.org>
Ranjit Kumar Tulabandu <ranjit.tulabandu@ittiam.com>
Rob Bradford <rob@linux.intel.com>
@@ -145,7 +135,6 @@ Shiyou Yin <yinshiyou-hf@loongson.cn>
Shunyao Li <shunyaoli@google.com>
Stefan Holmer <holmer@google.com>
Suman Sunkara <sunkaras@google.com>
Sylvestre Ledru <sylvestre@mozilla.com>
Taekhyun Kim <takim@nvidia.com>
Takanori MATSUURA <t.matsuu@gmail.com>
Tamar Levy <tamar.levy@intel.com>
@@ -158,7 +147,6 @@ Tom Finegan <tomfinegan@google.com>
Tristan Matthews <le.businessman@gmail.com>
Urvang Joshi <urvang@google.com>
Vignesh Venkatasubramanian <vigneshv@google.com>
Vlad Tsyrklevich <vtsyrklevich@chromium.org>
Yaowu Xu <yaowu@google.com>
Yi Luo <luoyi@google.com>
Yongzhe Wang <yongzhe@google.com>

View File

@@ -1,28 +1,3 @@
2018-01-04 v1.7.0 "Mandarin Duck"
This release focused on high bit depth performance (10/12 bit) and vp9
encoding improvements.
- Upgrading:
This release is ABI incompatible due to new vp9 encoder features.
Frame parallel decoding for vp9 has been removed.
- Enhancements:
vp9 encoding supports additional threads with --row-mt. This can be greater
than the number of tiles.
Two new vp9 encoder options have been added:
--corpus-complexity
--tune-content=film
Additional tooling for respecting the vp9 "level" profiles has been added.
- Bug fixes:
A variety of fuzzing issues.
vp8 threading fix for ARM.
Codec control VP9_SET_SKIP_LOOP_FILTER fixed.
Reject invalid multi resolution configurations.
2017-01-09 v1.6.1 "Long Tailed Duck"
This release improves upon the VP9 encoder and speeds up the encoding and
decoding processes.

4
README
View File

@@ -1,4 +1,4 @@
README - 24 January 2018
README - 26 January 2017
Welcome to the WebM VP8/VP9 Codec SDK!
@@ -63,8 +63,6 @@ COMPILING THE APPLICATIONS/LIBRARIES:
armv8-linux-gcc
mips32-linux-gcc
mips64-linux-gcc
ppc64-linux-gcc
ppc64le-linux-gcc
sparc-solaris-gcc
x86-android-gcc
x86-darwin8-gcc

View File

@@ -1312,11 +1312,6 @@ EOF
check_gcc_machine_option ${ext%_*} $ext
fi
fi
# https://bugs.chromium.org/p/webm/issues/detail?id=1464
# The assembly optimizations for vpx_sub_pixel_variance do not link with
# gcc 6.
enabled sse2 && soft_enable pic
done
if enabled external_build; then

View File

@@ -1,13 +1,4 @@
#!/usr/bin/env perl
##
## Copyright (c) 2017 The WebM project authors. All Rights Reserved.
##
## Use of this source code is governed by a BSD-style license
## that can be found in the LICENSE file in the root of the source
## tree. An additional intellectual property rights grant can be found
## in the file PATENTS. All contributing project authors may
## be found in the AUTHORS file in the root of the source tree.
##
no strict 'refs';
use warnings;
@@ -209,7 +200,6 @@ sub filter {
sub common_top() {
my $include_guard = uc($opts{sym})."_H_";
print <<EOF;
// This file is generated. Do not edit.
#ifndef ${include_guard}
#define ${include_guard}

View File

@@ -60,7 +60,6 @@ if [ ${bare} ]; then
echo "${changelog_version}${git_version_id}" > $$.tmp
else
cat<<EOF>$$.tmp
// This file is generated. Do not edit.
#define VERSION_MAJOR $major_version
#define VERSION_MINOR $minor_version
#define VERSION_PATCH $patch_version

2
configure vendored
View File

@@ -665,7 +665,7 @@ process_toolchain() {
gen_vcproj_cmd=${source_path}/build/make/gen_msvs_vcxproj.sh
enabled werror && gen_vcproj_cmd="${gen_vcproj_cmd} --enable-werror"
all_targets="${all_targets} solution"
INLINE="__inline"
INLINE="__forceinline"
;;
esac

View File

@@ -32,21 +32,13 @@ static const char *exec_name;
/* Terminates the process, reporting failure to the caller's environment. */
void usage_exit(void) {
  exit(EXIT_FAILURE);
}
// Denoiser states for vp8, for temporal denoising.
enum denoiserStateVp8 {
kVp8DenoiserOff,
kVp8DenoiserOnYOnly,
kVp8DenoiserOnYUV,
kVp8DenoiserOnYUVAggressive,
kVp8DenoiserOnAdaptive
};
// Denoiser states for vp9, for temporal denoising.
enum denoiserStateVp9 {
kVp9DenoiserOff,
kVp9DenoiserOnYOnly,
// For SVC: denoise the top two spatial layers.
kVp9DenoiserOnYTwoSpatialLayers
// Denoiser states, for temporal denoising.
enum denoiserState {
kDenoiserOff,
kDenoiserOnYOnly,
kDenoiserOnYUV,
kDenoiserOnYUVAggressive,
kDenoiserOnAdaptive
};
static int mode_to_num_layers[13] = { 1, 2, 2, 3, 3, 3, 3, 5, 2, 3, 3, 3, 3 };
@@ -763,7 +755,7 @@ int main(int argc, char **argv) {
if (strncmp(encoder->name, "vp8", 3) == 0) {
vpx_codec_control(&codec, VP8E_SET_CPUUSED, -speed);
vpx_codec_control(&codec, VP8E_SET_NOISE_SENSITIVITY, kVp8DenoiserOff);
vpx_codec_control(&codec, VP8E_SET_NOISE_SENSITIVITY, kDenoiserOff);
vpx_codec_control(&codec, VP8E_SET_STATIC_THRESHOLD, 1);
vpx_codec_control(&codec, VP8E_SET_GF_CBR_BOOST_PCT, 0);
#if VP8_ROI_MAP
@@ -780,7 +772,7 @@ int main(int argc, char **argv) {
vpx_codec_control(&codec, VP9E_SET_GF_CBR_BOOST_PCT, 0);
vpx_codec_control(&codec, VP9E_SET_FRAME_PARALLEL_DECODING, 0);
vpx_codec_control(&codec, VP9E_SET_FRAME_PERIODIC_BOOST, 0);
vpx_codec_control(&codec, VP9E_SET_NOISE_SENSITIVITY, kVp9DenoiserOff);
vpx_codec_control(&codec, VP9E_SET_NOISE_SENSITIVITY, kDenoiserOff);
vpx_codec_control(&codec, VP8E_SET_STATIC_THRESHOLD, 1);
vpx_codec_control(&codec, VP9E_SET_TUNE_CONTENT, 0);
vpx_codec_control(&codec, VP9E_SET_TILE_COLUMNS, (cfg.g_threads >> 1));

View File

@@ -233,8 +233,8 @@ OBJS-yes += $(LIBVPX_OBJS)
LIBS-$(if yes,$(CONFIG_STATIC)) += $(BUILD_PFX)libvpx.a $(BUILD_PFX)libvpx_g.a
$(BUILD_PFX)libvpx_g.a: $(LIBVPX_OBJS)
SO_VERSION_MAJOR := 5
SO_VERSION_MINOR := 0
SO_VERSION_MAJOR := 4
SO_VERSION_MINOR := 1
SO_VERSION_PATCH := 0
ifeq ($(filter darwin%,$(TGT_OS)),$(TGT_OS))
LIBVPX_SO := libvpx.$(SO_VERSION_MAJOR).dylib

View File

@@ -1216,17 +1216,16 @@ class DatarateOnePassCbrSvc
}
virtual void ResetModel() {
last_pts_ = 0;
bits_in_buffer_model_ = cfg_.rc_target_bitrate * cfg_.rc_buf_initial_sz;
frame_number_ = 0;
first_drop_ = 0;
bits_total_ = 0;
duration_ = 0.0;
mismatch_psnr_ = 0.0;
mismatch_nframes_ = 0;
denoiser_on_ = 0;
tune_content_ = 0;
base_speed_setting_ = 5;
spatial_layer_id_ = 0;
temporal_layer_id_ = 0;
memset(bits_in_buffer_model_, 0, sizeof(bits_in_buffer_model_));
memset(bits_total_, 0, sizeof(bits_total_));
memset(layer_target_avg_bandwidth_, 0, sizeof(layer_target_avg_bandwidth_));
}
virtual void BeginPassHook(unsigned int /*pass*/) {}
virtual void PreEncodeFrameHook(::libvpx_test::VideoSource *video,
@@ -1257,94 +1256,32 @@ class DatarateOnePassCbrSvc
timebase_ = static_cast<double>(tb.num) / tb.den;
duration_ = 0;
}
// Records the layer ids reported by the encoder and credits the per-layer
// buffer model with each layer's target average bandwidth.
virtual void PostEncodeFrameHook(::libvpx_test::Encoder *encoder) {
  // Fetch the spatial/temporal layer ids via VP9E_GET_SVC_LAYER_ID.
  vpx_svc_layer_id_t svc_layer;
  encoder->Control(VP9E_GET_SVC_LAYER_ID, &svc_layer);
  spatial_layer_id_ = svc_layer.spatial_layer_id;
  temporal_layer_id_ = svc_layer.temporal_layer_id;

  // The buffer is topped up with the per-layer target frame bandwidth for
  // every frame passed to the encoder (encoded or dropped). Temporal layers
  // are cumulative, so every temporal layer from the current one upwards is
  // updated.
  for (int spatial = 0; spatial < number_spatial_layers_; ++spatial) {
    // Layers are stored spatial-major: index = spatial * num_temporal + tl.
    const int first_in_row = spatial * number_temporal_layers_;
    for (int temporal = temporal_layer_id_;
         temporal < number_temporal_layers_; ++temporal) {
      const int idx = first_in_row + temporal;
      const int64_t credit =
          static_cast<int64_t>(layer_target_avg_bandwidth_[idx]);
      bits_in_buffer_model_[idx] += credit;
    }
  }
}
// Parses the superframe index, if any, stored at the tail of a compressed
// chunk.
//
//   data     start of the chunk.
//   data_sz  chunk length in bytes.
//   sizes    receives one size per frame listed in the index (at most 8,
//            since the frame count is a 3-bit field plus one).
//   count    receives the number of frames in the index, or 0 when the chunk
//            carries no index.
//
// Returns VPX_CODEC_CORRUPT_FRAME when the trailing marker announces an index
// that is longer than the chunk, or whose leading marker byte does not match
// the trailing one. Returns VPX_CODEC_OK otherwise, including for an empty
// chunk, which by definition carries no index.
vpx_codec_err_t parse_superframe_index(const uint8_t *data, size_t data_sz,
                                       uint32_t sizes[8], int *count) {
  *count = 0;

  // An empty chunk has no trailing marker byte; reading data[data_sz - 1]
  // would be out of bounds.
  if (data_sz == 0) return VPX_CODEC_OK;

  const uint8_t marker = data[data_sz - 1];
  // Top three bits 110 flag the presence of a superframe index.
  if ((marker & 0xe0) != 0xc0) return VPX_CODEC_OK;

  const uint32_t frames = (marker & 0x7) + 1;      // frames in the index
  const uint32_t mag = ((marker >> 3) & 0x3) + 1;  // bytes per size field
  const size_t index_sz = 2 + mag * frames;        // two markers + sizes

  // This chunk is marked as having a superframe index but doesn't have
  // enough data for it, thus it's an invalid superframe index.
  if (data_sz < index_sz) return VPX_CODEC_CORRUPT_FRAME;

  // This chunk is marked as having a superframe index but doesn't have
  // the matching marker byte at the front of the index therefore it's an
  // invalid chunk.
  if (data[data_sz - index_sz] != marker) return VPX_CODEC_CORRUPT_FRAME;

  // Size fields are little-endian and start right after the leading marker.
  const uint8_t *x = data + (data_sz - index_sz + 1);
  for (uint32_t i = 0; i < frames; ++i) {
    uint32_t this_sz = 0;
    for (uint32_t j = 0; j < mag; ++j) {
      // Widen to uint32_t before shifting: a promoted (signed) int shifted
      // left by 24 overflows for bytes >= 0x80.
      this_sz |= static_cast<uint32_t>(*x++) << (j * 8);
    }
    sizes[i] = this_sz;
  }
  *count = static_cast<int>(frames);
  return VPX_CODEC_OK;
}
virtual void FramePktHook(const vpx_codec_cx_pkt_t *pkt) {
uint32_t sizes[8] = { 0 };
int count = 0;
last_pts_ = pkt->data.frame.pts;
vpx_codec_pts_t duration = pkt->data.frame.pts - last_pts_;
if (last_pts_ == 0) duration = 1;
bits_in_buffer_model_ += static_cast<int64_t>(
duration * timebase_ * cfg_.rc_target_bitrate * 1000);
const bool key_frame =
(pkt->data.frame.flags & VPX_FRAME_IS_KEY) ? true : false;
parse_superframe_index(static_cast<const uint8_t *>(pkt->data.frame.buf),
pkt->data.frame.sz, sizes, &count);
ASSERT_EQ(count, number_spatial_layers_);
for (int sl = 0; sl < number_spatial_layers_; ++sl) {
sizes[sl] = sizes[sl] << 3;
// Update the total encoded bits per layer.
// For temporal layers, update the cumulative encoded bits per layer.
for (int tl = temporal_layer_id_; tl < number_temporal_layers_; ++tl) {
const int layer = sl * number_temporal_layers_ + tl;
bits_total_[layer] += static_cast<int64_t>(sizes[sl]);
// Update the per-layer buffer level with the encoded frame size.
bits_in_buffer_model_[layer] -= static_cast<int64_t>(sizes[sl]);
// There should be no buffer underrun, except on the base
// temporal layer, since there may be key frames there.
if (!key_frame && tl > 0) {
ASSERT_GE(bits_in_buffer_model_[layer], 0)
<< "Buffer Underrun at frame " << pkt->data.frame.pts;
}
}
if (!key_frame) {
// TODO(marpan): This check currently fails for some of the SVC tests,
// re-enable when issue (webm:1350) is resolved.
// ASSERT_GE(bits_in_buffer_model_, 0) << "Buffer Underrun at frame "
// << pkt->data.frame.pts;
}
const size_t frame_size_in_bits = pkt->data.frame.sz * 8;
bits_in_buffer_model_ -= static_cast<int64_t>(frame_size_in_bits);
bits_total_ += frame_size_in_bits;
if (!first_drop_ && duration > 1) first_drop_ = last_pts_ + 1;
last_pts_ = pkt->data.frame.pts;
bits_in_last_frame_ = frame_size_in_bits;
++frame_number_;
}
virtual void EndPassHook(void) {
for (int sl = 0; sl < number_spatial_layers_; ++sl) {
for (int tl = 0; tl < number_temporal_layers_; ++tl) {
const int layer = sl * number_temporal_layers_ + tl;
const double file_size_in_kb = bits_total_[layer] / 1000.;
duration_ = (last_pts_ + 1) * timebase_;
file_datarate_[layer] = file_size_in_kb / duration_;
}
if (bits_total_) {
const double file_size_in_kb = bits_total_ / 1000.; // bits per kilobit
duration_ = (last_pts_ + 1) * timebase_;
file_datarate_ = file_size_in_kb / duration_;
}
}
@@ -1357,11 +1294,13 @@ class DatarateOnePassCbrSvc
unsigned int GetMismatchFrames() { return mismatch_nframes_; }
vpx_codec_pts_t last_pts_;
int64_t bits_in_buffer_model_[VPX_MAX_LAYERS];
int64_t bits_in_buffer_model_;
double timebase_;
int64_t bits_total_[VPX_MAX_LAYERS];
int frame_number_;
vpx_codec_pts_t first_drop_;
int64_t bits_total_;
double duration_;
double file_datarate_[VPX_MAX_LAYERS];
double file_datarate_;
size_t bits_in_last_frame_;
vpx_svc_extra_cfg_t svc_params_;
int speed_setting_;
@@ -1370,22 +1309,14 @@ class DatarateOnePassCbrSvc
int denoiser_on_;
int tune_content_;
int base_speed_setting_;
int spatial_layer_id_;
int temporal_layer_id_;
int number_spatial_layers_;
int number_temporal_layers_;
int layer_target_avg_bandwidth_[VPX_MAX_LAYERS];
};
static void assign_layer_bitrates(vpx_codec_enc_cfg_t *const enc_cfg,
const vpx_svc_extra_cfg_t *svc_params,
int spatial_layers, int temporal_layers,
int temporal_layering_mode,
int *layer_target_avg_bandwidth,
int64_t *bits_in_buffer_model) {
int temporal_layering_mode) {
int sl, spatial_layer_target;
float total = 0;
float alloc_ratio[VPX_MAX_LAYERS] = { 0 };
float framerate = 30.0;
for (sl = 0; sl < spatial_layers; ++sl) {
if (svc_params->scaling_factor_den[sl] > 0) {
alloc_ratio[sl] = (float)(svc_params->scaling_factor_num[sl] * 1.0 /
@@ -1405,41 +1336,8 @@ static void assign_layer_bitrates(vpx_codec_enc_cfg_t *const enc_cfg,
} else if (temporal_layering_mode == 2) {
enc_cfg->layer_target_bitrate[index] = spatial_layer_target * 2 / 3;
enc_cfg->layer_target_bitrate[index + 1] = spatial_layer_target;
} else if (temporal_layering_mode <= 1) {
enc_cfg->layer_target_bitrate[index] = spatial_layer_target;
}
}
for (sl = 0; sl < spatial_layers; ++sl) {
for (int tl = 0; tl < temporal_layers; ++tl) {
const int layer = sl * temporal_layers + tl;
float layer_framerate = framerate;
if (temporal_layers == 2 && tl == 0) layer_framerate = framerate / 2;
if (temporal_layers == 3 && tl == 0) layer_framerate = framerate / 4;
if (temporal_layers == 3 && tl == 1) layer_framerate = framerate / 2;
layer_target_avg_bandwidth[layer] = static_cast<int>(
enc_cfg->layer_target_bitrate[layer] * 1000.0 / layer_framerate);
bits_in_buffer_model[layer] =
enc_cfg->layer_target_bitrate[layer] * enc_cfg->rc_buf_initial_sz;
}
}
}
// Checks, for every (spatial, temporal) layer, that the configured
// cfg->layer_target_bitrate[layer] lies between
// file_datarate[layer] * thresh_overshoot and
// file_datarate[layer] * thresh_undershoot (inclusive).
//
//   cfg                     encoder config holding layer_target_bitrate[].
//   number_spatial_layers   number of spatial layers to check.
//   number_temporal_layers  number of temporal layers per spatial layer.
//   file_datarate           measured datarate per layer, indexed like
//                           layer_target_bitrate[].
//   thresh_overshoot        factor applied to the measured rate for the
//                           "exceeds the target" check.
//   thresh_undershoot       factor applied to the measured rate for the
//                           "lower than the target" check.
static void CheckLayerRateTargeting(vpx_codec_enc_cfg_t *const cfg,
int number_spatial_layers,
int number_temporal_layers,
double *file_datarate,
double thresh_overshoot,
double thresh_undershoot) {
for (int sl = 0; sl < number_spatial_layers; ++sl)
for (int tl = 0; tl < number_temporal_layers; ++tl) {
// Layers are indexed spatial-major: all temporal layers of spatial layer 0
// come first, then those of spatial layer 1, and so on.
const int layer = sl * number_temporal_layers + tl;
// Target must be at least the scaled measured rate, i.e. the file did not
// overshoot the target by too much.
ASSERT_GE(cfg->layer_target_bitrate[layer],
file_datarate[layer] * thresh_overshoot)
<< " The datarate for the file exceeds the target by too much!";
// Target must be at most the scaled measured rate, i.e. the file did not
// undershoot the target by too much.
ASSERT_LE(cfg->layer_target_bitrate[layer],
file_datarate[layer] * thresh_undershoot)
<< " The datarate for the file is lower than the target by too much!";
}
}
// Check basic rate targeting for 1 pass CBR SVC: 2 spatial layers and 1
@@ -1465,19 +1363,14 @@ TEST_P(DatarateOnePassCbrSvc, OnePassCbrSvc2SL1TLScreenContent1) {
svc_params_.scaling_factor_den[1] = 288;
cfg_.rc_dropframe_thresh = 10;
cfg_.kf_max_dist = 9999;
number_spatial_layers_ = cfg_.ss_number_layers;
number_temporal_layers_ = cfg_.ts_number_layers;
::libvpx_test::Y4mVideoSource video("niklas_1280_720_30.y4m", 0, 60);
::libvpx_test::Y4mVideoSource video("niklas_1280_720_30.y4m", 0, 300);
cfg_.rc_target_bitrate = 500;
ResetModel();
tune_content_ = 1;
base_speed_setting_ = speed_setting_;
assign_layer_bitrates(&cfg_, &svc_params_, cfg_.ss_number_layers,
cfg_.ts_number_layers, cfg_.temporal_layering_mode,
layer_target_avg_bandwidth_, bits_in_buffer_model_);
cfg_.ts_number_layers, cfg_.temporal_layering_mode);
ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
CheckLayerRateTargeting(&cfg_, number_spatial_layers_,
number_temporal_layers_, file_datarate_, 0.78, 1.15);
EXPECT_EQ(static_cast<unsigned int>(0), GetMismatchFrames());
}
@@ -1505,28 +1398,26 @@ TEST_P(DatarateOnePassCbrSvc, OnePassCbrSvc2SL3TL) {
svc_params_.scaling_factor_den[1] = 288;
cfg_.rc_dropframe_thresh = 0;
cfg_.kf_max_dist = 9999;
number_spatial_layers_ = cfg_.ss_number_layers;
number_temporal_layers_ = cfg_.ts_number_layers;
::libvpx_test::I420VideoSource video("niklas_640_480_30.yuv", 640, 480, 30, 1,
0, 400);
::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
30, 1, 0, 200);
// TODO(marpan): Check that effective_datarate for each layer hits the
// layer target_bitrate.
for (int i = 200; i <= 800; i += 200) {
cfg_.rc_target_bitrate = i;
ResetModel();
assign_layer_bitrates(&cfg_, &svc_params_, cfg_.ss_number_layers,
cfg_.ts_number_layers, cfg_.temporal_layering_mode,
layer_target_avg_bandwidth_, bits_in_buffer_model_);
cfg_.ts_number_layers, cfg_.temporal_layering_mode);
ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
CheckLayerRateTargeting(&cfg_, number_spatial_layers_,
number_temporal_layers_, file_datarate_, 0.78,
1.15);
ASSERT_GE(cfg_.rc_target_bitrate, file_datarate_ * 0.78)
<< " The datarate for the file exceeds the target by too much!";
ASSERT_LE(cfg_.rc_target_bitrate, file_datarate_ * 1.15)
<< " The datarate for the file is lower than the target by too much!";
#if CONFIG_VP9_DECODER
// Number of temporal layers > 1, so half of the frames in this SVC pattern
// will be non-reference frame and hence encoder will avoid loopfilter.
// Since frame dropper is off, we can expect 200 (half of the sequence)
// Since frame dropper is off, we can expect 100 (half of the sequence)
// mismatched frames.
EXPECT_EQ(static_cast<unsigned int>(200), GetMismatchFrames());
EXPECT_EQ(static_cast<unsigned int>(100), GetMismatchFrames());
#endif
}
}
@@ -1555,41 +1446,33 @@ TEST_P(DatarateOnePassCbrSvc, OnePassCbrSvc2SL3TLDenoiserOn) {
svc_params_.scaling_factor_den[1] = 288;
cfg_.rc_dropframe_thresh = 0;
cfg_.kf_max_dist = 9999;
number_spatial_layers_ = cfg_.ss_number_layers;
number_temporal_layers_ = cfg_.ts_number_layers;
::libvpx_test::I420VideoSource video("niklas_640_480_30.yuv", 640, 480, 30, 1,
0, 400);
::libvpx_test::Y4mVideoSource video("niklas_1280_720_30.y4m", 0, 300);
// TODO(marpan): Check that effective_datarate for each layer hits the
// layer target_bitrate.
// For SVC, noise_sen = 1 means denoising only the top spatial layer
// noise_sen = 2 means denoising the two top spatial layers.
for (int noise_sen = 1; noise_sen <= 2; noise_sen++) {
for (int i = 600; i <= 1000; i += 200) {
cfg_.rc_target_bitrate = i;
ResetModel();
denoiser_on_ = noise_sen;
assign_layer_bitrates(&cfg_, &svc_params_, cfg_.ss_number_layers,
cfg_.ts_number_layers, cfg_.temporal_layering_mode,
layer_target_avg_bandwidth_, bits_in_buffer_model_);
ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
CheckLayerRateTargeting(&cfg_, number_spatial_layers_,
number_temporal_layers_, file_datarate_, 0.78,
1.15);
for (int i = 600; i <= 1000; i += 200) {
cfg_.rc_target_bitrate = i;
ResetModel();
denoiser_on_ = 1;
assign_layer_bitrates(&cfg_, &svc_params_, cfg_.ss_number_layers,
cfg_.ts_number_layers, cfg_.temporal_layering_mode);
ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
ASSERT_GE(cfg_.rc_target_bitrate, file_datarate_ * 0.78)
<< " The datarate for the file exceeds the target by too much!";
ASSERT_LE(cfg_.rc_target_bitrate, file_datarate_ * 1.15)
<< " The datarate for the file is lower than the target by too much!";
#if CONFIG_VP9_DECODER
// Number of temporal layers > 1, so half of the frames in this SVC
// pattern
// will be non-reference frame and hence encoder will avoid loopfilter.
// Since frame dropper is off, we can expect 200 (half of the sequence)
// mismatched frames.
EXPECT_EQ(static_cast<unsigned int>(200), GetMismatchFrames());
// Number of temporal layers > 1, so half of the frames in this SVC pattern
// will be non-reference frame and hence encoder will avoid loopfilter.
// Since frame dropper is off, we can expect 150 (half of the sequence)
// mismatched frames.
EXPECT_EQ(static_cast<unsigned int>(150), GetMismatchFrames());
#endif
}
}
}
// Check basic rate targeting for 1 pass CBR SVC: 2 spatial layers and 3
// temporal layers. Run CIF clip with 1 thread, and few short key frame periods.
TEST_P(DatarateOnePassCbrSvc, OnePassCbrSvc2SL3TLSmallKf) {
TEST_P(DatarateOnePassCbrSvc, DISABLED_OnePassCbrSvc2SL3TLSmallKf) {
cfg_.rc_buf_initial_sz = 500;
cfg_.rc_buf_optimal_sz = 500;
cfg_.rc_buf_sz = 1000;
@@ -1610,23 +1493,21 @@ TEST_P(DatarateOnePassCbrSvc, OnePassCbrSvc2SL3TLSmallKf) {
svc_params_.scaling_factor_num[1] = 288;
svc_params_.scaling_factor_den[1] = 288;
cfg_.rc_dropframe_thresh = 10;
::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
30, 1, 0, 200);
cfg_.rc_target_bitrate = 400;
number_spatial_layers_ = cfg_.ss_number_layers;
number_temporal_layers_ = cfg_.ts_number_layers;
::libvpx_test::I420VideoSource video("niklas_640_480_30.yuv", 640, 480, 30, 1,
0, 400);
// For this 3 temporal layer case, pattern repeats every 4 frames, so choose
// 4 neighboring key frame periods (so key frame will land on 0-2-1-2).
for (int j = 64; j <= 67; j++) {
cfg_.kf_max_dist = j;
ResetModel();
assign_layer_bitrates(&cfg_, &svc_params_, cfg_.ss_number_layers,
cfg_.ts_number_layers, cfg_.temporal_layering_mode,
layer_target_avg_bandwidth_, bits_in_buffer_model_);
cfg_.ts_number_layers, cfg_.temporal_layering_mode);
ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
CheckLayerRateTargeting(&cfg_, number_spatial_layers_,
number_temporal_layers_, file_datarate_, 0.78,
1.15);
ASSERT_GE(cfg_.rc_target_bitrate, file_datarate_ * 0.80)
<< " The datarate for the file exceeds the target by too much!";
ASSERT_LE(cfg_.rc_target_bitrate, file_datarate_ * 1.15)
<< " The datarate for the file is lower than the target by too much!";
}
}
@@ -1654,23 +1535,22 @@ TEST_P(DatarateOnePassCbrSvc, OnePassCbrSvc2SL3TL4Threads) {
svc_params_.scaling_factor_den[1] = 288;
cfg_.rc_dropframe_thresh = 0;
cfg_.kf_max_dist = 9999;
number_spatial_layers_ = cfg_.ss_number_layers;
number_temporal_layers_ = cfg_.ts_number_layers;
::libvpx_test::Y4mVideoSource video("niklas_1280_720_30.y4m", 0, 60);
::libvpx_test::Y4mVideoSource video("niklas_1280_720_30.y4m", 0, 300);
cfg_.rc_target_bitrate = 800;
ResetModel();
assign_layer_bitrates(&cfg_, &svc_params_, cfg_.ss_number_layers,
cfg_.ts_number_layers, cfg_.temporal_layering_mode,
layer_target_avg_bandwidth_, bits_in_buffer_model_);
cfg_.ts_number_layers, cfg_.temporal_layering_mode);
ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
CheckLayerRateTargeting(&cfg_, number_spatial_layers_,
number_temporal_layers_, file_datarate_, 0.78, 1.15);
ASSERT_GE(cfg_.rc_target_bitrate, file_datarate_ * 0.78)
<< " The datarate for the file exceeds the target by too much!";
ASSERT_LE(cfg_.rc_target_bitrate, file_datarate_ * 1.15)
<< " The datarate for the file is lower than the target by too much!";
#if CONFIG_VP9_DECODER
// Number of temporal layers > 1, so half of the frames in this SVC pattern
// will be non-reference frame and hence encoder will avoid loopfilter.
// Since frame dropper is off, we can expect 30 (half of the sequence)
// Since frame dropper is off, we can expect 150 (half of the sequence)
// mismatched frames.
EXPECT_EQ(static_cast<unsigned int>(30), GetMismatchFrames());
EXPECT_EQ(static_cast<unsigned int>(150), GetMismatchFrames());
#endif
}
@@ -1700,24 +1580,22 @@ TEST_P(DatarateOnePassCbrSvc, OnePassCbrSvc3SL3TL) {
svc_params_.scaling_factor_den[2] = 288;
cfg_.rc_dropframe_thresh = 0;
cfg_.kf_max_dist = 9999;
number_spatial_layers_ = cfg_.ss_number_layers;
number_temporal_layers_ = cfg_.ts_number_layers;
::libvpx_test::I420VideoSource video("niklas_640_480_30.yuv", 640, 480, 30, 1,
0, 400);
::libvpx_test::Y4mVideoSource video("niklas_1280_720_30.y4m", 0, 300);
cfg_.rc_target_bitrate = 800;
ResetModel();
assign_layer_bitrates(&cfg_, &svc_params_, cfg_.ss_number_layers,
cfg_.ts_number_layers, cfg_.temporal_layering_mode,
layer_target_avg_bandwidth_, bits_in_buffer_model_);
cfg_.ts_number_layers, cfg_.temporal_layering_mode);
ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
CheckLayerRateTargeting(&cfg_, number_spatial_layers_,
number_temporal_layers_, file_datarate_, 0.78, 1.15);
ASSERT_GE(cfg_.rc_target_bitrate, file_datarate_ * 0.78)
<< " The datarate for the file exceeds the target by too much!";
ASSERT_LE(cfg_.rc_target_bitrate, file_datarate_ * 1.22)
<< " The datarate for the file is lower than the target by too much!";
#if CONFIG_VP9_DECODER
// Number of temporal layers > 1, so half of the frames in this SVC pattern
// will be non-reference frame and hence encoder will avoid loopfilter.
// Since frame dropper is off, we can expect 200 (half of the sequence)
// Since frame dropper is off, we can expect 150 (half of the sequence)
// mismatched frames.
EXPECT_EQ(static_cast<unsigned int>(200), GetMismatchFrames());
EXPECT_EQ(static_cast<unsigned int>(150), GetMismatchFrames());
#endif
}
@@ -1746,23 +1624,20 @@ TEST_P(DatarateOnePassCbrSvc, OnePassCbrSvc3SL3TLSmallKf) {
svc_params_.scaling_factor_num[2] = 288;
svc_params_.scaling_factor_den[2] = 288;
cfg_.rc_dropframe_thresh = 10;
::libvpx_test::Y4mVideoSource video("niklas_1280_720_30.y4m", 0, 300);
cfg_.rc_target_bitrate = 800;
number_spatial_layers_ = cfg_.ss_number_layers;
number_temporal_layers_ = cfg_.ts_number_layers;
::libvpx_test::I420VideoSource video("niklas_640_480_30.yuv", 640, 480, 30, 1,
0, 400);
// For this 3 temporal layer case, pattern repeats every 4 frames, so choose
// 4 neighboring key frame periods (so key frame will land on 0-2-1-2).
for (int j = 32; j <= 35; j++) {
cfg_.kf_max_dist = j;
ResetModel();
assign_layer_bitrates(&cfg_, &svc_params_, cfg_.ss_number_layers,
cfg_.ts_number_layers, cfg_.temporal_layering_mode,
layer_target_avg_bandwidth_, bits_in_buffer_model_);
cfg_.ts_number_layers, cfg_.temporal_layering_mode);
ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
CheckLayerRateTargeting(&cfg_, number_spatial_layers_,
number_temporal_layers_, file_datarate_, 0.78,
1.15);
ASSERT_GE(cfg_.rc_target_bitrate, file_datarate_ * 0.80)
<< " The datarate for the file exceeds the target by too much!";
ASSERT_LE(cfg_.rc_target_bitrate, file_datarate_ * 1.30)
<< " The datarate for the file is lower than the target by too much!";
}
}
@@ -1792,23 +1667,22 @@ TEST_P(DatarateOnePassCbrSvc, OnePassCbrSvc3SL3TL4threads) {
svc_params_.scaling_factor_den[2] = 288;
cfg_.rc_dropframe_thresh = 0;
cfg_.kf_max_dist = 9999;
number_spatial_layers_ = cfg_.ss_number_layers;
number_temporal_layers_ = cfg_.ts_number_layers;
::libvpx_test::Y4mVideoSource video("niklas_1280_720_30.y4m", 0, 60);
::libvpx_test::Y4mVideoSource video("niklas_1280_720_30.y4m", 0, 300);
cfg_.rc_target_bitrate = 800;
ResetModel();
assign_layer_bitrates(&cfg_, &svc_params_, cfg_.ss_number_layers,
cfg_.ts_number_layers, cfg_.temporal_layering_mode,
layer_target_avg_bandwidth_, bits_in_buffer_model_);
cfg_.ts_number_layers, cfg_.temporal_layering_mode);
ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
CheckLayerRateTargeting(&cfg_, number_spatial_layers_,
number_temporal_layers_, file_datarate_, 0.78, 1.15);
ASSERT_GE(cfg_.rc_target_bitrate, file_datarate_ * 0.78)
<< " The datarate for the file exceeds the target by too much!";
ASSERT_LE(cfg_.rc_target_bitrate, file_datarate_ * 1.22)
<< " The datarate for the file is lower than the target by too much!";
#if CONFIG_VP9_DECODER
// Number of temporal layers > 1, so half of the frames in this SVC pattern
// will be non-reference frame and hence encoder will avoid loopfilter.
// Since frame dropper is off, we can expect 30 (half of the sequence)
// Since frame dropper is off, we can expect 150 (half of the sequence)
// mismatched frames.
EXPECT_EQ(static_cast<unsigned int>(30), GetMismatchFrames());
EXPECT_EQ(static_cast<unsigned int>(150), GetMismatchFrames());
#endif
}
@@ -1840,19 +1714,9 @@ TEST_P(DatarateOnePassCbrSvc, OnePassCbrSvc2SL1TL5x5MultipleRuns) {
cfg_.layer_target_bitrate[0] = 300;
cfg_.layer_target_bitrate[1] = 1400;
cfg_.rc_target_bitrate = 1700;
number_spatial_layers_ = cfg_.ss_number_layers;
number_temporal_layers_ = cfg_.ts_number_layers;
::libvpx_test::Y4mVideoSource video("niklas_1280_720_30.y4m", 0, 300);
ResetModel();
layer_target_avg_bandwidth_[0] = cfg_.layer_target_bitrate[0] * 1000 / 30;
bits_in_buffer_model_[0] =
cfg_.layer_target_bitrate[0] * cfg_.rc_buf_initial_sz;
layer_target_avg_bandwidth_[1] = cfg_.layer_target_bitrate[1] * 1000 / 30;
bits_in_buffer_model_[1] =
cfg_.layer_target_bitrate[1] * cfg_.rc_buf_initial_sz;
::libvpx_test::Y4mVideoSource video("niklas_1280_720_30.y4m", 0, 60);
ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
CheckLayerRateTargeting(&cfg_, number_spatial_layers_,
number_temporal_layers_, file_datarate_, 0.78, 1.15);
EXPECT_EQ(static_cast<unsigned int>(0), GetMismatchFrames());
}

View File

@@ -106,90 +106,4 @@ TEST(EncodeAPI, ImageSizeSetting) {
}
#endif
// Configures two encoders for multi-resolution encoding (a full-size stream
// and a half-size stream), each with two temporal layers whose per-layer
// rates (ts_target_bitrate[]) are deliberately left at 0. Initialization must
// be rejected independent of CONFIG_MULTI_RES_ENCODING.
TEST(EncodeAPI, MultiResEncode) {
  static const vpx_codec_iface_t *kCodecs[] = {
#if CONFIG_VP8_ENCODER
    &vpx_codec_vp8_cx_algo,
#endif
#if CONFIG_VP9_ENCODER
    &vpx_codec_vp9_cx_algo,
#endif
  };
  const int kNumEncoders = 2;
  const int kWidth = 1280;
  const int kHeight = 720;
  const int kWidthDown = kWidth / 2;
  const int kHeightDown = kHeight / 2;
  const int kTargetBitrate = 1000;
  const int kFramerate = 30;

  for (int codec_idx = 0; codec_idx < NELEMENTS(kCodecs); ++codec_idx) {
    const vpx_codec_iface_t *const iface = kCodecs[codec_idx];
    vpx_codec_ctx_t enc[kNumEncoders];
    vpx_codec_enc_cfg_t cfg[kNumEncoders];
    vpx_rational_t dsf[kNumEncoders] = { { 2, 1 }, { 2, 1 } };

    memset(enc, 0, sizeof(enc));

    for (int e = 0; e < kNumEncoders; e++) {
      vpx_codec_enc_config_default(iface, &cfg[e], 0);
    }

    /* Highest-resolution encoder settings */
    vpx_codec_enc_cfg_t &full = cfg[0];
    full.g_w = kWidth;
    full.g_h = kHeight;
    full.rc_dropframe_thresh = 0;
    full.rc_end_usage = VPX_CBR;
    full.rc_resize_allowed = 0;
    full.rc_min_quantizer = 2;
    full.rc_max_quantizer = 56;
    full.rc_undershoot_pct = 100;
    full.rc_overshoot_pct = 15;
    full.rc_buf_initial_sz = 500;
    full.rc_buf_optimal_sz = 600;
    full.rc_buf_sz = 1000;
    full.g_error_resilient = 1; /* Enable error resilient mode */
    full.g_lag_in_frames = 0;
    full.kf_mode = VPX_KF_AUTO;
    full.kf_min_dist = 3000;
    full.kf_max_dist = 3000;
    full.rc_target_bitrate = kTargetBitrate; /* Set target bitrate */
    full.g_timebase.num = 1;                 /* Set fps */
    full.g_timebase.den = kFramerate;

    /* The half-size encoder starts as a copy of the full-size settings. */
    memcpy(&cfg[1], &cfg[0], sizeof(cfg[0]));
    vpx_codec_enc_cfg_t &half = cfg[1];
    half.rc_target_bitrate = 500;
    half.g_w = kWidthDown;
    half.g_h = kHeightDown;

    for (int e = 0; e < kNumEncoders; e++) {
      vpx_codec_enc_cfg_t &layer_cfg = cfg[e];
      layer_cfg.ts_number_layers = 2;
      layer_cfg.ts_periodicity = 2;
      layer_cfg.ts_rate_decimator[0] = 2;
      layer_cfg.ts_rate_decimator[1] = 1;
      layer_cfg.ts_layer_id[0] = 0;
      layer_cfg.ts_layer_id[1] = 1;
      // Invalid parameters.
      layer_cfg.ts_target_bitrate[0] = 0;
      layer_cfg.ts_target_bitrate[1] = 0;
    }

    // VP9 should report incapable, VP8 invalid for all configurations.
    const char kVP9Name[] = "WebM Project VP9";
    const bool is_vp9 = strncmp(kVP9Name, vpx_codec_iface_name(iface),
                                sizeof(kVP9Name) - 1) == 0;
    const vpx_codec_err_t expected_err =
        is_vp9 ? VPX_CODEC_INCAPABLE : VPX_CODEC_INVALID_PARAM;
    EXPECT_EQ(expected_err, vpx_codec_enc_init_multi(&enc[0], iface, &cfg[0],
                                                     2, 0, &dsf[0]));

    for (int e = 0; e < kNumEncoders; e++) {
      vpx_codec_destroy(&enc[e]);
    }
  }
}
} // namespace

View File

@@ -201,8 +201,6 @@ void EncoderTest::RunLoop(VideoSource *video) {
PreEncodeFrameHook(video, encoder.get());
encoder->EncodeFrame(video, frame_flags_);
PostEncodeFrameHook(encoder.get());
CxDataIterator iter = encoder->GetCxData();
bool has_cxdata = false;

View File

@@ -219,8 +219,6 @@ class EncoderTest {
virtual void PreEncodeFrameHook(VideoSource * /*video*/,
Encoder * /*encoder*/) {}
virtual void PostEncodeFrameHook(Encoder * /*encoder*/) {}
// Hook to be called on every compressed data packet.
virtual void FramePktHook(const vpx_codec_cx_pkt_t * /*pkt*/) {}

View File

@@ -114,18 +114,6 @@ void InitInput(Pixel *s, Pixel *ref_s, ACMRandom *rnd, const uint8_t limit,
}
}
// Draws the random value the loop-filter tests replicate into their blimit[]
// arrays. The draw uses a range argument of 3 * MAX_LOOP_FILTER + 5.
uint8_t GetOuterThresh(ACMRandom *rnd) {
  const int range = 3 * MAX_LOOP_FILTER + 5;
  return static_cast<uint8_t>(rnd->RandRange(range));
}
// Draws the random value the loop-filter tests replicate into their limit[]
// arrays. The draw uses a range argument of MAX_LOOP_FILTER + 1.
uint8_t GetInnerThresh(ACMRandom *rnd) {
  const int range = MAX_LOOP_FILTER + 1;
  return static_cast<uint8_t>(rnd->RandRange(range));
}
// Draws the random value the loop-filter tests replicate into their thresh[]
// arrays: a draw with range argument MAX_LOOP_FILTER + 1, shifted right by 4.
uint8_t GetHevThresh(ACMRandom *rnd) {
  const int range = MAX_LOOP_FILTER + 1;
  return static_cast<uint8_t>(rnd->RandRange(range) >> 4);
}
class Loop8Test6Param : public ::testing::TestWithParam<loop8_param_t> {
public:
virtual ~Loop8Test6Param() {}
@@ -174,15 +162,15 @@ TEST_P(Loop8Test6Param, OperationCheck) {
int first_failure = -1;
for (int i = 0; i < count_test_block; ++i) {
int err_count = 0;
uint8_t tmp = GetOuterThresh(&rnd);
uint8_t tmp = static_cast<uint8_t>(rnd(3 * MAX_LOOP_FILTER + 4));
DECLARE_ALIGNED(16, const uint8_t,
blimit[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };
tmp = GetInnerThresh(&rnd);
tmp = static_cast<uint8_t>(rnd(MAX_LOOP_FILTER));
DECLARE_ALIGNED(16, const uint8_t,
limit[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };
tmp = GetHevThresh(&rnd);
tmp = rnd.Rand8();
DECLARE_ALIGNED(16, const uint8_t,
thresh[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };
@@ -233,15 +221,15 @@ TEST_P(Loop8Test6Param, ValueCheck) {
for (int i = 0; i < count_test_block; ++i) {
int err_count = 0;
uint8_t tmp = GetOuterThresh(&rnd);
uint8_t tmp = static_cast<uint8_t>(rnd(3 * MAX_LOOP_FILTER + 4));
DECLARE_ALIGNED(16, const uint8_t,
blimit[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };
tmp = GetInnerThresh(&rnd);
tmp = static_cast<uint8_t>(rnd(MAX_LOOP_FILTER));
DECLARE_ALIGNED(16, const uint8_t,
limit[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };
tmp = GetHevThresh(&rnd);
tmp = rnd.Rand8();
DECLARE_ALIGNED(16, const uint8_t,
thresh[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };
@@ -283,27 +271,27 @@ TEST_P(Loop8Test9Param, OperationCheck) {
int first_failure = -1;
for (int i = 0; i < count_test_block; ++i) {
int err_count = 0;
uint8_t tmp = GetOuterThresh(&rnd);
uint8_t tmp = static_cast<uint8_t>(rnd(3 * MAX_LOOP_FILTER + 4));
DECLARE_ALIGNED(16, const uint8_t,
blimit0[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };
tmp = GetInnerThresh(&rnd);
tmp = static_cast<uint8_t>(rnd(MAX_LOOP_FILTER));
DECLARE_ALIGNED(16, const uint8_t,
limit0[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };
tmp = GetHevThresh(&rnd);
tmp = rnd.Rand8();
DECLARE_ALIGNED(16, const uint8_t,
thresh0[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };
tmp = GetOuterThresh(&rnd);
tmp = static_cast<uint8_t>(rnd(3 * MAX_LOOP_FILTER + 4));
DECLARE_ALIGNED(16, const uint8_t,
blimit1[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };
tmp = GetInnerThresh(&rnd);
tmp = static_cast<uint8_t>(rnd(MAX_LOOP_FILTER));
DECLARE_ALIGNED(16, const uint8_t,
limit1[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };
tmp = GetHevThresh(&rnd);
tmp = rnd.Rand8();
DECLARE_ALIGNED(16, const uint8_t,
thresh1[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };
@@ -346,27 +334,27 @@ TEST_P(Loop8Test9Param, ValueCheck) {
int first_failure = -1;
for (int i = 0; i < count_test_block; ++i) {
int err_count = 0;
uint8_t tmp = GetOuterThresh(&rnd);
uint8_t tmp = static_cast<uint8_t>(rnd(3 * MAX_LOOP_FILTER + 4));
DECLARE_ALIGNED(16, const uint8_t,
blimit0[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };
tmp = GetInnerThresh(&rnd);
tmp = static_cast<uint8_t>(rnd(MAX_LOOP_FILTER));
DECLARE_ALIGNED(16, const uint8_t,
limit0[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };
tmp = GetHevThresh(&rnd);
tmp = rnd.Rand8();
DECLARE_ALIGNED(16, const uint8_t,
thresh0[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };
tmp = GetOuterThresh(&rnd);
tmp = static_cast<uint8_t>(rnd(3 * MAX_LOOP_FILTER + 4));
DECLARE_ALIGNED(16, const uint8_t,
blimit1[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };
tmp = GetInnerThresh(&rnd);
tmp = static_cast<uint8_t>(rnd(MAX_LOOP_FILTER));
DECLARE_ALIGNED(16, const uint8_t,
limit1[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };
tmp = GetHevThresh(&rnd);
tmp = rnd.Rand8();
DECLARE_ALIGNED(16, const uint8_t,
thresh1[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };

View File

@@ -14,9 +14,9 @@
#include "third_party/googletest/src/include/gtest/gtest.h"
#include "./vp9_rtcd.h"
#include "./vpx_config.h"
#include "./vpx_dsp_rtcd.h"
#include "./vp9_rtcd.h"
#include "test/acm_random.h"
#include "test/buffer.h"
#include "test/clear_system_state.h"
@@ -42,7 +42,7 @@ typedef void (*QuantizeFunc)(const tran_low_t *coeff, intptr_t count,
uint16_t *eob, const int16_t *scan,
const int16_t *iscan);
typedef std::tr1::tuple<QuantizeFunc, QuantizeFunc, vpx_bit_depth_t,
int /*max_size*/, bool /*is_fp*/>
int /*max_size*/>
QuantizeParam;
// Wrapper for FP version which does not use zbin or quant_shift.
@@ -69,15 +69,11 @@ void QuantFPWrapper(const tran_low_t *coeff, intptr_t count, int skip_block,
class VP9QuantizeBase {
public:
VP9QuantizeBase(vpx_bit_depth_t bit_depth, int max_size, bool is_fp)
: bit_depth_(bit_depth), max_size_(max_size), is_fp_(is_fp) {
VP9QuantizeBase(vpx_bit_depth_t bit_depth, int max_size)
: bit_depth_(bit_depth), max_size_(max_size) {
max_value_ = (1 << bit_depth_) - 1;
zbin_ptr_ =
reinterpret_cast<int16_t *>(vpx_memalign(16, 8 * sizeof(*zbin_ptr_)));
round_fp_ptr_ = reinterpret_cast<int16_t *>(
vpx_memalign(16, 8 * sizeof(*round_fp_ptr_)));
quant_fp_ptr_ = reinterpret_cast<int16_t *>(
vpx_memalign(16, 8 * sizeof(*quant_fp_ptr_)));
round_ptr_ =
reinterpret_cast<int16_t *>(vpx_memalign(16, 8 * sizeof(*round_ptr_)));
quant_ptr_ =
@@ -90,15 +86,11 @@ class VP9QuantizeBase {
~VP9QuantizeBase() {
vpx_free(zbin_ptr_);
vpx_free(round_fp_ptr_);
vpx_free(quant_fp_ptr_);
vpx_free(round_ptr_);
vpx_free(quant_ptr_);
vpx_free(quant_shift_ptr_);
vpx_free(dequant_ptr_);
zbin_ptr_ = NULL;
round_fp_ptr_ = NULL;
quant_fp_ptr_ = NULL;
round_ptr_ = NULL;
quant_ptr_ = NULL;
quant_shift_ptr_ = NULL;
@@ -108,8 +100,6 @@ class VP9QuantizeBase {
protected:
int16_t *zbin_ptr_;
int16_t *round_fp_ptr_;
int16_t *quant_fp_ptr_;
int16_t *round_ptr_;
int16_t *quant_ptr_;
int16_t *quant_shift_ptr_;
@@ -117,103 +107,29 @@ class VP9QuantizeBase {
const vpx_bit_depth_t bit_depth_;
int max_value_;
const int max_size_;
const bool is_fp_;
};
class VP9QuantizeTest : public VP9QuantizeBase,
public ::testing::TestWithParam<QuantizeParam> {
public:
VP9QuantizeTest()
: VP9QuantizeBase(GET_PARAM(2), GET_PARAM(3), GET_PARAM(4)),
quantize_op_(GET_PARAM(0)), ref_quantize_op_(GET_PARAM(1)) {}
: VP9QuantizeBase(GET_PARAM(2), GET_PARAM(3)), quantize_op_(GET_PARAM(0)),
ref_quantize_op_(GET_PARAM(1)) {}
protected:
const QuantizeFunc quantize_op_;
const QuantizeFunc ref_quantize_op_;
};
// This quantizer compares the AC coefficients to the quantization step size to
// determine if further multiplication operations are needed.
// Based on vp9_quantize_fp_sse2().
//
// Coefficients are processed in groups of 16. For every group after the
// first, if all 16 magnitudes are <= dequant_ptr[1] / 2 the whole group is
// written as zero; otherwise each coefficient is rounded, multiplied by the
// quantizer and dequantized. Entry [0] of round_ptr/quant_ptr/dequant_ptr is
// used for index 0 and entry [1] for every other index.
//
// On return qcoeff_ptr/dqcoeff_ptr hold the quantized/dequantized values and
// *eob_ptr is one past the last position (in |scan| order) whose quantized
// value is non-zero, or 0 if there is none. |iscan| and |skip_block| are
// unused; skip_block must be 0.
// NOTE(review): the group loop touches indices up to the next multiple of 16,
// so n_coeffs is presumably always a multiple of 16 -- confirm with callers.
void quantize_fp_nz_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
int skip_block, const int16_t *round_ptr,
const int16_t *quant_ptr, tran_low_t *qcoeff_ptr,
tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr,
uint16_t *eob_ptr, const int16_t *scan,
const int16_t *iscan) {
int i, eob = -1;
// Half of the AC dequantization step; magnitudes at or below it count
// towards zeroing a whole group.
const int thr = dequant_ptr[1] >> 1;
(void)iscan;
(void)skip_block;
assert(!skip_block);
// Quantization pass: handle the coefficients 16 at a time, zeroing any group
// (other than the first) whose magnitudes are all <= thr.
for (i = 0; i < n_coeffs; i += 16) {
int y;
int nzflag_cnt = 0;
int abs_coeff[16];
int coeff_sign[16];
// count nzflag for each row (16 tran_low_t)
for (y = 0; y < 16; ++y) {
const int rc = i + y;
const int coeff = coeff_ptr[rc];
// Sign mask: 0 for non-negative values; presumably all ones (-1) for
// negative values, assuming an arithmetic right shift of a 32-bit int.
coeff_sign[y] = (coeff >> 31);
// (x ^ mask) - mask yields the absolute value when mask is 0 or -1.
abs_coeff[y] = (coeff ^ coeff_sign[y]) - coeff_sign[y];
// The first 16 are skipped in the sse2 code. Do the same here to match.
if (i >= 16 && (abs_coeff[y] <= thr)) {
nzflag_cnt++;
}
}
for (y = 0; y < 16; ++y) {
const int rc = i + y;
// If all of the AC coeffs in a row has magnitude less than the
// quantization step_size/2, quantize to zero.
if (nzflag_cnt < 16) {
// rc != 0 evaluates to 0 for index 0 and 1 otherwise, selecting the
// table entry.
int tmp =
clamp(abs_coeff[y] + round_ptr[rc != 0], INT16_MIN, INT16_MAX);
tmp = (tmp * quant_ptr[rc != 0]) >> 16;
// Re-apply the original sign using the same xor/subtract identity.
qcoeff_ptr[rc] = (tmp ^ coeff_sign[y]) - coeff_sign[y];
dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr[rc != 0];
} else {
qcoeff_ptr[rc] = 0;
dqcoeff_ptr[rc] = 0;
}
}
}
// Scan for eob.
for (i = 0; i < n_coeffs; i++) {
// Use the scan order to find the correct eob.
const int rc = scan[i];
if (qcoeff_ptr[rc]) {
eob = i;
}
}
// eob holds the last non-zero scan position (or -1); report it as a count.
*eob_ptr = eob + 1;
}
void GenerateHelperArrays(ACMRandom *rnd, int16_t *zbin, int16_t *round,
int16_t *quant, int16_t *quant_shift,
int16_t *dequant, int16_t *round_fp,
int16_t *quant_fp) {
// Max when q == 0. Otherwise, it is 48 for Y and 42 for U/V.
const int max_qrounding_factor_fp = 64;
int16_t *dequant) {
for (int j = 0; j < 2; j++) {
// The range is 4 to 1828 in the VP9 tables.
const int qlookup = rnd->RandRange(1825) + 4;
round_fp[j] = (max_qrounding_factor_fp * qlookup) >> 7;
quant_fp[j] = (1 << 16) / qlookup;
// Values determined by deconstructing vp9_init_quantizer().
// zbin may be up to 1143 for 8 and 10 bit Y values, or 1200 for 12 bit Y
// values or U/V values of any bit depth. This is because y_delta is not
// factored into the vp9_ac_quant() call.
zbin[j] = rnd->RandRange(1200);
// round may be up to 685 for Y values or 914 for U/V.
round[j] = rnd->RandRange(914);
// quant ranges from 1 to -32703
@@ -225,8 +141,6 @@ void GenerateHelperArrays(ACMRandom *rnd, int16_t *zbin, int16_t *round,
}
for (int j = 2; j < 8; j++) {
zbin[j] = zbin[1];
round_fp[j] = round_fp[1];
quant_fp[j] = quant_fp[1];
round[j] = round[1];
quant[j] = quant[1];
quant_shift[j] = quant_shift[1];
@@ -265,19 +179,19 @@ TEST_P(VP9QuantizeTest, OperationCheck) {
const int count = (4 << sz) * (4 << sz);
coeff.Set(&rnd, -max_value_, max_value_);
GenerateHelperArrays(&rnd, zbin_ptr_, round_ptr_, quant_ptr_,
quant_shift_ptr_, dequant_ptr_, round_fp_ptr_,
quant_fp_ptr_);
int16_t *r_ptr = (is_fp_) ? round_fp_ptr_ : round_ptr_;
int16_t *q_ptr = (is_fp_) ? quant_fp_ptr_ : quant_ptr_;
ref_quantize_op_(coeff.TopLeftPixel(), count, skip_block, zbin_ptr_, r_ptr,
q_ptr, quant_shift_ptr_, ref_qcoeff.TopLeftPixel(),
ref_dqcoeff.TopLeftPixel(), dequant_ptr_, &ref_eob,
scan_order->scan, scan_order->iscan);
quant_shift_ptr_, dequant_ptr_);
ASM_REGISTER_STATE_CHECK(quantize_op_(
coeff.TopLeftPixel(), count, skip_block, zbin_ptr_, r_ptr, q_ptr,
quant_shift_ptr_, qcoeff.TopLeftPixel(), dqcoeff.TopLeftPixel(),
dequant_ptr_, &eob, scan_order->scan, scan_order->iscan));
ref_quantize_op_(coeff.TopLeftPixel(), count, skip_block, zbin_ptr_,
round_ptr_, quant_ptr_, quant_shift_ptr_,
ref_qcoeff.TopLeftPixel(), ref_dqcoeff.TopLeftPixel(),
dequant_ptr_, &ref_eob, scan_order->scan,
scan_order->iscan);
ASM_REGISTER_STATE_CHECK(
quantize_op_(coeff.TopLeftPixel(), count, skip_block, zbin_ptr_,
round_ptr_, quant_ptr_, quant_shift_ptr_,
qcoeff.TopLeftPixel(), dqcoeff.TopLeftPixel(),
dequant_ptr_, &eob, scan_order->scan, scan_order->iscan));
EXPECT_TRUE(qcoeff.CheckValues(ref_qcoeff));
EXPECT_TRUE(dqcoeff.CheckValues(ref_dqcoeff));
@@ -327,19 +241,19 @@ TEST_P(VP9QuantizeTest, EOBCheck) {
coeff.TopLeftPixel()[rnd(count)] =
static_cast<int>(rnd.RandRange(max_value_ * 2)) - max_value_;
GenerateHelperArrays(&rnd, zbin_ptr_, round_ptr_, quant_ptr_,
quant_shift_ptr_, dequant_ptr_, round_fp_ptr_,
quant_fp_ptr_);
int16_t *r_ptr = (is_fp_) ? round_fp_ptr_ : round_ptr_;
int16_t *q_ptr = (is_fp_) ? quant_fp_ptr_ : quant_ptr_;
ref_quantize_op_(coeff.TopLeftPixel(), count, skip_block, zbin_ptr_, r_ptr,
q_ptr, quant_shift_ptr_, ref_qcoeff.TopLeftPixel(),
ref_dqcoeff.TopLeftPixel(), dequant_ptr_, &ref_eob,
scan_order->scan, scan_order->iscan);
quant_shift_ptr_, dequant_ptr_);
ASM_REGISTER_STATE_CHECK(quantize_op_(
coeff.TopLeftPixel(), count, skip_block, zbin_ptr_, r_ptr, q_ptr,
quant_shift_ptr_, qcoeff.TopLeftPixel(), dqcoeff.TopLeftPixel(),
dequant_ptr_, &eob, scan_order->scan, scan_order->iscan));
ref_quantize_op_(coeff.TopLeftPixel(), count, skip_block, zbin_ptr_,
round_ptr_, quant_ptr_, quant_shift_ptr_,
ref_qcoeff.TopLeftPixel(), ref_dqcoeff.TopLeftPixel(),
dequant_ptr_, &ref_eob, scan_order->scan,
scan_order->iscan);
ASM_REGISTER_STATE_CHECK(
quantize_op_(coeff.TopLeftPixel(), count, skip_block, zbin_ptr_,
round_ptr_, quant_ptr_, quant_shift_ptr_,
qcoeff.TopLeftPixel(), dqcoeff.TopLeftPixel(),
dequant_ptr_, &eob, scan_order->scan, scan_order->iscan));
EXPECT_TRUE(qcoeff.CheckValues(ref_qcoeff));
EXPECT_TRUE(dqcoeff.CheckValues(ref_dqcoeff));
@@ -385,10 +299,7 @@ TEST_P(VP9QuantizeTest, DISABLED_Speed) {
const int count = (4 << sz) * (4 << sz);
GenerateHelperArrays(&rnd, zbin_ptr_, round_ptr_, quant_ptr_,
quant_shift_ptr_, dequant_ptr_, round_fp_ptr_,
quant_fp_ptr_);
int16_t *r_ptr = (is_fp_) ? round_fp_ptr_ : round_ptr_;
int16_t *q_ptr = (is_fp_) ? quant_fp_ptr_ : quant_ptr_;
quant_shift_ptr_, dequant_ptr_);
if (i == 0) {
// When |coeff values| are less than zbin the results are 0.
@@ -408,10 +319,10 @@ TEST_P(VP9QuantizeTest, DISABLED_Speed) {
vpx_usec_timer timer;
vpx_usec_timer_start(&timer);
for (int j = 0; j < 100000000 / count; ++j) {
quantize_op_(coeff.TopLeftPixel(), count, skip_block, zbin_ptr_, r_ptr,
q_ptr, quant_shift_ptr_, qcoeff.TopLeftPixel(),
dqcoeff.TopLeftPixel(), dequant_ptr_, &eob,
scan_order->scan, scan_order->iscan);
quantize_op_(coeff.TopLeftPixel(), count, skip_block, zbin_ptr_,
round_ptr_, quant_ptr_, quant_shift_ptr_,
qcoeff.TopLeftPixel(), dqcoeff.TopLeftPixel(),
dequant_ptr_, &eob, scan_order->scan, scan_order->iscan);
}
vpx_usec_timer_mark(&timer);
const int elapsed_time = static_cast<int>(vpx_usec_timer_elapsed(&timer));
@@ -434,55 +345,50 @@ INSTANTIATE_TEST_CASE_P(
SSE2, VP9QuantizeTest,
::testing::Values(
make_tuple(&vpx_highbd_quantize_b_sse2, &vpx_highbd_quantize_b_c,
VPX_BITS_8, 16, false),
VPX_BITS_8, 16),
make_tuple(&vpx_highbd_quantize_b_sse2, &vpx_highbd_quantize_b_c,
VPX_BITS_10, 16, false),
VPX_BITS_10, 16),
make_tuple(&vpx_highbd_quantize_b_sse2, &vpx_highbd_quantize_b_c,
VPX_BITS_12, 16, false),
VPX_BITS_12, 16),
make_tuple(&vpx_highbd_quantize_b_32x32_sse2,
&vpx_highbd_quantize_b_32x32_c, VPX_BITS_8, 32, false),
&vpx_highbd_quantize_b_32x32_c, VPX_BITS_8, 32),
make_tuple(&vpx_highbd_quantize_b_32x32_sse2,
&vpx_highbd_quantize_b_32x32_c, VPX_BITS_10, 32, false),
&vpx_highbd_quantize_b_32x32_c, VPX_BITS_10, 32),
make_tuple(&vpx_highbd_quantize_b_32x32_sse2,
&vpx_highbd_quantize_b_32x32_c, VPX_BITS_12, 32, false)));
&vpx_highbd_quantize_b_32x32_c, VPX_BITS_12, 32)));
#else
INSTANTIATE_TEST_CASE_P(
SSE2, VP9QuantizeTest,
::testing::Values(make_tuple(&vpx_quantize_b_sse2, &vpx_quantize_b_c,
VPX_BITS_8, 16, false),
make_tuple(&QuantFPWrapper<vp9_quantize_fp_sse2>,
&QuantFPWrapper<quantize_fp_nz_c>, VPX_BITS_8,
16, true)));
INSTANTIATE_TEST_CASE_P(SSE2, VP9QuantizeTest,
::testing::Values(make_tuple(&vpx_quantize_b_sse2,
&vpx_quantize_b_c,
VPX_BITS_8, 16)));
#endif // CONFIG_VP9_HIGHBITDEPTH
INSTANTIATE_TEST_CASE_P(
DISABLED_SSE2, VP9QuantizeTest,
::testing::Values(make_tuple(&QuantFPWrapper<vp9_quantize_fp_sse2>,
&QuantFPWrapper<vp9_quantize_fp_c>, VPX_BITS_8,
16)));
#endif // HAVE_SSE2
#if HAVE_SSSE3 && !CONFIG_VP9_HIGHBITDEPTH
#if ARCH_X86_64
INSTANTIATE_TEST_CASE_P(
SSSE3, VP9QuantizeTest,
::testing::Values(make_tuple(&vpx_quantize_b_ssse3, &vpx_quantize_b_c,
VPX_BITS_8, 16, false),
make_tuple(&QuantFPWrapper<vp9_quantize_fp_ssse3>,
&QuantFPWrapper<quantize_fp_nz_c>, VPX_BITS_8,
16, true)));
#else
INSTANTIATE_TEST_CASE_P(SSSE3, VP9QuantizeTest,
::testing::Values(make_tuple(&vpx_quantize_b_ssse3,
&vpx_quantize_b_c,
VPX_BITS_8, 16, false)));
#endif
VPX_BITS_8, 16)));
#if ARCH_X86_64
// TODO(johannkoenig): SSSE3 optimizations do not yet pass this test.
INSTANTIATE_TEST_CASE_P(
DISABLED_SSSE3, VP9QuantizeTest,
::testing::Values(make_tuple(&vpx_quantize_b_32x32_ssse3,
&vpx_quantize_b_32x32_c, VPX_BITS_8, 32,
false),
&vpx_quantize_b_32x32_c, VPX_BITS_8, 32),
make_tuple(&QuantFPWrapper<vp9_quantize_fp_ssse3>,
&QuantFPWrapper<vp9_quantize_fp_c>, VPX_BITS_8,
16),
make_tuple(&QuantFPWrapper<vp9_quantize_fp_32x32_ssse3>,
&QuantFPWrapper<vp9_quantize_fp_32x32_c>,
VPX_BITS_8, 32, true)));
VPX_BITS_8, 32)));
#endif // ARCH_X86_64
#endif // HAVE_SSSE3 && !CONFIG_VP9_HIGHBITDEPTH
@@ -492,43 +398,36 @@ INSTANTIATE_TEST_CASE_P(
INSTANTIATE_TEST_CASE_P(
AVX, VP9QuantizeTest,
::testing::Values(make_tuple(&vpx_quantize_b_avx, &vpx_quantize_b_c,
VPX_BITS_8, 16, false),
VPX_BITS_8, 16),
// Even though SSSE3 and AVX do not match the reference
// code, we can keep them in sync with each other.
make_tuple(&vpx_quantize_b_32x32_avx,
&vpx_quantize_b_32x32_ssse3, VPX_BITS_8, 32,
false)));
&vpx_quantize_b_32x32_ssse3, VPX_BITS_8, 32)));
#endif // HAVE_AVX && !CONFIG_VP9_HIGHBITDEPTH
// TODO(webm:1448): dqcoeff is not handled correctly in HBD builds.
#if HAVE_NEON && !CONFIG_VP9_HIGHBITDEPTH
INSTANTIATE_TEST_CASE_P(
NEON, VP9QuantizeTest,
::testing::Values(make_tuple(&vpx_quantize_b_neon, &vpx_quantize_b_c,
VPX_BITS_8, 16, false),
make_tuple(&vpx_quantize_b_32x32_neon,
&vpx_quantize_b_32x32_c, VPX_BITS_8, 32,
false),
make_tuple(&QuantFPWrapper<vp9_quantize_fp_neon>,
&QuantFPWrapper<vp9_quantize_fp_c>, VPX_BITS_8,
16, true),
make_tuple(&QuantFPWrapper<vp9_quantize_fp_32x32_neon>,
&QuantFPWrapper<vp9_quantize_fp_32x32_c>,
VPX_BITS_8, 32, true)));
::testing::Values(
make_tuple(&vpx_quantize_b_neon, &vpx_quantize_b_c, VPX_BITS_8, 16),
make_tuple(&vpx_quantize_b_32x32_neon, &vpx_quantize_b_32x32_c,
VPX_BITS_8, 32),
make_tuple(&QuantFPWrapper<vp9_quantize_fp_neon>,
&QuantFPWrapper<vp9_quantize_fp_c>, VPX_BITS_8, 16),
make_tuple(&QuantFPWrapper<vp9_quantize_fp_32x32_neon>,
&QuantFPWrapper<vp9_quantize_fp_32x32_c>, VPX_BITS_8, 32)));
#endif // HAVE_NEON && !CONFIG_VP9_HIGHBITDEPTH
// Only useful to compare "Speed" test results.
INSTANTIATE_TEST_CASE_P(
DISABLED_C, VP9QuantizeTest,
::testing::Values(
make_tuple(&vpx_quantize_b_c, &vpx_quantize_b_c, VPX_BITS_8, 16, false),
make_tuple(&vpx_quantize_b_c, &vpx_quantize_b_c, VPX_BITS_8, 16),
make_tuple(&vpx_quantize_b_32x32_c, &vpx_quantize_b_32x32_c, VPX_BITS_8,
32, false),
32),
make_tuple(&QuantFPWrapper<vp9_quantize_fp_c>,
&QuantFPWrapper<vp9_quantize_fp_c>, VPX_BITS_8, 16, true),
make_tuple(&QuantFPWrapper<quantize_fp_nz_c>,
&QuantFPWrapper<quantize_fp_nz_c>, VPX_BITS_8, 16, true),
&QuantFPWrapper<vp9_quantize_fp_c>, VPX_BITS_8, 16),
make_tuple(&QuantFPWrapper<vp9_quantize_fp_32x32_c>,
&QuantFPWrapper<vp9_quantize_fp_32x32_c>, VPX_BITS_8, 32,
true)));
&QuantFPWrapper<vp9_quantize_fp_32x32_c>, VPX_BITS_8, 32)));
} // namespace

72
tools/all_builds.py Executable file
View File

@@ -0,0 +1,72 @@
#!/usr/bin/python
import getopt
import subprocess
import sys
# Long options accepted by main(); the trailing "=" means each takes a value.
LONG_OPTIONS = ["shard=", "shards="]
# Configure invocation shared by every build; main() appends any extra
# arguments and one "--enable-<experiment>" flag per configuration to it.
BASE_COMMAND = "./configure --enable-internal-stats --enable-experimental"
def RunCommand(command):
  """Run a command through the shell and abort the script if it fails.

  Args:
    command: Shell command line to execute (passed with shell=True).

  Returns:
    None when the command exits with status 0.

  Raises:
    SystemExit: with status 1 when the command's return code is non-zero.
  """
  run = subprocess.Popen(command, shell=True)
  # No pipes are attached, so communicate() only waits for completion; its
  # return value carries no data and is not kept.
  run.communicate()
  if run.returncode:
    # print() with one parenthesized argument works on Python 2 and Python 3.
    print("Non-zero return code: " + str(run.returncode) + " => exiting!")
    sys.exit(1)
def list_of_experiments():
  """Return the experiment names listed in ./configure.

  Reads the file named "configure" in the current directory and collects the
  lines between a line that is exactly 'EXPERIMENT_LIST="' and the next line
  that is exactly '"'. The first four characters of each collected line are
  dropped, and experiments named in currently_broken are skipped.

  Returns:
    A list of experiment name strings, in file order.
  """
  currently_broken = ["csm"]
  experiments = []
  # Close the file deterministically instead of leaking the handle.
  with open("configure") as configure_file:
    lines = configure_file.read().split("\n")
  list_start = False
  for line in lines:
    if line == 'EXPERIMENT_LIST="':
      list_start = True
    elif line == '"':
      list_start = False
    elif list_start:
      # Strip the four-character indent that precedes each entry.
      experiment = line[4:]
      if experiment not in currently_broken:
        experiments.append(experiment)
  return experiments
def main(argv):
  """Configure and build every configuration assigned to this shard.

  Args:
    argv: Full argument vector. Options before a literal "--" select the
      shard (--shard=<n>, --shards=<n>); everything after "--" is appended
      to the configure command line.

  Raises:
    SystemExit: with status 2 when an option is not recognized.
  """
  # Parse arguments
  options = {"--shard": 0, "--shards": 1}
  if "--" in argv:
    opt_end_index = argv.index("--")
  else:
    opt_end_index = len(argv)
  try:
    # getopt documents shortopts as a string; pass "" (no short options) so a
    # stray "-x" is reported through GetoptError and the usage text below.
    o, _ = getopt.getopt(argv[1:opt_end_index], "", LONG_OPTIONS)
  except getopt.GetoptError as err:
    print(str(err))
    print("Usage: %s [--shard=<n> --shards=<n>] -- [configure flag ...]"
          % argv[0])
    sys.exit(2)
  options.update(o)
  extra_args = argv[opt_end_index + 1:]

  # Shard experiment list
  shard = int(options["--shard"])
  shards = int(options["--shards"])
  experiments = list_of_experiments()
  base_command = " ".join([BASE_COMMAND] + extra_args)
  configs = [base_command]
  configs += ["%s --enable-%s" % (base_command, e) for e in experiments]
  # Configuration number i belongs to this shard when i % shards == shard.
  my_configs = [c for i, c in enumerate(configs) if i % shards == shard]

  # Run configs for this shard
  for config in my_configs:
    test_build(config)
def test_build(configure_command):
  """Announce, configure and build one configuration.

  Args:
    configure_command: Complete configure command line to run before
      "make clean" and "make".

  Each step goes through RunCommand, which exits the script on failure.
  """
  # ANSI escapes colour the banner; print() with one parenthesized argument
  # works on Python 2 and Python 3.
  print("\033[34m\033[47mTesting %s\033[0m" % (configure_command))
  RunCommand(configure_command)
  RunCommand("make clean")
  RunCommand("make")
if __name__ == "__main__":
main(sys.argv)

15
tools/author_first_release.sh Executable file
View File

@@ -0,0 +1,15 @@
#!/bin/bash
##
## List the release each author first contributed to.
##
## Usage: author_first_release.sh [TAGS]
##
## If the TAGS arguments are unspecified, all tags reported by `git tag`
## will be considered.
##
# Use the tags given on the command line, or every tag known to git if none.
tags=${@:-$(git tag)}
# For each tag, list its authors with git shortlog, drop the leading count
# column (cut -f2-), and prefix each line with the tag (leading "v" removed)
# and a tab. The combined output is sorted starting at field 2 and filtered
# with uniq, which ignores the first two whitespace-separated fields when
# comparing adjacent lines.
# NOTE(review): -f2 skips the tag and the first word of the author name;
# confirm that -f2 rather than -f1 is intended.
for tag in $tags; do
git shortlog -n -e -s $tag |
cut -f2- |
awk "{print \"${tag#v}\t\"\$0}"
done | sort -k2 | uniq -f2

158
tools/ftfy.sh Executable file
View File

@@ -0,0 +1,158 @@
#!/bin/sh
self="$0"
dirname_self=$(dirname "$self")
# Prints the help text to stderr, removes the temporary files named in
# CLEAN_FILES, and exits with status 1. It does not return to the caller.
usage() {
cat <<EOF >&2
Usage: $self [option]
This script applies a whitespace transformation to the commit at HEAD. If no
options are given, then the modified files are left in the working tree.
Options:
-h, --help Shows this message
-n, --dry-run Shows a diff of the changes to be made.
--amend Squashes the changes into the commit at HEAD
This option will also reformat the commit message.
--commit Creates a new commit containing only the whitespace changes
--msg-only Reformat the commit message only, ignore the patch itself.
EOF
rm -f ${CLEAN_FILES}
exit 1
}
# Writes a message to stderr, prefixed with this script's base name.
log() {
  echo "${self##*/}: $*" >&2
}
# Runs clang-format in place (using the project's style file) on every
# argument whose name ends in .h, .c or .cc; other arguments are ignored.
vpx_style() {
  for src in "$@"; do
    case "$src" in
      *.h | *.c | *.cc) clang-format -i --style=file "$src" ;;
    esac
  done
}
# Applies the patch file "$1" with patch -p1, but only when INTERSECT_RESULT
# (the exit status recorded from intersect-diffs.py) is non-zero. When it is
# zero the function does nothing and returns a non-zero status from the test.
apply() {
[ $INTERSECT_RESULT -ne 0 ] && patch -p1 < "$1"
}
# Commits the working-tree changes as a new commit that reuses HEAD's
# authorship, with a "Cosmetic: Fix whitespace" message and a Change-Id
# derived from HEAD's. Exits with status 1 if HEAD has no Change-Id.
commit() {
  LAST_CHANGEID=$(git show | awk '/Change-Id:/{print $2}')
  if [ -z "$LAST_CHANGEID" ]; then
    log "HEAD doesn't have a Change-Id, unable to generate a new commit"
    exit 1
  fi

  # Build a deterministic Change-Id from the parent's
  NEW_CHANGEID=${LAST_CHANGEID}-styled
  NEW_CHANGEID=I$(echo $NEW_CHANGEID | git hash-object --stdin)

  # Commit, preserving authorship from the parent commit.
  git commit -a -C HEAD > /dev/null
  # The first nine characters of the parent Change-Id are taken with printf:
  # the ${var:offset:length} form is not available in POSIX sh, and this
  # script runs under #!/bin/sh. The blank line keeps the Change-Id trailer
  # separate from the subject line.
  git commit --amend -F- << EOF
Cosmetic: Fix whitespace in change $(printf '%.9s' "${LAST_CHANGEID}")

Change-Id: ${NEW_CHANGEID}
EOF
}
# Shows how the commit message would change. Prints nothing unless
# DIFF_MSG_RESULT is non-zero; otherwise logs a heading and prints the unified
# diff between the two message files, starting from its third line.
show_commit_msg_diff() {
  [ $DIFF_MSG_RESULT -ne 0 ] || return 0
  log "Modified commit message:"
  diff -u "$ORIG_COMMIT_MSG" "$NEW_COMMIT_MSG" | tail -n +3
}
# Shows the commit-message diff, then amends HEAD with all working-tree
# changes and the message in NEW_COMMIT_MSG. The amend only happens when
# either DIFF_MSG_RESULT or INTERSECT_RESULT is non-zero.
amend() {
show_commit_msg_diff
if [ $DIFF_MSG_RESULT -ne 0 ] || [ $INTERSECT_RESULT -ne 0 ]; then
git commit -a --amend -F "$NEW_COMMIT_MSG"
fi
}
# Saves HEAD's commit message to ORIG_COMMIT_MSG, pipes it through
# wrap-commit-msg.py (located next to this script) into NEW_COMMIT_MSG, and
# stores the exit status of cmp on the two files in DIFF_MSG_RESULT.
diff_msg() {
git log -1 --format=%B > "$ORIG_COMMIT_MSG"
"${dirname_self}"/wrap-commit-msg.py \
< "$ORIG_COMMIT_MSG" > "$NEW_COMMIT_MSG"
cmp -s "$ORIG_COMMIT_MSG" "$NEW_COMMIT_MSG"
# Must directly follow cmp: $? is overwritten by the next command.
DIFF_MSG_RESULT=$?
}
# Temporary files
# Each name carries the shell's process id ($$). All of them are collected in
# CLEAN_FILES so usage() and the end of the script can remove them.
ORIG_DIFF=orig.diff.$$
MODIFIED_DIFF=modified.diff.$$
FINAL_DIFF=final.diff.$$
ORIG_COMMIT_MSG=orig.commit-msg.$$
NEW_COMMIT_MSG=new.commit-msg.$$
CLEAN_FILES="${ORIG_DIFF} ${MODIFIED_DIFF} ${FINAL_DIFF}"
CLEAN_FILES="${CLEAN_FILES} ${ORIG_COMMIT_MSG} ${NEW_COMMIT_MSG}"
# Preconditions
# At most one command-line argument is accepted.
[ $# -lt 2 ] || usage
if ! clang-format -version >/dev/null 2>&1; then
log "clang-format not found"
exit 1
fi
if ! git diff --quiet HEAD; then
log "Working tree is dirty, commit your changes first"
exit 1
fi
# Need to be in the root
cd "$(git rev-parse --show-toplevel)"
# Collect the original diff
git show > "${ORIG_DIFF}"
# Apply the style guide on new and modified files and collect its diff
for f in $(git diff HEAD^ --name-only -M90 --diff-filter=AM); do
case "$f" in
third_party/*) continue;;
esac
vpx_style "$f"
done
git diff --no-color --no-ext-diff > "${MODIFIED_DIFF}"
# Intersect the two diffs
"${dirname_self}"/intersect-diffs.py \
"${ORIG_DIFF}" "${MODIFIED_DIFF}" > "${FINAL_DIFF}"
# Exit status of intersect-diffs.py; apply() and amend() act only when it is
# non-zero.
INTERSECT_RESULT=$?
# Discard the reformatted working tree; FINAL_DIFF is re-applied on request.
git reset --hard >/dev/null
# Fixup the commit message
diff_msg
# Handle options
if [ -n "$1" ]; then
case "$1" in
-h|--help) usage;;
-n|--dry-run) cat "${FINAL_DIFF}"; show_commit_msg_diff;;
--commit) apply "${FINAL_DIFF}"; commit;;
--amend) apply "${FINAL_DIFF}"; amend;;
--msg-only) amend;;
*) usage;;
esac
else
# No option: leave the formatting changes in the working tree for review.
apply "${FINAL_DIFF}"
if ! git diff --quiet; then
log "Formatting changes applied, verify and commit."
log "See also: http://www.webmproject.org/code/contribute/conventions/"
git diff --stat
fi
fi
rm -f ${CLEAN_FILES}

View File

@@ -461,87 +461,96 @@ void vp8_loop_filter_vertical_edge_mmi(unsigned char *src_ptr,
);
}
/* clang-format off */
#define VP8_MBLOOP_HPSRAB \
"punpcklbh %[ftmp10], %[ftmp10], %[ftmp0] \n\t" \
"punpckhbh %[ftmp11], %[ftmp11], %[ftmp0] \n\t" \
"psrah %[ftmp10], %[ftmp10], %[ftmp9] \n\t" \
"psrah %[ftmp11], %[ftmp11], %[ftmp9] \n\t" \
"packsshb %[ftmp0], %[ftmp10], %[ftmp11] \n\t"
"xor %[ftmp3], %[ftmp3], %[ftmp3] \n\t" \
"xor %[ftmp8], %[ftmp8], %[ftmp8] \n\t" \
"punpcklbh %[ftmp3], %[ftmp3], %[ftmp0] \n\t" \
"punpckhbh %[ftmp8], %[ftmp8], %[ftmp0] \n\t" \
"psrah %[ftmp3], %[ftmp3], %[ftmp9] \n\t" \
"psrah %[ftmp8], %[ftmp8], %[ftmp9] \n\t" \
"packsshb %[ftmp0], %[ftmp3], %[ftmp8] \n\t"
#define VP8_MBLOOP_HPSRAB_ADD(reg) \
"punpcklbh %[ftmp1], %[ftmp0], %[ftmp12] \n\t" \
"punpckhbh %[ftmp2], %[ftmp0], %[ftmp12] \n\t" \
"pmulhh %[ftmp1], %[ftmp1], " #reg " \n\t" \
"pmulhh %[ftmp2], %[ftmp2], " #reg " \n\t" \
"paddh %[ftmp1], %[ftmp1], %[ff_ph_003f] \n\t" \
"paddh %[ftmp2], %[ftmp2], %[ff_ph_003f] \n\t" \
"psrah %[ftmp1], %[ftmp1], %[ftmp9] \n\t" \
"psrah %[ftmp2], %[ftmp2], %[ftmp9] \n\t" \
"packsshb %[ftmp1], %[ftmp1], %[ftmp2] \n\t"
/* clang-format on */
#define VP8_MBLOOP_HPSRAB_PMULHH(reg1, reg2) \
"pmulhh " #reg1 ", " #reg1 ", " #reg2 " \n\t"
#define VP8_MBLOOP_HPSRAB_ADD(reg) \
"xor %[ftmp3], %[ftmp3], %[ftmp3] \n\t" \
"xor %[ftmp8], %[ftmp8], %[ftmp8] \n\t" \
"punpcklbh %[ftmp3], %[ftmp3], %[ftmp2] \n\t" \
"punpckhbh %[ftmp8], %[ftmp8], %[ftmp2] \n\t" \
VP8_MBLOOP_HPSRAB_PMULHH(%[ftmp3], reg) \
VP8_MBLOOP_HPSRAB_PMULHH(%[ftmp8], reg) \
"paddh %[ftmp3], %[ftmp3], %[ff_ph_003f] \n\t" \
"paddh %[ftmp8], %[ftmp8], %[ff_ph_003f] \n\t" \
"psrah %[ftmp3], %[ftmp3], %[ftmp9] \n\t" \
"psrah %[ftmp8], %[ftmp8], %[ftmp9] \n\t" \
"packsshb %[ftmp3], %[ftmp3], %[ftmp8] \n\t"
void vp8_mbloop_filter_horizontal_edge_mmi(
unsigned char *src_ptr, int src_pixel_step, const unsigned char *blimit,
const unsigned char *limit, const unsigned char *thresh, int count) {
uint32_t tmp[1];
double ftmp[13];
mips_reg addr[2];
DECLARE_ALIGNED(8, const uint64_t, srct[1]);
double ftmp[10];
__asm__ volatile (
MMI_SLL(%[tmp0], %[src_pixel_step], 0x02)
MMI_SUBU(%[src_ptr], %[src_ptr], %[tmp0])
"1: \n\t"
"gsldlc1 %[ftmp9], 0x07(%[limit]) \n\t"
"gsldrc1 %[ftmp9], 0x00(%[limit]) \n\t"
/* ftmp1: p3 */
"gsldlc1 %[ftmp1], 0x07(%[src_ptr]) \n\t"
"gsldrc1 %[ftmp1], 0x00(%[src_ptr]) \n\t"
/* ftmp3: p2 */
MMI_ADDU(%[src_ptr], %[src_ptr], %[src_pixel_step])
"gsldlc1 %[ftmp3], 0x07(%[src_ptr]) \n\t"
"gsldrc1 %[ftmp3], 0x00(%[src_ptr]) \n\t"
/* ftmp4: p1 */
MMI_ADDU(%[src_ptr], %[src_ptr], %[src_pixel_step])
"gsldlc1 %[ftmp4], 0x07(%[src_ptr]) \n\t"
"gsldrc1 %[ftmp4], 0x00(%[src_ptr]) \n\t"
/* ftmp5: p0 */
MMI_ADDU(%[src_ptr], %[src_ptr], %[src_pixel_step])
"gsldlc1 %[ftmp5], 0x07(%[src_ptr]) \n\t"
"gsldrc1 %[ftmp5], 0x00(%[src_ptr]) \n\t"
/* ftmp6: q0 */
MMI_ADDU(%[src_ptr], %[src_ptr], %[src_pixel_step])
"gsldlc1 %[ftmp6], 0x07(%[src_ptr]) \n\t"
"gsldrc1 %[ftmp6], 0x00(%[src_ptr]) \n\t"
/* ftmp7: q1 */
MMI_ADDU(%[src_ptr], %[src_ptr], %[src_pixel_step])
"gsldlc1 %[ftmp7], 0x07(%[src_ptr]) \n\t"
"gsldrc1 %[ftmp7], 0x00(%[src_ptr]) \n\t"
/* ftmp8: q2 */
MMI_ADDU(%[src_ptr], %[src_ptr], %[src_pixel_step])
"gsldlc1 %[ftmp8], 0x07(%[src_ptr]) \n\t"
"gsldrc1 %[ftmp8], 0x00(%[src_ptr]) \n\t"
/* ftmp2: q3 */
MMI_ADDU(%[src_ptr], %[src_ptr], %[src_pixel_step])
"gsldlc1 %[ftmp2], 0x07(%[src_ptr]) \n\t"
"gsldrc1 %[ftmp2], 0x00(%[src_ptr]) \n\t"
"gsldlc1 %[ftmp12], 0x07(%[blimit]) \n\t"
"gsldrc1 %[ftmp12], 0x00(%[blimit]) \n\t"
MMI_ADDU(%[addr0], %[src_ptr], %[src_pixel_step])
MMI_SLL(%[tmp0], %[src_pixel_step], 0x02)
MMI_SUBU(%[addr1], %[src_ptr], %[tmp0])
"gsldlc1 %[ftmp1], 0x07(%[addr1]) \n\t"
"gsldrc1 %[ftmp1], 0x00(%[addr1]) \n\t"
MMI_SUBU(%[addr1], %[addr0], %[tmp0])
"gsldlc1 %[ftmp3], 0x07(%[addr1]) \n\t"
"gsldrc1 %[ftmp3], 0x00(%[addr1]) \n\t"
"pasubub %[ftmp0], %[ftmp1], %[ftmp3] \n\t"
"psubusb %[ftmp0], %[ftmp0], %[ftmp9] \n\t"
/* ftmp4:p1 */
MMI_SLL(%[tmp0], %[src_pixel_step], 0x01)
MMI_SUBU(%[addr1], %[src_ptr], %[tmp0])
"gsldlc1 %[ftmp4], 0x07(%[addr1]) \n\t"
"gsldrc1 %[ftmp4], 0x00(%[addr1]) \n\t"
"pasubub %[ftmp1], %[ftmp3], %[ftmp4] \n\t"
"psubusb %[ftmp1], %[ftmp1], %[ftmp9] \n\t"
"or %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
"pasubub %[ftmp10], %[ftmp4], %[ftmp5] \n\t"
"psubusb %[ftmp1], %[ftmp10], %[ftmp9] \n\t"
/* ftmp5:p0 */
MMI_SUBU(%[addr1], %[src_ptr], %[src_pixel_step])
"gsldlc1 %[ftmp5], 0x07(%[addr1]) \n\t"
"gsldrc1 %[ftmp5], 0x00(%[addr1]) \n\t"
"pasubub %[ftmp1], %[ftmp4], %[ftmp5] \n\t"
"sdc1 %[ftmp1], 0x00(%[srct]) \n\t"
"psubusb %[ftmp1], %[ftmp1], %[ftmp9] \n\t"
"or %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
"pasubub %[ftmp11], %[ftmp7], %[ftmp6] \n\t"
"psubusb %[ftmp1], %[ftmp11], %[ftmp9] \n\t"
/* ftmp6:q0 */
"gsldlc1 %[ftmp6], 0x07(%[src_ptr]) \n\t"
"gsldrc1 %[ftmp6], 0x00(%[src_ptr]) \n\t"
/* ftmp7:q1 */
"gsldlc1 %[ftmp7], 0x07(%[addr0]) \n\t"
"gsldrc1 %[ftmp7], 0x00(%[addr0]) \n\t"
"pasubub %[ftmp1], %[ftmp7], %[ftmp6] \n\t"
"sdc1 %[ftmp1], 0x08(%[srct]) \n\t"
"psubusb %[ftmp1], %[ftmp1], %[ftmp9] \n\t"
"or %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
MMI_ADDU(%[addr1], %[src_ptr], %[tmp0])
"gsldlc1 %[ftmp8], 0x07(%[addr1]) \n\t"
"gsldrc1 %[ftmp8], 0x00(%[addr1]) \n\t"
"pasubub %[ftmp1], %[ftmp8], %[ftmp7] \n\t"
"psubusb %[ftmp1], %[ftmp1], %[ftmp9] \n\t"
"or %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
MMI_ADDU(%[addr1], %[addr0], %[tmp0])
"gsldlc1 %[ftmp2], 0x07(%[addr1]) \n\t"
"gsldrc1 %[ftmp2], 0x00(%[addr1]) \n\t"
"pasubub %[ftmp1], %[ftmp2], %[ftmp8] \n\t"
"psubusb %[ftmp1], %[ftmp1], %[ftmp9] \n\t"
"or %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
@@ -554,7 +563,9 @@ void vp8_mbloop_filter_horizontal_edge_mmi(
"mtc1 %[tmp0], %[ftmp9] \n\t"
"psrlh %[ftmp2], %[ftmp2], %[ftmp9] \n\t"
"paddusb %[ftmp1], %[ftmp1], %[ftmp2] \n\t"
"psubusb %[ftmp1], %[ftmp1], %[ftmp12] \n\t"
"gsldlc1 %[ftmp9], 0x07(%[blimit]) \n\t"
"gsldrc1 %[ftmp9], 0x00(%[blimit]) \n\t"
"psubusb %[ftmp1], %[ftmp1], %[ftmp9] \n\t"
"or %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
"xor %[ftmp9], %[ftmp9], %[ftmp9] \n\t"
/* ftmp0: mask */
@@ -562,26 +573,29 @@ void vp8_mbloop_filter_horizontal_edge_mmi(
"gsldlc1 %[ftmp9], 0x07(%[thresh]) \n\t"
"gsldrc1 %[ftmp9], 0x00(%[thresh]) \n\t"
"psubusb %[ftmp1], %[ftmp10], %[ftmp9] \n\t"
"psubusb %[ftmp2], %[ftmp11], %[ftmp9] \n\t"
"ldc1 %[ftmp1], 0x00(%[srct]) \n\t"
"psubusb %[ftmp1], %[ftmp1], %[ftmp9] \n\t"
"ldc1 %[ftmp2], 0x08(%[srct]) \n\t"
"psubusb %[ftmp2], %[ftmp2], %[ftmp9] \n\t"
"paddb %[ftmp1], %[ftmp1], %[ftmp2] \n\t"
"xor %[ftmp2], %[ftmp2], %[ftmp2] \n\t"
"pcmpeqb %[ftmp1], %[ftmp1], %[ftmp2] \n\t"
"pcmpeqb %[ftmp2], %[ftmp2], %[ftmp2] \n\t"
/* ftmp1: hev */
/* ftmp1:hev*/
"xor %[ftmp1], %[ftmp1], %[ftmp2] \n\t"
"xor %[ftmp4], %[ftmp4], %[ff_pb_80] \n\t"
"xor %[ftmp5], %[ftmp5], %[ff_pb_80] \n\t"
"xor %[ftmp6], %[ftmp6], %[ff_pb_80] \n\t"
"xor %[ftmp7], %[ftmp7], %[ff_pb_80] \n\t"
"psubsb %[ftmp2], %[ftmp4], %[ftmp7] \n\t"
"psubsb %[ftmp9], %[ftmp6], %[ftmp5] \n\t"
"paddsb %[ftmp2], %[ftmp2], %[ftmp9] \n\t"
"paddsb %[ftmp2], %[ftmp2], %[ftmp9] \n\t"
"paddsb %[ftmp2], %[ftmp2], %[ftmp9] \n\t"
"and %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
"pandn %[ftmp12], %[ftmp1], %[ftmp2] \n\t"
"sdc1 %[ftmp2], 0x00(%[srct]) \n\t"
"and %[ftmp2], %[ftmp2], %[ftmp1] \n\t"
"li %[tmp0], 0x0b \n\t"
@@ -592,71 +606,75 @@ void vp8_mbloop_filter_horizontal_edge_mmi(
"paddsb %[ftmp0], %[ftmp2], %[ff_pb_04] \n\t"
VP8_MBLOOP_HPSRAB
"psubsb %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
"ldc1 %[ftmp2], 0x00(%[srct]) \n\t"
"pandn %[ftmp2], %[ftmp1], %[ftmp2] \n\t"
"li %[tmp0], 0x07 \n\t"
"mtc1 %[tmp0], %[ftmp9] \n\t"
"xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
VP8_MBLOOP_HPSRAB_ADD(%[ff_ph_1b00])
"psubsb %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
"paddsb %[ftmp5], %[ftmp5], %[ftmp1] \n\t"
"psubsb %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
"paddsb %[ftmp5], %[ftmp5], %[ftmp3] \n\t"
"xor %[ftmp6], %[ftmp6], %[ff_pb_80] \n\t"
"xor %[ftmp5], %[ftmp5], %[ff_pb_80] \n\t"
MMI_SLL(%[tmp0], %[src_pixel_step], 0x02)
MMI_SUBU(%[src_ptr], %[src_ptr], %[tmp0])
"gssdlc1 %[ftmp5], 0x07(%[src_ptr]) \n\t"
"gssdrc1 %[ftmp5], 0x00(%[src_ptr]) \n\t"
MMI_ADDU(%[src_ptr], %[src_ptr], %[src_pixel_step])
MMI_SUBU(%[addr1], %[src_ptr], %[src_pixel_step])
"gssdlc1 %[ftmp5], 0x07(%[addr1]) \n\t"
"gssdrc1 %[ftmp5], 0x00(%[addr1]) \n\t"
"gssdlc1 %[ftmp6], 0x07(%[src_ptr]) \n\t"
"gssdrc1 %[ftmp6], 0x00(%[src_ptr]) \n\t"
VP8_MBLOOP_HPSRAB_ADD(%[ff_ph_1200])
"paddsb %[ftmp4], %[ftmp4], %[ftmp1] \n\t"
"psubsb %[ftmp7], %[ftmp7], %[ftmp1] \n\t"
"paddsb %[ftmp4], %[ftmp4], %[ftmp3] \n\t"
"psubsb %[ftmp7], %[ftmp7], %[ftmp3] \n\t"
"xor %[ftmp4], %[ftmp4], %[ff_pb_80] \n\t"
"xor %[ftmp7], %[ftmp7], %[ff_pb_80] \n\t"
MMI_ADDU(%[src_ptr], %[src_ptr], %[src_pixel_step])
"gssdlc1 %[ftmp7], 0x07(%[src_ptr]) \n\t"
"gssdrc1 %[ftmp7], 0x00(%[src_ptr]) \n\t"
MMI_SUBU(%[src_ptr], %[src_ptr], %[tmp0])
MMI_ADDU(%[src_ptr], %[src_ptr], %[src_pixel_step])
"gssdlc1 %[ftmp4], 0x07(%[src_ptr]) \n\t"
"gssdrc1 %[ftmp4], 0x00(%[src_ptr]) \n\t"
"gssdlc1 %[ftmp7], 0x07(%[addr0]) \n\t"
"gssdrc1 %[ftmp7], 0x00(%[addr0]) \n\t"
MMI_SLL(%[tmp0], %[src_pixel_step], 0x01)
MMI_SUBU(%[addr1], %[src_ptr], %[tmp0])
"gssdlc1 %[ftmp4], 0x07(%[addr1]) \n\t"
"gssdrc1 %[ftmp4], 0x00(%[addr1]) \n\t"
VP8_MBLOOP_HPSRAB_ADD(%[ff_ph_0900])
"xor %[ftmp3], %[ftmp3], %[ff_pb_80] \n\t"
"xor %[ftmp8], %[ftmp8], %[ff_pb_80] \n\t"
"paddsb %[ftmp3], %[ftmp3], %[ftmp1] \n\t"
"psubsb %[ftmp8], %[ftmp8], %[ftmp1] \n\t"
"xor %[ftmp3], %[ftmp3], %[ff_pb_80] \n\t"
"xor %[ftmp8], %[ftmp8], %[ff_pb_80] \n\t"
MMI_ADDU(%[src_ptr], %[src_ptr], %[tmp0])
"gssdlc1 %[ftmp8], 0x07(%[src_ptr]) \n\t"
"gssdrc1 %[ftmp8], 0x00(%[src_ptr]) \n\t"
MMI_SUBU(%[src_ptr], %[src_ptr], %[tmp0])
MMI_SUBU(%[src_ptr], %[src_ptr], %[src_pixel_step])
"gssdlc1 %[ftmp3], 0x07(%[src_ptr]) \n\t"
"gssdrc1 %[ftmp3], 0x00(%[src_ptr]) \n\t"
MMI_SLL(%[tmp0], %[src_pixel_step], 0x02)
MMI_SUBU(%[addr1], %[addr0], %[tmp0])
"gsldlc1 %[ftmp4], 0x07(%[addr1]) \n\t"
"gsldrc1 %[ftmp4], 0x00(%[addr1]) \n\t"
MMI_ADDU(%[addr1], %[addr0], %[src_pixel_step])
"gsldlc1 %[ftmp7], 0x07(%[addr1]) \n\t"
"gsldrc1 %[ftmp7], 0x00(%[addr1]) \n\t"
"xor %[ftmp4], %[ftmp4], %[ff_pb_80] \n\t"
"xor %[ftmp7], %[ftmp7], %[ff_pb_80] \n\t"
"paddsb %[ftmp4], %[ftmp4], %[ftmp3] \n\t"
"psubsb %[ftmp7], %[ftmp7], %[ftmp3] \n\t"
"xor %[ftmp4], %[ftmp4], %[ff_pb_80] \n\t"
"xor %[ftmp7], %[ftmp7], %[ff_pb_80] \n\t"
MMI_ADDU(%[addr1], %[addr0], %[src_pixel_step])
"gssdlc1 %[ftmp7], 0x07(%[addr1]) \n\t"
"gssdrc1 %[ftmp7], 0x00(%[addr1]) \n\t"
MMI_SUBU(%[addr1], %[addr0], %[tmp0])
"gssdlc1 %[ftmp4], 0x07(%[addr1]) \n\t"
"gssdrc1 %[ftmp4], 0x00(%[addr1]) \n\t"
MMI_SUBU(%[src_ptr], %[src_ptr], %[src_pixel_step])
MMI_ADDIU(%[src_ptr], %[src_ptr], 0x08)
"addiu %[count], %[count], -0x01 \n\t"
MMI_ADDIU(%[src_ptr], %[src_ptr], 0x08)
"bnez %[count], 1b \n\t"
: [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
[ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
[ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
[ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
[ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]),
[ftmp10]"=&f"(ftmp[10]), [ftmp11]"=&f"(ftmp[11]),
[ftmp12]"=&f"(ftmp[12]), [tmp0]"=&r"(tmp[0]),
[src_ptr]"+&r"(src_ptr), [count]"+&r"(count)
: [limit]"r"(limit), [blimit]"r"(blimit),
[thresh]"r"(thresh),
[tmp0]"=&r"(tmp[0]),
[addr0]"=&r"(addr[0]), [addr1]"=&r"(addr[1]),
[src_ptr]"+&r"(src_ptr), [count]"+&r"(count)
: [limit]"r"(limit), [blimit]"r"(blimit),
[srct]"r"(srct), [thresh]"r"(thresh),
[src_pixel_step]"r"((mips_reg)src_pixel_step),
[ff_pb_fe]"f"(ff_pb_fe), [ff_pb_80]"f"(ff_pb_80),
[ff_pb_04]"f"(ff_pb_04), [ff_pb_03]"f"(ff_pb_03),
[ff_ph_0900]"f"(ff_ph_0900), [ff_ph_1b00]"f"(ff_ph_1b00),
[ff_ph_1200]"f"(ff_ph_1200), [ff_ph_003f]"f"(ff_ph_003f)
[ff_pb_fe]"f"(ff_pb_fe), [ff_pb_80]"f"(ff_pb_80),
[ff_pb_04]"f"(ff_pb_04), [ff_pb_03]"f"(ff_pb_03),
[ff_ph_0900]"f"(ff_ph_0900), [ff_ph_1b00]"f"(ff_ph_1b00),
[ff_ph_1200]"f"(ff_ph_1200), [ff_ph_003f]"f"(ff_ph_003f)
: "memory"
);
}
@@ -678,60 +696,64 @@ void vp8_mbloop_filter_vertical_edge_mmi(
unsigned char *src_ptr, int src_pixel_step, const unsigned char *blimit,
const unsigned char *limit, const unsigned char *thresh, int count) {
mips_reg tmp[1];
mips_reg addr[2];
DECLARE_ALIGNED(8, const uint64_t, srct[1]);
double ftmp[14];
double ftmp[13];
__asm__ volatile (
MMI_SLL(%[tmp0], %[src_pixel_step], 0x02)
MMI_ADDU(%[src_ptr], %[src_ptr], %[tmp0])
MMI_SUBU(%[src_ptr], %[src_ptr], 0x04)
"1: \n\t"
"gsldlc1 %[ftmp5], 0x07(%[src_ptr]) \n\t"
"gsldrc1 %[ftmp5], 0x00(%[src_ptr]) \n\t"
MMI_ADDU(%[src_ptr], %[src_ptr], %[src_pixel_step])
"gsldlc1 %[ftmp6], 0x07(%[src_ptr]) \n\t"
"gsldrc1 %[ftmp6], 0x00(%[src_ptr]) \n\t"
MMI_ADDU(%[src_ptr], %[src_ptr], %[src_pixel_step])
"gsldlc1 %[ftmp7], 0x07(%[src_ptr]) \n\t"
"gsldrc1 %[ftmp7], 0x00(%[src_ptr]) \n\t"
MMI_ADDU(%[src_ptr], %[src_ptr], %[src_pixel_step])
"gsldlc1 %[ftmp8], 0x07(%[src_ptr]) \n\t"
"gsldrc1 %[ftmp8], 0x00(%[src_ptr]) \n\t"
MMI_SLL (%[tmp0], %[src_pixel_step], 0x01)
MMI_ADDU(%[addr0], %[src_ptr], %[tmp0])
"gsldlc1 %[ftmp11], 0x07(%[addr0]) \n\t"
"gsldrc1 %[ftmp11], 0x00(%[addr0]) \n\t"
MMI_ADDU(%[addr0], %[addr0], %[src_pixel_step])
"gsldlc1 %[ftmp12], 0x07(%[addr0]) \n\t"
"gsldrc1 %[ftmp12], 0x00(%[addr0]) \n\t"
"punpcklbh %[ftmp1], %[ftmp11], %[ftmp12] \n\t"
"punpckhbh %[ftmp2], %[ftmp11], %[ftmp12] \n\t"
"punpcklbh %[ftmp11], %[ftmp5], %[ftmp6] \n\t"
"punpckhbh %[ftmp12], %[ftmp5], %[ftmp6] \n\t"
"punpcklbh %[ftmp9], %[ftmp7], %[ftmp8] \n\t"
"punpckhbh %[ftmp10], %[ftmp7], %[ftmp8] \n\t"
"gsldlc1 %[ftmp11], 0x07(%[src_ptr]) \n\t"
"gsldrc1 %[ftmp11], 0x00(%[src_ptr]) \n\t"
MMI_ADDU(%[addr0], %[src_ptr], %[src_pixel_step])
"gsldlc1 %[ftmp12], 0x07(%[addr0]) \n\t"
"gsldrc1 %[ftmp12], 0x00(%[addr0]) \n\t"
"punpcklbh %[ftmp3], %[ftmp11], %[ftmp12] \n\t"
"punpckhbh %[ftmp4], %[ftmp11], %[ftmp12] \n\t"
"punpcklhw %[ftmp1], %[ftmp12], %[ftmp10] \n\t"
"punpckhhw %[ftmp2], %[ftmp12], %[ftmp10] \n\t"
"punpcklhw %[ftmp3], %[ftmp11], %[ftmp9] \n\t"
"punpckhhw %[ftmp4], %[ftmp11], %[ftmp9] \n\t"
"punpcklhw %[ftmp5], %[ftmp4], %[ftmp2] \n\t"
"punpckhhw %[ftmp6], %[ftmp4], %[ftmp2] \n\t"
"punpcklhw %[ftmp7], %[ftmp3], %[ftmp1] \n\t"
"punpckhhw %[ftmp8], %[ftmp3], %[ftmp1] \n\t"
MMI_ADDU(%[src_ptr], %[src_ptr], %[src_pixel_step])
"gsldlc1 %[ftmp5], 0x07(%[src_ptr]) \n\t"
"gsldrc1 %[ftmp5], 0x00(%[src_ptr]) \n\t"
MMI_ADDU(%[src_ptr], %[src_ptr], %[src_pixel_step])
"gsldlc1 %[ftmp6], 0x07(%[src_ptr]) \n\t"
"gsldrc1 %[ftmp6], 0x00(%[src_ptr]) \n\t"
MMI_ADDU(%[src_ptr], %[src_ptr], %[src_pixel_step])
"gsldlc1 %[ftmp7], 0x07(%[src_ptr]) \n\t"
"gsldrc1 %[ftmp7], 0x00(%[src_ptr]) \n\t"
MMI_ADDU(%[src_ptr], %[src_ptr], %[src_pixel_step])
"gsldlc1 %[ftmp8], 0x07(%[src_ptr]) \n\t"
"gsldrc1 %[ftmp8], 0x00(%[src_ptr]) \n\t"
MMI_SLL(%[tmp0], %[src_pixel_step], 0x01)
MMI_SUBU(%[addr0], %[src_ptr], %[tmp0])
"gsldlc1 %[ftmp11], 0x07(%[addr0]) \n\t"
"gsldrc1 %[ftmp11], 0x00(%[addr0]) \n\t"
MMI_SUBU(%[addr0], %[src_ptr], %[src_pixel_step])
"gsldlc1 %[ftmp12], 0x07(%[addr0]) \n\t"
"gsldrc1 %[ftmp12], 0x00(%[addr0]) \n\t"
"punpcklbh %[ftmp9], %[ftmp11], %[ftmp12] \n\t"
"punpckhbh %[ftmp10], %[ftmp11], %[ftmp12] \n\t"
"punpcklbh %[ftmp11], %[ftmp5], %[ftmp6] \n\t"
"punpckhbh %[ftmp12], %[ftmp5], %[ftmp6] \n\t"
"punpcklbh %[ftmp9], %[ftmp7], %[ftmp8] \n\t"
"punpckhbh %[ftmp10], %[ftmp7], %[ftmp8] \n\t"
MMI_SLL(%[tmp0], %[src_pixel_step], 0x02)
MMI_SUBU(%[addr0], %[src_ptr], %[tmp0])
"gsldlc1 %[ftmp11], 0x07(%[addr0]) \n\t"
"gsldrc1 %[ftmp11], 0x00(%[addr0]) \n\t"
MMI_ADDU(%[addr0], %[addr0], %[src_pixel_step])
"gsldlc1 %[ftmp12], 0x07(%[addr0]) \n\t"
"gsldrc1 %[ftmp12], 0x00(%[addr0]) \n\t"
"punpcklbh %[ftmp0], %[ftmp11], %[ftmp12] \n\t"
"punpckhbh %[ftmp11], %[ftmp11], %[ftmp12] \n\t"
"punpcklhw %[ftmp5], %[ftmp12], %[ftmp10] \n\t"
"punpckhhw %[ftmp6], %[ftmp12], %[ftmp10] \n\t"
"punpcklhw %[ftmp7], %[ftmp11], %[ftmp9] \n\t"
"punpckhhw %[ftmp8], %[ftmp11], %[ftmp9] \n\t"
"punpcklhw %[ftmp1], %[ftmp11], %[ftmp10] \n\t"
"punpckhhw %[ftmp2], %[ftmp11], %[ftmp10] \n\t"
"punpcklhw %[ftmp3], %[ftmp0], %[ftmp9] \n\t"
"punpckhhw %[ftmp4], %[ftmp0], %[ftmp9] \n\t"
"gsldlc1 %[ftmp13], 0x07(%[limit]) \n\t"
"gsldrc1 %[ftmp13], 0x00(%[limit]) \n\t"
/* ftmp9:q0 ftmp10:q1 */
"punpcklwd %[ftmp9], %[ftmp1], %[ftmp5] \n\t"
"punpckhwd %[ftmp10], %[ftmp1], %[ftmp5] \n\t"
@@ -749,61 +771,60 @@ void vp8_mbloop_filter_vertical_edge_mmi(
"punpcklwd %[ftmp5], %[ftmp4], %[ftmp8] \n\t"
"punpckhwd %[ftmp6], %[ftmp4], %[ftmp8] \n\t"
"gsldlc1 %[ftmp8], 0x07(%[limit]) \n\t"
"gsldrc1 %[ftmp8], 0x00(%[limit]) \n\t"
/* abs (q3-q2) */
"pasubub %[ftmp7], %[ftmp12], %[ftmp11] \n\t"
"psubusb %[ftmp0], %[ftmp7], %[ftmp13] \n\t"
"psubusb %[ftmp0], %[ftmp7], %[ftmp8] \n\t"
/* abs (q2-q1) */
"pasubub %[ftmp7], %[ftmp11], %[ftmp10] \n\t"
"psubusb %[ftmp7], %[ftmp7], %[ftmp13] \n\t"
"psubusb %[ftmp7], %[ftmp7], %[ftmp8] \n\t"
"or %[ftmp0], %[ftmp0], %[ftmp7] \n\t"
/* ftmp3: abs(q1-q0) */
"pasubub %[ftmp3], %[ftmp10], %[ftmp9] \n\t"
"psubusb %[ftmp7], %[ftmp3], %[ftmp13] \n\t"
"psubusb %[ftmp7], %[ftmp3], %[ftmp8] \n\t"
"or %[ftmp0], %[ftmp0], %[ftmp7] \n\t"
/* ftmp4: abs(p1-p0) */
"pasubub %[ftmp4], %[ftmp5], %[ftmp6] \n\t"
"psubusb %[ftmp7], %[ftmp4], %[ftmp13] \n\t"
"psubusb %[ftmp7], %[ftmp4], %[ftmp8] \n\t"
"or %[ftmp0], %[ftmp0], %[ftmp7] \n\t"
/* abs (p2-p1) */
"pasubub %[ftmp7], %[ftmp2], %[ftmp5] \n\t"
"psubusb %[ftmp7], %[ftmp7], %[ftmp13] \n\t"
"psubusb %[ftmp7], %[ftmp7], %[ftmp8] \n\t"
"or %[ftmp0], %[ftmp0], %[ftmp7] \n\t"
/* abs (p3-p2) */
"pasubub %[ftmp7], %[ftmp1], %[ftmp2] \n\t"
"psubusb %[ftmp7], %[ftmp7], %[ftmp13] \n\t"
"psubusb %[ftmp7], %[ftmp7], %[ftmp8] \n\t"
"or %[ftmp0], %[ftmp0], %[ftmp7] \n\t"
"gsldlc1 %[ftmp13], 0x07(%[blimit]) \n\t"
"gsldrc1 %[ftmp13], 0x00(%[blimit]) \n\t"
"gsldlc1 %[ftmp7], 0x07(%[thresh]) \n\t"
"gsldrc1 %[ftmp7], 0x00(%[thresh]) \n\t"
/* abs (p0-q0) * 2 */
/* abs (p0-q0) */
"pasubub %[ftmp1], %[ftmp9], %[ftmp6] \n\t"
"paddusb %[ftmp1], %[ftmp1], %[ftmp1] \n\t"
/* abs (p1-q1) / 2 */
/* abs (p1-q1) */
"pasubub %[ftmp12], %[ftmp10], %[ftmp5] \n\t"
"and %[ftmp12], %[ftmp12], %[ff_pb_fe] \n\t"
"li %[tmp0], 0x01 \n\t"
"mtc1 %[tmp0], %[ftmp8] \n\t"
"psrlh %[ftmp12], %[ftmp12], %[ftmp8] \n\t"
"paddusb %[ftmp12], %[ftmp1], %[ftmp12] \n\t"
"psubusb %[ftmp12], %[ftmp12], %[ftmp13] \n\t"
"gsldlc1 %[ftmp8], 0x07(%[blimit]) \n\t"
"gsldrc1 %[ftmp8], 0x00(%[blimit]) \n\t"
"psubusb %[ftmp12], %[ftmp12], %[ftmp8] \n\t"
"or %[ftmp0], %[ftmp0], %[ftmp12] \n\t"
"xor %[ftmp12], %[ftmp12], %[ftmp12] \n\t"
/* ftmp0: mask */
"pcmpeqb %[ftmp0], %[ftmp0], %[ftmp12] \n\t"
/* abs(p1-p0) - thresh */
"psubusb %[ftmp4], %[ftmp4], %[ftmp7] \n\t"
/* abs(q1-q0) - thresh */
"psubusb %[ftmp3], %[ftmp3], %[ftmp7] \n\t"
"gsldlc1 %[ftmp8], 0x07(%[thresh]) \n\t"
"gsldrc1 %[ftmp8], 0x00(%[thresh]) \n\t"
/* ftmp3: abs(q1-q0) ftmp4: abs(p1-p0) */
"psubusb %[ftmp4], %[ftmp4], %[ftmp8] \n\t"
"psubusb %[ftmp3], %[ftmp3], %[ftmp8] \n\t"
"or %[ftmp3], %[ftmp4], %[ftmp3] \n\t"
"pcmpeqb %[ftmp3], %[ftmp3], %[ftmp12] \n\t"
"pcmpeqb %[ftmp1], %[ftmp1], %[ftmp1] \n\t"
/* ftmp1: hev */
"xor %[ftmp1], %[ftmp3], %[ftmp1] \n\t"
/* ftmp2:ps2, ftmp5:ps1, ftmp6:ps0, ftmp9:qs0, ftmp10:qs1, ftmp11:qs2 */
"xor %[ftmp11], %[ftmp11], %[ff_pb_80] \n\t"
"xor %[ftmp10], %[ftmp10], %[ff_pb_80] \n\t"
"xor %[ftmp9], %[ftmp9], %[ff_pb_80] \n\t"
@@ -816,30 +837,30 @@ void vp8_mbloop_filter_vertical_edge_mmi(
"paddsb %[ftmp3], %[ftmp3], %[ftmp4] \n\t"
"paddsb %[ftmp3], %[ftmp3], %[ftmp4] \n\t"
"paddsb %[ftmp3], %[ftmp3], %[ftmp4] \n\t"
/* filter_value &= mask */
"and %[ftmp0], %[ftmp0], %[ftmp3] \n\t"
/* Filter2 = filter_value & hev */
"and %[ftmp3], %[ftmp1], %[ftmp0] \n\t"
/* filter_value &= ~hev */
"pandn %[ftmp0], %[ftmp1], %[ftmp0] \n\t"
"paddsb %[ftmp4], %[ftmp3], %[ff_pb_04] \n\t"
"li %[tmp0], 0x0b \n\t"
"mtc1 %[tmp0], %[ftmp12] \n\t"
"xor %[ftmp7], %[ftmp7], %[ftmp7] \n\t"
"xor %[ftmp8], %[ftmp8], %[ftmp8] \n\t"
"punpcklbh %[ftmp7], %[ftmp7], %[ftmp4] \n\t"
"punpckhbh %[ftmp8], %[ftmp8], %[ftmp4] \n\t"
"psrah %[ftmp7], %[ftmp7], %[ftmp12] \n\t"
"psrah %[ftmp8], %[ftmp8], %[ftmp12] \n\t"
"packsshb %[ftmp4], %[ftmp7], %[ftmp8] \n\t"
/* ftmp9: qs0 */
"psubsb %[ftmp9], %[ftmp9], %[ftmp4] \n\t"
"paddsb %[ftmp3], %[ftmp3], %[ff_pb_03] \n\t"
"xor %[ftmp7], %[ftmp7], %[ftmp7] \n\t"
"xor %[ftmp8], %[ftmp8], %[ftmp8] \n\t"
"punpcklbh %[ftmp7], %[ftmp7], %[ftmp3] \n\t"
"punpckhbh %[ftmp8], %[ftmp8], %[ftmp3] \n\t"
"psrah %[ftmp7], %[ftmp7], %[ftmp12] \n\t"
"psrah %[ftmp8], %[ftmp8], %[ftmp12] \n\t"
"packsshb %[ftmp3], %[ftmp7], %[ftmp8] \n\t"
/* ftmp6: ps0 */
"paddsb %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
"li %[tmp0], 0x07 \n\t"
@@ -851,10 +872,8 @@ void vp8_mbloop_filter_vertical_edge_mmi(
"pmulhh %[ftmp8], %[ftmp8], %[ftmp1] \n\t"
VP8_MBLOOP_VPSRAB_ADDT
"psubsb %[ftmp4], %[ftmp9], %[ftmp3] \n\t"
/* ftmp9: oq0 */
"xor %[ftmp9], %[ftmp4], %[ff_pb_80] \n\t"
"paddsb %[ftmp4], %[ftmp6], %[ftmp3] \n\t"
/* ftmp6: op0 */
"xor %[ftmp6], %[ftmp4], %[ff_pb_80] \n\t"
VP8_MBLOOP_VPSRAB_ADDH
@@ -863,10 +882,8 @@ void vp8_mbloop_filter_vertical_edge_mmi(
"pmulhh %[ftmp8], %[ftmp8], %[ftmp1] \n\t"
VP8_MBLOOP_VPSRAB_ADDT
"psubsb %[ftmp4], %[ftmp10], %[ftmp3] \n\t"
/* ftmp10: oq1 */
"xor %[ftmp10], %[ftmp4], %[ff_pb_80] \n\t"
"paddsb %[ftmp4], %[ftmp5], %[ftmp3] \n\t"
/* ftmp5: op1 */
"xor %[ftmp5], %[ftmp4], %[ff_pb_80] \n\t"
VP8_MBLOOP_VPSRAB_ADDH
@@ -874,10 +891,8 @@ void vp8_mbloop_filter_vertical_edge_mmi(
"pmulhh %[ftmp8], %[ftmp8], %[ff_ph_0900] \n\t"
VP8_MBLOOP_VPSRAB_ADDT
"psubsb %[ftmp4], %[ftmp11], %[ftmp3] \n\t"
/* ftmp11: oq2 */
"xor %[ftmp11], %[ftmp4], %[ff_pb_80] \n\t"
"paddsb %[ftmp4], %[ftmp2], %[ftmp3] \n\t"
/* ftmp2: op2 */
"xor %[ftmp2], %[ftmp4], %[ff_pb_80] \n\t"
"ldc1 %[ftmp12], 0x00(%[srct]) \n\t"
@@ -901,40 +916,41 @@ void vp8_mbloop_filter_vertical_edge_mmi(
"punpcklhw %[ftmp10], %[ftmp1], %[ftmp3] \n\t"
"punpckhhw %[ftmp11], %[ftmp1], %[ftmp3] \n\t"
"punpcklwd %[ftmp0], %[ftmp7], %[ftmp11] \n\t"
"punpckhwd %[ftmp1], %[ftmp7], %[ftmp11] \n\t"
"gssdlc1 %[ftmp1], 0x07(%[src_ptr]) \n\t"
"gssdrc1 %[ftmp1], 0x00(%[src_ptr]) \n\t"
MMI_SUBU(%[src_ptr], %[src_ptr], %[src_pixel_step])
"gssdlc1 %[ftmp0], 0x07(%[src_ptr]) \n\t"
"gssdrc1 %[ftmp0], 0x00(%[src_ptr]) \n\t"
"punpcklwd %[ftmp0], %[ftmp6], %[ftmp10] \n\t"
"punpckhwd %[ftmp1], %[ftmp6], %[ftmp10] \n\t"
MMI_SUBU(%[src_ptr], %[src_ptr], %[src_pixel_step])
"gssdlc1 %[ftmp1], 0x07(%[src_ptr]) \n\t"
"gssdrc1 %[ftmp1], 0x00(%[src_ptr]) \n\t"
MMI_SUBU(%[src_ptr], %[src_ptr], %[src_pixel_step])
"gssdlc1 %[ftmp0], 0x07(%[src_ptr]) \n\t"
"gssdrc1 %[ftmp0], 0x00(%[src_ptr]) \n\t"
MMI_ADDU(%[addr0], %[src_ptr], %[src_pixel_step])
"gssdlc1 %[ftmp1], 0x07(%[addr0]) \n\t"
"gssdrc1 %[ftmp1], 0x00(%[addr0]) \n\t"
"punpcklwd %[ftmp0], %[ftmp7], %[ftmp11] \n\t"
"punpckhwd %[ftmp1], %[ftmp7], %[ftmp11] \n\t"
MMI_ADDU(%[addr0], %[addr0], %[src_pixel_step])
"gssdlc1 %[ftmp0], 0x07(%[addr0]) \n\t"
"gssdrc1 %[ftmp0], 0x00(%[addr0]) \n\t"
MMI_ADDU(%[addr0], %[addr0], %[src_pixel_step])
"gssdlc1 %[ftmp1], 0x07(%[addr0]) \n\t"
"gssdrc1 %[ftmp1], 0x00(%[addr0]) \n\t"
"punpcklwd %[ftmp1], %[ftmp5], %[ftmp9] \n\t"
"punpckhwd %[ftmp0], %[ftmp5], %[ftmp9] \n\t"
MMI_SUBU(%[src_ptr], %[src_ptr], %[src_pixel_step])
"gssdlc1 %[ftmp0], 0x07(%[src_ptr]) \n\t"
"gssdrc1 %[ftmp0], 0x00(%[src_ptr]) \n\t"
MMI_SUBU(%[src_ptr], %[src_ptr], %[src_pixel_step])
"gssdlc1 %[ftmp1], 0x07(%[src_ptr]) \n\t"
"gssdrc1 %[ftmp1], 0x00(%[src_ptr]) \n\t"
MMI_SUBU(%[addr0], %[src_ptr], %[src_pixel_step])
"gssdlc1 %[ftmp0], 0x07(%[addr0]) \n\t"
"gssdrc1 %[ftmp0], 0x00(%[addr0]) \n\t"
MMI_SUBU(%[addr0], %[addr0], %[src_pixel_step])
"gssdlc1 %[ftmp1], 0x07(%[addr0]) \n\t"
"gssdrc1 %[ftmp1], 0x00(%[addr0]) \n\t"
"punpcklwd %[ftmp1], %[ftmp4], %[ftmp8] \n\t"
"punpckhwd %[ftmp0], %[ftmp4], %[ftmp8] \n\t"
MMI_SUBU(%[src_ptr], %[src_ptr], %[src_pixel_step])
"gssdlc1 %[ftmp0], 0x07(%[src_ptr]) \n\t"
"gssdrc1 %[ftmp0], 0x00(%[src_ptr]) \n\t"
MMI_SUBU(%[src_ptr], %[src_ptr], %[src_pixel_step])
"gssdlc1 %[ftmp1], 0x07(%[src_ptr]) \n\t"
"gssdrc1 %[ftmp1], 0x00(%[src_ptr]) \n\t"
MMI_SUBU(%[addr0], %[addr0], %[src_pixel_step])
"gssdlc1 %[ftmp0], 0x07(%[addr0]) \n\t"
"gssdrc1 %[ftmp0], 0x00(%[addr0]) \n\t"
MMI_SUBU(%[addr0], %[addr0], %[src_pixel_step])
"gssdlc1 %[ftmp1], 0x07(%[addr0]) \n\t"
"gssdrc1 %[ftmp1], 0x00(%[addr0]) \n\t"
"addiu %[count], %[count], -0x01 \n\t"
MMI_SLL(%[tmp0], %[src_pixel_step], 0x03)
@@ -946,9 +962,9 @@ void vp8_mbloop_filter_vertical_edge_mmi(
[ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
[ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]),
[ftmp10]"=&f"(ftmp[10]), [ftmp11]"=&f"(ftmp[11]),
[ftmp12]"=&f"(ftmp[12]), [ftmp13]"=&f"(ftmp[13]),
[tmp0]"=&r"(tmp[0]), [src_ptr]"+&r"(src_ptr),
[count]"+&r"(count)
[ftmp12]"=&f"(ftmp[12]), [tmp0]"=&r"(tmp[0]),
[addr0]"=&r"(addr[0]),
[src_ptr]"+&r"(src_ptr), [count]"+&r"(count)
: [limit]"r"(limit), [blimit]"r"(blimit),
[srct]"r"(srct), [thresh]"r"(thresh),
[src_pixel_step]"r"((mips_reg)src_pixel_step),

View File

@@ -86,7 +86,6 @@ static INLINE void vp8_filter_block1d_h6_mmi(unsigned char *src_ptr,
register double ftmp8 asm("$f18");
register double ftmp9 asm("$f20");
register double ftmp10 asm("$f22");
register double ftmp11 asm("$f24");
#else
register double fzero asm("$f0");
register double ftmp0 asm("$f1");
@@ -100,7 +99,6 @@ static INLINE void vp8_filter_block1d_h6_mmi(unsigned char *src_ptr,
register double ftmp8 asm("$f9");
register double ftmp9 asm("$f10");
register double ftmp10 asm("$f11");
register double ftmp11 asm("$f12");
#endif // _MIPS_SIM == _ABIO32
__asm__ volatile (
@@ -114,13 +112,11 @@ static INLINE void vp8_filter_block1d_h6_mmi(unsigned char *src_ptr,
"li %[tmp0], 0x07 \n\t"
"mtc1 %[tmp0], %[ftmp7] \n\t"
"li %[tmp0], 0x08 \n\t"
"mtc1 %[tmp0], %[ftmp11] \n\t"
"mtc1 %[tmp0], %[ftmp10] \n\t"
"1: \n\t"
"gsldlc1 %[ftmp9], 0x05(%[src_ptr]) \n\t"
"gsldrc1 %[ftmp9], -0x02(%[src_ptr]) \n\t"
"gsldlc1 %[ftmp10], 0x06(%[src_ptr]) \n\t"
"gsldrc1 %[ftmp10], -0x01(%[src_ptr]) \n\t"
"gsldrc1 %[ftmp9], -0x02(%[src_ptr]) \n\t"
"punpcklbh %[ftmp6], %[ftmp9], %[fzero] \n\t"
"pmullh %[ftmp8], %[ftmp6], %[ftmp0] \n\t"
@@ -129,21 +125,24 @@ static INLINE void vp8_filter_block1d_h6_mmi(unsigned char *src_ptr,
"pmullh %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
"paddsh %[ftmp8], %[ftmp8], %[ftmp6] \n\t"
"punpcklbh %[ftmp6], %[ftmp10], %[fzero] \n\t"
"gsldlc1 %[ftmp9], 0x06(%[src_ptr]) \n\t"
"gsldrc1 %[ftmp9], -0x01(%[src_ptr]) \n\t"
"punpcklbh %[ftmp6], %[ftmp9], %[fzero] \n\t"
"pmullh %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
"paddsh %[ftmp8], %[ftmp8], %[ftmp6] \n\t"
"punpckhbh %[ftmp6], %[ftmp10], %[fzero] \n\t"
"punpckhbh %[ftmp6], %[ftmp9], %[fzero] \n\t"
"pmullh %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
"paddsh %[ftmp8], %[ftmp8], %[ftmp6] \n\t"
"dsrl %[ftmp10], %[ftmp10], %[ftmp11] \n\t"
"punpcklbh %[ftmp6], %[ftmp10], %[fzero] \n\t"
"dsrl %[ftmp9], %[ftmp9], %[ftmp10] \n\t"
"punpcklbh %[ftmp6], %[ftmp9], %[fzero] \n\t"
"pmullh %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
"paddsh %[ftmp8], %[ftmp8], %[ftmp6] \n\t"
"dsrl %[ftmp10], %[ftmp10], %[ftmp11] \n\t"
"punpcklbh %[ftmp6], %[ftmp10], %[fzero] \n\t"
"dsrl %[ftmp9], %[ftmp9], %[ftmp10] \n\t"
"punpcklbh %[ftmp6], %[ftmp9], %[fzero] \n\t"
"pmullh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
"paddsh %[ftmp8], %[ftmp8], %[ftmp6] \n\t"
@@ -164,9 +163,8 @@ static INLINE void vp8_filter_block1d_h6_mmi(unsigned char *src_ptr,
[ftmp5]"=&f"(ftmp5), [ftmp6]"=&f"(ftmp6),
[ftmp7]"=&f"(ftmp7), [ftmp8]"=&f"(ftmp8),
[ftmp9]"=&f"(ftmp9), [ftmp10]"=&f"(ftmp10),
[ftmp11]"=&f"(ftmp11), [tmp0]"=&r"(tmp[0]),
[output_ptr]"+&r"(output_ptr), [output_height]"+&r"(output_height),
[src_ptr]"+&r"(src_ptr)
[tmp0]"=&r"(tmp[0]), [src_ptr]"+&r"(src_ptr),
[output_ptr]"+&r"(output_ptr), [output_height]"+&r"(output_height)
: [src_pixels_per_line]"r"((mips_reg)src_pixels_per_line),
[vp8_filter]"r"(vp8_filter), [output_width]"r"(output_width),
[ff_ph_40]"f"(ff_ph_40)
@@ -192,11 +190,6 @@ static INLINE void vp8_filter_block1dc_v6_mmi(
register double ftmp6 asm("$f14");
register double ftmp7 asm("$f16");
register double ftmp8 asm("$f18");
register double ftmp9 asm("$f20");
register double ftmp10 asm("$f22");
register double ftmp11 asm("$f24");
register double ftmp12 asm("$f26");
register double ftmp13 asm("$f28");
#else
register double fzero asm("$f0");
register double ftmp0 asm("$f1");
@@ -208,11 +201,6 @@ static INLINE void vp8_filter_block1dc_v6_mmi(
register double ftmp6 asm("$f7");
register double ftmp7 asm("$f8");
register double ftmp8 asm("$f9");
register double ftmp9 asm("$f10");
register double ftmp10 asm("$f11");
register double ftmp11 asm("$f12");
register double ftmp12 asm("$f13");
register double ftmp13 asm("$f14");
#endif // _MIPS_SIM == _ABIO32
__asm__ volatile (
@@ -222,56 +210,52 @@ static INLINE void vp8_filter_block1dc_v6_mmi(
"ldc1 %[ftmp3], 0x30(%[vp8_filter]) \n\t"
"ldc1 %[ftmp4], 0x40(%[vp8_filter]) \n\t"
"ldc1 %[ftmp5], 0x50(%[vp8_filter]) \n\t"
MMI_SUBU(%[src_ptr], %[src_ptr], %[pixels_per_line_x2])
"xor %[fzero], %[fzero], %[fzero] \n\t"
"li %[tmp0], 0x07 \n\t"
"mtc1 %[tmp0], %[ftmp13] \n\t"
"mtc1 %[tmp0], %[ftmp7] \n\t"
/* In order to make full use of memory load delay slot,
* Operation of memory loading and calculating has been rearranged.
*/
"1: \n\t"
"gsldlc1 %[ftmp6], 0x07(%[src_ptr]) \n\t"
"gsldrc1 %[ftmp6], 0x00(%[src_ptr]) \n\t"
"pmullh %[ftmp8], %[ftmp6], %[ftmp0] \n\t"
MMI_ADDU(%[addr0], %[src_ptr], %[pixels_per_line])
"gsldlc1 %[ftmp7], 0x07(%[addr0]) \n\t"
"gsldrc1 %[ftmp7], 0x00(%[addr0]) \n\t"
"gsldlc1 %[ftmp6], 0x07(%[addr0]) \n\t"
"gsldrc1 %[ftmp6], 0x00(%[addr0]) \n\t"
"pmullh %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
"paddsh %[ftmp8], %[ftmp8], %[ftmp6] \n\t"
MMI_ADDU(%[addr0], %[src_ptr], %[pixels_per_line_x2])
"gsldlc1 %[ftmp8], 0x07(%[addr0]) \n\t"
"gsldrc1 %[ftmp8], 0x00(%[addr0]) \n\t"
"gsldlc1 %[ftmp6], 0x07(%[addr0]) \n\t"
"gsldrc1 %[ftmp6], 0x00(%[addr0]) \n\t"
"pmullh %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
"paddsh %[ftmp8], %[ftmp8], %[ftmp6] \n\t"
MMI_ADDU(%[addr0], %[src_ptr], %[pixels_per_line_x4])
"gsldlc1 %[ftmp9], 0x07(%[addr0]) \n\t"
"gsldrc1 %[ftmp9], 0x00(%[addr0]) \n\t"
"gsldlc1 %[ftmp6], 0x07(%[addr0]) \n\t"
"gsldrc1 %[ftmp6], 0x00(%[addr0]) \n\t"
"pmullh %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
"paddsh %[ftmp8], %[ftmp8], %[ftmp6] \n\t"
MMI_ADDU(%[src_ptr], %[src_ptr], %[pixels_per_line])
MMI_ADDU(%[addr0], %[src_ptr], %[pixels_per_line_x2])
"gsldlc1 %[ftmp10], 0x07(%[addr0]) \n\t"
"gsldrc1 %[ftmp10], 0x00(%[addr0]) \n\t"
"gsldlc1 %[ftmp6], 0x07(%[addr0]) \n\t"
"gsldrc1 %[ftmp6], 0x00(%[addr0]) \n\t"
"pmullh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
"paddsh %[ftmp8], %[ftmp8], %[ftmp6] \n\t"
MMI_ADDU(%[addr0], %[src_ptr], %[pixels_per_line_x4])
"gsldlc1 %[ftmp11], 0x07(%[addr0]) \n\t"
"gsldrc1 %[ftmp11], 0x00(%[addr0]) \n\t"
"gsldlc1 %[ftmp6], 0x07(%[addr0]) \n\t"
"gsldrc1 %[ftmp6], 0x00(%[addr0]) \n\t"
"pmullh %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
"paddsh %[ftmp8], %[ftmp8], %[ftmp6] \n\t"
"pmullh %[ftmp12], %[ftmp6], %[ftmp0] \n\t"
"pmullh %[ftmp7], %[ftmp7], %[ftmp1] \n\t"
"paddsh %[ftmp12], %[ftmp12], %[ftmp7] \n\t"
"pmullh %[ftmp8], %[ftmp8], %[ftmp2] \n\t"
"paddsh %[ftmp12], %[ftmp12], %[ftmp8] \n\t"
"pmullh %[ftmp9], %[ftmp9], %[ftmp4] \n\t"
"paddsh %[ftmp12], %[ftmp12], %[ftmp9] \n\t"
"pmullh %[ftmp10], %[ftmp10], %[ftmp3] \n\t"
"paddsh %[ftmp12], %[ftmp12], %[ftmp10] \n\t"
"pmullh %[ftmp11], %[ftmp11], %[ftmp5] \n\t"
"paddsh %[ftmp12], %[ftmp12], %[ftmp11] \n\t"
"paddsh %[ftmp12], %[ftmp12], %[ff_ph_40] \n\t"
"psrah %[ftmp12], %[ftmp12], %[ftmp13] \n\t"
"packushb %[ftmp12], %[ftmp12], %[fzero] \n\t"
"gsswlc1 %[ftmp12], 0x03(%[output_ptr]) \n\t"
"gsswrc1 %[ftmp12], 0x00(%[output_ptr]) \n\t"
"paddsh %[ftmp8], %[ftmp8], %[ff_ph_40] \n\t"
"psrah %[ftmp8], %[ftmp8], %[ftmp7] \n\t"
"packushb %[ftmp8], %[ftmp8], %[fzero] \n\t"
"gsswlc1 %[ftmp8], 0x03(%[output_ptr]) \n\t"
"gsswrc1 %[ftmp8], 0x00(%[output_ptr]) \n\t"
MMI_ADDIU(%[output_height], %[output_height], -0x01)
MMI_ADDU(%[output_ptr], %[output_ptr], %[output_pitch])
@@ -281,11 +265,9 @@ static INLINE void vp8_filter_block1dc_v6_mmi(
[ftmp3]"=&f"(ftmp3), [ftmp4]"=&f"(ftmp4),
[ftmp5]"=&f"(ftmp5), [ftmp6]"=&f"(ftmp6),
[ftmp7]"=&f"(ftmp7), [ftmp8]"=&f"(ftmp8),
[ftmp9]"=&f"(ftmp9), [ftmp10]"=&f"(ftmp10),
[ftmp11]"=&f"(ftmp11), [ftmp12]"=&f"(ftmp12),
[ftmp13]"=&f"(ftmp13), [tmp0]"=&r"(tmp[0]),
[addr0]"=&r"(addr[0]), [src_ptr]"+&r"(src_ptr),
[output_ptr]"+&r"(output_ptr), [output_height]"+&r"(output_height)
[tmp0]"=&r"(tmp[0]), [addr0]"=&r"(addr[0]),
[src_ptr]"+&r"(src_ptr), [output_ptr]"+&r"(output_ptr),
[output_height]"+&r"(output_height)
: [pixels_per_line]"r"((mips_reg)pixels_per_line),
[pixels_per_line_x2]"r"((mips_reg)(pixels_per_line<<1)),
[pixels_per_line_x4]"r"((mips_reg)(pixels_per_line<<2)),
@@ -319,7 +301,6 @@ static INLINE void vp8_filter_block1d_h6_filter0_mmi(
"1: \n\t"
"gsldlc1 %[ftmp0], 0x07(%[src_ptr]) \n\t"
"gsldrc1 %[ftmp0], 0x00(%[src_ptr]) \n\t"
MMI_ADDU(%[src_ptr], %[src_ptr], %[src_pixels_per_line])
"punpcklbh %[ftmp1], %[ftmp0], %[fzero] \n\t"
"gssdlc1 %[ftmp1], 0x07(%[output_ptr]) \n\t"
@@ -327,6 +308,7 @@ static INLINE void vp8_filter_block1d_h6_filter0_mmi(
"addiu %[output_height], %[output_height], -0x01 \n\t"
MMI_ADDU(%[output_ptr], %[output_ptr], %[output_width])
MMI_ADDU(%[src_ptr], %[src_ptr], %[src_pixels_per_line])
"bnez %[output_height], 1b \n\t"
: [fzero]"=&f"(fzero), [ftmp0]"=&f"(ftmp0),
[ftmp1]"=&f"(ftmp1), [src_ptr]"+&r"(src_ptr),
@@ -356,12 +338,12 @@ static INLINE void vp8_filter_block1dc_v6_filter0_mmi(
"1: \n\t"
"gsldlc1 %[ftmp0], 0x07(%[src_ptr]) \n\t"
"gsldrc1 %[ftmp0], 0x00(%[src_ptr]) \n\t"
MMI_ADDU(%[src_ptr], %[src_ptr], %[pixels_per_line])
MMI_ADDIU(%[output_height], %[output_height], -0x01)
"packushb %[ftmp1], %[ftmp0], %[fzero] \n\t"
"gsswlc1 %[ftmp1], 0x03(%[output_ptr]) \n\t"
"gsswrc1 %[ftmp1], 0x00(%[output_ptr]) \n\t"
MMI_ADDU(%[src_ptr], %[src_ptr], %[pixels_per_line])
MMI_ADDIU(%[output_height], %[output_height], -0x01)
MMI_ADDU(%[output_ptr], %[output_ptr], %[output_pitch])
"bnez %[output_height], 1b \n\t"
: [fzero]"=&f"(fzero), [ftmp0]"=&f"(ftmp0),
@@ -404,7 +386,7 @@ static INLINE void vp8_filter_block1dc_v6_filter0_mmi(
} \
} else { \
for (i = 0; i < loop; ++i) { \
vp8_filter_block1dc_v6_mmi(FData2 + i * 4, dst_ptr + i * 4, m, \
vp8_filter_block1dc_v6_mmi(FData2 + n * 2 + i * 4, dst_ptr + i * 4, m, \
dst_pitch, n * 2, VFilter); \
} \
} \

View File

@@ -1,13 +1,3 @@
##
## Copyright (c) 2017 The WebM project authors. All Rights Reserved.
##
## Use of this source code is governed by a BSD-style license
## that can be found in the LICENSE file in the root of the source
## tree. An additional intellectual property rights grant can be found
## in the file PATENTS. All contributing project authors may
## be found in the AUTHORS file in the root of the source tree.
##
sub vp8_common_forward_decls() {
print <<EOF
/*

View File

@@ -23,7 +23,6 @@
#include "modecosts.h"
#include "encodeintra.h"
#include "pickinter.h"
#include "vp8/common/common.h"
#include "vp8/common/entropymode.h"
#include "vp8/common/reconinter.h"
#include "vp8/common/reconintra.h"
@@ -960,13 +959,19 @@ static void rd_check_segment(VP8_COMP *cpi, MACROBLOCK *x, BEST_SEG_INFO *bsi,
vp8_variance_fn_ptr_t *v_fn_ptr;
ENTROPY_CONTEXT_PLANES t_above, t_left;
ENTROPY_CONTEXT *ta;
ENTROPY_CONTEXT *tl;
ENTROPY_CONTEXT_PLANES t_above_b, t_left_b;
ENTROPY_CONTEXT *ta_b;
ENTROPY_CONTEXT *tl_b;
memcpy(&t_above, x->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES));
memcpy(&t_left, x->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES));
vp8_zero(t_above_b);
vp8_zero(t_left_b);
ta = (ENTROPY_CONTEXT *)&t_above;
tl = (ENTROPY_CONTEXT *)&t_left;
ta_b = (ENTROPY_CONTEXT *)&t_above_b;
tl_b = (ENTROPY_CONTEXT *)&t_left_b;
br = 0;
bd = 0;
@@ -1146,13 +1151,13 @@ static void rd_check_segment(VP8_COMP *cpi, MACROBLOCK *x, BEST_SEG_INFO *bsi,
mode_selected = this_mode;
best_label_rd = this_rd;
memcpy(&t_above_b, &t_above_s, sizeof(ENTROPY_CONTEXT_PLANES));
memcpy(&t_left_b, &t_left_s, sizeof(ENTROPY_CONTEXT_PLANES));
memcpy(ta_b, ta_s, sizeof(ENTROPY_CONTEXT_PLANES));
memcpy(tl_b, tl_s, sizeof(ENTROPY_CONTEXT_PLANES));
}
} /*for each 4x4 mode*/
memcpy(&t_above, &t_above_b, sizeof(ENTROPY_CONTEXT_PLANES));
memcpy(&t_left, &t_left_b, sizeof(ENTROPY_CONTEXT_PLANES));
memcpy(ta, ta_b, sizeof(ENTROPY_CONTEXT_PLANES));
memcpy(tl, tl_b, sizeof(ENTROPY_CONTEXT_PLANES));
labels2mode(x, labels, i, mode_selected, &mode_mv[mode_selected],
bsi->ref_mv, x->mvcost);

View File

@@ -1,13 +1,3 @@
##
## Copyright (c) 2017 The WebM project authors. All Rights Reserved.
##
## Use of this source code is governed by a BSD-style license
## that can be found in the LICENSE file in the root of the source
## tree. An additional intellectual property rights grant can be found
## in the file PATENTS. All contributing project authors may
## be found in the AUTHORS file in the root of the source tree.
##
sub vp9_common_forward_decls() {
print <<EOF
/*

View File

@@ -189,12 +189,11 @@ static VP9_DENOISER_DECISION perform_motion_compensation(
int increase_denoising, int mi_row, int mi_col, PICK_MODE_CONTEXT *ctx,
int motion_magnitude, int is_skin, int *zeromv_filter, int consec_zeromv,
int num_spatial_layers, int width, int lst_fb_idx, int gld_fb_idx,
int use_svc, int spatial_layer) {
int use_svc) {
const int sse_diff = (ctx->newmv_sse == UINT_MAX)
? 0
: ((int)ctx->zeromv_sse - (int)ctx->newmv_sse);
int frame;
int denoise_layer_idx = 0;
MACROBLOCKD *filter_mbd = &mb->e_mbd;
MODE_INFO *mi = filter_mbd->mi[0];
MODE_INFO saved_mi;
@@ -255,10 +254,6 @@ static VP9_DENOISER_DECISION perform_motion_compensation(
frame = lst_fb_idx + 1;
else if (frame == GOLDEN_FRAME)
frame = gld_fb_idx + 1;
// Shift for the second spatial layer.
if (num_spatial_layers - spatial_layer == 2)
frame = frame + denoiser->num_ref_frames;
denoise_layer_idx = num_spatial_layers - spatial_layer - 1;
}
if (ctx->newmv_sse > sse_thresh(bs, increase_denoising)) {
@@ -294,21 +289,18 @@ static VP9_DENOISER_DECISION perform_motion_compensation(
denoiser->running_avg_y[frame].uv_stride, mi_row, mi_col);
filter_mbd->plane[2].pre[0].stride = denoiser->running_avg_y[frame].uv_stride;
filter_mbd->plane[0].dst.buf = block_start(
denoiser->mc_running_avg_y[denoise_layer_idx].y_buffer,
denoiser->mc_running_avg_y[denoise_layer_idx].y_stride, mi_row, mi_col);
filter_mbd->plane[0].dst.stride =
denoiser->mc_running_avg_y[denoise_layer_idx].y_stride;
filter_mbd->plane[1].dst.buf = block_start(
denoiser->mc_running_avg_y[denoise_layer_idx].u_buffer,
denoiser->mc_running_avg_y[denoise_layer_idx].uv_stride, mi_row, mi_col);
filter_mbd->plane[1].dst.stride =
denoiser->mc_running_avg_y[denoise_layer_idx].uv_stride;
filter_mbd->plane[2].dst.buf = block_start(
denoiser->mc_running_avg_y[denoise_layer_idx].v_buffer,
denoiser->mc_running_avg_y[denoise_layer_idx].uv_stride, mi_row, mi_col);
filter_mbd->plane[2].dst.stride =
denoiser->mc_running_avg_y[denoise_layer_idx].uv_stride;
filter_mbd->plane[0].dst.buf =
block_start(denoiser->mc_running_avg_y.y_buffer,
denoiser->mc_running_avg_y.y_stride, mi_row, mi_col);
filter_mbd->plane[0].dst.stride = denoiser->mc_running_avg_y.y_stride;
filter_mbd->plane[1].dst.buf =
block_start(denoiser->mc_running_avg_y.u_buffer,
denoiser->mc_running_avg_y.uv_stride, mi_row, mi_col);
filter_mbd->plane[1].dst.stride = denoiser->mc_running_avg_y.uv_stride;
filter_mbd->plane[2].dst.buf =
block_start(denoiser->mc_running_avg_y.v_buffer,
denoiser->mc_running_avg_y.uv_stride, mi_row, mi_col);
filter_mbd->plane[2].dst.stride = denoiser->mc_running_avg_y.uv_stride;
set_ref_ptrs(cm, filter_mbd, saved_frame, NONE);
vp9_build_inter_predictors_sby(filter_mbd, mi_row, mi_col, bs);
@@ -332,17 +324,9 @@ void vp9_denoiser_denoise(VP9_COMP *cpi, MACROBLOCK *mb, int mi_row, int mi_col,
int zeromv_filter = 0;
VP9_DENOISER *denoiser = &cpi->denoiser;
VP9_DENOISER_DECISION decision = COPY_BLOCK;
const int shift =
cpi->svc.number_spatial_layers - cpi->svc.spatial_layer_id == 2
? denoiser->num_ref_frames
: 0;
YV12_BUFFER_CONFIG avg = denoiser->running_avg_y[INTRA_FRAME + shift];
const int denoise_layer_index =
cpi->svc.number_spatial_layers - cpi->svc.spatial_layer_id - 1;
YV12_BUFFER_CONFIG mc_avg = denoiser->mc_running_avg_y[denoise_layer_index];
YV12_BUFFER_CONFIG avg = denoiser->running_avg_y[INTRA_FRAME];
YV12_BUFFER_CONFIG mc_avg = denoiser->mc_running_avg_y;
uint8_t *avg_start = block_start(avg.y_buffer, avg.y_stride, mi_row, mi_col);
uint8_t *mc_avg_start =
block_start(mc_avg.y_buffer, mc_avg.y_stride, mi_row, mi_col);
struct buf_2d src = mb->plane[0].src;
@@ -397,7 +381,7 @@ void vp9_denoiser_denoise(VP9_COMP *cpi, MACROBLOCK *mb, int mi_row, int mi_col,
&cpi->common, denoiser, mb, bs, increase_denoising, mi_row, mi_col, ctx,
motion_magnitude, is_skin, &zeromv_filter, consec_zeromv,
cpi->svc.number_spatial_layers, cpi->Source->y_width, cpi->lst_fb_idx,
cpi->gld_fb_idx, cpi->use_svc, cpi->svc.spatial_layer_id);
cpi->gld_fb_idx, cpi->use_svc);
if (decision == FILTER_BLOCK) {
decision = vp9_denoiser_filter(src.buf, src.stride, mc_avg_start,
@@ -448,8 +432,7 @@ void vp9_denoiser_update_frame_info(
VP9_DENOISER *denoiser, YV12_BUFFER_CONFIG src, FRAME_TYPE frame_type,
int refresh_alt_ref_frame, int refresh_golden_frame, int refresh_last_frame,
int alt_fb_idx, int gld_fb_idx, int lst_fb_idx, int resized,
int svc_base_is_key, int second_spatial_layer) {
const int shift = second_spatial_layer ? denoiser->num_ref_frames : 0;
int svc_base_is_key) {
// Copy source into denoised reference buffers on KEY_FRAME or
// if the just encoded frame was resized. For SVC, copy source if the base
// spatial layer was key frame.
@@ -458,8 +441,8 @@ void vp9_denoiser_update_frame_info(
int i;
// Start at 1 so as not to overwrite the INTRA_FRAME
for (i = 1; i < denoiser->num_ref_frames; ++i) {
if (denoiser->running_avg_y[i + shift].buffer_alloc != NULL)
copy_frame(&denoiser->running_avg_y[i + shift], &src);
if (denoiser->running_avg_y[i].buffer_alloc != NULL)
copy_frame(&denoiser->running_avg_y[i], &src);
}
denoiser->reset = 0;
return;
@@ -468,29 +451,29 @@ void vp9_denoiser_update_frame_info(
// If more than one refresh occurs, must copy frame buffer.
if ((refresh_alt_ref_frame + refresh_golden_frame + refresh_last_frame) > 1) {
if (refresh_alt_ref_frame) {
copy_frame(&denoiser->running_avg_y[alt_fb_idx + 1 + shift],
&denoiser->running_avg_y[INTRA_FRAME + shift]);
copy_frame(&denoiser->running_avg_y[alt_fb_idx + 1],
&denoiser->running_avg_y[INTRA_FRAME]);
}
if (refresh_golden_frame) {
copy_frame(&denoiser->running_avg_y[gld_fb_idx + 1 + shift],
&denoiser->running_avg_y[INTRA_FRAME + shift]);
copy_frame(&denoiser->running_avg_y[gld_fb_idx + 1],
&denoiser->running_avg_y[INTRA_FRAME]);
}
if (refresh_last_frame) {
copy_frame(&denoiser->running_avg_y[lst_fb_idx + 1 + shift],
&denoiser->running_avg_y[INTRA_FRAME + shift]);
copy_frame(&denoiser->running_avg_y[lst_fb_idx + 1],
&denoiser->running_avg_y[INTRA_FRAME]);
}
} else {
if (refresh_alt_ref_frame) {
swap_frame_buffer(&denoiser->running_avg_y[alt_fb_idx + 1 + shift],
&denoiser->running_avg_y[INTRA_FRAME + shift]);
swap_frame_buffer(&denoiser->running_avg_y[alt_fb_idx + 1],
&denoiser->running_avg_y[INTRA_FRAME]);
}
if (refresh_golden_frame) {
swap_frame_buffer(&denoiser->running_avg_y[gld_fb_idx + 1 + shift],
&denoiser->running_avg_y[INTRA_FRAME + shift]);
swap_frame_buffer(&denoiser->running_avg_y[gld_fb_idx + 1],
&denoiser->running_avg_y[INTRA_FRAME]);
}
if (refresh_last_frame) {
swap_frame_buffer(&denoiser->running_avg_y[lst_fb_idx + 1 + shift],
&denoiser->running_avg_y[INTRA_FRAME + shift]);
swap_frame_buffer(&denoiser->running_avg_y[lst_fb_idx + 1],
&denoiser->running_avg_y[INTRA_FRAME]);
}
}
}
@@ -539,90 +522,44 @@ static int vp9_denoiser_realloc_svc_helper(VP9_COMMON *cm,
}
int vp9_denoiser_realloc_svc(VP9_COMMON *cm, VP9_DENOISER *denoiser,
int svc_buf_shift, int refresh_alt,
int refresh_gld, int refresh_lst, int alt_fb_idx,
int gld_fb_idx, int lst_fb_idx) {
int refresh_alt, int refresh_gld, int refresh_lst,
int alt_fb_idx, int gld_fb_idx, int lst_fb_idx) {
int fail = 0;
if (refresh_alt) {
// Increase the frame buffer index by 1 to map it to the buffer index in the
// denoiser.
fail = vp9_denoiser_realloc_svc_helper(cm, denoiser,
alt_fb_idx + 1 + svc_buf_shift);
fail = vp9_denoiser_realloc_svc_helper(cm, denoiser, alt_fb_idx + 1);
if (fail) return 1;
}
if (refresh_gld) {
fail = vp9_denoiser_realloc_svc_helper(cm, denoiser,
gld_fb_idx + 1 + svc_buf_shift);
fail = vp9_denoiser_realloc_svc_helper(cm, denoiser, gld_fb_idx + 1);
if (fail) return 1;
}
if (refresh_lst) {
fail = vp9_denoiser_realloc_svc_helper(cm, denoiser,
lst_fb_idx + 1 + svc_buf_shift);
fail = vp9_denoiser_realloc_svc_helper(cm, denoiser, lst_fb_idx + 1);
if (fail) return 1;
}
return 0;
}
int vp9_denoiser_alloc(VP9_COMMON *cm, struct SVC *svc, VP9_DENOISER *denoiser,
int use_svc, int noise_sen, int width, int height,
int ssx, int ssy,
int vp9_denoiser_alloc(VP9_COMMON *cm, int use_svc, VP9_DENOISER *denoiser,
int width, int height, int ssx, int ssy,
#if CONFIG_VP9_HIGHBITDEPTH
int use_highbitdepth,
#endif
int border) {
int i, layer, fail, init_num_ref_frames;
int i, fail, init_num_ref_frames;
const int legacy_byte_alignment = 0;
int num_layers = 1;
int scaled_width = width;
int scaled_height = height;
if (use_svc) {
LAYER_CONTEXT *lc = &svc->layer_context[svc->spatial_layer_id *
svc->number_temporal_layers +
svc->temporal_layer_id];
get_layer_resolution(width, height, lc->scaling_factor_num,
lc->scaling_factor_den, &scaled_width, &scaled_height);
// For SVC: only denoise at most 2 spatial (highest) layers.
if (noise_sen >= 2)
// Denoise from one spatial layer below the top.
svc->first_layer_denoise = VPXMAX(svc->number_spatial_layers - 2, 0);
else
// Only denoise the top spatial layer.
svc->first_layer_denoise = VPXMAX(svc->number_spatial_layers - 1, 0);
num_layers = svc->number_spatial_layers - svc->first_layer_denoise;
}
assert(denoiser != NULL);
denoiser->num_ref_frames = use_svc ? SVC_REF_FRAMES : NONSVC_REF_FRAMES;
init_num_ref_frames = use_svc ? MAX_REF_FRAMES : NONSVC_REF_FRAMES;
denoiser->num_layers = num_layers;
CHECK_MEM_ERROR(cm, denoiser->running_avg_y,
vpx_calloc(denoiser->num_ref_frames * num_layers,
sizeof(denoiser->running_avg_y[0])));
CHECK_MEM_ERROR(
cm, denoiser->mc_running_avg_y,
vpx_calloc(num_layers, sizeof(denoiser->mc_running_avg_y[0])));
for (layer = 0; layer < num_layers; ++layer) {
const int denoise_width = (layer == 0) ? width : scaled_width;
const int denoise_height = (layer == 0) ? height : scaled_height;
for (i = 0; i < init_num_ref_frames; ++i) {
fail = vpx_alloc_frame_buffer(
&denoiser->running_avg_y[i + denoiser->num_ref_frames * layer],
denoise_width, denoise_height, ssx, ssy,
#if CONFIG_VP9_HIGHBITDEPTH
use_highbitdepth,
#endif
border, legacy_byte_alignment);
if (fail) {
vp9_denoiser_free(denoiser);
return 1;
}
#ifdef OUTPUT_YUV_DENOISED
make_grayscale(&denoiser->running_avg_y[i]);
#endif
}
fail = vpx_alloc_frame_buffer(&denoiser->mc_running_avg_y[layer],
denoise_width, denoise_height, ssx, ssy,
cm, denoiser->running_avg_y,
vpx_calloc(denoiser->num_ref_frames, sizeof(denoiser->running_avg_y[0])));
for (i = 0; i < init_num_ref_frames; ++i) {
fail = vpx_alloc_frame_buffer(&denoiser->running_avg_y[i], width, height,
ssx, ssy,
#if CONFIG_VP9_HIGHBITDEPTH
use_highbitdepth,
#endif
@@ -631,10 +568,22 @@ int vp9_denoiser_alloc(VP9_COMMON *cm, struct SVC *svc, VP9_DENOISER *denoiser,
vp9_denoiser_free(denoiser);
return 1;
}
#ifdef OUTPUT_YUV_DENOISED
make_grayscale(&denoiser->running_avg_y[i]);
#endif
}
fail = vpx_alloc_frame_buffer(&denoiser->mc_running_avg_y, width, height, ssx,
ssy,
#if CONFIG_VP9_HIGHBITDEPTH
use_highbitdepth,
#endif
border, legacy_byte_alignment);
if (fail) {
vp9_denoiser_free(denoiser);
return 1;
}
// denoiser->last_source only used for noise_estimation, so only for top
// layer.
fail = vpx_alloc_frame_buffer(&denoiser->last_source, width, height, ssx, ssy,
#if CONFIG_VP9_HIGHBITDEPTH
use_highbitdepth,
@@ -660,18 +609,12 @@ void vp9_denoiser_free(VP9_DENOISER *denoiser) {
return;
}
denoiser->frame_buffer_initialized = 0;
for (i = 0; i < denoiser->num_ref_frames * denoiser->num_layers; ++i) {
for (i = 0; i < denoiser->num_ref_frames; ++i) {
vpx_free_frame_buffer(&denoiser->running_avg_y[i]);
}
vpx_free(denoiser->running_avg_y);
denoiser->running_avg_y = NULL;
for (i = 0; i < denoiser->num_layers; ++i) {
vpx_free_frame_buffer(&denoiser->mc_running_avg_y[i]);
}
vpx_free(denoiser->mc_running_avg_y);
denoiser->mc_running_avg_y = NULL;
vpx_free_frame_buffer(&denoiser->mc_running_avg_y);
vpx_free_frame_buffer(&denoiser->last_source);
}

View File

@@ -44,12 +44,11 @@ typedef enum vp9_denoiser_level {
typedef struct vp9_denoiser {
YV12_BUFFER_CONFIG *running_avg_y;
YV12_BUFFER_CONFIG *mc_running_avg_y;
YV12_BUFFER_CONFIG mc_running_avg_y;
YV12_BUFFER_CONFIG last_source;
int frame_buffer_initialized;
int reset;
int num_ref_frames;
int num_layers;
VP9_DENOISER_LEVEL denoising_level;
VP9_DENOISER_LEVEL prev_denoising_level;
} VP9_DENOISER;
@@ -67,13 +66,12 @@ typedef struct {
} VP9_PICKMODE_CTX_DEN;
struct VP9_COMP;
struct SVC;
void vp9_denoiser_update_frame_info(
VP9_DENOISER *denoiser, YV12_BUFFER_CONFIG src, FRAME_TYPE frame_type,
int refresh_alt_ref_frame, int refresh_golden_frame, int refresh_last_frame,
int alt_fb_idx, int gld_fb_idx, int lst_fb_idx, int resized,
int svc_base_is_key, int second_spatial_layer);
int svc_base_is_key);
void vp9_denoiser_denoise(struct VP9_COMP *cpi, MACROBLOCK *mb, int mi_row,
int mi_col, BLOCK_SIZE bs, PICK_MODE_CONTEXT *ctx,
@@ -86,13 +84,11 @@ void vp9_denoiser_update_frame_stats(MODE_INFO *mi, unsigned int sse,
PICK_MODE_CONTEXT *ctx);
int vp9_denoiser_realloc_svc(VP9_COMMON *cm, VP9_DENOISER *denoiser,
int svc_buf_shift, int refresh_alt,
int refresh_gld, int refresh_lst, int alt_fb_idx,
int gld_fb_idx, int lst_fb_idx);
int refresh_alt, int refresh_gld, int refresh_lst,
int alt_fb_idx, int gld_fb_idx, int lst_fb_idx);
int vp9_denoiser_alloc(VP9_COMMON *cm, struct SVC *svc, VP9_DENOISER *denoiser,
int use_svc, int noise_sen, int width, int height,
int ssx, int ssy,
int vp9_denoiser_alloc(VP9_COMMON *cm, int use_svc, VP9_DENOISER *denoiser,
int width, int height, int ssx, int ssy,
#if CONFIG_VP9_HIGHBITDEPTH
int use_highbitdepth,
#endif

View File

@@ -437,37 +437,34 @@ static int is_psnr_calc_enabled(VP9_COMP *cpi) {
/* clang-format off */
const Vp9LevelSpec vp9_level_defs[VP9_LEVELS] = {
// sample rate size breadth bitrate cpb
{ LEVEL_1, 829440, 36864, 512, 200, 400, 2, 1, 4, 8 },
{ LEVEL_1_1, 2764800, 73728, 768, 800, 1000, 2, 1, 4, 8 },
{ LEVEL_2, 4608000, 122880, 960, 1800, 1500, 2, 1, 4, 8 },
{ LEVEL_2_1, 9216000, 245760, 1344, 3600, 2800, 2, 2, 4, 8 },
{ LEVEL_3, 20736000, 552960, 2048, 7200, 6000, 2, 4, 4, 8 },
{ LEVEL_3_1, 36864000, 983040, 2752, 12000, 10000, 2, 4, 4, 8 },
{ LEVEL_4, 83558400, 2228224, 4160, 18000, 16000, 4, 4, 4, 8 },
{ LEVEL_4_1, 160432128, 2228224, 4160, 30000, 18000, 4, 4, 5, 6 },
{ LEVEL_5, 311951360, 8912896, 8384, 60000, 36000, 6, 8, 6, 4 },
{ LEVEL_5_1, 588251136, 8912896, 8384, 120000, 46000, 8, 8, 10, 4 },
{ LEVEL_1, 829440, 36864, 200, 400, 2, 1, 4, 8 },
{ LEVEL_1_1, 2764800, 73728, 800, 1000, 2, 1, 4, 8 },
{ LEVEL_2, 4608000, 122880, 1800, 1500, 2, 1, 4, 8 },
{ LEVEL_2_1, 9216000, 245760, 3600, 2800, 2, 2, 4, 8 },
{ LEVEL_3, 20736000, 552960, 7200, 6000, 2, 4, 4, 8 },
{ LEVEL_3_1, 36864000, 983040, 12000, 10000, 2, 4, 4, 8 },
{ LEVEL_4, 83558400, 2228224, 18000, 16000, 4, 4, 4, 8 },
{ LEVEL_4_1, 160432128, 2228224, 30000, 18000, 4, 4, 5, 6 },
{ LEVEL_5, 311951360, 8912896, 60000, 36000, 6, 8, 6, 4 },
{ LEVEL_5_1, 588251136, 8912896, 120000, 46000, 8, 8, 10, 4 },
// TODO(huisu): update max_cpb_size for level 5_2 ~ 6_2 when
// they are finalized (currently tentative).
{ LEVEL_5_2, 1176502272, 8912896, 8384, 180000, 90000, 8, 8, 10, 4 },
{ LEVEL_6, 1176502272, 35651584, 16832, 180000, 90000, 8, 16, 10, 4 },
{ LEVEL_6_1, 2353004544u, 35651584, 16832, 240000, 180000, 8, 16, 10, 4 },
{ LEVEL_6_2, 4706009088u, 35651584, 16832, 480000, 360000, 8, 16, 10, 4 },
{ LEVEL_5_2, 1176502272, 8912896, 180000, 90000, 8, 8, 10, 4 },
{ LEVEL_6, 1176502272, 35651584, 180000, 90000, 8, 16, 10, 4 },
{ LEVEL_6_1, 2353004544u, 35651584, 240000, 180000, 8, 16, 10, 4 },
{ LEVEL_6_2, 4706009088u, 35651584, 480000, 360000, 8, 16, 10, 4 },
};
/* clang-format on */
static const char *level_fail_messages[TARGET_LEVEL_FAIL_IDS] = {
"The average bit-rate is too high.",
"The picture size is too large.",
"The picture width/height is too large.",
"The luma sample rate is too large.",
"The CPB size is too large.",
"The compression ratio is too small",
"Too many column tiles are used.",
"The alt-ref distance is too small.",
"Too many reference buffers are used."
};
static const char *level_fail_messages[TARGET_LEVEL_FAIL_IDS] =
{ "The average bit-rate is too high.",
"The picture size is too large.",
"The luma sample rate is too large.",
"The CPB size is too large.",
"The compression ratio is too small",
"Too many column tiles are used.",
"The alt-ref distance is too small.",
"Too many reference buffers are used." };
static INLINE void Scale2Ratio(VPX_SCALING mode, int *hr, int *hs) {
switch (mode) {
@@ -569,8 +566,6 @@ VP9_LEVEL vp9_get_level(const Vp9LevelSpec *const level_spec) {
(double)this_level->max_luma_sample_rate *
(1 + SAMPLE_RATE_GRACE_P) ||
level_spec->max_luma_picture_size > this_level->max_luma_picture_size ||
level_spec->max_luma_picture_breadth >
this_level->max_luma_picture_breadth ||
level_spec->average_bitrate > this_level->average_bitrate ||
level_spec->max_cpb_size > this_level->max_cpb_size ||
level_spec->compression_ratio < this_level->compression_ratio ||
@@ -1224,8 +1219,8 @@ static void set_tile_limits(VP9_COMP *cpi) {
}
if (cpi->oxcf.target_level == LEVEL_AUTO) {
const int level_tile_cols =
log_tile_cols_from_picsize_level(cpi->common.width, cpi->common.height);
const uint32_t pic_size = cpi->common.width * cpi->common.height;
const int level_tile_cols = log_tile_cols_from_picsize_level(pic_size);
if (cm->log2_tile_cols > level_tile_cols) {
cm->log2_tile_cols = VPXMAX(level_tile_cols, min_log2_tile_cols);
}
@@ -1853,8 +1848,6 @@ void vp9_change_config(struct VP9_COMP *cpi, const VP9EncoderConfig *oxcf) {
cm->mi_rows * cm->mi_cols * sizeof(*cpi->consec_zero_mv));
if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ)
vp9_cyclic_refresh_reset_resize(cpi);
rc->rc_1_frame = 0;
rc->rc_2_frame = 0;
}
if ((cpi->svc.number_temporal_layers > 1 && cpi->oxcf.rc_mode == VPX_CBR) ||
@@ -1865,24 +1858,6 @@ void vp9_change_config(struct VP9_COMP *cpi, const VP9EncoderConfig *oxcf) {
(int)cpi->oxcf.target_bandwidth);
}
// Check for resetting the rc flags (rc_1_frame, rc_2_frame) if the
// configuration change has a large change in avg_frame_bandwidth.
// For SVC check for resetting based on spatial layer average bandwidth.
// Also reset buffer level to optimal level.
if (cm->current_video_frame > 0) {
if (cpi->use_svc) {
vp9_svc_check_reset_layer_rc_flag(cpi);
} else {
if (rc->avg_frame_bandwidth > (3 * rc->last_avg_frame_bandwidth >> 1) ||
rc->avg_frame_bandwidth < (rc->last_avg_frame_bandwidth >> 1)) {
rc->rc_1_frame = 0;
rc->rc_2_frame = 0;
rc->bits_off_target = rc->optimal_buffer_level;
rc->buffer_level = rc->optimal_buffer_level;
}
}
}
cpi->alt_ref_source = NULL;
rc->is_src_frame_alt_ref = 0;
@@ -2881,26 +2856,18 @@ void vp9_update_reference_frames(VP9_COMP *cpi) {
if (cpi->oxcf.noise_sensitivity > 0 && denoise_svc(cpi) &&
cpi->denoiser.denoising_level > kDenLowLow) {
int svc_base_is_key = 0;
int denoise_svc_second_layer = 0;
if (cpi->use_svc) {
int realloc_fail = 0;
const int svc_buf_shift =
cpi->svc.number_spatial_layers - cpi->svc.spatial_layer_id == 2
? cpi->denoiser.num_ref_frames
: 0;
int layer = LAYER_IDS_TO_IDX(cpi->svc.spatial_layer_id,
cpi->svc.temporal_layer_id,
cpi->svc.number_temporal_layers);
LAYER_CONTEXT *lc = &cpi->svc.layer_context[layer];
svc_base_is_key = lc->is_key_frame;
denoise_svc_second_layer =
cpi->svc.number_spatial_layers - cpi->svc.spatial_layer_id == 2 ? 1
: 0;
// Check if we need to allocate extra buffers in the denoiser
// for
// Check if we need to allocate extra buffers in the denoiser for
// refreshed frames.
realloc_fail = vp9_denoiser_realloc_svc(
cm, &cpi->denoiser, svc_buf_shift, cpi->refresh_alt_ref_frame,
cm, &cpi->denoiser, cpi->refresh_alt_ref_frame,
cpi->refresh_golden_frame, cpi->refresh_last_frame, cpi->alt_fb_idx,
cpi->gld_fb_idx, cpi->lst_fb_idx);
if (realloc_fail)
@@ -2911,8 +2878,7 @@ void vp9_update_reference_frames(VP9_COMP *cpi) {
&cpi->denoiser, *cpi->Source, cpi->common.frame_type,
cpi->refresh_alt_ref_frame, cpi->refresh_golden_frame,
cpi->refresh_last_frame, cpi->alt_fb_idx, cpi->gld_fb_idx,
cpi->lst_fb_idx, cpi->resize_pending, svc_base_is_key,
denoise_svc_second_layer);
cpi->lst_fb_idx, cpi->resize_pending, svc_base_is_key);
}
#endif
if (is_one_pass_cbr_svc(cpi)) {
@@ -3347,9 +3313,8 @@ static void setup_denoiser_buffer(VP9_COMP *cpi) {
VP9_COMMON *const cm = &cpi->common;
if (cpi->oxcf.noise_sensitivity > 0 &&
!cpi->denoiser.frame_buffer_initialized) {
if (vp9_denoiser_alloc(cm, &cpi->svc, &cpi->denoiser, cpi->use_svc,
cpi->oxcf.noise_sensitivity, cm->width, cm->height,
cm->subsampling_x, cm->subsampling_y,
if (vp9_denoiser_alloc(cm, cpi->use_svc, &cpi->denoiser, cm->width,
cm->height, cm->subsampling_x, cm->subsampling_y,
#if CONFIG_VP9_HIGHBITDEPTH
cm->use_highbitdepth,
#endif
@@ -4864,7 +4829,6 @@ static void update_level_info(VP9_COMP *cpi, size_t *size, int arf_src_index) {
int i, idx;
uint64_t luma_samples, dur_end;
const uint32_t luma_pic_size = cm->width * cm->height;
const uint32_t luma_pic_breadth = VPXMAX(cm->width, cm->height);
LevelConstraint *const level_constraint = &cpi->level_constraint;
const int8_t level_index = level_constraint->level_index;
double cpb_data_size;
@@ -4968,11 +4932,6 @@ static void update_level_info(VP9_COMP *cpi, size_t *size, int arf_src_index) {
level_spec->max_luma_picture_size = luma_pic_size;
}
// update max_luma_picture_breadth
if (luma_pic_breadth > level_spec->max_luma_picture_breadth) {
level_spec->max_luma_picture_breadth = luma_pic_breadth;
}
// update compression_ratio
level_spec->compression_ratio = (double)level_stats->total_uncompressed_size *
cm->bit_depth /
@@ -4993,15 +4952,6 @@ static void update_level_info(VP9_COMP *cpi, size_t *size, int arf_src_index) {
level_fail_messages[LUMA_PIC_SIZE_TOO_LARGE]);
}
if (level_spec->max_luma_picture_breadth >
vp9_level_defs[level_index].max_luma_picture_breadth) {
level_constraint->fail_flag |= (1 << LUMA_PIC_BREADTH_TOO_LARGE);
vpx_internal_error(&cm->error, VPX_CODEC_ERROR,
"Failed to encode to the target level %d. %s",
vp9_level_defs[level_index].level,
level_fail_messages[LUMA_PIC_BREADTH_TOO_LARGE]);
}
if ((double)level_spec->max_luma_sample_rate >
(double)vp9_level_defs[level_index].max_luma_sample_rate *
(1 + SAMPLE_RATE_GRACE_P)) {
@@ -5202,6 +5152,8 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags,
cm->intra_only = 0;
// if the flags indicate intra frame, but if the current picture is for
// non-zero spatial layer, it should not be an intra picture.
// TODO(Won Kap): this needs to change if per-layer intra frame is
// allowed.
if ((source->flags & VPX_EFLAG_FORCE_KF) &&
cpi->svc.spatial_layer_id > cpi->svc.first_spatial_layer_to_encode) {
source->flags &= ~(unsigned int)(VPX_EFLAG_FORCE_KF);
@@ -5334,6 +5286,21 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags,
}
#endif // CONFIG_REALTIME_ONLY
// Debug trace: prints the current frame number, the GF group index, the
// update type stored at that index, and the show_frame flag.
// NOTE(review): the guard is `#if 1`, so this printf is always compiled in,
// while the closing comment originally read `// 0` -- confirm whether the
// trace is meant to be disabled.
// NOTE(review): the format string uses %d for every field; confirm that
// current_video_frame and the gf_group fields are int-compatible.
#if 1
{
// Block-scoped aliases used only by the trace below.
VP9_COMMON *const cm = &cpi->common;
TWO_PASS *const twopass = &cpi->twopass;
GF_GROUP *const gf_group = &twopass->gf_group;
printf("Frame=%d, gf_group_update_type[gf_group_index=%d]=%d, "
"show_frame=%d\n",
cm->current_video_frame, gf_group->index,
gf_group->update_type[gf_group->index],
cm->show_frame);
}
#endif // 1 (debug trace enabled)
if (cm->refresh_frame_context)
cm->frame_contexts[cm->frame_context_idx] = *cm->fc;
@@ -5368,6 +5335,13 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags,
double samples = 0.0;
cpi->bytes += (int)(*size);
// Debug trace: prints the current frame number and *size cast to int.
// NOTE(review): presumably *size is the compressed frame size in bytes (it
// is accumulated into cpi->bytes just above) -- confirm.
// NOTE(review): the guard is `#if 1`, so this printf is always compiled in,
// while the closing comment originally read `// 0` -- confirm whether the
// trace is meant to be disabled.
#if 1
{
printf("Frame %d: rate: %d\n",
cm->current_video_frame, (int)(*size));
}
#endif // 1 (debug trace enabled)
if (cm->show_frame) {
uint32_t bit_depth = 8;
uint32_t in_bit_depth = 8;
@@ -5397,6 +5371,19 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags,
cpi->total_samples += psnr.samples[0];
samples = psnr.samples[0];
// Debug trace: prints psnr.sse[0] as the frame distortion together with an
// RDCOST value computed from cpi->rd.RDMULT, cpi->rd.RDDIV, (*size * 8) and
// psnr.sse[0]; a second line prints rddiv and rdmult.
// NOTE(review): the guard is `#if 1`, so these printfs are always compiled
// in, while the closing comment originally read `// 0` -- confirm whether
// the trace is meant to be disabled.
#if 1
{
const int rddiv = cpi->rd.RDDIV;
const int rdmult = cpi->rd.RDMULT;
// Rate argument is *size scaled by 8 (presumably bytes -> bits; confirm).
const int64_t rdcost = RDCOST(
rdmult, rddiv, (int)(*size) * 8, psnr.sse[0]);
printf("Frame %d: distortion: %" PRIu64 " rdcost: %" PRId64 "\n",
cm->current_video_frame, psnr.sse[0], rdcost);
printf("%d %d\n", rddiv, rdmult);
}
#endif // 1 (debug trace enabled)
{
PSNR_STATS psnr2;
double frame_ssim2 = 0, weight = 0;

View File

@@ -383,7 +383,6 @@ typedef struct {
VP9_LEVEL level;
uint64_t max_luma_sample_rate;
uint32_t max_luma_picture_size;
uint32_t max_luma_picture_breadth;
double average_bitrate; // in kilobits per second
double max_cpb_size; // in kilobits
double compression_ratio;
@@ -423,15 +422,14 @@ typedef struct {
typedef enum {
BITRATE_TOO_LARGE = 0,
LUMA_PIC_SIZE_TOO_LARGE,
LUMA_PIC_BREADTH_TOO_LARGE,
LUMA_SAMPLE_RATE_TOO_LARGE,
CPB_TOO_LARGE,
COMPRESSION_RATIO_TOO_SMALL,
TOO_MANY_COLUMN_TILE,
ALTREF_DIST_TOO_SMALL,
TOO_MANY_REF_BUFFER,
TARGET_LEVEL_FAIL_IDS
LUMA_PIC_SIZE_TOO_LARGE = 1,
LUMA_SAMPLE_RATE_TOO_LARGE = 2,
CPB_TOO_LARGE = 3,
COMPRESSION_RATIO_TOO_SMALL = 4,
TOO_MANY_COLUMN_TILE = 5,
ALTREF_DIST_TOO_SMALL = 6,
TOO_MANY_REF_BUFFER = 7,
TARGET_LEVEL_FAIL_IDS = 8
} TARGET_LEVEL_FAIL_ID;
typedef struct {
@@ -870,7 +868,7 @@ static INLINE int is_one_pass_cbr_svc(const struct VP9_COMP *const cpi) {
static INLINE int denoise_svc(const struct VP9_COMP *const cpi) {
return (!cpi->use_svc ||
(cpi->use_svc &&
cpi->svc.spatial_layer_id >= cpi->svc.first_layer_denoise));
cpi->svc.spatial_layer_id == cpi->svc.number_spatial_layers - 1));
}
#endif
@@ -922,14 +920,10 @@ static INLINE int get_level_index(VP9_LEVEL level) {
// Return the log2 value of max column tiles corresponding to the level that
// the picture size fits into.
static INLINE int log_tile_cols_from_picsize_level(uint32_t width,
uint32_t height) {
static INLINE int log_tile_cols_from_picsize_level(uint32_t pic_size) {
int i;
const uint32_t pic_size = width * height;
const uint32_t pic_breadth = VPXMAX(width, height);
for (i = LEVEL_1; i < LEVEL_MAX; ++i) {
if (vp9_level_defs[i].max_luma_picture_size >= pic_size &&
vp9_level_defs[i].max_luma_picture_breadth >= pic_breadth) {
if (vp9_level_defs[i].max_luma_picture_size > pic_size) {
return get_msb(vp9_level_defs[i].max_col_tiles);
}
}

View File

@@ -66,8 +66,8 @@ static int get_max_tile_cols(VP9_COMP *cpi) {
log2_tile_cols =
clamp(cpi->oxcf.tile_columns, min_log2_tile_cols, max_log2_tile_cols);
if (cpi->oxcf.target_level == LEVEL_AUTO) {
const int level_tile_cols =
log_tile_cols_from_picsize_level(cpi->common.width, cpi->common.height);
const uint32_t pic_size = cpi->common.width * cpi->common.height;
const int level_tile_cols = log_tile_cols_from_picsize_level(pic_size);
if (log2_tile_cols > level_tile_cols) {
log2_tile_cols = VPXMAX(level_tile_cols, min_log2_tile_cols);
}

View File

@@ -44,6 +44,7 @@
#define COMPLEXITY_STATS_OUTPUT 0
#define FIRST_PASS_Q 10.0
#define GF_MAX_BOOST 96.0
#define INTRA_MODE_PENALTY 1024
#define MIN_ARF_GF_BOOST 240
#define MIN_DECAY_FACTOR 0.01
@@ -1948,7 +1949,6 @@ static void accumulate_frame_motion_stats(const FIRSTPASS_STATS *stats,
}
#define BASELINE_ERR_PER_MB 12500.0
#define GF_MAX_BOOST 96.0
static double calc_frame_boost(VP9_COMP *cpi, const FIRSTPASS_STATS *this_frame,
double this_frame_mv_in_out) {
double frame_boost;
@@ -2383,8 +2383,6 @@ static void adjust_group_arnr_filter(VP9_COMP *cpi, double section_noise,
// Analyse and define a gf/arf group.
#define ARF_DECAY_BREAKOUT 0.10
#define ARF_ABS_ZOOM_THRESH 4.0
static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
VP9_COMMON *const cm = &cpi->common;
RATE_CONTROL *const rc = &cpi->rc;
@@ -2413,6 +2411,7 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
double mv_in_out_accumulator = 0.0;
double abs_mv_in_out_accumulator = 0.0;
double mv_ratio_accumulator_thresh;
double mv_in_out_thresh;
double abs_mv_in_out_thresh;
double sr_accumulator = 0.0;
const double av_err = get_distribution_av_err(cpi, twopass);
@@ -2458,7 +2457,8 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
// Motion breakout threshold for loop below depends on image size.
mv_ratio_accumulator_thresh =
(cpi->initial_height + cpi->initial_width) / 4.0;
abs_mv_in_out_thresh = ARF_ABS_ZOOM_THRESH;
mv_in_out_thresh = (cpi->initial_height + cpi->initial_width) / 300.0;
abs_mv_in_out_thresh = (cpi->initial_height + cpi->initial_width) / 200.0;
// Set a maximum and minimum interval for the GF group.
// If the image appears almost completely static we can extend beyond this.
@@ -2543,11 +2543,8 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
// Update the accumulator for second ref error difference.
// This is intended to give an indication of how much the coded error is
// increasing over time.
if (i == 1) {
sr_accumulator += next_frame.coded_error;
} else {
sr_accumulator += (next_frame.sr_coded_error - next_frame.coded_error);
}
sr_accumulator += (next_frame.sr_coded_error - next_frame.coded_error);
sr_accumulator = VPXMAX(0.0, sr_accumulator);
}
// Break out conditions.
@@ -2562,6 +2559,7 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
(!flash_detected) &&
((mv_ratio_accumulator > mv_ratio_accumulator_thresh) ||
(abs_mv_in_out_accumulator > abs_mv_in_out_thresh) ||
(mv_in_out_accumulator < -mv_in_out_thresh) ||
(sr_accumulator > next_frame.intra_error)))) {
break;
}
@@ -2602,6 +2600,7 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
#endif
// Set the interval until the next gf.
// rc->baseline_gf_interval = 8;
rc->baseline_gf_interval = i - (is_key_frame || rc->source_alt_ref_pending);
// Only encode alt reference frame in temporal base layer. So

View File

@@ -21,15 +21,6 @@
#include "vp9/encoder/vp9_noise_estimate.h"
#include "vp9/encoder/vp9_encoder.h"
#if CONFIG_VP9_TEMPORAL_DENOISING
// Gate for noise estimation under SVC. Returns 1 when SVC is not in use, or
// when the current spatial layer is the top one (number_spatial_layers - 1);
// returns 0 for every lower spatial layer.
static INLINE int noise_est_svc(const struct VP9_COMP *const cpi) {
  // Without SVC there is a single layer, so estimation is always allowed.
  if (!cpi->use_svc) return 1;
  return cpi->svc.spatial_layer_id == cpi->svc.number_spatial_layers - 1;
}
#endif
void vp9_noise_estimate_init(NOISE_ESTIMATE *const ne, int width, int height) {
ne->enabled = 0;
ne->level = kLowLow;
@@ -54,7 +45,7 @@ static int enable_noise_estimation(VP9_COMP *const cpi) {
#endif
// Enable noise estimation if denoising is on.
#if CONFIG_VP9_TEMPORAL_DENOISING
if (cpi->oxcf.noise_sensitivity > 0 && noise_est_svc(cpi) &&
if (cpi->oxcf.noise_sensitivity > 0 && denoise_svc(cpi) &&
cpi->common.width >= 320 && cpi->common.height >= 180)
return 1;
#endif
@@ -120,7 +111,7 @@ void vp9_update_noise_estimate(VP9_COMP *const cpi) {
// Estimate is between current source and last source.
YV12_BUFFER_CONFIG *last_source = cpi->Last_Source;
#if CONFIG_VP9_TEMPORAL_DENOISING
if (cpi->oxcf.noise_sensitivity > 0 && noise_est_svc(cpi)) {
if (cpi->oxcf.noise_sensitivity > 0 && denoise_svc(cpi)) {
last_source = &cpi->denoiser.last_source;
// Tune these thresholds for different resolutions when denoising is
// enabled.
@@ -140,7 +131,7 @@ void vp9_update_noise_estimate(VP9_COMP *const cpi) {
(cpi->svc.number_spatial_layers == 1 &&
(ne->last_w != cm->width || ne->last_h != cm->height))) {
#if CONFIG_VP9_TEMPORAL_DENOISING
if (cpi->oxcf.noise_sensitivity > 0 && noise_est_svc(cpi))
if (cpi->oxcf.noise_sensitivity > 0 && denoise_svc(cpi))
copy_frame(&cpi->denoiser.last_source, cpi->Source);
#endif
if (last_source != NULL) {
@@ -155,7 +146,7 @@ void vp9_update_noise_estimate(VP9_COMP *const cpi) {
ne->count = 0;
ne->num_frames_estimate = 10;
#if CONFIG_VP9_TEMPORAL_DENOISING
if (cpi->oxcf.noise_sensitivity > 0 && noise_est_svc(cpi) &&
if (cpi->oxcf.noise_sensitivity > 0 && denoise_svc(cpi) &&
cpi->svc.current_superframe > 1) {
vp9_denoiser_set_noise_level(&cpi->denoiser, ne->level);
copy_frame(&cpi->denoiser.last_source, cpi->Source);
@@ -266,14 +257,14 @@ void vp9_update_noise_estimate(VP9_COMP *const cpi) {
ne->count = 0;
ne->level = vp9_noise_estimate_extract_level(ne);
#if CONFIG_VP9_TEMPORAL_DENOISING
if (cpi->oxcf.noise_sensitivity > 0 && noise_est_svc(cpi))
if (cpi->oxcf.noise_sensitivity > 0 && denoise_svc(cpi))
vp9_denoiser_set_noise_level(&cpi->denoiser, ne->level);
#endif
}
}
}
#if CONFIG_VP9_TEMPORAL_DENOISING
if (cpi->oxcf.noise_sensitivity > 0 && noise_est_svc(cpi))
if (cpi->oxcf.noise_sensitivity > 0 && denoise_svc(cpi))
copy_frame(&cpi->denoiser.last_source, cpi->Source);
#endif
}

View File

@@ -1498,9 +1498,6 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data,
MV_REFERENCE_FRAME best_second_ref_frame = NONE;
int comp_modes = 0;
int num_inter_modes = (cpi->use_svc) ? RT_INTER_MODES_SVC : RT_INTER_MODES;
int flag_svc_subpel = 0;
int svc_mv_col = 0;
int svc_mv_row = 0;
init_ref_frame_cost(cm, xd, ref_frame_cost);
@@ -1650,18 +1647,6 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data,
if (cpi->use_svc || cpi->oxcf.speed <= 7 || bsize < BLOCK_32X32)
x->sb_use_mv_part = 0;
// Set the flag_svc_subpel to 1 for SVC if the lower spatial layer used
// an averaging filter for downsampling (phase = 8). If so, we will test
// a nonzero motion mode on the spatial (golden) reference.
// The nonzero motion is half pixel shifted to left and top (-4, -4).
if (cpi->use_svc && cpi->svc.spatial_layer_id > 0 &&
svc_force_zero_mode[GOLDEN_FRAME - 1] &&
cpi->svc.downsample_filter_phase[cpi->svc.spatial_layer_id - 1] == 8) {
svc_mv_col = -4;
svc_mv_row = -4;
flag_svc_subpel = 1;
}
for (idx = 0; idx < num_inter_modes + comp_modes; ++idx) {
int rate_mv = 0;
int mode_rd_thresh;
@@ -1675,7 +1660,6 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data,
int inter_mv_mode = 0;
int skip_this_mv = 0;
int comp_pred = 0;
int force_gf_mv = 0;
PREDICTION_MODE this_mode;
second_ref_frame = NONE;
@@ -1696,22 +1680,6 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data,
comp_pred = 1;
}
if (ref_frame > usable_ref_frame) continue;
if (skip_ref_find_pred[ref_frame]) continue;
if (flag_svc_subpel && ref_frame == GOLDEN_FRAME) {
force_gf_mv = 1;
// Only test mode if NEARESTMV/NEARMV is (svc_mv_col, svc_mv_row),
// otherwise set NEWMV to (svc_mv_col, svc_mv_row).
if (this_mode == NEWMV) {
frame_mv[this_mode][ref_frame].as_mv.col = svc_mv_col;
frame_mv[this_mode][ref_frame].as_mv.row = svc_mv_row;
} else if (frame_mv[this_mode][ref_frame].as_mv.col != svc_mv_col ||
frame_mv[this_mode][ref_frame].as_mv.row != svc_mv_row) {
continue;
}
}
if (comp_pred) {
const struct segmentation *const seg = &cm->seg;
if (!cpi->allow_comp_inter_inter) continue;
@@ -1722,6 +1690,9 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data,
if (segfeature_active(seg, mi->segment_id, SEG_LVL_REF_FRAME)) continue;
}
if (ref_frame > usable_ref_frame) continue;
if (skip_ref_find_pred[ref_frame]) continue;
// For SVC, skip the golden (spatial) reference search if sse of zeromv_last
// is below threshold.
if (cpi->use_svc && ref_frame == GOLDEN_FRAME &&
@@ -1766,7 +1737,7 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data,
// Skip non-zeromv mode search for golden frame if force_skip_low_temp_var
// is set. If nearestmv for golden frame is 0, zeromv mode will be skipped
// later.
if (!force_gf_mv && force_skip_low_temp_var && ref_frame == GOLDEN_FRAME &&
if (force_skip_low_temp_var && ref_frame == GOLDEN_FRAME &&
frame_mv[this_mode][ref_frame].as_int != 0) {
continue;
}
@@ -1780,7 +1751,7 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data,
}
if (cpi->use_svc) {
if (!force_gf_mv && svc_force_zero_mode[ref_frame - 1] &&
if (svc_force_zero_mode[ref_frame - 1] &&
frame_mv[this_mode][ref_frame].as_int != 0)
continue;
}
@@ -1837,7 +1808,7 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data,
&rd_thresh_freq_fact[mode_index])))
continue;
if (this_mode == NEWMV && !force_gf_mv) {
if (this_mode == NEWMV) {
if (ref_frame > LAST_FRAME && !cpi->use_svc &&
cpi->oxcf.rc_mode == VPX_CBR) {
int tmp_sad;
@@ -1978,7 +1949,7 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data,
if ((this_mode == NEWMV || filter_ref == SWITCHABLE) &&
pred_filter_search &&
(ref_frame == LAST_FRAME ||
(ref_frame == GOLDEN_FRAME && !force_gf_mv &&
(ref_frame == GOLDEN_FRAME &&
(cpi->use_svc || cpi->oxcf.rc_mode == VPX_VBR))) &&
(((mi->mv[0].as_mv.row | mi->mv[0].as_mv.col) & 0x07) != 0)) {
int pf_rate[3];

View File

@@ -1488,8 +1488,6 @@ void vp9_rc_postencode_update(VP9_COMP *cpi, uint64_t bytes_used) {
cpi->rc.last_frame_is_src_altref = cpi->rc.is_src_frame_alt_ref;
}
if (cm->frame_type != KEY_FRAME) rc->reset_high_source_sad = 0;
rc->last_avg_frame_bandwidth = rc->avg_frame_bandwidth;
}
void vp9_rc_postencode_update_drop_frame(VP9_COMP *cpi) {
@@ -1875,12 +1873,9 @@ void vp9_rc_set_gf_interval_range(const VP9_COMP *const cpi,
if (oxcf->target_level == LEVEL_AUTO) {
const uint32_t pic_size = cpi->common.width * cpi->common.height;
const uint32_t pic_breadth =
VPXMAX(cpi->common.width, cpi->common.height);
int i;
for (i = LEVEL_1; i < LEVEL_MAX; ++i) {
if (vp9_level_defs[i].max_luma_picture_size >= pic_size &&
vp9_level_defs[i].max_luma_picture_breadth >= pic_breadth) {
if (vp9_level_defs[i].max_luma_picture_size > pic_size) {
if (rc->min_gf_interval <=
(int)vp9_level_defs[i].min_altref_distance) {
rc->min_gf_interval =

View File

@@ -152,8 +152,6 @@ typedef struct {
int rc_2_frame;
int q_1_frame;
int q_2_frame;
// Keep track of the last target average frame bandwidth.
int last_avg_frame_bandwidth;
// Auto frame-scaling variables.
FRAME_SCALE_LEVEL frame_size_selector;

View File

@@ -37,7 +37,6 @@ void vp9_init_layer_context(VP9_COMP *const cpi) {
svc->scaled_one_half = 0;
svc->current_superframe = 0;
svc->non_reference_frame = 0;
for (i = 0; i < REF_FRAMES; ++i) svc->ref_frame_index[i] = -1;
for (sl = 0; sl < oxcf->ss_number_layers; ++sl) {
svc->ext_frame_flags[sl] = 0;
@@ -390,9 +389,9 @@ int vp9_is_upper_layer_key_frame(const VP9_COMP *const cpi) {
.is_key_frame;
}
void get_layer_resolution(const int width_org, const int height_org,
const int num, const int den, int *width_out,
int *height_out) {
static void get_layer_resolution(const int width_org, const int height_org,
const int num, const int den, int *width_out,
int *height_out) {
int w, h;
if (width_out == NULL || height_out == NULL || den == 0) return;
@@ -657,9 +656,9 @@ int vp9_one_pass_cbr_svc_start_layer(VP9_COMP *const cpi) {
lc->scaling_factor_num, lc->scaling_factor_den, &width,
&height);
// For resolutions <= VGA: set phase of the filter = 8 (for symmetric
// For resolutions <= QVGA: set phase of the filter = 8 (for symmetric
// averaging filter), use bilinear for now.
if (width * height <= 640 * 480) {
if (width * height <= 320 * 240) {
cpi->svc.downsample_filter_type[cpi->svc.spatial_layer_id] = BILINEAR;
cpi->svc.downsample_filter_phase[cpi->svc.spatial_layer_id] = 8;
}
@@ -862,28 +861,3 @@ void vp9_svc_reset_key_frame(VP9_COMP *const cpi) {
vp9_update_temporal_layer_framerate(cpi);
vp9_restore_layer_context(cpi);
}
// Resets per-layer rate-control state when a spatial layer's target average
// frame bandwidth has changed sharply.
//
// For every spatial layer, the avg_frame_bandwidth of its highest-indexed
// temporal layer is compared against that layer's last_avg_frame_bandwidth.
// If the current value exceeds 3/2 of the last value, or falls below 1/2 of
// it (both computed with integer shifts), then for ALL temporal layers of
// that spatial layer rc_1_frame and rc_2_frame are cleared and both
// bits_off_target and buffer_level are set to optimal_buffer_level.
void vp9_svc_check_reset_layer_rc_flag(VP9_COMP *const cpi) {
  SVC *svc = &cpi->svc;
  int sl, tl;
  for (sl = 0; sl < svc->number_spatial_layers; ++sl) {
    // Check for reset based on avg_frame_bandwidth for spatial layer sl.
    const int check_idx = LAYER_IDS_TO_IDX(sl, svc->number_temporal_layers - 1,
                                           svc->number_temporal_layers);
    const RATE_CONTROL *const check_rc = &svc->layer_context[check_idx].rc;
    const int upper_limit = (3 * check_rc->last_avg_frame_bandwidth) >> 1;
    const int lower_limit = check_rc->last_avg_frame_bandwidth >> 1;
    if (check_rc->avg_frame_bandwidth > upper_limit ||
        check_rc->avg_frame_bandwidth < lower_limit) {
      // Reset for all temporal layers with spatial layer sl.
      // Distinct names are used here so nothing shadows the locals above.
      for (tl = 0; tl < svc->number_temporal_layers; ++tl) {
        const int reset_idx =
            LAYER_IDS_TO_IDX(sl, tl, svc->number_temporal_layers);
        RATE_CONTROL *const reset_rc = &svc->layer_context[reset_idx].rc;
        reset_rc->rc_1_frame = 0;
        reset_rc->rc_2_frame = 0;
        reset_rc->bits_off_target = reset_rc->optimal_buffer_level;
        reset_rc->buffer_level = reset_rc->optimal_buffer_level;
      }
    }
  }
}

View File

@@ -49,7 +49,7 @@ typedef struct {
uint8_t speed;
} LAYER_CONTEXT;
typedef struct SVC {
typedef struct {
int spatial_layer_id;
int temporal_layer_id;
int number_spatial_layers;
@@ -99,8 +99,6 @@ typedef struct SVC {
BLOCK_SIZE *prev_partition_svc;
int mi_stride[VPX_MAX_LAYERS];
int first_layer_denoise;
} SVC;
struct VP9_COMP;
@@ -130,10 +128,6 @@ void vp9_save_layer_context(struct VP9_COMP *const cpi);
// Initialize second pass rc for spatial svc.
void vp9_init_second_pass_spatial_svc(struct VP9_COMP *cpi);
void get_layer_resolution(const int width_org, const int height_org,
const int num, const int den, int *width_out,
int *height_out);
// Increment number of video frames in layer
void vp9_inc_frame_in_layer(struct VP9_COMP *const cpi);
@@ -154,8 +148,6 @@ void vp9_free_svc_cyclic_refresh(struct VP9_COMP *const cpi);
void vp9_svc_reset_key_frame(struct VP9_COMP *const cpi);
void vp9_svc_check_reset_layer_rc_flag(struct VP9_COMP *const cpi);
#ifdef __cplusplus
} // extern "C"
#endif

View File

@@ -1,7 +1,7 @@
/*
* Copyright (c) 2014 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* Usee of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may

View File

@@ -169,7 +169,7 @@ cglobal quantize_%1, 0, %2, 15, coeff, ncoeff, skip, round, quant, \
pshuflw m7, m8, 0x1
pmaxsw m8, m7
pextrw r6, m8, 0
mov [r2], r6w
mov [r2], r6
RET
%endmacro

View File

@@ -12,11 +12,8 @@
* \brief Provides the high level interface to wrap encoder algorithms.
*
*/
#include <assert.h>
#include <limits.h>
#include <stdlib.h>
#include <string.h>
#include "vp8/common/blockd.h"
#include "vpx_config.h"
#include "vpx/internal/vpx_codec_internal.h"
@@ -84,8 +81,6 @@ vpx_codec_err_t vpx_codec_enc_init_multi_ver(
int i;
void *mem_loc = NULL;
if (iface->enc.mr_get_mem_loc == NULL) return VPX_CODEC_INCAPABLE;
if (!(res = iface->enc.mr_get_mem_loc(cfg, &mem_loc))) {
for (i = 0; i < num_enc; i++) {
vpx_codec_priv_enc_mr_cfg_t mr_cfg;
@@ -94,27 +89,28 @@ vpx_codec_err_t vpx_codec_enc_init_multi_ver(
if (dsf->num < 1 || dsf->num > 4096 || dsf->den < 1 ||
dsf->den > dsf->num) {
res = VPX_CODEC_INVALID_PARAM;
} else {
mr_cfg.mr_low_res_mode_info = mem_loc;
mr_cfg.mr_total_resolutions = num_enc;
mr_cfg.mr_encoder_id = num_enc - 1 - i;
mr_cfg.mr_down_sampling_factor.num = dsf->num;
mr_cfg.mr_down_sampling_factor.den = dsf->den;
/* Force Key-frame synchronization. Namely, encoder at higher
* resolution always use the same frame_type chosen by the
* lowest-resolution encoder.
*/
if (mr_cfg.mr_encoder_id) cfg->kf_mode = VPX_KF_DISABLED;
ctx->iface = iface;
ctx->name = iface->name;
ctx->priv = NULL;
ctx->init_flags = flags;
ctx->config.enc = cfg;
res = ctx->iface->init(ctx, &mr_cfg);
break;
}
mr_cfg.mr_low_res_mode_info = mem_loc;
mr_cfg.mr_total_resolutions = num_enc;
mr_cfg.mr_encoder_id = num_enc - 1 - i;
mr_cfg.mr_down_sampling_factor.num = dsf->num;
mr_cfg.mr_down_sampling_factor.den = dsf->den;
/* Force Key-frame synchronization. Namely, encoder at higher
* resolution always use the same frame_type chosen by the
* lowest-resolution encoder.
*/
if (mr_cfg.mr_encoder_id) cfg->kf_mode = VPX_KF_DISABLED;
ctx->iface = iface;
ctx->name = iface->name;
ctx->priv = NULL;
ctx->init_flags = flags;
ctx->config.enc = cfg;
res = ctx->iface->init(ctx, &mr_cfg);
if (res) {
const char *error_detail = ctx->priv ? ctx->priv->err_detail : NULL;
/* Destroy current ctx */
@@ -128,14 +124,10 @@ vpx_codec_err_t vpx_codec_enc_init_multi_ver(
vpx_codec_destroy(ctx);
i--;
}
#if CONFIG_MULTI_RES_ENCODING
assert(mem_loc);
free(((LOWER_RES_FRAME_INFO *)mem_loc)->mb_info);
free(mem_loc);
#endif
return SAVE_STATUS(ctx, res);
}
if (res) break;
ctx++;
cfg++;
dsf++;

View File

@@ -408,7 +408,7 @@ enum vp8e_enc_control_id {
/*!\brief Codec control function to set noise sensitivity.
*
* 0: off, 1: On(YOnly), 2: For SVC only, on top two spatial layers(YOnly)
* 0: off, 1: On(YOnly)
*
* Supported in codecs: VP9
*/

View File

@@ -179,8 +179,6 @@ VPX_CTRL_USE_TYPE(VP9_INVERT_TILE_DECODE_ORDER, int)
#define VPX_CTRL_VP9_INVERT_TILE_DECODE_ORDER
#define VPX_CTRL_VP9_DECODE_SVC_SPATIAL_LAYER
VPX_CTRL_USE_TYPE(VP9_DECODE_SVC_SPATIAL_LAYER, int)
#define VPX_CTRL_VP9_SET_SKIP_LOOP_FILTER
VPX_CTRL_USE_TYPE(VP9_SET_SKIP_LOOP_FILTER, int)
/*!\endcond */
/*! @} - end defgroup vp8_decoder */

View File

@@ -1,13 +1,3 @@
##
## Copyright (c) 2017 The WebM project authors. All Rights Reserved.
##
## Use of this source code is governed by a BSD-style license
## that can be found in the LICENSE file in the root of the source
## tree. An additional intellectual property rights grant can be found
## in the file PATENTS. All contributing project authors may
## be found in the AUTHORS file in the root of the source tree.
##
sub vpx_dsp_forward_decls() {
print <<EOF
/*

View File

@@ -1,13 +1,3 @@
##
## Copyright (c) 2017 The WebM project authors. All Rights Reserved.
##
## Use of this source code is governed by a BSD-style license
## that can be found in the LICENSE file in the root of the source
## tree. An additional intellectual property rights grant can be found
## in the file PATENTS. All contributing project authors may
## be found in the AUTHORS file in the root of the source tree.
##
sub vpx_scale_forward_decls() {
print <<EOF
struct yv12_buffer_config;