Merge remote-tracking branch 'origin/master' into nextgenv2

Periodic merge to get master changes into nextgenv2.

Change-Id: I6f0e4b470f193da03f1a8cb8e6a93ae39395699a
This commit is contained in:
Debargha Mukherjee 2015-09-17 11:20:03 -07:00
commit 09ff5f2792
192 changed files with 17698 additions and 8622 deletions

5
.gitignore vendored
View File

@ -30,14 +30,17 @@
/examples/decode_with_partial_drops
/examples/example_xma
/examples/postproc
/examples/resize_util
/examples/set_maps
/examples/simple_decoder
/examples/simple_encoder
/examples/twopass_encoder
/examples/vp8_multi_resolution_encoder
/examples/vp8cx_set_ref
/examples/vp9_lossless_encoder
/examples/vp9_spatial_scalable_encoder
/examples/vpx_temporal_scalable_patterns
/examples/vpx_temporal_svc_encoder
/ivfdec
/ivfdec.dox
/ivfenc
@ -45,12 +48,14 @@
/libvpx.so*
/libvpx.ver
/samples.dox
/test_intra_pred_speed
/test_libvpx
/vp8_api1_migration.dox
/vp[89x]_rtcd.h
/vpx.pc
/vpx_config.c
/vpx_config.h
/vpx_dsp_rtcd.h
/vpx_scale_rtcd.h
/vpx_version.h
/vpxdec

View File

@ -140,6 +140,8 @@ $(BUILD_PFX)%_avx.c.d: CFLAGS += -mavx $(STACKREALIGN)
$(BUILD_PFX)%_avx.c.o: CFLAGS += -mavx $(STACKREALIGN)
$(BUILD_PFX)%_avx2.c.d: CFLAGS += -mavx2 $(STACKREALIGN)
$(BUILD_PFX)%_avx2.c.o: CFLAGS += -mavx2 $(STACKREALIGN)
$(BUILD_PFX)%vp9_reconintra.c.d: CFLAGS += $(STACKREALIGN)
$(BUILD_PFX)%vp9_reconintra.c.o: CFLAGS += $(STACKREALIGN)
$(BUILD_PFX)%.c.d: %.c
$(if $(quiet),@echo " [DEP] $@")
@ -285,7 +287,7 @@ define archive_template
# for creating them.
$(1):
$(if $(quiet),@echo " [AR] $$@")
$(qexec)$$(AR) $$(ARFLAGS) $$@ $$?
$(qexec)$$(AR) $$(ARFLAGS) $$@ $$^
endef
define so_template

View File

@ -428,7 +428,7 @@ NM=${NM}
CFLAGS = ${CFLAGS}
CXXFLAGS = ${CXXFLAGS}
ARFLAGS = -rus\$(if \$(quiet),c,v)
ARFLAGS = -crs\$(if \$(quiet),,v)
LDFLAGS = ${LDFLAGS}
ASFLAGS = ${ASFLAGS}
extralibs = ${extralibs}
@ -728,13 +728,6 @@ process_common_toolchain() {
# Handle darwin variants. Newer SDKs allow targeting older
# platforms, so use the newest one available.
case ${toolchain} in
arm*-darwin*)
ios_sdk_dir="$(show_darwin_sdk_path iphoneos)"
if [ -d "${ios_sdk_dir}" ]; then
add_cflags "-isysroot ${ios_sdk_dir}"
add_ldflags "-isysroot ${ios_sdk_dir}"
fi
;;
*-darwin*)
osx_sdk_dir="$(show_darwin_sdk_path macosx)"
if [ -d "${osx_sdk_dir}" ]; then
@ -810,14 +803,7 @@ process_common_toolchain() {
if disabled neon && enabled neon_asm; then
die "Disabling neon while keeping neon-asm is not supported"
fi
case ${toolchain} in
*-darwin*)
# Neon is guaranteed on iOS 6+ devices, while old media extensions
# no longer assemble with iOS 9 SDK
;;
*)
soft_enable media
esac
soft_enable media
;;
armv6)
soft_enable media
@ -1081,7 +1067,9 @@ EOF
CROSS=${CROSS:-g}
;;
os2)
disable_feature pic
AS=${AS:-nasm}
add_ldflags -Zhigh-mem
;;
esac
@ -1323,12 +1311,6 @@ EOF
add_cflags -D_LARGEFILE_SOURCE
add_cflags -D_FILE_OFFSET_BITS=64
fi
# append any user defined extra cflags
if [ -n "${extra_cflags}" ] ; then
check_add_cflags ${extra_cflags} || \
die "Requested extra CFLAGS '${extra_cflags}' not supported by compiler"
fi
}
process_toolchain() {

7
configure vendored
View File

@ -265,6 +265,7 @@ EXPERIMENT_LIST="
fp_mb_stats
emulate_hardware
ext_tx
misc_fixes
"
CONFIG_LIST="
dependency_tracking
@ -717,6 +718,12 @@ EOF
esac
# libwebm needs to be linked with C++ standard library
enabled webm_io && LD=${CXX}
# append any user defined extra cflags
if [ -n "${extra_cflags}" ] ; then
check_add_cflags ${extra_cflags} || \
die "Requested extra CFLAGS '${extra_cflags}' not supported by compiler"
fi
}

View File

@ -36,6 +36,8 @@ LIBYUV_SRCS += third_party/libyuv/include/libyuv/basic_types.h \
third_party/libyuv/source/scale_neon64.cc \
third_party/libyuv/source/scale_win.cc \
LIBWEBM_COMMON_SRCS += third_party/libwebm/webmids.hpp
LIBWEBM_MUXER_SRCS += third_party/libwebm/mkvmuxer.cpp \
third_party/libwebm/mkvmuxerutil.cpp \
third_party/libwebm/mkvwriter.cpp \
@ -43,8 +45,7 @@ LIBWEBM_MUXER_SRCS += third_party/libwebm/mkvmuxer.cpp \
third_party/libwebm/mkvmuxertypes.hpp \
third_party/libwebm/mkvmuxerutil.hpp \
third_party/libwebm/mkvparser.hpp \
third_party/libwebm/mkvwriter.hpp \
third_party/libwebm/webmids.hpp
third_party/libwebm/mkvwriter.hpp
LIBWEBM_PARSER_SRCS = third_party/libwebm/mkvparser.cpp \
third_party/libwebm/mkvreader.cpp \
@ -68,6 +69,7 @@ ifeq ($(CONFIG_LIBYUV),yes)
vpxdec.SRCS += $(LIBYUV_SRCS)
endif
ifeq ($(CONFIG_WEBM_IO),yes)
vpxdec.SRCS += $(LIBWEBM_COMMON_SRCS)
vpxdec.SRCS += $(LIBWEBM_PARSER_SRCS)
vpxdec.SRCS += webmdec.cc webmdec.h
endif
@ -89,6 +91,7 @@ ifeq ($(CONFIG_LIBYUV),yes)
vpxenc.SRCS += $(LIBYUV_SRCS)
endif
ifeq ($(CONFIG_WEBM_IO),yes)
vpxenc.SRCS += $(LIBWEBM_COMMON_SRCS)
vpxenc.SRCS += $(LIBWEBM_MUXER_SRCS)
vpxenc.SRCS += webmenc.cc webmenc.h
endif

View File

@ -25,6 +25,7 @@
#include "../tools_common.h"
#include "../video_writer.h"
#include "../vpx_ports/vpx_timer.h"
#include "vpx/svc_context.h"
#include "vpx/vp8cx.h"
#include "vpx/vpx_encoder.h"
@ -79,6 +80,8 @@ static const arg_def_t rc_end_usage_arg =
ARG_DEF(NULL, "rc-end-usage", 1, "0 - 3: VBR, CBR, CQ, Q");
static const arg_def_t speed_arg =
ARG_DEF("sp", "speed", 1, "speed configuration");
static const arg_def_t aqmode_arg =
ARG_DEF("aq", "aqmode", 1, "aq-mode off/on");
#if CONFIG_VP9_HIGHBITDEPTH
static const struct arg_enum_list bitdepth_enum[] = {
@ -100,7 +103,7 @@ static const arg_def_t *svc_args[] = {
&kf_dist_arg, &scale_factors_arg, &passes_arg, &pass_arg,
&fpf_name_arg, &min_q_arg, &max_q_arg, &min_bitrate_arg,
&max_bitrate_arg, &temporal_layers_arg, &temporal_layering_mode_arg,
&lag_in_frame_arg, &threads_arg,
&lag_in_frame_arg, &threads_arg, &aqmode_arg,
#if OUTPUT_RC_STATS
&output_rc_stats_arg,
#endif
@ -220,6 +223,8 @@ static void parse_command_line(int argc, const char **argv_,
#endif
} else if (arg_match(&arg, &speed_arg, argi)) {
svc_ctx->speed = arg_parse_uint(&arg);
} else if (arg_match(&arg, &aqmode_arg, argi)) {
svc_ctx->aqmode = arg_parse_uint(&arg);
} else if (arg_match(&arg, &threads_arg, argi)) {
svc_ctx->threads = arg_parse_uint(&arg);
} else if (arg_match(&arg, &temporal_layering_mode_arg, argi)) {
@ -564,6 +569,8 @@ int main(int argc, const char **argv) {
double sum_bitrate2 = 0.0;
double framerate = 30.0;
#endif
struct vpx_usec_timer timer;
int64_t cx_time = 0;
memset(&svc_ctx, 0, sizeof(svc_ctx));
svc_ctx.log_print = 1;
exec_name = argv[0];
@ -632,6 +639,9 @@ int main(int argc, const char **argv) {
vpx_codec_control(&codec, VP8E_SET_CPUUSED, svc_ctx.speed);
if (svc_ctx.threads)
vpx_codec_control(&codec, VP9E_SET_TILE_COLUMNS, (svc_ctx.threads >> 1));
if (svc_ctx.speed >= 5 && svc_ctx.aqmode == 1)
vpx_codec_control(&codec, VP9E_SET_AQ_MODE, 3);
// Encode frames
while (!end_of_stream) {
@ -643,9 +653,12 @@ int main(int argc, const char **argv) {
end_of_stream = 1;
}
vpx_usec_timer_start(&timer);
res = vpx_svc_encode(&svc_ctx, &codec, (end_of_stream ? NULL : &raw),
pts, frame_duration, svc_ctx.speed >= 5 ?
VPX_DL_REALTIME : VPX_DL_GOOD_QUALITY);
vpx_usec_timer_mark(&timer);
cx_time += vpx_usec_timer_elapsed(&timer);
printf("%s", vpx_svc_get_message(&svc_ctx));
if (res != VPX_CODEC_OK) {
@ -784,6 +797,10 @@ int main(int argc, const char **argv) {
}
}
#endif
printf("Frame cnt and encoding time/FPS stats for encoding: %d %f %f \n",
frame_cnt,
1000 * (float)cx_time / (double)(frame_cnt * 1000000),
1000000 * (double)frame_cnt / (double)cx_time);
vpx_img_free(&raw);
// display average size, psnr
printf("%s", vpx_svc_dump_statistics(&svc_ctx));

View File

@ -53,7 +53,7 @@ CODEC_SRCS-yes += $(addprefix vpx_dsp/,$(call enabled,DSP_SRCS))
include $(SRC_PATH_BARE)/vpx_util/vpx_util.mk
CODEC_SRCS-yes += $(addprefix vpx_util/,$(call enabled,UTIL_SRCS))
ifneq ($(CONFIG_VP8_ENCODER)$(CONFIG_VP8_DECODER),)
ifeq ($(CONFIG_VP8),yes)
VP8_PREFIX=vp8/
include $(SRC_PATH_BARE)/$(VP8_PREFIX)vp8_common.mk
endif
@ -76,7 +76,7 @@ ifeq ($(CONFIG_VP8_DECODER),yes)
CODEC_DOC_SECTIONS += vp8 vp8_decoder
endif
ifneq ($(CONFIG_VP9_ENCODER)$(CONFIG_VP9_DECODER),)
ifeq ($(CONFIG_VP9),yes)
VP9_PREFIX=vp9/
include $(SRC_PATH_BARE)/$(VP9_PREFIX)vp9_common.mk
endif
@ -110,7 +110,7 @@ VP9_PREFIX=vp9/
$(BUILD_PFX)$(VP9_PREFIX)%.c.o: CFLAGS += -Wextra
# VP10 make file
ifneq ($(CONFIG_VP10_ENCODER)$(CONFIG_VP10_DECODER),)
ifeq ($(CONFIG_VP10),yes)
VP10_PREFIX=vp10/
include $(SRC_PATH_BARE)/$(VP10_PREFIX)vp10_common.mk
endif

View File

@ -40,30 +40,6 @@ static int round(double x) {
#endif
const int kNumCoeffs = 256;
const double PI = 3.1415926535898;
void reference2_16x16_idct_2d(double *input, double *output) {
double x;
for (int l = 0; l < 16; ++l) {
for (int k = 0; k < 16; ++k) {
double s = 0;
for (int i = 0; i < 16; ++i) {
for (int j = 0; j < 16; ++j) {
x = cos(PI * j * (l + 0.5) / 16.0) *
cos(PI * i * (k + 0.5) / 16.0) *
input[i * 16 + j] / 256;
if (i != 0)
x *= sqrt(2.0);
if (j != 0)
x *= sqrt(2.0);
s += x;
}
}
output[k*16+l] = s;
}
}
}
const double C1 = 0.995184726672197;
const double C2 = 0.98078528040323;
const double C3 = 0.956940335732209;

View File

@ -195,6 +195,7 @@ void EncoderTest::RunLoop(VideoSource *video) {
video->Begin();
encoder->InitEncoder(video);
ASSERT_FALSE(::testing::Test::HasFatalFailure());
unsigned long dec_init_flags = 0; // NOLINT
// Use fragment decoder if encoder outputs partitions.

View File

@ -20,10 +20,11 @@ const int kMaxErrorFrames = 12;
const int kMaxDroppableFrames = 12;
class ErrorResilienceTestLarge : public ::libvpx_test::EncoderTest,
public ::libvpx_test::CodecTestWithParam<libvpx_test::TestMode> {
public ::libvpx_test::CodecTestWith2Params<libvpx_test::TestMode, bool> {
protected:
ErrorResilienceTestLarge()
: EncoderTest(GET_PARAM(0)),
svc_support_(GET_PARAM(2)),
psnr_(0.0),
nframes_(0),
mismatch_psnr_(0.0),
@ -193,6 +194,8 @@ class ErrorResilienceTestLarge : public ::libvpx_test::EncoderTest,
pattern_switch_ = frame_switch;
}
bool svc_support_;
private:
double psnr_;
unsigned int nframes_;
@ -302,6 +305,10 @@ TEST_P(ErrorResilienceTestLarge, DropFramesWithoutRecovery) {
// two layer temporal pattern. The base layer does not predict from the top
// layer, so successful decoding is expected.
TEST_P(ErrorResilienceTestLarge, 2LayersDropEnhancement) {
// This test doesn't run if SVC is not supported.
if (!svc_support_)
return;
const vpx_rational timebase = { 33333333, 1000000000 };
cfg_.g_timebase = timebase;
cfg_.rc_target_bitrate = 500;
@ -347,6 +354,10 @@ TEST_P(ErrorResilienceTestLarge, 2LayersDropEnhancement) {
// for a two layer temporal pattern, where at some point in the
// sequence, the LAST ref is not used anymore.
TEST_P(ErrorResilienceTestLarge, 2LayersNoRefLast) {
// This test doesn't run if SVC is not supported.
if (!svc_support_)
return;
const vpx_rational timebase = { 33333333, 1000000000 };
cfg_.g_timebase = timebase;
cfg_.rc_target_bitrate = 500;
@ -579,9 +590,13 @@ TEST_P(ErrorResilienceTestLargeCodecControls, CodecControl3TemporalLayers) {
}
}
VP8_INSTANTIATE_TEST_CASE(ErrorResilienceTestLarge, ONE_PASS_TEST_MODES);
VP8_INSTANTIATE_TEST_CASE(ErrorResilienceTestLarge, ONE_PASS_TEST_MODES,
::testing::Values(true));
VP8_INSTANTIATE_TEST_CASE(ErrorResilienceTestLargeCodecControls,
ONE_PASS_TEST_MODES);
VP9_INSTANTIATE_TEST_CASE(ErrorResilienceTestLarge, ONE_PASS_TEST_MODES);
VP10_INSTANTIATE_TEST_CASE(ErrorResilienceTestLarge, ONE_PASS_TEST_MODES);
VP9_INSTANTIATE_TEST_CASE(ErrorResilienceTestLarge, ONE_PASS_TEST_MODES,
::testing::Values(true));
// SVC-related tests don't run for VP10 since SVC is not supported.
VP10_INSTANTIATE_TEST_CASE(ErrorResilienceTestLarge, ONE_PASS_TEST_MODES,
::testing::Values(false));
} // namespace

View File

@ -74,7 +74,7 @@ TEST_F(VP9FrameSizeTestsLarge, ValidSizes) {
// size or almost 1 gig of memory.
// In total the allocations will exceed 2GiB which may cause a failure with
// mingw + wine, use a smaller size in that case.
#if defined(_WIN32) && !defined(_WIN64)
#if defined(_WIN32) && !defined(_WIN64) || defined(__OS2__)
video.SetSize(4096, 3072);
#else
video.SetSize(4096, 4096);

View File

@ -67,43 +67,6 @@ void reference_dct_2d(int16_t input[64], double output[64]) {
output[i] *= 2;
}
void reference_idct_1d(double input[8], double output[8]) {
const double kPi = 3.141592653589793238462643383279502884;
const double kSqrt2 = 1.414213562373095048801688724209698;
for (int k = 0; k < 8; k++) {
output[k] = 0.0;
for (int n = 0; n < 8; n++) {
output[k] += input[n]*cos(kPi*(2*k+1)*n/16.0);
if (n == 0)
output[k] = output[k]/kSqrt2;
}
}
}
void reference_idct_2d(double input[64], int16_t output[64]) {
double out[64], out2[64];
// First transform rows
for (int i = 0; i < 8; ++i) {
double temp_in[8], temp_out[8];
for (int j = 0; j < 8; ++j)
temp_in[j] = input[j + i*8];
reference_idct_1d(temp_in, temp_out);
for (int j = 0; j < 8; ++j)
out[j + i*8] = temp_out[j];
}
// Then transform columns
for (int i = 0; i < 8; ++i) {
double temp_in[8], temp_out[8];
for (int j = 0; j < 8; ++j)
temp_in[j] = out[j*8 + i];
reference_idct_1d(temp_in, temp_out);
for (int j = 0; j < 8; ++j)
out2[j*8 + i] = temp_out[j];
}
for (int i = 0; i < 64; ++i)
output[i] = round(out2[i]/32);
}
TEST(VP9Idct8x8Test, AccuracyCheck) {
ACMRandom rnd(ACMRandom::DeterministicSeed());
const int count_test_block = 10000;

View File

@ -145,7 +145,7 @@ TEST_P(InvalidFileInvalidPeekTest, ReturnCode) {
}
const DecodeParam kVP9InvalidFileInvalidPeekTests[] = {
{1, "invalid-vp90-01-v2.webm"},
{1, "invalid-vp90-01-v3.webm"},
};
VP9_INSTANTIATE_TEST_CASE(InvalidFileInvalidPeekTest,

View File

@ -590,7 +590,9 @@ INSTANTIATE_TEST_CASE_P(
make_tuple(&vpx_lpf_horizontal_16_sse2, &vpx_lpf_horizontal_16_c, 8, 1),
make_tuple(&vpx_lpf_horizontal_16_sse2, &vpx_lpf_horizontal_16_c, 8, 2),
make_tuple(&vpx_lpf_vertical_8_sse2, &vpx_lpf_vertical_8_c, 8, 1),
make_tuple(&wrapper_vertical_16_sse2, &wrapper_vertical_16_c, 8, 1)));
make_tuple(&wrapper_vertical_16_sse2, &wrapper_vertical_16_c, 8, 1),
make_tuple(&wrapper_vertical_16_dual_sse2,
&wrapper_vertical_16_dual_c, 8, 1)));
#endif // CONFIG_VP9_HIGHBITDEPTH
#endif

View File

@ -81,6 +81,15 @@ static void write_ivf_frame_header(const vpx_codec_cx_pkt_t *const pkt,
const unsigned int kInitialWidth = 320;
const unsigned int kInitialHeight = 240;
struct FrameInfo {
FrameInfo(vpx_codec_pts_t _pts, unsigned int _w, unsigned int _h)
: pts(_pts), w(_w), h(_h) {}
vpx_codec_pts_t pts;
unsigned int w;
unsigned int h;
};
unsigned int ScaleForFrameNumber(unsigned int frame, unsigned int val) {
if (frame < 10)
return val;
@ -120,15 +129,6 @@ class ResizeTest : public ::libvpx_test::EncoderTest,
virtual ~ResizeTest() {}
struct FrameInfo {
FrameInfo(vpx_codec_pts_t _pts, unsigned int _w, unsigned int _h)
: pts(_pts), w(_w), h(_h) {}
vpx_codec_pts_t pts;
unsigned int w;
unsigned int h;
};
virtual void SetUp() {
InitializeConfig();
SetMode(GET_PARAM(1));
@ -261,6 +261,134 @@ TEST_P(ResizeInternalTest, TestInternalResizeWorks) {
}
}
class ResizeInternalRealtimeTest : public ::libvpx_test::EncoderTest,
public ::libvpx_test::CodecTestWith2Params<libvpx_test::TestMode, int> {
protected:
ResizeInternalRealtimeTest() : EncoderTest(GET_PARAM(0)) {}
virtual ~ResizeInternalRealtimeTest() {}
virtual void PreEncodeFrameHook(libvpx_test::VideoSource *video,
libvpx_test::Encoder *encoder) {
if (video->frame() == 0) {
encoder->Control(VP9E_SET_AQ_MODE, 3);
encoder->Control(VP8E_SET_CPUUSED, set_cpu_used_);
}
if (change_bitrate_ && video->frame() == 120) {
change_bitrate_ = false;
cfg_.rc_target_bitrate = 500;
encoder->Config(&cfg_);
}
}
virtual void SetUp() {
InitializeConfig();
SetMode(GET_PARAM(1));
set_cpu_used_ = GET_PARAM(2);
}
virtual void DecompressedFrameHook(const vpx_image_t &img,
vpx_codec_pts_t pts) {
frame_info_list_.push_back(FrameInfo(pts, img.d_w, img.d_h));
}
void DefaultConfig() {
cfg_.g_w = 352;
cfg_.g_h = 288;
cfg_.rc_buf_initial_sz = 500;
cfg_.rc_buf_optimal_sz = 600;
cfg_.rc_buf_sz = 1000;
cfg_.rc_min_quantizer = 2;
cfg_.rc_max_quantizer = 56;
cfg_.rc_undershoot_pct = 50;
cfg_.rc_overshoot_pct = 50;
cfg_.rc_end_usage = VPX_CBR;
cfg_.kf_mode = VPX_KF_AUTO;
cfg_.g_lag_in_frames = 0;
cfg_.kf_min_dist = cfg_.kf_max_dist = 3000;
// Enable dropped frames.
cfg_.rc_dropframe_thresh = 1;
// Enable error_resilience mode.
cfg_.g_error_resilient = 1;
// Enable dynamic resizing.
cfg_.rc_resize_allowed = 1;
// Run at low bitrate.
cfg_.rc_target_bitrate = 200;
}
std::vector< FrameInfo > frame_info_list_;
int set_cpu_used_;
bool change_bitrate_;
};
// Verify the dynamic resizer behavior for real time, 1 pass CBR mode.
// Run at low bitrate, with resize_allowed = 1, and verify that we get
// one resize down event.
TEST_P(ResizeInternalRealtimeTest, TestInternalResizeDown) {
::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
30, 1, 0, 299);
DefaultConfig();
change_bitrate_ = false;
ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
unsigned int last_w = cfg_.g_w;
unsigned int last_h = cfg_.g_h;
int resize_count = 0;
for (std::vector<FrameInfo>::const_iterator info = frame_info_list_.begin();
info != frame_info_list_.end(); ++info) {
if (info->w != last_w || info->h != last_h) {
// Verify that resize down occurs.
ASSERT_LT(info->w, last_w);
ASSERT_LT(info->h, last_h);
last_w = info->w;
last_h = info->h;
resize_count++;
}
}
// Verify that we get 1 resize down event in this test.
ASSERT_EQ(1, resize_count) << "Resizing should occur.";
}
// Verify the dynamic resizer behavior for real time, 1 pass CBR mode.
// Start at low target bitrate, raise the bitrate in the middle of the clip,
// scaling-up should occur after bitrate changed.
TEST_P(ResizeInternalRealtimeTest, TestInternalResizeDownUpChangeBitRate) {
::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
30, 1, 0, 299);
DefaultConfig();
change_bitrate_ = true;
// Disable dropped frames.
cfg_.rc_dropframe_thresh = 0;
// Starting bitrate low.
cfg_.rc_target_bitrate = 100;
ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
unsigned int last_w = cfg_.g_w;
unsigned int last_h = cfg_.g_h;
int resize_count = 0;
for (std::vector<FrameInfo>::const_iterator info = frame_info_list_.begin();
info != frame_info_list_.end(); ++info) {
if (info->w != last_w || info->h != last_h) {
resize_count++;
if (resize_count == 1) {
// Verify that resize down occurs.
ASSERT_LT(info->w, last_w);
ASSERT_LT(info->h, last_h);
} else if (resize_count == 2) {
// Verify that resize up occurs.
ASSERT_GT(info->w, last_w);
ASSERT_GT(info->h, last_h);
}
last_w = info->w;
last_h = info->h;
}
}
// Verify that we get 2 resize events in this test.
ASSERT_EQ(2, resize_count) << "Resizing should occur twice.";
}
vpx_img_fmt_t CspForFrameNumber(int frame) {
if (frame < 10)
return VPX_IMG_FMT_I420;
@ -371,6 +499,9 @@ VP9_INSTANTIATE_TEST_CASE(ResizeTest,
::testing::Values(::libvpx_test::kRealTime));
VP9_INSTANTIATE_TEST_CASE(ResizeInternalTest,
::testing::Values(::libvpx_test::kOnePassBest));
VP9_INSTANTIATE_TEST_CASE(ResizeInternalRealtimeTest,
::testing::Values(::libvpx_test::kRealTime),
::testing::Range(5, 9));
VP9_INSTANTIATE_TEST_CASE(ResizeCspTest,
::testing::Values(::libvpx_test::kRealTime));
} // namespace

View File

@ -687,8 +687,8 @@ LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp93-2-20-12bit-yuv444.webm.md5
endif # CONFIG_VP9_HIGHBITDEPTH
# Invalid files for testing libvpx error checking.
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-01-v2.webm
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-01-v2.webm.res
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-01-v3.webm
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-01-v3.webm.res
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-02-v2.webm
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-02-v2.webm.res
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-03-v3.webm

View File

@ -6,8 +6,8 @@ b87815bf86020c592ccc7a846ba2e28ec8043902 *hantro_odd.yuv
456d1493e52d32a5c30edf44a27debc1fa6b253a *invalid-vp90-2-00-quantizer-11.webm.ivf.s52984_r01-05_b6-.ivf.res
c123d1f9f02fb4143abb5e271916e3a3080de8f6 *invalid-vp90-2-00-quantizer-11.webm.ivf.s52984_r01-05_b6-z.ivf
456d1493e52d32a5c30edf44a27debc1fa6b253a *invalid-vp90-2-00-quantizer-11.webm.ivf.s52984_r01-05_b6-z.ivf.res
fe346136b9b8c1e6f6084cc106485706915795e4 *invalid-vp90-01-v2.webm
25751f5d3b05ff03f0719ad42cd625348eb8961e *invalid-vp90-01-v2.webm.res
fe346136b9b8c1e6f6084cc106485706915795e4 *invalid-vp90-01-v3.webm
5d9474c0309b7ca09a182d888f73b37a8fe1362c *invalid-vp90-01-v3.webm.res
d78e2fceba5ac942246503ec8366f879c4775ca5 *invalid-vp90-02-v2.webm
8e2eff4af87d2b561cce2365713269e301457ef3 *invalid-vp90-02-v2.webm.res
df1a1453feb3c00d7d89746c7003b4163523bff3 *invalid-vp90-03-v3.webm

View File

@ -167,6 +167,10 @@ LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS) += sad_test.cc
TEST_INTRA_PRED_SPEED_SRCS-$(CONFIG_VP9) := test_intra_pred_speed.cc
TEST_INTRA_PRED_SPEED_SRCS-$(CONFIG_VP9) += ../md5_utils.h ../md5_utils.c
## VP10
LIBVPX_TEST_SRCS-$(CONFIG_VP10_ENCODER) += vp10_dct_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP10) += vp10_inv_txfm_test.cc
endif # CONFIG_SHARED
include $(SRC_PATH_BARE)/test/test-data.mk

View File

@ -26,6 +26,7 @@ extern void vpx_dsp_rtcd();
extern void vpx_scale_rtcd();
}
#if ARCH_X86 || ARCH_X86_64
static void append_negative_gtest_filter(const char *str) {
std::string filter = ::testing::FLAGS_gtest_filter;
// Negative patterns begin with one '-' followed by a ':' separated list.
@ -33,6 +34,7 @@ static void append_negative_gtest_filter(const char *str) {
filter += str;
::testing::FLAGS_gtest_filter = filter;
}
#endif // ARCH_X86 || ARCH_X86_64
int main(int argc, char **argv) {
::testing::InitGoogleTest(&argc, argv);
@ -55,7 +57,7 @@ int main(int argc, char **argv) {
append_negative_gtest_filter(":AVX.*:AVX/*");
if (!(simd_caps & HAS_AVX2))
append_negative_gtest_filter(":AVX2.*:AVX2/*");
#endif
#endif // ARCH_X86 || ARCH_X86_64
#if !CONFIG_SHARED
// Shared library builds don't support whitebox tests

View File

@ -19,8 +19,7 @@
// Macros
#define GET_PARAM(k) std::tr1::get< k >(GetParam())
static double compute_psnr(const vpx_image_t *img1,
const vpx_image_t *img2) {
inline double compute_psnr(const vpx_image_t *img1, const vpx_image_t *img2) {
assert((img1->fmt == img2->fmt) &&
(img1->d_w == img2->d_w) &&
(img1->d_h == img2->d_h));

View File

@ -48,7 +48,7 @@ static std::string GetDataPath() {
#undef TO_STRING
#undef STRINGIFY
static FILE *OpenTestDataFile(const std::string& file_name) {
inline FILE *OpenTestDataFile(const std::string& file_name) {
const std::string path_to_source = GetDataPath() + "/" + file_name;
return fopen(path_to_source.c_str(), "rb");
}

112
test/vp10_dct_test.cc Normal file
View File

@ -0,0 +1,112 @@
/*
* Copyright (c) 2015 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include <math.h>
#include <stdlib.h>
#include <new>
#include "third_party/googletest/src/include/gtest/gtest.h"
#include "test/acm_random.h"
#include "test/util.h"
#include "./vpx_config.h"
#include "vpx_ports/msvc.h"
#undef CONFIG_COEFFICIENT_RANGE_CHECKING
#define CONFIG_COEFFICIENT_RANGE_CHECKING 1
#include "vp10/encoder/dct.c"
using libvpx_test::ACMRandom;
namespace {
void reference_dct_1d(const double *in, double *out, int size) {
const double PI = 3.141592653589793238462643383279502884;
const double kInvSqrt2 = 0.707106781186547524400844362104;
for (int k = 0; k < size; ++k) {
out[k] = 0;
for (int n = 0; n < size; ++n) {
out[k] += in[n] * cos(PI * (2 * n + 1) * k / (2 * size));
}
if (k == 0)
out[k] = out[k] * kInvSqrt2;
}
}
typedef void (*FdctFuncRef)(const double *in, double *out, int size);
typedef void (*IdctFuncRef)(const double *in, double *out, int size);
typedef void (*FdctFunc)(const tran_low_t *in, tran_low_t *out);
typedef void (*IdctFunc)(const tran_low_t *in, tran_low_t *out);
class TransTestBase {
public:
virtual ~TransTestBase() {}
protected:
void RunFwdAccuracyCheck() {
tran_low_t *input = new tran_low_t[txfm_size_];
tran_low_t *output = new tran_low_t[txfm_size_];
double *ref_input = new double[txfm_size_];
double *ref_output = new double[txfm_size_];
ACMRandom rnd(ACMRandom::DeterministicSeed());
const int count_test_block = 5000;
for (int ti = 0; ti < count_test_block; ++ti) {
for (int ni = 0; ni < txfm_size_; ++ni) {
input[ni] = rnd.Rand8() - rnd.Rand8();
ref_input[ni] = static_cast<double>(input[ni]);
}
fwd_txfm_(input, output);
fwd_txfm_ref_(ref_input, ref_output, txfm_size_);
for (int ni = 0; ni < txfm_size_; ++ni) {
EXPECT_LE(
abs(output[ni] - static_cast<tran_low_t>(round(ref_output[ni]))),
max_error_);
}
}
delete[] input;
delete[] output;
delete[] ref_input;
delete[] ref_output;
}
double max_error_;
int txfm_size_;
FdctFunc fwd_txfm_;
FdctFuncRef fwd_txfm_ref_;
};
typedef std::tr1::tuple<FdctFunc, FdctFuncRef, int, int> FdctParam;
class Vp10FwdTxfm
: public TransTestBase,
public ::testing::TestWithParam<FdctParam> {
public:
virtual void SetUp() {
fwd_txfm_ = GET_PARAM(0);
fwd_txfm_ref_ = GET_PARAM(1);
txfm_size_ = GET_PARAM(2);
max_error_ = GET_PARAM(3);
}
virtual void TearDown() {}
};
TEST_P(Vp10FwdTxfm, RunFwdAccuracyCheck) {
RunFwdAccuracyCheck();
}
INSTANTIATE_TEST_CASE_P(
C, Vp10FwdTxfm,
::testing::Values(
FdctParam(&fdct4, &reference_dct_1d, 4, 1),
FdctParam(&fdct8, &reference_dct_1d, 8, 1),
FdctParam(&fdct16, &reference_dct_1d, 16, 2),
FdctParam(&fdct32, &reference_dct_1d, 32, 4)));
} // namespace

321
test/vp10_inv_txfm_test.cc Normal file
View File

@ -0,0 +1,321 @@
/*
* Copyright (c) 2013 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include <math.h>
#include <stdlib.h>
#include <string.h>
#include "third_party/googletest/src/include/gtest/gtest.h"
#include "./vp10_rtcd.h"
#include "./vpx_dsp_rtcd.h"
#include "test/acm_random.h"
#include "test/clear_system_state.h"
#include "test/register_state_check.h"
#include "test/util.h"
#include "vp10/common/blockd.h"
#include "vp10/common/scan.h"
#include "vpx/vpx_integer.h"
#include "vp10/common/vp10_inv_txfm.h"
using libvpx_test::ACMRandom;
namespace {
const double PI = 3.141592653589793238462643383279502884;
const double kInvSqrt2 = 0.707106781186547524400844362104;
void reference_idct_1d(const double *in, double *out, int size) {
for (int n = 0; n < size; ++n) {
out[n] = 0;
for (int k = 0; k < size; ++k) {
if (k == 0)
out[n] += kInvSqrt2 * in[k] * cos(PI * (2 * n + 1) * k / (2 * size));
else
out[n] += in[k] * cos(PI * (2 * n + 1) * k / (2 * size));
}
}
}
typedef void (*IdctFuncRef)(const double *in, double *out, int size);
typedef void (*IdctFunc)(const tran_low_t *in, tran_low_t *out);
class TransTestBase {
public:
virtual ~TransTestBase() {}
protected:
void RunInvAccuracyCheck() {
tran_low_t *input = new tran_low_t[txfm_size_];
tran_low_t *output = new tran_low_t[txfm_size_];
double *ref_input = new double[txfm_size_];
double *ref_output = new double[txfm_size_];
ACMRandom rnd(ACMRandom::DeterministicSeed());
const int count_test_block = 5000;
for (int ti = 0; ti < count_test_block; ++ti) {
for (int ni = 0; ni < txfm_size_; ++ni) {
input[ni] = rnd.Rand8() - rnd.Rand8();
ref_input[ni] = static_cast<double>(input[ni]);
}
fwd_txfm_(input, output);
fwd_txfm_ref_(ref_input, ref_output, txfm_size_);
for (int ni = 0; ni < txfm_size_; ++ni) {
EXPECT_LE(
abs(output[ni] - static_cast<tran_low_t>(round(ref_output[ni]))),
max_error_);
}
}
delete[] input;
delete[] output;
delete[] ref_input;
delete[] ref_output;
}
double max_error_;
int txfm_size_;
IdctFunc fwd_txfm_;
IdctFuncRef fwd_txfm_ref_;
};
typedef std::tr1::tuple<IdctFunc, IdctFuncRef, int, int> IdctParam;
class Vp10InvTxfm
: public TransTestBase,
public ::testing::TestWithParam<IdctParam> {
public:
virtual void SetUp() {
fwd_txfm_ = GET_PARAM(0);
fwd_txfm_ref_ = GET_PARAM(1);
txfm_size_ = GET_PARAM(2);
max_error_ = GET_PARAM(3);
}
virtual void TearDown() {}
};
TEST_P(Vp10InvTxfm, RunInvAccuracyCheck) {
RunInvAccuracyCheck();
}
INSTANTIATE_TEST_CASE_P(
C, Vp10InvTxfm,
::testing::Values(
IdctParam(&vp10_idct4_c, &reference_idct_1d, 4, 1),
IdctParam(&vp10_idct8_c, &reference_idct_1d, 8, 2),
IdctParam(&vp10_idct16_c, &reference_idct_1d, 16, 4),
IdctParam(&vp10_idct32_c, &reference_idct_1d, 32, 6))
);
typedef void (*FwdTxfmFunc)(const int16_t *in, tran_low_t *out, int stride);
typedef void (*InvTxfmFunc)(const tran_low_t *in, uint8_t *out, int stride);
typedef std::tr1::tuple<FwdTxfmFunc,
InvTxfmFunc,
InvTxfmFunc,
TX_SIZE, int> PartialInvTxfmParam;
const int kMaxNumCoeffs = 1024;
class Vp10PartialIDctTest
: public ::testing::TestWithParam<PartialInvTxfmParam> {
public:
virtual ~Vp10PartialIDctTest() {}
virtual void SetUp() {
ftxfm_ = GET_PARAM(0);
full_itxfm_ = GET_PARAM(1);
partial_itxfm_ = GET_PARAM(2);
tx_size_ = GET_PARAM(3);
last_nonzero_ = GET_PARAM(4);
}
virtual void TearDown() { libvpx_test::ClearSystemState(); }
protected:
int last_nonzero_;
TX_SIZE tx_size_;
FwdTxfmFunc ftxfm_;
InvTxfmFunc full_itxfm_;
InvTxfmFunc partial_itxfm_;
};
TEST_P(Vp10PartialIDctTest, RunQuantCheck) {
ACMRandom rnd(ACMRandom::DeterministicSeed());
int size;
switch (tx_size_) {
case TX_4X4:
size = 4;
break;
case TX_8X8:
size = 8;
break;
case TX_16X16:
size = 16;
break;
case TX_32X32:
size = 32;
break;
default:
FAIL() << "Wrong Size!";
break;
}
DECLARE_ALIGNED(16, tran_low_t, test_coef_block1[kMaxNumCoeffs]);
DECLARE_ALIGNED(16, tran_low_t, test_coef_block2[kMaxNumCoeffs]);
DECLARE_ALIGNED(16, uint8_t, dst1[kMaxNumCoeffs]);
DECLARE_ALIGNED(16, uint8_t, dst2[kMaxNumCoeffs]);
const int count_test_block = 1000;
const int block_size = size * size;
DECLARE_ALIGNED(16, int16_t, input_extreme_block[kMaxNumCoeffs]);
DECLARE_ALIGNED(16, tran_low_t, output_ref_block[kMaxNumCoeffs]);
int max_error = 0;
for (int i = 0; i < count_test_block; ++i) {
// clear out destination buffer
memset(dst1, 0, sizeof(*dst1) * block_size);
memset(dst2, 0, sizeof(*dst2) * block_size);
memset(test_coef_block1, 0, sizeof(*test_coef_block1) * block_size);
memset(test_coef_block2, 0, sizeof(*test_coef_block2) * block_size);
ACMRandom rnd(ACMRandom::DeterministicSeed());
for (int i = 0; i < count_test_block; ++i) {
// Initialize a test block with input range [-255, 255].
if (i == 0) {
for (int j = 0; j < block_size; ++j)
input_extreme_block[j] = 255;
} else if (i == 1) {
for (int j = 0; j < block_size; ++j)
input_extreme_block[j] = -255;
} else {
for (int j = 0; j < block_size; ++j) {
input_extreme_block[j] = rnd.Rand8() % 2 ? 255 : -255;
}
}
ftxfm_(input_extreme_block, output_ref_block, size);
// quantization with maximum allowed step sizes
test_coef_block1[0] = (output_ref_block[0] / 1336) * 1336;
for (int j = 1; j < last_nonzero_; ++j)
test_coef_block1[vp10_default_scan_orders[tx_size_].scan[j]]
= (output_ref_block[j] / 1828) * 1828;
}
ASM_REGISTER_STATE_CHECK(full_itxfm_(test_coef_block1, dst1, size));
ASM_REGISTER_STATE_CHECK(partial_itxfm_(test_coef_block1, dst2, size));
for (int j = 0; j < block_size; ++j) {
const int diff = dst1[j] - dst2[j];
const int error = diff * diff;
if (max_error < error)
max_error = error;
}
}
EXPECT_EQ(0, max_error)
<< "Error: partial inverse transform produces different results";
}
TEST_P(Vp10PartialIDctTest, ResultsMatch) {
ACMRandom rnd(ACMRandom::DeterministicSeed());
int size;
switch (tx_size_) {
case TX_4X4:
size = 4;
break;
case TX_8X8:
size = 8;
break;
case TX_16X16:
size = 16;
break;
case TX_32X32:
size = 32;
break;
default:
FAIL() << "Wrong Size!";
break;
}
DECLARE_ALIGNED(16, tran_low_t, test_coef_block1[kMaxNumCoeffs]);
DECLARE_ALIGNED(16, tran_low_t, test_coef_block2[kMaxNumCoeffs]);
DECLARE_ALIGNED(16, uint8_t, dst1[kMaxNumCoeffs]);
DECLARE_ALIGNED(16, uint8_t, dst2[kMaxNumCoeffs]);
const int count_test_block = 1000;
const int max_coeff = 32766 / 4;
const int block_size = size * size;
int max_error = 0;
for (int i = 0; i < count_test_block; ++i) {
// clear out destination buffer
memset(dst1, 0, sizeof(*dst1) * block_size);
memset(dst2, 0, sizeof(*dst2) * block_size);
memset(test_coef_block1, 0, sizeof(*test_coef_block1) * block_size);
memset(test_coef_block2, 0, sizeof(*test_coef_block2) * block_size);
int max_energy_leftover = max_coeff * max_coeff;
for (int j = 0; j < last_nonzero_; ++j) {
int16_t coef = static_cast<int16_t>(sqrt(1.0 * max_energy_leftover) *
(rnd.Rand16() - 32768) / 65536);
max_energy_leftover -= coef * coef;
if (max_energy_leftover < 0) {
max_energy_leftover = 0;
coef = 0;
}
test_coef_block1[vp10_default_scan_orders[tx_size_].scan[j]] = coef;
}
memcpy(test_coef_block2, test_coef_block1,
sizeof(*test_coef_block2) * block_size);
ASM_REGISTER_STATE_CHECK(full_itxfm_(test_coef_block1, dst1, size));
ASM_REGISTER_STATE_CHECK(partial_itxfm_(test_coef_block2, dst2, size));
for (int j = 0; j < block_size; ++j) {
const int diff = dst1[j] - dst2[j];
const int error = diff * diff;
if (max_error < error)
max_error = error;
}
}
EXPECT_EQ(0, max_error)
<< "Error: partial inverse transform produces different results";
}
using std::tr1::make_tuple;
INSTANTIATE_TEST_CASE_P(
C, Vp10PartialIDctTest,
::testing::Values(
make_tuple(&vpx_fdct32x32_c,
&vp10_idct32x32_1024_add_c,
&vp10_idct32x32_34_add_c,
TX_32X32, 34),
make_tuple(&vpx_fdct32x32_c,
&vp10_idct32x32_1024_add_c,
&vp10_idct32x32_1_add_c,
TX_32X32, 1),
make_tuple(&vpx_fdct16x16_c,
&vp10_idct16x16_256_add_c,
&vp10_idct16x16_10_add_c,
TX_16X16, 10),
make_tuple(&vpx_fdct16x16_c,
&vp10_idct16x16_256_add_c,
&vp10_idct16x16_1_add_c,
TX_16X16, 1),
make_tuple(&vpx_fdct8x8_c,
&vp10_idct8x8_64_add_c,
&vp10_idct8x8_12_add_c,
TX_8X8, 12),
make_tuple(&vpx_fdct8x8_c,
&vp10_idct8x8_64_add_c,
&vp10_idct8x8_1_add_c,
TX_8X8, 1),
make_tuple(&vpx_fdct4x4_c,
&vp10_idct4x4_16_add_c,
&vp10_idct4x4_1_add_c,
TX_4X4, 1)));
} // namespace

View File

@ -14,38 +14,12 @@
#include "test/encode_test_driver.h"
#include "test/util.h"
#include "test/y4m_video_source.h"
#include "test/yuv_video_source.h"
#include "vp9/decoder/vp9_decoder.h"
typedef vpx_codec_stream_info_t vp9_stream_info_t;
struct vpx_codec_alg_priv {
vpx_codec_priv_t base;
vpx_codec_dec_cfg_t cfg;
vp9_stream_info_t si;
struct VP9Decoder *pbi;
int postproc_cfg_set;
vp8_postproc_cfg_t postproc_cfg;
vpx_decrypt_cb decrypt_cb;
void *decrypt_state;
vpx_image_t img;
int img_avail;
int flushed;
int invert_tile_order;
int frame_parallel_decode;
// External frame buffer info to save for VP9 common.
void *ext_priv; // Private data associated with the external frame buffers.
vpx_get_frame_buffer_cb_fn_t get_ext_fb_cb;
vpx_release_frame_buffer_cb_fn_t release_ext_fb_cb;
};
static vpx_codec_alg_priv_t *get_alg_priv(vpx_codec_ctx_t *ctx) {
return (vpx_codec_alg_priv_t *)ctx->priv;
}
#include "vp9/vp9_dx_iface.c"
namespace {
const unsigned int kFramerate = 50;
const int kCpuUsed = 2;
struct EncodePerfTestVideo {
@ -66,35 +40,26 @@ struct EncodeParameters {
int32_t lossless;
int32_t error_resilient;
int32_t frame_parallel;
int32_t color_range;
vpx_color_space_t cs;
// TODO(JBB): quantizers / bitrate
};
const EncodeParameters kVP9EncodeParameterSet[] = {
{0, 0, 0, 1, 0, VPX_CS_BT_601},
{0, 0, 0, 0, 0, VPX_CS_BT_709},
{0, 0, 1, 0, 0, VPX_CS_BT_2020},
{0, 2, 0, 0, 1, VPX_CS_UNKNOWN},
// TODO(JBB): Test profiles (requires more work).
{0, 0, 0, 1, 0, 0, VPX_CS_BT_601},
{0, 0, 0, 0, 0, 1, VPX_CS_BT_709},
{0, 0, 1, 0, 0, 1, VPX_CS_BT_2020},
{0, 2, 0, 0, 1, 0, VPX_CS_UNKNOWN},
// TODO(JBB): Test profiles (requires more work).
};
int is_extension_y4m(const char *filename) {
const char *dot = strrchr(filename, '.');
if (!dot || dot == filename)
return 0;
else
return !strcmp(dot, ".y4m");
}
class VpxEncoderParmsGetToDecoder
: public ::libvpx_test::EncoderTest,
public ::libvpx_test::CodecTestWith2Params<EncodeParameters, \
public ::libvpx_test::CodecTestWith2Params<EncodeParameters,
EncodePerfTestVideo> {
protected:
VpxEncoderParmsGetToDecoder()
: EncoderTest(GET_PARAM(0)),
encode_parms(GET_PARAM(1)) {
}
: EncoderTest(GET_PARAM(0)), encode_parms(GET_PARAM(1)) {}
virtual ~VpxEncoderParmsGetToDecoder() {}
@ -112,6 +77,7 @@ class VpxEncoderParmsGetToDecoder
::libvpx_test::Encoder *encoder) {
if (video->frame() == 1) {
encoder->Control(VP9E_SET_COLOR_SPACE, encode_parms.cs);
encoder->Control(VP9E_SET_COLOR_RANGE, encode_parms.color_range);
encoder->Control(VP9E_SET_LOSSLESS, encode_parms.lossless);
encoder->Control(VP9E_SET_FRAME_PARALLEL_DECODING,
encode_parms.frame_parallel);
@ -126,33 +92,34 @@ class VpxEncoderParmsGetToDecoder
}
virtual bool HandleDecodeResult(const vpx_codec_err_t res_dec,
const libvpx_test::VideoSource& video,
const libvpx_test::VideoSource &video,
libvpx_test::Decoder *decoder) {
vpx_codec_ctx_t* vp9_decoder = decoder->GetDecoder();
vpx_codec_alg_priv_t* priv =
(vpx_codec_alg_priv_t*) get_alg_priv(vp9_decoder);
VP9Decoder* pbi = priv->pbi;
VP9_COMMON* common = &pbi->common;
vpx_codec_ctx_t *const vp9_decoder = decoder->GetDecoder();
vpx_codec_alg_priv_t *const priv =
reinterpret_cast<vpx_codec_alg_priv_t *>(vp9_decoder->priv);
FrameWorkerData *const worker_data =
reinterpret_cast<FrameWorkerData *>(priv->frame_workers[0].data1);
VP9_COMMON *const common = &worker_data->pbi->common;
if (encode_parms.lossless) {
EXPECT_EQ(common->base_qindex, 0);
EXPECT_EQ(common->y_dc_delta_q, 0);
EXPECT_EQ(common->uv_dc_delta_q, 0);
EXPECT_EQ(common->uv_ac_delta_q, 0);
EXPECT_EQ(common->tx_mode, ONLY_4X4);
EXPECT_EQ(0, common->base_qindex);
EXPECT_EQ(0, common->y_dc_delta_q);
EXPECT_EQ(0, common->uv_dc_delta_q);
EXPECT_EQ(0, common->uv_ac_delta_q);
EXPECT_EQ(ONLY_4X4, common->tx_mode);
}
EXPECT_EQ(common->error_resilient_mode, encode_parms.error_resilient);
EXPECT_EQ(encode_parms.error_resilient, common->error_resilient_mode);
if (encode_parms.error_resilient) {
EXPECT_EQ(common->frame_parallel_decoding_mode, 1);
EXPECT_EQ(common->use_prev_frame_mvs, 0);
EXPECT_EQ(1, common->frame_parallel_decoding_mode);
EXPECT_EQ(0, common->use_prev_frame_mvs);
} else {
EXPECT_EQ(common->frame_parallel_decoding_mode,
encode_parms.frame_parallel);
EXPECT_EQ(encode_parms.frame_parallel,
common->frame_parallel_decoding_mode);
}
EXPECT_EQ(common->color_space, encode_parms.cs);
EXPECT_EQ(common->log2_tile_cols, encode_parms.tile_cols);
EXPECT_EQ(common->log2_tile_rows, encode_parms.tile_rows);
EXPECT_EQ(encode_parms.color_range, common->color_range);
EXPECT_EQ(encode_parms.cs, common->color_space);
EXPECT_EQ(encode_parms.tile_cols, common->log2_tile_cols);
EXPECT_EQ(encode_parms.tile_rows, common->log2_tile_rows);
EXPECT_EQ(VPX_CODEC_OK, res_dec) << decoder->DecodeError();
return VPX_CODEC_OK == res_dec;
@ -164,35 +131,18 @@ class VpxEncoderParmsGetToDecoder
EncodeParameters encode_parms;
};
// TODO(hkuang): This test conflicts with frame parallel decode. So disable it
// for now until fix.
TEST_P(VpxEncoderParmsGetToDecoder, DISABLED_BitstreamParms) {
TEST_P(VpxEncoderParmsGetToDecoder, BitstreamParms) {
init_flags_ = VPX_CODEC_USE_PSNR;
libvpx_test::VideoSource *video;
if (is_extension_y4m(test_video_.name)) {
video = new libvpx_test::Y4mVideoSource(test_video_.name,
0, test_video_.frames);
} else {
video = new libvpx_test::YUVVideoSource(test_video_.name,
VPX_IMG_FMT_I420,
test_video_.width,
test_video_.height,
kFramerate, 1, 0,
test_video_.frames);
}
libvpx_test::VideoSource *const video =
new libvpx_test::Y4mVideoSource(test_video_.name, 0, test_video_.frames);
ASSERT_TRUE(video != NULL);
ASSERT_NO_FATAL_FAILURE(RunLoop(video));
delete(video);
delete video;
}
VP9_INSTANTIATE_TEST_CASE(
VpxEncoderParmsGetToDecoder,
::testing::ValuesIn(kVP9EncodeParameterSet),
::testing::ValuesIn(kVP9EncodePerfTestVectors));
VP10_INSTANTIATE_TEST_CASE(
VpxEncoderParmsGetToDecoder,
::testing::ValuesIn(kVP9EncodeParameterSet),
::testing::ValuesIn(kVP9EncodePerfTestVectors));
VP9_INSTANTIATE_TEST_CASE(VpxEncoderParmsGetToDecoder,
::testing::ValuesIn(kVP9EncodeParameterSet),
::testing::ValuesIn(kVP9EncodePerfTestVectors));
} // namespace

View File

@ -1,7 +1,10 @@
URL: https://chromium.googlesource.com/webm/libwebm
Version: 2dec09426ab62b794464cc9971bd135b4d313e65
Version: 476366249e1fda7710a389cd41c57db42305e0d4
License: BSD
License File: LICENSE.txt
Description:
libwebm is used to handle WebM container I/O.
Local Changes:
* <none>

View File

@ -528,7 +528,7 @@ class Tracks {
public:
// Audio and video type defined by the Matroska specs.
enum { kVideo = 0x1, kAudio = 0x2 };
// Opus, Vorbis, VP8, and VP9 codec ids defined by the Matroska specs.
static const char kOpusCodecId[];
static const char kVorbisCodecId[];
static const char kVp8CodecId[];

File diff suppressed because it is too large Load Diff

View File

@ -9,12 +9,13 @@
#ifndef MKVPARSER_HPP
#define MKVPARSER_HPP
#include <cstdlib>
#include <cstdio>
#include <cstddef>
#include <cstdio>
#include <cstdlib>
namespace mkvparser {
const int E_PARSE_FAILED = -1;
const int E_FILE_FORMAT_INVALID = -2;
const int E_BUFFER_NOT_FULL = -3;
@ -27,8 +28,11 @@ class IMkvReader {
virtual ~IMkvReader();
};
template<typename Type> Type* SafeArrayAlloc(unsigned long long num_elements,
unsigned long long element_size);
long long GetUIntLength(IMkvReader*, long long, long&);
long long ReadUInt(IMkvReader*, long long, long&);
long long ReadID(IMkvReader* pReader, long long pos, long& len);
long long UnserializeUInt(IMkvReader*, long long pos, long long size);
long UnserializeFloat(IMkvReader*, long long pos, long long size, double&);
@ -833,7 +837,7 @@ class Cues {
private:
bool Init() const;
void PreloadCuePoint(long&, long long) const;
bool PreloadCuePoint(long&, long long) const;
mutable CuePoint** m_cue_points;
mutable long m_count;
@ -999,8 +1003,8 @@ class Segment {
long DoLoadClusterUnknownSize(long long&, long&);
long DoParseNext(const Cluster*&, long long&, long&);
void AppendCluster(Cluster*);
void PreloadCluster(Cluster*, ptrdiff_t);
bool AppendCluster(Cluster*);
bool PreloadCluster(Cluster*, ptrdiff_t);
// void ParseSeekHead(long long pos, long long size);
// void ParseSeekEntry(long long pos, long long size);

View File

@ -41,6 +41,7 @@ enum MkvId {
kMkvTimecodeScale = 0x2AD7B1,
kMkvDuration = 0x4489,
kMkvDateUTC = 0x4461,
kMkvTitle = 0x7BA9,
kMkvMuxingApp = 0x4D80,
kMkvWritingApp = 0x5741,
// Cluster
@ -107,9 +108,16 @@ enum MkvId {
kMkvContentEncodingOrder = 0x5031,
kMkvContentEncodingScope = 0x5032,
kMkvContentEncodingType = 0x5033,
kMkvContentCompression = 0x5034,
kMkvContentCompAlgo = 0x4254,
kMkvContentCompSettings = 0x4255,
kMkvContentEncryption = 0x5035,
kMkvContentEncAlgo = 0x47E1,
kMkvContentEncKeyID = 0x47E2,
kMkvContentSignature = 0x47E3,
kMkvContentSigKeyID = 0x47E4,
kMkvContentSigAlgo = 0x47E5,
kMkvContentSigHashAlgo = 0x47E6,
kMkvContentEncAESSettings = 0x47E7,
kMkvAESSettingsCipherMode = 0x47E8,
kMkvAESSettingsCipherInitData = 0x47E9,

View File

@ -20,3 +20,5 @@ Copy PIC 'GLOBAL' macros from x86_abi_support.asm
Use .text instead of .rodata on macho to avoid broken tables in PIC mode.
Use .text with no alignment for aout
Only use 'hidden' visibility with Chromium
Move '%use smartalign' for nasm out of 'INIT_CPUFLAGS' and before
'ALIGNMODE'.

View File

@ -876,6 +876,10 @@ SECTION .note.GNU-stack noalloc noexec nowrite progbits
%define cpuflag(x) ((cpuflags & (cpuflags_ %+ x)) == (cpuflags_ %+ x))
%define notcpuflag(x) ((cpuflags & (cpuflags_ %+ x)) != (cpuflags_ %+ x))
%ifdef __NASM_VER__
%use smartalign
%endif
; Takes an arbitrary number of cpuflags from the above list.
; All subsequent functions (up to the next INIT_CPUFLAGS) is built for the specified cpu.
; You shouldn't need to invoke this macro directly, it's a subroutine for INIT_MMX &co.
@ -912,7 +916,6 @@ SECTION .note.GNU-stack noalloc noexec nowrite progbits
%endif
%ifdef __NASM_VER__
%use smartalign
ALIGNMODE k7
%elif ARCH_X86_64 || cpuflag(sse2)
CPU amdnop

View File

@ -14,6 +14,7 @@
#include "./vpx_config.h"
#include "vpx_dsp/vpx_dsp_common.h"
#include "vpx_ports/mem.h"
#include "vpx_scale/yv12config.h"
@ -69,6 +70,9 @@ typedef struct {
PREDICTION_MODE mode;
TX_SIZE tx_size;
int8_t skip;
#if CONFIG_MISC_FIXES
int8_t has_no_coeffs;
#endif
int8_t segment_id;
int8_t seg_id_predicted; // valid only when temporal_update is enabled
@ -178,7 +182,6 @@ typedef struct macroblockd {
int mb_to_bottom_edge;
FRAME_CONTEXT *fc;
int frame_parallel_decoding_mode;
/* pointers to reference frames */
RefBuffer *block_refs[2];
@ -286,7 +289,7 @@ static INLINE TX_SIZE get_uv_tx_size_impl(TX_SIZE y_tx_size, BLOCK_SIZE bsize,
return TX_4X4;
} else {
const BLOCK_SIZE plane_bsize = ss_size_lookup[bsize][xss][yss];
return MIN(y_tx_size, max_txsize_lookup[plane_bsize]);
return VPXMIN(y_tx_size, max_txsize_lookup[plane_bsize]);
}
}

View File

@ -13,6 +13,7 @@
#include "vp10/common/enums.h"
#include "vpx/vpx_integer.h"
#include "vpx_dsp/vpx_dsp_common.h"
#ifdef __cplusplus
extern "C" {
@ -35,7 +36,7 @@ static const uint8_t num_8x8_blocks_wide_lookup[BLOCK_SIZES] =
static const uint8_t num_8x8_blocks_high_lookup[BLOCK_SIZES] =
{1, 1, 1, 1, 2, 1, 2, 4, 2, 4, 8, 4, 8};
// MIN(3, MIN(b_width_log2(bsize), b_height_log2(bsize)))
// VPXMIN(3, VPXMIN(b_width_log2(bsize), b_height_log2(bsize)))
static const uint8_t size_group_lookup[BLOCK_SIZES] =
{0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3};

View File

@ -484,12 +484,12 @@ void vp10_setup_past_independence(VP10_COMMON *cm) {
vp10_init_mv_probs(cm);
cm->fc->initialized = 1;
if (cm->frame_type == KEY_FRAME ||
cm->error_resilient_mode || cm->reset_frame_context == 3) {
if (cm->frame_type == KEY_FRAME || cm->error_resilient_mode ||
cm->reset_frame_context == RESET_FRAME_CONTEXT_ALL) {
// Reset all frame contexts.
for (i = 0; i < FRAME_CONTEXTS; ++i)
cm->frame_contexts[i] = *cm->fc;
} else if (cm->reset_frame_context == 2) {
} else if (cm->reset_frame_context == RESET_FRAME_CONTEXT_CURRENT) {
// Reset only the frame context specified in the frame header.
cm->frame_contexts[cm->frame_context_idx] = *cm->fc;
}
@ -499,7 +499,5 @@ void vp10_setup_past_independence(VP10_COMMON *cm) {
memset(cm->prev_mip, 0,
cm->mi_stride * (cm->mi_rows + 1) * sizeof(*cm->prev_mip));
vp10_zero(cm->ref_frame_sign_bias);
cm->frame_context_idx = 0;
}

View File

@ -161,17 +161,19 @@ static void inc_mv_component(int v, nmv_component_counts *comp_counts,
}
}
void vp10_inc_mv(const MV *mv, nmv_context_counts *counts) {
void vp10_inc_mv(const MV *mv, nmv_context_counts *counts, const int usehp) {
if (counts != NULL) {
const MV_JOINT_TYPE j = vp10_get_mv_joint(mv);
++counts->joints[j];
if (mv_joint_vertical(j)) {
inc_mv_component(mv->row, &counts->comps[0], 1, 1);
inc_mv_component(mv->row, &counts->comps[0], 1,
!CONFIG_MISC_FIXES || usehp);
}
if (mv_joint_horizontal(j)) {
inc_mv_component(mv->col, &counts->comps[1], 1, 1);
inc_mv_component(mv->col, &counts->comps[1], 1,
!CONFIG_MISC_FIXES || usehp);
}
}
}

View File

@ -124,7 +124,7 @@ typedef struct {
nmv_component_counts comps[2];
} nmv_context_counts;
void vp10_inc_mv(const MV *mv, nmv_context_counts *mvctx);
void vp10_inc_mv(const MV *mv, nmv_context_counts *mvctx, const int usehp);
#ifdef __cplusplus
} // extern "C"

View File

@ -13,6 +13,7 @@
#include "vp10/common/loopfilter.h"
#include "vp10/common/onyxc_int.h"
#include "vp10/common/reconinter.h"
#include "vpx_dsp/vpx_dsp_common.h"
#include "vpx_mem/vpx_mem.h"
#include "vpx_ports/mem.h"
@ -753,8 +754,13 @@ static void build_masks(const loop_filter_info_n *const lfi_n,
// If the block has no coefficients and is not intra we skip applying
// the loop filter on block edges.
#if CONFIG_MISC_FIXES
if ((mbmi->skip || mbmi->has_no_coeffs) && is_inter_block(mbmi))
return;
#else
if (mbmi->skip && is_inter_block(mbmi))
return;
#endif
// Here we are adding a mask for the transform size. The transform
// size mask is set to be correct for a 64x64 prediction block size. We
@ -811,8 +817,13 @@ static void build_y_mask(const loop_filter_info_n *const lfi_n,
*above_y |= above_prediction_mask[block_size] << shift_y;
*left_y |= left_prediction_mask[block_size] << shift_y;
#if CONFIG_MISC_FIXES
if ((mbmi->skip || mbmi->has_no_coeffs) && is_inter_block(mbmi))
return;
#else
if (mbmi->skip && is_inter_block(mbmi))
return;
#endif
*above_y |= (size_mask[block_size] &
above_64x64_txform_mask[tx_size_y]) << shift_y;
@ -1588,7 +1599,7 @@ void vp10_loop_filter_frame(YV12_BUFFER_CONFIG *frame,
if (partial_frame && cm->mi_rows > 8) {
start_mi_row = cm->mi_rows >> 1;
start_mi_row &= 0xfffffff8;
mi_rows_to_filter = MAX(cm->mi_rows / 8, 8);
mi_rows_to_filter = VPXMAX(cm->mi_rows / 8, 8);
}
end_mi_row = start_mi_row + mi_rows_to_filter;
vp10_loop_filter_frame_init(cm, frame_filter_level);

View File

@ -125,8 +125,10 @@ static void find_mv_refs_idx(const VP10_COMMON *cm, const MACROBLOCKD *xd,
}
if (prev_frame_mvs->ref_frame[1] > INTRA_FRAME &&
prev_frame_mvs->ref_frame[1] != ref_frame &&
prev_frame_mvs->mv[1].as_int != prev_frame_mvs->mv[0].as_int) {
#if !CONFIG_MISC_FIXES
prev_frame_mvs->mv[1].as_int != prev_frame_mvs->mv[0].as_int &&
#endif
prev_frame_mvs->ref_frame[1] != ref_frame) {
int_mv mv = prev_frame_mvs->mv[1];
if (ref_sign_bias[prev_frame_mvs->ref_frame[1]] !=
ref_sign_bias[ref_frame]) {

View File

@ -180,8 +180,9 @@ static INLINE int_mv scale_mv(const MB_MODE_INFO *mbmi, int ref,
ADD_MV_REF_LIST(scale_mv((mbmi), 0, ref_frame, ref_sign_bias), \
refmv_count, mv_ref_list, Done); \
if (has_second_ref(mbmi) && \
(mbmi)->ref_frame[1] != ref_frame && \
(mbmi)->mv[1].as_int != (mbmi)->mv[0].as_int) \
(CONFIG_MISC_FIXES || \
(mbmi)->mv[1].as_int != (mbmi)->mv[0].as_int) && \
(mbmi)->ref_frame[1] != ref_frame) \
ADD_MV_REF_LIST(scale_mv((mbmi), 1, ref_frame, ref_sign_bias), \
refmv_count, mv_ref_list, Done); \
} \

View File

@ -57,6 +57,29 @@ typedef enum {
REFERENCE_MODES = 3,
} REFERENCE_MODE;
typedef enum {
RESET_FRAME_CONTEXT_NONE = 0,
RESET_FRAME_CONTEXT_CURRENT = 1,
RESET_FRAME_CONTEXT_ALL = 2,
} RESET_FRAME_CONTEXT_MODE;
typedef enum {
/**
* Don't update frame context
*/
REFRESH_FRAME_CONTEXT_OFF,
/**
* Update frame context to values resulting from forward probability
* updates signaled in the frame header
*/
REFRESH_FRAME_CONTEXT_FORWARD,
/**
* Update frame context to values resulting from backward probability
* updates based on entropy/counts in the decoded frame
*/
REFRESH_FRAME_CONTEXT_BACKWARD,
} REFRESH_FRAME_CONTEXT_MODE;
typedef struct {
int_mv mv[2];
MV_REFERENCE_FRAME ref_frame[2];
@ -106,6 +129,7 @@ typedef struct BufferPool {
typedef struct VP10Common {
struct vpx_internal_error_info error;
vpx_color_space_t color_space;
int color_range;
int width;
int height;
int display_width;
@ -161,10 +185,8 @@ typedef struct VP10Common {
int allow_high_precision_mv;
// Flag signaling that the frame context should be reset to default values.
// 0 or 1 implies don't reset, 2 reset just the context specified in the
// frame header, 3 reset all contexts.
int reset_frame_context;
// Flag signaling which frame contexts should be reset to default values.
RESET_FRAME_CONTEXT_MODE reset_frame_context;
// MBs, mb_rows/cols is in 16-pixel units; mi_rows/cols is in
// MODE_INFO (8-pixel) units.
@ -222,15 +244,15 @@ typedef struct VP10Common {
loop_filter_info_n lf_info;
int refresh_frame_context; /* Two state 0 = NO, 1 = YES */
// Flag signaling how frame contexts should be updated at the end of
// a frame decode
REFRESH_FRAME_CONTEXT_MODE refresh_frame_context;
int ref_frame_sign_bias[MAX_REF_FRAMES]; /* Two state 0, 1 */
struct loopfilter lf;
struct segmentation seg;
// TODO(hkuang): Remove this as it is the same as frame_parallel_decode
// in pbi.
int frame_parallel_decode; // frame-based threading.
// Context probabilities for reference frame prediction
@ -255,7 +277,6 @@ typedef struct VP10Common {
#endif
int error_resilient_mode;
int frame_parallel_decoding_mode;
int log2_tile_cols, log2_tile_rows;
int byte_alignment;
@ -370,7 +391,6 @@ static INLINE void vp10_init_macroblockd(VP10_COMMON *cm, MACROBLOCKD *xd,
memcpy(xd->plane[i].seg_dequant, cm->uv_dequant, sizeof(cm->uv_dequant));
}
xd->fc = cm->fc;
xd->frame_parallel_decoding_mode = cm->frame_parallel_decoding_mode;
}
xd->above_seg_context = cm->above_seg_context;

View File

@ -16,6 +16,7 @@
#include "./vpx_scale_rtcd.h"
#include "./vp10_rtcd.h"
#include "vpx_dsp/vpx_dsp_common.h"
#include "vpx_ports/mem.h"
#include "vpx_ports/system_state.h"
#include "vpx_scale/vpx_scale.h"
@ -625,7 +626,7 @@ static void swap_mi_and_prev_mi(VP10_COMMON *cm) {
int vp10_post_proc_frame(struct VP10Common *cm,
YV12_BUFFER_CONFIG *dest, vp10_ppflags_t *ppflags) {
const int q = MIN(105, cm->lf.filter_level * 2);
const int q = VPXMIN(105, cm->lf.filter_level * 2);
const int flags = ppflags->post_proc_flag;
YV12_BUFFER_CONFIG *const ppbuf = &cm->post_proc_buffer;
struct postproc_state *const ppstate = &cm->postproc_state;

View File

@ -13,6 +13,7 @@
#include "vp10/common/blockd.h"
#include "vp10/common/onyxc_int.h"
#include "vpx_dsp/vpx_dsp_common.h"
#ifdef __cplusplus
extern "C" {
@ -24,14 +25,14 @@ static INLINE int get_segment_id(const VP10_COMMON *cm,
const int mi_offset = mi_row * cm->mi_cols + mi_col;
const int bw = num_8x8_blocks_wide_lookup[bsize];
const int bh = num_8x8_blocks_high_lookup[bsize];
const int xmis = MIN(cm->mi_cols - mi_col, bw);
const int ymis = MIN(cm->mi_rows - mi_row, bh);
const int xmis = VPXMIN(cm->mi_cols - mi_col, bw);
const int ymis = VPXMIN(cm->mi_rows - mi_row, bh);
int x, y, segment_id = MAX_SEGMENTS;
for (y = 0; y < ymis; ++y)
for (x = 0; x < xmis; ++x)
segment_id = MIN(segment_id,
segment_ids[mi_offset + y * cm->mi_cols + x]);
segment_id =
VPXMIN(segment_id, segment_ids[mi_offset + y * cm->mi_cols + x]);
assert(segment_id >= 0 && segment_id < MAX_SEGMENTS);
return segment_id;

View File

@ -135,20 +135,26 @@ static void build_inter_predictors_for_planes(MACROBLOCKD *xd, BLOCK_SIZE bsize,
const int mi_x = mi_col * MI_SIZE;
const int mi_y = mi_row * MI_SIZE;
for (plane = plane_from; plane <= plane_to; ++plane) {
const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize,
&xd->plane[plane]);
const int num_4x4_w = num_4x4_blocks_wide_lookup[plane_bsize];
const int num_4x4_h = num_4x4_blocks_high_lookup[plane_bsize];
const int bw = 4 * num_4x4_w;
const int bh = 4 * num_4x4_h;
const struct macroblockd_plane *pd = &xd->plane[plane];
const int bw = 4 * num_4x4_blocks_wide_lookup[bsize] >> pd->subsampling_x;
const int bh = 4 * num_4x4_blocks_high_lookup[bsize] >> pd->subsampling_y;
if (xd->mi[0]->mbmi.sb_type < BLOCK_8X8) {
int i = 0, x, y;
const PARTITION_TYPE bp = bsize - xd->mi[0]->mbmi.sb_type;
const int have_vsplit = bp != PARTITION_HORZ;
const int have_hsplit = bp != PARTITION_VERT;
const int num_4x4_w = 2 >> ((!have_vsplit) | pd->subsampling_x);
const int num_4x4_h = 2 >> ((!have_hsplit) | pd->subsampling_y);
const int pw = 8 >> (have_vsplit | pd->subsampling_x);
const int ph = 8 >> (have_hsplit | pd->subsampling_y);
int x, y;
assert(bp != PARTITION_NONE && bp < PARTITION_TYPES);
assert(bsize == BLOCK_8X8);
assert(pw * num_4x4_w == bw && ph * num_4x4_h == bh);
for (y = 0; y < num_4x4_h; ++y)
for (x = 0; x < num_4x4_w; ++x)
build_inter_predictors(xd, plane, i++, bw, bh,
4 * x, 4 * y, 4, 4, mi_x, mi_y);
build_inter_predictors(xd, plane, y * 2 + x, bw, bh,
4 * x, 4 * y, pw, ph, mi_x, mi_y);
} else {
build_inter_predictors(xd, plane, 0, bw, bh,
0, 0, bw, bh, mi_x, mi_y);

View File

@ -34,14 +34,14 @@ static INLINE void inter_predictor(const uint8_t *src, int src_stride,
}
#if CONFIG_VP9_HIGHBITDEPTH
static void high_inter_predictor(const uint8_t *src, int src_stride,
uint8_t *dst, int dst_stride,
const int subpel_x,
const int subpel_y,
const struct scale_factors *sf,
int w, int h, int ref,
const InterpKernel *kernel,
int xs, int ys, int bd) {
static INLINE void high_inter_predictor(const uint8_t *src, int src_stride,
uint8_t *dst, int dst_stride,
const int subpel_x,
const int subpel_y,
const struct scale_factors *sf,
int w, int h, int ref,
const InterpKernel *kernel,
int xs, int ys, int bd) {
sf->highbd_predict[subpel_x != 0][subpel_y != 0][ref](
src, src_stride, dst, dst_stride,
kernel[subpel_x], xs, kernel[subpel_y], ys, w, h, bd);
@ -77,8 +77,9 @@ static MV mi_mv_pred_q2(const MODE_INFO *mi, int idx, int block0, int block1) {
}
// TODO(jkoleszar): yet another mv clamping function :-(
static MV clamp_mv_to_umv_border_sb(const MACROBLOCKD *xd, const MV *src_mv,
int bw, int bh, int ss_x, int ss_y) {
static INLINE MV clamp_mv_to_umv_border_sb(const MACROBLOCKD *xd,
const MV *src_mv,
int bw, int bh, int ss_x, int ss_y) {
// If the MV points so far into the UMV border that no visible pixels
// are used for reconstruction, the subpel part of the MV can be
// discarded and the MV limited to 16 pixels with equivalent results.
@ -102,8 +103,8 @@ static MV clamp_mv_to_umv_border_sb(const MACROBLOCKD *xd, const MV *src_mv,
return clamped_mv;
}
static MV average_split_mvs(const struct macroblockd_plane *pd,
const MODE_INFO *mi, int ref, int block) {
static INLINE MV average_split_mvs(const struct macroblockd_plane *pd,
const MODE_INFO *mi, int ref, int block) {
const int ss_idx = ((pd->subsampling_x > 0) << 1) | (pd->subsampling_y > 0);
MV res = {0, 0};
switch (ss_idx) {

View File

@ -695,6 +695,13 @@ DECLARE_ALIGNED(16, static const int16_t, vp10_default_iscan_32x32[1024]) = {
1023,
};
const scan_order vp10_default_scan_orders[TX_SIZES] = {
{default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors},
{default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors},
{default_scan_16x16, vp10_default_iscan_16x16, default_scan_16x16_neighbors},
{default_scan_32x32, vp10_default_iscan_32x32, default_scan_32x32_neighbors},
};
#if CONFIG_EXT_TX
const scan_order vp10_intra_scan_orders[TX_SIZES][TX_TYPES] = {
{ // TX_4X4

View File

@ -29,6 +29,7 @@ typedef struct {
const int16_t *neighbors;
} scan_order;
extern const scan_order vp10_default_scan_orders[TX_SIZES];
extern const scan_order vp10_intra_scan_orders[TX_SIZES][TX_TYPES];
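/* Usage sketch (TX_8X8 assumed to be the 8x8 entry of TX_SIZES): a caller
 * picks one scan_order per transform size and reads its three tables.
 *   const scan_order *so = &vp10_default_scan_orders[TX_8X8];
 *   so->scan;       scan position -> coefficient index
 *   so->iscan;      coefficient index -> scan position
 *   so->neighbors;  per-position context neighbors for get_coef_context */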
static INLINE int get_coef_context(const int16_t *neighbors,

View File

@ -9,6 +9,7 @@
*/
#include "./vpx_config.h"
#include "vpx_dsp/vpx_dsp_common.h"
#include "vpx_mem/vpx_mem.h"
#include "vp10/common/entropymode.h"
#include "vp10/common/thread_common.h"
@ -165,7 +166,7 @@ static void loop_filter_rows_mt(YV12_BUFFER_CONFIG *frame,
// The decoder may allocate more threads than the number of tiles, based on
// the user's input.
const int tile_cols = 1 << cm->log2_tile_cols;
const int num_workers = MIN(nworkers, tile_cols);
const int num_workers = VPXMIN(nworkers, tile_cols);
int i;
if (!lf_sync->sync_range || sb_rows != lf_sync->rows ||
@ -229,7 +230,7 @@ void vp10_loop_filter_frame_mt(YV12_BUFFER_CONFIG *frame,
if (partial_frame && cm->mi_rows > 8) {
start_mi_row = cm->mi_rows >> 1;
start_mi_row &= 0xfffffff8;
mi_rows_to_filter = MAX(cm->mi_rows / 8, 8);
mi_rows_to_filter = VPXMAX(cm->mi_rows / 8, 8);
}
end_mi_row = start_mi_row + mi_rows_to_filter;
vp10_loop_filter_frame_init(cm, frame_filter_level);
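/* Worked example of the partial-frame path above (numbers assumed): a
 * 720p frame has cm->mi_rows = 90, so
 *   start_mi_row      = 90 >> 1 = 45, aligned down to 40 (an SB row);
 *   mi_rows_to_filter = VPXMAX(90 / 8, 8) = 11;
 *   end_mi_row        = 40 + 11 = 51;
 * i.e. only MI rows 40..50 around the middle of the frame are filtered. */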

View File

@ -14,6 +14,10 @@
#include "vp10/common/loopfilter.h"
#include "vpx_util/vpx_thread.h"
#ifdef __cplusplus
extern "C" {
#endif
struct VP10Common;
struct FRAME_COUNTS;
@ -54,4 +58,8 @@ void vp10_loop_filter_frame_mt(YV12_BUFFER_CONFIG *frame,
void vp10_accumulate_frame_counts(struct VP10Common *cm,
struct FRAME_COUNTS *counts, int is_dec);
#ifdef __cplusplus
} // extern "C"
#endif
#endif // VP10_COMMON_LOOPFILTER_THREAD_H_

View File

@ -9,8 +9,8 @@
*/
#include "vp10/common/tile_common.h"
#include "vp10/common/onyxc_int.h"
#include "vpx_dsp/vpx_dsp_common.h"
#define MIN_TILE_WIDTH_B64 4
#define MAX_TILE_WIDTH_B64 64
@ -18,7 +18,7 @@
static int get_tile_offset(int idx, int mis, int log2) {
const int sb_cols = mi_cols_aligned_to_sb(mis) >> MI_BLOCK_SIZE_LOG2;
const int offset = ((idx * sb_cols) >> log2) << MI_BLOCK_SIZE_LOG2;
return MIN(offset, mis);
return VPXMIN(offset, mis);
}
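/* Worked example (numbers assumed): a 1080p frame has mis = cm->mi_cols =
 * 240, already superblock-aligned, so sb_cols = 240 >> 3 = 30. With four
 * tile columns (log2 == 2), tile column 1 starts at
 *   offset = ((1 * 30) >> 2) << 3 = 7 << 3 = 56 MI units (448 pixels),
 * and the VPXMIN() clamps offsets that would land past the frame. */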
void vp10_tile_set_row(TileInfo *tile, const VP10_COMMON *cm, int row) {

824
vp10/common/vp10_fwd_txfm.c Normal file
View File

@ -0,0 +1,824 @@
/*
* Copyright (c) 2015 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "vp10/common/vp10_fwd_txfm.h"
void vp10_fdct4x4_c(const int16_t *input, tran_low_t *output, int stride) {
// The 2D transform is done with two passes which are actually pretty
// similar. In the first one, we transform the columns and transpose
// the results. In the second one, we transform the rows. To achieve that,
// as the first pass results are transposed, we transpose the columns (that
// is, the transposed rows) and transpose the results (so that they go back
// to normal/row positions).
int pass;
// We need an intermediate buffer between passes.
tran_low_t intermediate[4 * 4];
const int16_t *in_pass0 = input;
const tran_low_t *in = NULL;
tran_low_t *out = intermediate;
// Do the two transform/transpose passes
for (pass = 0; pass < 2; ++pass) {
tran_high_t input[4]; // canbe16
tran_high_t step[4]; // canbe16
tran_high_t temp1, temp2; // needs32
int i;
for (i = 0; i < 4; ++i) {
// Load inputs.
if (0 == pass) {
input[0] = in_pass0[0 * stride] * 16;
input[1] = in_pass0[1 * stride] * 16;
input[2] = in_pass0[2 * stride] * 16;
input[3] = in_pass0[3 * stride] * 16;
if (i == 0 && input[0]) {
input[0] += 1;
}
} else {
input[0] = in[0 * 4];
input[1] = in[1 * 4];
input[2] = in[2 * 4];
input[3] = in[3 * 4];
}
// Transform.
step[0] = input[0] + input[3];
step[1] = input[1] + input[2];
step[2] = input[1] - input[2];
step[3] = input[0] - input[3];
temp1 = (step[0] + step[1]) * cospi_16_64;
temp2 = (step[0] - step[1]) * cospi_16_64;
out[0] = (tran_low_t)fdct_round_shift(temp1);
out[2] = (tran_low_t)fdct_round_shift(temp2);
temp1 = step[2] * cospi_24_64 + step[3] * cospi_8_64;
temp2 = -step[2] * cospi_8_64 + step[3] * cospi_24_64;
out[1] = (tran_low_t)fdct_round_shift(temp1);
out[3] = (tran_low_t)fdct_round_shift(temp2);
// Do next column (which is a transposed row in second/horizontal pass)
in_pass0++;
in++;
out += 4;
}
// Setup in/out for next pass.
in = intermediate;
out = output;
}
{
int i, j;
for (i = 0; i < 4; ++i) {
for (j = 0; j < 4; ++j)
output[j + i * 4] = (output[j + i * 4] + 1) >> 2;
}
}
}
void vp10_fdct4x4_1_c(const int16_t *input, tran_low_t *output, int stride) {
int r, c;
tran_low_t sum = 0;
for (r = 0; r < 4; ++r)
for (c = 0; c < 4; ++c)
sum += input[r * stride + c];
output[0] = sum << 1;
output[1] = 0;
}
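/* The _1 variants compute only the DC coefficient; their scaling matches
 * the DC gain of the full transforms (sum << 1 here, then sum, sum >> 1
 * and sum >> 3 for the 8x8, 16x16 and 32x32 versions below). A minimal
 * consistency sketch, assuming a non-high-bitdepth build where tran_low_t
 * is int16_t: */
#include <assert.h>
#include <stdint.h>
void vp10_fdct4x4_c(const int16_t *input, int16_t *output, int stride);
void vp10_fdct4x4_1_c(const int16_t *input, int16_t *output, int stride);
int main(void) {
  int16_t in[16], full[16], dc_only[16];
  int i;
  for (i = 0; i < 16; ++i) in[i] = 7;      /* constant 4x4 block */
  vp10_fdct4x4_c(in, full, 4);
  vp10_fdct4x4_1_c(in, dc_only, 4);
  assert(dc_only[0] == ((16 * 7) << 1));   /* sum << 1 == 224 */
  assert(full[0] == dc_only[0]);           /* DC terms agree on a flat block */
  return 0;
}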
void vp10_fdct8x8_c(const int16_t *input,
tran_low_t *final_output, int stride) {
int i, j;
tran_low_t intermediate[64];
int pass;
tran_low_t *output = intermediate;
const tran_low_t *in = NULL;
// Transform columns
for (pass = 0; pass < 2; ++pass) {
tran_high_t s0, s1, s2, s3, s4, s5, s6, s7; // canbe16
tran_high_t t0, t1, t2, t3; // needs32
tran_high_t x0, x1, x2, x3; // canbe16
int i;
for (i = 0; i < 8; i++) {
// stage 1
if (pass == 0) {
s0 = (input[0 * stride] + input[7 * stride]) * 4;
s1 = (input[1 * stride] + input[6 * stride]) * 4;
s2 = (input[2 * stride] + input[5 * stride]) * 4;
s3 = (input[3 * stride] + input[4 * stride]) * 4;
s4 = (input[3 * stride] - input[4 * stride]) * 4;
s5 = (input[2 * stride] - input[5 * stride]) * 4;
s6 = (input[1 * stride] - input[6 * stride]) * 4;
s7 = (input[0 * stride] - input[7 * stride]) * 4;
++input;
} else {
s0 = in[0 * 8] + in[7 * 8];
s1 = in[1 * 8] + in[6 * 8];
s2 = in[2 * 8] + in[5 * 8];
s3 = in[3 * 8] + in[4 * 8];
s4 = in[3 * 8] - in[4 * 8];
s5 = in[2 * 8] - in[5 * 8];
s6 = in[1 * 8] - in[6 * 8];
s7 = in[0 * 8] - in[7 * 8];
++in;
}
// fdct4(step, step);
x0 = s0 + s3;
x1 = s1 + s2;
x2 = s1 - s2;
x3 = s0 - s3;
t0 = (x0 + x1) * cospi_16_64;
t1 = (x0 - x1) * cospi_16_64;
t2 = x2 * cospi_24_64 + x3 * cospi_8_64;
t3 = -x2 * cospi_8_64 + x3 * cospi_24_64;
output[0] = (tran_low_t)fdct_round_shift(t0);
output[2] = (tran_low_t)fdct_round_shift(t2);
output[4] = (tran_low_t)fdct_round_shift(t1);
output[6] = (tran_low_t)fdct_round_shift(t3);
// Stage 2
t0 = (s6 - s5) * cospi_16_64;
t1 = (s6 + s5) * cospi_16_64;
t2 = fdct_round_shift(t0);
t3 = fdct_round_shift(t1);
// Stage 3
x0 = s4 + t2;
x1 = s4 - t2;
x2 = s7 - t3;
x3 = s7 + t3;
// Stage 4
t0 = x0 * cospi_28_64 + x3 * cospi_4_64;
t1 = x1 * cospi_12_64 + x2 * cospi_20_64;
t2 = x2 * cospi_12_64 + x1 * -cospi_20_64;
t3 = x3 * cospi_28_64 + x0 * -cospi_4_64;
output[1] = (tran_low_t)fdct_round_shift(t0);
output[3] = (tran_low_t)fdct_round_shift(t2);
output[5] = (tran_low_t)fdct_round_shift(t1);
output[7] = (tran_low_t)fdct_round_shift(t3);
output += 8;
}
in = intermediate;
output = final_output;
}
// Rows
for (i = 0; i < 8; ++i) {
for (j = 0; j < 8; ++j)
final_output[j + i * 8] /= 2;
}
}
void vp10_fdct8x8_1_c(const int16_t *input, tran_low_t *output, int stride) {
int r, c;
tran_low_t sum = 0;
for (r = 0; r < 8; ++r)
for (c = 0; c < 8; ++c)
sum += input[r * stride + c];
output[0] = sum;
output[1] = 0;
}
void vp10_fdct16x16_c(const int16_t *input, tran_low_t *output, int stride) {
// The 2D transform is done with two passes which are actually pretty
// similar. In the first one, we transform the columns and transpose
// the results. In the second one, we transform the rows. To achieve that,
// as the first pass results are transposed, we transpose the columns (that
// is, the transposed rows) and transpose the results (so that they go back
// to normal/row positions).
int pass;
// We need an intermediate buffer between passes.
tran_low_t intermediate[256];
const int16_t *in_pass0 = input;
const tran_low_t *in = NULL;
tran_low_t *out = intermediate;
// Do the two transform/transpose passes
for (pass = 0; pass < 2; ++pass) {
tran_high_t step1[8]; // canbe16
tran_high_t step2[8]; // canbe16
tran_high_t step3[8]; // canbe16
tran_high_t input[8]; // canbe16
tran_high_t temp1, temp2; // needs32
int i;
for (i = 0; i < 16; i++) {
if (0 == pass) {
// Calculate input for the first 8 results.
input[0] = (in_pass0[0 * stride] + in_pass0[15 * stride]) * 4;
input[1] = (in_pass0[1 * stride] + in_pass0[14 * stride]) * 4;
input[2] = (in_pass0[2 * stride] + in_pass0[13 * stride]) * 4;
input[3] = (in_pass0[3 * stride] + in_pass0[12 * stride]) * 4;
input[4] = (in_pass0[4 * stride] + in_pass0[11 * stride]) * 4;
input[5] = (in_pass0[5 * stride] + in_pass0[10 * stride]) * 4;
input[6] = (in_pass0[6 * stride] + in_pass0[ 9 * stride]) * 4;
input[7] = (in_pass0[7 * stride] + in_pass0[ 8 * stride]) * 4;
// Calculate input for the next 8 results.
step1[0] = (in_pass0[7 * stride] - in_pass0[ 8 * stride]) * 4;
step1[1] = (in_pass0[6 * stride] - in_pass0[ 9 * stride]) * 4;
step1[2] = (in_pass0[5 * stride] - in_pass0[10 * stride]) * 4;
step1[3] = (in_pass0[4 * stride] - in_pass0[11 * stride]) * 4;
step1[4] = (in_pass0[3 * stride] - in_pass0[12 * stride]) * 4;
step1[5] = (in_pass0[2 * stride] - in_pass0[13 * stride]) * 4;
step1[6] = (in_pass0[1 * stride] - in_pass0[14 * stride]) * 4;
step1[7] = (in_pass0[0 * stride] - in_pass0[15 * stride]) * 4;
} else {
// Calculate input for the first 8 results.
input[0] = ((in[0 * 16] + 1) >> 2) + ((in[15 * 16] + 1) >> 2);
input[1] = ((in[1 * 16] + 1) >> 2) + ((in[14 * 16] + 1) >> 2);
input[2] = ((in[2 * 16] + 1) >> 2) + ((in[13 * 16] + 1) >> 2);
input[3] = ((in[3 * 16] + 1) >> 2) + ((in[12 * 16] + 1) >> 2);
input[4] = ((in[4 * 16] + 1) >> 2) + ((in[11 * 16] + 1) >> 2);
input[5] = ((in[5 * 16] + 1) >> 2) + ((in[10 * 16] + 1) >> 2);
input[6] = ((in[6 * 16] + 1) >> 2) + ((in[ 9 * 16] + 1) >> 2);
input[7] = ((in[7 * 16] + 1) >> 2) + ((in[ 8 * 16] + 1) >> 2);
// Calculate input for the next 8 results.
step1[0] = ((in[7 * 16] + 1) >> 2) - ((in[ 8 * 16] + 1) >> 2);
step1[1] = ((in[6 * 16] + 1) >> 2) - ((in[ 9 * 16] + 1) >> 2);
step1[2] = ((in[5 * 16] + 1) >> 2) - ((in[10 * 16] + 1) >> 2);
step1[3] = ((in[4 * 16] + 1) >> 2) - ((in[11 * 16] + 1) >> 2);
step1[4] = ((in[3 * 16] + 1) >> 2) - ((in[12 * 16] + 1) >> 2);
step1[5] = ((in[2 * 16] + 1) >> 2) - ((in[13 * 16] + 1) >> 2);
step1[6] = ((in[1 * 16] + 1) >> 2) - ((in[14 * 16] + 1) >> 2);
step1[7] = ((in[0 * 16] + 1) >> 2) - ((in[15 * 16] + 1) >> 2);
}
// Work on the first eight values; fdct8(input, even_results);
{
tran_high_t s0, s1, s2, s3, s4, s5, s6, s7; // canbe16
tran_high_t t0, t1, t2, t3; // needs32
tran_high_t x0, x1, x2, x3; // canbe16
// stage 1
s0 = input[0] + input[7];
s1 = input[1] + input[6];
s2 = input[2] + input[5];
s3 = input[3] + input[4];
s4 = input[3] - input[4];
s5 = input[2] - input[5];
s6 = input[1] - input[6];
s7 = input[0] - input[7];
// fdct4(step, step);
x0 = s0 + s3;
x1 = s1 + s2;
x2 = s1 - s2;
x3 = s0 - s3;
t0 = (x0 + x1) * cospi_16_64;
t1 = (x0 - x1) * cospi_16_64;
t2 = x3 * cospi_8_64 + x2 * cospi_24_64;
t3 = x3 * cospi_24_64 - x2 * cospi_8_64;
out[0] = (tran_low_t)fdct_round_shift(t0);
out[4] = (tran_low_t)fdct_round_shift(t2);
out[8] = (tran_low_t)fdct_round_shift(t1);
out[12] = (tran_low_t)fdct_round_shift(t3);
// Stage 2
t0 = (s6 - s5) * cospi_16_64;
t1 = (s6 + s5) * cospi_16_64;
t2 = fdct_round_shift(t0);
t3 = fdct_round_shift(t1);
// Stage 3
x0 = s4 + t2;
x1 = s4 - t2;
x2 = s7 - t3;
x3 = s7 + t3;
// Stage 4
t0 = x0 * cospi_28_64 + x3 * cospi_4_64;
t1 = x1 * cospi_12_64 + x2 * cospi_20_64;
t2 = x2 * cospi_12_64 + x1 * -cospi_20_64;
t3 = x3 * cospi_28_64 + x0 * -cospi_4_64;
out[2] = (tran_low_t)fdct_round_shift(t0);
out[6] = (tran_low_t)fdct_round_shift(t2);
out[10] = (tran_low_t)fdct_round_shift(t1);
out[14] = (tran_low_t)fdct_round_shift(t3);
}
// Work on the next eight values; step1 -> odd_results
{
// step 2
temp1 = (step1[5] - step1[2]) * cospi_16_64;
temp2 = (step1[4] - step1[3]) * cospi_16_64;
step2[2] = fdct_round_shift(temp1);
step2[3] = fdct_round_shift(temp2);
temp1 = (step1[4] + step1[3]) * cospi_16_64;
temp2 = (step1[5] + step1[2]) * cospi_16_64;
step2[4] = fdct_round_shift(temp1);
step2[5] = fdct_round_shift(temp2);
// step 3
step3[0] = step1[0] + step2[3];
step3[1] = step1[1] + step2[2];
step3[2] = step1[1] - step2[2];
step3[3] = step1[0] - step2[3];
step3[4] = step1[7] - step2[4];
step3[5] = step1[6] - step2[5];
step3[6] = step1[6] + step2[5];
step3[7] = step1[7] + step2[4];
// step 4
temp1 = step3[1] * -cospi_8_64 + step3[6] * cospi_24_64;
temp2 = step3[2] * cospi_24_64 + step3[5] * cospi_8_64;
step2[1] = fdct_round_shift(temp1);
step2[2] = fdct_round_shift(temp2);
temp1 = step3[2] * cospi_8_64 - step3[5] * cospi_24_64;
temp2 = step3[1] * cospi_24_64 + step3[6] * cospi_8_64;
step2[5] = fdct_round_shift(temp1);
step2[6] = fdct_round_shift(temp2);
// step 5
step1[0] = step3[0] + step2[1];
step1[1] = step3[0] - step2[1];
step1[2] = step3[3] + step2[2];
step1[3] = step3[3] - step2[2];
step1[4] = step3[4] - step2[5];
step1[5] = step3[4] + step2[5];
step1[6] = step3[7] - step2[6];
step1[7] = step3[7] + step2[6];
// step 6
temp1 = step1[0] * cospi_30_64 + step1[7] * cospi_2_64;
temp2 = step1[1] * cospi_14_64 + step1[6] * cospi_18_64;
out[1] = (tran_low_t)fdct_round_shift(temp1);
out[9] = (tran_low_t)fdct_round_shift(temp2);
temp1 = step1[2] * cospi_22_64 + step1[5] * cospi_10_64;
temp2 = step1[3] * cospi_6_64 + step1[4] * cospi_26_64;
out[5] = (tran_low_t)fdct_round_shift(temp1);
out[13] = (tran_low_t)fdct_round_shift(temp2);
temp1 = step1[3] * -cospi_26_64 + step1[4] * cospi_6_64;
temp2 = step1[2] * -cospi_10_64 + step1[5] * cospi_22_64;
out[3] = (tran_low_t)fdct_round_shift(temp1);
out[11] = (tran_low_t)fdct_round_shift(temp2);
temp1 = step1[1] * -cospi_18_64 + step1[6] * cospi_14_64;
temp2 = step1[0] * -cospi_2_64 + step1[7] * cospi_30_64;
out[7] = (tran_low_t)fdct_round_shift(temp1);
out[15] = (tran_low_t)fdct_round_shift(temp2);
}
// Do next column (which is a transposed row in second/horizontal pass)
in++;
in_pass0++;
out += 16;
}
// Setup in/out for next pass.
in = intermediate;
out = output;
}
}
void vp10_fdct16x16_1_c(const int16_t *input, tran_low_t *output, int stride) {
int r, c;
tran_low_t sum = 0;
for (r = 0; r < 16; ++r)
for (c = 0; c < 16; ++c)
sum += input[r * stride + c];
output[0] = sum >> 1;
output[1] = 0;
}
static INLINE tran_high_t dct_32_round(tran_high_t input) {
tran_high_t rv = ROUND_POWER_OF_TWO(input, DCT_CONST_BITS);
// TODO(debargha, peter.derivaz): Find new bounds for this assert,
// and make the bounds consts.
// assert(-131072 <= rv && rv <= 131071);
return rv;
}
static INLINE tran_high_t half_round_shift(tran_high_t input) {
tran_high_t rv = (input + 1 + (input < 0)) >> 2;
return rv;
}
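/* dct_32_round() divides by 2^DCT_CONST_BITS with round-to-nearest;
 * half_round_shift() divides by 4 with symmetric rounding, the extra
 * (input < 0) term making negative inputs round like positive ones:
 *   input =  6  ->  ( 6 + 1 + 0) >> 2 =  1    ( 1.5  ->  1)
 *   input = -6  ->  (-6 + 1 + 1) >> 2 = -1    (-1.5  -> -1)
 *   input =  7  ->  ( 7 + 1 + 0) >> 2 =  2    ( 1.75 ->  2)
 *   input = -7  ->  (-7 + 1 + 1) >> 2 = -2    (-1.75 -> -2)
 */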
void vp10_fdct32(const tran_high_t *input, tran_high_t *output, int round) {
tran_high_t step[32];
// Stage 1
step[0] = input[0] + input[(32 - 1)];
step[1] = input[1] + input[(32 - 2)];
step[2] = input[2] + input[(32 - 3)];
step[3] = input[3] + input[(32 - 4)];
step[4] = input[4] + input[(32 - 5)];
step[5] = input[5] + input[(32 - 6)];
step[6] = input[6] + input[(32 - 7)];
step[7] = input[7] + input[(32 - 8)];
step[8] = input[8] + input[(32 - 9)];
step[9] = input[9] + input[(32 - 10)];
step[10] = input[10] + input[(32 - 11)];
step[11] = input[11] + input[(32 - 12)];
step[12] = input[12] + input[(32 - 13)];
step[13] = input[13] + input[(32 - 14)];
step[14] = input[14] + input[(32 - 15)];
step[15] = input[15] + input[(32 - 16)];
step[16] = -input[16] + input[(32 - 17)];
step[17] = -input[17] + input[(32 - 18)];
step[18] = -input[18] + input[(32 - 19)];
step[19] = -input[19] + input[(32 - 20)];
step[20] = -input[20] + input[(32 - 21)];
step[21] = -input[21] + input[(32 - 22)];
step[22] = -input[22] + input[(32 - 23)];
step[23] = -input[23] + input[(32 - 24)];
step[24] = -input[24] + input[(32 - 25)];
step[25] = -input[25] + input[(32 - 26)];
step[26] = -input[26] + input[(32 - 27)];
step[27] = -input[27] + input[(32 - 28)];
step[28] = -input[28] + input[(32 - 29)];
step[29] = -input[29] + input[(32 - 30)];
step[30] = -input[30] + input[(32 - 31)];
step[31] = -input[31] + input[(32 - 32)];
// Stage 2
output[0] = step[0] + step[16 - 1];
output[1] = step[1] + step[16 - 2];
output[2] = step[2] + step[16 - 3];
output[3] = step[3] + step[16 - 4];
output[4] = step[4] + step[16 - 5];
output[5] = step[5] + step[16 - 6];
output[6] = step[6] + step[16 - 7];
output[7] = step[7] + step[16 - 8];
output[8] = -step[8] + step[16 - 9];
output[9] = -step[9] + step[16 - 10];
output[10] = -step[10] + step[16 - 11];
output[11] = -step[11] + step[16 - 12];
output[12] = -step[12] + step[16 - 13];
output[13] = -step[13] + step[16 - 14];
output[14] = -step[14] + step[16 - 15];
output[15] = -step[15] + step[16 - 16];
output[16] = step[16];
output[17] = step[17];
output[18] = step[18];
output[19] = step[19];
output[20] = dct_32_round((-step[20] + step[27]) * cospi_16_64);
output[21] = dct_32_round((-step[21] + step[26]) * cospi_16_64);
output[22] = dct_32_round((-step[22] + step[25]) * cospi_16_64);
output[23] = dct_32_round((-step[23] + step[24]) * cospi_16_64);
output[24] = dct_32_round((step[24] + step[23]) * cospi_16_64);
output[25] = dct_32_round((step[25] + step[22]) * cospi_16_64);
output[26] = dct_32_round((step[26] + step[21]) * cospi_16_64);
output[27] = dct_32_round((step[27] + step[20]) * cospi_16_64);
output[28] = step[28];
output[29] = step[29];
output[30] = step[30];
output[31] = step[31];
// Scale the magnitude down by 4 so that the intermediate values stay within
// the range of 16 bits.
if (round) {
output[0] = half_round_shift(output[0]);
output[1] = half_round_shift(output[1]);
output[2] = half_round_shift(output[2]);
output[3] = half_round_shift(output[3]);
output[4] = half_round_shift(output[4]);
output[5] = half_round_shift(output[5]);
output[6] = half_round_shift(output[6]);
output[7] = half_round_shift(output[7]);
output[8] = half_round_shift(output[8]);
output[9] = half_round_shift(output[9]);
output[10] = half_round_shift(output[10]);
output[11] = half_round_shift(output[11]);
output[12] = half_round_shift(output[12]);
output[13] = half_round_shift(output[13]);
output[14] = half_round_shift(output[14]);
output[15] = half_round_shift(output[15]);
output[16] = half_round_shift(output[16]);
output[17] = half_round_shift(output[17]);
output[18] = half_round_shift(output[18]);
output[19] = half_round_shift(output[19]);
output[20] = half_round_shift(output[20]);
output[21] = half_round_shift(output[21]);
output[22] = half_round_shift(output[22]);
output[23] = half_round_shift(output[23]);
output[24] = half_round_shift(output[24]);
output[25] = half_round_shift(output[25]);
output[26] = half_round_shift(output[26]);
output[27] = half_round_shift(output[27]);
output[28] = half_round_shift(output[28]);
output[29] = half_round_shift(output[29]);
output[30] = half_round_shift(output[30]);
output[31] = half_round_shift(output[31]);
}
// Stage 3
step[0] = output[0] + output[(8 - 1)];
step[1] = output[1] + output[(8 - 2)];
step[2] = output[2] + output[(8 - 3)];
step[3] = output[3] + output[(8 - 4)];
step[4] = -output[4] + output[(8 - 5)];
step[5] = -output[5] + output[(8 - 6)];
step[6] = -output[6] + output[(8 - 7)];
step[7] = -output[7] + output[(8 - 8)];
step[8] = output[8];
step[9] = output[9];
step[10] = dct_32_round((-output[10] + output[13]) * cospi_16_64);
step[11] = dct_32_round((-output[11] + output[12]) * cospi_16_64);
step[12] = dct_32_round((output[12] + output[11]) * cospi_16_64);
step[13] = dct_32_round((output[13] + output[10]) * cospi_16_64);
step[14] = output[14];
step[15] = output[15];
step[16] = output[16] + output[23];
step[17] = output[17] + output[22];
step[18] = output[18] + output[21];
step[19] = output[19] + output[20];
step[20] = -output[20] + output[19];
step[21] = -output[21] + output[18];
step[22] = -output[22] + output[17];
step[23] = -output[23] + output[16];
step[24] = -output[24] + output[31];
step[25] = -output[25] + output[30];
step[26] = -output[26] + output[29];
step[27] = -output[27] + output[28];
step[28] = output[28] + output[27];
step[29] = output[29] + output[26];
step[30] = output[30] + output[25];
step[31] = output[31] + output[24];
// Stage 4
output[0] = step[0] + step[3];
output[1] = step[1] + step[2];
output[2] = -step[2] + step[1];
output[3] = -step[3] + step[0];
output[4] = step[4];
output[5] = dct_32_round((-step[5] + step[6]) * cospi_16_64);
output[6] = dct_32_round((step[6] + step[5]) * cospi_16_64);
output[7] = step[7];
output[8] = step[8] + step[11];
output[9] = step[9] + step[10];
output[10] = -step[10] + step[9];
output[11] = -step[11] + step[8];
output[12] = -step[12] + step[15];
output[13] = -step[13] + step[14];
output[14] = step[14] + step[13];
output[15] = step[15] + step[12];
output[16] = step[16];
output[17] = step[17];
output[18] = dct_32_round(step[18] * -cospi_8_64 + step[29] * cospi_24_64);
output[19] = dct_32_round(step[19] * -cospi_8_64 + step[28] * cospi_24_64);
output[20] = dct_32_round(step[20] * -cospi_24_64 + step[27] * -cospi_8_64);
output[21] = dct_32_round(step[21] * -cospi_24_64 + step[26] * -cospi_8_64);
output[22] = step[22];
output[23] = step[23];
output[24] = step[24];
output[25] = step[25];
output[26] = dct_32_round(step[26] * cospi_24_64 + step[21] * -cospi_8_64);
output[27] = dct_32_round(step[27] * cospi_24_64 + step[20] * -cospi_8_64);
output[28] = dct_32_round(step[28] * cospi_8_64 + step[19] * cospi_24_64);
output[29] = dct_32_round(step[29] * cospi_8_64 + step[18] * cospi_24_64);
output[30] = step[30];
output[31] = step[31];
// Stage 5
step[0] = dct_32_round((output[0] + output[1]) * cospi_16_64);
step[1] = dct_32_round((-output[1] + output[0]) * cospi_16_64);
step[2] = dct_32_round(output[2] * cospi_24_64 + output[3] * cospi_8_64);
step[3] = dct_32_round(output[3] * cospi_24_64 - output[2] * cospi_8_64);
step[4] = output[4] + output[5];
step[5] = -output[5] + output[4];
step[6] = -output[6] + output[7];
step[7] = output[7] + output[6];
step[8] = output[8];
step[9] = dct_32_round(output[9] * -cospi_8_64 + output[14] * cospi_24_64);
step[10] = dct_32_round(output[10] * -cospi_24_64 + output[13] * -cospi_8_64);
step[11] = output[11];
step[12] = output[12];
step[13] = dct_32_round(output[13] * cospi_24_64 + output[10] * -cospi_8_64);
step[14] = dct_32_round(output[14] * cospi_8_64 + output[9] * cospi_24_64);
step[15] = output[15];
step[16] = output[16] + output[19];
step[17] = output[17] + output[18];
step[18] = -output[18] + output[17];
step[19] = -output[19] + output[16];
step[20] = -output[20] + output[23];
step[21] = -output[21] + output[22];
step[22] = output[22] + output[21];
step[23] = output[23] + output[20];
step[24] = output[24] + output[27];
step[25] = output[25] + output[26];
step[26] = -output[26] + output[25];
step[27] = -output[27] + output[24];
step[28] = -output[28] + output[31];
step[29] = -output[29] + output[30];
step[30] = output[30] + output[29];
step[31] = output[31] + output[28];
// Stage 6
output[0] = step[0];
output[1] = step[1];
output[2] = step[2];
output[3] = step[3];
output[4] = dct_32_round(step[4] * cospi_28_64 + step[7] * cospi_4_64);
output[5] = dct_32_round(step[5] * cospi_12_64 + step[6] * cospi_20_64);
output[6] = dct_32_round(step[6] * cospi_12_64 + step[5] * -cospi_20_64);
output[7] = dct_32_round(step[7] * cospi_28_64 + step[4] * -cospi_4_64);
output[8] = step[8] + step[9];
output[9] = -step[9] + step[8];
output[10] = -step[10] + step[11];
output[11] = step[11] + step[10];
output[12] = step[12] + step[13];
output[13] = -step[13] + step[12];
output[14] = -step[14] + step[15];
output[15] = step[15] + step[14];
output[16] = step[16];
output[17] = dct_32_round(step[17] * -cospi_4_64 + step[30] * cospi_28_64);
output[18] = dct_32_round(step[18] * -cospi_28_64 + step[29] * -cospi_4_64);
output[19] = step[19];
output[20] = step[20];
output[21] = dct_32_round(step[21] * -cospi_20_64 + step[26] * cospi_12_64);
output[22] = dct_32_round(step[22] * -cospi_12_64 + step[25] * -cospi_20_64);
output[23] = step[23];
output[24] = step[24];
output[25] = dct_32_round(step[25] * cospi_12_64 + step[22] * -cospi_20_64);
output[26] = dct_32_round(step[26] * cospi_20_64 + step[21] * cospi_12_64);
output[27] = step[27];
output[28] = step[28];
output[29] = dct_32_round(step[29] * cospi_28_64 + step[18] * -cospi_4_64);
output[30] = dct_32_round(step[30] * cospi_4_64 + step[17] * cospi_28_64);
output[31] = step[31];
// Stage 7
step[0] = output[0];
step[1] = output[1];
step[2] = output[2];
step[3] = output[3];
step[4] = output[4];
step[5] = output[5];
step[6] = output[6];
step[7] = output[7];
step[8] = dct_32_round(output[8] * cospi_30_64 + output[15] * cospi_2_64);
step[9] = dct_32_round(output[9] * cospi_14_64 + output[14] * cospi_18_64);
step[10] = dct_32_round(output[10] * cospi_22_64 + output[13] * cospi_10_64);
step[11] = dct_32_round(output[11] * cospi_6_64 + output[12] * cospi_26_64);
step[12] = dct_32_round(output[12] * cospi_6_64 + output[11] * -cospi_26_64);
step[13] = dct_32_round(output[13] * cospi_22_64 + output[10] * -cospi_10_64);
step[14] = dct_32_round(output[14] * cospi_14_64 + output[9] * -cospi_18_64);
step[15] = dct_32_round(output[15] * cospi_30_64 + output[8] * -cospi_2_64);
step[16] = output[16] + output[17];
step[17] = -output[17] + output[16];
step[18] = -output[18] + output[19];
step[19] = output[19] + output[18];
step[20] = output[20] + output[21];
step[21] = -output[21] + output[20];
step[22] = -output[22] + output[23];
step[23] = output[23] + output[22];
step[24] = output[24] + output[25];
step[25] = -output[25] + output[24];
step[26] = -output[26] + output[27];
step[27] = output[27] + output[26];
step[28] = output[28] + output[29];
step[29] = -output[29] + output[28];
step[30] = -output[30] + output[31];
step[31] = output[31] + output[30];
// Final stage --- outputs indices are bit-reversed.
output[0] = step[0];
output[16] = step[1];
output[8] = step[2];
output[24] = step[3];
output[4] = step[4];
output[20] = step[5];
output[12] = step[6];
output[28] = step[7];
output[2] = step[8];
output[18] = step[9];
output[10] = step[10];
output[26] = step[11];
output[6] = step[12];
output[22] = step[13];
output[14] = step[14];
output[30] = step[15];
output[1] = dct_32_round(step[16] * cospi_31_64 + step[31] * cospi_1_64);
output[17] = dct_32_round(step[17] * cospi_15_64 + step[30] * cospi_17_64);
output[9] = dct_32_round(step[18] * cospi_23_64 + step[29] * cospi_9_64);
output[25] = dct_32_round(step[19] * cospi_7_64 + step[28] * cospi_25_64);
output[5] = dct_32_round(step[20] * cospi_27_64 + step[27] * cospi_5_64);
output[21] = dct_32_round(step[21] * cospi_11_64 + step[26] * cospi_21_64);
output[13] = dct_32_round(step[22] * cospi_19_64 + step[25] * cospi_13_64);
output[29] = dct_32_round(step[23] * cospi_3_64 + step[24] * cospi_29_64);
output[3] = dct_32_round(step[24] * cospi_3_64 + step[23] * -cospi_29_64);
output[19] = dct_32_round(step[25] * cospi_19_64 + step[22] * -cospi_13_64);
output[11] = dct_32_round(step[26] * cospi_11_64 + step[21] * -cospi_21_64);
output[27] = dct_32_round(step[27] * cospi_27_64 + step[20] * -cospi_5_64);
output[7] = dct_32_round(step[28] * cospi_7_64 + step[19] * -cospi_25_64);
output[23] = dct_32_round(step[29] * cospi_23_64 + step[18] * -cospi_9_64);
output[15] = dct_32_round(step[30] * cospi_15_64 + step[17] * -cospi_17_64);
output[31] = dct_32_round(step[31] * cospi_31_64 + step[16] * -cospi_1_64);
}
void vp10_fdct32x32_c(const int16_t *input, tran_low_t *out, int stride) {
int i, j;
tran_high_t output[32 * 32];
// Columns
for (i = 0; i < 32; ++i) {
tran_high_t temp_in[32], temp_out[32];
for (j = 0; j < 32; ++j)
temp_in[j] = input[j * stride + i] * 4;
vp10_fdct32(temp_in, temp_out, 0);
for (j = 0; j < 32; ++j)
output[j * 32 + i] = (temp_out[j] + 1 + (temp_out[j] > 0)) >> 2;
}
// Rows
for (i = 0; i < 32; ++i) {
tran_high_t temp_in[32], temp_out[32];
for (j = 0; j < 32; ++j)
temp_in[j] = output[j + i * 32];
vp10_fdct32(temp_in, temp_out, 0);
for (j = 0; j < 32; ++j)
out[j + i * 32] =
(tran_low_t)((temp_out[j] + 1 + (temp_out[j] < 0)) >> 2);
}
}
// Note that although dct_32_round is used in the dct32 computation flow,
// this 2D fdct32x32 for the rate-distortion optimization loop operates
// within 16-bit precision.
void vp10_fdct32x32_rd_c(const int16_t *input, tran_low_t *out, int stride) {
int i, j;
tran_high_t output[32 * 32];
// Columns
for (i = 0; i < 32; ++i) {
tran_high_t temp_in[32], temp_out[32];
for (j = 0; j < 32; ++j)
temp_in[j] = input[j * stride + i] * 4;
vp10_fdct32(temp_in, temp_out, 0);
for (j = 0; j < 32; ++j)
// TODO(cd): see quality impact of only doing
// output[j * 32 + i] = (temp_out[j] + 1) >> 2;
// PS: also change code in vp10_dsp/x86/vp10_dct_sse2.c
output[j * 32 + i] = (temp_out[j] + 1 + (temp_out[j] > 0)) >> 2;
}
// Rows
for (i = 0; i < 32; ++i) {
tran_high_t temp_in[32], temp_out[32];
for (j = 0; j < 32; ++j)
temp_in[j] = output[j + i * 32];
vp10_fdct32(temp_in, temp_out, 1);
for (j = 0; j < 32; ++j)
out[j + i * 32] = (tran_low_t)temp_out[j];
}
}
void vp10_fdct32x32_1_c(const int16_t *input, tran_low_t *output, int stride) {
int r, c;
tran_low_t sum = 0;
for (r = 0; r < 32; ++r)
for (c = 0; c < 32; ++c)
sum += input[r * stride + c];
output[0] = sum >> 3;
output[1] = 0;
}
#if CONFIG_VP9_HIGHBITDEPTH
void vp10_highbd_fdct4x4_c(const int16_t *input, tran_low_t *output,
int stride) {
vp10_fdct4x4_c(input, output, stride);
}
void vp10_highbd_fdct8x8_c(const int16_t *input, tran_low_t *final_output,
int stride) {
vp10_fdct8x8_c(input, final_output, stride);
}
void vp10_highbd_fdct8x8_1_c(const int16_t *input, tran_low_t *final_output,
int stride) {
vp10_fdct8x8_1_c(input, final_output, stride);
}
void vp10_highbd_fdct16x16_c(const int16_t *input, tran_low_t *output,
int stride) {
vp10_fdct16x16_c(input, output, stride);
}
void vp10_highbd_fdct16x16_1_c(const int16_t *input, tran_low_t *output,
int stride) {
vp10_fdct16x16_1_c(input, output, stride);
}
void vp10_highbd_fdct32x32_c(const int16_t *input,
tran_low_t *out, int stride) {
vp10_fdct32x32_c(input, out, stride);
}
void vp10_highbd_fdct32x32_rd_c(const int16_t *input, tran_low_t *out,
int stride) {
vp10_fdct32x32_rd_c(input, out, stride);
}
void vp10_highbd_fdct32x32_1_c(const int16_t *input,
tran_low_t *out, int stride) {
vp10_fdct32x32_1_c(input, out, stride);
}
#endif // CONFIG_VP9_HIGHBITDEPTH

View File

@ -0,0 +1,18 @@
/*
* Copyright (c) 2015 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef VP10_COMMON_VP10_FWD_TXFM_H_
#define VP10_COMMON_VP10_FWD_TXFM_H_
#include "vpx_dsp/txfm_common.h"
#include "vpx_dsp/fwd_txfm.h"
void vp10_fdct32(const tran_high_t *input, tran_high_t *output, int round);
#endif // VP10_COMMON_VP10_FWD_TXFM_H_

2499
vp10/common/vp10_inv_txfm.c Normal file

File diff suppressed because it is too large

122
vp10/common/vp10_inv_txfm.h Normal file
View File

@ -0,0 +1,122 @@
/*
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef VP10_COMMON_VP10_INV_TXFM_H_
#define VP10_COMMON_VP10_INV_TXFM_H_
#include <assert.h>
#include "./vpx_config.h"
#include "vpx_dsp/txfm_common.h"
#include "vpx_ports/mem.h"
#ifdef __cplusplus
extern "C" {
#endif
static INLINE tran_low_t check_range(tran_high_t input) {
#if CONFIG_COEFFICIENT_RANGE_CHECKING
// For valid VP9 input streams, intermediate stage coefficients should always
// stay within the range of a signed 16 bit integer. Coefficients can go out
// of this range for invalid/corrupt VP9 streams. However, strictly checking
// this range for every intermediate coefficient can be burdensome for a decoder,
// therefore the following assertion is only enabled when configured with
// --enable-coefficient-range-checking.
assert(INT16_MIN <= input);
assert(input <= INT16_MAX);
#endif // CONFIG_COEFFICIENT_RANGE_CHECKING
return (tran_low_t)input;
}
static INLINE tran_low_t dct_const_round_shift(tran_high_t input) {
tran_high_t rv = ROUND_POWER_OF_TWO(input, DCT_CONST_BITS);
return check_range(rv);
}
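/* With DCT_CONST_BITS == 14, dct_const_round_shift() reduces to
 * (input + 8192) >> 14. Worked value, using cospi_16_64 = 11585
 * (cos(pi/4) in Q14, from vpx_dsp/txfm_common.h):
 *   100 * cospi_16_64 = 1158500
 *   (1158500 + 8192) >> 14 = 71   (~= 100 * 0.70711)
 */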
#if CONFIG_VP9_HIGHBITDEPTH
static INLINE tran_low_t highbd_check_range(tran_high_t input,
int bd) {
#if CONFIG_COEFFICIENT_RANGE_CHECKING
// For valid highbitdepth VP9 streams, intermediate stage coefficients will
// stay within the ranges:
// - 8 bit: signed 16 bit integer
// - 10 bit: signed 18 bit integer
// - 12 bit: signed 20 bit integer
const int32_t int_max = (1 << (7 + bd)) - 1;
const int32_t int_min = -int_max - 1;
assert(int_min <= input);
assert(input <= int_max);
(void) int_min;
#endif // CONFIG_COEFFICIENT_RANGE_CHECKING
(void) bd;
return (tran_low_t)input;
}
static INLINE tran_low_t highbd_dct_const_round_shift(tran_high_t input,
int bd) {
tran_high_t rv = ROUND_POWER_OF_TWO(input, DCT_CONST_BITS);
return highbd_check_range(rv, bd);
}
#endif // CONFIG_VP9_HIGHBITDEPTH
#if CONFIG_EMULATE_HARDWARE
// When CONFIG_EMULATE_HARDWARE is 1 the transform performs a
// non-normative method to handle overflows. A stream that causes
// overflows in the inverse transform is considered invalid in VP9,
// and a hardware implementer is free to choose any reasonable
// method to handle overflows. However, to aid in hardware
// verification, an implementer can use a specific implementation of the
// WRAPLOW() macro below that is identical to their intended
// hardware implementation (and also use configure options to trigger
// the C implementation of the transform).
//
// The particular WRAPLOW implementation below performs strict
// overflow wrapping to match common hardware implementations.
// bd of 8 uses tran_low_t with 16 bits: the upper 16 bits must be removed
// bd of 10 uses tran_low_t with 18 bits: the upper 14 bits must be removed
// bd of 12 uses tran_low_t with 20 bits: the upper 12 bits must be removed
// bd of x uses tran_low_t with 8 + x bits: the upper 24 - x bits must be removed
#define WRAPLOW(x, bd) ((((int32_t)(x)) << (24 - bd)) >> (24 - bd))
#else
#define WRAPLOW(x, bd) ((int32_t)(x))
#endif // CONFIG_EMULATE_HARDWARE
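/* For bd == 8 the emulate-hardware WRAPLOW() keeps only the low 16 bits
 * of the intermediate, with sign extension:
 *   WRAPLOW(x, 8) == (((int32_t)(x)) << 16) >> 16
 * so x = 40000, which overflows int16_t, wraps to 40000 - 65536 = -25536,
 * the same value a 16-bit hardware register would hold. */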
void vp10_idct4_c(const tran_low_t *input, tran_low_t *output);
void vp10_idct8_c(const tran_low_t *input, tran_low_t *output);
void vp10_idct16_c(const tran_low_t *input, tran_low_t *output);
void vp10_idct32_c(const tran_low_t *input, tran_low_t *output);
void vp10_iadst4_c(const tran_low_t *input, tran_low_t *output);
void vp10_iadst8_c(const tran_low_t *input, tran_low_t *output);
void vp10_iadst16_c(const tran_low_t *input, tran_low_t *output);
#if CONFIG_VP9_HIGHBITDEPTH
void vp10_highbd_idct4_c(const tran_low_t *input, tran_low_t *output, int bd);
void vp10_highbd_idct8_c(const tran_low_t *input, tran_low_t *output, int bd);
void vp10_highbd_idct16_c(const tran_low_t *input, tran_low_t *output, int bd);
void vp10_highbd_iadst4_c(const tran_low_t *input, tran_low_t *output, int bd);
void vp10_highbd_iadst8_c(const tran_low_t *input, tran_low_t *output, int bd);
void vp10_highbd_iadst16_c(const tran_low_t *input, tran_low_t *output, int bd);
static INLINE uint16_t highbd_clip_pixel_add(uint16_t dest, tran_high_t trans,
int bd) {
trans = WRAPLOW(trans, bd);
return clip_pixel_highbd(WRAPLOW(dest + trans, bd), bd);
}
#endif
static INLINE uint8_t clip_pixel_add(uint8_t dest, tran_high_t trans) {
trans = WRAPLOW(trans, 8);
return clip_pixel(WRAPLOW(dest + trans, 8));
}
#ifdef __cplusplus
} // extern "C"
#endif
#endif  // VP10_COMMON_VP10_INV_TXFM_H_

View File

@ -95,6 +95,57 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
add_proto qw/void vp10_iht16x16_256_add/, "const tran_low_t *input, uint8_t *output, int pitch, int tx_type";
specialize qw/vp10_iht16x16_256_add/;
add_proto qw/void vp10_fdct4x4/, "const int16_t *input, tran_low_t *output, int stride";
specialize qw/vp10_fdct4x4 sse2/;
add_proto qw/void vp10_fdct4x4_1/, "const int16_t *input, tran_low_t *output, int stride";
specialize qw/vp10_fdct4x4_1 sse2/;
add_proto qw/void vp10_fdct8x8/, "const int16_t *input, tran_low_t *output, int stride";
specialize qw/vp10_fdct8x8 sse2/;
add_proto qw/void vp10_fdct8x8_1/, "const int16_t *input, tran_low_t *output, int stride";
specialize qw/vp10_fdct8x8_1 sse2/;
add_proto qw/void vp10_fdct16x16/, "const int16_t *input, tran_low_t *output, int stride";
specialize qw/vp10_fdct16x16 sse2/;
add_proto qw/void vp10_fdct16x16_1/, "const int16_t *input, tran_low_t *output, int stride";
specialize qw/vp10_fdct16x16_1 sse2/;
add_proto qw/void vp10_fdct32x32/, "const int16_t *input, tran_low_t *output, int stride";
specialize qw/vp10_fdct32x32 sse2/;
add_proto qw/void vp10_fdct32x32_rd/, "const int16_t *input, tran_low_t *output, int stride";
specialize qw/vp10_fdct32x32_rd sse2/;
add_proto qw/void vp10_fdct32x32_1/, "const int16_t *input, tran_low_t *output, int stride";
specialize qw/vp10_fdct32x32_1 sse2/;
add_proto qw/void vp10_highbd_fdct4x4/, "const int16_t *input, tran_low_t *output, int stride";
specialize qw/vp10_highbd_fdct4x4 sse2/;
add_proto qw/void vp10_highbd_fdct8x8/, "const int16_t *input, tran_low_t *output, int stride";
specialize qw/vp10_highbd_fdct8x8 sse2/;
add_proto qw/void vp10_highbd_fdct8x8_1/, "const int16_t *input, tran_low_t *output, int stride";
specialize qw/vp10_highbd_fdct8x8_1/;
add_proto qw/void vp10_highbd_fdct16x16/, "const int16_t *input, tran_low_t *output, int stride";
specialize qw/vp10_highbd_fdct16x16 sse2/;
add_proto qw/void vp10_highbd_fdct16x16_1/, "const int16_t *input, tran_low_t *output, int stride";
specialize qw/vp10_highbd_fdct16x16_1/;
add_proto qw/void vp10_highbd_fdct32x32/, "const int16_t *input, tran_low_t *output, int stride";
specialize qw/vp10_highbd_fdct32x32 sse2/;
add_proto qw/void vp10_highbd_fdct32x32_rd/, "const int16_t *input, tran_low_t *output, int stride";
specialize qw/vp10_highbd_fdct32x32_rd sse2/;
add_proto qw/void vp10_highbd_fdct32x32_1/, "const int16_t *input, tran_low_t *output, int stride";
specialize qw/vp10_highbd_fdct32x32_1/;
} else {
# Force C versions if CONFIG_EMULATE_HARDWARE is 1
if (vpx_config("CONFIG_EMULATE_HARDWARE") eq "yes") {
@ -106,6 +157,33 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
add_proto qw/void vp10_iht16x16_256_add/, "const tran_low_t *input, uint8_t *output, int pitch, int tx_type";
specialize qw/vp10_iht16x16_256_add/;
add_proto qw/void vp10_fdct4x4/, "const int16_t *input, tran_low_t *output, int stride";
specialize qw/vp10_fdct4x4/;
add_proto qw/void vp10_fdct4x4_1/, "const int16_t *input, tran_low_t *output, int stride";
specialize qw/vp10_fdct4x4_1/;
add_proto qw/void vp10_fdct8x8/, "const int16_t *input, tran_low_t *output, int stride";
specialize qw/vp10_fdct8x8/;
add_proto qw/void vp10_fdct8x8_1/, "const int16_t *input, tran_low_t *output, int stride";
specialize qw/vp10_fdct8x8_1/;
add_proto qw/void vp10_fdct16x16/, "const int16_t *input, tran_low_t *output, int stride";
specialize qw/vp10_fdct16x16/;
add_proto qw/void vp10_fdct16x16_1/, "const int16_t *input, tran_low_t *output, int stride";
specialize qw/vp10_fdct16x16_1/;
add_proto qw/void vp10_fdct32x32/, "const int16_t *input, tran_low_t *output, int stride";
specialize qw/vp10_fdct32x32/;
add_proto qw/void vp10_fdct32x32_rd/, "const int16_t *input, tran_low_t *output, int stride";
specialize qw/vp10_fdct32x32_rd/;
add_proto qw/void vp10_fdct32x32_1/, "const int16_t *input, tran_low_t *output, int stride";
specialize qw/vp10_fdct32x32_1/;
} else {
add_proto qw/void vp10_iht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
specialize qw/vp10_iht4x4_16_add sse2 neon dspr2 msa/;
@ -115,6 +193,33 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
add_proto qw/void vp10_iht16x16_256_add/, "const tran_low_t *input, uint8_t *output, int pitch, int tx_type";
specialize qw/vp10_iht16x16_256_add sse2 dspr2 msa/;
add_proto qw/void vp10_fdct4x4/, "const int16_t *input, tran_low_t *output, int stride";
specialize qw/vp10_fdct4x4 sse2/;
add_proto qw/void vp10_fdct4x4_1/, "const int16_t *input, tran_low_t *output, int stride";
specialize qw/vp10_fdct4x4_1 sse2/;
add_proto qw/void vp10_fdct8x8/, "const int16_t *input, tran_low_t *output, int stride";
specialize qw/vp10_fdct8x8 sse2/;
add_proto qw/void vp10_fdct8x8_1/, "const int16_t *input, tran_low_t *output, int stride";
specialize qw/vp10_fdct8x8_1 sse2/;
add_proto qw/void vp10_fdct16x16/, "const int16_t *input, tran_low_t *output, int stride";
specialize qw/vp10_fdct16x16 sse2/;
add_proto qw/void vp10_fdct16x16_1/, "const int16_t *input, tran_low_t *output, int stride";
specialize qw/vp10_fdct16x16_1 sse2/;
add_proto qw/void vp10_fdct32x32/, "const int16_t *input, tran_low_t *output, int stride";
specialize qw/vp10_fdct32x32 sse2/;
add_proto qw/void vp10_fdct32x32_rd/, "const int16_t *input, tran_low_t *output, int stride";
specialize qw/vp10_fdct32x32_rd sse2/;
add_proto qw/void vp10_fdct32x32_1/, "const int16_t *input, tran_low_t *output, int stride";
specialize qw/vp10_fdct32x32_1 sse2/;
}
}
@ -289,6 +394,188 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
specialize qw/vp10_fwht4x4 msa/, "$mmx_x86inc";
}
# Inverse transform
if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
# Note: as optimized versions of these functions are added, a check must be
# added to ensure that when CONFIG_EMULATE_HARDWARE is on, only the C
# versions are used.
add_proto qw/void vp10_idct4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
specialize qw/vp10_idct4x4_1_add/;
add_proto qw/void vp10_idct4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
specialize qw/vp10_idct4x4_16_add/;
add_proto qw/void vp10_idct8x8_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
specialize qw/vp10_idct8x8_1_add/;
add_proto qw/void vp10_idct8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
specialize qw/vp10_idct8x8_64_add/;
add_proto qw/void vp10_idct8x8_12_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
specialize qw/vp10_idct8x8_12_add/;
add_proto qw/void vp10_idct16x16_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
specialize qw/vp10_idct16x16_1_add/;
add_proto qw/void vp10_idct16x16_256_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
specialize qw/vp10_idct16x16_256_add/;
add_proto qw/void vp10_idct16x16_10_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
specialize qw/vp10_idct16x16_10_add/;
add_proto qw/void vp10_idct32x32_1024_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
specialize qw/vp10_idct32x32_1024_add/;
add_proto qw/void vp10_idct32x32_34_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
specialize qw/vp10_idct32x32_34_add/;
add_proto qw/void vp10_idct32x32_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
specialize qw/vp10_idct32x32_1_add/;
add_proto qw/void vp10_iwht4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
specialize qw/vp10_iwht4x4_1_add/;
add_proto qw/void vp10_iwht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
specialize qw/vp10_iwht4x4_16_add/;
add_proto qw/void vp10_highbd_idct4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
specialize qw/vp10_highbd_idct4x4_1_add/;
add_proto qw/void vp10_highbd_idct8x8_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
specialize qw/vp10_highbd_idct8x8_1_add/;
add_proto qw/void vp10_highbd_idct16x16_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
specialize qw/vp10_highbd_idct16x16_1_add/;
add_proto qw/void vp10_highbd_idct32x32_1024_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
specialize qw/vp10_highbd_idct32x32_1024_add/;
add_proto qw/void vp10_highbd_idct32x32_34_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
specialize qw/vp10_highbd_idct32x32_34_add/;
add_proto qw/void vp10_highbd_idct32x32_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
specialize qw/vp10_highbd_idct32x32_1_add/;
add_proto qw/void vp10_highbd_iwht4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
specialize qw/vp10_highbd_iwht4x4_1_add/;
add_proto qw/void vp10_highbd_iwht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
specialize qw/vp10_highbd_iwht4x4_16_add/;
# Force C versions if CONFIG_EMULATE_HARDWARE is 1
if (vpx_config("CONFIG_EMULATE_HARDWARE") eq "yes") {
add_proto qw/void vp10_highbd_idct4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
specialize qw/vp10_highbd_idct4x4_16_add/;
add_proto qw/void vp10_highbd_idct8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
specialize qw/vp10_highbd_idct8x8_64_add/;
add_proto qw/void vp10_highbd_idct8x8_10_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
specialize qw/vp10_highbd_idct8x8_10_add/;
add_proto qw/void vp10_highbd_idct16x16_256_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
specialize qw/vp10_highbd_idct16x16_256_add/;
add_proto qw/void vp10_highbd_idct16x16_10_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
specialize qw/vp10_highbd_idct16x16_10_add/;
} else {
add_proto qw/void vp10_highbd_idct4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
specialize qw/vp10_highbd_idct4x4_16_add sse2/;
add_proto qw/void vp10_highbd_idct8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
specialize qw/vp10_highbd_idct8x8_64_add sse2/;
add_proto qw/void vp10_highbd_idct8x8_10_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
specialize qw/vp10_highbd_idct8x8_10_add sse2/;
add_proto qw/void vp10_highbd_idct16x16_256_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
specialize qw/vp10_highbd_idct16x16_256_add sse2/;
add_proto qw/void vp10_highbd_idct16x16_10_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
specialize qw/vp10_highbd_idct16x16_10_add sse2/;
} # CONFIG_EMULATE_HARDWARE
} else {
# Force C versions if CONFIG_EMULATE_HARDWARE is 1
if (vpx_config("CONFIG_EMULATE_HARDWARE") eq "yes") {
add_proto qw/void vp10_idct4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
specialize qw/vp10_idct4x4_1_add/;
add_proto qw/void vp10_idct4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
specialize qw/vp10_idct4x4_16_add/;
add_proto qw/void vp10_idct8x8_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
specialize qw/vp10_idct8x8_1_add/;
add_proto qw/void vp10_idct8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
specialize qw/vp10_idct8x8_64_add/;
add_proto qw/void vp10_idct8x8_12_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
specialize qw/vp10_idct8x8_12_add/;
add_proto qw/void vp10_idct16x16_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
specialize qw/vp10_idct16x16_1_add/;
add_proto qw/void vp10_idct16x16_256_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
specialize qw/vp10_idct16x16_256_add/;
add_proto qw/void vp10_idct16x16_10_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
specialize qw/vp10_idct16x16_10_add/;
add_proto qw/void vp10_idct32x32_1024_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
specialize qw/vp10_idct32x32_1024_add/;
add_proto qw/void vp10_idct32x32_34_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
specialize qw/vp10_idct32x32_34_add/;
add_proto qw/void vp10_idct32x32_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
specialize qw/vp10_idct32x32_1_add/;
add_proto qw/void vp10_iwht4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
specialize qw/vp10_iwht4x4_1_add/;
add_proto qw/void vp10_iwht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
specialize qw/vp10_iwht4x4_16_add/;
} else {
add_proto qw/void vp10_idct4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
specialize qw/vp10_idct4x4_1_add sse2/;
add_proto qw/void vp10_idct4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
specialize qw/vp10_idct4x4_16_add sse2/;
add_proto qw/void vp10_idct8x8_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
specialize qw/vp10_idct8x8_1_add sse2/;
add_proto qw/void vp10_idct8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
specialize qw/vp10_idct8x8_64_add sse2/;
add_proto qw/void vp10_idct8x8_12_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
specialize qw/vp10_idct8x8_12_add sse2/;
add_proto qw/void vp10_idct16x16_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
specialize qw/vp10_idct16x16_1_add sse2/;
add_proto qw/void vp10_idct16x16_256_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
specialize qw/vp10_idct16x16_256_add sse2/;
add_proto qw/void vp10_idct16x16_10_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
specialize qw/vp10_idct16x16_10_add sse2/;
add_proto qw/void vp10_idct32x32_1024_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
specialize qw/vp10_idct32x32_1024_add sse2/;
add_proto qw/void vp10_idct32x32_34_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
specialize qw/vp10_idct32x32_34_add sse2/;
add_proto qw/void vp10_idct32x32_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
specialize qw/vp10_idct32x32_1_add sse2/;
add_proto qw/void vp10_iwht4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
specialize qw/vp10_iwht4x4_1_add/;
add_proto qw/void vp10_iwht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
specialize qw/vp10_iwht4x4_16_add/;
} # CONFIG_EMULATE_HARDWARE
} # CONFIG_VP9_HIGHBITDEPTH
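# For readers unfamiliar with the rtcd scripts: each add_proto line above
# declares one run-time-dispatched function slot, and specialize lists the
# per-ISA implementations that may fill it. A hand-written sketch of what
# the generated vp10_rtcd.h roughly contains for one entry (not the
# literal generator output):
#
#   void vp10_fdct4x4_c(const int16_t *input, tran_low_t *output, int stride);
#   void vp10_fdct4x4_sse2(const int16_t *input, tran_low_t *output, int stride);
#   RTCD_EXTERN void (*vp10_fdct4x4)(const int16_t *input, tran_low_t *output,
#                                    int stride);
#
# setup_rtcd_internal() points vp10_fdct4x4 at the SSE2 version when the
# CPU supports it, and at the C version otherwise.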
#
# Motion search
#

File diff suppressed because it is too large

File diff suppressed because it is too large

View File

@ -0,0 +1,271 @@
/*
* Copyright (c) 2015 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include <emmintrin.h> // SSE2
#include "./vpx_config.h"
#include "vpx_dsp/vpx_dsp_common.h"
#include "vpx_dsp/x86/fwd_txfm_sse2.h"
void vp10_fdct4x4_1_sse2(const int16_t *input, tran_low_t *output, int stride) {
__m128i in0, in1;
__m128i tmp;
const __m128i zero = _mm_setzero_si128();
in0 = _mm_loadl_epi64((const __m128i *)(input + 0 * stride));
in1 = _mm_loadl_epi64((const __m128i *)(input + 1 * stride));
in1 = _mm_unpacklo_epi64(in1, _mm_loadl_epi64((const __m128i *)
(input + 2 * stride)));
in0 = _mm_unpacklo_epi64(in0, _mm_loadl_epi64((const __m128i *)
(input + 3 * stride)));
tmp = _mm_add_epi16(in0, in1);
in0 = _mm_unpacklo_epi16(zero, tmp);
in1 = _mm_unpackhi_epi16(zero, tmp);
in0 = _mm_srai_epi32(in0, 16);
in1 = _mm_srai_epi32(in1, 16);
tmp = _mm_add_epi32(in0, in1);
in0 = _mm_unpacklo_epi32(tmp, zero);
in1 = _mm_unpackhi_epi32(tmp, zero);
tmp = _mm_add_epi32(in0, in1);
in0 = _mm_srli_si128(tmp, 8);
in1 = _mm_add_epi32(tmp, in0);
in0 = _mm_slli_epi32(in1, 1);
store_output(&in0, output);
}
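/* The unpack-with-zero plus arithmetic right shift by 16 used in these
 * reductions is the SSE2 idiom for widening signed 16-bit lanes to 32
 * bits (there is no _mm_cvtepi16_epi32 before SSE4.1). Minimal sketch: */
#include <assert.h>
int main(void) {
  const __m128i zero = _mm_setzero_si128();
  const __m128i v = _mm_set1_epi16(-3);
  /* Each word lands in the high half of a dword; the arithmetic shift
   * then sign-extends it down into place. */
  const __m128i lo = _mm_srai_epi32(_mm_unpacklo_epi16(zero, v), 16);
  assert(_mm_cvtsi128_si32(lo) == -3);
  return 0;
}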
void vp10_fdct8x8_1_sse2(const int16_t *input, tran_low_t *output, int stride) {
__m128i in0 = _mm_load_si128((const __m128i *)(input + 0 * stride));
__m128i in1 = _mm_load_si128((const __m128i *)(input + 1 * stride));
__m128i in2 = _mm_load_si128((const __m128i *)(input + 2 * stride));
__m128i in3 = _mm_load_si128((const __m128i *)(input + 3 * stride));
__m128i u0, u1, sum;
u0 = _mm_add_epi16(in0, in1);
u1 = _mm_add_epi16(in2, in3);
in0 = _mm_load_si128((const __m128i *)(input + 4 * stride));
in1 = _mm_load_si128((const __m128i *)(input + 5 * stride));
in2 = _mm_load_si128((const __m128i *)(input + 6 * stride));
in3 = _mm_load_si128((const __m128i *)(input + 7 * stride));
sum = _mm_add_epi16(u0, u1);
in0 = _mm_add_epi16(in0, in1);
in2 = _mm_add_epi16(in2, in3);
sum = _mm_add_epi16(sum, in0);
u0 = _mm_setzero_si128();
sum = _mm_add_epi16(sum, in2);
in0 = _mm_unpacklo_epi16(u0, sum);
in1 = _mm_unpackhi_epi16(u0, sum);
in0 = _mm_srai_epi32(in0, 16);
in1 = _mm_srai_epi32(in1, 16);
sum = _mm_add_epi32(in0, in1);
in0 = _mm_unpacklo_epi32(sum, u0);
in1 = _mm_unpackhi_epi32(sum, u0);
sum = _mm_add_epi32(in0, in1);
in0 = _mm_srli_si128(sum, 8);
in1 = _mm_add_epi32(sum, in0);
store_output(&in1, output);
}
void vp10_fdct16x16_1_sse2(const int16_t *input, tran_low_t *output,
int stride) {
__m128i in0, in1, in2, in3;
__m128i u0, u1;
__m128i sum = _mm_setzero_si128();
int i;
for (i = 0; i < 2; ++i) {
input += 8 * i;
in0 = _mm_load_si128((const __m128i *)(input + 0 * stride));
in1 = _mm_load_si128((const __m128i *)(input + 1 * stride));
in2 = _mm_load_si128((const __m128i *)(input + 2 * stride));
in3 = _mm_load_si128((const __m128i *)(input + 3 * stride));
u0 = _mm_add_epi16(in0, in1);
u1 = _mm_add_epi16(in2, in3);
sum = _mm_add_epi16(sum, u0);
in0 = _mm_load_si128((const __m128i *)(input + 4 * stride));
in1 = _mm_load_si128((const __m128i *)(input + 5 * stride));
in2 = _mm_load_si128((const __m128i *)(input + 6 * stride));
in3 = _mm_load_si128((const __m128i *)(input + 7 * stride));
sum = _mm_add_epi16(sum, u1);
u0 = _mm_add_epi16(in0, in1);
u1 = _mm_add_epi16(in2, in3);
sum = _mm_add_epi16(sum, u0);
in0 = _mm_load_si128((const __m128i *)(input + 8 * stride));
in1 = _mm_load_si128((const __m128i *)(input + 9 * stride));
in2 = _mm_load_si128((const __m128i *)(input + 10 * stride));
in3 = _mm_load_si128((const __m128i *)(input + 11 * stride));
sum = _mm_add_epi16(sum, u1);
u0 = _mm_add_epi16(in0, in1);
u1 = _mm_add_epi16(in2, in3);
sum = _mm_add_epi16(sum, u0);
in0 = _mm_load_si128((const __m128i *)(input + 12 * stride));
in1 = _mm_load_si128((const __m128i *)(input + 13 * stride));
in2 = _mm_load_si128((const __m128i *)(input + 14 * stride));
in3 = _mm_load_si128((const __m128i *)(input + 15 * stride));
sum = _mm_add_epi16(sum, u1);
u0 = _mm_add_epi16(in0, in1);
u1 = _mm_add_epi16(in2, in3);
sum = _mm_add_epi16(sum, u0);
sum = _mm_add_epi16(sum, u1);
}
u0 = _mm_setzero_si128();
in0 = _mm_unpacklo_epi16(u0, sum);
in1 = _mm_unpackhi_epi16(u0, sum);
in0 = _mm_srai_epi32(in0, 16);
in1 = _mm_srai_epi32(in1, 16);
sum = _mm_add_epi32(in0, in1);
in0 = _mm_unpacklo_epi32(sum, u0);
in1 = _mm_unpackhi_epi32(sum, u0);
sum = _mm_add_epi32(in0, in1);
in0 = _mm_srli_si128(sum, 8);
in1 = _mm_add_epi32(sum, in0);
in1 = _mm_srai_epi32(in1, 1);
store_output(&in1, output);
}
void vp10_fdct32x32_1_sse2(const int16_t *input, tran_low_t *output,
int stride) {
__m128i in0, in1, in2, in3;
__m128i u0, u1;
__m128i sum = _mm_setzero_si128();
int i;
for (i = 0; i < 8; ++i) {
in0 = _mm_load_si128((const __m128i *)(input + 0));
in1 = _mm_load_si128((const __m128i *)(input + 8));
in2 = _mm_load_si128((const __m128i *)(input + 16));
in3 = _mm_load_si128((const __m128i *)(input + 24));
input += stride;
u0 = _mm_add_epi16(in0, in1);
u1 = _mm_add_epi16(in2, in3);
sum = _mm_add_epi16(sum, u0);
in0 = _mm_load_si128((const __m128i *)(input + 0));
in1 = _mm_load_si128((const __m128i *)(input + 8));
in2 = _mm_load_si128((const __m128i *)(input + 16));
in3 = _mm_load_si128((const __m128i *)(input + 24));
input += stride;
sum = _mm_add_epi16(sum, u1);
u0 = _mm_add_epi16(in0, in1);
u1 = _mm_add_epi16(in2, in3);
sum = _mm_add_epi16(sum, u0);
in0 = _mm_load_si128((const __m128i *)(input + 0));
in1 = _mm_load_si128((const __m128i *)(input + 8));
in2 = _mm_load_si128((const __m128i *)(input + 16));
in3 = _mm_load_si128((const __m128i *)(input + 24));
input += stride;
sum = _mm_add_epi16(sum, u1);
u0 = _mm_add_epi16(in0, in1);
u1 = _mm_add_epi16(in2, in3);
sum = _mm_add_epi16(sum, u0);
in0 = _mm_load_si128((const __m128i *)(input + 0));
in1 = _mm_load_si128((const __m128i *)(input + 8));
in2 = _mm_load_si128((const __m128i *)(input + 16));
in3 = _mm_load_si128((const __m128i *)(input + 24));
input += stride;
sum = _mm_add_epi16(sum, u1);
u0 = _mm_add_epi16(in0, in1);
u1 = _mm_add_epi16(in2, in3);
sum = _mm_add_epi16(sum, u0);
sum = _mm_add_epi16(sum, u1);
}
u0 = _mm_setzero_si128();
in0 = _mm_unpacklo_epi16(u0, sum);
in1 = _mm_unpackhi_epi16(u0, sum);
in0 = _mm_srai_epi32(in0, 16);
in1 = _mm_srai_epi32(in1, 16);
sum = _mm_add_epi32(in0, in1);
in0 = _mm_unpacklo_epi32(sum, u0);
in1 = _mm_unpackhi_epi32(sum, u0);
sum = _mm_add_epi32(in0, in1);
in0 = _mm_srli_si128(sum, 8);
in1 = _mm_add_epi32(sum, in0);
in1 = _mm_srai_epi32(in1, 3);
store_output(&in1, output);
}
#define DCT_HIGH_BIT_DEPTH 0
#define FDCT4x4_2D vp10_fdct4x4_sse2
#define FDCT8x8_2D vp10_fdct8x8_sse2
#define FDCT16x16_2D vp10_fdct16x16_sse2
#include "vp10/common/x86/vp10_fwd_txfm_impl_sse2.h"
#undef FDCT4x4_2D
#undef FDCT8x8_2D
#undef FDCT16x16_2D
#define FDCT32x32_2D vp10_fdct32x32_rd_sse2
#define FDCT32x32_HIGH_PRECISION 0
#include "vp10/common/x86/vp10_fwd_dct32x32_impl_sse2.h"
#undef FDCT32x32_2D
#undef FDCT32x32_HIGH_PRECISION
#define FDCT32x32_2D vp10_fdct32x32_sse2
#define FDCT32x32_HIGH_PRECISION 1
#include "vp10/common/x86/vp10_fwd_dct32x32_impl_sse2.h" // NOLINT
#undef FDCT32x32_2D
#undef FDCT32x32_HIGH_PRECISION
#undef DCT_HIGH_BIT_DEPTH
#if CONFIG_VP9_HIGHBITDEPTH
#define DCT_HIGH_BIT_DEPTH 1
#define FDCT4x4_2D vp10_highbd_fdct4x4_sse2
#define FDCT8x8_2D vp10_highbd_fdct8x8_sse2
#define FDCT16x16_2D vp10_highbd_fdct16x16_sse2
#include "vp10/common/x86/vp10_fwd_txfm_impl_sse2.h" // NOLINT
#undef FDCT4x4_2D
#undef FDCT8x8_2D
#undef FDCT16x16_2D
#define FDCT32x32_2D vp10_highbd_fdct32x32_rd_sse2
#define FDCT32x32_HIGH_PRECISION 0
#include "vp10/common/x86/vp10_fwd_dct32x32_impl_sse2.h" // NOLINT
#undef FDCT32x32_2D
#undef FDCT32x32_HIGH_PRECISION
#define FDCT32x32_2D vp10_highbd_fdct32x32_sse2
#define FDCT32x32_HIGH_PRECISION 1
#include "vp10/common/x86/vp10_fwd_dct32x32_impl_sse2.h" // NOLINT
#undef FDCT32x32_2D
#undef FDCT32x32_HIGH_PRECISION
#undef DCT_HIGH_BIT_DEPTH
#endif // CONFIG_VP9_HIGHBITDEPTH

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,184 @@
/*
* Copyright (c) 2015 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef VPX_DSP_X86_INV_TXFM_SSE2_H_
#define VPX_DSP_X86_INV_TXFM_SSE2_H_
#include <emmintrin.h> // SSE2
#include "./vpx_config.h"
#include "vpx/vpx_integer.h"
#include "vp10/common/vp10_inv_txfm.h"
// perform 8x8 transpose
static INLINE void array_transpose_8x8(__m128i *in, __m128i *res) {
const __m128i tr0_0 = _mm_unpacklo_epi16(in[0], in[1]);
const __m128i tr0_1 = _mm_unpacklo_epi16(in[2], in[3]);
const __m128i tr0_2 = _mm_unpackhi_epi16(in[0], in[1]);
const __m128i tr0_3 = _mm_unpackhi_epi16(in[2], in[3]);
const __m128i tr0_4 = _mm_unpacklo_epi16(in[4], in[5]);
const __m128i tr0_5 = _mm_unpacklo_epi16(in[6], in[7]);
const __m128i tr0_6 = _mm_unpackhi_epi16(in[4], in[5]);
const __m128i tr0_7 = _mm_unpackhi_epi16(in[6], in[7]);
const __m128i tr1_0 = _mm_unpacklo_epi32(tr0_0, tr0_1);
const __m128i tr1_1 = _mm_unpacklo_epi32(tr0_4, tr0_5);
const __m128i tr1_2 = _mm_unpackhi_epi32(tr0_0, tr0_1);
const __m128i tr1_3 = _mm_unpackhi_epi32(tr0_4, tr0_5);
const __m128i tr1_4 = _mm_unpacklo_epi32(tr0_2, tr0_3);
const __m128i tr1_5 = _mm_unpacklo_epi32(tr0_6, tr0_7);
const __m128i tr1_6 = _mm_unpackhi_epi32(tr0_2, tr0_3);
const __m128i tr1_7 = _mm_unpackhi_epi32(tr0_6, tr0_7);
res[0] = _mm_unpacklo_epi64(tr1_0, tr1_1);
res[1] = _mm_unpackhi_epi64(tr1_0, tr1_1);
res[2] = _mm_unpacklo_epi64(tr1_2, tr1_3);
res[3] = _mm_unpackhi_epi64(tr1_2, tr1_3);
res[4] = _mm_unpacklo_epi64(tr1_4, tr1_5);
res[5] = _mm_unpackhi_epi64(tr1_4, tr1_5);
res[6] = _mm_unpacklo_epi64(tr1_6, tr1_7);
res[7] = _mm_unpackhi_epi64(tr1_6, tr1_7);
}
#define TRANSPOSE_8X4(in0, in1, in2, in3, out0, out1) \
{ \
const __m128i tr0_0 = _mm_unpacklo_epi16(in0, in1); \
const __m128i tr0_1 = _mm_unpacklo_epi16(in2, in3); \
\
in0 = _mm_unpacklo_epi32(tr0_0, tr0_1); /* i1 i0 */ \
in1 = _mm_unpackhi_epi32(tr0_0, tr0_1); /* i3 i2 */ \
}
static INLINE void array_transpose_4X8(__m128i *in, __m128i * out) {
const __m128i tr0_0 = _mm_unpacklo_epi16(in[0], in[1]);
const __m128i tr0_1 = _mm_unpacklo_epi16(in[2], in[3]);
const __m128i tr0_4 = _mm_unpacklo_epi16(in[4], in[5]);
const __m128i tr0_5 = _mm_unpacklo_epi16(in[6], in[7]);
const __m128i tr1_0 = _mm_unpacklo_epi32(tr0_0, tr0_1);
const __m128i tr1_2 = _mm_unpackhi_epi32(tr0_0, tr0_1);
const __m128i tr1_4 = _mm_unpacklo_epi32(tr0_4, tr0_5);
const __m128i tr1_6 = _mm_unpackhi_epi32(tr0_4, tr0_5);
out[0] = _mm_unpacklo_epi64(tr1_0, tr1_4);
out[1] = _mm_unpackhi_epi64(tr1_0, tr1_4);
out[2] = _mm_unpacklo_epi64(tr1_2, tr1_6);
out[3] = _mm_unpackhi_epi64(tr1_2, tr1_6);
}
static INLINE void array_transpose_16x16(__m128i *res0, __m128i *res1) {
__m128i tbuf[8];
array_transpose_8x8(res0, res0);
array_transpose_8x8(res1, tbuf);
array_transpose_8x8(res0 + 8, res1);
array_transpose_8x8(res1 + 8, res1 + 8);
res0[8] = tbuf[0];
res0[9] = tbuf[1];
res0[10] = tbuf[2];
res0[11] = tbuf[3];
res0[12] = tbuf[4];
res0[13] = tbuf[5];
res0[14] = tbuf[6];
res0[15] = tbuf[7];
}
static INLINE void load_buffer_8x16(const int16_t *input, __m128i *in) {
in[0] = _mm_load_si128((const __m128i *)(input + 0 * 16));
in[1] = _mm_load_si128((const __m128i *)(input + 1 * 16));
in[2] = _mm_load_si128((const __m128i *)(input + 2 * 16));
in[3] = _mm_load_si128((const __m128i *)(input + 3 * 16));
in[4] = _mm_load_si128((const __m128i *)(input + 4 * 16));
in[5] = _mm_load_si128((const __m128i *)(input + 5 * 16));
in[6] = _mm_load_si128((const __m128i *)(input + 6 * 16));
in[7] = _mm_load_si128((const __m128i *)(input + 7 * 16));
in[8] = _mm_load_si128((const __m128i *)(input + 8 * 16));
in[9] = _mm_load_si128((const __m128i *)(input + 9 * 16));
in[10] = _mm_load_si128((const __m128i *)(input + 10 * 16));
in[11] = _mm_load_si128((const __m128i *)(input + 11 * 16));
in[12] = _mm_load_si128((const __m128i *)(input + 12 * 16));
in[13] = _mm_load_si128((const __m128i *)(input + 13 * 16));
in[14] = _mm_load_si128((const __m128i *)(input + 14 * 16));
in[15] = _mm_load_si128((const __m128i *)(input + 15 * 16));
}
#define RECON_AND_STORE(dest, in_x) \
{ \
__m128i d0 = _mm_loadl_epi64((__m128i *)(dest)); \
d0 = _mm_unpacklo_epi8(d0, zero); \
d0 = _mm_add_epi16(in_x, d0); \
d0 = _mm_packus_epi16(d0, d0); \
_mm_storel_epi64((__m128i *)(dest), d0); \
}
static INLINE void write_buffer_8x16(uint8_t *dest, __m128i *in, int stride) {
const __m128i final_rounding = _mm_set1_epi16(1<<5);
const __m128i zero = _mm_setzero_si128();
// Final rounding and shift
in[0] = _mm_adds_epi16(in[0], final_rounding);
in[1] = _mm_adds_epi16(in[1], final_rounding);
in[2] = _mm_adds_epi16(in[2], final_rounding);
in[3] = _mm_adds_epi16(in[3], final_rounding);
in[4] = _mm_adds_epi16(in[4], final_rounding);
in[5] = _mm_adds_epi16(in[5], final_rounding);
in[6] = _mm_adds_epi16(in[6], final_rounding);
in[7] = _mm_adds_epi16(in[7], final_rounding);
in[8] = _mm_adds_epi16(in[8], final_rounding);
in[9] = _mm_adds_epi16(in[9], final_rounding);
in[10] = _mm_adds_epi16(in[10], final_rounding);
in[11] = _mm_adds_epi16(in[11], final_rounding);
in[12] = _mm_adds_epi16(in[12], final_rounding);
in[13] = _mm_adds_epi16(in[13], final_rounding);
in[14] = _mm_adds_epi16(in[14], final_rounding);
in[15] = _mm_adds_epi16(in[15], final_rounding);
in[0] = _mm_srai_epi16(in[0], 6);
in[1] = _mm_srai_epi16(in[1], 6);
in[2] = _mm_srai_epi16(in[2], 6);
in[3] = _mm_srai_epi16(in[3], 6);
in[4] = _mm_srai_epi16(in[4], 6);
in[5] = _mm_srai_epi16(in[5], 6);
in[6] = _mm_srai_epi16(in[6], 6);
in[7] = _mm_srai_epi16(in[7], 6);
in[8] = _mm_srai_epi16(in[8], 6);
in[9] = _mm_srai_epi16(in[9], 6);
in[10] = _mm_srai_epi16(in[10], 6);
in[11] = _mm_srai_epi16(in[11], 6);
in[12] = _mm_srai_epi16(in[12], 6);
in[13] = _mm_srai_epi16(in[13], 6);
in[14] = _mm_srai_epi16(in[14], 6);
in[15] = _mm_srai_epi16(in[15], 6);
RECON_AND_STORE(dest + 0 * stride, in[0]);
RECON_AND_STORE(dest + 1 * stride, in[1]);
RECON_AND_STORE(dest + 2 * stride, in[2]);
RECON_AND_STORE(dest + 3 * stride, in[3]);
RECON_AND_STORE(dest + 4 * stride, in[4]);
RECON_AND_STORE(dest + 5 * stride, in[5]);
RECON_AND_STORE(dest + 6 * stride, in[6]);
RECON_AND_STORE(dest + 7 * stride, in[7]);
RECON_AND_STORE(dest + 8 * stride, in[8]);
RECON_AND_STORE(dest + 9 * stride, in[9]);
RECON_AND_STORE(dest + 10 * stride, in[10]);
RECON_AND_STORE(dest + 11 * stride, in[11]);
RECON_AND_STORE(dest + 12 * stride, in[12]);
RECON_AND_STORE(dest + 13 * stride, in[13]);
RECON_AND_STORE(dest + 14 * stride, in[14]);
RECON_AND_STORE(dest + 15 * stride, in[15]);
}
void idct4_sse2(__m128i *in);
void idct8_sse2(__m128i *in);
void idct16_sse2(__m128i *in0, __m128i *in1);
void iadst4_sse2(__m128i *in);
void iadst8_sse2(__m128i *in);
void iadst16_sse2(__m128i *in0, __m128i *in1);
#endif // VPX_DSP_X86_INV_TXFM_SSE2_H_

View File

@ -17,6 +17,7 @@
#include "vpx_dsp/bitreader_buffer.h"
#include "vpx_dsp/bitreader.h"
#include "vpx_dsp/vpx_dsp_common.h"
#include "vpx_mem/vpx_mem.h"
#include "vpx_ports/mem.h"
#include "vpx_ports/mem_ops.h"
@ -80,12 +81,18 @@ static int decode_unsigned_max(struct vpx_read_bit_buffer *rb, int max) {
return data > max ? max : data;
}
#if CONFIG_MISC_FIXES
static TX_MODE read_tx_mode(struct vpx_read_bit_buffer *rb) {
return vpx_rb_read_bit(rb) ? TX_MODE_SELECT : vpx_rb_read_literal(rb, 2);
}
#else
static TX_MODE read_tx_mode(vpx_reader *r) {
TX_MODE tx_mode = vpx_read_literal(r, 2);
if (tx_mode == ALLOW_32X32)
tx_mode += vpx_read_bit(r);
return tx_mode;
}
#endif
static void read_tx_mode_probs(struct tx_probs *tx_probs, vpx_reader *r) {
int i, j;
@ -526,6 +533,7 @@ static void dec_build_inter_predictors(VP10Decoder *const pbi, MACROBLOCKD *xd,
struct buf_2d *dst_buf, const MV* mv,
RefCntBuffer *ref_frame_buf,
int is_scaled, int ref) {
VP10_COMMON *const cm = &pbi->common;
struct macroblockd_plane *const pd = &xd->plane[plane];
uint8_t *const dst = dst_buf->buf + dst_buf->stride * y + x;
MV32 scaled_mv;
@ -622,9 +630,9 @@ static void dec_build_inter_predictors(VP10Decoder *const pbi, MACROBLOCKD *xd,
// Wait until reference block is ready. Pad 7 more pixels as last 7
// pixels of each superblock row can be changed by next superblock row.
if (pbi->frame_parallel_decode)
if (cm->frame_parallel_decode)
vp10_frameworker_wait(pbi->frame_worker_owner, ref_frame_buf,
MAX(0, (y1 + 7)) << (plane == 0 ? 0 : 1));
VPXMAX(0, (y1 + 7)) << (plane == 0 ? 0 : 1));
// Skip border extension if block is inside the frame.
if (x0 < 0 || x0 > frame_width - 1 || x1 < 0 || x1 > frame_width - 1 ||
@ -649,10 +657,10 @@ static void dec_build_inter_predictors(VP10Decoder *const pbi, MACROBLOCKD *xd,
} else {
// Wait until reference block is ready. Pad 7 more pixels as last 7
// pixels of each superblock row can be changed by next superblock row.
if (pbi->frame_parallel_decode) {
if (cm->frame_parallel_decode) {
const int y1 = (y0_16 + (h - 1) * ys) >> SUBPEL_BITS;
vp10_frameworker_wait(pbi->frame_worker_owner, ref_frame_buf,
MAX(0, (y1 + 7)) << (plane == 0 ? 0 : 1));
VPXMAX(0, (y1 + 7)) << (plane == 0 ? 0 : 1));
}
}
#if CONFIG_VP9_HIGHBITDEPTH
@ -699,12 +707,19 @@ static void dec_build_inter_predictors_sb(VP10Decoder *const pbi,
const int is_scaled = vp10_is_scaled(sf);
if (sb_type < BLOCK_8X8) {
int i = 0, x, y;
const PARTITION_TYPE bp = BLOCK_8X8 - sb_type;
const int have_vsplit = bp != PARTITION_HORZ;
const int have_hsplit = bp != PARTITION_VERT;
const int num_4x4_w = 2 >> ((!have_vsplit) | pd->subsampling_x);
const int num_4x4_h = 2 >> ((!have_hsplit) | pd->subsampling_y);
const int pw = 8 >> (have_vsplit | pd->subsampling_x);
const int ph = 8 >> (have_hsplit | pd->subsampling_y);
int x, y;
for (y = 0; y < num_4x4_h; ++y) {
for (x = 0; x < num_4x4_w; ++x) {
const MV mv = average_split_mvs(pd, mi, ref, i++);
const MV mv = average_split_mvs(pd, mi, ref, y * 2 + x);
dec_build_inter_predictors(pbi, xd, plane, n4w_x4, n4h_x4,
4 * x, 4 * y, 4, 4, mi_x, mi_y, kernel,
4 * x, 4 * y, pw, ph, mi_x, mi_y, kernel,
sf, pre_buf, dst_buf, &mv,
ref_frame_buf, is_scaled, ref);
}
@ -723,8 +738,8 @@ static void dec_build_inter_predictors_sb(VP10Decoder *const pbi,
static INLINE TX_SIZE dec_get_uv_tx_size(const MB_MODE_INFO *mbmi,
int n4_wl, int n4_hl) {
// get minimum log2 num4x4s dimension
const int x = MIN(n4_wl, n4_hl);
return MIN(mbmi->tx_size, x);
const int x = VPXMIN(n4_wl, n4_hl);
return VPXMIN(mbmi->tx_size, x);
}
static INLINE void dec_reset_skip_context(MACROBLOCKD *xd) {
@ -785,8 +800,8 @@ static void decode_block(VP10Decoder *const pbi, MACROBLOCKD *const xd,
const int less8x8 = bsize < BLOCK_8X8;
const int bw = 1 << (bwl - 1);
const int bh = 1 << (bhl - 1);
const int x_mis = MIN(bw, cm->mi_cols - mi_col);
const int y_mis = MIN(bh, cm->mi_rows - mi_row);
const int x_mis = VPXMIN(bw, cm->mi_cols - mi_col);
const int y_mis = VPXMIN(bh, cm->mi_rows - mi_row);
MB_MODE_INFO *mbmi = set_offsets(cm, xd, bsize, mi_row, mi_col,
bw, bh, x_mis, y_mis, bwl, bhl);
@ -856,7 +871,11 @@ static void decode_block(VP10Decoder *const pbi, MACROBLOCKD *const xd,
}
if (!less8x8 && eobtotal == 0)
#if CONFIG_MISC_FIXES
mbmi->has_no_coeffs = 1; // skip loopfilter
#else
mbmi->skip = 1; // skip loopfilter
#endif
}
}
@ -1011,8 +1030,9 @@ static void read_coef_probs(FRAME_CONTEXT *fc, TX_MODE tx_mode,
read_coef_probs_common(fc->coef_probs[tx_size], r);
}
static void setup_segmentation(struct segmentation *seg,
static void setup_segmentation(VP10_COMMON *const cm,
struct vpx_read_bit_buffer *rb) {
struct segmentation *const seg = &cm->seg;
int i, j;
seg->update_map = 0;
@ -1023,13 +1043,21 @@ static void setup_segmentation(struct segmentation *seg,
return;
// Segmentation map update
seg->update_map = vpx_rb_read_bit(rb);
if (frame_is_intra_only(cm) || cm->error_resilient_mode) {
seg->update_map = 1;
} else {
seg->update_map = vpx_rb_read_bit(rb);
}
if (seg->update_map) {
for (i = 0; i < SEG_TREE_PROBS; i++)
seg->tree_probs[i] = vpx_rb_read_bit(rb) ? vpx_rb_read_literal(rb, 8)
: MAX_PROB;
seg->temporal_update = vpx_rb_read_bit(rb);
if (frame_is_intra_only(cm) || cm->error_resilient_mode) {
seg->temporal_update = 0;
} else {
seg->temporal_update = vpx_rb_read_bit(rb);
}
if (seg->temporal_update) {
for (i = 0; i < PREDICTION_PROBS; i++)
seg->pred_probs[i] = vpx_rb_read_bit(rb) ? vpx_rb_read_literal(rb, 8)
@ -1080,17 +1108,17 @@ static void setup_loopfilter(struct loopfilter *lf,
for (i = 0; i < MAX_REF_FRAMES; i++)
if (vpx_rb_read_bit(rb))
lf->ref_deltas[i] = vpx_rb_read_signed_literal(rb, 6);
lf->ref_deltas[i] = vpx_rb_read_inv_signed_literal(rb, 6);
for (i = 0; i < MAX_MODE_LF_DELTAS; i++)
if (vpx_rb_read_bit(rb))
lf->mode_deltas[i] = vpx_rb_read_signed_literal(rb, 6);
lf->mode_deltas[i] = vpx_rb_read_inv_signed_literal(rb, 6);
}
}
}
static INLINE int read_delta_q(struct vpx_read_bit_buffer *rb) {
return vpx_rb_read_bit(rb) ? vpx_rb_read_signed_literal(rb, 4) : 0;
return vpx_rb_read_bit(rb) ? vpx_rb_read_inv_signed_literal(rb, 4) : 0;
}
static void setup_quantization(VP10_COMMON *const cm, MACROBLOCKD *const xd,
@ -1138,12 +1166,7 @@ static void setup_segmentation_dequant(VP10_COMMON *const cm) {
}
static INTERP_FILTER read_interp_filter(struct vpx_read_bit_buffer *rb) {
const INTERP_FILTER literal_to_filter[] = { EIGHTTAP_SMOOTH,
EIGHTTAP,
EIGHTTAP_SHARP,
BILINEAR };
return vpx_rb_read_bit(rb) ? SWITCHABLE
: literal_to_filter[vpx_rb_read_literal(rb, 2)];
return vpx_rb_read_bit(rb) ? SWITCHABLE : vpx_rb_read_literal(rb, 2);
}
static void setup_display_size(VP10_COMMON *cm,
@ -1222,6 +1245,7 @@ static void setup_frame_size(VP10_COMMON *cm, struct vpx_read_bit_buffer *rb) {
pool->frame_bufs[cm->new_fb_idx].buf.subsampling_y = cm->subsampling_y;
pool->frame_bufs[cm->new_fb_idx].buf.bit_depth = (unsigned int)cm->bit_depth;
pool->frame_bufs[cm->new_fb_idx].buf.color_space = cm->color_space;
pool->frame_bufs[cm->new_fb_idx].buf.color_range = cm->color_range;
}
static INLINE int valid_ref_frame_img_fmt(vpx_bit_depth_t ref_bit_depth,
@ -1303,6 +1327,7 @@ static void setup_frame_size_with_refs(VP10_COMMON *cm,
pool->frame_bufs[cm->new_fb_idx].buf.subsampling_y = cm->subsampling_y;
pool->frame_bufs[cm->new_fb_idx].buf.bit_depth = (unsigned int)cm->bit_depth;
pool->frame_bufs[cm->new_fb_idx].buf.color_space = cm->color_space;
pool->frame_bufs[cm->new_fb_idx].buf.color_range = cm->color_range;
}
static void setup_tile_info(VP10_COMMON *cm, struct vpx_read_bit_buffer *rb) {
@ -1448,8 +1473,9 @@ static const uint8_t *decode_tiles(VP10Decoder *pbi,
tile_data->cm = cm;
tile_data->xd = pbi->mb;
tile_data->xd.corrupted = 0;
tile_data->xd.counts = cm->frame_parallel_decoding_mode ?
NULL : &cm->counts;
tile_data->xd.counts =
cm->refresh_frame_context == REFRESH_FRAME_CONTEXT_BACKWARD ?
&cm->counts : NULL;
vp10_zero(tile_data->dqcoeff);
vp10_tile_init(&tile_data->xd.tile, tile_data->cm, tile_row, tile_col);
setup_token_decoder(buf->data, data_end, buf->size, &cm->error,
@ -1504,7 +1530,7 @@ static const uint8_t *decode_tiles(VP10Decoder *pbi,
// After loopfiltering, the last 7 row pixels in each superblock row may
// still be changed by the longest loopfilter of the next superblock
// row.
if (pbi->frame_parallel_decode)
if (cm->frame_parallel_decode)
vp10_frameworker_broadcast(pbi->cur_buf,
mi_row << MI_BLOCK_SIZE_LOG2);
}
@ -1522,7 +1548,7 @@ static const uint8_t *decode_tiles(VP10Decoder *pbi,
// Get last tile data.
tile_data = pbi->tile_data + tile_cols * tile_rows - 1;
if (pbi->frame_parallel_decode)
if (cm->frame_parallel_decode)
vp10_frameworker_broadcast(pbi->cur_buf, INT_MAX);
return vpx_reader_find_end(&tile_data->bit_reader);
}
@ -1570,7 +1596,7 @@ static const uint8_t *decode_tiles_mt(VP10Decoder *pbi,
const int aligned_mi_cols = mi_cols_aligned_to_sb(cm->mi_cols);
const int tile_cols = 1 << cm->log2_tile_cols;
const int tile_rows = 1 << cm->log2_tile_rows;
const int num_workers = MIN(pbi->max_threads & ~1, tile_cols);
const int num_workers = VPXMIN(pbi->max_threads & ~1, tile_cols);
TileBuffer tile_buffers[1][1 << 6];
int n;
int final_worker = -1;
@ -1637,7 +1663,7 @@ static const uint8_t *decode_tiles_mt(VP10Decoder *pbi,
int group_start = 0;
while (group_start < tile_cols) {
const TileBuffer largest = tile_buffers[0][group_start];
const int group_end = MIN(group_start + num_workers, tile_cols) - 1;
const int group_end = VPXMIN(group_start + num_workers, tile_cols) - 1;
memmove(tile_buffers[0] + group_start, tile_buffers[0] + group_start + 1,
(group_end - group_start) * sizeof(tile_buffers[0][0]));
tile_buffers[0][group_end] = largest;
@ -1646,7 +1672,7 @@ static const uint8_t *decode_tiles_mt(VP10Decoder *pbi,
}
// Initialize thread frame counts.
if (!cm->frame_parallel_decoding_mode) {
if (cm->refresh_frame_context == REFRESH_FRAME_CONTEXT_BACKWARD) {
int i;
for (i = 0; i < num_workers; ++i) {
@ -1668,8 +1694,9 @@ static const uint8_t *decode_tiles_mt(VP10Decoder *pbi,
tile_data->pbi = pbi;
tile_data->xd = pbi->mb;
tile_data->xd.corrupted = 0;
tile_data->xd.counts = cm->frame_parallel_decoding_mode ?
0 : &tile_data->counts;
tile_data->xd.counts =
cm->refresh_frame_context == REFRESH_FRAME_CONTEXT_BACKWARD ?
&tile_data->counts : NULL;
vp10_zero(tile_data->dqcoeff);
vp10_tile_init(tile, cm, 0, buf->col);
vp10_tile_init(&tile_data->xd.tile, cm, 0, buf->col);
@ -1708,7 +1735,8 @@ static const uint8_t *decode_tiles_mt(VP10Decoder *pbi,
}
// Accumulate thread frame counts.
if (n >= tile_cols && !cm->frame_parallel_decoding_mode) {
if (n >= tile_cols &&
cm->refresh_frame_context == REFRESH_FRAME_CONTEXT_BACKWARD) {
for (i = 0; i < num_workers; ++i) {
TileWorkerData *const tile_data =
(TileWorkerData*)pbi->tile_workers[i].data1;
@ -1740,7 +1768,8 @@ static void read_bitdepth_colorspace_sampling(
}
cm->color_space = vpx_rb_read_literal(rb, 3);
if (cm->color_space != VPX_CS_SRGB) {
vpx_rb_read_bit(rb); // [16,235] (including xvycc) vs [0,255] range
// [16,235] (including xvycc) vs [0,255] range
cm->color_range = vpx_rb_read_bit(rb);
if (cm->profile == PROFILE_1 || cm->profile == PROFILE_3) {
cm->subsampling_x = vpx_rb_read_bit(rb);
cm->subsampling_y = vpx_rb_read_bit(rb);
@ -1771,6 +1800,9 @@ static void read_bitdepth_colorspace_sampling(
static size_t read_uncompressed_header(VP10Decoder *pbi,
struct vpx_read_bit_buffer *rb) {
VP10_COMMON *const cm = &pbi->common;
#if CONFIG_MISC_FIXES
MACROBLOCKD *const xd = &pbi->mb;
#endif
BufferPool *const pool = cm->buffer_pool;
RefCntBuffer *const frame_bufs = pool->frame_bufs;
int i, mask, ref_index = 0;
@ -1812,7 +1844,7 @@ static size_t read_uncompressed_header(VP10Decoder *pbi,
cm->lf.filter_level = 0;
cm->show_frame = 1;
if (pbi->frame_parallel_decode) {
if (cm->frame_parallel_decode) {
for (i = 0; i < REF_FRAMES; ++i)
cm->next_ref_frame_map[i] = cm->ref_frame_map[i];
}
@ -1844,8 +1876,33 @@ static size_t read_uncompressed_header(VP10Decoder *pbi,
} else {
cm->intra_only = cm->show_frame ? 0 : vpx_rb_read_bit(rb);
cm->reset_frame_context = cm->error_resilient_mode ?
0 : vpx_rb_read_literal(rb, 2);
if (cm->error_resilient_mode) {
cm->reset_frame_context = RESET_FRAME_CONTEXT_ALL;
} else {
#if CONFIG_MISC_FIXES
if (cm->intra_only) {
cm->reset_frame_context =
vpx_rb_read_bit(rb) ? RESET_FRAME_CONTEXT_ALL
: RESET_FRAME_CONTEXT_CURRENT;
} else {
cm->reset_frame_context =
vpx_rb_read_bit(rb) ? RESET_FRAME_CONTEXT_CURRENT
: RESET_FRAME_CONTEXT_NONE;
if (cm->reset_frame_context == RESET_FRAME_CONTEXT_CURRENT)
cm->reset_frame_context =
vpx_rb_read_bit(rb) ? RESET_FRAME_CONTEXT_ALL
: RESET_FRAME_CONTEXT_CURRENT;
}
#else
static const RESET_FRAME_CONTEXT_MODE reset_frame_context_conv_tbl[4] = {
RESET_FRAME_CONTEXT_NONE, RESET_FRAME_CONTEXT_NONE,
RESET_FRAME_CONTEXT_CURRENT, RESET_FRAME_CONTEXT_ALL
};
cm->reset_frame_context =
reset_frame_context_conv_tbl[vpx_rb_read_literal(rb, 2)];
#endif
}
if (cm->intra_only) {
if (!vp10_read_sync_code(rb))
@ -1859,6 +1916,7 @@ static size_t read_uncompressed_header(VP10Decoder *pbi,
// specifies that the default color format should be YUV 4:2:0 in this
// case (normative).
cm->color_space = VPX_CS_BT_601;
cm->color_range = 0;
cm->subsampling_y = cm->subsampling_x = 1;
cm->bit_depth = VPX_BITS_8;
#if CONFIG_VP9_HIGHBITDEPTH
@ -1909,6 +1967,7 @@ static size_t read_uncompressed_header(VP10Decoder *pbi,
get_frame_new_buffer(cm)->bit_depth = cm->bit_depth;
#endif
get_frame_new_buffer(cm)->color_space = cm->color_space;
get_frame_new_buffer(cm)->color_range = cm->color_range;
if (pbi->need_resync) {
vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME,
@ -1917,11 +1976,20 @@ static size_t read_uncompressed_header(VP10Decoder *pbi,
}
if (!cm->error_resilient_mode) {
cm->refresh_frame_context = vpx_rb_read_bit(rb);
cm->frame_parallel_decoding_mode = vpx_rb_read_bit(rb);
cm->refresh_frame_context =
vpx_rb_read_bit(rb) ? REFRESH_FRAME_CONTEXT_FORWARD
: REFRESH_FRAME_CONTEXT_OFF;
if (cm->refresh_frame_context == REFRESH_FRAME_CONTEXT_FORWARD) {
cm->refresh_frame_context =
vpx_rb_read_bit(rb) ? REFRESH_FRAME_CONTEXT_FORWARD
: REFRESH_FRAME_CONTEXT_BACKWARD;
#if !CONFIG_MISC_FIXES
} else {
vpx_rb_read_bit(rb); // parallel decoding mode flag
#endif
}
} else {
cm->refresh_frame_context = 0;
cm->frame_parallel_decoding_mode = 1;
cm->refresh_frame_context = REFRESH_FRAME_CONTEXT_OFF;
}
// This flag will be overridden by the call to vp10_setup_past_independence
@ -1957,8 +2025,11 @@ static size_t read_uncompressed_header(VP10Decoder *pbi,
setup_loopfilter(&cm->lf, rb);
setup_quantization(cm, &pbi->mb, rb);
setup_segmentation(&cm->seg, rb);
setup_segmentation(cm, rb);
setup_segmentation_dequant(cm);
#if CONFIG_MISC_FIXES
cm->tx_mode = xd->lossless ? ONLY_4X4 : read_tx_mode(rb);
#endif
setup_tile_info(cm, rb);
sz = vpx_rb_read_literal(rb, 16);
@ -1984,7 +2055,9 @@ static void read_ext_tx_probs(FRAME_CONTEXT *fc, vpx_reader *r) {
static int read_compressed_header(VP10Decoder *pbi, const uint8_t *data,
size_t partition_size) {
VP10_COMMON *const cm = &pbi->common;
#if !CONFIG_MISC_FIXES
MACROBLOCKD *const xd = &pbi->mb;
#endif
FRAME_CONTEXT *const fc = cm->fc;
vpx_reader r;
int k;
@ -1994,7 +2067,9 @@ static int read_compressed_header(VP10Decoder *pbi, const uint8_t *data,
vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
"Failed to allocate bool decoder 0");
#if !CONFIG_MISC_FIXES
cm->tx_mode = xd->lossless ? ONLY_4X4 : read_tx_mode(&r);
#endif
if (cm->tx_mode == TX_MODE_SELECT)
read_tx_mode_probs(&fc->tx_probs, &r);
read_coef_probs(fc, cm->tx_mode, &r);
@ -2044,7 +2119,8 @@ static int read_compressed_header(VP10Decoder *pbi, const uint8_t *data,
static void debug_check_frame_counts(const VP10_COMMON *const cm) {
FRAME_COUNTS zero_counts;
vp10_zero(zero_counts);
assert(cm->frame_parallel_decoding_mode || cm->error_resilient_mode);
assert(cm->refresh_frame_context != REFRESH_FRAME_CONTEXT_BACKWARD ||
cm->error_resilient_mode);
assert(!memcmp(cm->counts.y_mode, zero_counts.y_mode,
sizeof(cm->counts.y_mode)));
assert(!memcmp(cm->counts.uv_mode, zero_counts.uv_mode,
@ -2087,7 +2163,7 @@ static struct vpx_read_bit_buffer *init_read_bit_buffer(
rb->error_handler = error_handler;
rb->error_handler_data = &pbi->common;
if (pbi->decrypt_cb) {
const int n = (int)MIN(MAX_VP9_HEADER_SIZE, data_end - data);
const int n = (int)VPXMIN(MAX_VP9_HEADER_SIZE, data_end - data);
pbi->decrypt_cb(pbi->decrypt_state, data, clear_data, n);
rb->bit_buffer = clear_data;
rb->bit_buffer_end = clear_data + n;
@ -2174,10 +2250,11 @@ void vp10_decode_frame(VP10Decoder *pbi,
// If encoded in frame parallel mode, frame context is ready after decoding
// the frame header.
if (pbi->frame_parallel_decode && cm->frame_parallel_decoding_mode) {
if (cm->frame_parallel_decode &&
cm->refresh_frame_context != REFRESH_FRAME_CONTEXT_BACKWARD) {
VPxWorker *const worker = pbi->frame_worker_owner;
FrameWorkerData *const frame_worker_data = worker->data1;
if (cm->refresh_frame_context) {
if (cm->refresh_frame_context == REFRESH_FRAME_CONTEXT_FORWARD) {
context_updated = 1;
cm->frame_contexts[cm->frame_context_idx] = *cm->fc;
}
@ -2211,7 +2288,7 @@ void vp10_decode_frame(VP10Decoder *pbi,
}
if (!xd->corrupted) {
if (!cm->error_resilient_mode && !cm->frame_parallel_decoding_mode) {
if (cm->refresh_frame_context == REFRESH_FRAME_CONTEXT_BACKWARD) {
vp10_adapt_coef_probs(cm);
if (!frame_is_intra_only(cm)) {
@ -2227,6 +2304,7 @@ void vp10_decode_frame(VP10Decoder *pbi,
}
// Non frame parallel update frame context here.
if (cm->refresh_frame_context && !context_updated)
if (cm->refresh_frame_context != REFRESH_FRAME_CONTEXT_OFF &&
!context_updated)
cm->frame_contexts[cm->frame_context_idx] = *cm->fc;
}

View File

@ -22,6 +22,8 @@
#include "vp10/decoder/decodemv.h"
#include "vp10/decoder/decodeframe.h"
#include "vpx_dsp/vpx_dsp_common.h"
static PREDICTION_MODE read_intra_mode(vpx_reader *r, const vpx_prob *p) {
return (PREDICTION_MODE)vpx_read_tree(r, vp10_intra_mode_tree, p);
}
@ -87,7 +89,7 @@ static TX_SIZE read_tx_size(VP10_COMMON *cm, MACROBLOCKD *xd,
if (allow_select && tx_mode == TX_MODE_SELECT && bsize >= BLOCK_8X8)
return read_selected_tx_size(cm, xd, max_tx_size, r);
else
return MIN(max_tx_size, tx_mode_to_biggest_tx_size[tx_mode]);
return VPXMIN(max_tx_size, tx_mode_to_biggest_tx_size[tx_mode]);
}
static int dec_get_segment_id(const VP10_COMMON *cm, const uint8_t *segment_ids,
@ -96,8 +98,8 @@ static int dec_get_segment_id(const VP10_COMMON *cm, const uint8_t *segment_ids,
for (y = 0; y < y_mis; y++)
for (x = 0; x < x_mis; x++)
segment_id = MIN(segment_id,
segment_ids[mi_offset + y * cm->mi_cols + x]);
segment_id =
VPXMIN(segment_id, segment_ids[mi_offset + y * cm->mi_cols + x]);
assert(segment_id >= 0 && segment_id < MAX_SEGMENTS);
return segment_id;
@ -114,6 +116,22 @@ static void set_segment_id(VP10_COMMON *cm, int mi_offset,
cm->current_frame_seg_map[mi_offset + y * cm->mi_cols + x] = segment_id;
}
static int read_intra_segment_id(VP10_COMMON *const cm, int mi_offset,
int x_mis, int y_mis,
vpx_reader *r) {
struct segmentation *const seg = &cm->seg;
int segment_id;
if (!seg->enabled)
return 0; // Default for disabled segmentation
assert(seg->update_map && !seg->temporal_update);
segment_id = read_segment_id(r, seg);
set_segment_id(cm, mi_offset, x_mis, y_mis, segment_id);
return segment_id;
}
static void copy_segment_id(const VP10_COMMON *cm,
const uint8_t *last_segment_ids,
uint8_t *current_segment_ids,
@ -126,26 +144,6 @@ static void copy_segment_id(const VP10_COMMON *cm,
last_segment_ids[mi_offset + y * cm->mi_cols + x] : 0;
}
static int read_intra_segment_id(VP10_COMMON *const cm, int mi_offset,
int x_mis, int y_mis,
vpx_reader *r) {
struct segmentation *const seg = &cm->seg;
int segment_id;
if (!seg->enabled)
return 0; // Default for disabled segmentation
if (!seg->update_map) {
copy_segment_id(cm, cm->last_frame_seg_map, cm->current_frame_seg_map,
mi_offset, x_mis, y_mis);
return 0;
}
segment_id = read_segment_id(r, seg);
set_segment_id(cm, mi_offset, x_mis, y_mis, segment_id);
return segment_id;
}
static int read_inter_segment_id(VP10_COMMON *const cm, MACROBLOCKD *const xd,
int mi_row, int mi_col, vpx_reader *r) {
struct segmentation *const seg = &cm->seg;
@ -156,8 +154,8 @@ static int read_inter_segment_id(VP10_COMMON *const cm, MACROBLOCKD *const xd,
const int bh = xd->plane[0].n4_h >> 1;
// TODO(slavarnway): move x_mis, y_mis into xd ?????
const int x_mis = MIN(cm->mi_cols - mi_col, bw);
const int y_mis = MIN(cm->mi_rows - mi_row, bh);
const int x_mis = VPXMIN(cm->mi_cols - mi_col, bw);
const int y_mis = VPXMIN(cm->mi_rows - mi_row, bh);
if (!seg->enabled)
return 0; // Default for disabled segmentation
@ -212,8 +210,8 @@ static void read_intra_frame_mode_info(VP10_COMMON *const cm,
const int bh = xd->plane[0].n4_h >> 1;
// TODO(slavarnway): move x_mis, y_mis into xd ?????
const int x_mis = MIN(cm->mi_cols - mi_col, bw);
const int y_mis = MIN(cm->mi_rows - mi_row, bh);
const int x_mis = VPXMIN(cm->mi_cols - mi_col, bw);
const int y_mis = VPXMIN(cm->mi_rows - mi_row, bh);
mbmi->segment_id = read_intra_segment_id(cm, mi_offset, x_mis, y_mis, r);
mbmi->skip = read_skip(cm, xd, mbmi->segment_id, r);
@ -296,7 +294,7 @@ static INLINE void read_mv(vpx_reader *r, MV *mv, const MV *ref,
if (mv_joint_horizontal(joint_type))
diff.col = read_mv_component(r, &ctx->comps[1], use_hp);
vp10_inc_mv(&diff, counts);
vp10_inc_mv(&diff, counts, use_hp);
mv->row = ref->row + diff.row;
mv->col = ref->col + diff.col;
@ -604,11 +602,12 @@ static void read_inter_frame_mode_info(VP10Decoder *const pbi,
mbmi->sb_type >= BLOCK_8X8 &&
!segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP) &&
!mbmi->skip) {
FRAME_COUNTS *counts = xd->counts;
mbmi->ext_txfrm = vpx_read_tree(r,
vp10_ext_tx_tree,
cm->fc->ext_tx_prob[mbmi->tx_size]);
if (!cm->frame_parallel_decoding_mode)
++cm->counts.ext_tx[mbmi->tx_size][mbmi->ext_txfrm];
if (counts)
++counts->ext_tx[mbmi->tx_size][mbmi->ext_txfrm];
} else {
mbmi->ext_txfrm = NORM;
}

View File

@ -258,7 +258,7 @@ static void swap_frame_buffers(VP10Decoder *pbi) {
pbi->hold_ref_buf = 0;
cm->frame_to_show = get_frame_new_buffer(cm);
if (!pbi->frame_parallel_decode || !cm->show_frame) {
if (!cm->frame_parallel_decode || !cm->show_frame) {
lock_buffer_pool(pool);
--frame_bufs[cm->new_fb_idx].ref_count;
unlock_buffer_pool(pool);
@ -297,7 +297,7 @@ int vp10_receive_compressed_data(VP10Decoder *pbi,
// Check if the previous frame was a frame without any references to it.
// Release frame buffer if not decoding in frame parallel mode.
if (!pbi->frame_parallel_decode && cm->new_fb_idx >= 0
if (!cm->frame_parallel_decode && cm->new_fb_idx >= 0
&& frame_bufs[cm->new_fb_idx].ref_count == 0)
pool->release_fb_cb(pool->cb_priv,
&frame_bufs[cm->new_fb_idx].raw_frame_buffer);
@ -310,7 +310,7 @@ int vp10_receive_compressed_data(VP10Decoder *pbi,
cm->cur_frame = &pool->frame_bufs[cm->new_fb_idx];
pbi->hold_ref_buf = 0;
if (pbi->frame_parallel_decode) {
if (cm->frame_parallel_decode) {
VPxWorker *const worker = pbi->frame_worker_owner;
vp10_frameworker_lock_stats(worker);
frame_bufs[cm->new_fb_idx].frame_worker_owner = worker;
@ -379,12 +379,12 @@ int vp10_receive_compressed_data(VP10Decoder *pbi,
if (!cm->show_existing_frame) {
cm->last_show_frame = cm->show_frame;
cm->prev_frame = cm->cur_frame;
if (cm->seg.enabled && !pbi->frame_parallel_decode)
if (cm->seg.enabled && !cm->frame_parallel_decode)
vp10_swap_current_and_last_seg_map(cm);
}
// Update progress in frame parallel decode.
if (pbi->frame_parallel_decode) {
if (cm->frame_parallel_decode) {
// Need to lock the mutex here as another thread may
// be accessing this buffer.
VPxWorker *const worker = pbi->frame_worker_owner;

View File

@ -55,8 +55,6 @@ typedef struct VP10Decoder {
int refresh_frame_flags;
int frame_parallel_decode; // frame-based threading.
// TODO(hkuang): Combine this with cur_buf in macroblockd as they are
// the same.
RefCntBuffer *cur_buf; // Current decoding frame buffer.

View File

@ -15,6 +15,10 @@
#include "vpx_util/vpx_thread.h"
#include "vpx/internal/vpx_codec_internal.h"
#ifdef __cplusplus
extern "C" {
#endif
struct VP10Common;
struct VP10Decoder;
@ -63,4 +67,8 @@ void vp10_frameworker_broadcast(RefCntBuffer *const buf, int row);
void vp10_frameworker_copy_context(VPxWorker *const dst_worker,
VPxWorker *const src_worker);
#ifdef __cplusplus
} // extern "C"
#endif
#endif // VP10_DECODER_DTHREAD_H_

View File

@ -16,6 +16,7 @@
#include "vp10/encoder/encodeframe.h"
#include "vp10/common/seg_common.h"
#include "vp10/encoder/segmentation.h"
#include "vpx_dsp/vpx_dsp_common.h"
#include "vpx_ports/system_state.h"
#define AQ_C_SEGMENTS 5
@ -117,8 +118,8 @@ void vp10_caq_select_segment(VP10_COMP *cpi, MACROBLOCK *mb, BLOCK_SIZE bs,
const int mi_offset = mi_row * cm->mi_cols + mi_col;
const int bw = num_8x8_blocks_wide_lookup[BLOCK_64X64];
const int bh = num_8x8_blocks_high_lookup[BLOCK_64X64];
const int xmis = MIN(cm->mi_cols - mi_col, num_8x8_blocks_wide_lookup[bs]);
const int ymis = MIN(cm->mi_rows - mi_row, num_8x8_blocks_high_lookup[bs]);
const int xmis = VPXMIN(cm->mi_cols - mi_col, num_8x8_blocks_wide_lookup[bs]);
const int ymis = VPXMIN(cm->mi_rows - mi_row, num_8x8_blocks_high_lookup[bs]);
int x, y;
int i;
unsigned char segment;
@ -136,7 +137,7 @@ void vp10_caq_select_segment(VP10_COMP *cpi, MACROBLOCK *mb, BLOCK_SIZE bs,
vpx_clear_system_state();
low_var_thresh = (cpi->oxcf.pass == 2)
? MAX(cpi->twopass.mb_av_energy, MIN_DEFAULT_LV_THRESH)
? VPXMAX(cpi->twopass.mb_av_energy, MIN_DEFAULT_LV_THRESH)
: DEFAULT_LV_THRESH;
vp10_setup_src_planes(mb, cpi->Source, mi_row, mi_col);

View File

@ -15,6 +15,7 @@
#include "vp10/encoder/aq_cyclicrefresh.h"
#include "vp10/encoder/ratectrl.h"
#include "vp10/encoder/segmentation.h"
#include "vpx_dsp/vpx_dsp_common.h"
#include "vpx_ports/system_state.h"
struct CYCLIC_REFRESH {
@ -220,8 +221,8 @@ void vp10_cyclic_refresh_update_segment(VP10_COMP *const cpi,
CYCLIC_REFRESH *const cr = cpi->cyclic_refresh;
const int bw = num_8x8_blocks_wide_lookup[bsize];
const int bh = num_8x8_blocks_high_lookup[bsize];
const int xmis = MIN(cm->mi_cols - mi_col, bw);
const int ymis = MIN(cm->mi_rows - mi_row, bh);
const int xmis = VPXMIN(cm->mi_cols - mi_col, bw);
const int ymis = VPXMIN(cm->mi_rows - mi_row, bh);
const int block_index = mi_row * cm->mi_cols + mi_col;
const int refresh_this_block = candidate_refresh_aq(cr, mbmi, rate, dist,
bsize);
@ -291,7 +292,7 @@ void vp10_cyclic_refresh_postencode(VP10_COMP *const cpi) {
}
}
// Set golden frame update interval, for non-svc 1 pass CBR mode.
// Set golden frame update interval, for 1 pass CBR mode.
void vp10_cyclic_refresh_set_golden_update(VP10_COMP *const cpi) {
RATE_CONTROL *const rc = &cpi->rc;
CYCLIC_REFRESH *const cr = cpi->cyclic_refresh;
@ -413,10 +414,10 @@ static void cyclic_refresh_update_map(VP10_COMP *const cpi) {
assert(mi_col >= 0 && mi_col < cm->mi_cols);
bl_index = mi_row * cm->mi_cols + mi_col;
// Loop through all 8x8 blocks in superblock and update map.
xmis = MIN(cm->mi_cols - mi_col,
num_8x8_blocks_wide_lookup[BLOCK_64X64]);
ymis = MIN(cm->mi_rows - mi_row,
num_8x8_blocks_high_lookup[BLOCK_64X64]);
xmis =
VPXMIN(cm->mi_cols - mi_col, num_8x8_blocks_wide_lookup[BLOCK_64X64]);
ymis =
VPXMIN(cm->mi_rows - mi_row, num_8x8_blocks_high_lookup[BLOCK_64X64]);
for (y = 0; y < ymis; y++) {
for (x = 0; x < xmis; x++) {
const int bl_index2 = bl_index + y * cm->mi_cols + x;
@ -484,10 +485,7 @@ void vp10_cyclic_refresh_setup(VP10_COMP *const cpi) {
if (cm->current_video_frame == 0)
cr->low_content_avg = 0.0;
// Don't apply refresh on key frame or enhancement layer frames.
if (!apply_cyclic_refresh ||
(cm->frame_type == KEY_FRAME) ||
(cpi->svc.temporal_layer_id > 0) ||
(cpi->svc.spatial_layer_id > 0)) {
if (!apply_cyclic_refresh || cm->frame_type == KEY_FRAME) {
// Set segmentation map to 0 and disable.
unsigned char *const seg_map = cpi->segmentation_map;
memset(seg_map, 0, cm->mi_rows * cm->mi_cols);
@ -545,8 +543,9 @@ void vp10_cyclic_refresh_setup(VP10_COMP *const cpi) {
// Set a more aggressive (higher) q delta for segment BOOST2.
qindex_delta = compute_deltaq(
cpi, cm->base_qindex, MIN(CR_MAX_RATE_TARGET_RATIO,
0.1 * cr->rate_boost_fac * cr->rate_ratio_qdelta));
cpi, cm->base_qindex,
VPXMIN(CR_MAX_RATE_TARGET_RATIO,
0.1 * cr->rate_boost_fac * cr->rate_ratio_qdelta));
cr->qindex_delta[2] = qindex_delta;
vp10_set_segdata(seg, CR_SEGMENT_ID_BOOST2, SEG_LVL_ALT_Q, qindex_delta);

View File

@ -61,7 +61,7 @@ void vp10_cyclic_refresh_update__map(struct VP10_COMP *const cpi);
// Update the actual number of blocks that were applied the segment delta q.
void vp10_cyclic_refresh_postencode(struct VP10_COMP *const cpi);
// Set golden frame update interval, for non-svc 1 pass CBR mode.
// Set golden frame update interval, for 1 pass CBR mode.
void vp10_cyclic_refresh_set_golden_update(struct VP10_COMP *const cpi);
// Check if we should not update golden reference, based on past refresh stats.

View File

@ -14,6 +14,7 @@
#include "vpx/vpx_encoder.h"
#include "vpx_dsp/bitwriter_buffer.h"
#include "vpx_dsp/vpx_dsp_common.h"
#include "vpx_mem/vpx_mem.h"
#include "vpx_ports/mem_ops.h"
#include "vpx_ports/system_state.h"
@ -776,8 +777,7 @@ static void encode_loopfilter(struct loopfilter *lf,
vpx_wb_write_bit(wb, changed);
if (changed) {
lf->last_ref_deltas[i] = delta;
vpx_wb_write_literal(wb, abs(delta) & 0x3F, 6);
vpx_wb_write_bit(wb, delta < 0);
vpx_wb_write_inv_signed_literal(wb, delta, 6);
}
}
@ -787,8 +787,7 @@ static void encode_loopfilter(struct loopfilter *lf,
vpx_wb_write_bit(wb, changed);
if (changed) {
lf->last_mode_deltas[i] = delta;
vpx_wb_write_literal(wb, abs(delta) & 0x3F, 6);
vpx_wb_write_bit(wb, delta < 0);
vpx_wb_write_inv_signed_literal(wb, delta, 6);
}
}
}
@ -798,8 +797,7 @@ static void encode_loopfilter(struct loopfilter *lf,
static void write_delta_q(struct vpx_write_bit_buffer *wb, int delta_q) {
if (delta_q != 0) {
vpx_wb_write_bit(wb, 1);
vpx_wb_write_literal(wb, abs(delta_q), 4);
vpx_wb_write_bit(wb, delta_q < 0);
vpx_wb_write_inv_signed_literal(wb, delta_q, 4);
} else {
vpx_wb_write_bit(wb, 0);
}
@ -824,7 +822,11 @@ static void encode_segmentation(VP10_COMMON *cm, MACROBLOCKD *xd,
return;
// Segmentation map
vpx_wb_write_bit(wb, seg->update_map);
if (!frame_is_intra_only(cm) && !cm->error_resilient_mode) {
vpx_wb_write_bit(wb, seg->update_map);
} else {
assert(seg->update_map == 1);
}
if (seg->update_map) {
// Select the coding strategy (temporal or spatial)
vp10_choose_segmap_coding_method(cm, xd);
@ -838,7 +840,11 @@ static void encode_segmentation(VP10_COMMON *cm, MACROBLOCKD *xd,
}
// Write out the chosen coding method.
vpx_wb_write_bit(wb, seg->temporal_update);
if (!frame_is_intra_only(cm) && !cm->error_resilient_mode) {
vpx_wb_write_bit(wb, seg->temporal_update);
} else {
assert(seg->temporal_update == 0);
}
if (seg->temporal_update) {
for (i = 0; i < PREDICTION_PROBS; i++) {
const int prob = seg->pred_probs[i];
@ -875,14 +881,25 @@ static void encode_segmentation(VP10_COMMON *cm, MACROBLOCKD *xd,
}
}
static void encode_txfm_probs(VP10_COMMON *cm, vpx_writer *w,
#if CONFIG_MISC_FIXES
static void write_txfm_mode(TX_MODE mode, struct vpx_write_bit_buffer *wb) {
vpx_wb_write_bit(wb, mode == TX_MODE_SELECT);
if (mode != TX_MODE_SELECT)
vpx_wb_write_literal(wb, mode, 2);
}
#endif
static void update_txfm_probs(VP10_COMMON *cm, vpx_writer *w,
FRAME_COUNTS *counts) {
#if !CONFIG_MISC_FIXES
// Mode
vpx_write_literal(w, MIN(cm->tx_mode, ALLOW_32X32), 2);
vpx_write_literal(w, VPXMIN(cm->tx_mode, ALLOW_32X32), 2);
if (cm->tx_mode >= ALLOW_32X32)
vpx_write_bit(w, cm->tx_mode == TX_MODE_SELECT);
// Probabilities
#endif
if (cm->tx_mode == TX_MODE_SELECT) {
int i, j;
unsigned int ct_8x8p[TX_SIZES - 3][2];
@ -914,11 +931,9 @@ static void encode_txfm_probs(VP10_COMMON *cm, vpx_writer *w,
static void write_interp_filter(INTERP_FILTER filter,
struct vpx_write_bit_buffer *wb) {
const int filter_to_literal[] = { 1, 0, 2, 3 };
vpx_wb_write_bit(wb, filter == SWITCHABLE);
if (filter != SWITCHABLE)
vpx_wb_write_literal(wb, filter_to_literal[filter], 2);
vpx_wb_write_literal(wb, filter, 2);
}
static void fix_interp_filter(VP10_COMMON *cm, FRAME_COUNTS *counts) {
@ -1059,18 +1074,7 @@ static void write_frame_size_with_refs(VP10_COMP *cpi,
for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
YV12_BUFFER_CONFIG *cfg = get_ref_frame_buffer(cpi, ref_frame);
// Set "found" to 0 for temporal svc and for spatial svc key frame
if (cpi->use_svc &&
((cpi->svc.number_temporal_layers > 1 &&
cpi->oxcf.rc_mode == VPX_CBR) ||
(cpi->svc.number_spatial_layers > 1 &&
cpi->svc.layer_context[cpi->svc.spatial_layer_id].is_key_frame) ||
(is_two_pass_svc(cpi) &&
cpi->svc.encode_empty_frame_state == ENCODING &&
cpi->svc.layer_context[0].frames_from_key_frame <
cpi->svc.number_temporal_layers + 1))) {
found = 0;
} else if (cfg != NULL) {
if (cfg != NULL) {
found = cm->width == cfg->y_crop_width &&
cm->height == cfg->y_crop_height;
}
@ -1122,7 +1126,8 @@ static void write_bitdepth_colorspace_sampling(
}
vpx_wb_write_literal(wb, cm->color_space, 3);
if (cm->color_space != VPX_CS_SRGB) {
vpx_wb_write_bit(wb, 0); // 0: [16, 235] (i.e. xvYCC), 1: [0, 255]
// 0: [16, 235] (i.e. xvYCC), 1: [0, 255]
vpx_wb_write_bit(wb, cm->color_range);
if (cm->profile == PROFILE_1 || cm->profile == PROFILE_3) {
assert(cm->subsampling_x != 1 || cm->subsampling_y != 1);
vpx_wb_write_bit(wb, cm->subsampling_x);
@ -1156,19 +1161,28 @@ static void write_uncompressed_header(VP10_COMP *cpi,
write_bitdepth_colorspace_sampling(cm, wb);
write_frame_size(cm, wb);
} else {
// In spatial svc if it's not error_resilient_mode then we need to code all
// visible frames as invisible. But we need to keep the show_frame flag so
// that the publisher could know whether it is supposed to be visible.
// So we will code the show_frame flag as it is. Then code the intra_only
// bit here. This will make the bitstream incompatible. In the player we
// will change to show_frame flag to 0, then add an one byte frame with
// show_existing_frame flag which tells the decoder which frame we want to
// show.
if (!cm->show_frame)
vpx_wb_write_bit(wb, cm->intra_only);
if (!cm->error_resilient_mode)
vpx_wb_write_literal(wb, cm->reset_frame_context, 2);
if (!cm->error_resilient_mode) {
#if CONFIG_MISC_FIXES
if (cm->intra_only) {
vpx_wb_write_bit(wb,
cm->reset_frame_context == RESET_FRAME_CONTEXT_ALL);
} else {
vpx_wb_write_bit(wb,
cm->reset_frame_context != RESET_FRAME_CONTEXT_NONE);
if (cm->reset_frame_context != RESET_FRAME_CONTEXT_NONE)
vpx_wb_write_bit(wb,
cm->reset_frame_context == RESET_FRAME_CONTEXT_ALL);
}
#else
static const int reset_frame_context_conv_tbl[3] = { 0, 2, 3 };
vpx_wb_write_literal(wb,
reset_frame_context_conv_tbl[cm->reset_frame_context], 2);
#endif
}
if (cm->intra_only) {
write_sync_code(wb);
@ -1200,8 +1214,13 @@ static void write_uncompressed_header(VP10_COMP *cpi,
}
if (!cm->error_resilient_mode) {
vpx_wb_write_bit(wb, cm->refresh_frame_context);
vpx_wb_write_bit(wb, cm->frame_parallel_decoding_mode);
vpx_wb_write_bit(wb,
cm->refresh_frame_context != REFRESH_FRAME_CONTEXT_OFF);
#if CONFIG_MISC_FIXES
if (cm->refresh_frame_context != REFRESH_FRAME_CONTEXT_OFF)
#endif
vpx_wb_write_bit(wb, cm->refresh_frame_context !=
REFRESH_FRAME_CONTEXT_BACKWARD);
}
vpx_wb_write_literal(wb, cm->frame_context_idx, FRAME_CONTEXTS_LOG2);
@ -1209,24 +1228,32 @@ static void write_uncompressed_header(VP10_COMP *cpi,
encode_loopfilter(&cm->lf, wb);
encode_quantization(cm, wb);
encode_segmentation(cm, xd, wb);
#if CONFIG_MISC_FIXES
if (xd->lossless)
cm->tx_mode = TX_4X4;
else
write_txfm_mode(cm->tx_mode, wb);
#endif
write_tile_info(cm, wb);
}
static size_t write_compressed_header(VP10_COMP *cpi, uint8_t *data) {
VP10_COMMON *const cm = &cpi->common;
MACROBLOCKD *const xd = &cpi->td.mb.e_mbd;
FRAME_CONTEXT *const fc = cm->fc;
FRAME_COUNTS *counts = cpi->td.counts;
vpx_writer header_bc;
vpx_start_encode(&header_bc, data);
if (xd->lossless)
cm->tx_mode = ONLY_4X4;
#if !CONFIG_MISC_FIXES
if (cpi->td.mb.e_mbd.lossless)
cm->tx_mode = TX_4X4;
else
encode_txfm_probs(cm, &header_bc, counts);
update_txfm_probs(cm, &header_bc, counts);
#else
update_txfm_probs(cm, &header_bc, counts);
#endif
update_coef_probs(cpi, &header_bc);
update_skip_probs(cm, &header_bc, counts);

View File

@ -24,12 +24,7 @@ void vp10_encode_token_init();
static INLINE int vp10_preserve_existing_gf(VP10_COMP *cpi) {
return !cpi->multi_arf_allowed && cpi->refresh_golden_frame &&
cpi->rc.is_src_frame_alt_ref &&
(!cpi->use_svc || // Add spatial svc base layer case here
(is_two_pass_svc(cpi) &&
cpi->svc.spatial_layer_id == 0 &&
cpi->svc.layer_context[0].gold_ref_idx >=0 &&
cpi->oxcf.ss_enable_auto_arf[0]));
cpi->rc.is_src_frame_alt_ref;
}
#ifdef __cplusplus

View File

@ -115,7 +115,6 @@ struct macroblock {
// indicate if it is in the rd search loop or encoding process
int use_lp32x32fdct;
int skip_encode;
// use fast quantization process
int quant_fp;

View File

@ -14,6 +14,10 @@
#include "vp10/common/blockd.h"
#include "vp10/encoder/block.h"
#ifdef __cplusplus
extern "C" {
#endif
struct VP10_COMP;
struct VP10Common;
struct ThreadData;
@ -84,4 +88,8 @@ typedef struct PC_TREE {
void vp10_setup_pc_tree(struct VP10Common *cm, struct ThreadData *td);
void vp10_free_pc_tree(struct ThreadData *td);
#ifdef __cplusplus
} // extern "C"
#endif
#endif /* VP10_ENCODER_CONTEXT_TREE_H_ */

View File

@ -98,216 +98,704 @@ void fdst16(const tran_low_t *input, tran_low_t *output) {
}
#endif // CONFIG_EXT_TX
static INLINE void range_check(const tran_low_t *input, const int size,
const int bit) {
#if CONFIG_COEFFICIENT_RANGE_CHECKING
int i;
for (i = 0; i < size; ++i) {
assert(abs(input[i]) < (1 << bit));
}
#else
(void)input;
(void)size;
(void)bit;
#endif
}
static void fdct4(const tran_low_t *input, tran_low_t *output) {
tran_high_t step[4];
tran_high_t temp1, temp2;
tran_high_t temp;
tran_low_t step[4];
step[0] = input[0] + input[3];
step[1] = input[1] + input[2];
step[2] = input[1] - input[2];
step[3] = input[0] - input[3];
// stage 0
range_check(input, 4, 11);
temp1 = (step[0] + step[1]) * cospi_16_64;
temp2 = (step[0] - step[1]) * cospi_16_64;
output[0] = (tran_low_t)fdct_round_shift(temp1);
output[2] = (tran_low_t)fdct_round_shift(temp2);
temp1 = step[2] * cospi_24_64 + step[3] * cospi_8_64;
temp2 = -step[2] * cospi_8_64 + step[3] * cospi_24_64;
output[1] = (tran_low_t)fdct_round_shift(temp1);
output[3] = (tran_low_t)fdct_round_shift(temp2);
// stage 1
output[0] = input[0] + input[3];
output[1] = input[1] + input[2];
output[2] = input[1] - input[2];
output[3] = input[0] - input[3];
range_check(output, 4, 12);
// stage 2
temp = output[0] * cospi_16_64 + output[1] * cospi_16_64;
step[0] = (tran_low_t)fdct_round_shift(temp);
temp = output[1] * -cospi_16_64 + output[0] * cospi_16_64;
step[1] = (tran_low_t)fdct_round_shift(temp);
temp = output[2] * cospi_24_64 + output[3] * cospi_8_64;
step[2] = (tran_low_t)fdct_round_shift(temp);
temp = output[3] * cospi_24_64 + output[2] * -cospi_8_64;
step[3] = (tran_low_t)fdct_round_shift(temp);
range_check(step, 4, 13);
// stage 3
output[0] = step[0];
output[1] = step[2];
output[2] = step[1];
output[3] = step[3];
range_check(output, 4, 13);
}
static void fdct8(const tran_low_t *input, tran_low_t *output) {
tran_high_t s0, s1, s2, s3, s4, s5, s6, s7; // canbe16
tran_high_t t0, t1, t2, t3; // needs32
tran_high_t x0, x1, x2, x3; // canbe16
tran_high_t temp;
tran_low_t step[8];
// stage 0
range_check(input, 8, 12);
// stage 1
s0 = input[0] + input[7];
s1 = input[1] + input[6];
s2 = input[2] + input[5];
s3 = input[3] + input[4];
s4 = input[3] - input[4];
s5 = input[2] - input[5];
s6 = input[1] - input[6];
s7 = input[0] - input[7];
output[0] = input[0] + input[7];
output[1] = input[1] + input[6];
output[2] = input[2] + input[5];
output[3] = input[3] + input[4];
output[4] = input[3] - input[4];
output[5] = input[2] - input[5];
output[6] = input[1] - input[6];
output[7] = input[0] - input[7];
// fdct4(step, step);
x0 = s0 + s3;
x1 = s1 + s2;
x2 = s1 - s2;
x3 = s0 - s3;
t0 = (x0 + x1) * cospi_16_64;
t1 = (x0 - x1) * cospi_16_64;
t2 = x2 * cospi_24_64 + x3 * cospi_8_64;
t3 = -x2 * cospi_8_64 + x3 * cospi_24_64;
output[0] = (tran_low_t)fdct_round_shift(t0);
output[2] = (tran_low_t)fdct_round_shift(t2);
output[4] = (tran_low_t)fdct_round_shift(t1);
output[6] = (tran_low_t)fdct_round_shift(t3);
range_check(output, 8, 13);
// Stage 2
t0 = (s6 - s5) * cospi_16_64;
t1 = (s6 + s5) * cospi_16_64;
t2 = (tran_low_t)fdct_round_shift(t0);
t3 = (tran_low_t)fdct_round_shift(t1);
// stage 2
step[0] = output[0] + output[3];
step[1] = output[1] + output[2];
step[2] = output[1] - output[2];
step[3] = output[0] - output[3];
step[4] = output[4];
temp = output[5] * -cospi_16_64 + output[6] * cospi_16_64;
step[5] = (tran_low_t)fdct_round_shift(temp);
temp = output[6] * cospi_16_64 + output[5] * cospi_16_64;
step[6] = (tran_low_t)fdct_round_shift(temp);
step[7] = output[7];
// Stage 3
x0 = s4 + t2;
x1 = s4 - t2;
x2 = s7 - t3;
x3 = s7 + t3;
range_check(step, 8, 14);
// Stage 4
t0 = x0 * cospi_28_64 + x3 * cospi_4_64;
t1 = x1 * cospi_12_64 + x2 * cospi_20_64;
t2 = x2 * cospi_12_64 + x1 * -cospi_20_64;
t3 = x3 * cospi_28_64 + x0 * -cospi_4_64;
output[1] = (tran_low_t)fdct_round_shift(t0);
output[3] = (tran_low_t)fdct_round_shift(t2);
output[5] = (tran_low_t)fdct_round_shift(t1);
output[7] = (tran_low_t)fdct_round_shift(t3);
// stage 3
temp = step[0] * cospi_16_64 + step[1] * cospi_16_64;
output[0] = (tran_low_t)fdct_round_shift(temp);
temp = step[1] * -cospi_16_64 + step[0] * cospi_16_64;
output[1] = (tran_low_t)fdct_round_shift(temp);
temp = step[2] * cospi_24_64 + step[3] * cospi_8_64;
output[2] = (tran_low_t)fdct_round_shift(temp);
temp = step[3] * cospi_24_64 + step[2] * -cospi_8_64;
output[3] = (tran_low_t)fdct_round_shift(temp);
output[4] = step[4] + step[5];
output[5] = step[4] - step[5];
output[6] = step[7] - step[6];
output[7] = step[7] + step[6];
range_check(output, 8, 14);
// stage 4
step[0] = output[0];
step[1] = output[1];
step[2] = output[2];
step[3] = output[3];
temp = output[4] * cospi_28_64 + output[7] * cospi_4_64;
step[4] = (tran_low_t)fdct_round_shift(temp);
temp = output[5] * cospi_12_64 + output[6] * cospi_20_64;
step[5] = (tran_low_t)fdct_round_shift(temp);
temp = output[6] * cospi_12_64 + output[5] * -cospi_20_64;
step[6] = (tran_low_t)fdct_round_shift(temp);
temp = output[7] * cospi_28_64 + output[4] * -cospi_4_64;
step[7] = (tran_low_t)fdct_round_shift(temp);
range_check(step, 8, 14);
// stage 5
output[0] = step[0];
output[1] = step[4];
output[2] = step[2];
output[3] = step[6];
output[4] = step[1];
output[5] = step[5];
output[6] = step[3];
output[7] = step[7];
range_check(output, 8, 14);
}
static void fdct16(const tran_low_t in[16], tran_low_t out[16]) {
tran_high_t step1[8]; // canbe16
tran_high_t step2[8]; // canbe16
tran_high_t step3[8]; // canbe16
tran_high_t input[8]; // canbe16
tran_high_t temp1, temp2; // needs32
static void fdct16(const tran_low_t *input, tran_low_t *output) {
tran_high_t temp;
tran_low_t step[16];
// step 1
input[0] = in[0] + in[15];
input[1] = in[1] + in[14];
input[2] = in[2] + in[13];
input[3] = in[3] + in[12];
input[4] = in[4] + in[11];
input[5] = in[5] + in[10];
input[6] = in[6] + in[ 9];
input[7] = in[7] + in[ 8];
// stage 0
range_check(input, 16, 13);
step1[0] = in[7] - in[ 8];
step1[1] = in[6] - in[ 9];
step1[2] = in[5] - in[10];
step1[3] = in[4] - in[11];
step1[4] = in[3] - in[12];
step1[5] = in[2] - in[13];
step1[6] = in[1] - in[14];
step1[7] = in[0] - in[15];
// stage 1
output[0] = input[0] + input[15];
output[1] = input[1] + input[14];
output[2] = input[2] + input[13];
output[3] = input[3] + input[12];
output[4] = input[4] + input[11];
output[5] = input[5] + input[10];
output[6] = input[6] + input[9];
output[7] = input[7] + input[8];
output[8] = input[7] - input[8];
output[9] = input[6] - input[9];
output[10] = input[5] - input[10];
output[11] = input[4] - input[11];
output[12] = input[3] - input[12];
output[13] = input[2] - input[13];
output[14] = input[1] - input[14];
output[15] = input[0] - input[15];
// fdct8(step, step);
{
tran_high_t s0, s1, s2, s3, s4, s5, s6, s7; // canbe16
tran_high_t t0, t1, t2, t3; // needs32
tran_high_t x0, x1, x2, x3; // canbe16
range_check(output, 16, 14);
// stage 1
s0 = input[0] + input[7];
s1 = input[1] + input[6];
s2 = input[2] + input[5];
s3 = input[3] + input[4];
s4 = input[3] - input[4];
s5 = input[2] - input[5];
s6 = input[1] - input[6];
s7 = input[0] - input[7];
// stage 2
step[0] = output[0] + output[7];
step[1] = output[1] + output[6];
step[2] = output[2] + output[5];
step[3] = output[3] + output[4];
step[4] = output[3] - output[4];
step[5] = output[2] - output[5];
step[6] = output[1] - output[6];
step[7] = output[0] - output[7];
step[8] = output[8];
step[9] = output[9];
temp = output[10] * -cospi_16_64 + output[13] * cospi_16_64;
step[10] = (tran_low_t)fdct_round_shift(temp);
temp = output[11] * -cospi_16_64 + output[12] * cospi_16_64;
step[11] = (tran_low_t)fdct_round_shift(temp);
temp = output[12] * cospi_16_64 + output[11] * cospi_16_64;
step[12] = (tran_low_t)fdct_round_shift(temp);
temp = output[13] * cospi_16_64 + output[10] * cospi_16_64;
step[13] = (tran_low_t)fdct_round_shift(temp);
step[14] = output[14];
step[15] = output[15];
// fdct4(step, step);
x0 = s0 + s3;
x1 = s1 + s2;
x2 = s1 - s2;
x3 = s0 - s3;
t0 = (x0 + x1) * cospi_16_64;
t1 = (x0 - x1) * cospi_16_64;
t2 = x3 * cospi_8_64 + x2 * cospi_24_64;
t3 = x3 * cospi_24_64 - x2 * cospi_8_64;
out[0] = (tran_low_t)fdct_round_shift(t0);
out[4] = (tran_low_t)fdct_round_shift(t2);
out[8] = (tran_low_t)fdct_round_shift(t1);
out[12] = (tran_low_t)fdct_round_shift(t3);
range_check(step, 16, 15);
// Stage 2
t0 = (s6 - s5) * cospi_16_64;
t1 = (s6 + s5) * cospi_16_64;
t2 = fdct_round_shift(t0);
t3 = fdct_round_shift(t1);
// stage 3
output[0] = step[0] + step[3];
output[1] = step[1] + step[2];
output[2] = step[1] - step[2];
output[3] = step[0] - step[3];
output[4] = step[4];
temp = step[5] * -cospi_16_64 + step[6] * cospi_16_64;
output[5] = (tran_low_t)fdct_round_shift(temp);
temp = step[6] * cospi_16_64 + step[5] * cospi_16_64;
output[6] = (tran_low_t)fdct_round_shift(temp);
output[7] = step[7];
output[8] = step[8] + step[11];
output[9] = step[9] + step[10];
output[10] = step[9] - step[10];
output[11] = step[8] - step[11];
output[12] = step[15] - step[12];
output[13] = step[14] - step[13];
output[14] = step[14] + step[13];
output[15] = step[15] + step[12];
// Stage 3
x0 = s4 + t2;
x1 = s4 - t2;
x2 = s7 - t3;
x3 = s7 + t3;
range_check(output, 16, 16);
// Stage 4
t0 = x0 * cospi_28_64 + x3 * cospi_4_64;
t1 = x1 * cospi_12_64 + x2 * cospi_20_64;
t2 = x2 * cospi_12_64 + x1 * -cospi_20_64;
t3 = x3 * cospi_28_64 + x0 * -cospi_4_64;
out[2] = (tran_low_t)fdct_round_shift(t0);
out[6] = (tran_low_t)fdct_round_shift(t2);
out[10] = (tran_low_t)fdct_round_shift(t1);
out[14] = (tran_low_t)fdct_round_shift(t3);
}
// stage 4
temp = output[0] * cospi_16_64 + output[1] * cospi_16_64;
step[0] = (tran_low_t)fdct_round_shift(temp);
temp = output[1] * -cospi_16_64 + output[0] * cospi_16_64;
step[1] = (tran_low_t)fdct_round_shift(temp);
temp = output[2] * cospi_24_64 + output[3] * cospi_8_64;
step[2] = (tran_low_t)fdct_round_shift(temp);
temp = output[3] * cospi_24_64 + output[2] * -cospi_8_64;
step[3] = (tran_low_t)fdct_round_shift(temp);
step[4] = output[4] + output[5];
step[5] = output[4] - output[5];
step[6] = output[7] - output[6];
step[7] = output[7] + output[6];
step[8] = output[8];
temp = output[9] * -cospi_8_64 + output[14] * cospi_24_64;
step[9] = (tran_low_t)fdct_round_shift(temp);
temp = output[10] * -cospi_24_64 + output[13] * -cospi_8_64;
step[10] = (tran_low_t)fdct_round_shift(temp);
step[11] = output[11];
step[12] = output[12];
temp = output[13] * cospi_24_64 + output[10] * -cospi_8_64;
step[13] = (tran_low_t)fdct_round_shift(temp);
temp = output[14] * cospi_8_64 + output[9] * cospi_24_64;
step[14] = (tran_low_t)fdct_round_shift(temp);
step[15] = output[15];
// step 2
temp1 = (step1[5] - step1[2]) * cospi_16_64;
temp2 = (step1[4] - step1[3]) * cospi_16_64;
step2[2] = fdct_round_shift(temp1);
step2[3] = fdct_round_shift(temp2);
temp1 = (step1[4] + step1[3]) * cospi_16_64;
temp2 = (step1[5] + step1[2]) * cospi_16_64;
step2[4] = fdct_round_shift(temp1);
step2[5] = fdct_round_shift(temp2);
range_check(step, 16, 16);
// step 3
step3[0] = step1[0] + step2[3];
step3[1] = step1[1] + step2[2];
step3[2] = step1[1] - step2[2];
step3[3] = step1[0] - step2[3];
step3[4] = step1[7] - step2[4];
step3[5] = step1[6] - step2[5];
step3[6] = step1[6] + step2[5];
step3[7] = step1[7] + step2[4];
// stage 5
output[0] = step[0];
output[1] = step[1];
output[2] = step[2];
output[3] = step[3];
temp = step[4] * cospi_28_64 + step[7] * cospi_4_64;
output[4] = (tran_low_t)fdct_round_shift(temp);
temp = step[5] * cospi_12_64 + step[6] * cospi_20_64;
output[5] = (tran_low_t)fdct_round_shift(temp);
temp = step[6] * cospi_12_64 + step[5] * -cospi_20_64;
output[6] = (tran_low_t)fdct_round_shift(temp);
temp = step[7] * cospi_28_64 + step[4] * -cospi_4_64;
output[7] = (tran_low_t)fdct_round_shift(temp);
output[8] = step[8] + step[9];
output[9] = step[8] - step[9];
output[10] = step[11] - step[10];
output[11] = step[11] + step[10];
output[12] = step[12] + step[13];
output[13] = step[12] - step[13];
output[14] = step[15] - step[14];
output[15] = step[15] + step[14];
// step 4
temp1 = step3[1] * -cospi_8_64 + step3[6] * cospi_24_64;
temp2 = step3[2] * cospi_24_64 + step3[5] * cospi_8_64;
step2[1] = fdct_round_shift(temp1);
step2[2] = fdct_round_shift(temp2);
temp1 = step3[2] * cospi_8_64 - step3[5] * cospi_24_64;
temp2 = step3[1] * cospi_24_64 + step3[6] * cospi_8_64;
step2[5] = fdct_round_shift(temp1);
step2[6] = fdct_round_shift(temp2);
range_check(output, 16, 16);
// step 5
step1[0] = step3[0] + step2[1];
step1[1] = step3[0] - step2[1];
step1[2] = step3[3] + step2[2];
step1[3] = step3[3] - step2[2];
step1[4] = step3[4] - step2[5];
step1[5] = step3[4] + step2[5];
step1[6] = step3[7] - step2[6];
step1[7] = step3[7] + step2[6];
// stage 6
step[0] = output[0];
step[1] = output[1];
step[2] = output[2];
step[3] = output[3];
step[4] = output[4];
step[5] = output[5];
step[6] = output[6];
step[7] = output[7];
temp = output[8] * cospi_30_64 + output[15] * cospi_2_64;
step[8] = (tran_low_t)fdct_round_shift(temp);
temp = output[9] * cospi_14_64 + output[14] * cospi_18_64;
step[9] = (tran_low_t)fdct_round_shift(temp);
temp = output[10] * cospi_22_64 + output[13] * cospi_10_64;
step[10] = (tran_low_t)fdct_round_shift(temp);
temp = output[11] * cospi_6_64 + output[12] * cospi_26_64;
step[11] = (tran_low_t)fdct_round_shift(temp);
temp = output[12] * cospi_6_64 + output[11] * -cospi_26_64;
step[12] = (tran_low_t)fdct_round_shift(temp);
temp = output[13] * cospi_22_64 + output[10] * -cospi_10_64;
step[13] = (tran_low_t)fdct_round_shift(temp);
temp = output[14] * cospi_14_64 + output[9] * -cospi_18_64;
step[14] = (tran_low_t)fdct_round_shift(temp);
temp = output[15] * cospi_30_64 + output[8] * -cospi_2_64;
step[15] = (tran_low_t)fdct_round_shift(temp);
// step 6
temp1 = step1[0] * cospi_30_64 + step1[7] * cospi_2_64;
temp2 = step1[1] * cospi_14_64 + step1[6] * cospi_18_64;
out[1] = (tran_low_t)fdct_round_shift(temp1);
out[9] = (tran_low_t)fdct_round_shift(temp2);
range_check(step, 16, 16);
temp1 = step1[2] * cospi_22_64 + step1[5] * cospi_10_64;
temp2 = step1[3] * cospi_6_64 + step1[4] * cospi_26_64;
out[5] = (tran_low_t)fdct_round_shift(temp1);
out[13] = (tran_low_t)fdct_round_shift(temp2);
// stage 7
output[0] = step[0];
output[1] = step[8];
output[2] = step[4];
output[3] = step[12];
output[4] = step[2];
output[5] = step[10];
output[6] = step[6];
output[7] = step[14];
output[8] = step[1];
output[9] = step[9];
output[10] = step[5];
output[11] = step[13];
output[12] = step[3];
output[13] = step[11];
output[14] = step[7];
output[15] = step[15];
temp1 = step1[3] * -cospi_26_64 + step1[4] * cospi_6_64;
temp2 = step1[2] * -cospi_10_64 + step1[5] * cospi_22_64;
out[3] = (tran_low_t)fdct_round_shift(temp1);
out[11] = (tran_low_t)fdct_round_shift(temp2);
range_check(output, 16, 16);
}
temp1 = step1[1] * -cospi_18_64 + step1[6] * cospi_14_64;
temp2 = step1[0] * -cospi_2_64 + step1[7] * cospi_30_64;
out[7] = (tran_low_t)fdct_round_shift(temp1);
out[15] = (tran_low_t)fdct_round_shift(temp2);
static void fdct32(const tran_low_t *input, tran_low_t *output) {
tran_high_t temp;
tran_low_t step[32];
// stage 0
range_check(input, 32, 14);
// stage 1
output[0] = input[0] + input[31];
output[1] = input[1] + input[30];
output[2] = input[2] + input[29];
output[3] = input[3] + input[28];
output[4] = input[4] + input[27];
output[5] = input[5] + input[26];
output[6] = input[6] + input[25];
output[7] = input[7] + input[24];
output[8] = input[8] + input[23];
output[9] = input[9] + input[22];
output[10] = input[10] + input[21];
output[11] = input[11] + input[20];
output[12] = input[12] + input[19];
output[13] = input[13] + input[18];
output[14] = input[14] + input[17];
output[15] = input[15] + input[16];
output[16] = input[15] - input[16];
output[17] = input[14] - input[17];
output[18] = input[13] - input[18];
output[19] = input[12] - input[19];
output[20] = input[11] - input[20];
output[21] = input[10] - input[21];
output[22] = input[9] - input[22];
output[23] = input[8] - input[23];
output[24] = input[7] - input[24];
output[25] = input[6] - input[25];
output[26] = input[5] - input[26];
output[27] = input[4] - input[27];
output[28] = input[3] - input[28];
output[29] = input[2] - input[29];
output[30] = input[1] - input[30];
output[31] = input[0] - input[31];
range_check(output, 32, 15);
// stage 2
step[0] = output[0] + output[15];
step[1] = output[1] + output[14];
step[2] = output[2] + output[13];
step[3] = output[3] + output[12];
step[4] = output[4] + output[11];
step[5] = output[5] + output[10];
step[6] = output[6] + output[9];
step[7] = output[7] + output[8];
step[8] = output[7] - output[8];
step[9] = output[6] - output[9];
step[10] = output[5] - output[10];
step[11] = output[4] - output[11];
step[12] = output[3] - output[12];
step[13] = output[2] - output[13];
step[14] = output[1] - output[14];
step[15] = output[0] - output[15];
step[16] = output[16];
step[17] = output[17];
step[18] = output[18];
step[19] = output[19];
temp = output[20] * -cospi_16_64 + output[27] * cospi_16_64;
step[20] = (tran_low_t)fdct_round_shift(temp);
temp = output[21] * -cospi_16_64 + output[26] * cospi_16_64;
step[21] = (tran_low_t)fdct_round_shift(temp);
temp = output[22] * -cospi_16_64 + output[25] * cospi_16_64;
step[22] = (tran_low_t)fdct_round_shift(temp);
temp = output[23] * -cospi_16_64 + output[24] * cospi_16_64;
step[23] = (tran_low_t)fdct_round_shift(temp);
temp = output[24] * cospi_16_64 + output[23] * cospi_16_64;
step[24] = (tran_low_t)fdct_round_shift(temp);
temp = output[25] * cospi_16_64 + output[22] * cospi_16_64;
step[25] = (tran_low_t)fdct_round_shift(temp);
temp = output[26] * cospi_16_64 + output[21] * cospi_16_64;
step[26] = (tran_low_t)fdct_round_shift(temp);
temp = output[27] * cospi_16_64 + output[20] * cospi_16_64;
step[27] = (tran_low_t)fdct_round_shift(temp);
step[28] = output[28];
step[29] = output[29];
step[30] = output[30];
step[31] = output[31];
range_check(step, 32, 16);
// stage 3
output[0] = step[0] + step[7];
output[1] = step[1] + step[6];
output[2] = step[2] + step[5];
output[3] = step[3] + step[4];
output[4] = step[3] - step[4];
output[5] = step[2] - step[5];
output[6] = step[1] - step[6];
output[7] = step[0] - step[7];
output[8] = step[8];
output[9] = step[9];
temp = step[10] * -cospi_16_64 + step[13] * cospi_16_64;
output[10] = (tran_low_t)fdct_round_shift(temp);
temp = step[11] * -cospi_16_64 + step[12] * cospi_16_64;
output[11] = (tran_low_t)fdct_round_shift(temp);
temp = step[12] * cospi_16_64 + step[11] * cospi_16_64;
output[12] = (tran_low_t)fdct_round_shift(temp);
temp = step[13] * cospi_16_64 + step[10] * cospi_16_64;
output[13] = (tran_low_t)fdct_round_shift(temp);
output[14] = step[14];
output[15] = step[15];
output[16] = step[16] + step[23];
output[17] = step[17] + step[22];
output[18] = step[18] + step[21];
output[19] = step[19] + step[20];
output[20] = step[19] - step[20];
output[21] = step[18] - step[21];
output[22] = step[17] - step[22];
output[23] = step[16] - step[23];
output[24] = step[31] - step[24];
output[25] = step[30] - step[25];
output[26] = step[29] - step[26];
output[27] = step[28] - step[27];
output[28] = step[28] + step[27];
output[29] = step[29] + step[26];
output[30] = step[30] + step[25];
output[31] = step[31] + step[24];
range_check(output, 32, 17);
// stage 4
step[0] = output[0] + output[3];
step[1] = output[1] + output[2];
step[2] = output[1] - output[2];
step[3] = output[0] - output[3];
step[4] = output[4];
temp = output[5] * -cospi_16_64 + output[6] * cospi_16_64;
step[5] = (tran_low_t)fdct_round_shift(temp);
temp = output[6] * cospi_16_64 + output[5] * cospi_16_64;
step[6] = (tran_low_t)fdct_round_shift(temp);
step[7] = output[7];
step[8] = output[8] + output[11];
step[9] = output[9] + output[10];
step[10] = output[9] - output[10];
step[11] = output[8] - output[11];
step[12] = output[15] - output[12];
step[13] = output[14] - output[13];
step[14] = output[14] + output[13];
step[15] = output[15] + output[12];
step[16] = output[16];
step[17] = output[17];
temp = output[18] * -cospi_8_64 + output[29] * cospi_24_64;
step[18] = (tran_low_t)fdct_round_shift(temp);
temp = output[19] * -cospi_8_64 + output[28] * cospi_24_64;
step[19] = (tran_low_t)fdct_round_shift(temp);
temp = output[20] * -cospi_24_64 + output[27] * -cospi_8_64;
step[20] = (tran_low_t)fdct_round_shift(temp);
temp = output[21] * -cospi_24_64 + output[26] * -cospi_8_64;
step[21] = (tran_low_t)fdct_round_shift(temp);
step[22] = output[22];
step[23] = output[23];
step[24] = output[24];
step[25] = output[25];
temp = output[26] * cospi_24_64 + output[21] * -cospi_8_64;
step[26] = (tran_low_t)fdct_round_shift(temp);
temp = output[27] * cospi_24_64 + output[20] * -cospi_8_64;
step[27] = (tran_low_t)fdct_round_shift(temp);
temp = output[28] * cospi_8_64 + output[19] * cospi_24_64;
step[28] = (tran_low_t)fdct_round_shift(temp);
temp = output[29] * cospi_8_64 + output[18] * cospi_24_64;
step[29] = (tran_low_t)fdct_round_shift(temp);
step[30] = output[30];
step[31] = output[31];
range_check(step, 32, 18);
// stage 5
temp = step[0] * cospi_16_64 + step[1] * cospi_16_64;
output[0] = (tran_low_t)fdct_round_shift(temp);
temp = step[1] * -cospi_16_64 + step[0] * cospi_16_64;
output[1] = (tran_low_t)fdct_round_shift(temp);
temp = step[2] * cospi_24_64 + step[3] * cospi_8_64;
output[2] = (tran_low_t)fdct_round_shift(temp);
temp = step[3] * cospi_24_64 + step[2] * -cospi_8_64;
output[3] = (tran_low_t)fdct_round_shift(temp);
output[4] = step[4] + step[5];
output[5] = step[4] - step[5];
output[6] = step[7] - step[6];
output[7] = step[7] + step[6];
output[8] = step[8];
temp = step[9] * -cospi_8_64 + step[14] * cospi_24_64;
output[9] = (tran_low_t)fdct_round_shift(temp);
temp = step[10] * -cospi_24_64 + step[13] * -cospi_8_64;
output[10] = (tran_low_t)fdct_round_shift(temp);
output[11] = step[11];
output[12] = step[12];
temp = step[13] * cospi_24_64 + step[10] * -cospi_8_64;
output[13] = (tran_low_t)fdct_round_shift(temp);
temp = step[14] * cospi_8_64 + step[9] * cospi_24_64;
output[14] = (tran_low_t)fdct_round_shift(temp);
output[15] = step[15];
output[16] = step[16] + step[19];
output[17] = step[17] + step[18];
output[18] = step[17] - step[18];
output[19] = step[16] - step[19];
output[20] = step[23] - step[20];
output[21] = step[22] - step[21];
output[22] = step[22] + step[21];
output[23] = step[23] + step[20];
output[24] = step[24] + step[27];
output[25] = step[25] + step[26];
output[26] = step[25] - step[26];
output[27] = step[24] - step[27];
output[28] = step[31] - step[28];
output[29] = step[30] - step[29];
output[30] = step[30] + step[29];
output[31] = step[31] + step[28];
range_check(output, 32, 18);
// stage 6
step[0] = output[0];
step[1] = output[1];
step[2] = output[2];
step[3] = output[3];
temp = output[4] * cospi_28_64 + output[7] * cospi_4_64;
step[4] = (tran_low_t)fdct_round_shift(temp);
temp = output[5] * cospi_12_64 + output[6] * cospi_20_64;
step[5] = (tran_low_t)fdct_round_shift(temp);
temp = output[6] * cospi_12_64 + output[5] * -cospi_20_64;
step[6] = (tran_low_t)fdct_round_shift(temp);
temp = output[7] * cospi_28_64 + output[4] * -cospi_4_64;
step[7] = (tran_low_t)fdct_round_shift(temp);
step[8] = output[8] + output[9];
step[9] = output[8] - output[9];
step[10] = output[11] - output[10];
step[11] = output[11] + output[10];
step[12] = output[12] + output[13];
step[13] = output[12] - output[13];
step[14] = output[15] - output[14];
step[15] = output[15] + output[14];
step[16] = output[16];
temp = output[17] * -cospi_4_64 + output[30] * cospi_28_64;
step[17] = (tran_low_t)fdct_round_shift(temp);
temp = output[18] * -cospi_28_64 + output[29] * -cospi_4_64;
step[18] = (tran_low_t)fdct_round_shift(temp);
step[19] = output[19];
step[20] = output[20];
temp = output[21] * -cospi_20_64 + output[26] * cospi_12_64;
step[21] = (tran_low_t)fdct_round_shift(temp);
temp = output[22] * -cospi_12_64 + output[25] * -cospi_20_64;
step[22] = (tran_low_t)fdct_round_shift(temp);
step[23] = output[23];
step[24] = output[24];
temp = output[25] * cospi_12_64 + output[22] * -cospi_20_64;
step[25] = (tran_low_t)fdct_round_shift(temp);
temp = output[26] * cospi_20_64 + output[21] * cospi_12_64;
step[26] = (tran_low_t)fdct_round_shift(temp);
step[27] = output[27];
step[28] = output[28];
temp = output[29] * cospi_28_64 + output[18] * -cospi_4_64;
step[29] = (tran_low_t)fdct_round_shift(temp);
temp = output[30] * cospi_4_64 + output[17] * cospi_28_64;
step[30] = (tran_low_t)fdct_round_shift(temp);
step[31] = output[31];
range_check(step, 32, 18);
// stage 7
output[0] = step[0];
output[1] = step[1];
output[2] = step[2];
output[3] = step[3];
output[4] = step[4];
output[5] = step[5];
output[6] = step[6];
output[7] = step[7];
temp = step[8] * cospi_30_64 + step[15] * cospi_2_64;
output[8] = (tran_low_t)fdct_round_shift(temp);
temp = step[9] * cospi_14_64 + step[14] * cospi_18_64;
output[9] = (tran_low_t)fdct_round_shift(temp);
temp = step[10] * cospi_22_64 + step[13] * cospi_10_64;
output[10] = (tran_low_t)fdct_round_shift(temp);
temp = step[11] * cospi_6_64 + step[12] * cospi_26_64;
output[11] = (tran_low_t)fdct_round_shift(temp);
temp = step[12] * cospi_6_64 + step[11] * -cospi_26_64;
output[12] = (tran_low_t)fdct_round_shift(temp);
temp = step[13] * cospi_22_64 + step[10] * -cospi_10_64;
output[13] = (tran_low_t)fdct_round_shift(temp);
temp = step[14] * cospi_14_64 + step[9] * -cospi_18_64;
output[14] = (tran_low_t)fdct_round_shift(temp);
temp = step[15] * cospi_30_64 + step[8] * -cospi_2_64;
output[15] = (tran_low_t)fdct_round_shift(temp);
output[16] = step[16] + step[17];
output[17] = step[16] - step[17];
output[18] = step[19] - step[18];
output[19] = step[19] + step[18];
output[20] = step[20] + step[21];
output[21] = step[20] - step[21];
output[22] = step[23] - step[22];
output[23] = step[23] + step[22];
output[24] = step[24] + step[25];
output[25] = step[24] - step[25];
output[26] = step[27] - step[26];
output[27] = step[27] + step[26];
output[28] = step[28] + step[29];
output[29] = step[28] - step[29];
output[30] = step[31] - step[30];
output[31] = step[31] + step[30];
range_check(output, 32, 18);
// stage 8
step[0] = output[0];
step[1] = output[1];
step[2] = output[2];
step[3] = output[3];
step[4] = output[4];
step[5] = output[5];
step[6] = output[6];
step[7] = output[7];
step[8] = output[8];
step[9] = output[9];
step[10] = output[10];
step[11] = output[11];
step[12] = output[12];
step[13] = output[13];
step[14] = output[14];
step[15] = output[15];
temp = output[16] * cospi_31_64 + output[31] * cospi_1_64;
step[16] = (tran_low_t)fdct_round_shift(temp);
temp = output[17] * cospi_15_64 + output[30] * cospi_17_64;
step[17] = (tran_low_t)fdct_round_shift(temp);
temp = output[18] * cospi_23_64 + output[29] * cospi_9_64;
step[18] = (tran_low_t)fdct_round_shift(temp);
temp = output[19] * cospi_7_64 + output[28] * cospi_25_64;
step[19] = (tran_low_t)fdct_round_shift(temp);
temp = output[20] * cospi_27_64 + output[27] * cospi_5_64;
step[20] = (tran_low_t)fdct_round_shift(temp);
temp = output[21] * cospi_11_64 + output[26] * cospi_21_64;
step[21] = (tran_low_t)fdct_round_shift(temp);
temp = output[22] * cospi_19_64 + output[25] * cospi_13_64;
step[22] = (tran_low_t)fdct_round_shift(temp);
temp = output[23] * cospi_3_64 + output[24] * cospi_29_64;
step[23] = (tran_low_t)fdct_round_shift(temp);
temp = output[24] * cospi_3_64 + output[23] * -cospi_29_64;
step[24] = (tran_low_t)fdct_round_shift(temp);
temp = output[25] * cospi_19_64 + output[22] * -cospi_13_64;
step[25] = (tran_low_t)fdct_round_shift(temp);
temp = output[26] * cospi_11_64 + output[21] * -cospi_21_64;
step[26] = (tran_low_t)fdct_round_shift(temp);
temp = output[27] * cospi_27_64 + output[20] * -cospi_5_64;
step[27] = (tran_low_t)fdct_round_shift(temp);
temp = output[28] * cospi_7_64 + output[19] * -cospi_25_64;
step[28] = (tran_low_t)fdct_round_shift(temp);
temp = output[29] * cospi_23_64 + output[18] * -cospi_9_64;
step[29] = (tran_low_t)fdct_round_shift(temp);
temp = output[30] * cospi_15_64 + output[17] * -cospi_17_64;
step[30] = (tran_low_t)fdct_round_shift(temp);
temp = output[31] * cospi_31_64 + output[16] * -cospi_1_64;
step[31] = (tran_low_t)fdct_round_shift(temp);
range_check(step, 32, 18);
// stage 9
output[0] = step[0];
output[1] = step[16];
output[2] = step[8];
output[3] = step[24];
output[4] = step[4];
output[5] = step[20];
output[6] = step[12];
output[7] = step[28];
output[8] = step[2];
output[9] = step[18];
output[10] = step[10];
output[11] = step[26];
output[12] = step[6];
output[13] = step[22];
output[14] = step[14];
output[15] = step[30];
output[16] = step[1];
output[17] = step[17];
output[18] = step[9];
output[19] = step[25];
output[20] = step[5];
output[21] = step[21];
output[22] = step[13];
output[23] = step[29];
output[24] = step[3];
output[25] = step[19];
output[26] = step[11];
output[27] = step[27];
output[28] = step[7];
output[29] = step[23];
output[30] = step[15];
output[31] = step[31];
range_check(output, 32, 18);
}
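Two helpers recur through fdct16 and fdct32 above but are defined elsewhere in dct.c. A minimal sketch of both follows, assuming the usual libvpx definitions (tran_low_t/tran_high_t from vpx_dsp/vpx_dsp_common.h, ROUND_POWER_OF_TWO, DCT_CONST_BITS == 14, and <assert.h>/<stdlib.h> included); treat it as an illustration, not the canonical source.

/* Rounds away the 14-bit scale carried by the cospi_*_64 constants:
 * add half the scale factor, then shift it out. */
static INLINE tran_high_t fdct_round_shift(tran_high_t input) {
  return ROUND_POWER_OF_TWO(input, DCT_CONST_BITS);  /* (x + 2^13) >> 14 */
}

/* Backs the range_check(output, 16, 14) style calls above: asserts each
 * value fits in 'bit' bits plus sign, and compiles away otherwise. */
static INLINE void range_check(const tran_low_t *input, const int size,
                               const int bit) {
#if CONFIG_COEFFICIENT_RANGE_CHECKING
  int i;
  for (i = 0; i < size; ++i)
    assert(abs(input[i]) < (1 << bit));
#else
  (void)input;
  (void)size;
  (void)bit;
#endif
}

Every butterfly above then follows one pattern: a 32-bit tran_high_t product sum, fdct_round_shift, and a narrowing cast back to tran_low_t.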
static void fadst4(const tran_low_t *input, tran_low_t *output) {
@ -727,19 +1215,19 @@ void vp10_fdct8x8_quant_c(const int16_t *input, int stride,
output[4 * 8] = (tran_low_t)fdct_round_shift(t1);
output[6 * 8] = (tran_low_t)fdct_round_shift(t3);
// Stage 2
// stage 2
t0 = (s6 - s5) * cospi_16_64;
t1 = (s6 + s5) * cospi_16_64;
t2 = fdct_round_shift(t0);
t3 = fdct_round_shift(t1);
// Stage 3
// stage 3
x0 = s4 + t2;
x1 = s4 - t2;
x2 = s7 - t3;
x3 = s7 + t3;
// Stage 4
// stage 4
t0 = x0 * cospi_28_64 + x3 * cospi_4_64;
t1 = x1 * cospi_12_64 + x2 * cospi_20_64;
t2 = x2 * cospi_12_64 + x1 * -cospi_20_64;

View File

@ -11,6 +11,7 @@
#include <assert.h>
#include <limits.h>
#include "./vpx_dsp_rtcd.h"
#include "vpx_dsp/vpx_dsp_common.h"
#include "vpx_scale/yv12config.h"
#include "vpx/vpx_integer.h"
#include "vp10/common/reconinter.h"
@ -124,10 +125,10 @@ int vp10_denoiser_filter_c(const uint8_t *sig, int sig_stride,
adj = adj_val[2];
}
if (diff > 0) {
avg[c] = MIN(UINT8_MAX, sig[c] + adj);
avg[c] = VPXMIN(UINT8_MAX, sig[c] + adj);
total_adj += adj;
} else {
avg[c] = MAX(0, sig[c] - adj);
avg[c] = VPXMAX(0, sig[c] - adj);
total_adj -= adj;
}
}
@ -164,13 +165,13 @@ int vp10_denoiser_filter_c(const uint8_t *sig, int sig_stride,
// Diff positive means we made positive adjustment above
// (in first try/attempt), so now make negative adjustment to bring
// denoised signal down.
avg[c] = MAX(0, avg[c] - adj);
avg[c] = VPXMAX(0, avg[c] - adj);
total_adj -= adj;
} else {
// Diff negative means we made negative adjustment above
// (in first try/attempt), so now make positive adjustment to bring
// denoised signal up.
avg[c] = MIN(UINT8_MAX, avg[c] + adj);
avg[c] = VPXMIN(UINT8_MAX, avg[c] + adj);
total_adj += adj;
}
}
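The MIN/MAX to VPXMIN/VPXMAX renames in this file are mechanical; vpx_dsp/vpx_dsp_common.h (newly included above) supplies the same ternary macros. A sketch of the macros and of the clamp they perform in the denoiser:

#define VPXMIN(x, y) (((x) < (y)) ? (x) : (y))
#define VPXMAX(x, y) (((x) > (y)) ? (x) : (y))

/* Keep the adjusted sample inside the 8-bit range:
 *   a positive adjustment may not overflow UINT8_MAX,
 *   a negative adjustment may not underflow 0. */
avg[c] = (diff > 0) ? VPXMIN(UINT8_MAX, sig[c] + adj)
                    : VPXMAX(0, sig[c] - adj);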

File diff suppressed because it is too large

View File

@ -1380,7 +1380,7 @@ static void encode_block(int plane, int block, BLOCK_SIZE plane_bsize,
if (p->eobs[block])
*(args->skip) = 0;
if (x->skip_encode || p->eobs[block] == 0)
if (p->eobs[block] == 0)
return;
#if CONFIG_VP9_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
@ -1528,8 +1528,7 @@ void vp10_encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize,
src_diff = &p->src_diff[4 * (j * diff_stride + i)];
mode = plane == 0 ? get_y_mode(xd->mi[0], block) : mbmi->uv_mode;
vp10_predict_intra_block(xd, bwl, tx_size, mode, x->skip_encode ? src : dst,
x->skip_encode ? src_stride : dst_stride,
vp10_predict_intra_block(xd, bwl, tx_size, mode, dst, dst_stride,
dst, dst_stride, i, j, plane);
#if CONFIG_VP9_HIGHBITDEPTH
@ -1546,7 +1545,7 @@ void vp10_encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize,
qcoeff, dqcoeff, pd->dequant, eob,
scan_order->scan, scan_order->iscan);
}
if (!x->skip_encode && *eob)
if (*eob)
vp10_highbd_inv_txfm_add_32x32(dqcoeff, dst, dst_stride, *eob, xd->bd,
tx_type);
break;
@ -1560,7 +1559,7 @@ void vp10_encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize,
pd->dequant, eob,
scan_order->scan, scan_order->iscan);
}
if (!x->skip_encode && *eob)
if (*eob)
vp10_highbd_inv_txfm_add_16x16(dqcoeff, dst, dst_stride, *eob, xd->bd,
tx_type);
break;
@ -1574,7 +1573,7 @@ void vp10_encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize,
pd->dequant, eob,
scan_order->scan, scan_order->iscan);
}
if (!x->skip_encode && *eob)
if (*eob)
vp10_highbd_inv_txfm_add_8x8(dqcoeff, dst, dst_stride, *eob, xd->bd,
tx_type);
break;
@ -1590,7 +1589,7 @@ void vp10_encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize,
scan_order->scan, scan_order->iscan);
}
if (!x->skip_encode && *eob)
if (*eob)
// this is like vp10_short_idct4x4 but has a special case around
// eob<=1 which is significant (not just an optimization) for the
// lossless case.
@ -1619,7 +1618,7 @@ void vp10_encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize,
pd->dequant, eob, scan_order->scan,
scan_order->iscan);
}
if (!x->skip_encode && *eob)
if (*eob)
vp10_inv_txfm_add_32x32(dqcoeff, dst, dst_stride, *eob, tx_type);
break;
case TX_16X16:
@ -1632,7 +1631,7 @@ void vp10_encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize,
pd->dequant, eob, scan_order->scan,
scan_order->iscan);
}
if (!x->skip_encode && *eob)
if (*eob)
vp10_inv_txfm_add_16x16(dqcoeff, dst, dst_stride, *eob, tx_type);
break;
case TX_8X8:
@ -1645,7 +1644,7 @@ void vp10_encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize,
pd->dequant, eob, scan_order->scan,
scan_order->iscan);
}
if (!x->skip_encode && *eob)
if (*eob)
vp10_inv_txfm_add_8x8(dqcoeff, dst, dst_stride, *eob, tx_type);
break;
case TX_4X4:
@ -1659,7 +1658,7 @@ void vp10_encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize,
scan_order->iscan);
}
if (!x->skip_encode && *eob) {
if (*eob) {
// this is like vp10_short_idct4x4 but has a special case around eob<=1
// which is significant (not just an optimization) for the lossless
// case.
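Every hunk in this file makes the same change: reconstruction of the intra block is now gated only on the end-of-block count, not on x->skip_encode. A condensed sketch of the per-transform-size flow after the patch (names follow the surrounding code; the 8x8 case stands in for all sizes):

/* Quantize the forward transform, then add the inverse transform back
 * onto the predictor only when a coefficient survived quantization;
 * an all-zero block (*eob == 0) needs no reconstruction. */
vpx_quantize_b(coeff, n_coeffs, x->skip_block, p->zbin, p->round, p->quant,
               p->quant_shift, qcoeff, dqcoeff, pd->dequant, eob,
               scan_order->scan, scan_order->iscan);
if (*eob)  /* previously also required !x->skip_encode */
  vp10_inv_txfm_add_8x8(dqcoeff, dst, dst_stride, *eob, tx_type);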

View File

@ -16,6 +16,8 @@
#include "vp10/encoder/cost.h"
#include "vp10/encoder/encodemv.h"
#include "vpx_dsp/vpx_dsp_common.h"
static struct vp10_token mv_joint_encodings[MV_JOINTS];
static struct vp10_token mv_class_encodings[MV_CLASSES];
static struct vp10_token mv_fp_encodings[MV_FP_SIZE];
@ -216,8 +218,8 @@ void vp10_encode_mv(VP10_COMP* cpi, vpx_writer* w,
// If auto_mv_step_size is enabled then keep track of the largest
// motion vector component used.
if (cpi->sf.mv.auto_mv_step_size) {
unsigned int maxv = MAX(abs(mv->row), abs(mv->col)) >> 3;
cpi->max_mv_magnitude = MAX(maxv, cpi->max_mv_magnitude);
unsigned int maxv = VPXMAX(abs(mv->row), abs(mv->col)) >> 3;
cpi->max_mv_magnitude = VPXMAX(maxv, cpi->max_mv_magnitude);
}
}
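A note on the >> 3 above: motion vectors are stored in eighth-pel units, so the shift converts the largest component to whole pixels before folding it into max_mv_magnitude, which set_mv_search_params later uses to bound the search range. A sketch with illustrative values:

/* mv = {row: -36, col: 50} in 1/8-pel units, i.e. -4.5 and 6.25 pixels */
unsigned int maxv = VPXMAX(abs(-36), abs(50)) >> 3;   /* 50 >> 3 == 6 */
cpi->max_mv_magnitude = VPXMAX(maxv, cpi->max_mv_magnitude);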
@ -237,7 +239,7 @@ static void inc_mvs(const MB_MODE_INFO *mbmi, const MB_MODE_INFO_EXT *mbmi_ext,
const MV *ref = &mbmi_ext->ref_mvs[mbmi->ref_frame[i]][0].as_mv;
const MV diff = {mvs[i].as_mv.row - ref->row,
mvs[i].as_mv.col - ref->col};
vp10_inc_mv(&diff, counts);
vp10_inc_mv(&diff, counts, vp10_use_mv_hp(ref));
}
}

View File

@ -42,7 +42,6 @@
#include "vp10/encoder/segmentation.h"
#include "vp10/encoder/skin_detection.h"
#include "vp10/encoder/speed_features.h"
#include "vp10/encoder/svc_layercontext.h"
#include "vp10/encoder/temporal_filter.h"
#include "./vp10_rtcd.h"
@ -52,6 +51,7 @@
#if CONFIG_INTERNAL_STATS
#include "vpx_dsp/ssim.h"
#endif
#include "vpx_dsp/vpx_dsp_common.h"
#include "vpx_dsp/vpx_filter.h"
#include "vpx_ports/mem.h"
#include "vpx_ports/system_state.h"
@ -238,13 +238,11 @@ static void setup_frame(VP10_COMP *cpi) {
if (frame_is_intra_only(cm) || cm->error_resilient_mode) {
vp10_setup_past_independence(cm);
} else {
if (!cpi->use_svc)
cm->frame_context_idx = cpi->refresh_alt_ref_frame;
cm->frame_context_idx = cpi->refresh_alt_ref_frame;
}
if (cm->frame_type == KEY_FRAME) {
if (!is_two_pass_svc(cpi))
cpi->refresh_golden_frame = 1;
cpi->refresh_golden_frame = 1;
cpi->refresh_alt_ref_frame = 1;
vp10_zero(cpi->interp_filter_selected);
} else {
@ -337,7 +335,6 @@ void vp10_initialize_enc(void) {
static void dealloc_compressor_data(VP10_COMP *cpi) {
VP10_COMMON *const cm = &cpi->common;
int i;
vpx_free(cpi->mbmi_ext_base);
cpi->mbmi_ext_base = NULL;
@ -394,26 +391,10 @@ static void dealloc_compressor_data(VP10_COMP *cpi) {
vp10_free_pc_tree(&cpi->td);
for (i = 0; i < cpi->svc.number_spatial_layers; ++i) {
LAYER_CONTEXT *const lc = &cpi->svc.layer_context[i];
vpx_free(lc->rc_twopass_stats_in.buf);
lc->rc_twopass_stats_in.buf = NULL;
lc->rc_twopass_stats_in.sz = 0;
}
if (cpi->source_diff_var != NULL) {
vpx_free(cpi->source_diff_var);
cpi->source_diff_var = NULL;
}
for (i = 0; i < MAX_LAG_BUFFERS; ++i) {
vpx_free_frame_buffer(&cpi->svc.scaled_frames[i]);
}
memset(&cpi->svc.scaled_frames[0], 0,
MAX_LAG_BUFFERS * sizeof(cpi->svc.scaled_frames[0]));
vpx_free_frame_buffer(&cpi->svc.empty_frame.img);
memset(&cpi->svc.empty_frame, 0, sizeof(cpi->svc.empty_frame));
}
static void save_coding_context(VP10_COMP *cpi) {
@ -718,16 +699,9 @@ static void set_tile_limits(VP10_COMP *cpi) {
int min_log2_tile_cols, max_log2_tile_cols;
vp10_get_tile_n_bits(cm->mi_cols, &min_log2_tile_cols, &max_log2_tile_cols);
if (is_two_pass_svc(cpi) &&
(cpi->svc.encode_empty_frame_state == ENCODING ||
cpi->svc.number_spatial_layers > 1)) {
cm->log2_tile_cols = 0;
cm->log2_tile_rows = 0;
} else {
cm->log2_tile_cols = clamp(cpi->oxcf.tile_columns,
min_log2_tile_cols, max_log2_tile_cols);
cm->log2_tile_rows = cpi->oxcf.tile_rows;
}
cm->log2_tile_cols = clamp(cpi->oxcf.tile_columns,
min_log2_tile_cols, max_log2_tile_cols);
cm->log2_tile_rows = cpi->oxcf.tile_rows;
}
static void update_frame_size(VP10_COMP *cpi) {
@ -742,19 +716,6 @@ static void update_frame_size(VP10_COMP *cpi) {
cm->mi_rows * cm->mi_cols * sizeof(*cpi->mbmi_ext_base));
set_tile_limits(cpi);
if (is_two_pass_svc(cpi)) {
if (vpx_realloc_frame_buffer(&cpi->alt_ref_buffer,
cm->width, cm->height,
cm->subsampling_x, cm->subsampling_y,
#if CONFIG_VP9_HIGHBITDEPTH
cm->use_highbitdepth,
#endif
VP9_ENC_BORDER_IN_PIXELS, cm->byte_alignment,
NULL, NULL, NULL))
vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
"Failed to reallocate alt_ref_buffer");
}
}
static void init_buffer_indices(VP10_COMP *cpi) {
@ -775,28 +736,15 @@ static void init_config(struct VP10_COMP *cpi, VP10EncoderConfig *oxcf) {
cm->use_highbitdepth = oxcf->use_highbitdepth;
#endif
cm->color_space = oxcf->color_space;
cm->color_range = oxcf->color_range;
cm->width = oxcf->width;
cm->height = oxcf->height;
vp10_alloc_compressor_data(cpi);
cpi->svc.temporal_layering_mode = oxcf->temporal_layering_mode;
// Single thread case: use counts in common.
cpi->td.counts = &cm->counts;
// Spatial scalability.
cpi->svc.number_spatial_layers = oxcf->ss_number_layers;
// Temporal scalability.
cpi->svc.number_temporal_layers = oxcf->ts_number_layers;
if ((cpi->svc.number_temporal_layers > 1 && cpi->oxcf.rc_mode == VPX_CBR) ||
((cpi->svc.number_temporal_layers > 1 ||
cpi->svc.number_spatial_layers > 1) &&
cpi->oxcf.pass != 1)) {
vp10_init_layer_context(cpi);
}
// change includes all joint functionality
vp10_change_config(cpi, oxcf);
@ -1460,6 +1408,7 @@ void vp10_change_config(struct VP10_COMP *cpi, const VP10EncoderConfig *oxcf) {
cm->profile = oxcf->profile;
cm->bit_depth = oxcf->bit_depth;
cm->color_space = oxcf->color_space;
cm->color_range = oxcf->color_range;
if (cm->profile <= PROFILE_1)
assert(cm->bit_depth == VPX_BITS_8);
@ -1475,8 +1424,11 @@ void vp10_change_config(struct VP10_COMP *cpi, const VP10EncoderConfig *oxcf) {
cpi->refresh_golden_frame = 0;
cpi->refresh_last_frame = 1;
cm->refresh_frame_context = 1;
cm->reset_frame_context = 0;
cm->refresh_frame_context =
oxcf->error_resilient_mode ? REFRESH_FRAME_CONTEXT_OFF :
oxcf->frame_parallel_decoding_mode ? REFRESH_FRAME_CONTEXT_FORWARD
: REFRESH_FRAME_CONTEXT_BACKWARD;
cm->reset_frame_context = RESET_FRAME_CONTEXT_NONE;
vp10_reset_segment_features(&cm->seg);
vp10_set_high_precision_mv(cpi, 0);
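The nested ternary above encodes a three-way policy using the REFRESH_FRAME_CONTEXT_* values this patch introduces. The same decision as a standalone helper, assuming the enum type is named REFRESH_FRAME_CONTEXT_MODE (the helper itself is illustrative, not part of the change):

/* error resilient         -> OFF      (contexts never updated)
 * frame-parallel decoding -> FORWARD  (update from signalled deltas only)
 * otherwise               -> BACKWARD (adapt from decoded symbol counts) */
static REFRESH_FRAME_CONTEXT_MODE pick_refresh_mode(int error_resilient,
                                                    int frame_parallel) {
  if (error_resilient) return REFRESH_FRAME_CONTEXT_OFF;
  return frame_parallel ? REFRESH_FRAME_CONTEXT_FORWARD
                        : REFRESH_FRAME_CONTEXT_BACKWARD;
}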
@ -1493,8 +1445,8 @@ void vp10_change_config(struct VP10_COMP *cpi, const VP10EncoderConfig *oxcf) {
// Under a configuration change, where maximum_buffer_size may change,
// keep buffer level clipped to the maximum allowed buffer size.
rc->bits_off_target = MIN(rc->bits_off_target, rc->maximum_buffer_size);
rc->buffer_level = MIN(rc->buffer_level, rc->maximum_buffer_size);
rc->bits_off_target = VPXMIN(rc->bits_off_target, rc->maximum_buffer_size);
rc->buffer_level = VPXMIN(rc->buffer_level, rc->maximum_buffer_size);
// Set up frame rate and related parameters rate control values.
vp10_new_framerate(cpi, cpi->framerate);
@ -1520,15 +1472,6 @@ void vp10_change_config(struct VP10_COMP *cpi, const VP10EncoderConfig *oxcf) {
}
update_frame_size(cpi);
if ((cpi->svc.number_temporal_layers > 1 &&
cpi->oxcf.rc_mode == VPX_CBR) ||
((cpi->svc.number_temporal_layers > 1 ||
cpi->svc.number_spatial_layers > 1) &&
cpi->oxcf.pass != 1)) {
vp10_update_layer_context_change_config(cpi,
(int)cpi->oxcf.target_bandwidth);
}
cpi->alt_ref_source = NULL;
rc->is_src_frame_alt_ref = 0;
@ -1619,7 +1562,6 @@ VP10_COMP *vp10_create_compressor(VP10EncoderConfig *oxcf,
(FRAME_CONTEXT *)vpx_calloc(FRAME_CONTEXTS,
sizeof(*cm->frame_contexts)));
cpi->use_svc = 0;
cpi->resize_state = 0;
cpi->resize_avg_qp = 0;
cpi->resize_buffer_underflow = 0;
@ -1758,63 +1700,24 @@ VP10_COMP *vp10_create_compressor(VP10EncoderConfig *oxcf,
const size_t packet_sz = sizeof(FIRSTPASS_STATS);
const int packets = (int)(oxcf->two_pass_stats_in.sz / packet_sz);
if (cpi->svc.number_spatial_layers > 1
|| cpi->svc.number_temporal_layers > 1) {
FIRSTPASS_STATS *const stats = oxcf->two_pass_stats_in.buf;
FIRSTPASS_STATS *stats_copy[VPX_SS_MAX_LAYERS] = {0};
int i;
for (i = 0; i < oxcf->ss_number_layers; ++i) {
FIRSTPASS_STATS *const last_packet_for_layer =
&stats[packets - oxcf->ss_number_layers + i];
const int layer_id = (int)last_packet_for_layer->spatial_layer_id;
const int packets_in_layer = (int)last_packet_for_layer->count + 1;
if (layer_id >= 0 && layer_id < oxcf->ss_number_layers) {
LAYER_CONTEXT *const lc = &cpi->svc.layer_context[layer_id];
vpx_free(lc->rc_twopass_stats_in.buf);
lc->rc_twopass_stats_in.sz = packets_in_layer * packet_sz;
CHECK_MEM_ERROR(cm, lc->rc_twopass_stats_in.buf,
vpx_malloc(lc->rc_twopass_stats_in.sz));
lc->twopass.stats_in_start = lc->rc_twopass_stats_in.buf;
lc->twopass.stats_in = lc->twopass.stats_in_start;
lc->twopass.stats_in_end = lc->twopass.stats_in_start
+ packets_in_layer - 1;
stats_copy[layer_id] = lc->rc_twopass_stats_in.buf;
}
}
for (i = 0; i < packets; ++i) {
const int layer_id = (int)stats[i].spatial_layer_id;
if (layer_id >= 0 && layer_id < oxcf->ss_number_layers
&& stats_copy[layer_id] != NULL) {
*stats_copy[layer_id] = stats[i];
++stats_copy[layer_id];
}
}
vp10_init_second_pass_spatial_svc(cpi);
} else {
#if CONFIG_FP_MB_STATS
if (cpi->use_fp_mb_stats) {
const size_t psz = cpi->common.MBs * sizeof(uint8_t);
const int ps = (int)(oxcf->firstpass_mb_stats_in.sz / psz);
if (cpi->use_fp_mb_stats) {
const size_t psz = cpi->common.MBs * sizeof(uint8_t);
const int ps = (int)(oxcf->firstpass_mb_stats_in.sz / psz);
cpi->twopass.firstpass_mb_stats.mb_stats_start =
oxcf->firstpass_mb_stats_in.buf;
cpi->twopass.firstpass_mb_stats.mb_stats_end =
cpi->twopass.firstpass_mb_stats.mb_stats_start +
(ps - 1) * cpi->common.MBs * sizeof(uint8_t);
}
cpi->twopass.firstpass_mb_stats.mb_stats_start =
oxcf->firstpass_mb_stats_in.buf;
cpi->twopass.firstpass_mb_stats.mb_stats_end =
cpi->twopass.firstpass_mb_stats.mb_stats_start +
(ps - 1) * cpi->common.MBs * sizeof(uint8_t);
}
#endif
cpi->twopass.stats_in_start = oxcf->two_pass_stats_in.buf;
cpi->twopass.stats_in = cpi->twopass.stats_in_start;
cpi->twopass.stats_in_end = &cpi->twopass.stats_in[packets - 1];
cpi->twopass.stats_in_start = oxcf->two_pass_stats_in.buf;
cpi->twopass.stats_in = cpi->twopass.stats_in_start;
cpi->twopass.stats_in_end = &cpi->twopass.stats_in[packets - 1];
vp10_init_second_pass(cpi);
}
vp10_init_second_pass(cpi);
}
vp10_set_speed_features_framesize_independent(cpi);
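With the per-layer stats splitting gone, the whole first-pass log is consumed through a single pointer window: stats_in_start and stats_in_end bracket the FIRSTPASS_STATS packets, and stats_in advances between them as the second pass runs. An illustrative walk over that window (consume_stats is hypothetical):

const FIRSTPASS_STATS *p = cpi->twopass.stats_in_start;
while (p <= cpi->twopass.stats_in_end) {
  consume_stats(p);  /* each packet summarizes one first-pass frame */
  ++p;
}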
@ -2248,42 +2151,6 @@ typedef struct {
uint32_t samples[4]; // total/y/u/v
} PSNR_STATS;
static void calc_psnr(const YV12_BUFFER_CONFIG *a, const YV12_BUFFER_CONFIG *b,
PSNR_STATS *psnr) {
static const double peak = 255.0;
const int widths[3] = {
a->y_crop_width, a->uv_crop_width, a->uv_crop_width};
const int heights[3] = {
a->y_crop_height, a->uv_crop_height, a->uv_crop_height};
const uint8_t *a_planes[3] = {a->y_buffer, a->u_buffer, a->v_buffer};
const int a_strides[3] = {a->y_stride, a->uv_stride, a->uv_stride};
const uint8_t *b_planes[3] = {b->y_buffer, b->u_buffer, b->v_buffer};
const int b_strides[3] = {b->y_stride, b->uv_stride, b->uv_stride};
int i;
uint64_t total_sse = 0;
uint32_t total_samples = 0;
for (i = 0; i < 3; ++i) {
const int w = widths[i];
const int h = heights[i];
const uint32_t samples = w * h;
const uint64_t sse = get_sse(a_planes[i], a_strides[i],
b_planes[i], b_strides[i],
w, h);
psnr->sse[1 + i] = sse;
psnr->samples[1 + i] = samples;
psnr->psnr[1 + i] = vpx_sse_to_psnr(samples, peak, (double)sse);
total_sse += sse;
total_samples += samples;
}
psnr->sse[0] = total_sse;
psnr->samples[0] = total_samples;
psnr->psnr[0] = vpx_sse_to_psnr((double)total_samples, peak,
(double)total_sse);
}
#if CONFIG_VP9_HIGHBITDEPTH
static void calc_highbd_psnr(const YV12_BUFFER_CONFIG *a,
const YV12_BUFFER_CONFIG *b,
@ -2336,6 +2203,44 @@ static void calc_highbd_psnr(const YV12_BUFFER_CONFIG *a,
psnr->psnr[0] = vpx_sse_to_psnr((double)total_samples, peak,
(double)total_sse);
}
#else // !CONFIG_VP9_HIGHBITDEPTH
static void calc_psnr(const YV12_BUFFER_CONFIG *a, const YV12_BUFFER_CONFIG *b,
PSNR_STATS *psnr) {
static const double peak = 255.0;
const int widths[3] = {
a->y_crop_width, a->uv_crop_width, a->uv_crop_width};
const int heights[3] = {
a->y_crop_height, a->uv_crop_height, a->uv_crop_height};
const uint8_t *a_planes[3] = {a->y_buffer, a->u_buffer, a->v_buffer};
const int a_strides[3] = {a->y_stride, a->uv_stride, a->uv_stride};
const uint8_t *b_planes[3] = {b->y_buffer, b->u_buffer, b->v_buffer};
const int b_strides[3] = {b->y_stride, b->uv_stride, b->uv_stride};
int i;
uint64_t total_sse = 0;
uint32_t total_samples = 0;
for (i = 0; i < 3; ++i) {
const int w = widths[i];
const int h = heights[i];
const uint32_t samples = w * h;
const uint64_t sse = get_sse(a_planes[i], a_strides[i],
b_planes[i], b_strides[i],
w, h);
psnr->sse[1 + i] = sse;
psnr->samples[1 + i] = samples;
psnr->psnr[1 + i] = vpx_sse_to_psnr(samples, peak, (double)sse);
total_sse += sse;
total_samples += samples;
}
psnr->sse[0] = total_sse;
psnr->samples[0] = total_samples;
psnr->psnr[0] = vpx_sse_to_psnr((double)total_samples, peak,
(double)total_sse);
}
#endif // CONFIG_VP9_HIGHBITDEPTH
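Both calc_psnr variants reduce each plane to an SSE and convert with vpx_sse_to_psnr. That conversion is the standard formula; a self-contained sketch of what it computes (mirroring, not replacing, the library routine, which caps the result at 100 dB):

#include <math.h>

/* PSNR = 10 * log10(peak^2 * samples / sse); a zero or tiny sse is
 * capped at 100 dB, as in libvpx. */
static double sse_to_psnr_sketch(double samples, double peak, double sse) {
  if (sse > 0.0) {
    const double psnr = 10.0 * log10(samples * peak * peak / sse);
    return psnr > 100.0 ? 100.0 : psnr;
  }
  return 100.0;
}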
static void generate_psnr_packet(VP10_COMP *cpi) {
@ -2355,11 +2260,7 @@ static void generate_psnr_packet(VP10_COMP *cpi) {
pkt.data.psnr.psnr[i] = psnr.psnr[i];
}
pkt.kind = VPX_CODEC_PSNR_PKT;
if (cpi->use_svc)
cpi->svc.layer_context[cpi->svc.spatial_layer_id *
cpi->svc.number_temporal_layers].psnr_pkt = pkt.data.psnr;
else
vpx_codec_pkt_list_add(cpi->output_pkt_list, &pkt);
vpx_codec_pkt_list_add(cpi->output_pkt_list, &pkt);
}
int vp10_use_as_reference(VP10_COMP *cpi, int ref_frame_flags) {
@ -2616,7 +2517,7 @@ static int scale_down(VP10_COMP *cpi, int q) {
if (rc->frame_size_selector == UNSCALED &&
q >= rc->rf_level_maxq[gf_group->rf_level[gf_group->index]]) {
const int max_size_thresh = (int)(rate_thresh_mult[SCALE_STEP1]
* MAX(rc->this_frame_target, rc->avg_frame_bandwidth));
* VPXMAX(rc->this_frame_target, rc->avg_frame_bandwidth));
scale = rc->projected_frame_size > max_size_thresh ? 1 : 0;
}
return scale;
@ -2688,11 +2589,6 @@ void vp10_update_reference_frames(VP10_COMP *cpi) {
tmp = cpi->alt_fb_idx;
cpi->alt_fb_idx = cpi->gld_fb_idx;
cpi->gld_fb_idx = tmp;
if (is_two_pass_svc(cpi)) {
cpi->svc.layer_context[0].gold_ref_idx = cpi->gld_fb_idx;
cpi->svc.layer_context[0].alt_ref_idx = cpi->alt_fb_idx;
}
} else { /* For non key/golden frames */
if (cpi->refresh_alt_ref_frame) {
int arf_idx = cpi->alt_fb_idx;
@ -2864,7 +2760,7 @@ void vp10_scale_references(VP10_COMP *cpi) {
++buf->ref_count;
}
} else {
if (cpi->oxcf.pass != 0 || cpi->use_svc)
if (cpi->oxcf.pass != 0)
cpi->scaled_ref_idx[ref_frame - 1] = INVALID_IDX;
}
}
@ -2873,7 +2769,7 @@ void vp10_scale_references(VP10_COMP *cpi) {
static void release_scaled_references(VP10_COMP *cpi) {
VP10_COMMON *cm = &cpi->common;
int i;
if (cpi->oxcf.pass == 0 && !cpi->use_svc) {
if (cpi->oxcf.pass == 0) {
// Only release scaled references under certain conditions:
// if reference will be updated, or if scaled reference has same resolution.
int refresh[3];
@ -2999,7 +2895,7 @@ static void output_frame_level_debug_stats(VP10_COMP *cpi) {
static void set_mv_search_params(VP10_COMP *cpi) {
const VP10_COMMON *const cm = &cpi->common;
const unsigned int max_mv_def = MIN(cm->width, cm->height);
const unsigned int max_mv_def = VPXMIN(cm->width, cm->height);
// Default based on max resolution.
cpi->mv_step_param = vp10_init_search_range(max_mv_def);
@ -3014,8 +2910,8 @@ static void set_mv_search_params(VP10_COMP *cpi) {
// Allow mv_steps to correspond to twice the max mv magnitude found
// in the previous frame, capped by the default max_mv_magnitude based
// on resolution.
cpi->mv_step_param =
vp10_init_search_range(MIN(max_mv_def, 2 * cpi->max_mv_magnitude));
cpi->mv_step_param = vp10_init_search_range(
VPXMIN(max_mv_def, 2 * cpi->max_mv_magnitude));
}
cpi->max_mv_magnitude = 0;
}
@ -3107,7 +3003,6 @@ static void set_frame_size(VP10_COMP *cpi) {
if (oxcf->pass == 0 &&
oxcf->rc_mode == VPX_CBR &&
!cpi->use_svc &&
oxcf->resize_mode == RESIZE_DYNAMIC) {
if (cpi->resize_pending == 1) {
oxcf->scaled_frame_width =
@ -3130,10 +3025,7 @@ static void set_frame_size(VP10_COMP *cpi) {
}
}
if ((oxcf->pass == 2) &&
(!cpi->use_svc ||
(is_two_pass_svc(cpi) &&
cpi->svc.encode_empty_frame_state != ENCODING))) {
if (oxcf->pass == 2) {
vp10_set_target_rate(cpi);
}
@ -3240,10 +3132,9 @@ static void encode_without_recode_loop(VP10_COMP *cpi) {
vp10_encode_frame(cpi);
// Update some stats from cyclic refresh, and check if we should not update
// golden reference, for non-SVC 1 pass CBR.
// golden reference, for 1 pass CBR.
if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ &&
cm->frame_type != KEY_FRAME &&
!cpi->use_svc &&
(cpi->oxcf.pass == 0 && cpi->oxcf.rc_mode == VPX_CBR))
vp10_cyclic_refresh_check_golden_update(cpi);
@ -3342,8 +3233,7 @@ static void encode_with_recode_loop(VP10_COMP *cpi,
// to recode.
if (cpi->sf.recode_loop >= ALLOW_RECODE_KFARFGF) {
save_coding_context(cpi);
if (!cpi->sf.use_nonrd_pick_mode)
vp10_pack_bitstream(cpi, dest, size);
vp10_pack_bitstream(cpi, dest, size);
rc->projected_frame_size = (int)(*size) << 3;
restore_coding_context(cpi);
@ -3388,7 +3278,7 @@ static void encode_with_recode_loop(VP10_COMP *cpi,
// Adjust Q
q = (int)((q * high_err_target) / kf_err);
q = MIN(q, (q_high + q_low) >> 1);
q = VPXMIN(q, (q_high + q_low) >> 1);
} else if (kf_err < low_err_target &&
rc->projected_frame_size >= frame_under_shoot_limit) {
// The key frame is much better than the previous frame
@ -3397,7 +3287,7 @@ static void encode_with_recode_loop(VP10_COMP *cpi,
// Adjust Q
q = (int)((q * low_err_target) / kf_err);
q = MIN(q, (q_high + q_low + 1) >> 1);
q = VPXMIN(q, (q_high + q_low + 1) >> 1);
}
// Clamp Q to upper and lower limits:
@ -3406,7 +3296,7 @@ static void encode_with_recode_loop(VP10_COMP *cpi,
loop = q != last_q;
} else if (recode_loop_test(
cpi, frame_over_shoot_limit, frame_under_shoot_limit,
q, MAX(q_high, top_index), bottom_index)) {
q, VPXMAX(q_high, top_index), bottom_index)) {
// Is the projected frame size out of range and are we allowed
// to attempt to recode.
int last_q = q;
@ -3448,12 +3338,12 @@ static void encode_with_recode_loop(VP10_COMP *cpi,
vp10_rc_update_rate_correction_factors(cpi);
q = vp10_rc_regulate_q(cpi, rc->this_frame_target,
bottom_index, MAX(q_high, top_index));
bottom_index, VPXMAX(q_high, top_index));
while (q < q_low && retries < 10) {
vp10_rc_update_rate_correction_factors(cpi);
q = vp10_rc_regulate_q(cpi, rc->this_frame_target,
bottom_index, MAX(q_high, top_index));
bottom_index, VPXMAX(q_high, top_index));
retries++;
}
}
@ -3525,9 +3415,7 @@ static int get_ref_frame_flags(const VP10_COMP *cpi) {
if (gold_is_last)
flags &= ~VP9_GOLD_FLAG;
if (cpi->rc.frames_till_gf_update_due == INT_MAX &&
(cpi->svc.number_temporal_layers == 1 &&
cpi->svc.number_spatial_layers == 1))
if (cpi->rc.frames_till_gf_update_due == INT_MAX)
flags &= ~VP9_GOLD_FLAG;
if (alt_is_last)
@ -3667,54 +3555,14 @@ static void encode_frame_to_data_rate(VP10_COMP *cpi,
cpi->rc.source_alt_ref_active = 0;
cm->error_resilient_mode = oxcf->error_resilient_mode;
cm->frame_parallel_decoding_mode = oxcf->frame_parallel_decoding_mode;
// By default, encoder assumes decoder can use prev_mi.
if (cm->error_resilient_mode) {
cm->frame_parallel_decoding_mode = 1;
cm->reset_frame_context = 0;
cm->refresh_frame_context = 0;
cm->reset_frame_context = RESET_FRAME_CONTEXT_NONE;
cm->refresh_frame_context = REFRESH_FRAME_CONTEXT_OFF;
} else if (cm->intra_only) {
// Only reset the current context.
cm->reset_frame_context = 2;
}
}
if (is_two_pass_svc(cpi) && cm->error_resilient_mode == 0) {
// Use context 0 for intra only empty frame, but the last frame context
// for other empty frames.
if (cpi->svc.encode_empty_frame_state == ENCODING) {
if (cpi->svc.encode_intra_empty_frame != 0)
cm->frame_context_idx = 0;
else
cm->frame_context_idx = FRAME_CONTEXTS - 1;
} else {
cm->frame_context_idx =
cpi->svc.spatial_layer_id * cpi->svc.number_temporal_layers +
cpi->svc.temporal_layer_id;
}
cm->frame_parallel_decoding_mode = oxcf->frame_parallel_decoding_mode;
// The probs will be updated based on the frame type of its previous
// frame if frame_parallel_decoding_mode is 0. The type may vary for
// the frame after a key frame in base layer since we may drop enhancement
// layers. So set frame_parallel_decoding_mode to 1 in this case.
if (cm->frame_parallel_decoding_mode == 0) {
if (cpi->svc.number_temporal_layers == 1) {
if (cpi->svc.spatial_layer_id == 0 &&
cpi->svc.layer_context[0].last_frame_type == KEY_FRAME)
cm->frame_parallel_decoding_mode = 1;
} else if (cpi->svc.spatial_layer_id == 0) {
// Find the 2nd frame in temporal base layer and 1st frame in temporal
// enhancement layers from the key frame.
int i;
for (i = 0; i < cpi->svc.number_temporal_layers; ++i) {
if (cpi->svc.layer_context[0].frames_from_key_frame == 1 << i) {
cm->frame_parallel_decoding_mode = 1;
break;
}
}
}
cm->reset_frame_context = RESET_FRAME_CONTEXT_CURRENT;
}
}
@ -3778,6 +3626,8 @@ static void encode_frame_to_data_rate(VP10_COMP *cpi,
cpi->refresh_last_frame = 1;
cm->frame_to_show = get_frame_new_buffer(cm);
cm->frame_to_show->color_space = cm->color_space;
cm->frame_to_show->color_range = cm->color_range;
// Pick the loop filter level for the frame.
loopfilter_frame(cpi, cm);
@ -3797,11 +3647,11 @@ static void encode_frame_to_data_rate(VP10_COMP *cpi,
full_to_model_counts(cpi->td.counts->coef[t],
cpi->td.rd_counts.coef_counts[t]);
if (!cm->error_resilient_mode && !cm->frame_parallel_decoding_mode)
if (cm->refresh_frame_context == REFRESH_FRAME_CONTEXT_BACKWARD)
vp10_adapt_coef_probs(cm);
if (!frame_is_intra_only(cm)) {
if (!cm->error_resilient_mode && !cm->frame_parallel_decoding_mode) {
if (cm->refresh_frame_context == REFRESH_FRAME_CONTEXT_BACKWARD) {
vp10_adapt_mode_probs(cm);
vp10_adapt_mv_probs(cm, cm->allow_high_precision_mv);
}
@ -3821,8 +3671,7 @@ static void encode_frame_to_data_rate(VP10_COMP *cpi,
cm->last_frame_type = cm->frame_type;
if (!(is_two_pass_svc(cpi) && cpi->svc.encode_empty_frame_state == ENCODING))
vp10_rc_postencode_update(cpi, *size);
vp10_rc_postencode_update(cpi, *size);
#if 0
output_frame_level_debug_stats(cpi);
@ -3854,22 +3703,8 @@ static void encode_frame_to_data_rate(VP10_COMP *cpi,
// Don't increment frame counters if this was an altref buffer
// update not a real frame
++cm->current_video_frame;
if (cpi->use_svc)
vp10_inc_frame_in_layer(cpi);
}
cm->prev_frame = cm->cur_frame;
if (cpi->use_svc)
cpi->svc.layer_context[cpi->svc.spatial_layer_id *
cpi->svc.number_temporal_layers +
cpi->svc.temporal_layer_id].last_frame_type =
cm->frame_type;
}
static void SvcEncode(VP10_COMP *cpi, size_t *size, uint8_t *dest,
unsigned int *frame_flags) {
vp10_rc_get_svc_params(cpi);
encode_frame_to_data_rate(cpi, size, dest, frame_flags);
}
static void Pass0Encode(VP10_COMP *cpi, size_t *size, uint8_t *dest,
@ -3887,8 +3722,7 @@ static void Pass2Encode(VP10_COMP *cpi, size_t *size,
cpi->allow_encode_breakout = ENCODE_BREAKOUT_ENABLED;
encode_frame_to_data_rate(cpi, size, dest, frame_flags);
if (!(is_two_pass_svc(cpi) && cpi->svc.encode_empty_frame_state == ENCODING))
vp10_twopass_postencode_update(cpi);
vp10_twopass_postencode_update(cpi);
}
static void init_ref_frame_bufs(VP10_COMMON *cm) {
@ -4000,7 +3834,7 @@ static int frame_is_reference(const VP10_COMP *cpi) {
cpi->refresh_last_frame ||
cpi->refresh_golden_frame ||
cpi->refresh_alt_ref_frame ||
cm->refresh_frame_context ||
cm->refresh_frame_context != REFRESH_FRAME_CONTEXT_OFF ||
cm->lf.mode_ref_delta_update ||
cm->seg.update_map ||
cm->seg.update_data;
@ -4032,8 +3866,8 @@ static void adjust_frame_rate(VP10_COMP *cpi,
// Average this frame's rate into the last second's average
// frame rate. If we haven't seen 1 second yet, then average
// over the whole interval seen.
const double interval = MIN((double)(source->ts_end
- cpi->first_time_stamp_ever), 10000000.0);
const double interval = VPXMIN(
(double)(source->ts_end - cpi->first_time_stamp_ever), 10000000.0);
double avg_duration = 10000000.0 / cpi->framerate;
avg_duration *= (interval - avg_duration + this_duration);
avg_duration /= interval;
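This update is a sliding average over at most one second of timestamps (10000000 ticks here). Writing d for the current average duration, t for this frame's duration, and I for the interval, it computes d' = d * (I - d + t) / I = d + d * (t - d) / I, so the average moves toward the new duration with weight d / I. For example, with I = 10^7, d = 400000 (25 fps), and t = 500000, d' = 400000 * 1.01 = 404000, nudging the derived framerate down to about 24.75 fps.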
@ -4097,7 +3931,7 @@ static void adjust_image_stat(double y, double u, double v, double all,
s->stat[U] += u;
s->stat[V] += v;
s->stat[ALL] += all;
s->worst = MIN(s->worst, all);
s->worst = VPXMIN(s->worst, all);
}
#endif // CONFIG_INTERNAL_STATS
@ -4115,68 +3949,37 @@ int vp10_get_compressed_data(VP10_COMP *cpi, unsigned int *frame_flags,
int arf_src_index;
int i;
if (is_two_pass_svc(cpi)) {
#if CONFIG_SPATIAL_SVC
vp10_svc_start_frame(cpi);
// Use a small empty frame instead of a real frame
if (cpi->svc.encode_empty_frame_state == ENCODING)
source = &cpi->svc.empty_frame;
#endif
if (oxcf->pass == 2)
vp10_restore_layer_context(cpi);
} else if (is_one_pass_cbr_svc(cpi)) {
vp10_one_pass_cbr_svc_start_layer(cpi);
}
vpx_usec_timer_start(&cmptimer);
vp10_set_high_precision_mv(cpi, ALTREF_HIGH_PRECISION_MV);
// Is multi-arf enabled.
// Note that at the moment multi_arf is only configured for 2 pass VBR and
// will not work properly with svc.
if ((oxcf->pass == 2) && !cpi->use_svc &&
(cpi->oxcf.enable_auto_arf > 1))
// Note that at the moment multi_arf is only configured for 2 pass VBR
if ((oxcf->pass == 2) && (cpi->oxcf.enable_auto_arf > 1))
cpi->multi_arf_allowed = 1;
else
cpi->multi_arf_allowed = 0;
// Normal defaults
cm->reset_frame_context = 0;
cm->refresh_frame_context = 1;
if (!is_one_pass_cbr_svc(cpi)) {
cpi->refresh_last_frame = 1;
cpi->refresh_golden_frame = 0;
cpi->refresh_alt_ref_frame = 0;
}
cm->reset_frame_context = RESET_FRAME_CONTEXT_NONE;
cm->refresh_frame_context =
oxcf->error_resilient_mode ? REFRESH_FRAME_CONTEXT_OFF :
oxcf->frame_parallel_decoding_mode ? REFRESH_FRAME_CONTEXT_FORWARD
: REFRESH_FRAME_CONTEXT_BACKWARD;
cpi->refresh_last_frame = 1;
cpi->refresh_golden_frame = 0;
cpi->refresh_alt_ref_frame = 0;
// Should we encode an arf frame.
arf_src_index = get_arf_src_index(cpi);
// Skip alt frame if we encode the empty frame
if (is_two_pass_svc(cpi) && source != NULL)
arf_src_index = 0;
if (arf_src_index) {
assert(arf_src_index <= rc->frames_to_key);
if ((source = vp10_lookahead_peek(cpi->lookahead, arf_src_index)) != NULL) {
cpi->alt_ref_source = source;
#if CONFIG_SPATIAL_SVC
if (is_two_pass_svc(cpi) && cpi->svc.spatial_layer_id > 0) {
int i;
// Reference a hidden frame from a lower layer
for (i = cpi->svc.spatial_layer_id - 1; i >= 0; --i) {
if (oxcf->ss_enable_auto_arf[i]) {
cpi->gld_fb_idx = cpi->svc.layer_context[i].alt_ref_idx;
break;
}
}
}
cpi->svc.layer_context[cpi->svc.spatial_layer_id].has_alt_frame = 1;
#endif
if (oxcf->arnr_max_frames > 0) {
// Produce the filtered ARF frame.
vp10_temporal_filter(cpi, arf_src_index);
@ -4204,21 +4007,11 @@ int vp10_get_compressed_data(VP10_COMP *cpi, unsigned int *frame_flags,
}
// Read in the source frame.
if (cpi->use_svc)
source = vp10_svc_lookahead_pop(cpi, cpi->lookahead, flush);
else
source = vp10_lookahead_pop(cpi->lookahead, flush);
source = vp10_lookahead_pop(cpi->lookahead, flush);
if (source != NULL) {
cm->show_frame = 1;
cm->intra_only = 0;
// If the flags indicate an intra frame, but the current picture is for a
// non-zero spatial layer, it should not be an intra picture.
// TODO(Won Kap): this needs to change if per-layer intra frame is
// allowed.
if ((source->flags & VPX_EFLAG_FORCE_KF) && cpi->svc.spatial_layer_id) {
source->flags &= ~(unsigned int)(VPX_EFLAG_FORCE_KF);
}
// Check to see if the frame should be encoded as an arf overlay.
check_src_altref(cpi, source);
@ -4257,11 +4050,6 @@ int vp10_get_compressed_data(VP10_COMP *cpi, unsigned int *frame_flags,
adjust_frame_rate(cpi, source);
}
if (is_one_pass_cbr_svc(cpi)) {
vp10_update_temporal_layer_framerate(cpi);
vp10_restore_layer_context(cpi);
}
// Find a free buffer for the new frame, releasing the reference previously
// held.
if (cm->new_fb_idx != INVALID_IDX) {
@ -4274,7 +4062,7 @@ int vp10_get_compressed_data(VP10_COMP *cpi, unsigned int *frame_flags,
cm->cur_frame = &pool->frame_bufs[cm->new_fb_idx];
if (!cpi->use_svc && cpi->multi_arf_allowed) {
if (cpi->multi_arf_allowed) {
if (cm->frame_type == KEY_FRAME) {
init_buffer_indices(cpi);
} else if (oxcf->pass == 2) {
@ -4288,24 +4076,18 @@ int vp10_get_compressed_data(VP10_COMP *cpi, unsigned int *frame_flags,
cpi->frame_flags = *frame_flags;
if ((oxcf->pass == 2) &&
(!cpi->use_svc ||
(is_two_pass_svc(cpi) &&
cpi->svc.encode_empty_frame_state != ENCODING))) {
if (oxcf->pass == 2) {
vp10_rc_get_second_pass_params(cpi);
} else if (oxcf->pass == 1) {
set_frame_size(cpi);
}
if (cpi->oxcf.pass != 0 ||
cpi->use_svc ||
frame_is_intra_only(cm) == 1) {
if (cpi->oxcf.pass != 0 || frame_is_intra_only(cm) == 1) {
for (i = 0; i < MAX_REF_FRAMES; ++i)
cpi->scaled_ref_idx[i] = INVALID_IDX;
}
if (oxcf->pass == 1 &&
(!cpi->use_svc || is_two_pass_svc(cpi))) {
if (oxcf->pass == 1) {
const int lossless = is_lossless_requested(oxcf);
#if CONFIG_VP9_HIGHBITDEPTH
if (cpi->oxcf.use_highbitdepth)
@ -4320,17 +4102,14 @@ int vp10_get_compressed_data(VP10_COMP *cpi, unsigned int *frame_flags,
#endif // CONFIG_VP9_HIGHBITDEPTH
cpi->td.mb.itxm_add = lossless ? vp10_iwht4x4_add : vp10_idct4x4_add;
vp10_first_pass(cpi, source);
} else if (oxcf->pass == 2 &&
(!cpi->use_svc || is_two_pass_svc(cpi))) {
} else if (oxcf->pass == 2) {
Pass2Encode(cpi, size, dest, frame_flags);
} else if (cpi->use_svc) {
SvcEncode(cpi, size, dest, frame_flags);
} else {
// One pass encode
Pass0Encode(cpi, size, dest, frame_flags);
}
if (cm->refresh_frame_context)
if (cm->refresh_frame_context != REFRESH_FRAME_CONTEXT_OFF)
cm->frame_contexts[cm->frame_context_idx] = *cm->fc;
// No frame encoded, or frame was dropped, release scaled references.
@ -4342,14 +4121,6 @@ int vp10_get_compressed_data(VP10_COMP *cpi, unsigned int *frame_flags,
cpi->droppable = !frame_is_reference(cpi);
}
// Save layer specific state.
if (is_one_pass_cbr_svc(cpi) ||
((cpi->svc.number_temporal_layers > 1 ||
cpi->svc.number_spatial_layers > 1) &&
oxcf->pass == 2)) {
vp10_save_layer_context(cpi);
}
vpx_usec_timer_mark(&cmptimer);
cpi->time_compress_data += vpx_usec_timer_elapsed(&cmptimer);
@ -4427,7 +4198,7 @@ int vp10_get_compressed_data(VP10_COMP *cpi, unsigned int *frame_flags,
frame_ssim2 = vpx_calc_ssim(orig, recon, &weight);
#endif // CONFIG_VP9_HIGHBITDEPTH
cpi->worst_ssim= MIN(cpi->worst_ssim, frame_ssim2);
cpi->worst_ssim= VPXMIN(cpi->worst_ssim, frame_ssim2);
cpi->summed_quality += frame_ssim2 * weight;
cpi->summed_weights += weight;
@ -4464,7 +4235,8 @@ int vp10_get_compressed_data(VP10_COMP *cpi, unsigned int *frame_flags,
cpi->Source->y_buffer, cpi->Source->y_stride,
cm->frame_to_show->y_buffer, cm->frame_to_show->y_stride,
cpi->Source->y_width, cpi->Source->y_height);
cpi->worst_blockiness = MAX(cpi->worst_blockiness, frame_blockiness);
cpi->worst_blockiness =
VPXMAX(cpi->worst_blockiness, frame_blockiness);
cpi->total_blockiness += frame_blockiness;
}
}
@ -4484,8 +4256,8 @@ int vp10_get_compressed_data(VP10_COMP *cpi, unsigned int *frame_flags,
double consistency = vpx_sse_to_psnr(samples, peak,
(double)cpi->total_inconsistency);
if (consistency > 0.0)
cpi->worst_consistency = MIN(cpi->worst_consistency,
consistency);
cpi->worst_consistency =
VPXMIN(cpi->worst_consistency, consistency);
cpi->total_inconsistency += this_inconsistency;
}
}
@ -4527,27 +4299,6 @@ int vp10_get_compressed_data(VP10_COMP *cpi, unsigned int *frame_flags,
}
#endif
if (is_two_pass_svc(cpi)) {
if (cpi->svc.encode_empty_frame_state == ENCODING) {
cpi->svc.encode_empty_frame_state = ENCODED;
cpi->svc.encode_intra_empty_frame = 0;
}
if (cm->show_frame) {
++cpi->svc.spatial_layer_to_encode;
if (cpi->svc.spatial_layer_to_encode >= cpi->svc.number_spatial_layers)
cpi->svc.spatial_layer_to_encode = 0;
// May need the empty frame after a visible frame.
cpi->svc.encode_empty_frame_state = NEED_TO_ENCODE;
}
} else if (is_one_pass_cbr_svc(cpi)) {
if (cm->show_frame) {
++cpi->svc.spatial_layer_to_encode;
if (cpi->svc.spatial_layer_to_encode >= cpi->svc.number_spatial_layers)
cpi->svc.spatial_layer_to_encode = 0;
}
}
vpx_clear_system_state();
return 0;
}
@ -4640,11 +4391,6 @@ int vp10_set_size_literal(VP10_COMP *cpi, unsigned int width,
return 0;
}
void vp10_set_svc(VP10_COMP *cpi, int use_svc) {
cpi->use_svc = use_svc;
return;
}
int64_t vp10_get_y_sse(const YV12_BUFFER_CONFIG *a,
const YV12_BUFFER_CONFIG *b) {
assert(a->y_crop_width == b->y_crop_width);

View File

@ -33,7 +33,6 @@
#include "vp10/encoder/ratectrl.h"
#include "vp10/encoder/rd.h"
#include "vp10/encoder/speed_features.h"
#include "vp10/encoder/svc_layercontext.h"
#include "vp10/encoder/tokenize.h"
#if CONFIG_VP9_TEMPORAL_DENOISING
@ -116,7 +115,7 @@ typedef enum {
} AQ_MODE;
typedef enum {
RESIZE_NONE = 0, // No frame resizing allowed (except for SVC).
RESIZE_NONE = 0, // No frame resizing allowed.
RESIZE_FIXED = 1, // All frames are coded at the specified dimension.
RESIZE_DYNAMIC = 2 // Coded size of each frame is determined by the codec.
} RESIZE_TYPE;
@ -189,16 +188,6 @@ typedef struct VP10EncoderConfig {
// END DATARATE CONTROL OPTIONS
// ----------------------------------------------------------------
// Spatial and temporal scalability.
int ss_number_layers; // Number of spatial layers.
int ts_number_layers; // Number of temporal layers.
// Bitrate allocation for spatial layers.
int layer_target_bitrate[VPX_MAX_LAYERS];
int ss_target_bitrate[VPX_SS_MAX_LAYERS];
int ss_enable_auto_arf[VPX_SS_MAX_LAYERS];
// Bitrate allocation (CBR mode) and framerate factor, for temporal layers.
int ts_rate_decimator[VPX_TS_MAX_LAYERS];
int enable_auto_arf;
int encode_breakout; // early breakout : for video conf recommend 800
@ -239,7 +228,7 @@ typedef struct VP10EncoderConfig {
int use_highbitdepth;
#endif
vpx_color_space_t color_space;
VP9E_TEMPORAL_LAYERING_MODE temporal_layering_mode;
int color_range;
} VP10EncoderConfig;
static INLINE int is_lossless_requested(const VP10EncoderConfig *cfg) {
@ -451,10 +440,6 @@ typedef struct VP10_COMP {
// number of MBs in the current frame when the frame is
// scaled.
int use_svc;
SVC svc;
// Store frame variance info in SOURCE_VAR_BASED_PARTITION search type.
diff *source_diff_var;
// The threshold used in SOURCE_VAR_BASED_PARTITION search type.
@ -549,8 +534,6 @@ int vp10_set_internal_size(VP10_COMP *cpi,
int vp10_set_size_literal(VP10_COMP *cpi, unsigned int width,
unsigned int height);
void vp10_set_svc(VP10_COMP *cpi, int use_svc);
int vp10_get_quantizer(struct VP10_COMP *cpi);
static INLINE int frame_is_kf_gf_arf(const VP10_COMP *cpi) {
@ -627,19 +610,9 @@ YV12_BUFFER_CONFIG *vp10_scale_if_required(VP10_COMMON *cm,
void vp10_apply_encoding_flags(VP10_COMP *cpi, vpx_enc_frame_flags_t flags);
static INLINE int is_two_pass_svc(const struct VP10_COMP *const cpi) {
return cpi->use_svc && cpi->oxcf.pass != 0;
}
static INLINE int is_one_pass_cbr_svc(const struct VP10_COMP *const cpi) {
return (cpi->use_svc && cpi->oxcf.pass == 0);
}
static INLINE int is_altref_enabled(const VP10_COMP *const cpi) {
return cpi->oxcf.mode != REALTIME && cpi->oxcf.lag_in_frames > 0 &&
(cpi->oxcf.enable_auto_arf &&
(!is_two_pass_svc(cpi) ||
cpi->oxcf.ss_enable_auto_arf[cpi->svc.spatial_layer_id]));
cpi->oxcf.enable_auto_arf;
}
static INLINE void set_ref_ptrs(VP10_COMMON *cm, MACROBLOCKD *xd,

vp10/encoder/ethread.c

@ -11,6 +11,7 @@
#include "vp10/encoder/encodeframe.h"
#include "vp10/encoder/encoder.h"
#include "vp10/encoder/ethread.h"
#include "vpx_dsp/vpx_dsp_common.h"
static void accumulate_rd_opt(ThreadData *td, ThreadData *td_t) {
int i, j, k, l, m, n;
@ -51,23 +52,11 @@ static int enc_worker_hook(EncWorkerData *const thread_data, void *unused) {
return 0;
}
static int get_max_tile_cols(VP10_COMP *cpi) {
const int aligned_width = ALIGN_POWER_OF_TWO(cpi->oxcf.width, MI_SIZE_LOG2);
int mi_cols = aligned_width >> MI_SIZE_LOG2;
int min_log2_tile_cols, max_log2_tile_cols;
int log2_tile_cols;
vp10_get_tile_n_bits(mi_cols, &min_log2_tile_cols, &max_log2_tile_cols);
log2_tile_cols = clamp(cpi->oxcf.tile_columns,
min_log2_tile_cols, max_log2_tile_cols);
return (1 << log2_tile_cols);
}
void vp10_encode_tiles_mt(VP10_COMP *cpi) {
VP10_COMMON *const cm = &cpi->common;
const int tile_cols = 1 << cm->log2_tile_cols;
const VPxWorkerInterface *const winterface = vpx_get_worker_interface();
const int num_workers = MIN(cpi->oxcf.max_threads, tile_cols);
const int num_workers = VPXMIN(cpi->oxcf.max_threads, tile_cols);
int i;
vp10_init_tile_data(cpi);
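With the SVC sizing gone, the worker pool is clamped purely by the frame's tile columns; a hypothetical instance:

/* --threads=8 with log2_tile_cols = 2:
   tile_cols   = 1 << 2 = 4
   num_workers = VPXMIN(8, 4) = 4, so only four workers are created
   even though eight threads were requested. */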
@ -76,13 +65,6 @@ void vp10_encode_tiles_mt(VP10_COMP *cpi) {
if (cpi->num_workers == 0) {
int allocated_workers = num_workers;
// While using SVC, we need to allocate threads according to the highest
// resolution.
if (cpi->use_svc) {
int max_tile_cols = get_max_tile_cols(cpi);
allocated_workers = MIN(cpi->oxcf.max_threads, max_tile_cols);
}
CHECK_MEM_ERROR(cm, cpi->workers,
vpx_malloc(allocated_workers * sizeof(*cpi->workers)));
@ -146,23 +128,6 @@ void vp10_encode_tiles_mt(VP10_COMP *cpi) {
memcpy(thread_data->td->counts, &cpi->common.counts,
sizeof(cpi->common.counts));
}
// Handle use_nonrd_pick_mode case.
if (cpi->sf.use_nonrd_pick_mode) {
MACROBLOCK *const x = &thread_data->td->mb;
MACROBLOCKD *const xd = &x->e_mbd;
struct macroblock_plane *const p = x->plane;
struct macroblockd_plane *const pd = xd->plane;
PICK_MODE_CONTEXT *ctx = &thread_data->td->pc_root->none;
int j;
for (j = 0; j < MAX_MB_PLANE; ++j) {
p[j].coeff = ctx->coeff_pbuf[j][0];
p[j].qcoeff = ctx->qcoeff_pbuf[j][0];
pd[j].dqcoeff = ctx->dqcoeff_pbuf[j][0];
p[j].eobs = ctx->eobs_pbuf[j][0];
}
}
}
// Encode a frame

vp10/encoder/ethread.h

@ -11,6 +11,10 @@
#ifndef VP10_ENCODER_ETHREAD_H_
#define VP10_ENCODER_ETHREAD_H_
#ifdef __cplusplus
extern "C" {
#endif
struct VP10_COMP;
struct ThreadData;
@ -22,4 +26,8 @@ typedef struct EncWorkerData {
void vp10_encode_tiles_mt(struct VP10_COMP *cpi);
#ifdef __cplusplus
} // extern "C"
#endif
#endif // VP10_ENCODER_ETHREAD_H_

vp10/encoder/extend.c

@ -8,6 +8,7 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#include "vpx_dsp/vpx_dsp_common.h"
#include "vpx_mem/vpx_mem.h"
#include "vpx_ports/mem.h"
@ -111,10 +112,12 @@ void vp10_copy_and_extend_frame(const YV12_BUFFER_CONFIG *src,
  // Motion estimation may use src block variance with block sizes up
  // to 64x64, so the right and bottom edges need to be extended to a
  // multiple of 64, or by at least 16, whichever is greater.
const int er_y = MAX(src->y_width + 16, ALIGN_POWER_OF_TWO(src->y_width, 6))
- src->y_crop_width;
const int eb_y = MAX(src->y_height + 16, ALIGN_POWER_OF_TWO(src->y_height, 6))
- src->y_crop_height;
const int er_y =
VPXMAX(src->y_width + 16, ALIGN_POWER_OF_TWO(src->y_width, 6)) -
src->y_crop_width;
const int eb_y =
VPXMAX(src->y_height + 16, ALIGN_POWER_OF_TWO(src->y_height, 6)) -
src->y_crop_height;
const int uv_width_subsampling = (src->uv_width != src->y_width);
const int uv_height_subsampling = (src->uv_height != src->y_height);
const int et_uv = et_y >> uv_height_subsampling;
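The edge math above is easiest to check with numbers; a sketch using the ALIGN_POWER_OF_TWO macro (from vpx_ports/mem.h) and a hypothetical 1900-pixel-wide source:

/* ALIGN_POWER_OF_TWO(v, n) rounds v up to a multiple of 1 << n.
   y_width = y_crop_width = 1900:
     ALIGN_POWER_OF_TWO(1900, 6) = 1920
     1900 + 16                   = 1916
     er_y = VPXMAX(1916, 1920) - 1900 = 20
   so the right edge is padded to the next multiple of 64 and never
   by less than 16 pixels. */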

vp10/encoder/firstpass.c

@ -15,6 +15,7 @@
#include "./vpx_dsp_rtcd.h"
#include "./vpx_scale_rtcd.h"
#include "vpx_dsp/vpx_dsp_common.h"
#include "vpx_mem/vpx_mem.h"
#include "vpx_ports/mem.h"
#include "vpx_ports/system_state.h"
@ -55,7 +56,6 @@
#define MIN_DECAY_FACTOR 0.01
#define MIN_KF_BOOST 300
#define NEW_MV_MODE_PENALTY 32
#define SVC_FACTOR_PT_LOW 0.45
#define DARK_THRESH 64
#define DEFAULT_GRP_WEIGHT 1.0
#define RC_FACTOR_MIN 0.75
@ -177,14 +177,12 @@ static void zero_stats(FIRSTPASS_STATS *section) {
section->new_mv_count = 0.0;
section->count = 0.0;
section->duration = 1.0;
section->spatial_layer_id = 0;
}
static void accumulate_stats(FIRSTPASS_STATS *section,
const FIRSTPASS_STATS *frame) {
section->frame += frame->frame;
section->weight += frame->weight;
section->spatial_layer_id = frame->spatial_layer_id;
section->intra_error += frame->intra_error;
section->coded_error += frame->coded_error;
section->sr_coded_error += frame->sr_coded_error;
@ -292,15 +290,7 @@ void vp10_init_first_pass(VP10_COMP *cpi) {
}
void vp10_end_first_pass(VP10_COMP *cpi) {
if (is_two_pass_svc(cpi)) {
int i;
for (i = 0; i < cpi->svc.number_spatial_layers; ++i) {
output_stats(&cpi->svc.layer_context[i].twopass.total_stats,
cpi->output_pkt_list);
}
} else {
output_stats(&cpi->twopass.total_stats, cpi->output_pkt_list);
}
output_stats(&cpi->twopass.total_stats, cpi->output_pkt_list);
}
static vpx_variance_fn_t get_block_variance_fn(BLOCK_SIZE bsize) {
@ -383,7 +373,7 @@ static unsigned int highbd_get_prediction_error(BLOCK_SIZE bsize,
// for first pass test.
static int get_search_range(const VP10_COMP *cpi) {
int sr = 0;
const int dim = MIN(cpi->initial_width, cpi->initial_height);
const int dim = VPXMIN(cpi->initial_width, cpi->initial_height);
while ((dim << sr) < MAX_FULL_PEL_VAL)
++sr;
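A worked instance of the loop above, assuming MAX_FULL_PEL_VAL is 1023 ((1 << (MAX_MVSEARCH_STEPS - 1)) - 1) as in the sibling vp9 tree:

/* 640x360 input: dim = VPXMIN(640, 360) = 360
   sr = 0: 360 << 0 = 360  < 1023 -> ++sr
   sr = 1: 360 << 1 = 720  < 1023 -> ++sr
   sr = 2: 360 << 2 = 1440 >= 1023 -> stop; search range sr = 2 */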
@ -530,16 +520,13 @@ void vp10_first_pass(VP10_COMP *cpi, const struct lookahead_entry *source) {
YV12_BUFFER_CONFIG *gld_yv12 = get_ref_frame_buffer(cpi, GOLDEN_FRAME);
YV12_BUFFER_CONFIG *const new_yv12 = get_frame_new_buffer(cm);
const YV12_BUFFER_CONFIG *first_ref_buf = lst_yv12;
LAYER_CONTEXT *const lc = is_two_pass_svc(cpi) ?
&cpi->svc.layer_context[cpi->svc.spatial_layer_id] : NULL;
double intra_factor;
double brightness_factor;
BufferPool *const pool = cm->buffer_pool;
// First pass code requires valid last and new frame buffers.
assert(new_yv12 != NULL);
assert((lc != NULL) || frame_is_intra_only(cm) || (lst_yv12 != NULL));
assert(frame_is_intra_only(cm) || (lst_yv12 != NULL));
#if CONFIG_FP_MB_STATS
if (cpi->use_fp_mb_stats) {
@ -556,51 +543,6 @@ void vp10_first_pass(VP10_COMP *cpi, const struct lookahead_entry *source) {
set_first_pass_params(cpi);
vp10_set_quantizer(cm, find_fp_qindex(cm->bit_depth));
if (lc != NULL) {
twopass = &lc->twopass;
cpi->lst_fb_idx = cpi->svc.spatial_layer_id;
cpi->ref_frame_flags = VP9_LAST_FLAG;
if (cpi->svc.number_spatial_layers + cpi->svc.spatial_layer_id <
REF_FRAMES) {
cpi->gld_fb_idx =
cpi->svc.number_spatial_layers + cpi->svc.spatial_layer_id;
cpi->ref_frame_flags |= VP9_GOLD_FLAG;
cpi->refresh_golden_frame = (lc->current_video_frame_in_layer == 0);
} else {
cpi->refresh_golden_frame = 0;
}
if (lc->current_video_frame_in_layer == 0)
cpi->ref_frame_flags = 0;
vp10_scale_references(cpi);
// Use either last frame or alt frame for motion search.
if (cpi->ref_frame_flags & VP9_LAST_FLAG) {
first_ref_buf = vp10_get_scaled_ref_frame(cpi, LAST_FRAME);
if (first_ref_buf == NULL)
first_ref_buf = get_ref_frame_buffer(cpi, LAST_FRAME);
}
if (cpi->ref_frame_flags & VP9_GOLD_FLAG) {
gld_yv12 = vp10_get_scaled_ref_frame(cpi, GOLDEN_FRAME);
if (gld_yv12 == NULL) {
gld_yv12 = get_ref_frame_buffer(cpi, GOLDEN_FRAME);
}
} else {
gld_yv12 = NULL;
}
set_ref_ptrs(cm, xd,
(cpi->ref_frame_flags & VP9_LAST_FLAG) ? LAST_FRAME: NONE,
(cpi->ref_frame_flags & VP9_GOLD_FLAG) ? GOLDEN_FRAME : NONE);
cpi->Source = vp10_scale_if_required(cm, cpi->un_scaled_source,
&cpi->scaled_source);
}
vp10_setup_block_planes(&x->e_mbd, cm->subsampling_x, cm->subsampling_y);
vp10_setup_src_planes(x, cpi->Source, 0, 0);
@ -672,7 +614,6 @@ void vp10_first_pass(VP10_COMP *cpi, const struct lookahead_entry *source) {
cm->mi_rows, cm->mi_cols);
// Do intra 16x16 prediction.
x->skip_encode = 0;
xd->mi[0]->mbmi.mode = DC_PRED;
xd->mi[0]->mbmi.tx_size = use_dc_pred ?
(bsize >= BLOCK_16X16 ? TX_16X16 : TX_8X8) : TX_4X4;
@ -754,8 +695,7 @@ void vp10_first_pass(VP10_COMP *cpi, const struct lookahead_entry *source) {
x->mv_col_max = ((cm->mb_cols - 1 - mb_col) * 16) + BORDER_MV_PIXELS_B16;
// Other than for the first frame do a motion search.
if ((lc == NULL && cm->current_video_frame > 0) ||
(lc != NULL && lc->current_video_frame_in_layer > 0)) {
if (cm->current_video_frame > 0) {
int tmp_err, motion_error, raw_motion_error;
// Assume 0,0 motion with no mv overhead.
MV mv = {0, 0} , tmp_mv = {0, 0};
@ -796,7 +736,7 @@ void vp10_first_pass(VP10_COMP *cpi, const struct lookahead_entry *source) {
#endif // CONFIG_VP9_HIGHBITDEPTH
// TODO(pengchong): Replace the hard-coded threshold
if (raw_motion_error > 25 || lc != NULL) {
if (raw_motion_error > 25) {
// Test last reference frame using the previous best mv as the
// starting point (best reference) for the search.
first_pass_motion_search(cpi, x, &best_ref_mv, &mv, &motion_error);
@ -814,9 +754,7 @@ void vp10_first_pass(VP10_COMP *cpi, const struct lookahead_entry *source) {
}
// Search in an older reference frame.
if (((lc == NULL && cm->current_video_frame > 1) ||
(lc != NULL && lc->current_video_frame_in_layer > 1))
&& gld_yv12 != NULL) {
if ((cm->current_video_frame > 1) && gld_yv12 != NULL) {
// Assume 0,0 motion with no mv overhead.
int gf_motion_error;
@ -1026,7 +964,7 @@ void vp10_first_pass(VP10_COMP *cpi, const struct lookahead_entry *source) {
// Exclude any image dead zone
if (image_data_start_row > 0) {
intra_skip_count =
MAX(0, intra_skip_count - (image_data_start_row * cm->mb_cols * 2));
VPXMAX(0, intra_skip_count - (image_data_start_row * cm->mb_cols * 2));
}
{
@ -1045,7 +983,6 @@ void vp10_first_pass(VP10_COMP *cpi, const struct lookahead_entry *source) {
fps.weight = intra_factor * brightness_factor;
fps.frame = cm->current_video_frame;
fps.spatial_layer_id = cpi->svc.spatial_layer_id;
fps.coded_error = (double)(coded_error >> 8) + min_err;
fps.sr_coded_error = (double)(sr_coded_error >> 8) + min_err;
fps.intra_error = (double)(intra_error >> 8) + min_err;
@ -1116,18 +1053,13 @@ void vp10_first_pass(VP10_COMP *cpi, const struct lookahead_entry *source) {
vpx_extend_frame_borders(new_yv12);
if (lc != NULL) {
vp10_update_reference_frames(cpi);
} else {
// The frame we just compressed now becomes the last frame.
ref_cnt_fb(pool->frame_bufs, &cm->ref_frame_map[cpi->lst_fb_idx],
cm->new_fb_idx);
}
// The frame we just compressed now becomes the last frame.
ref_cnt_fb(pool->frame_bufs, &cm->ref_frame_map[cpi->lst_fb_idx],
cm->new_fb_idx);
// Special case for the first frame. Copy into the GF buffer as a second
// reference.
if (cm->current_video_frame == 0 && cpi->gld_fb_idx != INVALID_IDX &&
lc == NULL) {
if (cm->current_video_frame == 0 && cpi->gld_fb_idx != INVALID_IDX) {
ref_cnt_fb(pool->frame_bufs, &cm->ref_frame_map[cpi->gld_fb_idx],
cm->ref_frame_map[cpi->lst_fb_idx]);
}
@ -1149,8 +1081,6 @@ void vp10_first_pass(VP10_COMP *cpi, const struct lookahead_entry *source) {
}
++cm->current_video_frame;
if (cpi->use_svc)
vp10_inc_frame_in_layer(cpi);
}
static double calc_correction_factor(double err_per_mb,
@ -1163,7 +1093,7 @@ static double calc_correction_factor(double err_per_mb,
// Adjustment based on actual quantizer to power term.
const double power_term =
MIN(vp10_convert_qindex_to_q(q, bit_depth) * 0.01 + pt_low, pt_high);
VPXMIN(vp10_convert_qindex_to_q(q, bit_depth) * 0.01 + pt_low, pt_high);
// Calculate correction factor.
if (power_term < 1.0)
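For orientation, since the hunk above cuts off mid-function: the factor produced by calc_correction_factor() has roughly this shape (a hedged restatement; the exact clamp bounds sit outside the hunk):

/* power_term = VPXMIN(q_in_quantizer_units * 0.01 + pt_low, pt_high);
   factor     ~ pow(err_per_mb / err_divisor, power_term),
   clamped to a fixed range later in the function. */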
@ -1192,7 +1122,7 @@ static int get_twopass_worst_quality(const VP10_COMP *cpi,
} else {
const int num_mbs = (cpi->oxcf.resize_mode != RESIZE_NONE)
? cpi->initial_mbs : cpi->common.MBs;
const int active_mbs = MAX(1, num_mbs - (int)(num_mbs * inactive_zone));
const int active_mbs = VPXMAX(1, num_mbs - (int)(num_mbs * inactive_zone));
const double av_err_per_mb = section_err / active_mbs;
const double speed_term = 1.0 + 0.04 * oxcf->speed;
const double ediv_size_correction = (double)num_mbs / EDIV_SIZE_FACTOR;
@ -1200,11 +1130,6 @@ static int get_twopass_worst_quality(const VP10_COMP *cpi,
BPER_MB_NORMBITS) / active_mbs;
int q;
int is_svc_upper_layer = 0;
if (is_two_pass_svc(cpi) && cpi->svc.spatial_layer_id > 0)
is_svc_upper_layer = 1;
// Try and pick a max Q that will be high enough to encode the
// content at the given rate.
@ -1212,7 +1137,6 @@ static int get_twopass_worst_quality(const VP10_COMP *cpi,
const double factor =
calc_correction_factor(av_err_per_mb,
ERR_DIVISOR - ediv_size_correction,
is_svc_upper_layer ? SVC_FACTOR_PT_LOW :
FACTOR_PT_LOW, FACTOR_PT_HIGH, q,
cpi->common.bit_depth);
const int bits_per_mb =
@ -1225,7 +1149,7 @@ static int get_twopass_worst_quality(const VP10_COMP *cpi,
// Restriction on active max q for constrained quality mode.
if (cpi->oxcf.rc_mode == VPX_CQ)
q = MAX(q, oxcf->cq_level);
q = VPXMAX(q, oxcf->cq_level);
return q;
}
}
@ -1235,7 +1159,7 @@ static void setup_rf_level_maxq(VP10_COMP *cpi) {
RATE_CONTROL *const rc = &cpi->rc;
for (i = INTER_NORMAL; i < RATE_FACTOR_LEVELS; ++i) {
int qdelta = vp10_frame_type_qdelta(cpi, i, rc->worst_quality);
rc->rf_level_maxq[i] = MAX(rc->worst_quality + qdelta, rc->best_quality);
rc->rf_level_maxq[i] = VPXMAX(rc->worst_quality + qdelta, rc->best_quality);
}
}
@ -1264,12 +1188,8 @@ void vp10_calculate_coded_size(VP10_COMP *cpi,
}
void vp10_init_second_pass(VP10_COMP *cpi) {
SVC *const svc = &cpi->svc;
const VP10EncoderConfig *const oxcf = &cpi->oxcf;
const int is_two_pass_svc = (svc->number_spatial_layers > 1) ||
(svc->number_temporal_layers > 1);
TWO_PASS *const twopass = is_two_pass_svc ?
&svc->layer_context[svc->spatial_layer_id].twopass : &cpi->twopass;
TWO_PASS *const twopass = &cpi->twopass;
double frame_rate;
FIRSTPASS_STATS *stats;
@ -1290,17 +1210,9 @@ void vp10_init_second_pass(VP10_COMP *cpi) {
// encoded in the second pass is a guess. However, the sum duration is not.
// It is calculated based on the actual durations of all frames from the
// first pass.
if (is_two_pass_svc) {
vp10_update_spatial_layer_framerate(cpi, frame_rate);
twopass->bits_left = (int64_t)(stats->duration *
svc->layer_context[svc->spatial_layer_id].target_bandwidth /
10000000.0);
} else {
vp10_new_framerate(cpi, frame_rate);
twopass->bits_left = (int64_t)(stats->duration * oxcf->target_bandwidth /
10000000.0);
}
vp10_new_framerate(cpi, frame_rate);
twopass->bits_left = (int64_t)(stats->duration * oxcf->target_bandwidth /
10000000.0);
// This variable monitors how far behind the second ref update is lagging.
twopass->sr_update_lag = 1;
@ -1366,12 +1278,12 @@ static double get_sr_decay_rate(const VP10_COMP *cpi,
if ((sr_diff > LOW_SR_DIFF_TRHESH)) {
sr_diff = MIN(sr_diff, SR_DIFF_MAX);
sr_diff = VPXMIN(sr_diff, SR_DIFF_MAX);
sr_decay = 1.0 - (SR_DIFF_PART * sr_diff) -
(MOTION_AMP_PART * motion_amplitude_factor) -
(INTRA_PART * modified_pcnt_intra);
}
return MAX(sr_decay, MIN(DEFAULT_DECAY_LIMIT, modified_pct_inter));
return VPXMAX(sr_decay, VPXMIN(DEFAULT_DECAY_LIMIT, modified_pct_inter));
}
// This function gives an estimate of how badly we believe the prediction
@ -1381,7 +1293,7 @@ static double get_zero_motion_factor(const VP10_COMP *cpi,
const double zero_motion_pct = frame->pcnt_inter -
frame->pcnt_motion;
double sr_decay = get_sr_decay_rate(cpi, frame);
return MIN(sr_decay, zero_motion_pct);
return VPXMIN(sr_decay, zero_motion_pct);
}
#define ZM_POWER_FACTOR 0.75
@ -1393,8 +1305,8 @@ static double get_prediction_decay_rate(const VP10_COMP *cpi,
(0.95 * pow((next_frame->pcnt_inter - next_frame->pcnt_motion),
ZM_POWER_FACTOR));
return MAX(zero_motion_factor,
(sr_decay_rate + ((1.0 - sr_decay_rate) * zero_motion_factor)));
return VPXMAX(zero_motion_factor,
(sr_decay_rate + ((1.0 - sr_decay_rate) * zero_motion_factor)));
}
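Restating the decay math above as one expression (ZM_POWER_FACTOR is defined as 0.75 just before this hunk):

/* zmf   = 0.95 * pow(pcnt_inter - pcnt_motion, 0.75);
   decay = VPXMAX(zmf, sr_decay + (1.0 - sr_decay) * zmf);
   i.e. strongly static content (zmf near 1) keeps the decay near 1. */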
// Function to test for a condition where a complex transition is followed
@ -1485,12 +1397,12 @@ static double calc_frame_boost(VP10_COMP *cpi,
const double lq =
vp10_convert_qindex_to_q(cpi->rc.avg_frame_qindex[INTER_FRAME],
cpi->common.bit_depth);
const double boost_q_correction = MIN((0.5 + (lq * 0.015)), 1.5);
const double boost_q_correction = VPXMIN((0.5 + (lq * 0.015)), 1.5);
int num_mbs = (cpi->oxcf.resize_mode != RESIZE_NONE)
? cpi->initial_mbs : cpi->common.MBs;
// Correct for any inactive region in the image
num_mbs = (int)MAX(1, num_mbs * calculate_active_area(cpi, this_frame));
num_mbs = (int)VPXMAX(1, num_mbs * calculate_active_area(cpi, this_frame));
// Underlying boost factor is based on inter error ratio.
frame_boost = (BASELINE_ERR_PER_MB * num_mbs) /
@ -1506,7 +1418,7 @@ static double calc_frame_boost(VP10_COMP *cpi,
else
frame_boost += frame_boost * (this_frame_mv_in_out / 2.0);
return MIN(frame_boost, max_boost * boost_q_correction);
return VPXMIN(frame_boost, max_boost * boost_q_correction);
}
static int calc_arf_boost(VP10_COMP *cpi, int offset,
@ -1595,7 +1507,7 @@ static int calc_arf_boost(VP10_COMP *cpi, int offset,
arf_boost = (*f_boost + *b_boost);
if (arf_boost < ((b_frames + f_frames) * 20))
arf_boost = ((b_frames + f_frames) * 20);
arf_boost = MAX(arf_boost, MIN_ARF_GF_BOOST);
arf_boost = VPXMAX(arf_boost, MIN_ARF_GF_BOOST);
return arf_boost;
}
@ -1666,7 +1578,8 @@ static int calculate_boost_bits(int frame_count,
}
// Calculate the number of extra bits for use in the boosted frame or frames.
return MAX((int)(((int64_t)boost * total_group_bits) / allocation_chunks), 0);
return VPXMAX((int)(((int64_t)boost * total_group_bits) / allocation_chunks),
0);
}
// Current limit on maximum number of active arfs in a GF/ARF group.
@ -1700,15 +1613,8 @@ static void allocate_gf_group_bits(VP10_COMP *cpi, int64_t gf_group_bits,
int mid_frame_idx;
unsigned char arf_buffer_indices[MAX_ACTIVE_ARFS];
int alt_frame_index = frame_index;
int has_temporal_layers = is_two_pass_svc(cpi) &&
cpi->svc.number_temporal_layers > 1;
// Only encode alt reference frame in temporal base layer.
if (has_temporal_layers)
alt_frame_index = cpi->svc.number_temporal_layers;
key_frame = cpi->common.frame_type == KEY_FRAME ||
vp10_is_upper_layer_key_frame(cpi);
key_frame = cpi->common.frame_type == KEY_FRAME;
get_arf_buffer_indices(arf_buffer_indices);
@ -1745,20 +1651,14 @@ static void allocate_gf_group_bits(VP10_COMP *cpi, int64_t gf_group_bits,
gf_group->rf_level[alt_frame_index] = GF_ARF_STD;
gf_group->bit_allocation[alt_frame_index] = gf_arf_bits;
if (has_temporal_layers)
gf_group->arf_src_offset[alt_frame_index] =
(unsigned char)(rc->baseline_gf_interval -
cpi->svc.number_temporal_layers);
else
gf_group->arf_src_offset[alt_frame_index] =
(unsigned char)(rc->baseline_gf_interval - 1);
gf_group->arf_src_offset[alt_frame_index] =
(unsigned char)(rc->baseline_gf_interval - 1);
gf_group->arf_update_idx[alt_frame_index] = arf_buffer_indices[0];
gf_group->arf_ref_idx[alt_frame_index] =
arf_buffer_indices[cpi->multi_arf_last_grp_enabled &&
rc->source_alt_ref_active];
if (!has_temporal_layers)
++frame_index;
++frame_index;
if (cpi->multi_arf_enabled) {
// Set aside a slot for a level 1 arf.
@ -1781,10 +1681,6 @@ static void allocate_gf_group_bits(VP10_COMP *cpi, int64_t gf_group_bits,
if (EOF == input_stats(twopass, &frame_stats))
break;
if (has_temporal_layers && frame_index == alt_frame_index) {
++frame_index;
}
modified_err = calculate_modified_err(cpi, twopass, oxcf, &frame_stats);
if (group_error > 0)
@ -1805,7 +1701,7 @@ static void allocate_gf_group_bits(VP10_COMP *cpi, int64_t gf_group_bits,
gf_group->arf_ref_idx[frame_index] = arf_buffer_indices[arf_idx];
target_frame_size = clamp(target_frame_size, 0,
MIN(max_bits, (int)total_group_bits));
VPXMIN(max_bits, (int)total_group_bits));
gf_group->update_type[frame_index] = LF_UPDATE;
gf_group->rf_level[frame_index] = INTER_NORMAL;
@ -1926,7 +1822,7 @@ static void define_gf_group(VP10_COMP *cpi, FIRSTPASS_STATS *this_frame) {
int int_lbq =
(int)(vp10_convert_qindex_to_q(rc->last_boosted_qindex,
cpi->common.bit_depth));
active_min_gf_interval = rc->min_gf_interval + MIN(2, int_max_q / 200);
active_min_gf_interval = rc->min_gf_interval + VPXMIN(2, int_max_q / 200);
if (active_min_gf_interval > rc->max_gf_interval)
active_min_gf_interval = rc->max_gf_interval;
@ -1937,7 +1833,7 @@ static void define_gf_group(VP10_COMP *cpi, FIRSTPASS_STATS *this_frame) {
// bits to spare and are better with a smaller interval and smaller boost.
// At high Q when there are few bits to spare we are better with a longer
// interval to spread the cost of the GF.
active_max_gf_interval = 12 + MIN(4, (int_lbq / 6));
active_max_gf_interval = 12 + VPXMIN(4, (int_lbq / 6));
if (active_max_gf_interval < active_min_gf_interval)
active_max_gf_interval = active_min_gf_interval;
@ -1982,8 +1878,8 @@ static void define_gf_group(VP10_COMP *cpi, FIRSTPASS_STATS *this_frame) {
decay_accumulator = decay_accumulator * loop_decay_rate;
// Monitor for static sections.
zero_motion_accumulator =
MIN(zero_motion_accumulator, get_zero_motion_factor(cpi, &next_frame));
zero_motion_accumulator = VPXMIN(
zero_motion_accumulator, get_zero_motion_factor(cpi, &next_frame));
// Break clause to detect very still sections after motion. For example,
// a static image after a fade or other transition.
@ -2039,33 +1935,13 @@ static void define_gf_group(VP10_COMP *cpi, FIRSTPASS_STATS *this_frame) {
(cpi->multi_arf_allowed && (rc->baseline_gf_interval >= 6) &&
(zero_motion_accumulator < 0.995)) ? 1 : 0;
} else {
rc->gfu_boost = MAX((int)boost_score, MIN_ARF_GF_BOOST);
rc->gfu_boost = VPXMAX((int)boost_score, MIN_ARF_GF_BOOST);
rc->source_alt_ref_pending = 0;
}
// Set the interval until the next gf.
rc->baseline_gf_interval = i - (is_key_frame || rc->source_alt_ref_pending);
  // Only encode the alt reference frame in the temporal base layer, so
  // baseline_gf_interval should be a multiple of a temporal layer group
  // (typically the frame distance between two base layer frames).
if (is_two_pass_svc(cpi) && cpi->svc.number_temporal_layers > 1) {
int count = (1 << (cpi->svc.number_temporal_layers - 1)) - 1;
int new_gf_interval = (rc->baseline_gf_interval + count) & (~count);
int j;
for (j = 0; j < new_gf_interval - rc->baseline_gf_interval; ++j) {
if (EOF == input_stats(twopass, this_frame))
break;
gf_group_err += calculate_modified_err(cpi, twopass, oxcf, this_frame);
#if GROUP_ADAPTIVE_MAXQ
gf_group_raw_error += this_frame->coded_error;
#endif
gf_group_skip_pct += this_frame->intra_skip_pct;
gf_group_inactive_zone_rows += this_frame->inactive_zone_rows;
}
rc->baseline_gf_interval = new_gf_interval;
}
rc->frames_till_gf_update_due = rc->baseline_gf_interval;
// Reset the file position.
@ -2094,11 +1970,11 @@ static void define_gf_group(VP10_COMP *cpi, FIRSTPASS_STATS *this_frame) {
// rc factor is a weight factor that corrects for local rate control drift.
double rc_factor = 1.0;
if (rc->rate_error_estimate > 0) {
rc_factor = MAX(RC_FACTOR_MIN,
(double)(100 - rc->rate_error_estimate) / 100.0);
rc_factor = VPXMAX(RC_FACTOR_MIN,
(double)(100 - rc->rate_error_estimate) / 100.0);
} else {
rc_factor = MIN(RC_FACTOR_MAX,
(double)(100 - rc->rate_error_estimate) / 100.0);
rc_factor = VPXMIN(RC_FACTOR_MAX,
(double)(100 - rc->rate_error_estimate) / 100.0);
}
tmp_q =
get_twopass_worst_quality(cpi, group_av_err,
@ -2106,7 +1982,7 @@ static void define_gf_group(VP10_COMP *cpi, FIRSTPASS_STATS *this_frame) {
vbr_group_bits_per_frame,
twopass->kfgroup_inter_fraction * rc_factor);
twopass->active_worst_quality =
MAX(tmp_q, twopass->active_worst_quality >> 1);
VPXMAX(tmp_q, twopass->active_worst_quality >> 1);
}
#endif
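A worked instance of the rc_factor damping above, using RC_FACTOR_MIN = 0.75 from the defines at the top of this file and an assumed drift value:

/* assumed: rate_error_estimate = 40 (percent)
   rc_factor = VPXMAX(0.75, (100 - 40) / 100.0)
             = VPXMAX(0.75, 0.60) = 0.75
   so the factor is floored at RC_FACTOR_MIN even for large drift. */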
@ -2385,18 +2261,6 @@ static void find_next_key_frame(VP10_COMP *cpi, FIRSTPASS_STATS *this_frame) {
rc->next_key_frame_forced = 0;
}
if (is_two_pass_svc(cpi) && cpi->svc.number_temporal_layers > 1) {
int count = (1 << (cpi->svc.number_temporal_layers - 1)) - 1;
int new_frame_to_key = (rc->frames_to_key + count) & (~count);
int j;
for (j = 0; j < new_frame_to_key - rc->frames_to_key; ++j) {
if (EOF == input_stats(twopass, this_frame))
break;
kf_group_err += calculate_modified_err(cpi, twopass, oxcf, this_frame);
}
rc->frames_to_key = new_frame_to_key;
}
// Special case for the last key frame of the file.
if (twopass->stats_in >= twopass->stats_in_end) {
// Accumulate kf group error.
@ -2423,7 +2287,7 @@ static void find_next_key_frame(VP10_COMP *cpi, FIRSTPASS_STATS *this_frame) {
} else {
twopass->kf_group_bits = 0;
}
twopass->kf_group_bits = MAX(0, twopass->kf_group_bits);
twopass->kf_group_bits = VPXMAX(0, twopass->kf_group_bits);
// Reset the first pass file position.
reset_fpf_position(twopass, start_position);
@ -2437,9 +2301,8 @@ static void find_next_key_frame(VP10_COMP *cpi, FIRSTPASS_STATS *this_frame) {
break;
// Monitor for static sections.
zero_motion_accumulator =
MIN(zero_motion_accumulator,
get_zero_motion_factor(cpi, &next_frame));
zero_motion_accumulator = VPXMIN(
zero_motion_accumulator, get_zero_motion_factor(cpi, &next_frame));
// Not all frames in the group are necessarily used in calculating boost.
if ((i <= rc->max_gf_interval) ||
@ -2452,7 +2315,7 @@ static void find_next_key_frame(VP10_COMP *cpi, FIRSTPASS_STATS *this_frame) {
const double loop_decay_rate =
get_prediction_decay_rate(cpi, &next_frame);
decay_accumulator *= loop_decay_rate;
decay_accumulator = MAX(decay_accumulator, MIN_DECAY_FACTOR);
decay_accumulator = VPXMAX(decay_accumulator, MIN_DECAY_FACTOR);
av_decay_accumulator += decay_accumulator;
++loop_decay_counter;
}
@ -2473,8 +2336,8 @@ static void find_next_key_frame(VP10_COMP *cpi, FIRSTPASS_STATS *this_frame) {
// Apply various clamps for min and max boost
rc->kf_boost = (int)(av_decay_accumulator * boost_score);
rc->kf_boost = MAX(rc->kf_boost, (rc->frames_to_key * 3));
rc->kf_boost = MAX(rc->kf_boost, MIN_KF_BOOST);
rc->kf_boost = VPXMAX(rc->kf_boost, (rc->frames_to_key * 3));
rc->kf_boost = VPXMAX(rc->kf_boost, MIN_KF_BOOST);
// Work out how many bits to allocate for the key frame itself.
kf_bits = calculate_boost_bits((rc->frames_to_key - 1),
@ -2547,16 +2410,6 @@ static void configure_buffer_updates(VP10_COMP *cpi) {
assert(0);
break;
}
if (is_two_pass_svc(cpi)) {
if (cpi->svc.temporal_layer_id > 0) {
cpi->refresh_last_frame = 0;
cpi->refresh_golden_frame = 0;
}
if (cpi->svc.layer_context[cpi->svc.spatial_layer_id].gold_ref_idx < 0)
cpi->refresh_golden_frame = 0;
if (cpi->alt_ref_source == NULL)
cpi->refresh_alt_ref_frame = 0;
}
}
static int is_skippable_frame(const VP10_COMP *cpi) {
@ -2564,9 +2417,7 @@ static int is_skippable_frame(const VP10_COMP *cpi) {
// first pass, and so do its previous and forward frames, then this frame
// can be skipped for partition check, and the partition size is assigned
// according to the variance
const SVC *const svc = &cpi->svc;
const TWO_PASS *const twopass = is_two_pass_svc(cpi) ?
&svc->layer_context[svc->spatial_layer_id].twopass : &cpi->twopass;
const TWO_PASS *const twopass = &cpi->twopass;
return (!frame_is_intra_only(&cpi->common) &&
twopass->stats_in - 2 > twopass->stats_in_start &&
@ -2587,16 +2438,9 @@ void vp10_rc_get_second_pass_params(VP10_COMP *cpi) {
FIRSTPASS_STATS this_frame;
int target_rate;
LAYER_CONTEXT *const lc = is_two_pass_svc(cpi) ?
&cpi->svc.layer_context[cpi->svc.spatial_layer_id] : 0;
if (lc != NULL) {
frames_left = (int)(twopass->total_stats.count -
lc->current_video_frame_in_layer);
} else {
frames_left = (int)(twopass->total_stats.count -
cm->current_video_frame);
}
frames_left = (int)(twopass->total_stats.count -
cm->current_video_frame);
if (!twopass->stats_in)
return;
@ -2612,21 +2456,9 @@ void vp10_rc_get_second_pass_params(VP10_COMP *cpi) {
cm->frame_type = INTER_FRAME;
if (lc != NULL) {
if (cpi->svc.spatial_layer_id == 0) {
lc->is_key_frame = 0;
} else {
lc->is_key_frame = cpi->svc.layer_context[0].is_key_frame;
if (lc->is_key_frame)
cpi->ref_frame_flags &= (~VP9_LAST_FLAG);
}
}
// Do the firstpass stats indicate that this frame is skippable for the
// partition search?
if (cpi->sf.allow_partition_search_skip &&
cpi->oxcf.pass == 2 && (!cpi->use_svc || is_two_pass_svc(cpi))) {
if (cpi->sf.allow_partition_search_skip && cpi->oxcf.pass == 2) {
cpi->partition_search_skippable_frame = is_skippable_frame(cpi);
}
@ -2637,8 +2469,7 @@ void vp10_rc_get_second_pass_params(VP10_COMP *cpi) {
if (cpi->oxcf.rc_mode == VPX_Q) {
twopass->active_worst_quality = cpi->oxcf.cq_level;
} else if (cm->current_video_frame == 0 ||
(lc != NULL && lc->current_video_frame_in_layer == 0)) {
} else if (cm->current_video_frame == 0) {
// Special case code for first frame.
const int section_target_bandwidth = (int)(twopass->bits_left /
frames_left);
@ -2685,34 +2516,11 @@ void vp10_rc_get_second_pass_params(VP10_COMP *cpi) {
cm->frame_type = INTER_FRAME;
}
if (lc != NULL) {
if (cpi->svc.spatial_layer_id == 0) {
lc->is_key_frame = (cm->frame_type == KEY_FRAME);
if (lc->is_key_frame) {
cpi->ref_frame_flags &=
(~VP9_LAST_FLAG & ~VP9_GOLD_FLAG & ~VP9_ALT_FLAG);
lc->frames_from_key_frame = 0;
// Encode an intra only empty frame since we have a key frame.
cpi->svc.encode_intra_empty_frame = 1;
}
} else {
cm->frame_type = INTER_FRAME;
lc->is_key_frame = cpi->svc.layer_context[0].is_key_frame;
if (lc->is_key_frame) {
cpi->ref_frame_flags &= (~VP9_LAST_FLAG);
lc->frames_from_key_frame = 0;
}
}
}
// Define a new GF/ARF group. (Should always enter here for key frames).
if (rc->frames_till_gf_update_due == 0) {
define_gf_group(cpi, &this_frame);
rc->frames_till_gf_update_due = rc->baseline_gf_interval;
if (lc != NULL)
cpi->refresh_golden_frame = 1;
#if ARF_STATS_OUTPUT
{
@ -2732,8 +2540,7 @@ void vp10_rc_get_second_pass_params(VP10_COMP *cpi) {
// Do the firstpass stats indicate that this frame is skippable for the
// partition search?
if (cpi->sf.allow_partition_search_skip && cpi->oxcf.pass == 2 &&
(!cpi->use_svc || is_two_pass_svc(cpi))) {
if (cpi->sf.allow_partition_search_skip && cpi->oxcf.pass == 2) {
cpi->partition_search_skippable_frame = is_skippable_frame(cpi);
}
@ -2772,7 +2579,7 @@ void vp10_twopass_postencode_update(VP10_COMP *cpi) {
// is designed to prevent extreme behaviour at the end of a clip
// or group of frames.
rc->vbr_bits_off_target += rc->base_frame_target - rc->projected_frame_size;
twopass->bits_left = MAX(twopass->bits_left - bits_used, 0);
twopass->bits_left = VPXMAX(twopass->bits_left - bits_used, 0);
// Calculate the pct rc error.
if (rc->total_actual_bits) {
@ -2783,12 +2590,11 @@ void vp10_twopass_postencode_update(VP10_COMP *cpi) {
rc->rate_error_estimate = 0;
}
if (cpi->common.frame_type != KEY_FRAME &&
!vp10_is_upper_layer_key_frame(cpi)) {
if (cpi->common.frame_type != KEY_FRAME) {
twopass->kf_group_bits -= bits_used;
twopass->last_kfgroup_zeromotion_pct = twopass->kf_zeromotion_pct;
}
twopass->kf_group_bits = MAX(twopass->kf_group_bits, 0);
twopass->kf_group_bits = VPXMAX(twopass->kf_group_bits, 0);
// Increment the gf group index ready for the next frame.
++twopass->gf_group.index;
@ -2838,18 +2644,18 @@ void vp10_twopass_postencode_update(VP10_COMP *cpi) {
rc->vbr_bits_off_target_fast +=
fast_extra_thresh - rc->projected_frame_size;
rc->vbr_bits_off_target_fast =
MIN(rc->vbr_bits_off_target_fast, (4 * rc->avg_frame_bandwidth));
VPXMIN(rc->vbr_bits_off_target_fast, (4 * rc->avg_frame_bandwidth));
// Fast adaptation of minQ if necessary to use up the extra bits.
if (rc->avg_frame_bandwidth) {
twopass->extend_minq_fast =
(int)(rc->vbr_bits_off_target_fast * 8 / rc->avg_frame_bandwidth);
}
twopass->extend_minq_fast = MIN(twopass->extend_minq_fast,
minq_adj_limit - twopass->extend_minq);
twopass->extend_minq_fast = VPXMIN(
twopass->extend_minq_fast, minq_adj_limit - twopass->extend_minq);
} else if (rc->vbr_bits_off_target_fast) {
twopass->extend_minq_fast = MIN(twopass->extend_minq_fast,
minq_adj_limit - twopass->extend_minq);
twopass->extend_minq_fast = VPXMIN(
twopass->extend_minq_fast, minq_adj_limit - twopass->extend_minq);
} else {
twopass->extend_minq_fast = 0;
}

vp10/encoder/firstpass.h

@ -64,7 +64,6 @@ typedef struct {
double new_mv_count;
double duration;
double count;
int64_t spatial_layer_id;
} FIRSTPASS_STATS;
typedef enum {

vp10/encoder/lookahead.h

@ -14,11 +14,6 @@
#include "vpx_scale/yv12config.h"
#include "vpx/vpx_integer.h"
#if CONFIG_SPATIAL_SVC
#include "vpx/vp8cx.h"
#include "vpx/vpx_encoder.h"
#endif
#ifdef __cplusplus
extern "C" {
#endif

vp10/encoder/mbgraph.c

@ -13,6 +13,7 @@
#include "./vp10_rtcd.h"
#include "./vpx_dsp_rtcd.h"
#include "vpx_dsp/vpx_dsp_common.h"
#include "vpx_mem/vpx_mem.h"
#include "vpx_ports/system_state.h"
#include "vp10/encoder/segmentation.h"
@ -41,7 +42,7 @@ static unsigned int do_16x16_motion_iteration(VP10_COMP *cpi,
// Further step/diamond searches as necessary
int step_param = mv_sf->reduce_first_step_size;
step_param = MIN(step_param, MAX_MVSEARCH_STEPS - 2);
step_param = VPXMIN(step_param, MAX_MVSEARCH_STEPS - 2);
vp10_set_mv_search_range(x, ref_mv);

vp10/encoder/mcomp.c

@ -15,6 +15,7 @@
#include "./vpx_config.h"
#include "./vpx_dsp_rtcd.h"
#include "vpx_dsp/vpx_dsp_common.h"
#include "vpx_mem/vpx_mem.h"
#include "vpx_ports/mem.h"
@ -37,10 +38,10 @@ void vp10_set_mv_search_range(MACROBLOCK *x, const MV *mv) {
int col_max = (mv->col >> 3) + MAX_FULL_PEL_VAL;
int row_max = (mv->row >> 3) + MAX_FULL_PEL_VAL;
col_min = MAX(col_min, (MV_LOW >> 3) + 1);
row_min = MAX(row_min, (MV_LOW >> 3) + 1);
col_max = MIN(col_max, (MV_UPP >> 3) - 1);
row_max = MIN(row_max, (MV_UPP >> 3) - 1);
col_min = VPXMAX(col_min, (MV_LOW >> 3) + 1);
row_min = VPXMAX(row_min, (MV_LOW >> 3) + 1);
col_max = VPXMIN(col_max, (MV_UPP >> 3) - 1);
row_max = VPXMIN(row_max, (MV_UPP >> 3) - 1);
// Get intersection of UMV window and valid MV window to reduce # of checks
// in diamond search.
@ -57,12 +58,12 @@ void vp10_set_mv_search_range(MACROBLOCK *x, const MV *mv) {
int vp10_init_search_range(int size) {
int sr = 0;
// Minimum search size no matter what the passed in value.
size = MAX(16, size);
size = VPXMAX(16, size);
while ((size << sr) < MAX_FULL_PEL_VAL)
sr++;
sr = MIN(sr, MAX_MVSEARCH_STEPS - 2);
sr = VPXMIN(sr, MAX_MVSEARCH_STEPS - 2);
return sr;
}
@ -297,10 +298,10 @@ static INLINE const uint8_t *pre(const uint8_t *buf, int stride, int r, int c) {
int br = bestmv->row * 8; \
int bc = bestmv->col * 8; \
int hstep = 4; \
const int minc = MAX(x->mv_col_min * 8, ref_mv->col - MV_MAX); \
const int maxc = MIN(x->mv_col_max * 8, ref_mv->col + MV_MAX); \
const int minr = MAX(x->mv_row_min * 8, ref_mv->row - MV_MAX); \
const int maxr = MIN(x->mv_row_max * 8, ref_mv->row + MV_MAX); \
const int minc = VPXMAX(x->mv_col_min * 8, ref_mv->col - MV_MAX); \
const int maxc = VPXMIN(x->mv_col_max * 8, ref_mv->col + MV_MAX); \
const int minr = VPXMAX(x->mv_row_min * 8, ref_mv->row - MV_MAX); \
const int maxr = VPXMIN(x->mv_row_max * 8, ref_mv->row + MV_MAX); \
int tr = br; \
int tc = bc; \
\
@ -668,10 +669,10 @@ int vp10_find_best_sub_pixel_tree(const MACROBLOCK *x,
int bc = bestmv->col * 8;
int hstep = 4;
int iter, round = 3 - forced_stop;
const int minc = MAX(x->mv_col_min * 8, ref_mv->col - MV_MAX);
const int maxc = MIN(x->mv_col_max * 8, ref_mv->col + MV_MAX);
const int minr = MAX(x->mv_row_min * 8, ref_mv->row - MV_MAX);
const int maxr = MIN(x->mv_row_max * 8, ref_mv->row + MV_MAX);
const int minc = VPXMAX(x->mv_col_min * 8, ref_mv->col - MV_MAX);
const int maxc = VPXMIN(x->mv_col_max * 8, ref_mv->col + MV_MAX);
const int minr = VPXMAX(x->mv_row_min * 8, ref_mv->row - MV_MAX);
const int maxr = VPXMIN(x->mv_row_max * 8, ref_mv->row + MV_MAX);
int tr = br;
int tc = bc;
const MV *search_step = search_step_table;
@ -1500,9 +1501,9 @@ int vp10_fast_hex_search(const MACROBLOCK *x,
int use_mvcost,
const MV *center_mv,
MV *best_mv) {
return vp10_hex_search(x, ref_mv, MAX(MAX_MVSEARCH_STEPS - 2, search_param),
sad_per_bit, do_init_search, cost_list, vfp, use_mvcost,
center_mv, best_mv);
return vp10_hex_search(
x, ref_mv, VPXMAX(MAX_MVSEARCH_STEPS - 2, search_param), sad_per_bit,
do_init_search, cost_list, vfp, use_mvcost, center_mv, best_mv);
}
int vp10_fast_dia_search(const MACROBLOCK *x,
@ -1515,9 +1516,9 @@ int vp10_fast_dia_search(const MACROBLOCK *x,
int use_mvcost,
const MV *center_mv,
MV *best_mv) {
return vp10_bigdia_search(x, ref_mv, MAX(MAX_MVSEARCH_STEPS - 2, search_param),
sad_per_bit, do_init_search, cost_list, vfp,
use_mvcost, center_mv, best_mv);
return vp10_bigdia_search(
x, ref_mv, VPXMAX(MAX_MVSEARCH_STEPS - 2, search_param), sad_per_bit,
do_init_search, cost_list, vfp, use_mvcost, center_mv, best_mv);
}
#undef CHECK_BETTER
@ -1547,10 +1548,10 @@ int vp10_full_range_search_c(const MACROBLOCK *x,
best_sad = fn_ptr->sdf(what->buf, what->stride,
get_buf_from_mv(in_what, ref_mv), in_what->stride) +
mvsad_err_cost(x, ref_mv, &fcenter_mv, sad_per_bit);
start_row = MAX(-range, x->mv_row_min - ref_mv->row);
start_col = MAX(-range, x->mv_col_min - ref_mv->col);
end_row = MIN(range, x->mv_row_max - ref_mv->row);
end_col = MIN(range, x->mv_col_max - ref_mv->col);
start_row = VPXMAX(-range, x->mv_row_min - ref_mv->row);
start_col = VPXMAX(-range, x->mv_col_min - ref_mv->col);
end_row = VPXMIN(range, x->mv_row_max - ref_mv->row);
end_col = VPXMIN(range, x->mv_col_max - ref_mv->col);
for (r = start_row; r <= end_row; ++r) {
for (c = start_col; c <= end_col; c += 4) {
@ -2021,10 +2022,10 @@ int vp10_full_search_sad_c(const MACROBLOCK *x, const MV *ref_mv,
const MACROBLOCKD *const xd = &x->e_mbd;
const struct buf_2d *const what = &x->plane[0].src;
const struct buf_2d *const in_what = &xd->plane[0].pre[0];
const int row_min = MAX(ref_mv->row - distance, x->mv_row_min);
const int row_max = MIN(ref_mv->row + distance, x->mv_row_max);
const int col_min = MAX(ref_mv->col - distance, x->mv_col_min);
const int col_max = MIN(ref_mv->col + distance, x->mv_col_max);
const int row_min = VPXMAX(ref_mv->row - distance, x->mv_row_min);
const int row_max = VPXMIN(ref_mv->row + distance, x->mv_row_max);
const int col_min = VPXMAX(ref_mv->col - distance, x->mv_col_min);
const int col_max = VPXMIN(ref_mv->col + distance, x->mv_col_max);
const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3};
int best_sad = fn_ptr->sdf(what->buf, what->stride,
get_buf_from_mv(in_what, ref_mv), in_what->stride) +
@ -2054,10 +2055,10 @@ int vp10_full_search_sadx3(const MACROBLOCK *x, const MV *ref_mv,
const MACROBLOCKD *const xd = &x->e_mbd;
const struct buf_2d *const what = &x->plane[0].src;
const struct buf_2d *const in_what = &xd->plane[0].pre[0];
const int row_min = MAX(ref_mv->row - distance, x->mv_row_min);
const int row_max = MIN(ref_mv->row + distance, x->mv_row_max);
const int col_min = MAX(ref_mv->col - distance, x->mv_col_min);
const int col_max = MIN(ref_mv->col + distance, x->mv_col_max);
const int row_min = VPXMAX(ref_mv->row - distance, x->mv_row_min);
const int row_max = VPXMIN(ref_mv->row + distance, x->mv_row_max);
const int col_min = VPXMAX(ref_mv->col - distance, x->mv_col_min);
const int col_max = VPXMIN(ref_mv->col + distance, x->mv_col_max);
const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3};
unsigned int best_sad = fn_ptr->sdf(what->buf, what->stride,
get_buf_from_mv(in_what, ref_mv), in_what->stride) +
@ -2119,10 +2120,10 @@ int vp10_full_search_sadx8(const MACROBLOCK *x, const MV *ref_mv,
const MACROBLOCKD *const xd = &x->e_mbd;
const struct buf_2d *const what = &x->plane[0].src;
const struct buf_2d *const in_what = &xd->plane[0].pre[0];
const int row_min = MAX(ref_mv->row - distance, x->mv_row_min);
const int row_max = MIN(ref_mv->row + distance, x->mv_row_max);
const int col_min = MAX(ref_mv->col - distance, x->mv_col_min);
const int col_max = MIN(ref_mv->col + distance, x->mv_col_max);
const int row_min = VPXMAX(ref_mv->row - distance, x->mv_row_min);
const int row_max = VPXMIN(ref_mv->row + distance, x->mv_row_max);
const int col_min = VPXMAX(ref_mv->col - distance, x->mv_col_min);
const int col_max = VPXMIN(ref_mv->col + distance, x->mv_col_max);
const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3};
unsigned int best_sad = fn_ptr->sdf(what->buf, what->stride,
get_buf_from_mv(in_what, ref_mv), in_what->stride) +

vp10/encoder/picklpf.c

@ -13,6 +13,7 @@
#include "./vpx_scale_rtcd.h"
#include "vpx_dsp/vpx_dsp_common.h"
#include "vpx_mem/vpx_mem.h"
#include "vpx_ports/mem.h"
@ -92,8 +93,8 @@ static int search_filter_level(const YV12_BUFFER_CONFIG *sd, VP10_COMP *cpi,
ss_err[filt_mid] = best_err;
while (filter_step > 0) {
const int filt_high = MIN(filt_mid + filter_step, max_filter_level);
const int filt_low = MAX(filt_mid - filter_step, min_filter_level);
const int filt_high = VPXMIN(filt_mid + filter_step, max_filter_level);
const int filt_low = VPXMAX(filt_mid - filter_step, min_filter_level);
// Bias against raising loop filter in favor of lowering it.
int64_t bias = (best_err >> (15 - (filt_mid / 8))) * filter_step;

vp10/encoder/pickmode.c (diff suppressed because it is too large)

vp10/encoder/pickmode.h

@ -1,38 +0,0 @@
/*
* Copyright (c) 2014 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef VP10_ENCODER_PICKMODE_H_
#define VP10_ENCODER_PICKMODE_H_
#include "vp10/encoder/encoder.h"
#ifdef __cplusplus
extern "C" {
#endif
void vp10_pick_intra_mode(VP10_COMP *cpi, MACROBLOCK *x, RD_COST *rd_cost,
BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx);
void vp10_pick_inter_mode(VP10_COMP *cpi, MACROBLOCK *x,
TileDataEnc *tile_data,
int mi_row, int mi_col, RD_COST *rd_cost,
BLOCK_SIZE bsize,
PICK_MODE_CONTEXT *ctx);
void vp10_pick_inter_mode_sub8x8(VP10_COMP *cpi, MACROBLOCK *x,
int mi_row, int mi_col, RD_COST *rd_cost,
BLOCK_SIZE bsize,
PICK_MODE_CONTEXT *ctx);
#ifdef __cplusplus
} // extern "C"
#endif
#endif // VP10_ENCODER_PICKMODE_H_

vp10/encoder/ratectrl.c

@ -15,6 +15,7 @@
#include <stdlib.h>
#include <string.h>
#include "vpx_dsp/vpx_dsp_common.h"
#include "vpx_mem/vpx_mem.h"
#include "vpx_ports/mem.h"
#include "vpx_ports/system_state.h"
@ -106,8 +107,7 @@ static int kf_low = 400;
static int get_minq_index(double maxq, double x3, double x2, double x1,
vpx_bit_depth_t bit_depth) {
int i;
const double minqtarget = MIN(((x3 * maxq + x2) * maxq + x1) * maxq,
maxq);
const double minqtarget = VPXMIN(((x3 * maxq + x2) * maxq + x1) * maxq, maxq);
// Special case handling to deal with the step from q2.0
// down to lossless mode represented by q 1.0.
@ -192,15 +192,15 @@ int vp10_estimate_bits_at_q(FRAME_TYPE frame_type, int q, int mbs,
vpx_bit_depth_t bit_depth) {
const int bpm = (int)(vp10_rc_bits_per_mb(frame_type, q, correction_factor,
bit_depth));
return MAX(FRAME_OVERHEAD_BITS,
(int)((uint64_t)bpm * mbs) >> BPER_MB_NORMBITS);
return VPXMAX(FRAME_OVERHEAD_BITS,
(int)((uint64_t)bpm * mbs) >> BPER_MB_NORMBITS);
}
int vp10_rc_clamp_pframe_target_size(const VP10_COMP *const cpi, int target) {
const RATE_CONTROL *rc = &cpi->rc;
const VP10EncoderConfig *oxcf = &cpi->oxcf;
const int min_frame_target = MAX(rc->min_frame_bandwidth,
rc->avg_frame_bandwidth >> 5);
const int min_frame_target = VPXMAX(rc->min_frame_bandwidth,
rc->avg_frame_bandwidth >> 5);
if (target < min_frame_target)
target = min_frame_target;
if (cpi->refresh_golden_frame && rc->is_src_frame_alt_ref) {
@ -216,7 +216,7 @@ int vp10_rc_clamp_pframe_target_size(const VP10_COMP *const cpi, int target) {
if (oxcf->rc_max_inter_bitrate_pct) {
const int max_rate = rc->avg_frame_bandwidth *
oxcf->rc_max_inter_bitrate_pct / 100;
target = MIN(target, max_rate);
target = VPXMIN(target, max_rate);
}
return target;
}
@ -227,34 +227,13 @@ int vp10_rc_clamp_iframe_target_size(const VP10_COMP *const cpi, int target) {
if (oxcf->rc_max_intra_bitrate_pct) {
const int max_rate = rc->avg_frame_bandwidth *
oxcf->rc_max_intra_bitrate_pct / 100;
target = MIN(target, max_rate);
target = VPXMIN(target, max_rate);
}
if (target > rc->max_frame_bandwidth)
target = rc->max_frame_bandwidth;
return target;
}
// Update the buffer level for higher temporal layers, given the size of
// the frame just encoded in the current temporal layer.
static void update_layer_buffer_level(SVC *svc, int encoded_frame_size) {
int i = 0;
int current_temporal_layer = svc->temporal_layer_id;
for (i = current_temporal_layer + 1;
i < svc->number_temporal_layers; ++i) {
const int layer = LAYER_IDS_TO_IDX(svc->spatial_layer_id, i,
svc->number_temporal_layers);
LAYER_CONTEXT *lc = &svc->layer_context[layer];
RATE_CONTROL *lrc = &lc->rc;
int bits_off_for_this_layer = (int)(lc->target_bandwidth / lc->framerate -
encoded_frame_size);
lrc->bits_off_target += bits_off_for_this_layer;
// Clip buffer level to maximum buffer size for the layer.
lrc->bits_off_target = MIN(lrc->bits_off_target, lrc->maximum_buffer_size);
lrc->buffer_level = lrc->bits_off_target;
}
}
// Update the buffer level: leaky bucket model.
static void update_buffer_level(VP10_COMP *cpi, int encoded_frame_size) {
const VP10_COMMON *const cm = &cpi->common;
@ -268,12 +247,8 @@ static void update_buffer_level(VP10_COMP *cpi, int encoded_frame_size) {
}
// Clip the buffer level to the maximum specified buffer size.
rc->bits_off_target = MIN(rc->bits_off_target, rc->maximum_buffer_size);
rc->bits_off_target = VPXMIN(rc->bits_off_target, rc->maximum_buffer_size);
rc->buffer_level = rc->bits_off_target;
if (is_one_pass_cbr_svc(cpi)) {
update_layer_buffer_level(&cpi->svc, encoded_frame_size);
}
}
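The hunk above keeps only the clip; for context, a minimal sketch of the leaky-bucket update that precedes it (assumed shape, not verbatim from this tree):

/* Refill by the per-frame bandwidth, drain by the bits actually spent,
   then clip to the bucket size (the clip is the line shown above). */
rc->bits_off_target += rc->avg_frame_bandwidth - encoded_frame_size;
rc->bits_off_target = VPXMIN(rc->bits_off_target, rc->maximum_buffer_size);
rc->buffer_level = rc->bits_off_target;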
int vp10_rc_get_default_min_gf_interval(
@ -287,8 +262,8 @@ int vp10_rc_get_default_min_gf_interval(
if (factor <= factor_safe)
return default_interval;
else
return MAX(default_interval,
(int)(MIN_GF_INTERVAL * factor / factor_safe + 0.5));
return VPXMAX(default_interval,
(int)(MIN_GF_INTERVAL * factor / factor_safe + 0.5));
// Note this logic makes:
// 4K24: 5
// 4K30: 6
@ -296,9 +271,9 @@ int vp10_rc_get_default_min_gf_interval(
}
int vp10_rc_get_default_max_gf_interval(double framerate, int min_gf_interval) {
int interval = MIN(MAX_GF_INTERVAL, (int)(framerate * 0.75));
int interval = VPXMIN(MAX_GF_INTERVAL, (int)(framerate * 0.75));
interval += (interval & 0x01); // Round to even value
return MAX(interval, min_gf_interval);
return VPXMAX(interval, min_gf_interval);
}
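A worked instance of the default max GF interval, assuming MAX_GF_INTERVAL is 16 in this tree:

/* framerate = 30.0, min_gf_interval = 5 (hypothetical inputs)
   interval = VPXMIN(16, (int)(30.0 * 0.75)) = VPXMIN(16, 22) = 16
   16 is already even, so the result is VPXMAX(16, 5) = 16. */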
void vp10_rc_init(const VP10EncoderConfig *oxcf, int pass, RATE_CONTROL *rc) {
@ -408,7 +383,7 @@ static double get_rate_correction_factor(const VP10_COMP *cpi) {
rcf = rc->rate_correction_factors[rf_lvl];
} else {
if ((cpi->refresh_alt_ref_frame || cpi->refresh_golden_frame) &&
!rc->is_src_frame_alt_ref && !cpi->use_svc &&
!rc->is_src_frame_alt_ref &&
(cpi->oxcf.rc_mode != VPX_CBR || cpi->oxcf.gf_cbr_boost_pct > 20))
rcf = rc->rate_correction_factors[GF_ARF_STD];
else
@ -434,7 +409,7 @@ static void set_rate_correction_factor(VP10_COMP *cpi, double factor) {
rc->rate_correction_factors[rf_lvl] = factor;
} else {
if ((cpi->refresh_alt_ref_frame || cpi->refresh_golden_frame) &&
!rc->is_src_frame_alt_ref && !cpi->use_svc &&
!rc->is_src_frame_alt_ref &&
(cpi->oxcf.rc_mode != VPX_CBR || cpi->oxcf.gf_cbr_boost_pct > 20))
rc->rate_correction_factors[GF_ARF_STD] = factor;
else
@ -478,7 +453,7 @@ void vp10_rc_update_rate_correction_factors(VP10_COMP *cpi) {
// More heavily damped adjustment used if we have been oscillating either side
// of target.
adjustment_limit = 0.25 +
0.5 * MIN(1, fabs(log10(0.01 * correction_factor)));
0.5 * VPXMIN(1, fabs(log10(0.01 * correction_factor)));
cpi->rc.q_2_frame = cpi->rc.q_1_frame;
cpi->rc.q_1_frame = cm->base_qindex;
@ -529,10 +504,7 @@ int vp10_rc_regulate_q(const VP10_COMP *cpi, int target_bits_per_frame,
i = active_best_quality;
do {
if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ &&
cm->seg.enabled &&
cpi->svc.temporal_layer_id == 0 &&
cpi->svc.spatial_layer_id == 0) {
if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ && cm->seg.enabled) {
bits_per_mb_at_this_q =
(int)vp10_cyclic_refresh_rc_bits_per_mb(cpi, i, correction_factor);
} else {
@ -558,8 +530,8 @@ int vp10_rc_regulate_q(const VP10_COMP *cpi, int target_bits_per_frame,
if (cpi->oxcf.rc_mode == VPX_CBR &&
(cpi->rc.rc_1_frame * cpi->rc.rc_2_frame == -1) &&
cpi->rc.q_1_frame != cpi->rc.q_2_frame) {
q = clamp(q, MIN(cpi->rc.q_1_frame, cpi->rc.q_2_frame),
MAX(cpi->rc.q_1_frame, cpi->rc.q_2_frame));
q = clamp(q, VPXMIN(cpi->rc.q_1_frame, cpi->rc.q_2_frame),
VPXMAX(cpi->rc.q_1_frame, cpi->rc.q_2_frame));
}
return q;
}
@ -617,7 +589,7 @@ static int calc_active_worst_quality_one_pass_vbr(const VP10_COMP *cpi) {
: rc->last_q[INTER_FRAME] * 2;
}
}
return MIN(active_worst_quality, rc->worst_quality);
return VPXMIN(active_worst_quality, rc->worst_quality);
}
// Adjust active_worst_quality level based on buffer level.
@ -643,10 +615,10 @@ static int calc_active_worst_quality_one_pass_cbr(const VP10_COMP *cpi) {
// So for the first few frames following a key frame, the qp of that key
// frame is weighted into the active_worst_quality setting.
ambient_qp = (cm->current_video_frame < 5) ?
MIN(rc->avg_frame_qindex[INTER_FRAME], rc->avg_frame_qindex[KEY_FRAME]) :
rc->avg_frame_qindex[INTER_FRAME];
active_worst_quality = MIN(rc->worst_quality,
ambient_qp * 5 / 4);
VPXMIN(rc->avg_frame_qindex[INTER_FRAME],
rc->avg_frame_qindex[KEY_FRAME]) :
rc->avg_frame_qindex[INTER_FRAME];
active_worst_quality = VPXMIN(rc->worst_quality, ambient_qp * 5 / 4);
if (rc->buffer_level > rc->optimal_buffer_level) {
// Adjust down.
// Maximum limit for down adjustment, ~30%.
@ -699,7 +671,7 @@ static int rc_pick_q_and_bounds_one_pass_cbr(const VP10_COMP *cpi,
int delta_qindex = vp10_compute_qdelta(rc, last_boosted_q,
(last_boosted_q * 0.75),
cm->bit_depth);
active_best_quality = MAX(qindex + delta_qindex, rc->best_quality);
active_best_quality = VPXMAX(qindex + delta_qindex, rc->best_quality);
} else if (cm->current_video_frame > 0) {
// not first frame of one pass and kf_boost is set
double q_adj_factor = 1.0;
@ -722,7 +694,6 @@ static int rc_pick_q_and_bounds_one_pass_cbr(const VP10_COMP *cpi,
cm->bit_depth);
}
} else if (!rc->is_src_frame_alt_ref &&
!cpi->use_svc &&
(cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame)) {
// Use the lower of active_worst_quality and recent
// average Q as basis for GF/ARF best Q limit unless last frame was
@ -833,7 +804,7 @@ static int rc_pick_q_and_bounds_one_pass_vbr(const VP10_COMP *cpi,
int delta_qindex = vp10_compute_qdelta(rc, last_boosted_q,
last_boosted_q * 0.75,
cm->bit_depth);
active_best_quality = MAX(qindex + delta_qindex, rc->best_quality);
active_best_quality = VPXMAX(qindex + delta_qindex, rc->best_quality);
} else {
// not first frame of one pass and kf_boost is set
double q_adj_factor = 1.0;
@ -992,7 +963,7 @@ static int rc_pick_q_and_bounds_two_pass(const VP10_COMP *cpi,
int *inter_minq;
ASSIGN_MINQ_TABLE(cm->bit_depth, inter_minq);
if (frame_is_intra_only(cm) || vp10_is_upper_layer_key_frame(cpi)) {
if (frame_is_intra_only(cm)) {
// Handle the special case for key frames forced when we have reached
// the maximum key frame interval. Here force the Q to a range
// based on the ambient Q to reduce the risk of popping.
@ -1002,21 +973,21 @@ static int rc_pick_q_and_bounds_two_pass(const VP10_COMP *cpi,
int qindex;
if (cpi->twopass.last_kfgroup_zeromotion_pct >= STATIC_MOTION_THRESH) {
qindex = MIN(rc->last_kf_qindex, rc->last_boosted_qindex);
qindex = VPXMIN(rc->last_kf_qindex, rc->last_boosted_qindex);
active_best_quality = qindex;
last_boosted_q = vp10_convert_qindex_to_q(qindex, cm->bit_depth);
delta_qindex = vp10_compute_qdelta(rc, last_boosted_q,
last_boosted_q * 1.25,
cm->bit_depth);
active_worst_quality = MIN(qindex + delta_qindex, active_worst_quality);
active_worst_quality =
VPXMIN(qindex + delta_qindex, active_worst_quality);
} else {
qindex = rc->last_boosted_qindex;
last_boosted_q = vp10_convert_qindex_to_q(qindex, cm->bit_depth);
delta_qindex = vp10_compute_qdelta(rc, last_boosted_q,
last_boosted_q * 0.75,
cm->bit_depth);
active_best_quality = MAX(qindex + delta_qindex, rc->best_quality);
active_best_quality = VPXMAX(qindex + delta_qindex, rc->best_quality);
}
} else {
// Not forced keyframe.
@ -1111,13 +1082,13 @@ static int rc_pick_q_and_bounds_two_pass(const VP10_COMP *cpi,
#if LIMIT_QRANGE_FOR_ALTREF_AND_KEY
vpx_clear_system_state();
// Static forced key frames Q restrictions dealt with elsewhere.
if (!((frame_is_intra_only(cm) || vp10_is_upper_layer_key_frame(cpi))) ||
if (!(frame_is_intra_only(cm)) ||
!rc->this_key_frame_forced ||
(cpi->twopass.last_kfgroup_zeromotion_pct < STATIC_MOTION_THRESH)) {
int qdelta = vp10_frame_type_qdelta(cpi, gf_group->rf_level[gf_group->index],
active_worst_quality);
active_worst_quality = MAX(active_worst_quality + qdelta,
active_best_quality);
active_worst_quality = VPXMAX(active_worst_quality + qdelta,
active_best_quality);
}
#endif
@ -1126,7 +1097,8 @@ static int rc_pick_q_and_bounds_two_pass(const VP10_COMP *cpi,
int qdelta = vp10_compute_qdelta_by_rate(rc, cm->frame_type,
active_best_quality, 2.0,
cm->bit_depth);
active_best_quality = MAX(active_best_quality + qdelta, rc->best_quality);
active_best_quality =
VPXMAX(active_best_quality + qdelta, rc->best_quality);
}
active_best_quality = clamp(active_best_quality,
@ -1137,11 +1109,10 @@ static int rc_pick_q_and_bounds_two_pass(const VP10_COMP *cpi,
if (oxcf->rc_mode == VPX_Q) {
q = active_best_quality;
// Special case code to try and match quality with forced key frames.
} else if ((frame_is_intra_only(cm) || vp10_is_upper_layer_key_frame(cpi)) &&
rc->this_key_frame_forced) {
} else if (frame_is_intra_only(cm) && rc->this_key_frame_forced) {
// If static since last kf use better of last boosted and last kf q.
if (cpi->twopass.last_kfgroup_zeromotion_pct >= STATIC_MOTION_THRESH) {
q = MIN(rc->last_kf_qindex, rc->last_boosted_qindex);
q = VPXMIN(rc->last_kf_qindex, rc->last_boosted_qindex);
} else {
q = rc->last_boosted_qindex;
}
@ -1180,15 +1151,7 @@ int vp10_rc_pick_q_and_bounds(const VP10_COMP *cpi,
} else {
q = rc_pick_q_and_bounds_two_pass(cpi, bottom_index, top_index);
}
if (cpi->sf.use_nonrd_pick_mode) {
if (cpi->sf.force_frame_boost == 1)
q -= cpi->sf.max_delta_qindex;
if (q < *bottom_index)
*bottom_index = q;
else if (q > *top_index)
*top_index = q;
}
return q;
}
@ -1203,9 +1166,9 @@ void vp10_rc_compute_frame_size_bounds(const VP10_COMP *cpi,
    // For very small rate targets where the fractional adjustment
    // may be tiny, make sure there is at least a minimum range.
const int tolerance = (cpi->sf.recode_tolerance * frame_target) / 100;
*frame_under_shoot_limit = MAX(frame_target - tolerance - 200, 0);
*frame_over_shoot_limit = MIN(frame_target + tolerance + 200,
cpi->rc.max_frame_bandwidth);
*frame_under_shoot_limit = VPXMAX(frame_target - tolerance - 200, 0);
*frame_over_shoot_limit = VPXMIN(frame_target + tolerance + 200,
cpi->rc.max_frame_bandwidth);
}
}
@ -1288,8 +1251,7 @@ void vp10_rc_postencode_update(VP10_COMP *cpi, uint64_t bytes_used) {
ROUND_POWER_OF_TWO(3 * rc->avg_frame_qindex[KEY_FRAME] + qindex, 2);
} else {
if (rc->is_src_frame_alt_ref ||
!(cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame) ||
(cpi->use_svc && oxcf->rc_mode == VPX_CBR)) {
!(cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame)) {
rc->last_q[INTER_FRAME] = qindex;
rc->avg_frame_qindex[INTER_FRAME] =
ROUND_POWER_OF_TWO(3 * rc->avg_frame_qindex[INTER_FRAME] + qindex, 2);
@ -1441,10 +1403,10 @@ void vp10_rc_get_one_pass_vbr_params(VP10_COMP *cpi) {
static int calc_pframe_target_size_one_pass_cbr(const VP10_COMP *cpi) {
const VP10EncoderConfig *oxcf = &cpi->oxcf;
const RATE_CONTROL *rc = &cpi->rc;
const SVC *const svc = &cpi->svc;
const int64_t diff = rc->optimal_buffer_level - rc->buffer_level;
const int64_t one_pct_bits = 1 + rc->optimal_buffer_level / 100;
int min_frame_target = MAX(rc->avg_frame_bandwidth >> 4, FRAME_OVERHEAD_BITS);
int min_frame_target =
VPXMAX(rc->avg_frame_bandwidth >> 4, FRAME_OVERHEAD_BITS);
int target;
if (oxcf->gf_cbr_boost_pct) {
@ -1457,38 +1419,27 @@ static int calc_pframe_target_size_one_pass_cbr(const VP10_COMP *cpi) {
} else {
target = rc->avg_frame_bandwidth;
}
if (is_one_pass_cbr_svc(cpi)) {
// Note that for layers, avg_frame_bandwidth is the cumulative
// per-frame-bandwidth. For the target size of this frame, use the
// layer average frame size (i.e., non-cumulative per-frame-bw).
int layer =
LAYER_IDS_TO_IDX(svc->spatial_layer_id,
svc->temporal_layer_id, svc->number_temporal_layers);
const LAYER_CONTEXT *lc = &svc->layer_context[layer];
target = lc->avg_frame_size;
min_frame_target = MAX(lc->avg_frame_size >> 4, FRAME_OVERHEAD_BITS);
}
if (diff > 0) {
// Lower the target bandwidth for this frame.
const int pct_low = (int)MIN(diff / one_pct_bits, oxcf->under_shoot_pct);
const int pct_low = (int)VPXMIN(diff / one_pct_bits, oxcf->under_shoot_pct);
target -= (target * pct_low) / 200;
} else if (diff < 0) {
// Increase the target bandwidth for this frame.
const int pct_high = (int)MIN(-diff / one_pct_bits, oxcf->over_shoot_pct);
const int pct_high =
(int)VPXMIN(-diff / one_pct_bits, oxcf->over_shoot_pct);
target += (target * pct_high) / 200;
}
if (oxcf->rc_max_inter_bitrate_pct) {
const int max_rate = rc->avg_frame_bandwidth *
oxcf->rc_max_inter_bitrate_pct / 100;
target = MIN(target, max_rate);
target = VPXMIN(target, max_rate);
}
return MAX(min_frame_target, target);
return VPXMAX(min_frame_target, target);
}
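A worked illustration of the buffer feedback above (all numbers hypothetical):

// Suppose optimal_buffer_level = 600000 bits and buffer_level = 450000:
//   diff = 150000, one_pct_bits = 1 + 600000 / 100 = 6001
//   pct_low = VPXMIN(150000 / 6001, under_shoot_pct) = VPXMIN(24, 50) = 24
//   target -= target * 24 / 200, a 12% cut; dividing by 200 rather than 100
//   caps the correction at half the configured shoot percentage.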
static int calc_iframe_target_size_one_pass_cbr(const VP10_COMP *cpi) {
const RATE_CONTROL *rc = &cpi->rc;
const VP10EncoderConfig *oxcf = &cpi->oxcf;
const SVC *const svc = &cpi->svc;
int target;
if (cpi->common.current_video_frame == 0) {
target = ((rc->starting_buffer_level / 2) > INT_MAX)
@ -1496,15 +1447,8 @@ static int calc_iframe_target_size_one_pass_cbr(const VP10_COMP *cpi) {
} else {
int kf_boost = 32;
double framerate = cpi->framerate;
if (svc->number_temporal_layers > 1 &&
oxcf->rc_mode == VPX_CBR) {
// Use the layer framerate for temporal layers CBR mode.
const int layer = LAYER_IDS_TO_IDX(svc->spatial_layer_id,
svc->temporal_layer_id, svc->number_temporal_layers);
const LAYER_CONTEXT *lc = &svc->layer_context[layer];
framerate = lc->framerate;
}
kf_boost = MAX(kf_boost, (int)(2 * framerate - 16));
kf_boost = VPXMAX(kf_boost, (int)(2 * framerate - 16));
if (rc->frames_since_key < framerate / 2) {
kf_boost = (int)(kf_boost * rc->frames_since_key /
(framerate / 2));
@ -1514,82 +1458,6 @@ static int calc_iframe_target_size_one_pass_cbr(const VP10_COMP *cpi) {
return vp10_rc_clamp_iframe_target_size(cpi, target);
}
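A quick numeric sketch of the key-frame boost logic above (30 fps is a hypothetical input):

// kf_boost = VPXMAX(32, (int)(2 * 30 - 16)) = 44 at 30 fps.
// A key frame arriving only 5 frames after the last one (5 < 30 / 2) is
// scaled down: kf_boost = 44 * 5 / (30 / 2) = 14, so closely spaced key
// frames receive a smaller boost.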
// Reset information needed to set proper reference frames and buffer updates
// for temporal layering. This is called when a key frame is encoded.
static void reset_temporal_layer_to_zero(VP10_COMP *cpi) {
int sl;
LAYER_CONTEXT *lc = NULL;
cpi->svc.temporal_layer_id = 0;
for (sl = 0; sl < cpi->svc.number_spatial_layers; ++sl) {
lc = &cpi->svc.layer_context[sl * cpi->svc.number_temporal_layers];
lc->current_video_frame_in_layer = 0;
lc->frames_from_key_frame = 0;
}
}
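For context, the LAYER_IDS_TO_IDX() macro used by the SVC code being removed here flattens a (spatial, temporal) layer pair into a single layer_context[] index; assuming it matches the vp9 original, it reduces to:

#define LAYER_IDS_TO_IDX(sl, tl, num_tl) ((sl) * (num_tl) + (tl))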
void vp10_rc_get_svc_params(VP10_COMP *cpi) {
VP10_COMMON *const cm = &cpi->common;
RATE_CONTROL *const rc = &cpi->rc;
int target = rc->avg_frame_bandwidth;
const int layer = LAYER_IDS_TO_IDX(cpi->svc.spatial_layer_id,
cpi->svc.temporal_layer_id, cpi->svc.number_temporal_layers);
if ((cm->current_video_frame == 0) ||
(cpi->frame_flags & FRAMEFLAGS_KEY) ||
(cpi->oxcf.auto_key && (rc->frames_since_key %
cpi->oxcf.key_freq == 0))) {
cm->frame_type = KEY_FRAME;
rc->source_alt_ref_active = 0;
if (is_two_pass_svc(cpi)) {
cpi->svc.layer_context[layer].is_key_frame = 1;
cpi->ref_frame_flags &=
(~VP9_LAST_FLAG & ~VP9_GOLD_FLAG & ~VP9_ALT_FLAG);
} else if (is_one_pass_cbr_svc(cpi)) {
cpi->svc.layer_context[layer].is_key_frame = 1;
reset_temporal_layer_to_zero(cpi);
cpi->ref_frame_flags &=
(~VP9_LAST_FLAG & ~VP9_GOLD_FLAG & ~VP9_ALT_FLAG);
// Assumption here is that LAST_FRAME is being updated for a keyframe.
// Thus no change in update flags.
target = calc_iframe_target_size_one_pass_cbr(cpi);
}
} else {
cm->frame_type = INTER_FRAME;
if (is_two_pass_svc(cpi)) {
LAYER_CONTEXT *lc = &cpi->svc.layer_context[layer];
if (cpi->svc.spatial_layer_id == 0) {
lc->is_key_frame = 0;
} else {
lc->is_key_frame =
cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame;
if (lc->is_key_frame)
cpi->ref_frame_flags &= (~VP9_LAST_FLAG);
}
cpi->ref_frame_flags &= (~VP9_ALT_FLAG);
} else if (is_one_pass_cbr_svc(cpi)) {
LAYER_CONTEXT *lc = &cpi->svc.layer_context[layer];
if (cpi->svc.spatial_layer_id == 0) {
lc->is_key_frame = 0;
} else {
lc->is_key_frame =
cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame;
}
target = calc_pframe_target_size_one_pass_cbr(cpi);
}
}
// Any update/change of global cyclic refresh parameters (amount/delta-qp)
// should be done here, before the frame qp is selected.
if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ)
vp10_cyclic_refresh_update_parameters(cpi);
vp10_rc_set_frame_target(cpi, target);
rc->frames_till_gf_update_due = INT_MAX;
rc->baseline_gf_interval = INT_MAX;
}
void vp10_rc_get_one_pass_cbr_params(VP10_COMP *cpi) {
VP10_COMMON *const cm = &cpi->common;
RATE_CONTROL *const rc = &cpi->rc;
@ -1712,7 +1580,7 @@ void vp10_rc_set_gf_interval_range(const VP10_COMP *const cpi,
rc->max_gf_interval = rc->static_scene_max_gf_interval;
// Clamp min to max
rc->min_gf_interval = MIN(rc->min_gf_interval, rc->max_gf_interval);
rc->min_gf_interval = VPXMIN(rc->min_gf_interval, rc->max_gf_interval);
}
void vp10_rc_update_framerate(VP10_COMP *cpi) {
@ -1725,7 +1593,8 @@ void vp10_rc_update_framerate(VP10_COMP *cpi) {
rc->min_frame_bandwidth = (int)(rc->avg_frame_bandwidth *
oxcf->two_pass_vbrmin_section / 100);
rc->min_frame_bandwidth = MAX(rc->min_frame_bandwidth, FRAME_OVERHEAD_BITS);
rc->min_frame_bandwidth =
VPXMAX(rc->min_frame_bandwidth, FRAME_OVERHEAD_BITS);
// A maximum bitrate for a frame is defined.
// The baseline for this aligns with HW implementations that
@ -1736,8 +1605,8 @@ void vp10_rc_update_framerate(VP10_COMP *cpi) {
// specifies lossless encode.
vbr_max_bits = (int)(((int64_t)rc->avg_frame_bandwidth *
oxcf->two_pass_vbrmax_section) / 100);
rc->max_frame_bandwidth = MAX(MAX((cm->MBs * MAX_MB_RATE), MAXRATE_1080P),
vbr_max_bits);
rc->max_frame_bandwidth =
VPXMAX(VPXMAX((cm->MBs * MAX_MB_RATE), MAXRATE_1080P), vbr_max_bits);
vp10_rc_set_gf_interval_range(cpi, rc);
}
@ -1775,12 +1644,12 @@ static void vbr_rate_correction(VP10_COMP *cpi, int *this_frame_target) {
// Don't do it for kf, arf, gf or overlay frames.
if (!frame_is_kf_gf_arf(cpi) && !rc->is_src_frame_alt_ref &&
rc->vbr_bits_off_target_fast) {
int one_frame_bits = MAX(rc->avg_frame_bandwidth, *this_frame_target);
int one_frame_bits = VPXMAX(rc->avg_frame_bandwidth, *this_frame_target);
int fast_extra_bits;
fast_extra_bits =
(int)MIN(rc->vbr_bits_off_target_fast, one_frame_bits);
fast_extra_bits = (int)MIN(fast_extra_bits,
MAX(one_frame_bits / 8, rc->vbr_bits_off_target_fast / 8));
fast_extra_bits = (int)VPXMIN(rc->vbr_bits_off_target_fast, one_frame_bits);
fast_extra_bits = (int)VPXMIN(
fast_extra_bits,
VPXMAX(one_frame_bits / 8, rc->vbr_bits_off_target_fast / 8));
*this_frame_target += (int)fast_extra_bits;
rc->vbr_bits_off_target_fast -= fast_extra_bits;
}
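A worked example of the two-stage clamp above (numbers hypothetical):

// Suppose vbr_bits_off_target_fast = 100000 and one_frame_bits = 40000:
//   fast_extra_bits = VPXMIN(100000, 40000) = 40000
//   then VPXMIN(40000, VPXMAX(40000 / 8, 100000 / 8)) = 12500,
// so at most roughly 1/8 of the outstanding deficit is repaid per frame.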


@ -169,7 +169,6 @@ int vp10_rc_get_default_max_gf_interval(double framerate, int min_frame_rate);
// First call per frame, one of:
// vp10_rc_get_one_pass_vbr_params()
// vp10_rc_get_one_pass_cbr_params()
// vp10_rc_get_svc_params()
// vp10_rc_get_first_pass_params()
// vp10_rc_get_second_pass_params()
// depending on the usage to set the rate control encode parameters desired.
@ -190,7 +189,6 @@ int vp10_rc_get_default_max_gf_interval(double framerate, int min_frame_rate);
// encode_frame_to_data_rate() function.
void vp10_rc_get_one_pass_vbr_params(struct VP10_COMP *cpi);
void vp10_rc_get_one_pass_cbr_params(struct VP10_COMP *cpi);
void vp10_rc_get_svc_params(struct VP10_COMP *cpi);
// Post encode update of the rate control parameters based
// on bytes used
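Putting the comments above together, a typical one-pass CBR frame follows this call order (a sketch only; the encode step is elided):

// vp10_rc_get_one_pass_cbr_params(cpi);              // set frame target
// q = vp10_rc_pick_q_and_bounds(cpi, &bottom, &top); // choose quantizer
// ... encode the frame ...
// vp10_rc_postencode_update(cpi, bytes_used);        // feed back bytes used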


@ -14,6 +14,7 @@
#include "./vp10_rtcd.h"
#include "vpx_dsp/vpx_dsp_common.h"
#include "vpx_mem/vpx_mem.h"
#include "vpx_ports/bitops.h"
#include "vpx_ports/mem.h"
@ -177,7 +178,7 @@ int vp10_compute_rd_mult(const VP10_COMP *cpi, int qindex) {
if (cpi->oxcf.pass == 2 && (cpi->common.frame_type != KEY_FRAME)) {
const GF_GROUP *const gf_group = &cpi->twopass.gf_group;
const FRAME_UPDATE_TYPE frame_type = gf_group->update_type[gf_group->index];
const int boost_index = MIN(15, (cpi->rc.gfu_boost / 100));
const int boost_index = VPXMIN(15, (cpi->rc.gfu_boost / 100));
rdmult = (rdmult * rd_frame_type_factor[frame_type]) >> 7;
rdmult += ((rdmult * rd_boost_factor[boost_index]) >> 7);
@ -209,7 +210,7 @@ static int compute_rd_thresh_factor(int qindex, vpx_bit_depth_t bit_depth) {
q = vp10_dc_quant(qindex, 0, VPX_BITS_8) / 4.0;
#endif // CONFIG_VP9_HIGHBITDEPTH
// TODO(debargha): Adjust the function below.
return MAX((int)(pow(q, RD_THRESH_POW) * 5.12), 8);
return VPXMAX((int)(pow(q, RD_THRESH_POW) * 5.12), 8);
}
void vp10_initialize_me_consts(VP10_COMP *cpi, MACROBLOCK *x, int qindex) {
@ -290,8 +291,7 @@ void vp10_initialize_rd_consts(VP10_COMP *cpi) {
set_block_thresholds(cm, rd);
set_partition_probs(cm, xd);
if (!cpi->sf.use_nonrd_pick_mode || cm->frame_type == KEY_FRAME)
fill_token_costs(x->token_costs, cm->fc->coef_probs);
fill_token_costs(x->token_costs, cm->fc->coef_probs);
if (cpi->sf.partition_search_type != VAR_BASED_PARTITION ||
cm->frame_type == KEY_FRAME) {
@ -300,20 +300,17 @@ void vp10_initialize_rd_consts(VP10_COMP *cpi) {
vp10_partition_tree);
}
if (!cpi->sf.use_nonrd_pick_mode || (cm->current_video_frame & 0x07) == 1 ||
cm->frame_type == KEY_FRAME) {
fill_mode_costs(cpi);
fill_mode_costs(cpi);
if (!frame_is_intra_only(cm)) {
vp10_build_nmv_cost_table(x->nmvjointcost,
cm->allow_high_precision_mv ? x->nmvcost_hp
: x->nmvcost,
&cm->fc->nmvc, cm->allow_high_precision_mv);
if (!frame_is_intra_only(cm)) {
vp10_build_nmv_cost_table(x->nmvjointcost,
cm->allow_high_precision_mv ? x->nmvcost_hp
: x->nmvcost,
&cm->fc->nmvc, cm->allow_high_precision_mv);
for (i = 0; i < INTER_MODE_CONTEXTS; ++i)
vp10_cost_tokens((int *)cpi->inter_mode_cost[i],
cm->fc->inter_mode_probs[i], vp10_inter_mode_tree);
}
for (i = 0; i < INTER_MODE_CONTEXTS; ++i)
vp10_cost_tokens((int *)cpi->inter_mode_cost[i],
cm->fc->inter_mode_probs[i], vp10_inter_mode_tree);
}
}
@ -409,7 +406,7 @@ void vp10_model_rd_from_var_lapndz(unsigned int var, unsigned int n_log2,
static const uint32_t MAX_XSQ_Q10 = 245727;
const uint64_t xsq_q10_64 =
(((uint64_t)qstep * qstep << (n_log2 + 10)) + (var >> 1)) / var;
const int xsq_q10 = (int)MIN(xsq_q10_64, MAX_XSQ_Q10);
const int xsq_q10 = (int)VPXMIN(xsq_q10_64, MAX_XSQ_Q10);
model_rd_norm(xsq_q10, &r_q10, &d_q10);
*rate = ((r_q10 << n_log2) + 2) >> 2;
*dist = (var * (int64_t)d_q10 + 512) >> 10;
@ -490,7 +487,7 @@ void vp10_mv_pred(VP10_COMP *cpi, MACROBLOCK *x,
continue;
fp_row = (this_mv->row + 3 + (this_mv->row >= 0)) >> 3;
fp_col = (this_mv->col + 3 + (this_mv->col >= 0)) >> 3;
max_mv = MAX(max_mv, MAX(abs(this_mv->row), abs(this_mv->col)) >> 3);
max_mv = VPXMAX(max_mv, VPXMAX(abs(this_mv->row), abs(this_mv->col)) >> 3);
if (fp_row == 0 && fp_col == 0 && zero_seen)
continue;
@ -635,16 +632,15 @@ void vp10_update_rd_thresh_fact(int (*factor_buf)[MAX_MODES], int rd_thresh,
const int top_mode = bsize < BLOCK_8X8 ? MAX_REFS : MAX_MODES;
int mode;
for (mode = 0; mode < top_mode; ++mode) {
const BLOCK_SIZE min_size = MAX(bsize - 1, BLOCK_4X4);
const BLOCK_SIZE max_size = MIN(bsize + 2, BLOCK_64X64);
const BLOCK_SIZE min_size = VPXMAX(bsize - 1, BLOCK_4X4);
const BLOCK_SIZE max_size = VPXMIN(bsize + 2, BLOCK_64X64);
BLOCK_SIZE bs;
for (bs = min_size; bs <= max_size; ++bs) {
int *const fact = &factor_buf[bs][mode];
if (mode == best_mode_index) {
*fact -= (*fact >> 4);
} else {
*fact = MIN(*fact + RD_THRESH_INC,
rd_thresh * RD_THRESH_MAX_FACT);
*fact = VPXMIN(*fact + RD_THRESH_INC, rd_thresh * RD_THRESH_MAX_FACT);
}
}
}


@ -14,6 +14,7 @@
#include "./vp10_rtcd.h"
#include "./vpx_dsp_rtcd.h"
#include "vpx_dsp/vpx_dsp_common.h"
#include "vpx_mem/vpx_mem.h"
#include "vpx_ports/mem.h"
#include "vpx_ports/system_state.h"
@ -196,8 +197,8 @@ static void model_rd_for_sb(VP10_COMP *cpi, BLOCK_SIZE bsize,
const int64_t ac_thr = p->quant_thred[1] >> shift;
// The low thresholds are used to measure if the prediction errors are
// low enough so that we can skip the mode search.
const int64_t low_dc_thr = MIN(50, dc_thr >> 2);
const int64_t low_ac_thr = MIN(80, ac_thr >> 2);
const int64_t low_dc_thr = VPXMIN(50, dc_thr >> 2);
const int64_t low_ac_thr = VPXMIN(80, ac_thr >> 2);
int bw = 1 << (b_width_log2_lookup[bs] - b_width_log2_lookup[unit_size]);
int bh = 1 << (b_height_log2_lookup[bs] - b_width_log2_lookup[unit_size]);
int idx, idy;
@ -441,19 +442,6 @@ static void dist_block(MACROBLOCK *x, int plane, int block, TX_SIZE tx_size,
&this_sse) >> shift;
#endif // CONFIG_VP9_HIGHBITDEPTH
*out_sse = this_sse >> shift;
if (x->skip_encode && !is_inter_block(&xd->mi[0]->mbmi)) {
// TODO(jingning): tune the model to better capture the distortion.
int64_t p = (pd->dequant[1] * pd->dequant[1] *
(1 << ss_txfrm_size)) >>
#if CONFIG_VP9_HIGHBITDEPTH
(shift + 2 + (bd - 8) * 2);
#else
(shift + 2);
#endif // CONFIG_VP9_HIGHBITDEPTH
*out_dist += (p >> 4);
*out_sse += p;
}
}
static int rate_block(int plane, int block, BLOCK_SIZE plane_bsize,
@ -509,7 +497,7 @@ static void block_rd_txfm(int plane, int block, BLOCK_SIZE plane_bsize,
if (tx_size != TX_32X32)
dc_correct >>= 2;
dist = MAX(0, sse - dc_correct);
dist = VPXMAX(0, sse - dc_correct);
}
} else {
// SKIP_TXFM_AC_DC
@ -535,7 +523,7 @@ static void block_rd_txfm(int plane, int block, BLOCK_SIZE plane_bsize,
rd2 = RDCOST(x->rdmult, x->rddiv, 0, sse);
// TODO(jingning): temporarily enabled only for luma component
rd = MIN(rd1, rd2);
rd = VPXMIN(rd1, rd2);
if (plane == 0)
x->zcoeff_blk[tx_size][block] = !x->plane[plane].eobs[block] ||
(rd1 > rd2 && !xd->lossless);
@ -603,7 +591,7 @@ static void choose_largest_tx_size(VP10_COMP *cpi, MACROBLOCK *x,
MACROBLOCKD *const xd = &x->e_mbd;
MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
mbmi->tx_size = MIN(max_tx_size, largest_tx_size);
mbmi->tx_size = VPXMIN(max_tx_size, largest_tx_size);
#if CONFIG_EXT_TX
if (mbmi->ext_txfrm >= GET_EXT_TX_TYPES(mbmi->tx_size)) {
*rate = INT_MAX;
@ -659,8 +647,8 @@ static void choose_tx_size_from_rd(VP10_COMP *cpi, MACROBLOCK *x,
start_tx = max_tx_size;
end_tx = 0;
} else {
TX_SIZE chosen_tx_size = MIN(max_tx_size,
tx_mode_to_biggest_tx_size[cm->tx_mode]);
TX_SIZE chosen_tx_size = VPXMIN(max_tx_size,
tx_mode_to_biggest_tx_size[cm->tx_mode]);
start_tx = chosen_tx_size;
end_tx = chosen_tx_size;
}
@ -832,9 +820,7 @@ static int64_t rd_pick_intra4x4block(VP10_COMP *cpi, MACROBLOCK *x,
p->src_diff);
tran_low_t *const coeff = BLOCK_OFFSET(x->plane[0].coeff, block);
xd->mi[0]->bmi[block].as_mode = mode;
vp10_predict_intra_block(xd, 1, TX_4X4, mode,
x->skip_encode ? src : dst,
x->skip_encode ? src_stride : dst_stride,
vp10_predict_intra_block(xd, 1, TX_4X4, mode, dst, dst_stride,
dst, dst_stride,
col + idx, row + idy, 0);
vpx_highbd_subtract_block(4, 4, src_diff, 8, src, src_stride,
@ -897,7 +883,7 @@ static int64_t rd_pick_intra4x4block(VP10_COMP *cpi, MACROBLOCK *x,
next_highbd:
{}
}
if (best_rd >= rd_thresh || x->skip_encode)
if (best_rd >= rd_thresh)
return best_rd;
for (idy = 0; idy < num_4x4_blocks_high * 4; ++idy) {
@ -938,9 +924,7 @@ static int64_t rd_pick_intra4x4block(VP10_COMP *cpi, MACROBLOCK *x,
vp10_raster_block_offset_int16(BLOCK_8X8, block, p->src_diff);
tran_low_t *const coeff = BLOCK_OFFSET(x->plane[0].coeff, block);
xd->mi[0]->bmi[block].as_mode = mode;
vp10_predict_intra_block(xd, 1, TX_4X4, mode,
x->skip_encode ? src : dst,
x->skip_encode ? src_stride : dst_stride,
vp10_predict_intra_block(xd, 1, TX_4X4, mode, dst, dst_stride,
dst, dst_stride, col + idx, row + idy, 0);
vpx_subtract_block(4, 4, src_diff, 8, src, src_stride, dst, dst_stride);
@ -996,7 +980,7 @@ static int64_t rd_pick_intra4x4block(VP10_COMP *cpi, MACROBLOCK *x,
{}
}
if (best_rd >= rd_thresh || x->skip_encode)
if (best_rd >= rd_thresh)
return best_rd;
for (idy = 0; idy < num_4x4_blocks_high * 4; ++idy)
@ -1096,16 +1080,6 @@ static int64_t rd_pick_intra_sby_mode(VP10_COMP *cpi, MACROBLOCK *x,
memset(x->skip_txfm, SKIP_TXFM_NONE, sizeof(x->skip_txfm));
/* Y Search for intra prediction mode */
for (mode = DC_PRED; mode <= TM_PRED; mode++) {
if (cpi->sf.use_nonrd_pick_mode) {
// These speed features are turned on in hybrid non-RD and RD mode
// for key frame coding in the context of real-time setting.
if (conditional_skipintra(mode, mode_selected))
continue;
if (*skippable)
break;
}
mic->mbmi.mode = mode;
super_block_yrd(cpi, x, &this_rate_tokenonly, &this_distortion,
@ -1435,7 +1409,7 @@ static int64_t encode_inter_mb_segment(VP10_COMP *cpi,
cpi->sf.use_fast_coef_costing);
rd1 = RDCOST(x->rdmult, x->rddiv, thisrate, thisdistortion >> 2);
rd2 = RDCOST(x->rdmult, x->rddiv, 0, thissse >> 2);
rd = MIN(rd1, rd2);
rd = VPXMIN(rd1, rd2);
if (rd >= best_yrd)
return INT64_MAX;
}
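For context, RDCOST() used in these comparisons combines rate and distortion on one scale; a sketch of its definition as of this era of the code (rd.h), where rdmult is a Q8 rate multiplier and rddiv a distortion shift:

#define RDCOST(RM, DM, R, D) \
  (((128 + ((int64_t)R) * (RM)) >> 8) + (D << DM))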
@ -1854,7 +1828,8 @@ static int64_t rd_pick_best_sub8x8_mode(VP10_COMP *cpi, MACROBLOCK *x,
if (i == 0)
max_mv = x->max_mv_context[mbmi->ref_frame[0]];
else
max_mv = MAX(abs(bsi->mvp.as_mv.row), abs(bsi->mvp.as_mv.col)) >> 3;
max_mv =
VPXMAX(abs(bsi->mvp.as_mv.row), abs(bsi->mvp.as_mv.col)) >> 3;
if (cpi->sf.mv.auto_mv_step_size && cm->show_frame) {
// Take wtd average of the step_params based on the last frame's
@ -1872,7 +1847,7 @@ static int64_t rd_pick_best_sub8x8_mode(VP10_COMP *cpi, MACROBLOCK *x,
if (cpi->sf.adaptive_motion_search) {
mvp_full.row = x->pred_mv[mbmi->ref_frame[0]].row >> 3;
mvp_full.col = x->pred_mv[mbmi->ref_frame[0]].col >> 3;
step_param = MAX(step_param, 8);
step_param = VPXMAX(step_param, 8);
}
// adjust src pointer for this block
@ -2277,7 +2252,7 @@ static void single_motion_search(VP10_COMP *cpi, MACROBLOCK *x,
vp10_set_mv_search_range(x, &ref_mv);
// Work out the size of the first step in the mv step search.
// 0 here is maximum length first step. 1 is MAX >> 1 etc.
// 0 here is the maximum length first step. 1 is MAX >> 1, etc.
if (cpi->sf.mv.auto_mv_step_size && cm->show_frame) {
// Take wtd average of the step_params based on the last frame's
// max mv magnitude and that based on the best ref mvs of the current
@ -2289,9 +2264,10 @@ static void single_motion_search(VP10_COMP *cpi, MACROBLOCK *x,
}
if (cpi->sf.adaptive_motion_search && bsize < BLOCK_64X64) {
int boffset = 2 * (b_width_log2_lookup[BLOCK_64X64] -
MIN(b_height_log2_lookup[bsize], b_width_log2_lookup[bsize]));
step_param = MAX(step_param, boffset);
int boffset =
2 * (b_width_log2_lookup[BLOCK_64X64] -
VPXMIN(b_height_log2_lookup[bsize], b_width_log2_lookup[bsize]));
step_param = VPXMAX(step_param, boffset);
}
if (cpi->sf.adaptive_motion_search) {
@ -2512,7 +2488,7 @@ static int64_t handle_inter_mode(VP10_COMP *cpi, MACROBLOCK *x,
// motion field, where the distortion gain for a single block may not
// be enough to overcome the cost of a new mv.
if (discount_newmv_test(cpi, this_mode, tmp_mv, mode_mv, refs[0])) {
*rate2 += MAX((rate_mv / NEW_MV_DISCOUNT_FACTOR), 1);
*rate2 += VPXMAX((rate_mv / NEW_MV_DISCOUNT_FACTOR), 1);
} else {
*rate2 += rate_mv;
}
@ -2549,10 +2525,10 @@ static int64_t handle_inter_mode(VP10_COMP *cpi, MACROBLOCK *x,
// initiation of a motion field.
if (discount_newmv_test(cpi, this_mode, frame_mv[refs[0]],
mode_mv, refs[0])) {
*rate2 += MIN(cost_mv_ref(cpi, this_mode,
mbmi_ext->mode_context[refs[0]]),
cost_mv_ref(cpi, NEARESTMV,
mbmi_ext->mode_context[refs[0]]));
*rate2 += VPXMIN(cost_mv_ref(cpi, this_mode,
mbmi_ext->mode_context[refs[0]]),
cost_mv_ref(cpi, NEARESTMV,
mbmi_ext->mode_context[refs[0]]));
} else {
*rate2 += cost_mv_ref(cpi, this_mode, mbmi_ext->mode_context[refs[0]]);
}
@ -2594,10 +2570,10 @@ static int64_t handle_inter_mode(VP10_COMP *cpi, MACROBLOCK *x,
rd = RDCOST(x->rdmult, x->rddiv, tmp_rate_sum, tmp_dist_sum);
filter_cache[i] = rd;
filter_cache[SWITCHABLE_FILTERS] =
MIN(filter_cache[SWITCHABLE_FILTERS], rd + rs_rd);
VPXMIN(filter_cache[SWITCHABLE_FILTERS], rd + rs_rd);
if (cm->interp_filter == SWITCHABLE)
rd += rs_rd;
*mask_filter = MAX(*mask_filter, rd);
*mask_filter = VPXMAX(*mask_filter, rd);
} else {
int rate_sum = 0;
int64_t dist_sum = 0;
@ -2627,10 +2603,10 @@ static int64_t handle_inter_mode(VP10_COMP *cpi, MACROBLOCK *x,
rd = RDCOST(x->rdmult, x->rddiv, rate_sum, dist_sum);
filter_cache[i] = rd;
filter_cache[SWITCHABLE_FILTERS] =
MIN(filter_cache[SWITCHABLE_FILTERS], rd + rs_rd);
VPXMIN(filter_cache[SWITCHABLE_FILTERS], rd + rs_rd);
if (cm->interp_filter == SWITCHABLE)
rd += rs_rd;
*mask_filter = MAX(*mask_filter, rd);
*mask_filter = VPXMAX(*mask_filter, rd);
if (i == 0 && intpel_mv) {
tmp_rate_sum = rate_sum;
@ -2745,7 +2721,7 @@ static int64_t handle_inter_mode(VP10_COMP *cpi, MACROBLOCK *x,
assert(rate_y_tx != INT_MAX);
assert(rate_y_tx >= 0);
rdcost_tx = RDCOST(x->rdmult, x->rddiv, rate_y_tx, distortion_y_tx);
rdcost_tx = MIN(rdcost_tx, RDCOST(x->rdmult, x->rddiv, 0, *psse));
rdcost_tx = VPXMIN(rdcost_tx, RDCOST(x->rdmult, x->rddiv, 0, *psse));
assert(rdcost_tx >= 0);
if (rdcost_tx <
(best_ext_tx == NORM ? ext_tx_th : 1) * best_rdcost_tx) {
@ -2773,7 +2749,7 @@ static int64_t handle_inter_mode(VP10_COMP *cpi, MACROBLOCK *x,
*distortion += distortion_y;
rdcosty = RDCOST(x->rdmult, x->rddiv, *rate2, *distortion);
rdcosty = MIN(rdcosty, RDCOST(x->rdmult, x->rddiv, 0, *psse));
rdcosty = VPXMIN(rdcosty, RDCOST(x->rdmult, x->rddiv, 0, *psse));
if (!super_block_uvrd(cpi, x, rate_uv, &distortion_uv, &skippable_uv,
&sseuv, bsize, ref_best_rd - rdcosty)) {
@ -2814,7 +2790,6 @@ void vp10_rd_pick_intra_mode_sb(VP10_COMP *cpi, MACROBLOCK *x,
int y_skip = 0, uv_skip = 0;
int64_t dist_y = 0, dist_uv = 0;
TX_SIZE max_uv_tx_size;
x->skip_encode = 0;
ctx->skip = 0;
xd->mi[0]->mbmi.ref_frame[0] = INTRA_FRAME;
xd->mi[0]->mbmi.ref_frame[1] = NONE;
@ -2838,7 +2813,7 @@ void vp10_rd_pick_intra_mode_sb(VP10_COMP *cpi, MACROBLOCK *x,
pd[1].subsampling_x,
pd[1].subsampling_y);
rd_pick_intra_sbuv_mode(cpi, x, ctx, &rate_uv, &rate_uv_tokenonly,
&dist_uv, &uv_skip, MAX(BLOCK_8X8, bsize),
&dist_uv, &uv_skip, VPXMAX(BLOCK_8X8, bsize),
max_uv_tx_size);
if (y_skip && uv_skip) {
@ -2905,12 +2880,12 @@ static void rd_variance_adjustment(VP10_COMP *cpi,
// to a predictor with a low spatial complexity compared to the source.
if ((source_variance > LOW_VAR_THRESH) && (ref_frame == INTRA_FRAME) &&
(source_variance > recon_variance)) {
var_factor = MIN(absvar_diff, MIN(VLOW_ADJ_MAX, var_error));
var_factor = VPXMIN(absvar_diff, VPXMIN(VLOW_ADJ_MAX, var_error));
// A second possible case of interest is where the source variance
// is very low and we wish to discourage false texture or motion trails.
} else if ((source_variance < (LOW_VAR_THRESH >> 1)) &&
(recon_variance > source_variance)) {
var_factor = MIN(absvar_diff, MIN(VHIGH_ADJ_MAX, var_error));
var_factor = VPXMIN(absvar_diff, VPXMIN(VHIGH_ADJ_MAX, var_error));
}
*this_rd += (*this_rd * var_factor) / 100;
}
@ -2940,7 +2915,7 @@ int vp10_active_h_edge(VP10_COMP *cpi, int mi_row, int mi_step) {
top_edge += (int)(twopass->this_frame_stats.inactive_zone_rows * 2);
bottom_edge -= (int)(twopass->this_frame_stats.inactive_zone_rows * 2);
bottom_edge = MAX(top_edge, bottom_edge);
bottom_edge = VPXMAX(top_edge, bottom_edge);
}
if (((top_edge >= mi_row) && (top_edge < (mi_row + mi_step))) ||
@ -2967,7 +2942,7 @@ int vp10_active_v_edge(VP10_COMP *cpi, int mi_col, int mi_step) {
left_edge += (int)(twopass->this_frame_stats.inactive_zone_cols * 2);
right_edge -= (int)(twopass->this_frame_stats.inactive_zone_cols * 2);
right_edge = MAX(left_edge, right_edge);
right_edge = VPXMAX(left_edge, right_edge);
}
if (((left_edge >= mi_col) && (left_edge < (mi_col + mi_step))) ||
@ -2987,14 +2962,13 @@ int vp10_active_edge_sb(VP10_COMP *cpi,
}
void vp10_rd_pick_inter_mode_sb(VP10_COMP *cpi,
TileDataEnc *tile_data,
MACROBLOCK *x,
int mi_row, int mi_col,
RD_COST *rd_cost, BLOCK_SIZE bsize,
PICK_MODE_CONTEXT *ctx,
int64_t best_rd_so_far) {
TileDataEnc *tile_data,
MACROBLOCK *x,
int mi_row, int mi_col,
RD_COST *rd_cost, BLOCK_SIZE bsize,
PICK_MODE_CONTEXT *ctx,
int64_t best_rd_so_far) {
VP10_COMMON *const cm = &cpi->common;
TileInfo *const tile_info = &tile_data->tile_info;
RD_OPT *const rd_opt = &cpi->rd;
SPEED_FEATURES *const sf = &cpi->sf;
MACROBLOCKD *const xd = &x->e_mbd;
@ -3045,8 +3019,6 @@ void vp10_rd_pick_inter_mode_sb(VP10_COMP *cpi,
vp10_zero(best_mbmode);
x->skip_encode = sf->skip_encode_frame && x->q_index < QIDX_SKIP_THRESH;
for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i)
filter_cache[i] = INT64_MAX;
@ -3088,7 +3060,7 @@ void vp10_rd_pick_inter_mode_sb(VP10_COMP *cpi,
// are masked out.
ref_frame_skip_mask[0] |= (1 << ref_frame);
ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
} else if (sf->reference_masking) {
} else {
for (i = LAST_FRAME; i <= ALTREF_FRAME; ++i) {
// Skip fixed mv modes for poor references
if ((x->pred_mv_sad[ref_frame] >> 2) > x->pred_mv_sad[i]) {
@ -3214,7 +3186,7 @@ void vp10_rd_pick_inter_mode_sb(VP10_COMP *cpi,
}
if ((ref_frame_skip_mask[0] & (1 << ref_frame)) &&
(ref_frame_skip_mask[1] & (1 << MAX(0, second_ref_frame))))
(ref_frame_skip_mask[1] & (1 << VPXMAX(0, second_ref_frame))))
continue;
if (mode_skip_mask[ref_frame] & (1 << this_mode))
@ -3227,55 +3199,6 @@ void vp10_rd_pick_inter_mode_sb(VP10_COMP *cpi,
if (best_rd < mode_threshold[mode_index])
continue;
if (sf->motion_field_mode_search) {
const int mi_width = MIN(num_8x8_blocks_wide_lookup[bsize],
tile_info->mi_col_end - mi_col);
const int mi_height = MIN(num_8x8_blocks_high_lookup[bsize],
tile_info->mi_row_end - mi_row);
const int bsl = mi_width_log2_lookup[bsize];
int cb_partition_search_ctrl = (((mi_row + mi_col) >> bsl)
+ get_chessboard_index(cm->current_video_frame)) & 0x1;
MB_MODE_INFO *ref_mbmi;
int const_motion = 1;
int skip_ref_frame = !cb_partition_search_ctrl;
MV_REFERENCE_FRAME rf = NONE;
int_mv ref_mv;
ref_mv.as_int = INVALID_MV;
if ((mi_row - 1) >= tile_info->mi_row_start) {
ref_mv = xd->mi[-xd->mi_stride]->mbmi.mv[0];
rf = xd->mi[-xd->mi_stride]->mbmi.ref_frame[0];
for (i = 0; i < mi_width; ++i) {
ref_mbmi = &xd->mi[-xd->mi_stride + i]->mbmi;
const_motion &= (ref_mv.as_int == ref_mbmi->mv[0].as_int) &&
(ref_frame == ref_mbmi->ref_frame[0]);
skip_ref_frame &= (rf == ref_mbmi->ref_frame[0]);
}
}
if ((mi_col - 1) >= tile_info->mi_col_start) {
if (ref_mv.as_int == INVALID_MV)
ref_mv = xd->mi[-1]->mbmi.mv[0];
if (rf == NONE)
rf = xd->mi[-1]->mbmi.ref_frame[0];
for (i = 0; i < mi_height; ++i) {
ref_mbmi = &xd->mi[i * xd->mi_stride - 1]->mbmi;
const_motion &= (ref_mv.as_int == ref_mbmi->mv[0].as_int) &&
(ref_frame == ref_mbmi->ref_frame[0]);
skip_ref_frame &= (rf == ref_mbmi->ref_frame[0]);
}
}
if (skip_ref_frame && this_mode != NEARESTMV && this_mode != NEWMV)
if (rf > INTRA_FRAME)
if (ref_frame != rf)
continue;
if (const_motion)
if (this_mode == NEARMV || this_mode == ZEROMV)
continue;
}
comp_pred = second_ref_frame > INTRA_FRAME;
if (comp_pred) {
if (!cpi->allow_comp_inter_inter)
@ -3455,9 +3378,9 @@ void vp10_rd_pick_inter_mode_sb(VP10_COMP *cpi,
if (!disable_skip && ref_frame == INTRA_FRAME) {
for (i = 0; i < REFERENCE_MODES; ++i)
best_pred_rd[i] = MIN(best_pred_rd[i], this_rd);
best_pred_rd[i] = VPXMIN(best_pred_rd[i], this_rd);
for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++)
best_filter_rd[i] = MIN(best_filter_rd[i], this_rd);
best_filter_rd[i] = VPXMIN(best_filter_rd[i], this_rd);
}
// Did this mode help, i.e. is it the new best mode?
@ -3556,7 +3479,7 @@ void vp10_rd_pick_inter_mode_sb(VP10_COMP *cpi,
adj_rd = filter_cache[i] - ref;
adj_rd += this_rd;
best_filter_rd[i] = MIN(best_filter_rd[i], adj_rd);
best_filter_rd[i] = VPXMIN(best_filter_rd[i], adj_rd);
}
}
}
@ -3694,8 +3617,6 @@ void vp10_rd_pick_inter_mode_sb_seg_skip(VP10_COMP *cpi,
int rate2 = 0;
const int64_t distortion2 = 0;
x->skip_encode = cpi->sf.skip_encode_frame && x->q_index < QIDX_SKIP_THRESH;
estimate_ref_frame_costs(cm, xd, segment_id, ref_costs_single, ref_costs_comp,
&comp_mode_p);
@ -3822,7 +3743,6 @@ void vp10_rd_pick_inter_mode_sub8x8(VP10_COMP *cpi,
mbmi->ext_txfrm = NORM;
#endif
x->skip_encode = sf->skip_encode_frame && x->q_index < QIDX_SKIP_THRESH;
memset(x->zcoeff_blk[TX_4X4], 0, 4);
vp10_zero(best_mbmode);
@ -3902,7 +3822,7 @@ void vp10_rd_pick_inter_mode_sub8x8(VP10_COMP *cpi,
}
if ((ref_frame_skip_mask[0] & (1 << ref_frame)) &&
(ref_frame_skip_mask[1] & (1 << MAX(0, second_ref_frame))))
(ref_frame_skip_mask[1] & (1 << VPXMAX(0, second_ref_frame))))
continue;
// Test best rd so far against threshold for trying this mode.
@ -4060,12 +3980,11 @@ void vp10_rd_pick_inter_mode_sub8x8(VP10_COMP *cpi,
rs_rd = RDCOST(x->rdmult, x->rddiv, rs, 0);
filter_cache[switchable_filter_index] = tmp_rd;
filter_cache[SWITCHABLE_FILTERS] =
MIN(filter_cache[SWITCHABLE_FILTERS],
tmp_rd + rs_rd);
VPXMIN(filter_cache[SWITCHABLE_FILTERS], tmp_rd + rs_rd);
if (cm->interp_filter == SWITCHABLE)
tmp_rd += rs_rd;
mask_filter = MAX(mask_filter, tmp_rd);
mask_filter = VPXMAX(mask_filter, tmp_rd);
newbest = (tmp_rd < tmp_best_rd);
if (newbest) {
@ -4143,8 +4062,8 @@ void vp10_rd_pick_inter_mode_sub8x8(VP10_COMP *cpi,
compmode_cost = vp10_cost_bit(comp_mode_p, comp_pred);
tmp_best_rdu = best_rd -
MIN(RDCOST(x->rdmult, x->rddiv, rate2, distortion2),
RDCOST(x->rdmult, x->rddiv, 0, total_sse));
VPXMIN(RDCOST(x->rdmult, x->rddiv, rate2, distortion2),
RDCOST(x->rdmult, x->rddiv, 0, total_sse));
if (tmp_best_rdu > 0) {
// If even the 'Y' rd value of split is higher than best so far
@ -4204,9 +4123,9 @@ void vp10_rd_pick_inter_mode_sub8x8(VP10_COMP *cpi,
if (!disable_skip && ref_frame == INTRA_FRAME) {
for (i = 0; i < REFERENCE_MODES; ++i)
best_pred_rd[i] = MIN(best_pred_rd[i], this_rd);
best_pred_rd[i] = VPXMIN(best_pred_rd[i], this_rd);
for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++)
best_filter_rd[i] = MIN(best_filter_rd[i], this_rd);
best_filter_rd[i] = VPXMIN(best_filter_rd[i], this_rd);
}
// Did this mode help, i.e. is it the new best mode?
@ -4305,7 +4224,7 @@ void vp10_rd_pick_inter_mode_sub8x8(VP10_COMP *cpi,
adj_rd = filter_cache[i] - ref;
adj_rd += this_rd;
best_filter_rd[i] = MIN(best_filter_rd[i], adj_rd);
best_filter_rd[i] = VPXMIN(best_filter_rd[i], adj_rd);
}
}


@ -14,6 +14,10 @@
#include <stdio.h>
#include "vpx/vpx_integer.h"
#ifdef __cplusplus
extern "C" {
#endif
void vp10_resize_plane(const uint8_t *const input,
int height,
int width,
@ -121,4 +125,9 @@ void vp10_highbd_resize_frame444(const uint8_t *const y,
int owidth,
int bd);
#endif // CONFIG_VP9_HIGHBITDEPTH
#ifdef __cplusplus
} // extern "C"
#endif
#endif // VP10_ENCODER_RESIZE_H_


@ -14,11 +14,12 @@
#include "vp10/encoder/speed_features.h"
#include "vp10/encoder/rdopt.h"
#include "vpx_dsp/vpx_dsp_common.h"
// Intra only frames, golden frames (except alt ref overlays) and
// alt ref frames tend to be coded at a higher than ambient quality
static int frame_is_boosted(const VP10_COMP *cpi) {
return frame_is_kf_gf_arf(cpi) || vp10_is_upper_layer_key_frame(cpi);
return frame_is_kf_gf_arf(cpi);
}
// Sets a partition size down to which the auto partition code will always
@ -49,7 +50,7 @@ static void set_good_speed_feature_framesize_dependent(VP10_COMP *cpi,
VP10_COMMON *const cm = &cpi->common;
if (speed >= 1) {
if (MIN(cm->width, cm->height) >= 720) {
if (VPXMIN(cm->width, cm->height) >= 720) {
sf->disable_split_mask = cm->show_frame ? DISABLE_ALL_SPLIT
: DISABLE_ALL_INTER_SPLIT;
sf->partition_search_breakout_dist_thr = (1 << 23);
@ -60,7 +61,7 @@ static void set_good_speed_feature_framesize_dependent(VP10_COMP *cpi,
}
if (speed >= 2) {
if (MIN(cm->width, cm->height) >= 720) {
if (VPXMIN(cm->width, cm->height) >= 720) {
sf->disable_split_mask = cm->show_frame ? DISABLE_ALL_SPLIT
: DISABLE_ALL_INTER_SPLIT;
sf->adaptive_pred_interp_filter = 0;
@ -75,7 +76,7 @@ static void set_good_speed_feature_framesize_dependent(VP10_COMP *cpi,
}
if (speed >= 3) {
if (MIN(cm->width, cm->height) >= 720) {
if (VPXMIN(cm->width, cm->height) >= 720) {
sf->disable_split_mask = DISABLE_ALL_SPLIT;
sf->schedule_mode_search = cm->base_qindex < 220 ? 1 : 0;
sf->partition_search_breakout_dist_thr = (1 << 25);
@ -99,7 +100,7 @@ static void set_good_speed_feature_framesize_dependent(VP10_COMP *cpi,
}
if (speed >= 4) {
if (MIN(cm->width, cm->height) >= 720) {
if (VPXMIN(cm->width, cm->height) >= 720) {
sf->partition_search_breakout_dist_thr = (1 << 26);
} else {
sf->partition_search_breakout_dist_thr = (1 << 24);
@ -147,9 +148,6 @@ static void set_good_speed_feature(VP10_COMP *cpi, VP10_COMMON *cm,
sf->tx_size_search_method = frame_is_boosted(cpi) ? USE_FULL_RD
: USE_LARGESTALL;
// Reference masking is not supported in dynamic scaling mode.
sf->reference_masking = cpi->oxcf.resize_mode != RESIZE_DYNAMIC ? 1 : 0;
sf->mode_search_skip_flags = (cm->frame_type == KEY_FRAME) ? 0 :
FLAG_SKIP_INTRA_DIRMISMATCH |
FLAG_SKIP_INTRA_BESTINTER |
@ -191,7 +189,6 @@ static void set_good_speed_feature(VP10_COMP *cpi, VP10_COMMON *cm,
sf->use_lp32x32fdct = 1;
sf->use_fast_coef_updates = ONE_LOOP_REDUCED;
sf->use_fast_coef_costing = 1;
sf->motion_field_mode_search = !boosted;
sf->partition_search_breakout_rate_thr = 300;
}
@ -215,7 +212,7 @@ static void set_rt_speed_feature_framesize_dependent(VP10_COMP *cpi,
VP10_COMMON *const cm = &cpi->common;
if (speed >= 1) {
if (MIN(cm->width, cm->height) >= 720) {
if (VPXMIN(cm->width, cm->height) >= 720) {
sf->disable_split_mask = cm->show_frame ? DISABLE_ALL_SPLIT
: DISABLE_ALL_INTER_SPLIT;
} else {
@ -224,7 +221,7 @@ static void set_rt_speed_feature_framesize_dependent(VP10_COMP *cpi,
}
if (speed >= 2) {
if (MIN(cm->width, cm->height) >= 720) {
if (VPXMIN(cm->width, cm->height) >= 720) {
sf->disable_split_mask = cm->show_frame ? DISABLE_ALL_SPLIT
: DISABLE_ALL_INTER_SPLIT;
} else {
@ -233,7 +230,7 @@ static void set_rt_speed_feature_framesize_dependent(VP10_COMP *cpi,
}
if (speed >= 5) {
if (MIN(cm->width, cm->height) >= 720) {
if (VPXMIN(cm->width, cm->height) >= 720) {
sf->partition_search_breakout_dist_thr = (1 << 25);
} else {
sf->partition_search_breakout_dist_thr = (1 << 23);
@ -241,7 +238,7 @@ static void set_rt_speed_feature_framesize_dependent(VP10_COMP *cpi,
}
if (speed >= 7) {
sf->encode_breakout_thresh = (MIN(cm->width, cm->height) >= 720) ?
sf->encode_breakout_thresh = (VPXMIN(cm->width, cm->height) >= 720) ?
800 : 300;
}
}
@ -279,14 +276,6 @@ static void set_rt_speed_feature(VP10_COMP *cpi, SPEED_FEATURES *sf,
FLAG_SKIP_COMP_BESTINTRA |
FLAG_SKIP_INTRA_LOWVAR;
sf->adaptive_pred_interp_filter = 2;
// Disable reference masking if using spatial scaling since
// pred_mv_sad will not be set (since vp10_mv_pred will not
// be called).
// TODO(marpan/agrange): Fix this condition.
sf->reference_masking = (cpi->oxcf.resize_mode != RESIZE_DYNAMIC &&
cpi->svc.number_spatial_layers == 1) ? 1 : 0;
sf->disable_filter_search_var_thresh = 50;
sf->comp_inter_joint_search_thresh = BLOCK_SIZES;
sf->auto_min_max_partition_size = RELAXED_NEIGHBORING_MIN_MAX;
@ -302,7 +291,6 @@ static void set_rt_speed_feature(VP10_COMP *cpi, SPEED_FEATURES *sf,
sf->use_square_partition_only = 1;
sf->disable_filter_search_var_thresh = 100;
sf->use_uv_intra_rd_estimate = 1;
sf->skip_encode_sb = 1;
sf->mv.subpel_iters_per_step = 1;
sf->adaptive_rd_thresh = 4;
sf->mode_skip_start = 6;
@ -348,7 +336,6 @@ static void set_rt_speed_feature(VP10_COMP *cpi, SPEED_FEATURES *sf,
(frames_since_key % (sf->last_partitioning_redo_frequency << 1) == 1);
sf->max_delta_qindex = is_keyframe ? 20 : 15;
sf->partition_search_type = REFERENCE_PARTITION;
sf->use_nonrd_pick_mode = 1;
sf->allow_skip_recode = 0;
sf->inter_mode_mask[BLOCK_32X32] = INTER_NEAREST_NEW_ZERO;
sf->inter_mode_mask[BLOCK_32X64] = INTER_NEAREST_NEW_ZERO;
@ -384,10 +371,8 @@ static void set_rt_speed_feature(VP10_COMP *cpi, SPEED_FEATURES *sf,
// Adaptively switch between SOURCE_VAR_BASED_PARTITION and FIXED_PARTITION.
sf->partition_search_type = VAR_BASED_PARTITION;
// Turn on this to use non-RD key frame coding mode.
sf->use_nonrd_pick_mode = 1;
sf->mv.search_method = NSTEP;
sf->mv.reduce_first_step_size = 1;
sf->skip_encode_sb = 0;
}
if (speed >= 7) {
@ -459,10 +444,8 @@ void vp10_set_speed_features_framesize_independent(VP10_COMP *cpi) {
sf->adaptive_mode_search = 0;
sf->cb_pred_filter_search = 0;
sf->cb_partition_search = 0;
sf->motion_field_mode_search = 0;
sf->alt_ref_search_fp = 0;
sf->use_quant_fp = 0;
sf->reference_masking = 0;
sf->partition_search_type = SEARCH_PARTITION;
sf->less_rectangular_check = 0;
sf->use_square_partition_only = 0;
@ -485,7 +468,6 @@ void vp10_set_speed_features_framesize_independent(VP10_COMP *cpi) {
sf->intra_uv_mode_mask[i] = INTRA_ALL;
}
sf->use_rd_breakout = 0;
sf->skip_encode_sb = 0;
sf->use_uv_intra_rd_estimate = 0;
sf->allow_skip_recode = 0;
sf->lpf_pick = LPF_PICK_FROM_FULL_IMAGE;
@ -493,7 +475,6 @@ void vp10_set_speed_features_framesize_independent(VP10_COMP *cpi) {
sf->use_fast_coef_costing = 0;
sf->mode_skip_start = MAX_MODES; // Mode index at which mode skip mask set
sf->schedule_mode_search = 0;
sf->use_nonrd_pick_mode = 0;
for (i = 0; i < BLOCK_SIZES; ++i)
sf->inter_mode_mask[i] = INTER_ALL;
sf->max_intra_bsize = BLOCK_64X64;


@ -141,7 +141,7 @@ typedef enum {
} INTERP_FILTER_MASK;
typedef enum {
// Search partitions using RD/NONRD criterion
// Search partitions using RD criterion
SEARCH_PARTITION,
// Always use a fixed size partition
@ -223,11 +223,6 @@ typedef struct SPEED_FEATURES {
// mode to be evaluated. A high value means we will be faster.
int adaptive_rd_thresh;
// Enables skipping the reconstruction step (idct, recon) in the
// intermediate steps assuming the last frame didn't have too many intra
// blocks and the q is less than a threshold.
int skip_encode_sb;
int skip_encode_frame;
// Speed feature to allow or disallow skipping of recode at block
// level within a frame.
int allow_skip_recode;
@ -253,9 +248,6 @@ typedef struct SPEED_FEATURES {
// of the best so far.
int mode_skip_start;
// TODO(JBB): Remove this.
int reference_masking;
PARTITION_SEARCH_TYPE partition_search_type;
// Used if partition_search_type = FIXED_SIZE_PARTITION
@ -314,8 +306,6 @@ typedef struct SPEED_FEATURES {
int cb_partition_search;
int motion_field_mode_search;
int alt_ref_search_fp;
// Fast quantization process path
@ -363,9 +353,6 @@ typedef struct SPEED_FEATURES {
// by only looking at counts from 1/2 the bands.
FAST_COEFF_UPDATE use_fast_coef_updates;
// This flag controls the use of non-RD mode decision.
int use_nonrd_pick_mode;
// A binary mask indicating if NEARESTMV, NEARMV, ZEROMV, NEWMV
// modes are used in order from LSB to MSB for each BLOCK_SIZE.
int inter_mode_mask[BLOCK_SIZES];


@ -1,646 +0,0 @@
/*
* Copyright (c) 2014 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include <math.h>
#include "vp10/encoder/encoder.h"
#include "vp10/encoder/svc_layercontext.h"
#include "vp10/encoder/extend.h"
#define SMALL_FRAME_FB_IDX 7
#define SMALL_FRAME_WIDTH 16
#define SMALL_FRAME_HEIGHT 16
void vp10_init_layer_context(VP10_COMP *const cpi) {
SVC *const svc = &cpi->svc;
const VP10EncoderConfig *const oxcf = &cpi->oxcf;
int sl, tl;
int alt_ref_idx = svc->number_spatial_layers;
svc->spatial_layer_id = 0;
svc->temporal_layer_id = 0;
if (cpi->oxcf.error_resilient_mode == 0 && cpi->oxcf.pass == 2) {
if (vpx_realloc_frame_buffer(&cpi->svc.empty_frame.img,
SMALL_FRAME_WIDTH, SMALL_FRAME_HEIGHT,
cpi->common.subsampling_x,
cpi->common.subsampling_y,
#if CONFIG_VP9_HIGHBITDEPTH
cpi->common.use_highbitdepth,
#endif
VP9_ENC_BORDER_IN_PIXELS,
cpi->common.byte_alignment,
NULL, NULL, NULL))
vpx_internal_error(&cpi->common.error, VPX_CODEC_MEM_ERROR,
"Failed to allocate empty frame for multiple frame "
"contexts");
memset(cpi->svc.empty_frame.img.buffer_alloc, 0x80,
cpi->svc.empty_frame.img.buffer_alloc_sz);
}
for (sl = 0; sl < oxcf->ss_number_layers; ++sl) {
for (tl = 0; tl < oxcf->ts_number_layers; ++tl) {
int layer = LAYER_IDS_TO_IDX(sl, tl, oxcf->ts_number_layers);
LAYER_CONTEXT *const lc = &svc->layer_context[layer];
RATE_CONTROL *const lrc = &lc->rc;
int i;
lc->current_video_frame_in_layer = 0;
lc->layer_size = 0;
lc->frames_from_key_frame = 0;
lc->last_frame_type = FRAME_TYPES;
lrc->ni_av_qi = oxcf->worst_allowed_q;
lrc->total_actual_bits = 0;
lrc->total_target_vs_actual = 0;
lrc->ni_tot_qi = 0;
lrc->tot_q = 0.0;
lrc->avg_q = 0.0;
lrc->ni_frames = 0;
lrc->decimation_count = 0;
lrc->decimation_factor = 0;
for (i = 0; i < RATE_FACTOR_LEVELS; ++i) {
lrc->rate_correction_factors[i] = 1.0;
}
if (cpi->oxcf.rc_mode == VPX_CBR) {
lc->target_bandwidth = oxcf->layer_target_bitrate[layer];
lrc->last_q[INTER_FRAME] = oxcf->worst_allowed_q;
lrc->avg_frame_qindex[INTER_FRAME] = oxcf->worst_allowed_q;
lrc->avg_frame_qindex[KEY_FRAME] = oxcf->worst_allowed_q;
} else {
lc->target_bandwidth = oxcf->layer_target_bitrate[layer];
lrc->last_q[KEY_FRAME] = oxcf->best_allowed_q;
lrc->last_q[INTER_FRAME] = oxcf->best_allowed_q;
lrc->avg_frame_qindex[KEY_FRAME] = (oxcf->worst_allowed_q +
oxcf->best_allowed_q) / 2;
lrc->avg_frame_qindex[INTER_FRAME] = (oxcf->worst_allowed_q +
oxcf->best_allowed_q) / 2;
if (oxcf->ss_enable_auto_arf[sl])
lc->alt_ref_idx = alt_ref_idx++;
else
lc->alt_ref_idx = INVALID_IDX;
lc->gold_ref_idx = INVALID_IDX;
}
lrc->buffer_level = oxcf->starting_buffer_level_ms *
lc->target_bandwidth / 1000;
lrc->bits_off_target = lrc->buffer_level;
}
}
// Still have extra buffer for base layer golden frame
if (!(svc->number_temporal_layers > 1 && cpi->oxcf.rc_mode == VPX_CBR)
&& alt_ref_idx < REF_FRAMES)
svc->layer_context[0].gold_ref_idx = alt_ref_idx;
}
// Update the layer context from a change_config() call.
void vp10_update_layer_context_change_config(VP10_COMP *const cpi,
const int target_bandwidth) {
SVC *const svc = &cpi->svc;
const VP10EncoderConfig *const oxcf = &cpi->oxcf;
const RATE_CONTROL *const rc = &cpi->rc;
int sl, tl, layer = 0, spatial_layer_target;
float bitrate_alloc = 1.0;
if (svc->temporal_layering_mode != VP9E_TEMPORAL_LAYERING_MODE_NOLAYERING) {
for (sl = 0; sl < oxcf->ss_number_layers; ++sl) {
spatial_layer_target = 0;
for (tl = 0; tl < oxcf->ts_number_layers; ++tl) {
layer = LAYER_IDS_TO_IDX(sl, tl, oxcf->ts_number_layers);
svc->layer_context[layer].target_bandwidth =
oxcf->layer_target_bitrate[layer];
}
layer = LAYER_IDS_TO_IDX(sl, ((oxcf->ts_number_layers - 1) < 0 ?
0 : (oxcf->ts_number_layers - 1)), oxcf->ts_number_layers);
spatial_layer_target =
svc->layer_context[layer].target_bandwidth =
oxcf->layer_target_bitrate[layer];
for (tl = 0; tl < oxcf->ts_number_layers; ++tl) {
LAYER_CONTEXT *const lc =
&svc->layer_context[sl * oxcf->ts_number_layers + tl];
RATE_CONTROL *const lrc = &lc->rc;
lc->spatial_layer_target_bandwidth = spatial_layer_target;
bitrate_alloc = (float)lc->target_bandwidth / spatial_layer_target;
lrc->starting_buffer_level =
(int64_t)(rc->starting_buffer_level * bitrate_alloc);
lrc->optimal_buffer_level =
(int64_t)(rc->optimal_buffer_level * bitrate_alloc);
lrc->maximum_buffer_size =
(int64_t)(rc->maximum_buffer_size * bitrate_alloc);
lrc->bits_off_target =
MIN(lrc->bits_off_target, lrc->maximum_buffer_size);
lrc->buffer_level = MIN(lrc->buffer_level, lrc->maximum_buffer_size);
lc->framerate = cpi->framerate / oxcf->ts_rate_decimator[tl];
lrc->avg_frame_bandwidth = (int)(lc->target_bandwidth / lc->framerate);
lrc->max_frame_bandwidth = rc->max_frame_bandwidth;
lrc->worst_quality = rc->worst_quality;
lrc->best_quality = rc->best_quality;
}
}
} else {
int layer_end;
if (svc->number_temporal_layers > 1 && cpi->oxcf.rc_mode == VPX_CBR) {
layer_end = svc->number_temporal_layers;
} else {
layer_end = svc->number_spatial_layers;
}
for (layer = 0; layer < layer_end; ++layer) {
LAYER_CONTEXT *const lc = &svc->layer_context[layer];
RATE_CONTROL *const lrc = &lc->rc;
lc->target_bandwidth = oxcf->layer_target_bitrate[layer];
bitrate_alloc = (float)lc->target_bandwidth / target_bandwidth;
// Update buffer-related quantities.
lrc->starting_buffer_level =
(int64_t)(rc->starting_buffer_level * bitrate_alloc);
lrc->optimal_buffer_level =
(int64_t)(rc->optimal_buffer_level * bitrate_alloc);
lrc->maximum_buffer_size =
(int64_t)(rc->maximum_buffer_size * bitrate_alloc);
lrc->bits_off_target = MIN(lrc->bits_off_target,
lrc->maximum_buffer_size);
lrc->buffer_level = MIN(lrc->buffer_level, lrc->maximum_buffer_size);
// Update framerate-related quantities.
if (svc->number_temporal_layers > 1 && cpi->oxcf.rc_mode == VPX_CBR) {
lc->framerate = cpi->framerate / oxcf->ts_rate_decimator[layer];
} else {
lc->framerate = cpi->framerate;
}
lrc->avg_frame_bandwidth = (int)(lc->target_bandwidth / lc->framerate);
lrc->max_frame_bandwidth = rc->max_frame_bandwidth;
// Update qp-related quantities.
lrc->worst_quality = rc->worst_quality;
lrc->best_quality = rc->best_quality;
}
}
}
static LAYER_CONTEXT *get_layer_context(VP10_COMP *const cpi) {
if (is_one_pass_cbr_svc(cpi))
return &cpi->svc.layer_context[cpi->svc.spatial_layer_id *
cpi->svc.number_temporal_layers + cpi->svc.temporal_layer_id];
else
return (cpi->svc.number_temporal_layers > 1 &&
cpi->oxcf.rc_mode == VPX_CBR) ?
&cpi->svc.layer_context[cpi->svc.temporal_layer_id] :
&cpi->svc.layer_context[cpi->svc.spatial_layer_id];
}
void vp10_update_temporal_layer_framerate(VP10_COMP *const cpi) {
SVC *const svc = &cpi->svc;
const VP10EncoderConfig *const oxcf = &cpi->oxcf;
LAYER_CONTEXT *const lc = get_layer_context(cpi);
RATE_CONTROL *const lrc = &lc->rc;
// Index into spatial+temporal arrays.
const int st_idx = svc->spatial_layer_id * svc->number_temporal_layers +
svc->temporal_layer_id;
const int tl = svc->temporal_layer_id;
lc->framerate = cpi->framerate / oxcf->ts_rate_decimator[tl];
lrc->avg_frame_bandwidth = (int)(lc->target_bandwidth / lc->framerate);
lrc->max_frame_bandwidth = cpi->rc.max_frame_bandwidth;
// Update the average layer frame size (non-cumulative per-frame-bw).
if (tl == 0) {
lc->avg_frame_size = lrc->avg_frame_bandwidth;
} else {
const double prev_layer_framerate =
cpi->framerate / oxcf->ts_rate_decimator[tl - 1];
const int prev_layer_target_bandwidth =
oxcf->layer_target_bitrate[st_idx - 1];
lc->avg_frame_size =
(int)((lc->target_bandwidth - prev_layer_target_bandwidth) /
(lc->framerate - prev_layer_framerate));
}
}
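A worked example of the per-layer frame size above (rates and decimators hypothetical): with two temporal layers at 30 fps and rate decimators {2, 1}, TL0 runs at 15 fps; if TL0 targets 400 kbps and the cumulative TL1 target is 600 kbps, then:

// TL0: avg_frame_size = 400000 / 15 bits per frame
// TL1: avg_frame_size = (600000 - 400000) / (30 - 15) = 13333 bits,
// i.e. only the bandwidth the layer adds, spread over the frames it adds.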
void vp10_update_spatial_layer_framerate(VP10_COMP *const cpi,
double framerate) {
const VP10EncoderConfig *const oxcf = &cpi->oxcf;
LAYER_CONTEXT *const lc = get_layer_context(cpi);
RATE_CONTROL *const lrc = &lc->rc;
lc->framerate = framerate;
lrc->avg_frame_bandwidth = (int)(lc->target_bandwidth / lc->framerate);
lrc->min_frame_bandwidth = (int)(lrc->avg_frame_bandwidth *
oxcf->two_pass_vbrmin_section / 100);
lrc->max_frame_bandwidth = (int)(((int64_t)lrc->avg_frame_bandwidth *
oxcf->two_pass_vbrmax_section) / 100);
vp10_rc_set_gf_interval_range(cpi, lrc);
}
void vp10_restore_layer_context(VP10_COMP *const cpi) {
LAYER_CONTEXT *const lc = get_layer_context(cpi);
const int old_frame_since_key = cpi->rc.frames_since_key;
const int old_frame_to_key = cpi->rc.frames_to_key;
cpi->rc = lc->rc;
cpi->twopass = lc->twopass;
cpi->oxcf.target_bandwidth = lc->target_bandwidth;
cpi->alt_ref_source = lc->alt_ref_source;
// Reset the frames_since_key and frames_to_key counters to their values
// before the layer restore. Keep these defined for the stream (not layer).
if (cpi->svc.number_temporal_layers > 1) {
cpi->rc.frames_since_key = old_frame_since_key;
cpi->rc.frames_to_key = old_frame_to_key;
}
}
void vp10_save_layer_context(VP10_COMP *const cpi) {
const VP10EncoderConfig *const oxcf = &cpi->oxcf;
LAYER_CONTEXT *const lc = get_layer_context(cpi);
lc->rc = cpi->rc;
lc->twopass = cpi->twopass;
lc->target_bandwidth = (int)oxcf->target_bandwidth;
lc->alt_ref_source = cpi->alt_ref_source;
}
void vp10_init_second_pass_spatial_svc(VP10_COMP *cpi) {
SVC *const svc = &cpi->svc;
int i;
for (i = 0; i < svc->number_spatial_layers; ++i) {
TWO_PASS *const twopass = &svc->layer_context[i].twopass;
svc->spatial_layer_id = i;
vp10_init_second_pass(cpi);
twopass->total_stats.spatial_layer_id = i;
twopass->total_left_stats.spatial_layer_id = i;
}
svc->spatial_layer_id = 0;
}
void vp10_inc_frame_in_layer(VP10_COMP *const cpi) {
LAYER_CONTEXT *const lc =
&cpi->svc.layer_context[cpi->svc.spatial_layer_id *
cpi->svc.number_temporal_layers];
++lc->current_video_frame_in_layer;
++lc->frames_from_key_frame;
}
int vp10_is_upper_layer_key_frame(const VP10_COMP *const cpi) {
return is_two_pass_svc(cpi) &&
cpi->svc.spatial_layer_id > 0 &&
cpi->svc.layer_context[cpi->svc.spatial_layer_id *
cpi->svc.number_temporal_layers +
cpi->svc.temporal_layer_id].is_key_frame;
}
static void get_layer_resolution(const int width_org, const int height_org,
const int num, const int den,
int *width_out, int *height_out) {
int w, h;
if (width_out == NULL || height_out == NULL || den == 0)
return;
w = width_org * num / den;
h = height_org * num / den;
// make height and width even to keep the Chrome player happy
w += w % 2;
h += h % 2;
*width_out = w;
*height_out = h;
}
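A usage sketch for the helper above (input sizes hypothetical):

int w, h;
// Scaling 1283x719 by 1/2 gives 641x359; both odd, so the w += w % 2 /
// h += h % 2 rounding bumps the result to 642x360.
get_layer_resolution(1283, 719, 1, 2, &w, &h);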
// The function sets proper ref_frame_flags, buffer indices, and buffer update
// variables for temporal layering mode 3, which uses the 0-2-1-2 temporal
// layering scheme.
static void set_flags_and_fb_idx_for_temporal_mode3(VP10_COMP *const cpi) {
int frame_num_within_temporal_struct = 0;
int spatial_id, temporal_id;
spatial_id = cpi->svc.spatial_layer_id = cpi->svc.spatial_layer_to_encode;
frame_num_within_temporal_struct =
cpi->svc.layer_context[cpi->svc.spatial_layer_id *
cpi->svc.number_temporal_layers].current_video_frame_in_layer % 4;
temporal_id = cpi->svc.temporal_layer_id =
(frame_num_within_temporal_struct & 1) ? 2 :
(frame_num_within_temporal_struct >> 1);
cpi->ext_refresh_last_frame = cpi->ext_refresh_golden_frame =
cpi->ext_refresh_alt_ref_frame = 0;
if (!temporal_id) {
cpi->ext_refresh_frame_flags_pending = 1;
cpi->ext_refresh_last_frame = 1;
if (!spatial_id) {
cpi->ref_frame_flags = VP9_LAST_FLAG;
} else if (cpi->svc.layer_context[temporal_id].is_key_frame) {
// base layer is a key frame.
cpi->ref_frame_flags = VP9_GOLD_FLAG;
} else {
cpi->ref_frame_flags = VP9_LAST_FLAG | VP9_GOLD_FLAG;
}
} else if (temporal_id == 1) {
cpi->ext_refresh_frame_flags_pending = 1;
cpi->ext_refresh_alt_ref_frame = 1;
if (!spatial_id) {
cpi->ref_frame_flags = VP9_LAST_FLAG;
} else {
cpi->ref_frame_flags = VP9_LAST_FLAG | VP9_GOLD_FLAG;
}
} else {
if (frame_num_within_temporal_struct == 1) {
// The first tl2 picture
if (!spatial_id) {
cpi->ext_refresh_frame_flags_pending = 1;
cpi->ext_refresh_alt_ref_frame = 1;
cpi->ref_frame_flags = VP9_LAST_FLAG;
} else if (spatial_id < cpi->svc.number_spatial_layers - 1) {
cpi->ext_refresh_frame_flags_pending = 1;
cpi->ext_refresh_alt_ref_frame = 1;
cpi->ref_frame_flags = VP9_LAST_FLAG | VP9_GOLD_FLAG;
} else { // Top layer
cpi->ext_refresh_frame_flags_pending = 0;
cpi->ref_frame_flags = VP9_LAST_FLAG | VP9_GOLD_FLAG;
}
} else {
// The second tl2 picture
if (!spatial_id) {
cpi->ext_refresh_frame_flags_pending = 1;
cpi->ref_frame_flags = VP9_LAST_FLAG;
cpi->ext_refresh_last_frame = 1;
} else if (spatial_id < cpi->svc.number_spatial_layers - 1) {
cpi->ext_refresh_frame_flags_pending = 1;
cpi->ref_frame_flags = VP9_LAST_FLAG | VP9_GOLD_FLAG;
cpi->ext_refresh_last_frame = 1;
} else { // top layer
cpi->ext_refresh_frame_flags_pending = 0;
cpi->ref_frame_flags = VP9_LAST_FLAG | VP9_GOLD_FLAG;
}
}
}
if (temporal_id == 0) {
cpi->lst_fb_idx = spatial_id;
if (spatial_id)
cpi->gld_fb_idx = spatial_id - 1;
else
cpi->gld_fb_idx = 0;
cpi->alt_fb_idx = 0;
} else if (temporal_id == 1) {
cpi->lst_fb_idx = spatial_id;
cpi->gld_fb_idx = cpi->svc.number_spatial_layers + spatial_id - 1;
cpi->alt_fb_idx = cpi->svc.number_spatial_layers + spatial_id;
} else if (frame_num_within_temporal_struct == 1) {
cpi->lst_fb_idx = spatial_id;
cpi->gld_fb_idx = cpi->svc.number_spatial_layers + spatial_id - 1;
cpi->alt_fb_idx = cpi->svc.number_spatial_layers + spatial_id;
} else {
cpi->lst_fb_idx = cpi->svc.number_spatial_layers + spatial_id;
cpi->gld_fb_idx = cpi->svc.number_spatial_layers + spatial_id - 1;
cpi->alt_fb_idx = 0;
}
}
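The frame-to-layer mapping implied by the code above, within each 4-frame temporal structure:

// frame % 4 : 0 -> TL0, 1 -> TL2, 2 -> TL1, 3 -> TL2
// i.e. temporal_id = (f & 1) ? 2 : (f >> 1), the 0-2-1-2 pattern.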
// The function sets proper ref_frame_flags, buffer indices, and buffer update
// variables for temporal layering mode 2, which uses the 0-1-0-1 temporal
// layering scheme.
static void set_flags_and_fb_idx_for_temporal_mode2(VP10_COMP *const cpi) {
int spatial_id, temporal_id;
spatial_id = cpi->svc.spatial_layer_id = cpi->svc.spatial_layer_to_encode;
temporal_id = cpi->svc.temporal_layer_id =
cpi->svc.layer_context[cpi->svc.spatial_layer_id *
cpi->svc.number_temporal_layers].current_video_frame_in_layer & 1;
cpi->ext_refresh_last_frame = cpi->ext_refresh_golden_frame =
cpi->ext_refresh_alt_ref_frame = 0;
if (!temporal_id) {
cpi->ext_refresh_frame_flags_pending = 1;
cpi->ext_refresh_last_frame = 1;
if (!spatial_id) {
cpi->ref_frame_flags = VP9_LAST_FLAG;
} else if (cpi->svc.layer_context[temporal_id].is_key_frame) {
// base layer is a key frame.
cpi->ref_frame_flags = VP9_GOLD_FLAG;
} else {
cpi->ref_frame_flags = VP9_LAST_FLAG | VP9_GOLD_FLAG;
}
} else if (temporal_id == 1) {
cpi->ext_refresh_frame_flags_pending = 1;
cpi->ext_refresh_alt_ref_frame = 1;
if (!spatial_id) {
cpi->ref_frame_flags = VP9_LAST_FLAG;
} else {
cpi->ref_frame_flags = VP9_LAST_FLAG | VP9_GOLD_FLAG;
}
}
if (temporal_id == 0) {
cpi->lst_fb_idx = spatial_id;
if (spatial_id)
cpi->gld_fb_idx = spatial_id - 1;
else
cpi->gld_fb_idx = 0;
cpi->alt_fb_idx = 0;
} else if (temporal_id == 1) {
cpi->lst_fb_idx = spatial_id;
cpi->gld_fb_idx = cpi->svc.number_spatial_layers + spatial_id - 1;
cpi->alt_fb_idx = cpi->svc.number_spatial_layers + spatial_id;
}
}
// The function sets proper ref_frame_flags, buffer indices, and buffer update
// variables for temporal layering mode 0, which has no temporal layering.
static void set_flags_and_fb_idx_for_temporal_mode_noLayering(
VP10_COMP *const cpi) {
int spatial_id;
spatial_id = cpi->svc.spatial_layer_id = cpi->svc.spatial_layer_to_encode;
cpi->ext_refresh_last_frame =
cpi->ext_refresh_golden_frame = cpi->ext_refresh_alt_ref_frame = 0;
cpi->ext_refresh_frame_flags_pending = 1;
cpi->ext_refresh_last_frame = 1;
if (!spatial_id) {
cpi->ref_frame_flags = VP9_LAST_FLAG;
} else if (cpi->svc.layer_context[0].is_key_frame) {
cpi->ref_frame_flags = VP9_GOLD_FLAG;
} else {
cpi->ref_frame_flags = VP9_LAST_FLAG | VP9_GOLD_FLAG;
}
cpi->lst_fb_idx = spatial_id;
if (spatial_id)
cpi->gld_fb_idx = spatial_id - 1;
else
cpi->gld_fb_idx = 0;
}
int vp10_one_pass_cbr_svc_start_layer(VP10_COMP *const cpi) {
int width = 0, height = 0;
LAYER_CONTEXT *lc = NULL;
if (cpi->svc.temporal_layering_mode == VP9E_TEMPORAL_LAYERING_MODE_0212) {
set_flags_and_fb_idx_for_temporal_mode3(cpi);
} else if (cpi->svc.temporal_layering_mode ==
VP9E_TEMPORAL_LAYERING_MODE_NOLAYERING) {
set_flags_and_fb_idx_for_temporal_mode_noLayering(cpi);
} else if (cpi->svc.temporal_layering_mode ==
VP9E_TEMPORAL_LAYERING_MODE_0101) {
set_flags_and_fb_idx_for_temporal_mode2(cpi);
} else if (cpi->svc.temporal_layering_mode ==
VP9E_TEMPORAL_LAYERING_MODE_BYPASS) {
// VP9E_TEMPORAL_LAYERING_MODE_BYPASS:
// If the code reaches here, the encoder is relying on externally supplied
// flags for its layering decisions. However, when spatial+temporal layering
// is used the buffer indices cannot be derived automatically, so bypass mode
// only works when the number of spatial layers equals 1.
assert(cpi->svc.number_spatial_layers == 1);
}
lc = &cpi->svc.layer_context[cpi->svc.spatial_layer_id *
cpi->svc.number_temporal_layers +
cpi->svc.temporal_layer_id];
get_layer_resolution(cpi->oxcf.width, cpi->oxcf.height,
lc->scaling_factor_num, lc->scaling_factor_den,
&width, &height);
if (vp10_set_size_literal(cpi, width, height) != 0)
return VPX_CODEC_INVALID_PARAM;
return 0;
}
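// A sketch of the scaling arithmetic get_layer_resolution() performs,
// reconstructed from the call site above; the even-dimension rounding is an
// assumption carried over from the analogous vp9 helper, not a verbatim copy.
static void get_layer_resolution_sketch(int width_org, int height_org, int num,
                                        int den, int *width_out,
                                        int *height_out) {
  int w, h;
  if (width_out == NULL || height_out == NULL || den == 0) return;
  w = width_org * num / den;
  h = height_org * num / den;
  // Round up to even dimensions so 4:2:0 chroma planes stay aligned.
  w += w % 2;
  h += h % 2;
  *width_out = w;
  *height_out = h;
}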
#if CONFIG_SPATIAL_SVC
int vp10_svc_start_frame(VP10_COMP *const cpi) {
int width = 0, height = 0;
LAYER_CONTEXT *lc;
struct lookahead_entry *buf;
int count = 1 << (cpi->svc.number_temporal_layers - 1);
cpi->svc.spatial_layer_id = cpi->svc.spatial_layer_to_encode;
lc = &cpi->svc.layer_context[cpi->svc.spatial_layer_id];
cpi->svc.temporal_layer_id = 0;
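// With number_temporal_layers == 3, count starts at 4 and the loop below maps
// successive frames to temporal layers 0, 2, 1, 2, 0, ... (the standard
// hierarchical pattern).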
while ((lc->current_video_frame_in_layer % count) != 0) {
++cpi->svc.temporal_layer_id;
count >>= 1;
}
cpi->ref_frame_flags = VP9_ALT_FLAG | VP9_GOLD_FLAG | VP9_LAST_FLAG;
cpi->lst_fb_idx = cpi->svc.spatial_layer_id;
if (cpi->svc.spatial_layer_id == 0)
cpi->gld_fb_idx = (lc->gold_ref_idx >= 0) ?
lc->gold_ref_idx : cpi->lst_fb_idx;
else
cpi->gld_fb_idx = cpi->svc.spatial_layer_id - 1;
if (lc->current_video_frame_in_layer == 0) {
if (cpi->svc.spatial_layer_id >= 2) {
cpi->alt_fb_idx = cpi->svc.spatial_layer_id - 2;
} else {
cpi->alt_fb_idx = cpi->lst_fb_idx;
cpi->ref_frame_flags &= (~VP9_LAST_FLAG & ~VP9_ALT_FLAG);
}
} else {
if (cpi->oxcf.ss_enable_auto_arf[cpi->svc.spatial_layer_id]) {
cpi->alt_fb_idx = lc->alt_ref_idx;
if (!lc->has_alt_frame)
cpi->ref_frame_flags &= (~VP9_ALT_FLAG);
} else {
// Find a proper alt_fb_idx for layers that don't have an alt-ref frame.
if (cpi->svc.spatial_layer_id == 0) {
cpi->alt_fb_idx = cpi->lst_fb_idx;
} else {
LAYER_CONTEXT *lc_lower =
&cpi->svc.layer_context[cpi->svc.spatial_layer_id - 1];
if (cpi->oxcf.ss_enable_auto_arf[cpi->svc.spatial_layer_id - 1] &&
lc_lower->alt_ref_source != NULL)
cpi->alt_fb_idx = lc_lower->alt_ref_idx;
else if (cpi->svc.spatial_layer_id >= 2)
cpi->alt_fb_idx = cpi->svc.spatial_layer_id - 2;
else
cpi->alt_fb_idx = cpi->lst_fb_idx;
}
}
}
get_layer_resolution(cpi->oxcf.width, cpi->oxcf.height,
lc->scaling_factor_num, lc->scaling_factor_den,
&width, &height);
// Workaround for multiple frame contexts. For some frames we cannot use
// prev_mi, since the previous frame may change during decoding. The idea is
// to put an empty invisible frame in front of them; prev_mi is then not used
// when encoding these frames.
buf = vp10_lookahead_peek(cpi->lookahead, 0);
if (cpi->oxcf.error_resilient_mode == 0 && cpi->oxcf.pass == 2 &&
cpi->svc.encode_empty_frame_state == NEED_TO_ENCODE &&
lc->rc.frames_to_key != 0 &&
!(buf != NULL && (buf->flags & VPX_EFLAG_FORCE_KF))) {
if ((cpi->svc.number_temporal_layers > 1 &&
cpi->svc.temporal_layer_id < cpi->svc.number_temporal_layers - 1) ||
(cpi->svc.number_spatial_layers > 1 &&
cpi->svc.spatial_layer_id == 0)) {
struct lookahead_entry *buf = vp10_lookahead_peek(cpi->lookahead, 0);
if (buf != NULL) {
cpi->svc.empty_frame.ts_start = buf->ts_start;
cpi->svc.empty_frame.ts_end = buf->ts_end;
cpi->svc.encode_empty_frame_state = ENCODING;
cpi->common.show_frame = 0;
cpi->ref_frame_flags = 0;
cpi->common.frame_type = INTER_FRAME;
cpi->lst_fb_idx =
cpi->gld_fb_idx = cpi->alt_fb_idx = SMALL_FRAME_FB_IDX;
if (cpi->svc.encode_intra_empty_frame != 0)
cpi->common.intra_only = 1;
width = SMALL_FRAME_WIDTH;
height = SMALL_FRAME_HEIGHT;
}
}
}
cpi->oxcf.worst_allowed_q = vp10_quantizer_to_qindex(lc->max_q);
cpi->oxcf.best_allowed_q = vp10_quantizer_to_qindex(lc->min_q);
vp10_change_config(cpi, &cpi->oxcf);
if (vp10_set_size_literal(cpi, width, height) != 0)
return VPX_CODEC_INVALID_PARAM;
vp10_set_high_precision_mv(cpi, 1);
cpi->alt_ref_source = get_layer_context(cpi)->alt_ref_source;
return 0;
}
#endif
struct lookahead_entry *vp10_svc_lookahead_pop(VP10_COMP *const cpi,
struct lookahead_ctx *ctx,
int drain) {
struct lookahead_entry *buf = NULL;
if (ctx->sz && (drain || ctx->sz == ctx->max_sz - MAX_PRE_FRAMES)) {
buf = vp10_lookahead_peek(ctx, 0);
if (buf != NULL) {
// Only remove the buffer when popping the highest spatial layer.
if (cpi->svc.spatial_layer_id == cpi->svc.number_spatial_layers - 1) {
vp10_lookahead_pop(ctx, drain);
}
}
}
return buf;
}
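// Hypothetical usage sketch: every spatial layer encodes against the same
// peeked buffer, and the entry is only dequeued once the top spatial layer
// has consumed it. encode_one_spatial_layer() is an illustrative name, not a
// real API:
//
//   for (sid = 0; sid < cpi->svc.number_spatial_layers; ++sid) {
//     cpi->svc.spatial_layer_to_encode = sid;
//     buf = vp10_svc_lookahead_pop(cpi, cpi->lookahead, flush);
//     if (buf != NULL) encode_one_spatial_layer(cpi, buf);
//   }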


@ -1,122 +0,0 @@
/*
* Copyright (c) 2014 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef VP10_ENCODER_SVC_LAYERCONTEXT_H_
#define VP10_ENCODER_SVC_LAYERCONTEXT_H_
#include "vpx/vpx_encoder.h"
#include "vp10/encoder/ratectrl.h"
#ifdef __cplusplus
extern "C" {
#endif
typedef struct {
RATE_CONTROL rc;
int target_bandwidth;
int spatial_layer_target_bandwidth; // Target for the spatial layer.
double framerate;
int avg_frame_size;
int max_q;
int min_q;
int scaling_factor_num;
int scaling_factor_den;
TWO_PASS twopass;
vpx_fixed_buf_t rc_twopass_stats_in;
unsigned int current_video_frame_in_layer;
int is_key_frame;
int frames_from_key_frame;
FRAME_TYPE last_frame_type;
struct lookahead_entry *alt_ref_source;
int alt_ref_idx;
int gold_ref_idx;
int has_alt_frame;
size_t layer_size;
struct vpx_psnr_pkt psnr_pkt;
} LAYER_CONTEXT;
typedef struct {
int spatial_layer_id;
int temporal_layer_id;
int number_spatial_layers;
int number_temporal_layers;
int spatial_layer_to_encode;
// Workaround for multiple frame contexts
enum {
ENCODED = 0,
ENCODING,
NEED_TO_ENCODE
} encode_empty_frame_state;
struct lookahead_entry empty_frame;
int encode_intra_empty_frame;
// Store scaled source frames to be used by the temporal filter to generate
// an alt-ref frame.
YV12_BUFFER_CONFIG scaled_frames[MAX_LAG_BUFFERS];
// Layer context used for rate control in one-pass temporal CBR mode or
// two-pass spatial SVC mode.
LAYER_CONTEXT layer_context[VPX_MAX_LAYERS];
// Indicates what sort of temporal layering is used.
// Currently, this only works for CBR mode.
VP9E_TEMPORAL_LAYERING_MODE temporal_layering_mode;
} SVC;
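// The contexts above are stored flat: the entry for (spatial_id, temporal_id)
// lives at layer_context[spatial_id * number_temporal_layers + temporal_id],
// matching the lookups in svc_layercontext.c. A hypothetical accessor:
//
//   static INLINE LAYER_CONTEXT *get_layer_ctx(SVC *svc, int sid, int tid) {
//     return &svc->layer_context[sid * svc->number_temporal_layers + tid];
//   }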
struct VP10_COMP;
// Initialize layer context data from init_config().
void vp10_init_layer_context(struct VP10_COMP *const cpi);
// Update the layer context from a change_config() call.
void vp10_update_layer_context_change_config(struct VP10_COMP *const cpi,
const int target_bandwidth);
// Prior to encoding the frame, update framerate-related quantities
// for the current temporal layer.
void vp10_update_temporal_layer_framerate(struct VP10_COMP *const cpi);
// Update framerate-related quantities for the current spatial layer.
void vp10_update_spatial_layer_framerate(struct VP10_COMP *const cpi,
double framerate);
// Prior to encoding the frame, set the layer context, for the current layer
// to be encoded, to the cpi struct.
void vp10_restore_layer_context(struct VP10_COMP *const cpi);
// Save the layer context after encoding the frame.
void vp10_save_layer_context(struct VP10_COMP *const cpi);
// Initialize second pass rc for spatial svc.
void vp10_init_second_pass_spatial_svc(struct VP10_COMP *cpi);
// Increment the number of video frames in the layer
void vp10_inc_frame_in_layer(struct VP10_COMP *const cpi);
// Check whether the current layer is a key frame in an upper spatial layer
int vp10_is_upper_layer_key_frame(const struct VP10_COMP *const cpi);
// Get the next source buffer to encode
struct lookahead_entry *vp10_svc_lookahead_pop(struct VP10_COMP *const cpi,
struct lookahead_ctx *ctx,
int drain);
// Start a frame and initialize svc parameters
int vp10_svc_start_frame(struct VP10_COMP *const cpi);
int vp10_one_pass_cbr_svc_start_layer(struct VP10_COMP *const cpi);
#ifdef __cplusplus
} // extern "C"
#endif
#endif // VP10_ENCODER_SVC_LAYERCONTEXT_H_


@ -23,6 +23,7 @@
#include "vp10/encoder/ratectrl.h"
#include "vp10/encoder/segmentation.h"
#include "vp10/encoder/temporal_filter.h"
#include "vpx_dsp/vpx_dsp_common.h"
#include "vpx_mem/vpx_mem.h"
#include "vpx_ports/mem.h"
#include "vpx_ports/vpx_timer.h"
@ -242,7 +243,7 @@ static int temporal_filter_find_matching_mb_c(VP10_COMP *cpi,
xd->plane[0].pre[0].stride = stride;
step_param = mv_sf->reduce_first_step_size;
step_param = MIN(step_param, MAX_MVSEARCH_STEPS - 2);
step_param = VPXMIN(step_param, MAX_MVSEARCH_STEPS - 2);
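// VPXMIN is defined in vpx_dsp/vpx_dsp_common.h and replaces the local MIN
// macro; it expands to (((x) < (y)) ? (x) : (y)).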
// Ignore mv costing by sending NULL pointer instead of cost arrays
vp10_hex_search(x, &best_ref_mv1_full, step_param, sadpb, 1,
@ -652,9 +653,7 @@ static void adjust_arnr_filter(VP10_COMP *cpi,
}
void vp10_temporal_filter(VP10_COMP *cpi, int distance) {
VP10_COMMON *const cm = &cpi->common;
RATE_CONTROL *const rc = &cpi->rc;
MACROBLOCKD *const xd = &cpi->td.mb.e_mbd;
int frame;
int frames_to_blur;
int start_frame;
@ -681,67 +680,21 @@ void vp10_temporal_filter(VP10_COMP *cpi, int distance) {
if (frames_to_blur > 0) {
// Setup scaling factors. Scaling on each of the arnr frames is not
// supported.
if (cpi->use_svc) {
// In spatial SVC the scaling factors might be less than 1/2,
// so we use non-normative scaling.
int frame_used = 0;
// ARF is produced at the native frame size and resized when coded.
#if CONFIG_VP9_HIGHBITDEPTH
vp10_setup_scale_factors_for_frame(
&sf,
get_frame_new_buffer(cm)->y_crop_width,
get_frame_new_buffer(cm)->y_crop_height,
get_frame_new_buffer(cm)->y_crop_width,
get_frame_new_buffer(cm)->y_crop_height,
cm->use_highbitdepth);
vp10_setup_scale_factors_for_frame(&sf,
frames[0]->y_crop_width,
frames[0]->y_crop_height,
frames[0]->y_crop_width,
frames[0]->y_crop_height,
cpi->common.use_highbitdepth);
#else
vp10_setup_scale_factors_for_frame(
&sf,
get_frame_new_buffer(cm)->y_crop_width,
get_frame_new_buffer(cm)->y_crop_height,
get_frame_new_buffer(cm)->y_crop_width,
get_frame_new_buffer(cm)->y_crop_height);
vp10_setup_scale_factors_for_frame(&sf,
frames[0]->y_crop_width,
frames[0]->y_crop_height,
frames[0]->y_crop_width,
frames[0]->y_crop_height);
#endif // CONFIG_VP9_HIGHBITDEPTH
for (frame = 0; frame < frames_to_blur; ++frame) {
if (cm->mi_cols * MI_SIZE != frames[frame]->y_width ||
cm->mi_rows * MI_SIZE != frames[frame]->y_height) {
if (vpx_realloc_frame_buffer(&cpi->svc.scaled_frames[frame_used],
cm->width, cm->height,
cm->subsampling_x, cm->subsampling_y,
#if CONFIG_VP9_HIGHBITDEPTH
cm->use_highbitdepth,
#endif
VP9_ENC_BORDER_IN_PIXELS,
cm->byte_alignment,
NULL, NULL, NULL)) {
vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
"Failed to reallocate alt_ref_buffer");
}
frames[frame] = vp10_scale_if_required(
cm, frames[frame], &cpi->svc.scaled_frames[frame_used]);
++frame_used;
}
}
cm->mi = cm->mip + cm->mi_stride + 1;
xd->mi = cm->mi_grid_visible;
xd->mi[0] = cm->mi;
} else {
// ARF is produced at the native frame size and resized when coded.
#if CONFIG_VP9_HIGHBITDEPTH
vp10_setup_scale_factors_for_frame(&sf,
frames[0]->y_crop_width,
frames[0]->y_crop_height,
frames[0]->y_crop_width,
frames[0]->y_crop_height,
cm->use_highbitdepth);
#else
vp10_setup_scale_factors_for_frame(&sf,
frames[0]->y_crop_width,
frames[0]->y_crop_height,
frames[0]->y_crop_width,
frames[0]->y_crop_height);
#endif // CONFIG_VP9_HIGHBITDEPTH
}
}
temporal_filter_iterate_c(cpi, frames, frames_to_blur,


@ -19,7 +19,6 @@ VP10_COMMON_SRCS-yes += common/entropymode.c
VP10_COMMON_SRCS-yes += common/entropymv.c
VP10_COMMON_SRCS-yes += common/frame_buffers.c
VP10_COMMON_SRCS-yes += common/frame_buffers.h
VP10_COMMON_SRCS-yes += common/idct.c
VP10_COMMON_SRCS-yes += common/alloccommon.h
VP10_COMMON_SRCS-yes += common/blockd.h
VP10_COMMON_SRCS-yes += common/common.h
@ -30,6 +29,9 @@ VP10_COMMON_SRCS-yes += common/enums.h
VP10_COMMON_SRCS-yes += common/filter.h
VP10_COMMON_SRCS-yes += common/filter.c
VP10_COMMON_SRCS-yes += common/idct.h
VP10_COMMON_SRCS-yes += common/idct.c
VP10_COMMON_SRCS-yes += common/vp10_inv_txfm.h
VP10_COMMON_SRCS-yes += common/vp10_inv_txfm.c
VP10_COMMON_SRCS-yes += common/loopfilter.h
VP10_COMMON_SRCS-yes += common/thread_common.h
VP10_COMMON_SRCS-yes += common/mv.h
@ -59,6 +61,8 @@ VP10_COMMON_SRCS-$(CONFIG_POSTPROC_VISUALIZER) += common/textblit.c
VP10_COMMON_SRCS-yes += common/common_data.h
VP10_COMMON_SRCS-yes += common/scan.c
VP10_COMMON_SRCS-yes += common/scan.h
VP10_COMMON_SRCS-yes += common/vp10_fwd_txfm.h
VP10_COMMON_SRCS-yes += common/vp10_fwd_txfm.c
VP10_COMMON_SRCS-$(CONFIG_VP9_POSTPROC) += common/postproc.h
VP10_COMMON_SRCS-$(CONFIG_VP9_POSTPROC) += common/postproc.c
@ -85,10 +89,16 @@ VP10_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/mfqe_msa.c
endif
VP10_COMMON_SRCS-$(HAVE_SSE2) += common/x86/idct_intrin_sse2.c
VP10_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp10_fwd_txfm_sse2.c
VP10_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp10_fwd_dct32x32_impl_sse2.h
VP10_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp10_fwd_txfm_impl_sse2.h
ifneq ($(CONFIG_VP9_HIGHBITDEPTH),yes)
VP10_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/iht4x4_add_neon.c
VP10_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/iht8x8_add_neon.c
endif
VP10_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp10_inv_txfm_sse2.c
VP10_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp10_inv_txfm_sse2.h
$(eval $(call rtcd_h_template,vp10_rtcd,vp10/common/vp10_rtcd_defs.pl))
