diff --git a/build/make/gen_msvs_proj.sh b/build/make/gen_msvs_proj.sh index f1cc04ea5..3653309ed 100755 --- a/build/make/gen_msvs_proj.sh +++ b/build/make/gen_msvs_proj.sh @@ -137,7 +137,9 @@ for opt in "$@"; do ;; --lib) proj_kind="lib" ;; - --src-path-bare=*) src_path_bare=$(fix_path "$optval") + --src-path-bare=*) + src_path_bare=$(fix_path "$optval") + src_path_bare=${src_path_bare%/} ;; --static-crt) use_static_runtime=true ;; @@ -151,9 +153,9 @@ for opt in "$@"; do esac ;; -I*) - opt="${opt%/}" opt=${opt##-I} opt=$(fix_path "$opt") + opt="${opt%/}" incs="${incs}${incs:+;}"${opt}"" yasmincs="${yasmincs} -I"${opt}"" ;; @@ -414,7 +416,7 @@ generate_vcproj() { vpx) tag Tool \ Name="VCPreBuildEventTool" \ - CommandLine="call obj_int_extract.bat $src_path_bare $plat_no_ws\\\$(ConfigurationName)" \ + CommandLine="call obj_int_extract.bat "$src_path_bare" $plat_no_ws\\\$(ConfigurationName)" \ tag Tool \ Name="VCCLCompilerTool" \ diff --git a/build/make/gen_msvs_vcxproj.sh b/build/make/gen_msvs_vcxproj.sh index eee354dc7..23ef6a320 100755 --- a/build/make/gen_msvs_vcxproj.sh +++ b/build/make/gen_msvs_vcxproj.sh @@ -157,7 +157,9 @@ for opt in "$@"; do ;; --lib) proj_kind="lib" ;; - --src-path-bare=*) src_path_bare=$(fix_path "$optval") + --src-path-bare=*) + src_path_bare=$(fix_path "$optval") + src_path_bare=${src_path_bare%/} ;; --static-crt) use_static_runtime=true ;; @@ -173,9 +175,9 @@ for opt in "$@"; do esac ;; -I*) - opt="${opt%/}" opt=${opt##-I} opt=$(fix_path "$opt") + opt="${opt%/}" incs="${incs}${incs:+;}"${opt}"" yasmincs="${yasmincs} -I"${opt}"" ;; diff --git a/build/make/iosbuild.sh b/build/make/iosbuild.sh index 230c0ce8c..9d9c3749b 100755 --- a/build/make/iosbuild.sh +++ b/build/make/iosbuild.sh @@ -50,9 +50,79 @@ build_target() { vlog "***Done building target: ${target}***" } +# Returns the preprocessor symbol for the target specified by $1. +target_to_preproc_symbol() { + target="$1" + case "${target}" in + armv6-*) + echo "__ARM_ARCH_6__" + ;; + armv7-*) + echo "__ARM_ARCH_7__" + ;; + armv7s-*) + echo "__ARM_ARCH_7S__" + ;; + x86-*) + echo "__i386__" + ;; + x86_64-*) + echo "__x86_64__" + ;; + *) + echo "#error ${target} unknown/unsupported" + return 1 + ;; + esac +} + +# Create a vpx_config.h shim that, based on preprocessor settings for the +# current target CPU, includes the real vpx_config.h for the current target. +# $1 is the list of targets. +create_vpx_framework_config_shim() { + local targets="$1" + local config_file="${HEADER_DIR}/vpx_config.h" + local preproc_symbol="" + local target="" + local include_guard="VPX_FRAMEWORK_HEADERS_VPX_VPX_CONFIG_H_" + + local file_header="/* + * Copyright (c) $(date +%Y) The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +/* GENERATED FILE: DO NOT EDIT! */ + +#ifndef ${include_guard} +#define ${include_guard} + +#if defined" + + printf "%s" "${file_header}" > "${config_file}" + for target in ${targets}; do + preproc_symbol=$(target_to_preproc_symbol "${target}") + printf " ${preproc_symbol}\n" >> "${config_file}" + printf "#include \"VPX/vpx/${target}/vpx_config.h\"\n" >> "${config_file}" + printf "#elif defined" >> "${config_file}" + mkdir "${HEADER_DIR}/${target}" + cp -p "${BUILD_ROOT}/${target}/vpx_config.h" "${HEADER_DIR}/${target}" + done + + # Consume the last line of output from the loop: We don't want it. + sed -i '' -e '$d' "${config_file}" + + printf "#endif\n\n" >> "${config_file}" + printf "#endif // ${include_guard}" >> "${config_file}" +} + # Configures and builds each target specified by $1, and then builds # VPX.framework. -build_targets() { +build_framework() { local lib_list="" local targets="$1" local target="" @@ -75,15 +145,20 @@ build_targets() { cd "${ORIG_PWD}" - # Includes are identical for all platforms, and according to dist target - # behavior vpx_config.h and vpx_version.h aren't actually necessary for user - # apps built with libvpx. So, just copy the includes from the last target - # built. - # TODO(tomfinegan): The above is a lame excuse. Build common config/version - # includes that use the preprocessor to include the correct file. + # The basic libvpx API includes are all the same; just grab the most recent + # set. cp -p "${target_dist_dir}"/include/vpx/* "${HEADER_DIR}" + + # Build the fat library. ${LIPO} -create ${lib_list} -output ${FRAMEWORK_DIR}/VPX + # Create the vpx_config.h shim that allows usage of vpx_config.h from + # within VPX.framework. + create_vpx_framework_config_shim "${targets}" + + # Copy in vpx_version.h. + cp -p "${BUILD_ROOT}/${target}/vpx_version.h" "${HEADER_DIR}" + vlog "Created fat library ${FRAMEWORK_DIR}/VPX containing:" for lib in ${lib_list}; do vlog " $(echo ${lib} | awk -F / '{print $2, $NF}')" @@ -166,4 +241,4 @@ cat << EOF EOF fi -build_targets "${TARGETS}" +build_framework "${TARGETS}" diff --git a/examples/vp9_spatial_svc_encoder.c b/examples/vp9_spatial_svc_encoder.c index 5bc657576..8c87b2a44 100644 --- a/examples/vp9_spatial_svc_encoder.c +++ b/examples/vp9_spatial_svc_encoder.c @@ -296,6 +296,7 @@ int main(int argc, const char **argv) { int frame_duration = 1; /* 1 timebase tick per frame */ FILE *infile = NULL; int end_of_stream = 0; + int frame_size; memset(&svc_ctx, 0, sizeof(svc_ctx)); svc_ctx.log_print = 1; @@ -351,11 +352,10 @@ int main(int argc, const char **argv) { die_codec(&codec, "Failed to encode frame"); } if (!(app_input.passes == 2 && app_input.pass == 1)) { - if (vpx_svc_get_frame_size(&svc_ctx) > 0) { + while ((frame_size = vpx_svc_get_frame_size(&svc_ctx)) > 0) { vpx_video_writer_write_frame(writer, vpx_svc_get_buffer(&svc_ctx), - vpx_svc_get_frame_size(&svc_ctx), - pts); + frame_size, pts); } } if (vpx_svc_get_rc_stats_buffer_size(&svc_ctx) > 0) { diff --git a/test/decode_test_driver.cc b/test/decode_test_driver.cc index e667d1dd0..8bea4ccf9 100644 --- a/test/decode_test_driver.cc +++ b/test/decode_test_driver.cc @@ -15,13 +15,27 @@ namespace libvpx_test { +const char kVP8Name[] = "WebM Project VP8"; + +vpx_codec_err_t Decoder::PeekStream(const uint8_t *cxdata, size_t size, + vpx_codec_stream_info_t *stream_info) { + return vpx_codec_peek_stream_info(CodecInterface(), + cxdata, static_cast(size), + stream_info); +} + vpx_codec_err_t Decoder::DecodeFrame(const uint8_t *cxdata, size_t size) { + return DecodeFrame(cxdata, size, NULL); +} + +vpx_codec_err_t Decoder::DecodeFrame(const uint8_t *cxdata, size_t size, + void *user_priv) { vpx_codec_err_t res_dec; InitOnce(); REGISTER_STATE_CHECK( res_dec = vpx_codec_decode(&decoder_, cxdata, static_cast(size), - NULL, 0)); + user_priv, 0)); return res_dec; } @@ -29,13 +43,37 @@ void DecoderTest::RunLoop(CompressedVideoSource *video) { vpx_codec_dec_cfg_t dec_cfg = {0}; Decoder* const decoder = codec_->CreateDecoder(dec_cfg, 0); ASSERT_TRUE(decoder != NULL); + const char *codec_name = decoder->GetDecoderName(); + const bool is_vp8 = strncmp(kVP8Name, codec_name, sizeof(kVP8Name) - 1) == 0; // Decode frames. - for (video->Begin(); video->cxdata(); video->Next()) { + for (video->Begin(); !::testing::Test::HasFailure() && video->cxdata(); + video->Next()) { PreDecodeFrameHook(*video, decoder); + + vpx_codec_stream_info_t stream_info; + stream_info.sz = sizeof(stream_info); + const vpx_codec_err_t res_peek = decoder->PeekStream(video->cxdata(), + video->frame_size(), + &stream_info); + if (is_vp8) { + /* Vp8's implementation of PeekStream returns an error if the frame you + * pass it is not a keyframe, so we only expect VPX_CODEC_OK on the first + * frame, which must be a keyframe. */ + if (video->frame_number() == 0) + ASSERT_EQ(VPX_CODEC_OK, res_peek) << "Peek return failed: " + << vpx_codec_err_to_string(res_peek); + } else { + /* The Vp9 implementation of PeekStream returns an error only if the + * data passed to it isn't a valid Vp9 chunk. */ + ASSERT_EQ(VPX_CODEC_OK, res_peek) << "Peek return failed: " + << vpx_codec_err_to_string(res_peek); + } + vpx_codec_err_t res_dec = decoder->DecodeFrame(video->cxdata(), video->frame_size()); - ASSERT_EQ(VPX_CODEC_OK, res_dec) << decoder->DecodeError(); + if (!HandleDecodeResult(res_dec, *video, decoder)) + break; DxDataIterator dec_iter = decoder->GetDxData(); const vpx_image_t *img = NULL; diff --git a/test/decode_test_driver.h b/test/decode_test_driver.h index 2734a45f1..dd3593e1e 100644 --- a/test/decode_test_driver.h +++ b/test/decode_test_driver.h @@ -49,8 +49,14 @@ class Decoder { vpx_codec_destroy(&decoder_); } + vpx_codec_err_t PeekStream(const uint8_t *cxdata, size_t size, + vpx_codec_stream_info_t *stream_info); + vpx_codec_err_t DecodeFrame(const uint8_t *cxdata, size_t size); + vpx_codec_err_t DecodeFrame(const uint8_t *cxdata, size_t size, + void *user_priv); + DxDataIterator GetDxData() { return DxDataIterator(&decoder_); } @@ -85,6 +91,10 @@ class Decoder { &decoder_, cb_get, cb_release, user_priv); } + const char* GetDecoderName() { + return vpx_codec_iface_name(CodecInterface()); + } + protected: virtual vpx_codec_iface_t* CodecInterface() const = 0; @@ -114,6 +124,14 @@ class DecoderTest { virtual void PreDecodeFrameHook(const CompressedVideoSource& video, Decoder *decoder) {} + // Hook to be called to handle decode result. Return true to continue. + virtual bool HandleDecodeResult(const vpx_codec_err_t res_dec, + const CompressedVideoSource& /* video */, + Decoder *decoder) { + EXPECT_EQ(VPX_CODEC_OK, res_dec) << decoder->DecodeError(); + return VPX_CODEC_OK == res_dec; + } + // Hook to be called on every decompressed frame. virtual void DecompressedFrameHook(const vpx_image_t& img, const unsigned int frame_number) {} diff --git a/test/invalid_file_test.cc b/test/invalid_file_test.cc new file mode 100644 index 000000000..493365847 --- /dev/null +++ b/test/invalid_file_test.cc @@ -0,0 +1,109 @@ +/* + * Copyright (c) 2014 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include +#include +#include +#include +#include "third_party/googletest/src/include/gtest/gtest.h" +#include "./vpx_config.h" +#include "test/codec_factory.h" +#include "test/decode_test_driver.h" +#include "test/ivf_video_source.h" +#include "test/util.h" +#if CONFIG_WEBM_IO +#include "test/webm_video_source.h" +#endif +#include "vpx_mem/vpx_mem.h" + +namespace { + +class InvalidFileTest + : public ::libvpx_test::DecoderTest, + public ::libvpx_test::CodecTestWithParam { + protected: + InvalidFileTest() : DecoderTest(GET_PARAM(0)), res_file_(NULL) {} + + virtual ~InvalidFileTest() { + if (res_file_ != NULL) + fclose(res_file_); + } + + void OpenResFile(const std::string &res_file_name_) { + res_file_ = libvpx_test::OpenTestDataFile(res_file_name_); + ASSERT_TRUE(res_file_ != NULL) << "Result file open failed. Filename: " + << res_file_name_; + } + + virtual bool HandleDecodeResult( + const vpx_codec_err_t res_dec, + const libvpx_test::CompressedVideoSource &video, + libvpx_test::Decoder *decoder) { + EXPECT_TRUE(res_file_ != NULL); + int expected_res_dec; + + // Read integer result. + const int res = fscanf(res_file_, "%d", &expected_res_dec); + EXPECT_NE(res, EOF) << "Read result data failed"; + + // Check results match. + EXPECT_EQ(expected_res_dec, res_dec) + << "Results don't match: frame number = " << video.frame_number(); + + return !HasFailure(); + } + + private: + FILE *res_file_; +}; + +TEST_P(InvalidFileTest, ReturnCode) { + const std::string filename = GET_PARAM(1); + libvpx_test::CompressedVideoSource *video = NULL; + + // Open compressed video file. + if (filename.substr(filename.length() - 3, 3) == "ivf") { + video = new libvpx_test::IVFVideoSource(filename); + } else if (filename.substr(filename.length() - 4, 4) == "webm") { +#if CONFIG_WEBM_IO + video = new libvpx_test::WebMVideoSource(filename); +#else + fprintf(stderr, "WebM IO is disabled, skipping test vector %s\n", + filename.c_str()); + return; +#endif + } + video->Init(); + + // Construct result file name. The file holds a list of expected integer + // results, one for each decoded frame. Any result that doesn't match + // the files list will cause a test failure. + const std::string res_filename = filename + ".res"; + OpenResFile(res_filename); + + // Decode frame, and check the md5 matching. + ASSERT_NO_FATAL_FAILURE(RunLoop(video)); + delete video; +} + +const char *const kVP9InvalidFileTests[] = { + "invalid-vp90-01.webm", + "invalid-vp90-02.webm", + "invalid-vp90-2-00-quantizer-00.webm.ivf.s5861_r01-05_b6-.ivf", +}; + +#define NELEMENTS(x) static_cast(sizeof(x) / sizeof(x[0])) + +VP9_INSTANTIATE_TEST_CASE(InvalidFileTest, + ::testing::ValuesIn(kVP9InvalidFileTests, + kVP9InvalidFileTests + + NELEMENTS(kVP9InvalidFileTests))); + +} // namespace diff --git a/test/svc_test.cc b/test/svc_test.cc index db26a8e9d..f831e751c 100644 --- a/test/svc_test.cc +++ b/test/svc_test.cc @@ -265,9 +265,17 @@ TEST_F(SvcTest, FirstFrameHasLayers) { video.duration(), VPX_DL_GOOD_QUALITY); EXPECT_EQ(VPX_CODEC_OK, res); + if (vpx_svc_get_frame_size(&svc_) == 0) { + // Flush encoder + res = vpx_svc_encode(&svc_, &codec_, NULL, 0, + video.duration(), VPX_DL_GOOD_QUALITY); + EXPECT_EQ(VPX_CODEC_OK, res); + } + + int frame_size = vpx_svc_get_frame_size(&svc_); + EXPECT_GT(frame_size, 0); const vpx_codec_err_t res_dec = decoder_->DecodeFrame( - static_cast(vpx_svc_get_buffer(&svc_)), - vpx_svc_get_frame_size(&svc_)); + static_cast(vpx_svc_get_buffer(&svc_)), frame_size); // this test fails with a decoder error ASSERT_EQ(VPX_CODEC_OK, res_dec) << decoder_->DecodeError(); @@ -277,6 +285,9 @@ TEST_F(SvcTest, EncodeThreeFrames) { svc_.spatial_layers = 2; vpx_svc_set_scale_factors(&svc_, "4/16,16/16"); vpx_svc_set_quantizers(&svc_, "40,30", 0); + int decoded_frames = 0; + vpx_codec_err_t res_dec; + int frame_size; vpx_codec_err_t res = vpx_svc_init(&svc_, &codec_, vpx_codec_vp9_cx(), &codec_enc_); @@ -291,13 +302,14 @@ TEST_F(SvcTest, EncodeThreeFrames) { // This frame is a keyframe. res = vpx_svc_encode(&svc_, &codec_, video.img(), video.pts(), video.duration(), VPX_DL_GOOD_QUALITY); - ASSERT_EQ(VPX_CODEC_OK, res); - EXPECT_EQ(1, vpx_svc_is_keyframe(&svc_)); - vpx_codec_err_t res_dec = decoder_->DecodeFrame( - static_cast(vpx_svc_get_buffer(&svc_)), - vpx_svc_get_frame_size(&svc_)); - ASSERT_EQ(VPX_CODEC_OK, res_dec) << decoder_->DecodeError(); + if ((frame_size = vpx_svc_get_frame_size(&svc_)) > 0) { + EXPECT_EQ((decoded_frames == 0), vpx_svc_is_keyframe(&svc_)); + res_dec = decoder_->DecodeFrame( + static_cast(vpx_svc_get_buffer(&svc_)), frame_size); + ASSERT_EQ(VPX_CODEC_OK, res_dec) << decoder_->DecodeError(); + ++decoded_frames; + } // FRAME 1 video.Next(); @@ -305,12 +317,14 @@ TEST_F(SvcTest, EncodeThreeFrames) { res = vpx_svc_encode(&svc_, &codec_, video.img(), video.pts(), video.duration(), VPX_DL_GOOD_QUALITY); ASSERT_EQ(VPX_CODEC_OK, res); - EXPECT_EQ(0, vpx_svc_is_keyframe(&svc_)); - res_dec = decoder_->DecodeFrame( - static_cast(vpx_svc_get_buffer(&svc_)), - vpx_svc_get_frame_size(&svc_)); - ASSERT_EQ(VPX_CODEC_OK, res_dec) << decoder_->DecodeError(); + if ((frame_size = vpx_svc_get_frame_size(&svc_)) > 0) { + EXPECT_EQ((decoded_frames == 0), vpx_svc_is_keyframe(&svc_)); + res_dec = decoder_->DecodeFrame( + static_cast(vpx_svc_get_buffer(&svc_)), frame_size); + ASSERT_EQ(VPX_CODEC_OK, res_dec) << decoder_->DecodeError(); + ++decoded_frames; + } // FRAME 2 video.Next(); @@ -318,12 +332,29 @@ TEST_F(SvcTest, EncodeThreeFrames) { res = vpx_svc_encode(&svc_, &codec_, video.img(), video.pts(), video.duration(), VPX_DL_GOOD_QUALITY); ASSERT_EQ(VPX_CODEC_OK, res); - EXPECT_EQ(0, vpx_svc_is_keyframe(&svc_)); - res_dec = decoder_->DecodeFrame( - static_cast(vpx_svc_get_buffer(&svc_)), - vpx_svc_get_frame_size(&svc_)); - ASSERT_EQ(VPX_CODEC_OK, res_dec) << decoder_->DecodeError(); + if ((frame_size = vpx_svc_get_frame_size(&svc_)) > 0) { + EXPECT_EQ((decoded_frames == 0), vpx_svc_is_keyframe(&svc_)); + res_dec = decoder_->DecodeFrame( + static_cast(vpx_svc_get_buffer(&svc_)), frame_size); + ASSERT_EQ(VPX_CODEC_OK, res_dec) << decoder_->DecodeError(); + ++decoded_frames; + } + + // Flush encoder + res = vpx_svc_encode(&svc_, &codec_, NULL, 0, + video.duration(), VPX_DL_GOOD_QUALITY); + EXPECT_EQ(VPX_CODEC_OK, res); + + while ((frame_size = vpx_svc_get_frame_size(&svc_)) > 0) { + EXPECT_EQ((decoded_frames == 0), vpx_svc_is_keyframe(&svc_)); + res_dec = decoder_->DecodeFrame( + static_cast(vpx_svc_get_buffer(&svc_)), frame_size); + ASSERT_EQ(VPX_CODEC_OK, res_dec) << decoder_->DecodeError(); + ++decoded_frames; + } + + EXPECT_EQ(decoded_frames, 3); } TEST_F(SvcTest, GetLayerResolution) { @@ -413,6 +444,9 @@ TEST_F(SvcTest, TwoPassEncode) { vpx_codec_destroy(&codec_); // Second pass encode + int decoded_frames = 0; + vpx_codec_err_t res_dec; + int frame_size; codec_enc_.g_pass = VPX_RC_LAST_PASS; codec_enc_.rc_twopass_stats_in.buf = &stats_buf[0]; codec_enc_.rc_twopass_stats_in.sz = stats_buf.size(); @@ -427,12 +461,14 @@ TEST_F(SvcTest, TwoPassEncode) { res = vpx_svc_encode(&svc_, &codec_, video.img(), video.pts(), video.duration(), VPX_DL_GOOD_QUALITY); ASSERT_EQ(VPX_CODEC_OK, res); - EXPECT_EQ(1, vpx_svc_is_keyframe(&svc_)); - vpx_codec_err_t res_dec = decoder_->DecodeFrame( - static_cast(vpx_svc_get_buffer(&svc_)), - vpx_svc_get_frame_size(&svc_)); - ASSERT_EQ(VPX_CODEC_OK, res_dec) << decoder_->DecodeError(); + if ((frame_size = vpx_svc_get_frame_size(&svc_)) > 0) { + EXPECT_EQ((decoded_frames == 0), vpx_svc_is_keyframe(&svc_)); + res_dec = decoder_->DecodeFrame( + static_cast(vpx_svc_get_buffer(&svc_)), frame_size); + ASSERT_EQ(VPX_CODEC_OK, res_dec) << decoder_->DecodeError(); + ++decoded_frames; + } // FRAME 1 video.Next(); @@ -440,12 +476,14 @@ TEST_F(SvcTest, TwoPassEncode) { res = vpx_svc_encode(&svc_, &codec_, video.img(), video.pts(), video.duration(), VPX_DL_GOOD_QUALITY); ASSERT_EQ(VPX_CODEC_OK, res); - EXPECT_EQ(0, vpx_svc_is_keyframe(&svc_)); - res_dec = decoder_->DecodeFrame( - static_cast(vpx_svc_get_buffer(&svc_)), - vpx_svc_get_frame_size(&svc_)); - ASSERT_EQ(VPX_CODEC_OK, res_dec) << decoder_->DecodeError(); + if ((frame_size = vpx_svc_get_frame_size(&svc_)) > 0) { + EXPECT_EQ((decoded_frames == 0), vpx_svc_is_keyframe(&svc_)); + res_dec = decoder_->DecodeFrame( + static_cast(vpx_svc_get_buffer(&svc_)), frame_size); + ASSERT_EQ(VPX_CODEC_OK, res_dec) << decoder_->DecodeError(); + ++decoded_frames; + } // FRAME 2 video.Next(); @@ -453,12 +491,29 @@ TEST_F(SvcTest, TwoPassEncode) { res = vpx_svc_encode(&svc_, &codec_, video.img(), video.pts(), video.duration(), VPX_DL_GOOD_QUALITY); ASSERT_EQ(VPX_CODEC_OK, res); - EXPECT_EQ(0, vpx_svc_is_keyframe(&svc_)); - res_dec = decoder_->DecodeFrame( - static_cast(vpx_svc_get_buffer(&svc_)), - vpx_svc_get_frame_size(&svc_)); - ASSERT_EQ(VPX_CODEC_OK, res_dec) << decoder_->DecodeError(); + if ((frame_size = vpx_svc_get_frame_size(&svc_)) > 0) { + EXPECT_EQ((decoded_frames == 0), vpx_svc_is_keyframe(&svc_)); + res_dec = decoder_->DecodeFrame( + static_cast(vpx_svc_get_buffer(&svc_)), frame_size); + ASSERT_EQ(VPX_CODEC_OK, res_dec) << decoder_->DecodeError(); + ++decoded_frames; + } + + // Flush encoder + res = vpx_svc_encode(&svc_, &codec_, NULL, 0, + video.duration(), VPX_DL_GOOD_QUALITY); + EXPECT_EQ(VPX_CODEC_OK, res); + + while ((frame_size = vpx_svc_get_frame_size(&svc_)) > 0) { + EXPECT_EQ((decoded_frames == 0), vpx_svc_is_keyframe(&svc_)); + res_dec = decoder_->DecodeFrame( + static_cast(vpx_svc_get_buffer(&svc_)), frame_size); + ASSERT_EQ(VPX_CODEC_OK, res_dec) << decoder_->DecodeError(); + ++decoded_frames; + } + + EXPECT_EQ(decoded_frames, 3); } } // namespace diff --git a/test/test-data.sha1 b/test/test-data.sha1 index 0def69d18..bc6f77ed1 100644 --- a/test/test-data.sha1 +++ b/test/test-data.sha1 @@ -1,5 +1,9 @@ d5dfb0151c9051f8c85999255645d7a23916d3c0 hantro_collage_w352h288.yuv b87815bf86020c592ccc7a846ba2e28ec8043902 hantro_odd.yuv +fe346136b9b8c1e6f6084cc106485706915795e4 invalid-vp90-01.webm +25751f5d3b05ff03f0719ad42cd625348eb8961e invalid-vp90-01.webm.res +d78e2fceba5ac942246503ec8366f879c4775ca5 invalid-vp90-02.webm +2dadee5306245fa5eeb0f99652d0e17afbcba96d invalid-vp90-02.webm.res b1f1c3ec79114b9a0651af24ce634afb44a9a419 rush_hour_444.y4m 5184c46ddca8b1fadd16742e8500115bc8f749da vp80-00-comprehensive-001.ivf 65bf1bbbced81b97bd030f376d1b7f61a224793f vp80-00-comprehensive-002.ivf @@ -576,6 +580,8 @@ d48c5db1b0f8e60521a7c749696b8067886033a3 vp90-2-09-aq2.webm 54638c38009198c38c8f3b25c182b709b6c1fd2e vp90-2-09-lf_deltas.webm.md5 510d95f3beb3b51c572611fdaeeece12277dac30 vp90-2-10-show-existing-frame.webm 14d631096f4bfa2d71f7f739aec1448fb3c33bad vp90-2-10-show-existing-frame.webm.md5 +d2feea7728e8d2c615981d0f47427a4a5a45d881 vp90-2-10-show-existing-frame2.webm +5f7c7811baa3e4f03be1dd78c33971b727846821 vp90-2-10-show-existing-frame2.webm.md5 b4318e75f73a6a08992c7326de2fb589c2a794c7 vp90-2-11-size-351x287.webm b3c48382cf7d0454e83a02497c229d27720f9e20 vp90-2-11-size-351x287.webm.md5 8e0096475ea2535bac71d3e2fc09e0c451c444df vp90-2-11-size-351x288.webm @@ -638,5 +644,5 @@ e615575ded499ea1d992f3b38e3baa434509cdcd vp90-2-15-segkey.webm e3ab35d4316c5e81325c50f5236ceca4bc0d35df vp90-2-15-segkey.webm.md5 9b7ca2cac09d34c4a5d296c1900f93b1e2f69d0d vp90-2-15-segkey_adpq.webm 8f46ba5f785d0c2170591a153e0d0d146a7c8090 vp90-2-15-segkey_adpq.webm.md5 -d78e2fceba5ac942246503ec8366f879c4775ca5 vp90-2-15-fuzz-flicker.webm -bbd7dd15f43a703ff0a332fee4959e7b23bf77dc vp90-2-15-fuzz-flicker.webm.md5 +76024eb753cdac6a5e5703aaea189d35c3c30ac7 invalid-vp90-2-00-quantizer-00.webm.ivf.s5861_r01-05_b6-.ivf +d3964f9dad9f60363c81b688324d95b4ec7c8038 invalid-vp90-2-00-quantizer-00.webm.ivf.s5861_r01-05_b6-.ivf.res diff --git a/test/test.mk b/test/test.mk index c59ae1172..e7c4036e7 100644 --- a/test/test.mk +++ b/test/test.mk @@ -30,6 +30,7 @@ LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += cq_test.cc LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += keyframe_test.cc LIBVPX_TEST_SRCS-$(CONFIG_VP9_DECODER) += external_frame_buffer_test.cc +LIBVPX_TEST_SRCS-$(CONFIG_VP9_DECODER) += user_priv_test.cc LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += active_map_test.cc LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += borders_test.cc LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += cpu_speed_test.cc @@ -54,6 +55,7 @@ LIBVPX_TEST_SRCS-$(CONFIG_DECODERS) += ../webmdec.h LIBVPX_TEST_SRCS-$(CONFIG_DECODERS) += webm_video_source.h endif +LIBVPX_TEST_SRCS-$(CONFIG_DECODERS) += invalid_file_test.cc LIBVPX_TEST_SRCS-$(CONFIG_DECODERS) += test_vector_test.cc # Currently we only support decoder perf tests for vp9. Also they read from WebM @@ -690,6 +692,8 @@ LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-09-subpixel-00.ivf LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-09-subpixel-00.ivf.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-10-show-existing-frame.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-10-show-existing-frame.webm.md5 +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-10-show-existing-frame2.webm +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-10-show-existing-frame2.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-11-size-351x287.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-11-size-351x287.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-11-size-351x288.webm @@ -754,8 +758,14 @@ LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-15-segkey.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-15-segkey.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-15-segkey_adpq.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-15-segkey_adpq.webm.md5 -LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-15-fuzz-flicker.webm -LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-15-fuzz-flicker.webm.md5 + +# Invalid files for testing libvpx error checking. +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-01.webm +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-01.webm.res +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-02.webm +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-02.webm.res +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-2-00-quantizer-00.webm.ivf.s5861_r01-05_b6-.ivf +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-2-00-quantizer-00.webm.ivf.s5861_r01-05_b6-.ivf.res ifeq ($(CONFIG_DECODE_PERF_TESTS),yes) # BBB VP9 streams diff --git a/test/test_vectors.cc b/test/test_vectors.cc index 3873712cf..a6d546ea4 100644 --- a/test/test_vectors.cc +++ b/test/test_vectors.cc @@ -161,6 +161,7 @@ const char *const kVP9TestVectors[] = { "vp90-2-08-tile-4x1.webm", "vp90-2-09-subpixel-00.ivf", "vp90-2-02-size-lf-1920x1080.webm", "vp90-2-09-aq2.webm", "vp90-2-09-lf_deltas.webm", "vp90-2-10-show-existing-frame.webm", + "vp90-2-10-show-existing-frame2.webm", "vp90-2-11-size-351x287.webm", "vp90-2-11-size-351x288.webm", "vp90-2-11-size-352x287.webm", "vp90-2-12-droppable_1.ivf", "vp90-2-12-droppable_2.ivf", "vp90-2-12-droppable_3.ivf", @@ -179,7 +180,6 @@ const char *const kVP9TestVectors[] = { "vp90-2-14-resize-fp-tiles-8-16.webm", "vp90-2-14-resize-fp-tiles-8-1.webm", "vp90-2-14-resize-fp-tiles-8-2.webm", "vp90-2-14-resize-fp-tiles-8-4.webm", "vp90-2-15-segkey.webm", "vp90-2-15-segkey_adpq.webm", - "vp90-2-15-fuzz-flicker.webm" }; const int kNumVP9TestVectors = NELEMENTS(kVP9TestVectors); #endif // CONFIG_VP9_DECODER diff --git a/test/user_priv_test.cc b/test/user_priv_test.cc new file mode 100644 index 000000000..f9aef33da --- /dev/null +++ b/test/user_priv_test.cc @@ -0,0 +1,100 @@ +/* + * Copyright (c) 2013 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include +#include +#include +#include "third_party/googletest/src/include/gtest/gtest.h" +#include "./vpx_config.h" +#include "test/acm_random.h" +#include "test/codec_factory.h" +#include "test/decode_test_driver.h" +#include "test/ivf_video_source.h" +#include "test/md5_helper.h" +#include "test/util.h" +#if CONFIG_WEBM_IO +#include "test/webm_video_source.h" +#endif +#include "vpx_mem/vpx_mem.h" +#include "vpx/vp8.h" + +namespace { + +using std::string; +using libvpx_test::ACMRandom; + +#if CONFIG_WEBM_IO + +void CheckUserPrivateData(void *user_priv, int *target) { + // actual pointer value should be the same as expected. + EXPECT_EQ(reinterpret_cast(target), user_priv) << + "user_priv pointer value does not match."; +} + +// Decodes |filename|. Passes in user_priv data when calling DecodeFrame and +// compares the user_priv from return img with the original user_priv to see if +// they match. Both the pointer values and the values inside the addresses +// should match. +string DecodeFile(const string &filename) { + ACMRandom rnd(ACMRandom::DeterministicSeed()); + libvpx_test::WebMVideoSource video(filename); + video.Init(); + + vpx_codec_dec_cfg_t cfg = {0}; + libvpx_test::VP9Decoder decoder(cfg, 0); + + libvpx_test::MD5 md5; + int frame_num = 0; + for (video.Begin(); !::testing::Test::HasFailure() && video.cxdata(); + video.Next()) { + void *user_priv = reinterpret_cast(&frame_num); + const vpx_codec_err_t res = + decoder.DecodeFrame(video.cxdata(), video.frame_size(), + (frame_num == 0) ? NULL : user_priv); + if (res != VPX_CODEC_OK) { + EXPECT_EQ(VPX_CODEC_OK, res) << decoder.DecodeError(); + break; + } + libvpx_test::DxDataIterator dec_iter = decoder.GetDxData(); + const vpx_image_t *img = NULL; + + // Get decompressed data. + while ((img = dec_iter.Next())) { + if (frame_num == 0) { + CheckUserPrivateData(img->user_priv, NULL); + } else { + CheckUserPrivateData(img->user_priv, &frame_num); + + // Also test ctrl_get_reference api. + struct vp9_ref_frame ref; + // Randomly fetch a reference frame. + ref.idx = rnd.Rand8() % 3; + decoder.Control(VP9_GET_REFERENCE, &ref); + + CheckUserPrivateData(ref.img.user_priv, &frame_num); + } + md5.Add(img); + } + + frame_num++; + } + return string(md5.Get()); +} + +TEST(UserPrivTest, VideoDecode) { + // no tiles or frame parallel; this exercises the decoding to test the + // user_priv. + EXPECT_STREQ("b35a1b707b28e82be025d960aba039bc", + DecodeFile("vp90-2-03-size-226x226.webm").c_str()); +} + +#endif // CONFIG_WEBM_IO + +} // namespace diff --git a/third_party/libmkv/EbmlIDs.h b/third_party/libmkv/EbmlIDs.h new file mode 100644 index 000000000..44d438583 --- /dev/null +++ b/third_party/libmkv/EbmlIDs.h @@ -0,0 +1,231 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ +#ifndef MKV_DEFS_HPP +#define MKV_DEFS_HPP 1 + +/* Commenting out values not available in webm, but available in matroska */ + +enum mkv { + EBML = 0x1A45DFA3, + EBMLVersion = 0x4286, + EBMLReadVersion = 0x42F7, + EBMLMaxIDLength = 0x42F2, + EBMLMaxSizeLength = 0x42F3, + DocType = 0x4282, + DocTypeVersion = 0x4287, + DocTypeReadVersion = 0x4285, +/* CRC_32 = 0xBF, */ + Void = 0xEC, + SignatureSlot = 0x1B538667, + SignatureAlgo = 0x7E8A, + SignatureHash = 0x7E9A, + SignaturePublicKey = 0x7EA5, + Signature = 0x7EB5, + SignatureElements = 0x7E5B, + SignatureElementList = 0x7E7B, + SignedElement = 0x6532, + /* segment */ + Segment = 0x18538067, + /* Meta Seek Information */ + SeekHead = 0x114D9B74, + Seek = 0x4DBB, + SeekID = 0x53AB, + SeekPosition = 0x53AC, + /* Segment Information */ + Info = 0x1549A966, +/* SegmentUID = 0x73A4, */ +/* SegmentFilename = 0x7384, */ +/* PrevUID = 0x3CB923, */ +/* PrevFilename = 0x3C83AB, */ +/* NextUID = 0x3EB923, */ +/* NextFilename = 0x3E83BB, */ +/* SegmentFamily = 0x4444, */ +/* ChapterTranslate = 0x6924, */ +/* ChapterTranslateEditionUID = 0x69FC, */ +/* ChapterTranslateCodec = 0x69BF, */ +/* ChapterTranslateID = 0x69A5, */ + TimecodeScale = 0x2AD7B1, + Segment_Duration = 0x4489, + DateUTC = 0x4461, +/* Title = 0x7BA9, */ + MuxingApp = 0x4D80, + WritingApp = 0x5741, + /* Cluster */ + Cluster = 0x1F43B675, + Timecode = 0xE7, +/* SilentTracks = 0x5854, */ +/* SilentTrackNumber = 0x58D7, */ +/* Position = 0xA7, */ + PrevSize = 0xAB, + BlockGroup = 0xA0, + Block = 0xA1, +/* BlockVirtual = 0xA2, */ + BlockAdditions = 0x75A1, + BlockMore = 0xA6, + BlockAddID = 0xEE, + BlockAdditional = 0xA5, + BlockDuration = 0x9B, +/* ReferencePriority = 0xFA, */ + ReferenceBlock = 0xFB, +/* ReferenceVirtual = 0xFD, */ +/* CodecState = 0xA4, */ +/* Slices = 0x8E, */ +/* TimeSlice = 0xE8, */ + LaceNumber = 0xCC, +/* FrameNumber = 0xCD, */ +/* BlockAdditionID = 0xCB, */ +/* MkvDelay = 0xCE, */ +/* Cluster_Duration = 0xCF, */ + SimpleBlock = 0xA3, +/* EncryptedBlock = 0xAF, */ + /* Track */ + Tracks = 0x1654AE6B, + TrackEntry = 0xAE, + TrackNumber = 0xD7, + TrackUID = 0x73C5, + TrackType = 0x83, + FlagEnabled = 0xB9, + FlagDefault = 0x88, + FlagForced = 0x55AA, + FlagLacing = 0x9C, +/* MinCache = 0x6DE7, */ +/* MaxCache = 0x6DF8, */ + DefaultDuration = 0x23E383, +/* TrackTimecodeScale = 0x23314F, */ +/* TrackOffset = 0x537F, */ + MaxBlockAdditionID = 0x55EE, + Name = 0x536E, + Language = 0x22B59C, + CodecID = 0x86, + CodecPrivate = 0x63A2, + CodecName = 0x258688, +/* AttachmentLink = 0x7446, */ +/* CodecSettings = 0x3A9697, */ +/* CodecInfoURL = 0x3B4040, */ +/* CodecDownloadURL = 0x26B240, */ +/* CodecDecodeAll = 0xAA, */ +/* TrackOverlay = 0x6FAB, */ +/* TrackTranslate = 0x6624, */ +/* TrackTranslateEditionUID = 0x66FC, */ +/* TrackTranslateCodec = 0x66BF, */ +/* TrackTranslateTrackID = 0x66A5, */ + /* video */ + Video = 0xE0, + FlagInterlaced = 0x9A, + StereoMode = 0x53B8, + AlphaMode = 0x53C0, + PixelWidth = 0xB0, + PixelHeight = 0xBA, + PixelCropBottom = 0x54AA, + PixelCropTop = 0x54BB, + PixelCropLeft = 0x54CC, + PixelCropRight = 0x54DD, + DisplayWidth = 0x54B0, + DisplayHeight = 0x54BA, + DisplayUnit = 0x54B2, + AspectRatioType = 0x54B3, +/* ColourSpace = 0x2EB524, */ +/* GammaValue = 0x2FB523, */ + FrameRate = 0x2383E3, + /* end video */ + /* audio */ + Audio = 0xE1, + SamplingFrequency = 0xB5, + OutputSamplingFrequency = 0x78B5, + Channels = 0x9F, +/* ChannelPositions = 0x7D7B, */ + BitDepth = 0x6264, + /* end audio */ + /* content encoding */ +/* ContentEncodings = 0x6d80, */ +/* ContentEncoding = 0x6240, */ +/* ContentEncodingOrder = 0x5031, */ +/* ContentEncodingScope = 0x5032, */ +/* ContentEncodingType = 0x5033, */ +/* ContentCompression = 0x5034, */ +/* ContentCompAlgo = 0x4254, */ +/* ContentCompSettings = 0x4255, */ +/* ContentEncryption = 0x5035, */ +/* ContentEncAlgo = 0x47e1, */ +/* ContentEncKeyID = 0x47e2, */ +/* ContentSignature = 0x47e3, */ +/* ContentSigKeyID = 0x47e4, */ +/* ContentSigAlgo = 0x47e5, */ +/* ContentSigHashAlgo = 0x47e6, */ + /* end content encoding */ + /* Cueing Data */ + Cues = 0x1C53BB6B, + CuePoint = 0xBB, + CueTime = 0xB3, + CueTrackPositions = 0xB7, + CueTrack = 0xF7, + CueClusterPosition = 0xF1, + CueBlockNumber = 0x5378 +/* CueCodecState = 0xEA, */ +/* CueReference = 0xDB, */ +/* CueRefTime = 0x96, */ +/* CueRefCluster = 0x97, */ +/* CueRefNumber = 0x535F, */ +/* CueRefCodecState = 0xEB, */ + /* Attachment */ +/* Attachments = 0x1941A469, */ +/* AttachedFile = 0x61A7, */ +/* FileDescription = 0x467E, */ +/* FileName = 0x466E, */ +/* FileMimeType = 0x4660, */ +/* FileData = 0x465C, */ +/* FileUID = 0x46AE, */ +/* FileReferral = 0x4675, */ + /* Chapters */ +/* Chapters = 0x1043A770, */ +/* EditionEntry = 0x45B9, */ +/* EditionUID = 0x45BC, */ +/* EditionFlagHidden = 0x45BD, */ +/* EditionFlagDefault = 0x45DB, */ +/* EditionFlagOrdered = 0x45DD, */ +/* ChapterAtom = 0xB6, */ +/* ChapterUID = 0x73C4, */ +/* ChapterTimeStart = 0x91, */ +/* ChapterTimeEnd = 0x92, */ +/* ChapterFlagHidden = 0x98, */ +/* ChapterFlagEnabled = 0x4598, */ +/* ChapterSegmentUID = 0x6E67, */ +/* ChapterSegmentEditionUID = 0x6EBC, */ +/* ChapterPhysicalEquiv = 0x63C3, */ +/* ChapterTrack = 0x8F, */ +/* ChapterTrackNumber = 0x89, */ +/* ChapterDisplay = 0x80, */ +/* ChapString = 0x85, */ +/* ChapLanguage = 0x437C, */ +/* ChapCountry = 0x437E, */ +/* ChapProcess = 0x6944, */ +/* ChapProcessCodecID = 0x6955, */ +/* ChapProcessPrivate = 0x450D, */ +/* ChapProcessCommand = 0x6911, */ +/* ChapProcessTime = 0x6922, */ +/* ChapProcessData = 0x6933, */ + /* Tagging */ +/* Tags = 0x1254C367, */ +/* Tag = 0x7373, */ +/* Targets = 0x63C0, */ +/* TargetTypeValue = 0x68CA, */ +/* TargetType = 0x63CA, */ +/* Tagging_TrackUID = 0x63C5, */ +/* Tagging_EditionUID = 0x63C9, */ +/* Tagging_ChapterUID = 0x63C4, */ +/* AttachmentUID = 0x63C6, */ +/* SimpleTag = 0x67C8, */ +/* TagName = 0x45A3, */ +/* TagLanguage = 0x447A, */ +/* TagDefault = 0x4484, */ +/* TagString = 0x4487, */ +/* TagBinary = 0x4485, */ +}; +#endif diff --git a/third_party/libmkv/EbmlWriter.c b/third_party/libmkv/EbmlWriter.c new file mode 100644 index 000000000..27cfe861c --- /dev/null +++ b/third_party/libmkv/EbmlWriter.c @@ -0,0 +1,157 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ +#include "EbmlWriter.h" +#include +#include +#include +#include +#if defined(_MSC_VER) +#define LITERALU64(n) n +#else +#define LITERALU64(n) n##LLU +#endif + +void Ebml_WriteLen(EbmlGlobal *glob, int64_t val) { + /* TODO check and make sure we are not > than 0x0100000000000000LLU */ + unsigned char size = 8; /* size in bytes to output */ + + /* mask to compare for byte size */ + int64_t minVal = 0xff; + + for (size = 1; size < 8; size ++) { + if (val < minVal) + break; + + minVal = (minVal << 7); + } + + val |= (((uint64_t)0x80) << ((size - 1) * 7)); + + Ebml_Serialize(glob, (void *) &val, sizeof(val), size); +} + +void Ebml_WriteString(EbmlGlobal *glob, const char *str) { + const size_t size_ = strlen(str); + const uint64_t size = size_; + Ebml_WriteLen(glob, size); + /* TODO: it's not clear from the spec whether the nul terminator + * should be serialized too. For now we omit the null terminator. + */ + Ebml_Write(glob, str, (unsigned long)size); +} + +void Ebml_WriteUTF8(EbmlGlobal *glob, const wchar_t *wstr) { + const size_t strlen = wcslen(wstr); + + /* TODO: it's not clear from the spec whether the nul terminator + * should be serialized too. For now we include it. + */ + const uint64_t size = strlen; + + Ebml_WriteLen(glob, size); + Ebml_Write(glob, wstr, (unsigned long)size); +} + +void Ebml_WriteID(EbmlGlobal *glob, unsigned long class_id) { + int len; + + if (class_id >= 0x01000000) + len = 4; + else if (class_id >= 0x00010000) + len = 3; + else if (class_id >= 0x00000100) + len = 2; + else + len = 1; + + Ebml_Serialize(glob, (void *)&class_id, sizeof(class_id), len); +} + +void Ebml_SerializeUnsigned64(EbmlGlobal *glob, unsigned long class_id, uint64_t ui) { + unsigned char sizeSerialized = 8 | 0x80; + Ebml_WriteID(glob, class_id); + Ebml_Serialize(glob, &sizeSerialized, sizeof(sizeSerialized), 1); + Ebml_Serialize(glob, &ui, sizeof(ui), 8); +} + +void Ebml_SerializeUnsigned(EbmlGlobal *glob, unsigned long class_id, unsigned long ui) { + unsigned char size = 8; /* size in bytes to output */ + unsigned char sizeSerialized = 0; + unsigned long minVal; + + Ebml_WriteID(glob, class_id); + minVal = 0x7fLU; /* mask to compare for byte size */ + + for (size = 1; size < 4; size ++) { + if (ui < minVal) { + break; + } + + minVal <<= 7; + } + + sizeSerialized = 0x80 | size; + Ebml_Serialize(glob, &sizeSerialized, sizeof(sizeSerialized), 1); + Ebml_Serialize(glob, &ui, sizeof(ui), size); +} +/* TODO: perhaps this is a poor name for this id serializer helper function */ +void Ebml_SerializeBinary(EbmlGlobal *glob, unsigned long class_id, unsigned long bin) { + int size; + for (size = 4; size > 1; size--) { + if (bin & (unsigned int)0x000000ff << ((size - 1) * 8)) + break; + } + Ebml_WriteID(glob, class_id); + Ebml_WriteLen(glob, size); + Ebml_WriteID(glob, bin); +} + +void Ebml_SerializeFloat(EbmlGlobal *glob, unsigned long class_id, double d) { + unsigned char len = 0x88; + + Ebml_WriteID(glob, class_id); + Ebml_Serialize(glob, &len, sizeof(len), 1); + Ebml_Serialize(glob, &d, sizeof(d), 8); +} + +void Ebml_WriteSigned16(EbmlGlobal *glob, short val) { + signed long out = ((val & 0x003FFFFF) | 0x00200000) << 8; + Ebml_Serialize(glob, &out, sizeof(out), 3); +} + +void Ebml_SerializeString(EbmlGlobal *glob, unsigned long class_id, const char *s) { + Ebml_WriteID(glob, class_id); + Ebml_WriteString(glob, s); +} + +void Ebml_SerializeUTF8(EbmlGlobal *glob, unsigned long class_id, wchar_t *s) { + Ebml_WriteID(glob, class_id); + Ebml_WriteUTF8(glob, s); +} + +void Ebml_SerializeData(EbmlGlobal *glob, unsigned long class_id, unsigned char *data, unsigned long data_length) { + Ebml_WriteID(glob, class_id); + Ebml_WriteLen(glob, data_length); + Ebml_Write(glob, data, data_length); +} + +void Ebml_WriteVoid(EbmlGlobal *glob, unsigned long vSize) { + unsigned char tmp = 0; + unsigned long i = 0; + + Ebml_WriteID(glob, 0xEC); + Ebml_WriteLen(glob, vSize); + + for (i = 0; i < vSize; i++) { + Ebml_Write(glob, &tmp, 1); + } +} + +/* TODO Serialize Date */ diff --git a/third_party/libmkv/EbmlWriter.h b/third_party/libmkv/EbmlWriter.h new file mode 100644 index 000000000..b94f75733 --- /dev/null +++ b/third_party/libmkv/EbmlWriter.h @@ -0,0 +1,42 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ +#ifndef EBMLWRITER_HPP +#define EBMLWRITER_HPP +#include +#include "vpx/vpx_integer.h" + +/* note: you must define write and serialize functions as well as your own + * EBML_GLOBAL + * + * These functions MUST be implemented + */ + +typedef struct EbmlGlobal EbmlGlobal; +void Ebml_Serialize(EbmlGlobal *glob, const void *, int, unsigned long); +void Ebml_Write(EbmlGlobal *glob, const void *, unsigned long); + +/*****/ + +void Ebml_WriteLen(EbmlGlobal *glob, int64_t val); +void Ebml_WriteString(EbmlGlobal *glob, const char *str); +void Ebml_WriteUTF8(EbmlGlobal *glob, const wchar_t *wstr); +void Ebml_WriteID(EbmlGlobal *glob, unsigned long class_id); +void Ebml_SerializeUnsigned64(EbmlGlobal *glob, unsigned long class_id, uint64_t ui); +void Ebml_SerializeUnsigned(EbmlGlobal *glob, unsigned long class_id, unsigned long ui); +void Ebml_SerializeBinary(EbmlGlobal *glob, unsigned long class_id, unsigned long ui); +void Ebml_SerializeFloat(EbmlGlobal *glob, unsigned long class_id, double d); +/* TODO make this more generic to signed */ +void Ebml_WriteSigned16(EbmlGlobal *glob, short val); +void Ebml_SerializeString(EbmlGlobal *glob, unsigned long class_id, const char *s); +void Ebml_SerializeUTF8(EbmlGlobal *glob, unsigned long class_id, wchar_t *s); +void Ebml_SerializeData(EbmlGlobal *glob, unsigned long class_id, unsigned char *data, unsigned long data_length); +void Ebml_WriteVoid(EbmlGlobal *glob, unsigned long vSize); +/* TODO need date function */ +#endif diff --git a/vp8/common/rtcd_defs.pl b/vp8/common/rtcd_defs.pl index cbfd76a8d..3e4077460 100644 --- a/vp8/common/rtcd_defs.pl +++ b/vp8/common/rtcd_defs.pl @@ -463,9 +463,7 @@ $vp8_short_walsh4x4_neon_asm=vp8_short_walsh4x4_neon; # Quantizer # add_proto qw/void vp8_regular_quantize_b/, "struct block *, struct blockd *"; -specialize qw/vp8_regular_quantize_b sse2/; -# TODO(johann) Update sse4 implementation and re-enable -#$vp8_regular_quantize_b_sse4_1=vp8_regular_quantize_b_sse4; +specialize qw/vp8_regular_quantize_b sse2 sse4_1/; add_proto qw/void vp8_fast_quantize_b/, "struct block *, struct blockd *"; specialize qw/vp8_fast_quantize_b sse2 ssse3 media neon_asm/; diff --git a/vp8/encoder/x86/quantize_sse2.c b/vp8/encoder/x86/quantize_sse2.c index f495bf287..291d21992 100644 --- a/vp8/encoder/x86/quantize_sse2.c +++ b/vp8/encoder/x86/quantize_sse2.c @@ -26,11 +26,10 @@ int cmp = (x[z] < boost) | (y[z] == 0); \ zbin_boost_ptr++; \ if (cmp) \ - goto select_eob_end_##i; \ + break; \ qcoeff_ptr[z] = y[z]; \ eob = i; \ zbin_boost_ptr = b->zrun_zbin_boost; \ - select_eob_end_##i:; \ } while (0) void vp8_regular_quantize_b_sse2(BLOCK *b, BLOCKD *d) diff --git a/vp8/encoder/x86/quantize_sse4.asm b/vp8/encoder/x86/quantize_sse4.asm deleted file mode 100644 index dbd171bfc..000000000 --- a/vp8/encoder/x86/quantize_sse4.asm +++ /dev/null @@ -1,256 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license and patent -; grant that can be found in the LICENSE file in the root of the source -; tree. All contributing project authors may be found in the AUTHORS -; file in the root of the source tree. -; - - -%include "vpx_ports/x86_abi_support.asm" -%include "vp8_asm_enc_offsets.asm" - - -; void vp8_regular_quantize_b_sse4 | arg -; (BLOCK *b, | 0 -; BLOCKD *d) | 1 - -global sym(vp8_regular_quantize_b_sse4) PRIVATE -sym(vp8_regular_quantize_b_sse4): - -%if ABI_IS_32BIT - push rbp - mov rbp, rsp - GET_GOT rbx - push rdi - push rsi - - ALIGN_STACK 16, rax - %define qcoeff 0 ; 32 - %define stack_size 32 - sub rsp, stack_size -%else - %if LIBVPX_YASM_WIN64 - SAVE_XMM 8, u - push rdi - push rsi - %endif -%endif - ; end prolog - -%if ABI_IS_32BIT - mov rdi, arg(0) ; BLOCK *b - mov rsi, arg(1) ; BLOCKD *d -%else - %if LIBVPX_YASM_WIN64 - mov rdi, rcx ; BLOCK *b - mov rsi, rdx ; BLOCKD *d - %else - ;mov rdi, rdi ; BLOCK *b - ;mov rsi, rsi ; BLOCKD *d - %endif -%endif - - mov rax, [rdi + vp8_block_coeff] - mov rcx, [rdi + vp8_block_zbin] - mov rdx, [rdi + vp8_block_round] - movd xmm7, [rdi + vp8_block_zbin_extra] - - ; z - movdqa xmm0, [rax] - movdqa xmm1, [rax + 16] - - ; duplicate zbin_oq_value - pshuflw xmm7, xmm7, 0 - punpcklwd xmm7, xmm7 - - movdqa xmm2, xmm0 - movdqa xmm3, xmm1 - - ; sz - psraw xmm0, 15 - psraw xmm1, 15 - - ; (z ^ sz) - pxor xmm2, xmm0 - pxor xmm3, xmm1 - - ; x = abs(z) - psubw xmm2, xmm0 - psubw xmm3, xmm1 - - ; zbin - movdqa xmm4, [rcx] - movdqa xmm5, [rcx + 16] - - ; *zbin_ptr + zbin_oq_value - paddw xmm4, xmm7 - paddw xmm5, xmm7 - - movdqa xmm6, xmm2 - movdqa xmm7, xmm3 - - ; x - (*zbin_ptr + zbin_oq_value) - psubw xmm6, xmm4 - psubw xmm7, xmm5 - - ; round - movdqa xmm4, [rdx] - movdqa xmm5, [rdx + 16] - - mov rax, [rdi + vp8_block_quant_shift] - mov rcx, [rdi + vp8_block_quant] - mov rdx, [rdi + vp8_block_zrun_zbin_boost] - - ; x + round - paddw xmm2, xmm4 - paddw xmm3, xmm5 - - ; quant - movdqa xmm4, [rcx] - movdqa xmm5, [rcx + 16] - - ; y = x * quant_ptr >> 16 - pmulhw xmm4, xmm2 - pmulhw xmm5, xmm3 - - ; y += x - paddw xmm2, xmm4 - paddw xmm3, xmm5 - - pxor xmm4, xmm4 -%if ABI_IS_32BIT - movdqa [rsp + qcoeff], xmm4 - movdqa [rsp + qcoeff + 16], xmm4 -%else - pxor xmm8, xmm8 -%endif - - ; quant_shift - movdqa xmm5, [rax] - - ; zrun_zbin_boost - mov rax, rdx - -%macro ZIGZAG_LOOP 5 - ; x - pextrw ecx, %4, %2 - - ; if (x >= zbin) - sub cx, WORD PTR[rdx] ; x - zbin - lea rdx, [rdx + 2] ; zbin_boost_ptr++ - jl .rq_zigzag_loop_%1 ; x < zbin - - pextrw edi, %3, %2 ; y - - ; downshift by quant_shift[rc] - pextrb ecx, xmm5, %1 ; quant_shift[rc] - sar edi, cl ; also sets Z bit - je .rq_zigzag_loop_%1 ; !y -%if ABI_IS_32BIT - mov WORD PTR[rsp + qcoeff + %1 *2], di -%else - pinsrw %5, edi, %2 ; qcoeff[rc] -%endif - mov rdx, rax ; reset to b->zrun_zbin_boost -.rq_zigzag_loop_%1: -%endmacro -; in vp8_default_zig_zag1d order: see vp8/common/entropy.c -ZIGZAG_LOOP 0, 0, xmm2, xmm6, xmm4 -ZIGZAG_LOOP 1, 1, xmm2, xmm6, xmm4 -ZIGZAG_LOOP 4, 4, xmm2, xmm6, xmm4 -ZIGZAG_LOOP 8, 0, xmm3, xmm7, xmm8 -ZIGZAG_LOOP 5, 5, xmm2, xmm6, xmm4 -ZIGZAG_LOOP 2, 2, xmm2, xmm6, xmm4 -ZIGZAG_LOOP 3, 3, xmm2, xmm6, xmm4 -ZIGZAG_LOOP 6, 6, xmm2, xmm6, xmm4 -ZIGZAG_LOOP 9, 1, xmm3, xmm7, xmm8 -ZIGZAG_LOOP 12, 4, xmm3, xmm7, xmm8 -ZIGZAG_LOOP 13, 5, xmm3, xmm7, xmm8 -ZIGZAG_LOOP 10, 2, xmm3, xmm7, xmm8 -ZIGZAG_LOOP 7, 7, xmm2, xmm6, xmm4 -ZIGZAG_LOOP 11, 3, xmm3, xmm7, xmm8 -ZIGZAG_LOOP 14, 6, xmm3, xmm7, xmm8 -ZIGZAG_LOOP 15, 7, xmm3, xmm7, xmm8 - - mov rcx, [rsi + vp8_blockd_dequant] - mov rdi, [rsi + vp8_blockd_dqcoeff] - -%if ABI_IS_32BIT - movdqa xmm4, [rsp + qcoeff] - movdqa xmm5, [rsp + qcoeff + 16] -%else - %define xmm5 xmm8 -%endif - - ; y ^ sz - pxor xmm4, xmm0 - pxor xmm5, xmm1 - ; x = (y ^ sz) - sz - psubw xmm4, xmm0 - psubw xmm5, xmm1 - - ; dequant - movdqa xmm0, [rcx] - movdqa xmm1, [rcx + 16] - - mov rcx, [rsi + vp8_blockd_qcoeff] - - pmullw xmm0, xmm4 - pmullw xmm1, xmm5 - - ; store qcoeff - movdqa [rcx], xmm4 - movdqa [rcx + 16], xmm5 - - ; store dqcoeff - movdqa [rdi], xmm0 - movdqa [rdi + 16], xmm1 - - mov rcx, [rsi + vp8_blockd_eob] - - ; select the last value (in zig_zag order) for EOB - pxor xmm6, xmm6 - pcmpeqw xmm4, xmm6 - pcmpeqw xmm5, xmm6 - - packsswb xmm4, xmm5 - pshufb xmm4, [GLOBAL(zig_zag1d)] - pmovmskb edx, xmm4 - xor rdi, rdi - mov eax, -1 - xor dx, ax - bsr eax, edx - sub edi, edx - sar edi, 31 - add eax, 1 - and eax, edi - - mov BYTE PTR [rcx], al ; store eob - - ; begin epilog -%if ABI_IS_32BIT - add rsp, stack_size - pop rsp - - pop rsi - pop rdi - RESTORE_GOT - pop rbp -%else - %undef xmm5 - %if LIBVPX_YASM_WIN64 - pop rsi - pop rdi - RESTORE_XMM - %endif -%endif - - ret - -SECTION_RODATA -align 16 -; vp8/common/entropy.c: vp8_default_zig_zag1d -zig_zag1d: - db 0, 1, 4, 8, 5, 2, 3, 6, 9, 12, 13, 10, 7, 11, 14, 15 diff --git a/vp8/encoder/x86/quantize_sse4.c b/vp8/encoder/x86/quantize_sse4.c new file mode 100644 index 000000000..601dd23a2 --- /dev/null +++ b/vp8/encoder/x86/quantize_sse4.c @@ -0,0 +1,128 @@ +/* + * Copyright (c) 2012 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + + +#include /* SSE4.1 */ + +#include "./vp8_rtcd.h" +#include "vp8/encoder/block.h" +#include "vp8/common/entropy.h" /* vp8_default_inv_zig_zag */ + +#define SELECT_EOB(i, z, x, y, q) \ + do { \ + short boost = *zbin_boost_ptr; \ + short x_z = _mm_extract_epi16(x, z); \ + short y_z = _mm_extract_epi16(y, z); \ + int cmp = (x_z < boost) | (y_z == 0); \ + zbin_boost_ptr++; \ + if (cmp) \ + break; \ + q = _mm_insert_epi16(q, y_z, z); \ + eob = i; \ + zbin_boost_ptr = b->zrun_zbin_boost; \ + } while (0) + +void vp8_regular_quantize_b_sse4_1(BLOCK *b, BLOCKD *d) { + char eob = 0; + short *zbin_boost_ptr = b->zrun_zbin_boost; + + __m128i sz0, x0, sz1, x1, y0, y1, x_minus_zbin0, x_minus_zbin1, + dqcoeff0, dqcoeff1; + __m128i quant_shift0 = _mm_load_si128((__m128i *)(b->quant_shift)); + __m128i quant_shift1 = _mm_load_si128((__m128i *)(b->quant_shift + 8)); + __m128i z0 = _mm_load_si128((__m128i *)(b->coeff)); + __m128i z1 = _mm_load_si128((__m128i *)(b->coeff+8)); + __m128i zbin_extra = _mm_cvtsi32_si128(b->zbin_extra); + __m128i zbin0 = _mm_load_si128((__m128i *)(b->zbin)); + __m128i zbin1 = _mm_load_si128((__m128i *)(b->zbin + 8)); + __m128i round0 = _mm_load_si128((__m128i *)(b->round)); + __m128i round1 = _mm_load_si128((__m128i *)(b->round + 8)); + __m128i quant0 = _mm_load_si128((__m128i *)(b->quant)); + __m128i quant1 = _mm_load_si128((__m128i *)(b->quant + 8)); + __m128i dequant0 = _mm_load_si128((__m128i *)(d->dequant)); + __m128i dequant1 = _mm_load_si128((__m128i *)(d->dequant + 8)); + __m128i qcoeff0 = _mm_setzero_si128(); + __m128i qcoeff1 = _mm_setzero_si128(); + + /* Duplicate to all lanes. */ + zbin_extra = _mm_shufflelo_epi16(zbin_extra, 0); + zbin_extra = _mm_unpacklo_epi16(zbin_extra, zbin_extra); + + /* Sign of z: z >> 15 */ + sz0 = _mm_srai_epi16(z0, 15); + sz1 = _mm_srai_epi16(z1, 15); + + /* x = abs(z): (z ^ sz) - sz */ + x0 = _mm_xor_si128(z0, sz0); + x1 = _mm_xor_si128(z1, sz1); + x0 = _mm_sub_epi16(x0, sz0); + x1 = _mm_sub_epi16(x1, sz1); + + /* zbin[] + zbin_extra */ + zbin0 = _mm_add_epi16(zbin0, zbin_extra); + zbin1 = _mm_add_epi16(zbin1, zbin_extra); + + /* In C x is compared to zbin where zbin = zbin[] + boost + extra. Rebalance + * the equation because boost is the only value which can change: + * x - (zbin[] + extra) >= boost */ + x_minus_zbin0 = _mm_sub_epi16(x0, zbin0); + x_minus_zbin1 = _mm_sub_epi16(x1, zbin1); + + /* All the remaining calculations are valid whether they are done now with + * simd or later inside the loop one at a time. */ + x0 = _mm_add_epi16(x0, round0); + x1 = _mm_add_epi16(x1, round1); + + y0 = _mm_mulhi_epi16(x0, quant0); + y1 = _mm_mulhi_epi16(x1, quant1); + + y0 = _mm_add_epi16(y0, x0); + y1 = _mm_add_epi16(y1, x1); + + /* Instead of shifting each value independently we convert the scaling + * factor with 1 << (16 - shift) so we can use multiply/return high half. */ + y0 = _mm_mulhi_epi16(y0, quant_shift0); + y1 = _mm_mulhi_epi16(y1, quant_shift1); + + /* Return the sign: (y ^ sz) - sz */ + y0 = _mm_xor_si128(y0, sz0); + y1 = _mm_xor_si128(y1, sz1); + y0 = _mm_sub_epi16(y0, sz0); + y1 = _mm_sub_epi16(y1, sz1); + + /* The loop gets unrolled anyway. Avoid the vp8_default_zig_zag1d lookup. */ + SELECT_EOB(1, 0, x_minus_zbin0, y0, qcoeff0); + SELECT_EOB(2, 1, x_minus_zbin0, y0, qcoeff0); + SELECT_EOB(3, 4, x_minus_zbin0, y0, qcoeff0); + SELECT_EOB(4, 0, x_minus_zbin1, y1, qcoeff1); + SELECT_EOB(5, 5, x_minus_zbin0, y0, qcoeff0); + SELECT_EOB(6, 2, x_minus_zbin0, y0, qcoeff0); + SELECT_EOB(7, 3, x_minus_zbin0, y0, qcoeff0); + SELECT_EOB(8, 6, x_minus_zbin0, y0, qcoeff0); + SELECT_EOB(9, 1, x_minus_zbin1, y1, qcoeff1); + SELECT_EOB(10, 4, x_minus_zbin1, y1, qcoeff1); + SELECT_EOB(11, 5, x_minus_zbin1, y1, qcoeff1); + SELECT_EOB(12, 2, x_minus_zbin1, y1, qcoeff1); + SELECT_EOB(13, 7, x_minus_zbin0, y0, qcoeff0); + SELECT_EOB(14, 3, x_minus_zbin1, y1, qcoeff1); + SELECT_EOB(15, 6, x_minus_zbin1, y1, qcoeff1); + SELECT_EOB(16, 7, x_minus_zbin1, y1, qcoeff1); + + _mm_store_si128((__m128i *)(d->qcoeff), qcoeff0); + _mm_store_si128((__m128i *)(d->qcoeff + 8), qcoeff1); + + dqcoeff0 = _mm_mullo_epi16(qcoeff0, dequant0); + dqcoeff1 = _mm_mullo_epi16(qcoeff1, dequant1); + + _mm_store_si128((__m128i *)(d->dqcoeff), dqcoeff0); + _mm_store_si128((__m128i *)(d->dqcoeff + 8), dqcoeff1); + + *d->eob = eob; +} diff --git a/vp8/vp8cx.mk b/vp8/vp8cx.mk index b7b948add..a0dbdcfa9 100644 --- a/vp8/vp8cx.mk +++ b/vp8/vp8cx.mk @@ -89,6 +89,7 @@ VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/dct_sse2.asm VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/fwalsh_sse2.asm VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/quantize_sse2.c VP8_CX_SRCS-$(HAVE_SSSE3) += encoder/x86/quantize_ssse3.c +VP8_CX_SRCS-$(HAVE_SSE4_1) += encoder/x86/quantize_sse4.c ifeq ($(CONFIG_TEMPORAL_DENOISING),yes) VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/denoising_sse2.c @@ -97,7 +98,6 @@ endif VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/subtract_sse2.asm VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/temporal_filter_apply_sse2.asm VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp8_enc_stubs_sse2.c -VP8_CX_SRCS-$(HAVE_SSE4_1) += encoder/x86/quantize_sse4.asm VP8_CX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += encoder/x86/quantize_mmx.asm VP8_CX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += encoder/x86/encodeopt.asm VP8_CX_SRCS-$(ARCH_X86_64) += encoder/x86/ssim_opt_x86_64.asm diff --git a/vp9/common/arm/neon/vp9_convolve_neon.c b/vp9/common/arm/neon/vp9_convolve_neon.c index d8b24bfaf..f0881b5ae 100644 --- a/vp9/common/arm/neon/vp9_convolve_neon.c +++ b/vp9/common/arm/neon/vp9_convolve_neon.c @@ -25,12 +25,14 @@ void vp9_convolve8_neon(const uint8_t *src, ptrdiff_t src_stride, // Account for the vertical phase needing 3 lines prior and 4 lines post int intermediate_height = h + 7; - if (x_step_q4 != 16 || y_step_q4 != 16) - return vp9_convolve8_c(src, src_stride, - dst, dst_stride, - filter_x, x_step_q4, - filter_y, y_step_q4, - w, h); + if (x_step_q4 != 16 || y_step_q4 != 16) { + vp9_convolve8_c(src, src_stride, + dst, dst_stride, + filter_x, x_step_q4, + filter_y, y_step_q4, + w, h); + return; + } /* Filter starting 3 lines back. The neon implementation will ignore the * given height and filter a multiple of 4 lines. Since this goes in to @@ -57,12 +59,14 @@ void vp9_convolve8_avg_neon(const uint8_t *src, ptrdiff_t src_stride, DECLARE_ALIGNED_ARRAY(8, uint8_t, temp, 64 * 72); int intermediate_height = h + 7; - if (x_step_q4 != 16 || y_step_q4 != 16) - return vp9_convolve8_avg_c(src, src_stride, - dst, dst_stride, - filter_x, x_step_q4, - filter_y, y_step_q4, - w, h); + if (x_step_q4 != 16 || y_step_q4 != 16) { + vp9_convolve8_avg_c(src, src_stride, + dst, dst_stride, + filter_x, x_step_q4, + filter_y, y_step_q4, + w, h); + return; + } /* This implementation has the same issues as above. In addition, we only want * to average the values after both passes. diff --git a/vp9/common/arm/neon/vp9_loopfilter_16_neon.c b/vp9/common/arm/neon/vp9_loopfilter_16_neon.c index 0820db247..bc6a17cd1 100644 --- a/vp9/common/arm/neon/vp9_loopfilter_16_neon.c +++ b/vp9/common/arm/neon/vp9_loopfilter_16_neon.c @@ -9,6 +9,7 @@ */ #include "./vp9_rtcd.h" +#include "vpx/vpx_integer.h" void vp9_lpf_horizontal_8_dual_neon(uint8_t *s, int p /* pitch */, const uint8_t *blimit0, diff --git a/vp9/common/vp9_alloccommon.c b/vp9/common/vp9_alloccommon.c index e56a0b7a8..2386b13e7 100644 --- a/vp9/common/vp9_alloccommon.c +++ b/vp9/common/vp9_alloccommon.c @@ -109,7 +109,9 @@ void vp9_free_frame_buffers(VP9_COMMON *cm) { } vp9_free_frame_buffer(&cm->post_proc_buffer); +} +void vp9_free_context_buffers(VP9_COMMON *cm) { free_mi(cm); vpx_free(cm->last_frame_seg_map); @@ -165,37 +167,55 @@ int vp9_resize_frame_buffers(VP9_COMMON *cm, int width, int height) { fail: vp9_free_frame_buffers(cm); + vp9_free_context_buffers(cm); return 1; } +static void init_frame_bufs(VP9_COMMON *cm) { + int i; + + cm->new_fb_idx = FRAME_BUFFERS - 1; + cm->frame_bufs[cm->new_fb_idx].ref_count = 1; + + for (i = 0; i < REF_FRAMES; ++i) { + cm->ref_frame_map[i] = i; + cm->frame_bufs[i].ref_count = 1; + } +} + int vp9_alloc_frame_buffers(VP9_COMMON *cm, int width, int height) { - const int aligned_width = ALIGN_POWER_OF_TWO(width, MI_SIZE_LOG2); - const int aligned_height = ALIGN_POWER_OF_TWO(height, MI_SIZE_LOG2); + int i; const int ss_x = cm->subsampling_x; const int ss_y = cm->subsampling_y; - int i; vp9_free_frame_buffers(cm); - for (i = 0; i < FRAME_BUFFERS; i++) { + for (i = 0; i < FRAME_BUFFERS; ++i) { cm->frame_bufs[i].ref_count = 0; if (vp9_alloc_frame_buffer(&cm->frame_bufs[i].buf, width, height, ss_x, ss_y, VP9_ENC_BORDER_IN_PIXELS) < 0) goto fail; } - cm->new_fb_idx = FRAME_BUFFERS - 1; - cm->frame_bufs[cm->new_fb_idx].ref_count = 1; - - for (i = 0; i < REF_FRAMES; i++) { - cm->ref_frame_map[i] = i; - cm->frame_bufs[i].ref_count = 1; - } + init_frame_bufs(cm); if (vp9_alloc_frame_buffer(&cm->post_proc_buffer, width, height, ss_x, ss_y, VP9_ENC_BORDER_IN_PIXELS) < 0) goto fail; + return 0; + + fail: + vp9_free_frame_buffers(cm); + return 1; +} + +int vp9_alloc_context_buffers(VP9_COMMON *cm, int width, int height) { + const int aligned_width = ALIGN_POWER_OF_TWO(width, MI_SIZE_LOG2); + const int aligned_height = ALIGN_POWER_OF_TWO(height, MI_SIZE_LOG2); + + vp9_free_context_buffers(cm); + set_mb_mi(cm, aligned_width, aligned_height); if (alloc_mi(cm, cm->mi_stride * (cm->mi_rows + MI_BLOCK_SIZE))) @@ -224,12 +244,13 @@ int vp9_alloc_frame_buffers(VP9_COMMON *cm, int width, int height) { return 0; fail: - vp9_free_frame_buffers(cm); + vp9_free_context_buffers(cm); return 1; } void vp9_remove_common(VP9_COMMON *cm) { vp9_free_frame_buffers(cm); + vp9_free_context_buffers(cm); vp9_free_internal_frame_buffers(&cm->int_frame_buffers); } diff --git a/vp9/common/vp9_alloccommon.h b/vp9/common/vp9_alloccommon.h index 06636a905..c4b1b8d2d 100644 --- a/vp9/common/vp9_alloccommon.h +++ b/vp9/common/vp9_alloccommon.h @@ -23,8 +23,12 @@ void vp9_remove_common(struct VP9Common *cm); int vp9_resize_frame_buffers(struct VP9Common *cm, int width, int height); int vp9_alloc_frame_buffers(struct VP9Common *cm, int width, int height); +int vp9_alloc_state_buffers(struct VP9Common *cm, int width, int height); +int vp9_alloc_context_buffers(struct VP9Common *cm, int width, int height); void vp9_free_frame_buffers(struct VP9Common *cm); +void vp9_free_state_buffers(struct VP9Common *cm); +void vp9_free_context_buffers(struct VP9Common *cm); void vp9_update_frame_size(struct VP9Common *cm); diff --git a/vp9/common/vp9_convolve.c b/vp9/common/vp9_convolve.c index 1a8c49d52..d8aaf32c4 100644 --- a/vp9/common/vp9_convolve.c +++ b/vp9/common/vp9_convolve.c @@ -117,17 +117,25 @@ static void convolve(const uint8_t *src, ptrdiff_t src_stride, const InterpKernel *const y_filters, int y0_q4, int y_step_q4, int w, int h) { - // Fixed size intermediate buffer places limits on parameters. - // Maximum intermediate_height is 324, for y_step_q4 == 80, - // h == 64, taps == 8. - // y_step_q4 of 80 allows for 1/10 scale for 5 layer svc - uint8_t temp[64 * 324]; + // Note: Fixed size intermediate buffer, temp, places limits on parameters. + // 2d filtering proceeds in 2 steps: + // (1) Interpolate horizontally into an intermediate buffer, temp. + // (2) Interpolate temp vertically to derive the sub-pixel result. + // Deriving the maximum number of rows in the temp buffer (135): + // --Smallest scaling factor is x1/2 ==> y_step_q4 = 32 (Normative). + // --Largest block size is 64x64 pixels. + // --64 rows in the downscaled frame span a distance of (64 - 1) * 32 in the + // original frame (in 1/16th pixel units). + // --Must round-up because block may be located at sub-pixel position. + // --Require an additional SUBPEL_TAPS rows for the 8-tap filter tails. + // --((64 - 1) * 32 + 15) >> 4 + 8 = 135. + uint8_t temp[135 * 64]; int intermediate_height = (((h - 1) * y_step_q4 + 15) >> 4) + SUBPEL_TAPS; assert(w <= 64); assert(h <= 64); - assert(y_step_q4 <= 80); - assert(x_step_q4 <= 80); + assert(y_step_q4 <= 32); + assert(x_step_q4 <= 32); if (intermediate_height < h) intermediate_height = h; diff --git a/vp9/common/vp9_mvref_common.c b/vp9/common/vp9_mvref_common.c index 61682c42d..0fe58c5c8 100644 --- a/vp9/common/vp9_mvref_common.c +++ b/vp9/common/vp9_mvref_common.c @@ -11,181 +11,6 @@ #include "vp9/common/vp9_mvref_common.h" -#define MVREF_NEIGHBOURS 8 - -typedef struct position { - int row; - int col; -} POSITION; - -typedef enum { - BOTH_ZERO = 0, - ZERO_PLUS_PREDICTED = 1, - BOTH_PREDICTED = 2, - NEW_PLUS_NON_INTRA = 3, - BOTH_NEW = 4, - INTRA_PLUS_NON_INTRA = 5, - BOTH_INTRA = 6, - INVALID_CASE = 9 -} motion_vector_context; - -// This is used to figure out a context for the ref blocks. The code flattens -// an array that would have 3 possible counts (0, 1 & 2) for 3 choices by -// adding 9 for each intra block, 3 for each zero mv and 1 for each new -// motion vector. This single number is then converted into a context -// with a single lookup ( counter_to_context ). -static const int mode_2_counter[MB_MODE_COUNT] = { - 9, // DC_PRED - 9, // V_PRED - 9, // H_PRED - 9, // D45_PRED - 9, // D135_PRED - 9, // D117_PRED - 9, // D153_PRED - 9, // D207_PRED - 9, // D63_PRED - 9, // TM_PRED - 0, // NEARESTMV - 0, // NEARMV - 3, // ZEROMV - 1, // NEWMV -}; - -// There are 3^3 different combinations of 3 counts that can be either 0,1 or -// 2. However the actual count can never be greater than 2 so the highest -// counter we need is 18. 9 is an invalid counter that's never used. -static const int counter_to_context[19] = { - BOTH_PREDICTED, // 0 - NEW_PLUS_NON_INTRA, // 1 - BOTH_NEW, // 2 - ZERO_PLUS_PREDICTED, // 3 - NEW_PLUS_NON_INTRA, // 4 - INVALID_CASE, // 5 - BOTH_ZERO, // 6 - INVALID_CASE, // 7 - INVALID_CASE, // 8 - INTRA_PLUS_NON_INTRA, // 9 - INTRA_PLUS_NON_INTRA, // 10 - INVALID_CASE, // 11 - INTRA_PLUS_NON_INTRA, // 12 - INVALID_CASE, // 13 - INVALID_CASE, // 14 - INVALID_CASE, // 15 - INVALID_CASE, // 16 - INVALID_CASE, // 17 - BOTH_INTRA // 18 -}; - -static const POSITION mv_ref_blocks[BLOCK_SIZES][MVREF_NEIGHBOURS] = { - // 4X4 - {{-1, 0}, {0, -1}, {-1, -1}, {-2, 0}, {0, -2}, {-2, -1}, {-1, -2}, {-2, -2}}, - // 4X8 - {{-1, 0}, {0, -1}, {-1, -1}, {-2, 0}, {0, -2}, {-2, -1}, {-1, -2}, {-2, -2}}, - // 8X4 - {{-1, 0}, {0, -1}, {-1, -1}, {-2, 0}, {0, -2}, {-2, -1}, {-1, -2}, {-2, -2}}, - // 8X8 - {{-1, 0}, {0, -1}, {-1, -1}, {-2, 0}, {0, -2}, {-2, -1}, {-1, -2}, {-2, -2}}, - // 8X16 - {{0, -1}, {-1, 0}, {1, -1}, {-1, -1}, {0, -2}, {-2, 0}, {-2, -1}, {-1, -2}}, - // 16X8 - {{-1, 0}, {0, -1}, {-1, 1}, {-1, -1}, {-2, 0}, {0, -2}, {-1, -2}, {-2, -1}}, - // 16X16 - {{-1, 0}, {0, -1}, {-1, 1}, {1, -1}, {-1, -1}, {-3, 0}, {0, -3}, {-3, -3}}, - // 16X32 - {{0, -1}, {-1, 0}, {2, -1}, {-1, -1}, {-1, 1}, {0, -3}, {-3, 0}, {-3, -3}}, - // 32X16 - {{-1, 0}, {0, -1}, {-1, 2}, {-1, -1}, {1, -1}, {-3, 0}, {0, -3}, {-3, -3}}, - // 32X32 - {{-1, 1}, {1, -1}, {-1, 2}, {2, -1}, {-1, -1}, {-3, 0}, {0, -3}, {-3, -3}}, - // 32X64 - {{0, -1}, {-1, 0}, {4, -1}, {-1, 2}, {-1, -1}, {0, -3}, {-3, 0}, {2, -1}}, - // 64X32 - {{-1, 0}, {0, -1}, {-1, 4}, {2, -1}, {-1, -1}, {-3, 0}, {0, -3}, {-1, 2}}, - // 64X64 - {{-1, 3}, {3, -1}, {-1, 4}, {4, -1}, {-1, -1}, {-1, 0}, {0, -1}, {-1, 6}} -}; - -static const int idx_n_column_to_subblock[4][2] = { - {1, 2}, - {1, 3}, - {3, 2}, - {3, 3} -}; - -// clamp_mv_ref -#define MV_BORDER (16 << 3) // Allow 16 pels in 1/8th pel units - -static void clamp_mv_ref(MV *mv, const MACROBLOCKD *xd) { - clamp_mv(mv, xd->mb_to_left_edge - MV_BORDER, - xd->mb_to_right_edge + MV_BORDER, - xd->mb_to_top_edge - MV_BORDER, - xd->mb_to_bottom_edge + MV_BORDER); -} - -// This function returns either the appropriate sub block or block's mv -// on whether the block_size < 8x8 and we have check_sub_blocks set. -static INLINE int_mv get_sub_block_mv(const MODE_INFO *candidate, int which_mv, - int search_col, int block_idx) { - return block_idx >= 0 && candidate->mbmi.sb_type < BLOCK_8X8 - ? candidate->bmi[idx_n_column_to_subblock[block_idx][search_col == 0]] - .as_mv[which_mv] - : candidate->mbmi.mv[which_mv]; -} - - -// Performs mv sign inversion if indicated by the reference frame combination. -static INLINE int_mv scale_mv(const MB_MODE_INFO *mbmi, int ref, - const MV_REFERENCE_FRAME this_ref_frame, - const int *ref_sign_bias) { - int_mv mv = mbmi->mv[ref]; - if (ref_sign_bias[mbmi->ref_frame[ref]] != ref_sign_bias[this_ref_frame]) { - mv.as_mv.row *= -1; - mv.as_mv.col *= -1; - } - return mv; -} - -// This macro is used to add a motion vector mv_ref list if it isn't -// already in the list. If it's the second motion vector it will also -// skip all additional processing and jump to done! -#define ADD_MV_REF_LIST(mv) \ - do { \ - if (refmv_count) { \ - if ((mv).as_int != mv_ref_list[0].as_int) { \ - mv_ref_list[refmv_count] = (mv); \ - goto Done; \ - } \ - } else { \ - mv_ref_list[refmv_count++] = (mv); \ - } \ - } while (0) - -// If either reference frame is different, not INTRA, and they -// are different from each other scale and add the mv to our list. -#define IF_DIFF_REF_FRAME_ADD_MV(mbmi) \ - do { \ - if (is_inter_block(mbmi)) { \ - if ((mbmi)->ref_frame[0] != ref_frame) \ - ADD_MV_REF_LIST(scale_mv((mbmi), 0, ref_frame, ref_sign_bias)); \ - if (has_second_ref(mbmi) && \ - (mbmi)->ref_frame[1] != ref_frame && \ - (mbmi)->mv[1].as_int != (mbmi)->mv[0].as_int) \ - ADD_MV_REF_LIST(scale_mv((mbmi), 1, ref_frame, ref_sign_bias)); \ - } \ - } while (0) - - -// Checks that the given mi_row, mi_col and search point -// are inside the borders of the tile. -static INLINE int is_inside(const TileInfo *const tile, - int mi_col, int mi_row, int mi_rows, - const POSITION *mi_pos) { - return !(mi_row + mi_pos->row < 0 || - mi_col + mi_pos->col < tile->mi_col_start || - mi_row + mi_pos->row >= mi_rows || - mi_col + mi_pos->col >= tile->mi_col_end); -} - // This function searches the neighbourhood of a given MB/SB // to try and find candidate reference vectors. static void find_mv_refs_idx(const VP9_COMMON *cm, const MACROBLOCKD *xd, diff --git a/vp9/common/vp9_mvref_common.h b/vp9/common/vp9_mvref_common.h index 903ac02bb..7bce3fa37 100644 --- a/vp9/common/vp9_mvref_common.h +++ b/vp9/common/vp9_mvref_common.h @@ -21,6 +21,181 @@ extern "C" { #define RIGHT_BOTTOM_MARGIN ((VP9_ENC_BORDER_IN_PIXELS -\ VP9_INTERP_EXTEND) << 3) +#define MVREF_NEIGHBOURS 8 + +typedef struct position { + int row; + int col; +} POSITION; + +typedef enum { + BOTH_ZERO = 0, + ZERO_PLUS_PREDICTED = 1, + BOTH_PREDICTED = 2, + NEW_PLUS_NON_INTRA = 3, + BOTH_NEW = 4, + INTRA_PLUS_NON_INTRA = 5, + BOTH_INTRA = 6, + INVALID_CASE = 9 +} motion_vector_context; + +// This is used to figure out a context for the ref blocks. The code flattens +// an array that would have 3 possible counts (0, 1 & 2) for 3 choices by +// adding 9 for each intra block, 3 for each zero mv and 1 for each new +// motion vector. This single number is then converted into a context +// with a single lookup ( counter_to_context ). +static const int mode_2_counter[MB_MODE_COUNT] = { + 9, // DC_PRED + 9, // V_PRED + 9, // H_PRED + 9, // D45_PRED + 9, // D135_PRED + 9, // D117_PRED + 9, // D153_PRED + 9, // D207_PRED + 9, // D63_PRED + 9, // TM_PRED + 0, // NEARESTMV + 0, // NEARMV + 3, // ZEROMV + 1, // NEWMV +}; + +// There are 3^3 different combinations of 3 counts that can be either 0,1 or +// 2. However the actual count can never be greater than 2 so the highest +// counter we need is 18. 9 is an invalid counter that's never used. +static const int counter_to_context[19] = { + BOTH_PREDICTED, // 0 + NEW_PLUS_NON_INTRA, // 1 + BOTH_NEW, // 2 + ZERO_PLUS_PREDICTED, // 3 + NEW_PLUS_NON_INTRA, // 4 + INVALID_CASE, // 5 + BOTH_ZERO, // 6 + INVALID_CASE, // 7 + INVALID_CASE, // 8 + INTRA_PLUS_NON_INTRA, // 9 + INTRA_PLUS_NON_INTRA, // 10 + INVALID_CASE, // 11 + INTRA_PLUS_NON_INTRA, // 12 + INVALID_CASE, // 13 + INVALID_CASE, // 14 + INVALID_CASE, // 15 + INVALID_CASE, // 16 + INVALID_CASE, // 17 + BOTH_INTRA // 18 +}; + +static const POSITION mv_ref_blocks[BLOCK_SIZES][MVREF_NEIGHBOURS] = { + // 4X4 + {{-1, 0}, {0, -1}, {-1, -1}, {-2, 0}, {0, -2}, {-2, -1}, {-1, -2}, {-2, -2}}, + // 4X8 + {{-1, 0}, {0, -1}, {-1, -1}, {-2, 0}, {0, -2}, {-2, -1}, {-1, -2}, {-2, -2}}, + // 8X4 + {{-1, 0}, {0, -1}, {-1, -1}, {-2, 0}, {0, -2}, {-2, -1}, {-1, -2}, {-2, -2}}, + // 8X8 + {{-1, 0}, {0, -1}, {-1, -1}, {-2, 0}, {0, -2}, {-2, -1}, {-1, -2}, {-2, -2}}, + // 8X16 + {{0, -1}, {-1, 0}, {1, -1}, {-1, -1}, {0, -2}, {-2, 0}, {-2, -1}, {-1, -2}}, + // 16X8 + {{-1, 0}, {0, -1}, {-1, 1}, {-1, -1}, {-2, 0}, {0, -2}, {-1, -2}, {-2, -1}}, + // 16X16 + {{-1, 0}, {0, -1}, {-1, 1}, {1, -1}, {-1, -1}, {-3, 0}, {0, -3}, {-3, -3}}, + // 16X32 + {{0, -1}, {-1, 0}, {2, -1}, {-1, -1}, {-1, 1}, {0, -3}, {-3, 0}, {-3, -3}}, + // 32X16 + {{-1, 0}, {0, -1}, {-1, 2}, {-1, -1}, {1, -1}, {-3, 0}, {0, -3}, {-3, -3}}, + // 32X32 + {{-1, 1}, {1, -1}, {-1, 2}, {2, -1}, {-1, -1}, {-3, 0}, {0, -3}, {-3, -3}}, + // 32X64 + {{0, -1}, {-1, 0}, {4, -1}, {-1, 2}, {-1, -1}, {0, -3}, {-3, 0}, {2, -1}}, + // 64X32 + {{-1, 0}, {0, -1}, {-1, 4}, {2, -1}, {-1, -1}, {-3, 0}, {0, -3}, {-1, 2}}, + // 64X64 + {{-1, 3}, {3, -1}, {-1, 4}, {4, -1}, {-1, -1}, {-1, 0}, {0, -1}, {-1, 6}} +}; + +static const int idx_n_column_to_subblock[4][2] = { + {1, 2}, + {1, 3}, + {3, 2}, + {3, 3} +}; + +// clamp_mv_ref +#define MV_BORDER (16 << 3) // Allow 16 pels in 1/8th pel units + +static void clamp_mv_ref(MV *mv, const MACROBLOCKD *xd) { + clamp_mv(mv, xd->mb_to_left_edge - MV_BORDER, + xd->mb_to_right_edge + MV_BORDER, + xd->mb_to_top_edge - MV_BORDER, + xd->mb_to_bottom_edge + MV_BORDER); +} + +// This function returns either the appropriate sub block or block's mv +// on whether the block_size < 8x8 and we have check_sub_blocks set. +static INLINE int_mv get_sub_block_mv(const MODE_INFO *candidate, int which_mv, + int search_col, int block_idx) { + return block_idx >= 0 && candidate->mbmi.sb_type < BLOCK_8X8 + ? candidate->bmi[idx_n_column_to_subblock[block_idx][search_col == 0]] + .as_mv[which_mv] + : candidate->mbmi.mv[which_mv]; +} + + +// Performs mv sign inversion if indicated by the reference frame combination. +static INLINE int_mv scale_mv(const MB_MODE_INFO *mbmi, int ref, + const MV_REFERENCE_FRAME this_ref_frame, + const int *ref_sign_bias) { + int_mv mv = mbmi->mv[ref]; + if (ref_sign_bias[mbmi->ref_frame[ref]] != ref_sign_bias[this_ref_frame]) { + mv.as_mv.row *= -1; + mv.as_mv.col *= -1; + } + return mv; +} + +// This macro is used to add a motion vector mv_ref list if it isn't +// already in the list. If it's the second motion vector it will also +// skip all additional processing and jump to done! +#define ADD_MV_REF_LIST(mv) \ + do { \ + if (refmv_count) { \ + if ((mv).as_int != mv_ref_list[0].as_int) { \ + mv_ref_list[refmv_count] = (mv); \ + goto Done; \ + } \ + } else { \ + mv_ref_list[refmv_count++] = (mv); \ + } \ + } while (0) + +// If either reference frame is different, not INTRA, and they +// are different from each other scale and add the mv to our list. +#define IF_DIFF_REF_FRAME_ADD_MV(mbmi) \ + do { \ + if (is_inter_block(mbmi)) { \ + if ((mbmi)->ref_frame[0] != ref_frame) \ + ADD_MV_REF_LIST(scale_mv((mbmi), 0, ref_frame, ref_sign_bias)); \ + if (has_second_ref(mbmi) && \ + (mbmi)->ref_frame[1] != ref_frame && \ + (mbmi)->mv[1].as_int != (mbmi)->mv[0].as_int) \ + ADD_MV_REF_LIST(scale_mv((mbmi), 1, ref_frame, ref_sign_bias)); \ + } \ + } while (0) + + +// Checks that the given mi_row, mi_col and search point +// are inside the borders of the tile. +static INLINE int is_inside(const TileInfo *const tile, + int mi_col, int mi_row, int mi_rows, + const POSITION *mi_pos) { + return !(mi_row + mi_pos->row < 0 || + mi_col + mi_pos->col < tile->mi_col_start || + mi_row + mi_pos->row >= mi_rows || + mi_col + mi_pos->col >= tile->mi_col_end); +} + // TODO(jingning): this mv clamping function should be block size dependent. static INLINE void clamp_mv2(MV *mv, const MACROBLOCKD *xd) { clamp_mv(mv, xd->mb_to_left_edge - LEFT_TOP_MARGIN, diff --git a/vp9/common/vp9_quant_common.c b/vp9/common/vp9_quant_common.c index def12554d..3332e58e6 100644 --- a/vp9/common/vp9_quant_common.c +++ b/vp9/common/vp9_quant_common.c @@ -12,7 +12,6 @@ #include "vp9/common/vp9_quant_common.h" #include "vp9/common/vp9_seg_common.h" -#if 1 static const int16_t dc_qlookup[QINDEX_RANGE] = { 4, 8, 8, 9, 10, 11, 12, 12, 13, 14, 15, 16, 17, 18, 19, 19, @@ -83,44 +82,6 @@ static const int16_t ac_qlookup[QINDEX_RANGE] = { 1597, 1628, 1660, 1692, 1725, 1759, 1793, 1828, }; -void vp9_init_quant_tables(void) { } -#else -static int16_t dc_qlookup[QINDEX_RANGE]; -static int16_t ac_qlookup[QINDEX_RANGE]; - -#define ACDC_MIN 8 - -// TODO(dkovalev) move to common and reuse -static double poly3(double a, double b, double c, double d, double x) { - return a*x*x*x + b*x*x + c*x + d; -} - -void vp9_init_quant_tables() { - int i, val = 4; - - // A "real" q of 1.0 forces lossless mode. - // In practice non lossless Q's between 1.0 and 2.0 (represented here by - // integer values from 5-7 give poor rd results (lower psnr and often - // larger size than the lossless encode. To block out those "not very useful" - // values we increment the ac and dc q lookup values by 4 after position 0. - ac_qlookup[0] = val; - dc_qlookup[0] = val; - val += 4; - - for (i = 1; i < QINDEX_RANGE; i++) { - const int ac_val = val; - - val = (int)(val * 1.01975); - if (val == ac_val) - ++val; - - ac_qlookup[i] = (int16_t)ac_val; - dc_qlookup[i] = (int16_t)MAX(ACDC_MIN, poly3(0.000000305, -0.00065, 0.9, - 0.5, ac_val)); - } -} -#endif - int16_t vp9_dc_quant(int qindex, int delta) { return dc_qlookup[clamp(qindex + delta, 0, MAXQ)]; } diff --git a/vp9/common/vp9_quant_common.h b/vp9/common/vp9_quant_common.h index 581104006..d1545d93c 100644 --- a/vp9/common/vp9_quant_common.h +++ b/vp9/common/vp9_quant_common.h @@ -22,8 +22,6 @@ extern "C" { #define QINDEX_RANGE (MAXQ - MINQ + 1) #define QINDEX_BITS 8 -void vp9_init_quant_tables(); - int16_t vp9_dc_quant(int qindex, int delta); int16_t vp9_ac_quant(int qindex, int delta); diff --git a/vp9/common/vp9_scale.h b/vp9/common/vp9_scale.h index a9dda1889..04aae659f 100644 --- a/vp9/common/vp9_scale.h +++ b/vp9/common/vp9_scale.h @@ -46,8 +46,8 @@ static INLINE int vp9_is_valid_scale(const struct scale_factors *sf) { } static INLINE int vp9_is_scaled(const struct scale_factors *sf) { - return sf->x_scale_fp != REF_NO_SCALE || - sf->y_scale_fp != REF_NO_SCALE; + return vp9_is_valid_scale(sf) && + (sf->x_scale_fp != REF_NO_SCALE || sf->y_scale_fp != REF_NO_SCALE); } #ifdef __cplusplus diff --git a/vp9/decoder/vp9_decodeframe.c b/vp9/decoder/vp9_decodeframe.c index fc70035f2..9220a9eec 100644 --- a/vp9/decoder/vp9_decodeframe.c +++ b/vp9/decoder/vp9_decodeframe.c @@ -685,6 +685,10 @@ static void setup_tile_info(VP9_COMMON *cm, struct vp9_read_bit_buffer *rb) { while (max_ones-- && vp9_rb_read_bit(rb)) cm->log2_tile_cols++; + if (cm->log2_tile_cols > 6) + vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME, + "Invalid number of tile columns"); + // rows cm->log2_tile_rows = vp9_rb_read_bit(rb); if (cm->log2_tile_rows) @@ -1077,7 +1081,7 @@ static size_t read_uncompressed_header(VP9Decoder *pbi, // Show an existing frame directly. const int frame_to_show = cm->ref_frame_map[vp9_rb_read_literal(rb, 3)]; - if (cm->frame_bufs[frame_to_show].ref_count < 1) + if (frame_to_show < 0 || cm->frame_bufs[frame_to_show].ref_count < 1) vpx_internal_error(&cm->error, VPX_CODEC_UNSUP_BITSTREAM, "Buffer %d does not contain a decoded frame", frame_to_show); diff --git a/vp9/decoder/vp9_decoder.c b/vp9/decoder/vp9_decoder.c index 5859859fa..245c5f195 100644 --- a/vp9/decoder/vp9_decoder.c +++ b/vp9/decoder/vp9_decoder.c @@ -37,7 +37,6 @@ static void initialize_dec() { if (!init_done) { vp9_init_neighbors(); - vp9_init_quant_tables(); init_done = 1; } } @@ -244,8 +243,8 @@ int vp9_receive_compressed_data(VP9Decoder *pbi, // Check if the previous frame was a frame without any references to it. // Release frame buffer if not decoding in frame parallel mode. - if (!pbi->frame_parallel_decode && cm->new_fb_idx >= 0 && - cm->frame_bufs[cm->new_fb_idx].ref_count == 0) + if (!pbi->frame_parallel_decode && cm->new_fb_idx >= 0 + && cm->frame_bufs[cm->new_fb_idx].ref_count == 0) cm->release_fb_cb(cm->cb_priv, &cm->frame_bufs[cm->new_fb_idx].raw_frame_buffer); cm->new_fb_idx = get_free_fb(cm); @@ -260,10 +259,10 @@ int vp9_receive_compressed_data(VP9Decoder *pbi, // TODO(jkoleszar): Error concealment is undefined and non-normative // at this point, but if it becomes so, [0] may not always be the correct // thing to do here. - if (cm->frame_refs[0].idx != INT_MAX) + if (cm->frame_refs[0].idx != INT_MAX && cm->frame_refs[0].buf != NULL) cm->frame_refs[0].buf->corrupted = 1; - if (cm->frame_bufs[cm->new_fb_idx].ref_count > 0) + if (cm->new_fb_idx > 0 && cm->frame_bufs[cm->new_fb_idx].ref_count > 0) cm->frame_bufs[cm->new_fb_idx].ref_count--; return -1; diff --git a/vp9/decoder/vp9_dthread.h b/vp9/decoder/vp9_dthread.h index a727e2aef..01c07f1a0 100644 --- a/vp9/decoder/vp9_dthread.h +++ b/vp9/decoder/vp9_dthread.h @@ -40,6 +40,23 @@ typedef struct VP9LfSyncData { int sync_range; } VP9LfSync; +// WorkerData for the FrameWorker thread. It contains all the information of +// the worker and decode structures for decoding a frame. +typedef struct FrameWorkerData { + struct VP9Decoder *pbi; + const uint8_t *data; + const uint8_t *data_end; + size_t data_size; + void *user_priv; + int result; + int worker_id; + + // scratch_buffer is used in frame parallel mode only. + // It is used to make a copy of the compressed data. + uint8_t *scratch_buffer; + size_t scratch_buffer_size; +} FrameWorkerData; + // Allocate memory for loopfilter row synchronization. void vp9_loop_filter_alloc(struct VP9Common *cm, VP9LfSync *lf_sync, int rows, int width); diff --git a/vp9/decoder/vp9_read_bit_buffer.c b/vp9/decoder/vp9_read_bit_buffer.c index 778a635e3..3eef72844 100644 --- a/vp9/decoder/vp9_read_bit_buffer.c +++ b/vp9/decoder/vp9_read_bit_buffer.c @@ -10,7 +10,7 @@ #include "vp9/decoder/vp9_read_bit_buffer.h" size_t vp9_rb_bytes_read(struct vp9_read_bit_buffer *rb) { - return rb->bit_offset / CHAR_BIT + (rb->bit_offset % CHAR_BIT > 0); + return (rb->bit_offset + CHAR_BIT - 1) / CHAR_BIT; } int vp9_rb_read_bit(struct vp9_read_bit_buffer *rb) { diff --git a/vp9/encoder/vp9_bitstream.c b/vp9/encoder/vp9_bitstream.c index 76f5e7bbe..1bf826a56 100644 --- a/vp9/encoder/vp9_bitstream.c +++ b/vp9/encoder/vp9_bitstream.c @@ -890,14 +890,8 @@ static void write_tile_info(VP9_COMMON *cm, struct vp9_write_bit_buffer *wb) { } static int get_refresh_mask(VP9_COMP *cpi) { - // Should the GF or ARF be updated using the transmitted frame or buffer -#if CONFIG_MULTIPLE_ARF - if (!cpi->multi_arf_enabled && cpi->refresh_golden_frame && - !cpi->refresh_alt_ref_frame) { -#else - if (cpi->refresh_golden_frame && !cpi->refresh_alt_ref_frame && - !cpi->use_svc) { -#endif + if (!cpi->multi_arf_allowed && cpi->refresh_golden_frame && + cpi->rc.is_src_frame_alt_ref && !cpi->use_svc) { // Preserve the previously existing golden frame and update the frame in // the alt ref slot instead. This is highly specific to the use of // alt-ref as a forward reference, and this needs to be generalized as @@ -910,15 +904,10 @@ static int get_refresh_mask(VP9_COMP *cpi) { (cpi->refresh_golden_frame << cpi->alt_fb_idx); } else { int arf_idx = cpi->alt_fb_idx; -#if CONFIG_MULTIPLE_ARF - // Determine which ARF buffer to use to encode this ARF frame. - if (cpi->multi_arf_enabled) { - int sn = cpi->sequence_number; - arf_idx = (cpi->frame_coding_order[sn] < 0) ? - cpi->arf_buffer_idx[sn + 1] : - cpi->arf_buffer_idx[sn]; + if ((cpi->pass == 2) && cpi->multi_arf_allowed) { + const GF_GROUP *const gf_group = &cpi->twopass.gf_group; + arf_idx = gf_group->arf_update_idx[gf_group->index]; } -#endif return (cpi->refresh_last_frame << cpi->lst_fb_idx) | (cpi->refresh_golden_frame << cpi->gld_fb_idx) | (cpi->refresh_alt_ref_frame << arf_idx); diff --git a/vp9/encoder/vp9_block.h b/vp9/encoder/vp9_block.h index 2463ed0f4..454d0da90 100644 --- a/vp9/encoder/vp9_block.h +++ b/vp9/encoder/vp9_block.h @@ -93,8 +93,6 @@ struct macroblock { int encode_breakout; - int in_active_map; - // note that token_costs is the cost when eob node is skipped vp9_coeff_cost token_costs[TX_SIZES]; diff --git a/vp9/encoder/vp9_denoiser.c b/vp9/encoder/vp9_denoiser.c index 687b4c2b2..ff54033d0 100644 --- a/vp9/encoder/vp9_denoiser.c +++ b/vp9/encoder/vp9_denoiser.c @@ -8,10 +8,10 @@ * be found in the AUTHORS file in the root of the source tree. */ -#include -#include -#include "vp9/encoder/vp9_denoiser.h" +#include #include "vpx_scale/yv12config.h" +#include "vpx/vpx_integer.h" +#include "vp9/encoder/vp9_denoiser.h" static const int widths[] = {4, 4, 8, 8, 8, 16, 16, 16, 32, 32, 32, 64, 64}; static const int heights[] = {4, 8, 4, 8, 16, 8, 16, 32, 16, 32, 64, 32, 64}; @@ -20,30 +20,180 @@ int vp9_denoiser_filter() { return 0; } -void vp9_denoiser_denoise(VP9_DENOISER *denoiser, - MACROBLOCK *mb, MODE_INFO **grid, +static int update_running_avg(const uint8_t *mc_avg, int mc_avg_stride, + uint8_t *avg, int avg_stride, + const uint8_t *sig, int sig_stride, + int increase_denoising, BLOCK_SIZE bs) { + int r, c; + int diff, adj, absdiff; + int shift_inc1 = 0, shift_inc2 = 1; + int adj_val[] = {3, 4, 6}; + int total_adj = 0; + + if (increase_denoising) { + shift_inc1 = 1; + shift_inc2 = 2; + } + + for (r = 0; r < heights[bs]; ++r) { + for (c = 0; c < widths[bs]; ++c) { + diff = mc_avg[c] - sig[c]; + absdiff = abs(diff); + + if (absdiff <= 3 + shift_inc1) { + avg[c] = mc_avg[c]; + total_adj += diff; + } else { + switch (absdiff) { + case 4: case 5: case 6: case 7: + adj = adj_val[0]; + break; + case 8: case 9: case 10: case 11: + case 12: case 13: case 14: case 15: + adj = adj_val[1]; + break; + default: + adj = adj_val[2]; + } + if (diff > 0) { + avg[c] = MIN(UINT8_MAX, sig[c] + adj); + total_adj += adj; + } else { + avg[c] = MAX(0, sig[c] - adj); + total_adj -= adj; + } + } + } + sig += sig_stride; + avg += avg_stride; + mc_avg += mc_avg_stride; + } + return total_adj; +} + +static uint8_t *block_start(uint8_t *framebuf, int stride, + int mi_row, int mi_col) { + return framebuf + (stride * mi_row * 8) + (mi_col * 8); +} + +void copy_block(uint8_t *dest, int dest_stride, + uint8_t *src, int src_stride, BLOCK_SIZE bs) { + int r, c; + for (r = 0; r < heights[bs]; ++r) { + for (c = 0; c < widths[bs]; ++c) { + dest[c] = src[c]; + } + dest += dest_stride; + src += src_stride; + } +} + +void vp9_denoiser_denoise(VP9_DENOISER *denoiser, MACROBLOCK *mb, int mi_row, int mi_col, BLOCK_SIZE bs) { - return; + int decision = COPY_BLOCK; + + YV12_BUFFER_CONFIG avg = denoiser->running_avg_y[INTRA_FRAME]; + YV12_BUFFER_CONFIG mc_avg = denoiser->mc_running_avg_y; + uint8_t *avg_start = block_start(avg.y_buffer, avg.y_stride, mi_row, mi_col); + uint8_t *mc_avg_start = block_start(mc_avg.y_buffer, mc_avg.y_stride, + mi_row, mi_col); + struct buf_2d src = mb->plane[0].src; + + + update_running_avg(mc_avg_start, mc_avg.y_stride, avg_start, avg.y_stride, + mb->plane[0].src.buf, mb->plane[0].src.stride, 0, bs); + + if (decision == FILTER_BLOCK) { + // TODO(tkopp) + } + if (decision == COPY_BLOCK) { + copy_block(avg_start, avg.y_stride, src.buf, src.stride, bs); + } +} + +static void copy_frame(YV12_BUFFER_CONFIG dest, const YV12_BUFFER_CONFIG src) { + int r, c; + const uint8_t *srcbuf = src.y_buffer; + uint8_t *destbuf = dest.y_buffer; + assert(dest.y_width == src.y_width); + assert(dest.y_height == src.y_height); + + for (r = 0; r < dest.y_height; ++r) { + for (c = 0; c < dest.y_width; ++c) { + destbuf[c] = srcbuf[c]; + } + destbuf += dest.y_stride; + srcbuf += src.y_stride; + } } void vp9_denoiser_update_frame_info(VP9_DENOISER *denoiser, + YV12_BUFFER_CONFIG src, FRAME_TYPE frame_type, int refresh_alt_ref_frame, int refresh_golden_frame, int refresh_last_frame) { - return; + if (frame_type == KEY_FRAME) { + int i; + copy_frame(denoiser->running_avg_y[LAST_FRAME], src); + for (i = 2; i < MAX_REF_FRAMES - 1; i++) { + copy_frame(denoiser->running_avg_y[i], + denoiser->running_avg_y[LAST_FRAME]); + } + } else { /* For non key frames */ + if (refresh_alt_ref_frame) { + copy_frame(denoiser->running_avg_y[ALTREF_FRAME], + denoiser->running_avg_y[INTRA_FRAME]); + } + if (refresh_golden_frame) { + copy_frame(denoiser->running_avg_y[GOLDEN_FRAME], + denoiser->running_avg_y[INTRA_FRAME]); + } + if (refresh_last_frame) { + copy_frame(denoiser->running_avg_y[LAST_FRAME], + denoiser->running_avg_y[INTRA_FRAME]); + } + } } void vp9_denoiser_update_frame_stats() { - return; } int vp9_denoiser_alloc(VP9_DENOISER *denoiser, int width, int height, - int border) { + int ssx, int ssy, int border) { + int i, fail; + assert(denoiser != NULL); + + for (i = 0; i < MAX_REF_FRAMES; ++i) { + fail = vp9_alloc_frame_buffer(&denoiser->running_avg_y[i], width, height, + ssx, ssy, border); + if (fail) { + vp9_denoiser_free(denoiser); + return 1; + } + } + + fail = vp9_alloc_frame_buffer(&denoiser->mc_running_avg_y, width, height, + ssx, ssy, border); + if (fail) { + vp9_denoiser_free(denoiser); + return 1; + } + return 0; } void vp9_denoiser_free(VP9_DENOISER *denoiser) { - return; + int i; + if (denoiser == NULL) { + return; + } + for (i = 0; i < MAX_REF_FRAMES; ++i) { + if (&denoiser->running_avg_y[i] != NULL) { + vp9_free_frame_buffer(&denoiser->running_avg_y[i]); + } + } + if (&denoiser->mc_running_avg_y != NULL) { + vp9_free_frame_buffer(&denoiser->mc_running_avg_y); + } } - diff --git a/vp9/encoder/vp9_denoiser.h b/vp9/encoder/vp9_denoiser.h index a7a8d9329..18b9766a5 100644 --- a/vp9/encoder/vp9_denoiser.h +++ b/vp9/encoder/vp9_denoiser.h @@ -12,6 +12,7 @@ #define VP9_ENCODER_DENOISER_H_ #include "vp9/encoder/vp9_block.h" +#include "vpx_scale/yv12config.h" #ifdef __cplusplus extern "C" { @@ -23,24 +24,24 @@ enum vp9_denoiser_decision { }; typedef struct vp9_denoiser { - struct buf_2d running_avg_y; - struct buf_2d mc_running_avg_y; + YV12_BUFFER_CONFIG running_avg_y[MAX_REF_FRAMES]; + YV12_BUFFER_CONFIG mc_running_avg_y; } VP9_DENOISER; void vp9_denoiser_update_frame_info(VP9_DENOISER *denoiser, + YV12_BUFFER_CONFIG src, FRAME_TYPE frame_type, int refresh_alt_ref_frame, int refresh_golden_frame, int refresh_last_frame); -void vp9_denoiser_denoise(VP9_DENOISER *denoiser, - MACROBLOCK *mb, MODE_INFO **grid, +void vp9_denoiser_denoise(VP9_DENOISER *denoiser, MACROBLOCK *mb, int mi_row, int mi_col, BLOCK_SIZE bs); void vp9_denoiser_update_frame_stats(); int vp9_denoiser_alloc(VP9_DENOISER *denoiser, int width, int height, - int border); + int ssx, int ssy, int border); void vp9_denoiser_free(VP9_DENOISER *denoiser); diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c index 001ac69bd..b9349a49a 100644 --- a/vp9/encoder/vp9_encodeframe.c +++ b/vp9/encoder/vp9_encodeframe.c @@ -139,42 +139,6 @@ static INLINE void set_modeinfo_offsets(VP9_COMMON *const cm, xd->mi[0] = cm->mi + idx_str; } -static int is_block_in_mb_map(const VP9_COMP *cpi, int mi_row, int mi_col, - BLOCK_SIZE bsize) { - const VP9_COMMON *const cm = &cpi->common; - const int mb_rows = cm->mb_rows; - const int mb_cols = cm->mb_cols; - const int mb_row = mi_row >> 1; - const int mb_col = mi_col >> 1; - const int mb_width = num_8x8_blocks_wide_lookup[bsize] >> 1; - const int mb_height = num_8x8_blocks_high_lookup[bsize] >> 1; - int r, c; - if (bsize <= BLOCK_16X16) { - return cpi->active_map[mb_row * mb_cols + mb_col]; - } - for (r = 0; r < mb_height; ++r) { - for (c = 0; c < mb_width; ++c) { - int row = mb_row + r; - int col = mb_col + c; - if (row >= mb_rows || col >= mb_cols) - continue; - if (cpi->active_map[row * mb_cols + col]) - return 1; - } - } - return 0; -} - -static int check_active_map(const VP9_COMP *cpi, const MACROBLOCK *x, - int mi_row, int mi_col, - BLOCK_SIZE bsize) { - if (cpi->active_map_enabled && !x->e_mbd.lossless) { - return is_block_in_mb_map(cpi, mi_row, mi_col, bsize); - } else { - return 1; - } -} - static void set_offsets(VP9_COMP *cpi, const TileInfo *const tile, int mi_row, int mi_col, BLOCK_SIZE bsize) { MACROBLOCK *const x = &cpi->mb; @@ -187,9 +151,6 @@ static void set_offsets(VP9_COMP *cpi, const TileInfo *const tile, set_skip_context(xd, mi_row, mi_col); - // Activity map pointer - x->in_active_map = check_active_map(cpi, x, mi_row, mi_col, bsize); - set_modeinfo_offsets(cm, xd, mi_row, mi_col); mbmi = &xd->mi[0]->mbmi; @@ -723,7 +684,6 @@ static void set_mode_info_seg_skip(MACROBLOCK *x, TX_MODE tx_mode, int *rate, xd->mi[0]->bmi[0].as_mv[0].as_int = 0; x->skip = 1; - x->skip_encode = 1; *rate = 0; *dist = 0; @@ -822,12 +782,18 @@ static void rd_pick_sb_modes(VP9_COMP *cpi, const TileInfo *const tile, vp9_rd_pick_intra_mode_sb(cpi, x, totalrate, totaldist, bsize, ctx, best_rd); } else { - if (bsize >= BLOCK_8X8) - vp9_rd_pick_inter_mode_sb(cpi, x, tile, mi_row, mi_col, - totalrate, totaldist, bsize, ctx, best_rd); - else + if (bsize >= BLOCK_8X8) { + if (vp9_segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) + vp9_rd_pick_inter_mode_sb_seg_skip(cpi, x, tile, mi_row, mi_col, + totalrate, totaldist, bsize, ctx, + best_rd); + else + vp9_rd_pick_inter_mode_sb(cpi, x, tile, mi_row, mi_col, + totalrate, totaldist, bsize, ctx, best_rd); + } else { vp9_rd_pick_inter_mode_sub8x8(cpi, x, tile, mi_row, mi_col, totalrate, totaldist, bsize, ctx, best_rd); + } } x->rdmult = orig_rdmult; @@ -1508,20 +1474,8 @@ static void rd_use_partition(VP9_COMP *cpi, if (bsize == BLOCK_16X16) { set_offsets(cpi, tile, mi_row, mi_col, bsize); x->mb_energy = vp9_block_energy(cpi, x, bsize); - } else { - x->in_active_map = check_active_map(cpi, x, mi_row, mi_col, bsize); } - if (!x->in_active_map) { - do_partition_search = 0; - if (mi_row + (mi_step >> 1) < cm->mi_rows && - mi_col + (mi_step >> 1) < cm->mi_cols) { - pc_tree->partitioning = PARTITION_NONE; - bs_type = mi_8x8[0]->mbmi.sb_type = bsize; - subsize = bsize; - partition = PARTITION_NONE; - } - } if (do_partition_search && cpi->sf.partition_search_type == SEARCH_PARTITION && cpi->sf.adjust_partitioning_from_last_frame) { @@ -1984,8 +1938,6 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile, if (bsize == BLOCK_16X16) { set_offsets(cpi, tile, mi_row, mi_col, bsize); x->mb_energy = vp9_block_energy(cpi, x, bsize); - } else { - x->in_active_map = check_active_map(cpi, x, mi_row, mi_col, bsize); } // Determine partition types in search according to the speed features. // The threshold set here has to be of square block size. @@ -2018,8 +1970,6 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile, } } - if (!x->in_active_map && (partition_horz_allowed || partition_vert_allowed)) - do_split = 0; // PARTITION_NONE if (partition_none_allowed) { rd_pick_sb_modes(cpi, tile, mi_row, mi_col, &this_rate, &this_dist, bsize, @@ -2053,10 +2003,6 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile, } } } - if (!x->in_active_map) { - do_split = 0; - do_rect = 0; - } restore_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize); } @@ -2322,8 +2268,15 @@ static void encode_rd_sb_row(VP9_COMP *cpi, const TileInfo *const tile, rd_use_partition(cpi, tile, mi, tp, mi_row, mi_col, BLOCK_64X64, &dummy_rate, &dummy_dist, 1, cpi->pc_root); } else { + GF_GROUP * gf_grp = &cpi->twopass.gf_group; + int last_was_mid_sequence_overlay = 0; + if ((cpi->pass == 2) && (gf_grp->index)) { + if (gf_grp->update_type[gf_grp->index - 1] == OVERLAY_UPDATE) + last_was_mid_sequence_overlay = 1; + } if ((cm->current_video_frame % sf->last_partitioning_redo_frequency) == 0 + || last_was_mid_sequence_overlay || cm->prev_mi == 0 || cm->show_frame == 0 || cm->frame_type == KEY_FRAME @@ -2586,8 +2539,6 @@ static void nonrd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile, assert(num_8x8_blocks_wide_lookup[bsize] == num_8x8_blocks_high_lookup[bsize]); - x->in_active_map = check_active_map(cpi, x, mi_row, mi_col, bsize); - // Determine partition types in search according to the speed features. // The threshold set here has to be of square block size. if (cpi->sf.auto_min_max_partition_size) { @@ -2606,15 +2557,13 @@ static void nonrd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile, partition_vert_allowed &= force_vert_split; } - if (!x->in_active_map && (partition_horz_allowed || partition_vert_allowed)) - do_split = 0; - // PARTITION_NONE if (partition_none_allowed) { nonrd_pick_sb_modes(cpi, tile, mi_row, mi_col, &this_rate, &this_dist, bsize); ctx->mic.mbmi = xd->mi[0]->mbmi; ctx->skip_txfm = x->skip_txfm; + ctx->skip = x->skip; if (this_rate != INT_MAX) { int pl = partition_plane_context(xd, mi_row, mi_col, bsize); @@ -2643,10 +2592,6 @@ static void nonrd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile, } } } - if (!x->in_active_map) { - do_split = 0; - do_rect = 0; - } } // store estimated motion vector @@ -2702,6 +2647,7 @@ static void nonrd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile, pc_tree->horizontal[0].mic.mbmi = xd->mi[0]->mbmi; pc_tree->horizontal[0].skip_txfm = x->skip_txfm; + pc_tree->horizontal[0].skip = x->skip; sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist); @@ -2712,6 +2658,7 @@ static void nonrd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile, pc_tree->horizontal[1].mic.mbmi = xd->mi[0]->mbmi; pc_tree->horizontal[1].skip_txfm = x->skip_txfm; + pc_tree->horizontal[1].skip = x->skip; if (this_rate == INT_MAX) { sum_rd = INT64_MAX; @@ -2742,6 +2689,7 @@ static void nonrd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile, &this_rate, &this_dist, subsize); pc_tree->vertical[0].mic.mbmi = xd->mi[0]->mbmi; pc_tree->vertical[0].skip_txfm = x->skip_txfm; + pc_tree->vertical[0].skip = x->skip; sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist); if (sum_rd < best_rd && mi_col + ms < cm->mi_cols) { load_pred_mv(x, ctx); @@ -2749,6 +2697,7 @@ static void nonrd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile, &this_rate, &this_dist, subsize); pc_tree->vertical[1].mic.mbmi = xd->mi[0]->mbmi; pc_tree->vertical[1].skip_txfm = x->skip_txfm; + pc_tree->vertical[1].skip = x->skip; if (this_rate == INT_MAX) { sum_rd = INT64_MAX; } else { @@ -2838,16 +2787,19 @@ static void nonrd_use_partition(VP9_COMP *cpi, nonrd_pick_sb_modes(cpi, tile, mi_row, mi_col, totrate, totdist, subsize); pc_tree->none.mic.mbmi = xd->mi[0]->mbmi; pc_tree->none.skip_txfm = x->skip_txfm; + pc_tree->none.skip = x->skip; break; case PARTITION_VERT: nonrd_pick_sb_modes(cpi, tile, mi_row, mi_col, totrate, totdist, subsize); pc_tree->vertical[0].mic.mbmi = xd->mi[0]->mbmi; pc_tree->vertical[0].skip_txfm = x->skip_txfm; + pc_tree->vertical[0].skip = x->skip; if (mi_col + hbs < cm->mi_cols) { nonrd_pick_sb_modes(cpi, tile, mi_row, mi_col + hbs, &rate, &dist, subsize); pc_tree->vertical[1].mic.mbmi = xd->mi[0]->mbmi; pc_tree->vertical[1].skip_txfm = x->skip_txfm; + pc_tree->vertical[1].skip = x->skip; if (rate != INT_MAX && dist != INT64_MAX && *totrate != INT_MAX && *totdist != INT64_MAX) { *totrate += rate; @@ -2859,11 +2811,13 @@ static void nonrd_use_partition(VP9_COMP *cpi, nonrd_pick_sb_modes(cpi, tile, mi_row, mi_col, totrate, totdist, subsize); pc_tree->horizontal[0].mic.mbmi = xd->mi[0]->mbmi; pc_tree->horizontal[0].skip_txfm = x->skip_txfm; + pc_tree->horizontal[0].skip = x->skip; if (mi_row + hbs < cm->mi_rows) { nonrd_pick_sb_modes(cpi, tile, mi_row + hbs, mi_col, &rate, &dist, subsize); pc_tree->horizontal[1].mic.mbmi = xd->mi[0]->mbmi; pc_tree->horizontal[1].skip_txfm = x->skip_txfm; + pc_tree->horizontal[1].skip = x->skip; if (rate != INT_MAX && dist != INT64_MAX && *totrate != INT_MAX && *totdist != INT64_MAX) { *totrate += rate; @@ -3364,7 +3318,10 @@ static void encode_superblock(VP9_COMP *cpi, TOKENEXTRA **t, int output_enabled, vp9_setup_pre_planes(xd, ref, cfg, mi_row, mi_col, &xd->block_refs[ref]->sf); } - vp9_build_inter_predictors_sb(xd, mi_row, mi_col, MAX(bsize, BLOCK_8X8)); + if (!cpi->sf.reuse_inter_pred_sby) + vp9_build_inter_predictors_sby(xd, mi_row, mi_col, MAX(bsize, BLOCK_8X8)); + + vp9_build_inter_predictors_sbuv(xd, mi_row, mi_col, MAX(bsize, BLOCK_8X8)); if (!x->skip) { mbmi->skip = 1; diff --git a/vp9/encoder/vp9_encoder.c b/vp9/encoder/vp9_encoder.c index 525eccd56..6e07f08cf 100644 --- a/vp9/encoder/vp9_encoder.c +++ b/vp9/encoder/vp9_encoder.c @@ -64,6 +64,9 @@ void vp9_coef_tree_initialize(); // #define OUTPUT_YUV_REC +#ifdef OUTPUT_YUV_DENOISED +FILE *yuv_denoised_file; +#endif #ifdef OUTPUT_YUV_SRC FILE *yuv_file; #endif @@ -103,7 +106,7 @@ static INLINE void Scale2Ratio(VPX_SCALING mode, int *hr, int *hs) { } } -static void set_high_precision_mv(VP9_COMP *cpi, int allow_high_precision_mv) { +void vp9_set_high_precision_mv(VP9_COMP *cpi, int allow_high_precision_mv) { MACROBLOCK *const mb = &cpi->mb; cpi->common.allow_high_precision_mv = allow_high_precision_mv; if (cpi->common.allow_high_precision_mv) { @@ -142,8 +145,6 @@ void vp9_initialize_enc() { if (!init_done) { vp9_init_neighbors(); - vp9_init_quant_tables(); - vp9_coef_tree_initialize(); vp9_tokenize_initialize(); vp9_init_me_luts(); @@ -173,10 +174,8 @@ static void dealloc_compressor_data(VP9_COMP *cpi) { vp9_cyclic_refresh_free(cpi->cyclic_refresh); cpi->cyclic_refresh = NULL; - vpx_free(cpi->active_map); - cpi->active_map = NULL; - vp9_free_frame_buffers(cm); + vp9_free_context_buffers(cm); vp9_free_frame_buffer(&cpi->last_frame_uf); vp9_free_frame_buffer(&cpi->scaled_source); @@ -414,39 +413,46 @@ static void alloc_raw_frame_buffers(VP9_COMP *cpi) { "Failed to allocate altref buffer"); } -void vp9_alloc_compressor_data(VP9_COMP *cpi) { - VP9_COMMON *cm = &cpi->common; - +static void alloc_ref_frame_buffers(VP9_COMP *cpi) { + VP9_COMMON *const cm = &cpi->common; if (vp9_alloc_frame_buffers(cm, cm->width, cm->height)) vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR, "Failed to allocate frame buffers"); +} - if (vp9_alloc_frame_buffer(&cpi->last_frame_uf, - cm->width, cm->height, - cm->subsampling_x, cm->subsampling_y, - VP9_ENC_BORDER_IN_PIXELS)) +static void alloc_util_frame_buffers(VP9_COMP *cpi) { + VP9_COMMON *const cm = &cpi->common; + if (vp9_realloc_frame_buffer(&cpi->last_frame_uf, + cm->width, cm->height, + cm->subsampling_x, cm->subsampling_y, + VP9_ENC_BORDER_IN_PIXELS, NULL, NULL, NULL)) vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR, "Failed to allocate last frame buffer"); - if (vp9_alloc_frame_buffer(&cpi->scaled_source, - cm->width, cm->height, - cm->subsampling_x, cm->subsampling_y, - VP9_ENC_BORDER_IN_PIXELS)) + if (vp9_realloc_frame_buffer(&cpi->scaled_source, + cm->width, cm->height, + cm->subsampling_x, cm->subsampling_y, + VP9_ENC_BORDER_IN_PIXELS, NULL, NULL, NULL)) vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR, "Failed to allocate scaled source buffer"); - if (vp9_alloc_frame_buffer(&cpi->scaled_last_source, - cm->width, cm->height, - cm->subsampling_x, cm->subsampling_y, - VP9_ENC_BORDER_IN_PIXELS)) + if (vp9_realloc_frame_buffer(&cpi->scaled_last_source, + cm->width, cm->height, + cm->subsampling_x, cm->subsampling_y, + VP9_ENC_BORDER_IN_PIXELS, NULL, NULL, NULL)) vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR, "Failed to allocate scaled last source buffer"); +} + +void vp9_alloc_compressor_data(VP9_COMP *cpi) { + VP9_COMMON *cm = &cpi->common; + + vp9_alloc_context_buffers(cm, cm->width, cm->height); vpx_free(cpi->tok); { unsigned int tokens = get_token_alloc(cm->mb_rows, cm->mb_cols); - CHECK_MEM_ERROR(cm, cpi->tok, vpx_calloc(tokens, sizeof(*cpi->tok))); } @@ -456,41 +462,7 @@ void vp9_alloc_compressor_data(VP9_COMP *cpi) { static void update_frame_size(VP9_COMP *cpi) { VP9_COMMON *const cm = &cpi->common; MACROBLOCKD *const xd = &cpi->mb.e_mbd; - vp9_update_frame_size(cm); - - // Update size of buffers local to this frame - if (vp9_realloc_frame_buffer(&cpi->last_frame_uf, - cm->width, cm->height, - cm->subsampling_x, cm->subsampling_y, - VP9_ENC_BORDER_IN_PIXELS, NULL, NULL, NULL)) - vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR, - "Failed to reallocate last frame buffer"); - - if (vp9_realloc_frame_buffer(&cpi->scaled_source, - cm->width, cm->height, - cm->subsampling_x, cm->subsampling_y, - VP9_ENC_BORDER_IN_PIXELS, NULL, NULL, NULL)) - vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR, - "Failed to reallocate scaled source buffer"); - - if (vp9_realloc_frame_buffer(&cpi->scaled_last_source, - cm->width, cm->height, - cm->subsampling_x, cm->subsampling_y, - VP9_ENC_BORDER_IN_PIXELS, NULL, NULL, NULL)) - vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR, - "Failed to reallocate scaled last source buffer"); - - { - int y_stride = cpi->scaled_source.y_stride; - - if (cpi->sf.mv.search_method == NSTEP) { - vp9_init3smotion_compensation(&cpi->ss_cfg, y_stride); - } else if (cpi->sf.mv.search_method == DIAMOND) { - vp9_init_dsmotion_compensation(&cpi->ss_cfg, y_stride); - } - } - init_macroblockd(cm, xd); } @@ -518,6 +490,12 @@ static void set_tile_limits(VP9_COMP *cpi) { cm->log2_tile_rows = cpi->oxcf.tile_rows; } +static void init_buffer_indices(VP9_COMP *cpi) { + cpi->lst_fb_idx = 0; + cpi->gld_fb_idx = 1; + cpi->alt_fb_idx = 2; +} + static void init_config(struct VP9_COMP *cpi, VP9EncoderConfig *oxcf) { VP9_COMMON *const cm = &cpi->common; @@ -528,8 +506,6 @@ static void init_config(struct VP9_COMP *cpi, VP9EncoderConfig *oxcf) { cm->width = oxcf->width; cm->height = oxcf->height; - cm->subsampling_x = 0; - cm->subsampling_y = 0; vp9_alloc_compressor_data(cpi); // Spatial scalability. @@ -548,10 +524,9 @@ static void init_config(struct VP9_COMP *cpi, VP9EncoderConfig *oxcf) { vp9_change_config(cpi, oxcf); cpi->static_mb_pct = 0; + cpi->ref_frame_flags = 0; - cpi->lst_fb_idx = 0; - cpi->gld_fb_idx = 1; - cpi->alt_fb_idx = 2; + init_buffer_indices(cpi); set_tile_limits(cpi); } @@ -590,7 +565,6 @@ void vp9_change_config(struct VP9_COMP *cpi, const VP9EncoderConfig *oxcf) { cpi->pass = get_pass(cpi->oxcf.mode); rc->baseline_gf_interval = DEFAULT_GF_INTERVAL; - cpi->ref_frame_flags = VP9_ALT_FLAG | VP9_GOLD_FLAG | VP9_LAST_FLAG; cpi->refresh_golden_frame = 0; cpi->refresh_last_frame = 1; @@ -598,7 +572,7 @@ void vp9_change_config(struct VP9_COMP *cpi, const VP9EncoderConfig *oxcf) { cm->reset_frame_context = 0; vp9_reset_segment_features(&cm->seg); - set_high_precision_mv(cpi, 0); + vp9_set_high_precision_mv(cpi, 0); { int i; @@ -663,11 +637,7 @@ void vp9_change_config(struct VP9_COMP *cpi, const VP9EncoderConfig *oxcf) { (int)cpi->oxcf.target_bandwidth); } -#if CONFIG_MULTIPLE_ARF - vp9_zero(cpi->alt_ref_source); -#else cpi->alt_ref_source = NULL; -#endif rc->is_src_frame_alt_ref = 0; #if 0 @@ -683,6 +653,12 @@ void vp9_change_config(struct VP9_COMP *cpi, const VP9EncoderConfig *oxcf) { #if CONFIG_DENOISING vp9_denoiser_alloc(&(cpi->denoiser), cm->width, cm->height, + // TODO(tkopp) An unrelated bug causes + // cm->subsampling_{x,y} to be uninitialized at this point + // in execution. For now we assume YUV-420, which is x/y + // subsampling of 1. + 1, 1, + // cm->subsampling_x, cm->subsampling_y, VP9_ENC_BORDER_IN_PIXELS); #endif } @@ -781,10 +757,6 @@ VP9_COMP *vp9_create_compressor(VP9EncoderConfig *oxcf) { CHECK_MEM_ERROR(cm, cpi->coding_context.last_frame_seg_map_copy, vpx_calloc(cm->mi_rows * cm->mi_cols, 1)); - CHECK_MEM_ERROR(cm, cpi->active_map, vpx_calloc(cm->MBs, 1)); - vpx_memset(cpi->active_map, 1, cm->MBs); - cpi->active_map_enabled = 0; - for (i = 0; i < (sizeof(cpi->mbgraph_stats) / sizeof(cpi->mbgraph_stats[0])); i++) { CHECK_MEM_ERROR(cm, cpi->mbgraph_stats[i].mb_stats, @@ -794,18 +766,23 @@ VP9_COMP *vp9_create_compressor(VP9EncoderConfig *oxcf) { cpi->refresh_alt_ref_frame = 0; -#if CONFIG_MULTIPLE_ARF - // Turn multiple ARF usage on/off. This is a quick hack for the initial test - // version. It should eventually be set via the codec API. - cpi->multi_arf_enabled = 1; - - if (cpi->multi_arf_enabled) { - cpi->sequence_number = 0; - cpi->frame_coding_order_period = 0; - vp9_zero(cpi->frame_coding_order); - vp9_zero(cpi->arf_buffer_idx); + // Note that at the moment multi_arf will not work with svc. + // For the current check in all the execution paths are defaulted to 0 + // pending further tuning and testing. The code is left in place here + // as a place holder in regard to the required paths. + if (cpi->pass == 2) { + if (cpi->use_svc) { + cpi->multi_arf_allowed = 0; + cpi->multi_arf_enabled = 0; + } else { + // Disable by default for now. + cpi->multi_arf_allowed = 0; + cpi->multi_arf_enabled = 0; + } + } else { + cpi->multi_arf_allowed = 0; + cpi->multi_arf_enabled = 0; } -#endif cpi->b_calculate_psnr = CONFIG_INTERNAL_STATS; #if CONFIG_INTERNAL_STATS @@ -860,6 +837,9 @@ VP9_COMP *vp9_create_compressor(VP9EncoderConfig *oxcf) { cpi->mb.nmvsadcost_hp[1] = &cpi->mb.nmvsadcosts_hp[1][MV_MAX]; cal_nmvsadcosts_hp(cpi->mb.nmvsadcost_hp); +#ifdef OUTPUT_YUV_DENOISED + yuv_denoised_file = fopen("denoised.yuv", "ab"); +#endif #ifdef OUTPUT_YUV_SRC yuv_file = fopen("bd.yuv", "ab"); #endif @@ -1105,6 +1085,9 @@ void vp9_remove_compressor(VP9_COMP *cpi) { vp9_remove_common(&cpi->common); vpx_free(cpi); +#ifdef OUTPUT_YUV_DENOISED + fclose(yuv_denoised_file); +#endif #ifdef OUTPUT_YUV_SRC fclose(yuv_file); #endif @@ -1286,13 +1269,13 @@ int vp9_update_entropy(VP9_COMP * cpi, int update) { } -#ifdef OUTPUT_YUV_SRC -void vp9_write_yuv_frame(YV12_BUFFER_CONFIG *s) { +#if defined(OUTPUT_YUV_SRC) || defined(OUTPUT_YUV_DENOISED) +void vp9_write_yuv_frame(YV12_BUFFER_CONFIG *s, FILE *f) { uint8_t *src = s->y_buffer; int h = s->y_height; do { - fwrite(src, s->y_width, 1, yuv_file); + fwrite(src, s->y_width, 1, f); src += s->y_stride; } while (--h); @@ -1300,7 +1283,7 @@ void vp9_write_yuv_frame(YV12_BUFFER_CONFIG *s) { h = s->uv_height; do { - fwrite(src, s->uv_width, 1, yuv_file); + fwrite(src, s->uv_width, 1, f); src += s->uv_stride; } while (--h); @@ -1308,7 +1291,7 @@ void vp9_write_yuv_frame(YV12_BUFFER_CONFIG *s) { h = s->uv_height; do { - fwrite(src, s->uv_width, 1, yuv_file); + fwrite(src, s->uv_width, 1, f); src += s->uv_stride; } while (--h); } @@ -1509,14 +1492,8 @@ void vp9_update_reference_frames(VP9_COMP *cpi) { &cm->ref_frame_map[cpi->gld_fb_idx], cm->new_fb_idx); ref_cnt_fb(cm->frame_bufs, &cm->ref_frame_map[cpi->alt_fb_idx], cm->new_fb_idx); - } -#if CONFIG_MULTIPLE_ARF - else if (!cpi->multi_arf_enabled && cpi->refresh_golden_frame && - !cpi->refresh_alt_ref_frame) { -#else - else if (cpi->refresh_golden_frame && !cpi->refresh_alt_ref_frame && - !cpi->use_svc) { -#endif + } else if (!cpi->multi_arf_allowed && cpi->refresh_golden_frame && + cpi->rc.is_src_frame_alt_ref && !cpi->use_svc) { /* Preserve the previously existing golden frame and update the frame in * the alt ref slot instead. This is highly specific to the current use of * alt-ref as a forward reference, and this needs to be generalized as @@ -1534,14 +1511,14 @@ void vp9_update_reference_frames(VP9_COMP *cpi) { tmp = cpi->alt_fb_idx; cpi->alt_fb_idx = cpi->gld_fb_idx; cpi->gld_fb_idx = tmp; - } else { /* For non key/golden frames */ + } else { /* For non key/golden frames */ if (cpi->refresh_alt_ref_frame) { int arf_idx = cpi->alt_fb_idx; -#if CONFIG_MULTIPLE_ARF - if (cpi->multi_arf_enabled) { - arf_idx = cpi->arf_buffer_idx[cpi->sequence_number + 1]; + if ((cpi->pass == 2) && cpi->multi_arf_allowed) { + const GF_GROUP *const gf_group = &cpi->twopass.gf_group; + arf_idx = gf_group->arf_update_idx[gf_group->index]; } -#endif + ref_cnt_fb(cm->frame_bufs, &cm->ref_frame_map[arf_idx], cm->new_fb_idx); } @@ -1558,6 +1535,7 @@ void vp9_update_reference_frames(VP9_COMP *cpi) { } #if CONFIG_DENOISING vp9_denoiser_update_frame_info(&cpi->denoiser, + *cpi->Source, cpi->common.frame_type, cpi->refresh_alt_ref_frame, cpi->refresh_golden_frame, @@ -1593,13 +1571,15 @@ static void loopfilter_frame(VP9_COMP *cpi, VP9_COMMON *cm) { void vp9_scale_references(VP9_COMP *cpi) { VP9_COMMON *cm = &cpi->common; MV_REFERENCE_FRAME ref_frame; + const VP9_REFFRAME ref_mask[3] = {VP9_LAST_FLAG, VP9_GOLD_FLAG, VP9_ALT_FLAG}; for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) { const int idx = cm->ref_frame_map[get_ref_frame_idx(cpi, ref_frame)]; const YV12_BUFFER_CONFIG *const ref = &cm->frame_bufs[idx].buf; - if (ref->y_crop_width != cm->width || - ref->y_crop_height != cm->height) { + // Need to convert from VP9_REFFRAME to index into ref_mask (subtract 1). + if ((cpi->ref_frame_flags & ref_mask[ref_frame - 1]) && + (ref->y_crop_width != cm->width || ref->y_crop_height != cm->height)) { const int new_fb = get_free_fb(cm); vp9_realloc_frame_buffer(&cm->frame_bufs[new_fb].buf, cm->width, cm->height, @@ -2121,8 +2101,12 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, } #endif +#ifdef OUTPUT_YUV_DENOISED + vp9_write_yuv_frame(&cpi->denoiser.running_avg_y[INTRA_FRAME], + yuv_denoised_file); +#endif #ifdef OUTPUT_YUV_SRC - vp9_write_yuv_frame(cpi->Source); + vp9_write_yuv_frame(cpi->Source, yuv_file); #endif set_speed_features(cpi); @@ -2133,7 +2117,7 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, if (!frame_is_intra_only(cm)) { cm->interp_filter = DEFAULT_INTERP_FILTER; /* TODO: Decide this more intelligently */ - set_high_precision_mv(cpi, q < HIGH_PRECISION_MV_QTHRESH); + vp9_set_high_precision_mv(cpi, q < HIGH_PRECISION_MV_QTHRESH); } if (cpi->sf.recode_loop == DISALLOW_RECODE) { @@ -2221,31 +2205,8 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, if (cm->frame_type == KEY_FRAME) { // Tell the caller that the frame was coded as a key frame *frame_flags = cpi->frame_flags | FRAMEFLAGS_KEY; - -#if CONFIG_MULTIPLE_ARF - // Reset the sequence number. - if (cpi->multi_arf_enabled) { - cpi->sequence_number = 0; - cpi->frame_coding_order_period = cpi->new_frame_coding_order_period; - cpi->new_frame_coding_order_period = -1; - } -#endif } else { *frame_flags = cpi->frame_flags & ~FRAMEFLAGS_KEY; - -#if CONFIG_MULTIPLE_ARF - /* Increment position in the coded frame sequence. */ - if (cpi->multi_arf_enabled) { - ++cpi->sequence_number; - if (cpi->sequence_number >= cpi->frame_coding_order_period) { - cpi->sequence_number = 0; - cpi->frame_coding_order_period = cpi->new_frame_coding_order_period; - cpi->new_frame_coding_order_period = -1; - } - cpi->this_frame_weight = cpi->arf_weight[cpi->sequence_number]; - assert(cpi->this_frame_weight >= 0); - } -#endif } // Clear the one shot update flags for segmentation map and mode/ref loop @@ -2299,6 +2260,16 @@ static void Pass2Encode(VP9_COMP *cpi, size_t *size, vp9_twopass_postencode_update(cpi); } +static void init_motion_estimation(VP9_COMP *cpi) { + int y_stride = cpi->scaled_source.y_stride; + + if (cpi->sf.mv.search_method == NSTEP) { + vp9_init3smotion_compensation(&cpi->ss_cfg, y_stride); + } else if (cpi->sf.mv.search_method == DIAMOND) { + vp9_init_dsmotion_compensation(&cpi->ss_cfg, y_stride); + } +} + static void check_initial_width(VP9_COMP *cpi, int subsampling_x, int subsampling_y) { VP9_COMMON *const cm = &cpi->common; @@ -2306,7 +2277,13 @@ static void check_initial_width(VP9_COMP *cpi, int subsampling_x, if (!cpi->initial_width) { cm->subsampling_x = subsampling_x; cm->subsampling_y = subsampling_y; + alloc_raw_frame_buffers(cpi); + alloc_ref_frame_buffers(cpi); + alloc_util_frame_buffers(cpi); + + init_motion_estimation(cpi); + cpi->initial_width = cm->width; cpi->initial_height = cm->height; } @@ -2321,11 +2298,22 @@ int vp9_receive_raw_frame(VP9_COMP *cpi, unsigned int frame_flags, int res = 0; const int subsampling_x = sd->uv_width < sd->y_width; const int subsampling_y = sd->uv_height < sd->y_height; + const int is_spatial_svc = cpi->use_svc && + (cpi->svc.number_temporal_layers == 1); check_initial_width(cpi, subsampling_x, subsampling_y); + vpx_usec_timer_start(&timer); - if (vp9_lookahead_push(cpi->lookahead, - sd, time_stamp, end_time, frame_flags)) + +#ifdef CONFIG_SPATIAL_SVC + if (is_spatial_svc) + res = vp9_svc_lookahead_push(cpi, cpi->lookahead, sd, time_stamp, end_time, + frame_flags); + else +#endif + res = vp9_lookahead_push(cpi->lookahead, + sd, time_stamp, end_time, frame_flags); + if (res) res = -1; vpx_usec_timer_mark(&timer); cpi->time_receive_data += vpx_usec_timer_elapsed(&timer); @@ -2353,13 +2341,6 @@ static int frame_is_reference(const VP9_COMP *cpi) { cm->seg.update_data; } -#if CONFIG_MULTIPLE_ARF -int is_next_frame_arf(VP9_COMP *cpi) { - // Negative entry in frame_coding_order indicates an ARF at this position. - return cpi->frame_coding_order[cpi->sequence_number + 1] < 0 ? 1 : 0; -} -#endif - void adjust_frame_rate(VP9_COMP *cpi) { int64_t this_duration; int step = 0; @@ -2398,6 +2379,46 @@ void adjust_frame_rate(VP9_COMP *cpi) { cpi->last_end_time_stamp_seen = cpi->source->ts_end; } +// Returns 0 if this is not an alt ref else the offset of the source frame +// used as the arf midpoint. +static int get_arf_src_index(VP9_COMP *cpi) { + RATE_CONTROL *const rc = &cpi->rc; + int arf_src_index = 0; + if (is_altref_enabled(&cpi->oxcf)) { + if (cpi->pass == 2) { + const GF_GROUP *const gf_group = &cpi->twopass.gf_group; + if (gf_group->update_type[gf_group->index] == ARF_UPDATE) { + arf_src_index = gf_group->arf_src_offset[gf_group->index]; + } + } else if (rc->source_alt_ref_pending) { + arf_src_index = rc->frames_till_gf_update_due; + } + } + return arf_src_index; +} + +static void check_src_altref(VP9_COMP *cpi) { + RATE_CONTROL *const rc = &cpi->rc; + + if (cpi->pass == 2) { + const GF_GROUP *const gf_group = &cpi->twopass.gf_group; + rc->is_src_frame_alt_ref = + (gf_group->update_type[gf_group->index] == OVERLAY_UPDATE); + } else { + rc->is_src_frame_alt_ref = cpi->alt_ref_source && + (cpi->source == cpi->alt_ref_source); + } + + if (rc->is_src_frame_alt_ref) { + // Current frame is an ARF overlay frame. + cpi->alt_ref_source = NULL; + + // Don't refresh the last buffer for an ARF overlay frame. It will + // become the GF so preserve last as an alternative prediction option. + cpi->refresh_last_frame = 0; + } +} + int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags, size_t *size, uint8_t *dest, int64_t *time_stamp, int64_t *time_end, int flush) { @@ -2407,11 +2428,15 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags, struct vpx_usec_timer cmptimer; YV12_BUFFER_CONFIG *force_src_buffer = NULL; MV_REFERENCE_FRAME ref_frame; + int arf_src_index; + const int is_spatial_svc = cpi->use_svc && + (cpi->svc.number_temporal_layers == 1); if (!cpi) return -1; - if (cpi->svc.number_spatial_layers > 1 && cpi->pass == 2) { + if (is_spatial_svc && cpi->pass == 2) { + vp9_svc_lookahead_peek(cpi, cpi->lookahead, 0, 1); vp9_restore_layer_context(cpi); } @@ -2420,7 +2445,7 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags, cpi->source = NULL; cpi->last_source = NULL; - set_high_precision_mv(cpi, ALTREF_HIGH_PRECISION_MV); + vp9_set_high_precision_mv(cpi, ALTREF_HIGH_PRECISION_MV); // Normal defaults cm->reset_frame_context = 0; @@ -2429,35 +2454,26 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags, cpi->refresh_golden_frame = 0; cpi->refresh_alt_ref_frame = 0; - // Should we code an alternate reference frame. - if (is_altref_enabled(&cpi->oxcf) && rc->source_alt_ref_pending) { - int frames_to_arf; + // Should we encode an arf frame. + arf_src_index = get_arf_src_index(cpi); + if (arf_src_index) { + assert(arf_src_index <= rc->frames_to_key); -#if CONFIG_MULTIPLE_ARF - assert(!cpi->multi_arf_enabled || - cpi->frame_coding_order[cpi->sequence_number] < 0); - - if (cpi->multi_arf_enabled && (cpi->pass == 2)) - frames_to_arf = (-cpi->frame_coding_order[cpi->sequence_number]) - - cpi->next_frame_in_order; +#ifdef CONFIG_SPATIAL_SVC + if (is_spatial_svc) + cpi->source = vp9_svc_lookahead_peek(cpi, cpi->lookahead, + arf_src_index, 1); else #endif - frames_to_arf = rc->frames_till_gf_update_due; - - assert(frames_to_arf <= rc->frames_to_key); - - if ((cpi->source = vp9_lookahead_peek(cpi->lookahead, frames_to_arf))) { -#if CONFIG_MULTIPLE_ARF - cpi->alt_ref_source[cpi->arf_buffered] = cpi->source; -#else + cpi->source = vp9_lookahead_peek(cpi->lookahead, arf_src_index); + if (cpi->source != NULL) { cpi->alt_ref_source = cpi->source; -#endif if (cpi->oxcf.arnr_max_frames > 0) { // Produce the filtered ARF frame. // TODO(agrange) merge these two functions. - vp9_configure_arnr_filter(cpi, frames_to_arf, rc->gfu_boost); - vp9_temporal_filter_prepare(cpi, frames_to_arf); + vp9_configure_arnr_filter(cpi, arf_src_index, rc->gfu_boost); + vp9_temporal_filter_prepare(cpi, arf_src_index); vp9_extend_frame_borders(&cpi->alt_ref_buffer); force_src_buffer = &cpi->alt_ref_buffer; } @@ -2467,59 +2483,38 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags, cpi->refresh_golden_frame = 0; cpi->refresh_last_frame = 0; rc->is_src_frame_alt_ref = 0; - -#if CONFIG_MULTIPLE_ARF - if (!cpi->multi_arf_enabled) -#endif - rc->source_alt_ref_pending = 0; + rc->source_alt_ref_pending = 0; } else { rc->source_alt_ref_pending = 0; } } if (!cpi->source) { -#if CONFIG_MULTIPLE_ARF - int i; -#endif - // Get last frame source. if (cm->current_video_frame > 0) { - if ((cpi->last_source = vp9_lookahead_peek(cpi->lookahead, -1)) == NULL) +#ifdef CONFIG_SPATIAL_SVC + if (is_spatial_svc) + cpi->last_source = vp9_svc_lookahead_peek(cpi, cpi->lookahead, -1, 0); + else +#endif + cpi->last_source = vp9_lookahead_peek(cpi->lookahead, -1); + if (cpi->last_source == NULL) return -1; } - if ((cpi->source = vp9_lookahead_pop(cpi->lookahead, flush))) { + // Read in the source frame. +#ifdef CONFIG_SPATIAL_SVC + if (is_spatial_svc) + cpi->source = vp9_svc_lookahead_pop(cpi, cpi->lookahead, flush); + else +#endif + cpi->source = vp9_lookahead_pop(cpi->lookahead, flush); + if (cpi->source != NULL) { cm->show_frame = 1; cm->intra_only = 0; -#if CONFIG_MULTIPLE_ARF - // Is this frame the ARF overlay. - rc->is_src_frame_alt_ref = 0; - for (i = 0; i < cpi->arf_buffered; ++i) { - if (cpi->source == cpi->alt_ref_source[i]) { - rc->is_src_frame_alt_ref = 1; - cpi->refresh_golden_frame = 1; - break; - } - } -#else - rc->is_src_frame_alt_ref = cpi->alt_ref_source && - (cpi->source == cpi->alt_ref_source); -#endif - if (rc->is_src_frame_alt_ref) { - // Current frame is an ARF overlay frame. -#if CONFIG_MULTIPLE_ARF - cpi->alt_ref_source[i] = NULL; -#else - cpi->alt_ref_source = NULL; -#endif - // Don't refresh the last buffer for an ARF overlay frame. It will - // become the GF so preserve last as an alternative prediction option. - cpi->refresh_last_frame = 0; - } -#if CONFIG_MULTIPLE_ARF - ++cpi->next_frame_in_order; -#endif + // Check to see if the frame should be encoded as an arf overlay. + check_src_altref(cpi); } } @@ -2527,20 +2522,17 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags, cpi->un_scaled_source = cpi->Source = force_src_buffer ? force_src_buffer : &cpi->source->img; - if (cpi->last_source != NULL) { - cpi->unscaled_last_source = &cpi->last_source->img; - } else { - cpi->unscaled_last_source = NULL; - } + if (cpi->last_source != NULL) { + cpi->unscaled_last_source = &cpi->last_source->img; + } else { + cpi->unscaled_last_source = NULL; + } *time_stamp = cpi->source->ts_start; *time_end = cpi->source->ts_end; - *frame_flags = cpi->source->flags; + *frame_flags = + (cpi->source->flags & VPX_EFLAG_FORCE_KF) ? FRAMEFLAGS_KEY : 0; -#if CONFIG_MULTIPLE_ARF - if (cm->frame_type != KEY_FRAME && cpi->pass == 2) - rc->source_alt_ref_pending = is_next_frame_arf(cpi); -#endif } else { *size = 0; if (flush && cpi->pass == 1 && !cpi->twopass.first_pass_done) { @@ -2578,16 +2570,14 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags, cm->frame_bufs[cm->new_fb_idx].ref_count--; cm->new_fb_idx = get_free_fb(cm); -#if CONFIG_MULTIPLE_ARF - /* Set up the correct ARF frame. */ - if (cpi->refresh_alt_ref_frame) { - ++cpi->arf_buffered; + if (!cpi->use_svc && cpi->multi_arf_allowed) { + if (cm->frame_type == KEY_FRAME) { + init_buffer_indices(cpi); + } else if (cpi->pass == 2) { + const GF_GROUP *const gf_group = &cpi->twopass.gf_group; + cpi->alt_fb_idx = gf_group->arf_ref_idx[gf_group->index]; + } } - if (cpi->multi_arf_enabled && (cm->frame_type != KEY_FRAME) && - (cpi->pass == 2)) { - cpi->alt_fb_idx = cpi->arf_buffer_idx[cpi->sequence_number]; - } -#endif cpi->frame_flags = *frame_flags; @@ -2606,6 +2596,9 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags, cm->subsampling_x, cm->subsampling_y, VP9_ENC_BORDER_IN_PIXELS, NULL, NULL, NULL); + alloc_util_frame_buffers(cpi); + init_motion_estimation(cpi); + for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) { const int idx = cm->ref_frame_map[get_ref_frame_idx(cpi, ref_frame)]; YV12_BUFFER_CONFIG *const buf = &cm->frame_bufs[idx].buf; @@ -2775,16 +2768,23 @@ int vp9_get_preview_raw_frame(VP9_COMP *cpi, YV12_BUFFER_CONFIG *dest, int vp9_set_active_map(VP9_COMP *cpi, unsigned char *map, int rows, int cols) { if (rows == cpi->common.mb_rows && cols == cpi->common.mb_cols) { + const int mi_rows = cpi->common.mi_rows; + const int mi_cols = cpi->common.mi_cols; if (map) { - vpx_memcpy(cpi->active_map, map, rows * cols); - cpi->active_map_enabled = 1; + int r, c; + for (r = 0; r < mi_rows; r++) { + for (c = 0; c < mi_cols; c++) { + cpi->segmentation_map[r * mi_cols + c] = + !map[(r >> 1) * cols + (c >> 1)]; + } + } + vp9_enable_segfeature(&cpi->common.seg, 1, SEG_LVL_SKIP); + vp9_enable_segmentation(&cpi->common.seg); } else { - cpi->active_map_enabled = 0; + vp9_disable_segmentation(&cpi->common.seg); } - return 0; } else { - // cpi->active_map_enabled = 0; return -1; } } @@ -2863,3 +2863,42 @@ int vp9_get_y_sse(const YV12_BUFFER_CONFIG *a, const YV12_BUFFER_CONFIG *b) { int vp9_get_quantizer(VP9_COMP *cpi) { return cpi->common.base_qindex; } + +void vp9_apply_encoding_flags(VP9_COMP *cpi, vpx_enc_frame_flags_t flags) { + if (flags & (VP8_EFLAG_NO_REF_LAST | VP8_EFLAG_NO_REF_GF | + VP8_EFLAG_NO_REF_ARF)) { + int ref = 7; + + if (flags & VP8_EFLAG_NO_REF_LAST) + ref ^= VP9_LAST_FLAG; + + if (flags & VP8_EFLAG_NO_REF_GF) + ref ^= VP9_GOLD_FLAG; + + if (flags & VP8_EFLAG_NO_REF_ARF) + ref ^= VP9_ALT_FLAG; + + vp9_use_as_reference(cpi, ref); + } + + if (flags & (VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_GF | + VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_FORCE_GF | + VP8_EFLAG_FORCE_ARF)) { + int upd = 7; + + if (flags & VP8_EFLAG_NO_UPD_LAST) + upd ^= VP9_LAST_FLAG; + + if (flags & VP8_EFLAG_NO_UPD_GF) + upd ^= VP9_GOLD_FLAG; + + if (flags & VP8_EFLAG_NO_UPD_ARF) + upd ^= VP9_ALT_FLAG; + + vp9_update_reference(cpi, upd); + } + + if (flags & VP8_EFLAG_NO_UPD_ENTROPY) { + vp9_update_entropy(cpi, 0); + } +} diff --git a/vp9/encoder/vp9_encoder.h b/vp9/encoder/vp9_encoder.h index a27868a60..47649a863 100644 --- a/vp9/encoder/vp9_encoder.h +++ b/vp9/encoder/vp9_encoder.h @@ -32,6 +32,7 @@ #include "vp9/encoder/vp9_mcomp.h" #include "vp9/encoder/vp9_quantize.h" #include "vp9/encoder/vp9_ratectrl.h" +#include "vp9/encoder/vp9_rdopt.h" #include "vp9/encoder/vp9_speed_features.h" #include "vp9/encoder/vp9_svc_layercontext.h" #include "vp9/encoder/vp9_tokenize.h" @@ -46,9 +47,6 @@ extern "C" { #define DEFAULT_GF_INTERVAL 10 -#define MAX_MODES 30 -#define MAX_REFS 6 - typedef struct { int nmvjointcost[MV_JOINTS]; int nmvcosts[2][MV_VALS]; @@ -66,56 +64,6 @@ typedef struct { FRAME_CONTEXT fc; } CODING_CONTEXT; -// This enumerator type needs to be kept aligned with the mode order in -// const MODE_DEFINITION vp9_mode_order[MAX_MODES] used in the rd code. -typedef enum { - THR_NEARESTMV, - THR_NEARESTA, - THR_NEARESTG, - - THR_DC, - - THR_NEWMV, - THR_NEWA, - THR_NEWG, - - THR_NEARMV, - THR_NEARA, - THR_COMP_NEARESTLA, - THR_COMP_NEARESTGA, - - THR_TM, - - THR_COMP_NEARLA, - THR_COMP_NEWLA, - THR_NEARG, - THR_COMP_NEARGA, - THR_COMP_NEWGA, - - THR_ZEROMV, - THR_ZEROG, - THR_ZEROA, - THR_COMP_ZEROLA, - THR_COMP_ZEROGA, - - THR_H_PRED, - THR_V_PRED, - THR_D135_PRED, - THR_D207_PRED, - THR_D153_PRED, - THR_D63_PRED, - THR_D117_PRED, - THR_D45_PRED, -} THR_MODES; - -typedef enum { - THR_LAST, - THR_GOLD, - THR_ALTR, - THR_COMP_LA, - THR_COMP_GA, - THR_INTRA, -} THR_MODES_SUB8X8; typedef enum { // encode_breakout is disabled. @@ -293,32 +241,6 @@ static INLINE int is_best_mode(MODE mode) { return mode == ONE_PASS_BEST || mode == TWO_PASS_SECOND_BEST; } -typedef struct RD_OPT { - // Thresh_mult is used to set a threshold for the rd score. A higher value - // means that we will accept the best mode so far more often. This number - // is used in combination with the current block size, and thresh_freq_fact - // to pick a threshold. - int thresh_mult[MAX_MODES]; - int thresh_mult_sub8x8[MAX_REFS]; - - int threshes[MAX_SEGMENTS][BLOCK_SIZES][MAX_MODES]; - int thresh_freq_fact[BLOCK_SIZES][MAX_MODES]; - - int64_t comp_pred_diff[REFERENCE_MODES]; - int64_t prediction_type_threshes[MAX_REF_FRAMES][REFERENCE_MODES]; - int64_t tx_select_diff[TX_MODES]; - // FIXME(rbultje) can this overflow? - int tx_select_threshes[MAX_REF_FRAMES][TX_MODES]; - - int64_t filter_diff[SWITCHABLE_FILTER_CONTEXTS]; - int64_t filter_threshes[MAX_REF_FRAMES][SWITCHABLE_FILTER_CONTEXTS]; - int64_t filter_cache[SWITCHABLE_FILTER_CONTEXTS]; - int64_t mask_filter; - - int RDMULT; - int RDDIV; -} RD_OPT; - typedef struct VP9_COMP { QUANTS quants; MACROBLOCK mb; @@ -326,11 +248,7 @@ typedef struct VP9_COMP { VP9EncoderConfig oxcf; struct lookahead_ctx *lookahead; struct lookahead_entry *source; -#if CONFIG_MULTIPLE_ARF - struct lookahead_entry *alt_ref_source[REF_FRAMES]; -#else struct lookahead_entry *alt_ref_source; -#endif struct lookahead_entry *last_source; YV12_BUFFER_CONFIG *Source; @@ -349,9 +267,6 @@ typedef struct VP9_COMP { int gld_fb_idx; int alt_fb_idx; -#if CONFIG_MULTIPLE_ARF - int alt_ref_fb_idx[REF_FRAMES - 3]; -#endif int refresh_last_frame; int refresh_golden_frame; int refresh_alt_ref_frame; @@ -369,13 +284,6 @@ typedef struct VP9_COMP { TOKENEXTRA *tok; unsigned int tok_count[4][1 << 6]; -#if CONFIG_MULTIPLE_ARF - // Position within a frame coding order (including any additional ARF frames). - unsigned int sequence_number; - // Next frame in naturally occurring order that has not yet been coded. - int next_frame_in_order; -#endif - // Ambient reconstruction err target for force key frames int ambient_err; @@ -425,9 +333,6 @@ typedef struct VP9_COMP { unsigned char *complexity_map; - unsigned char *active_map; - unsigned int active_map_enabled; - CYCLIC_REFRESH *cyclic_refresh; fractional_mv_step_fp *find_fractional_mv_step; @@ -511,18 +416,8 @@ typedef struct VP9_COMP { PC_TREE *pc_root; int partition_cost[PARTITION_CONTEXTS][PARTITION_TYPES]; -#if CONFIG_MULTIPLE_ARF - // ARF tracking variables. + int multi_arf_allowed; int multi_arf_enabled; - unsigned int frame_coding_order_period; - unsigned int new_frame_coding_order_period; - int frame_coding_order[MAX_LAG_BUFFERS * 2]; - int arf_buffer_idx[MAX_LAG_BUFFERS * 3 / 2]; - int arf_weight[MAX_LAG_BUFFERS]; - int arf_buffered; - int this_frame_weight; - int max_arf_level; -#endif #if CONFIG_DENOISING VP9_DENOISER denoiser; @@ -622,10 +517,14 @@ void vp9_update_reference_frames(VP9_COMP *cpi); int64_t vp9_rescale(int64_t val, int64_t num, int denom); +void vp9_set_high_precision_mv(VP9_COMP *cpi, int allow_high_precision_mv); + YV12_BUFFER_CONFIG *vp9_scale_if_required(VP9_COMMON *cm, YV12_BUFFER_CONFIG *unscaled, YV12_BUFFER_CONFIG *scaled); +void vp9_apply_encoding_flags(VP9_COMP *cpi, vpx_enc_frame_flags_t flags); + static INLINE void set_ref_ptrs(VP9_COMMON *cm, MACROBLOCKD *xd, MV_REFERENCE_FRAME ref0, MV_REFERENCE_FRAME ref1) { diff --git a/vp9/encoder/vp9_firstpass.c b/vp9/encoder/vp9_firstpass.c index 5e82bb3f4..d0dd18213 100644 --- a/vp9/encoder/vp9_firstpass.c +++ b/vp9/encoder/vp9_firstpass.c @@ -33,7 +33,6 @@ #include "vp9/encoder/vp9_firstpass.h" #include "vp9/encoder/vp9_mcomp.h" #include "vp9/encoder/vp9_quantize.h" -#include "vp9/encoder/vp9_ratectrl.h" #include "vp9/encoder/vp9_rdopt.h" #include "vp9/encoder/vp9_variance.h" @@ -56,14 +55,7 @@ #define DOUBLE_DIVIDE_CHECK(x) ((x) < 0 ? (x) - 0.000001 : (x) + 0.000001) #define MIN_KF_BOOST 300 - -#if CONFIG_MULTIPLE_ARF -// Set MIN_GF_INTERVAL to 1 for the full decomposition. -#define MIN_GF_INTERVAL 2 -#else -#define MIN_GF_INTERVAL 4 -#endif - +#define MIN_GF_INTERVAL 4 #define LONG_TERM_VBR_CORRECTION static void swap_yv12(YV12_BUFFER_CONFIG *a, YV12_BUFFER_CONFIG *b) { @@ -497,6 +489,8 @@ void vp9_first_pass(VP9_COMP *cpi) { &cpi->scaled_source); } + vp9_setup_block_planes(&x->e_mbd, cm->subsampling_x, cm->subsampling_y); + vp9_setup_src_planes(x, cpi->Source, 0, 0); vp9_setup_pre_planes(xd, 0, first_ref_buf, 0, 0, NULL); vp9_setup_dst_planes(xd->plane, new_yv12, 0, 0); @@ -504,8 +498,6 @@ void vp9_first_pass(VP9_COMP *cpi) { xd->mi = cm->mi_grid_visible; xd->mi[0] = cm->mi; - vp9_setup_block_planes(&x->e_mbd, cm->subsampling_x, cm->subsampling_y); - vp9_frame_init_quantizer(cpi); for (i = 0; i < MAX_MB_PLANE; ++i) { @@ -615,7 +607,8 @@ void vp9_first_pass(VP9_COMP *cpi) { &unscaled_last_source_buf_2d); // TODO(pengchong): Replace the hard-coded threshold - if (raw_motion_error > 25) { + if (raw_motion_error > 25 || + (cpi->use_svc && cpi->svc.number_temporal_layers == 1)) { // Test last reference frame using the previous best mv as the // starting point (best reference) for the search. first_pass_motion_search(cpi, x, &best_ref_mv.as_mv, &mv.as_mv, @@ -1221,144 +1214,6 @@ static int calc_arf_boost(VP9_COMP *cpi, int offset, return arf_boost; } -#if CONFIG_MULTIPLE_ARF -// Work out the frame coding order for a GF or an ARF group. -// The current implementation codes frames in their natural order for a -// GF group, and inserts additional ARFs into an ARF group using a -// binary split approach. -// NOTE: this function is currently implemented recursively. -static void schedule_frames(VP9_COMP *cpi, const int start, const int end, - const int arf_idx, const int gf_or_arf_group, - const int level) { - int i, abs_end, half_range; - int *cfo = cpi->frame_coding_order; - int idx = cpi->new_frame_coding_order_period; - - // If (end < 0) an ARF should be coded at position (-end). - assert(start >= 0); - - // printf("start:%d end:%d\n", start, end); - - // GF Group: code frames in logical order. - if (gf_or_arf_group == 0) { - assert(end >= start); - for (i = start; i <= end; ++i) { - cfo[idx] = i; - cpi->arf_buffer_idx[idx] = arf_idx; - cpi->arf_weight[idx] = -1; - ++idx; - } - cpi->new_frame_coding_order_period = idx; - return; - } - - // ARF Group: Work out the ARF schedule and mark ARF frames as negative. - if (end < 0) { - // printf("start:%d end:%d\n", -end, -end); - // ARF frame is at the end of the range. - cfo[idx] = end; - // What ARF buffer does this ARF use as predictor. - cpi->arf_buffer_idx[idx] = (arf_idx > 2) ? (arf_idx - 1) : 2; - cpi->arf_weight[idx] = level; - ++idx; - abs_end = -end; - } else { - abs_end = end; - } - - half_range = (abs_end - start) >> 1; - - // ARFs may not be adjacent, they must be separated by at least - // MIN_GF_INTERVAL non-ARF frames. - if ((start + MIN_GF_INTERVAL) >= (abs_end - MIN_GF_INTERVAL)) { - // printf("start:%d end:%d\n", start, abs_end); - // Update the coding order and active ARF. - for (i = start; i <= abs_end; ++i) { - cfo[idx] = i; - cpi->arf_buffer_idx[idx] = arf_idx; - cpi->arf_weight[idx] = -1; - ++idx; - } - cpi->new_frame_coding_order_period = idx; - } else { - // Place a new ARF at the mid-point of the range. - cpi->new_frame_coding_order_period = idx; - schedule_frames(cpi, start, -(start + half_range), arf_idx + 1, - gf_or_arf_group, level + 1); - schedule_frames(cpi, start + half_range + 1, abs_end, arf_idx, - gf_or_arf_group, level + 1); - } -} - -#define FIXED_ARF_GROUP_SIZE 16 - -void define_fixed_arf_period(VP9_COMP *cpi) { - int i; - int max_level = INT_MIN; - - assert(cpi->multi_arf_enabled); - assert(cpi->oxcf.lag_in_frames >= FIXED_ARF_GROUP_SIZE); - - // Save the weight of the last frame in the sequence before next - // sequence pattern overwrites it. - cpi->this_frame_weight = cpi->arf_weight[cpi->sequence_number]; - assert(cpi->this_frame_weight >= 0); - - cpi->twopass.gf_zeromotion_pct = 0; - - // Initialize frame coding order variables. - cpi->new_frame_coding_order_period = 0; - cpi->next_frame_in_order = 0; - cpi->arf_buffered = 0; - vp9_zero(cpi->frame_coding_order); - vp9_zero(cpi->arf_buffer_idx); - vpx_memset(cpi->arf_weight, -1, sizeof(cpi->arf_weight)); - - if (cpi->rc.frames_to_key <= (FIXED_ARF_GROUP_SIZE + 8)) { - // Setup a GF group close to the keyframe. - cpi->rc.source_alt_ref_pending = 0; - cpi->rc.baseline_gf_interval = cpi->rc.frames_to_key; - schedule_frames(cpi, 0, (cpi->rc.baseline_gf_interval - 1), 2, 0, 0); - } else { - // Setup a fixed period ARF group. - cpi->rc.source_alt_ref_pending = 1; - cpi->rc.baseline_gf_interval = FIXED_ARF_GROUP_SIZE; - schedule_frames(cpi, 0, -(cpi->rc.baseline_gf_interval - 1), 2, 1, 0); - } - - // Replace level indicator of -1 with correct level. - for (i = 0; i < cpi->new_frame_coding_order_period; ++i) { - if (cpi->arf_weight[i] > max_level) { - max_level = cpi->arf_weight[i]; - } - } - ++max_level; - for (i = 0; i < cpi->new_frame_coding_order_period; ++i) { - if (cpi->arf_weight[i] == -1) { - cpi->arf_weight[i] = max_level; - } - } - cpi->max_arf_level = max_level; -#if 0 - printf("\nSchedule: "); - for (i = 0; i < cpi->new_frame_coding_order_period; ++i) { - printf("%4d ", cpi->frame_coding_order[i]); - } - printf("\n"); - printf("ARFref: "); - for (i = 0; i < cpi->new_frame_coding_order_period; ++i) { - printf("%4d ", cpi->arf_buffer_idx[i]); - } - printf("\n"); - printf("Weight: "); - for (i = 0; i < cpi->new_frame_coding_order_period; ++i) { - printf("%4d ", cpi->arf_weight[i]); - } - printf("\n"); -#endif -} -#endif - // Calculate a section intra ratio used in setting max loop filter. static int calculate_section_intra_ratio(const FIRSTPASS_STATS *begin, const FIRSTPASS_STATS *end, @@ -1428,6 +1283,18 @@ static int calculate_boost_bits(int frame_count, return MAX((int)(((int64_t)boost * total_group_bits) / allocation_chunks), 0); } +// Current limit on maximum number of active arfs in a GF/ARF group. +#define MAX_ACTIVE_ARFS 2 +#define ARF_SLOT1 2 +#define ARF_SLOT2 3 +// This function indirects the choice of buffers for arfs. +// At the moment the values are fixed but this may change as part of +// the integration process with other codec features that swap buffers around. +static void get_arf_buffer_indices(unsigned char *arf_buffer_indices) { + arf_buffer_indices[0] = ARF_SLOT1; + arf_buffer_indices[1] = ARF_SLOT2; +} + static void allocate_gf_group_bits(VP9_COMP *cpi, int64_t gf_group_bits, double group_error, int gf_arf_bits) { RATE_CONTROL *const rc = &cpi->rc; @@ -1435,42 +1302,85 @@ static void allocate_gf_group_bits(VP9_COMP *cpi, int64_t gf_group_bits, TWO_PASS *twopass = &cpi->twopass; FIRSTPASS_STATS frame_stats; int i; - int group_frame_index = 1; + int frame_index = 1; int target_frame_size; int key_frame; const int max_bits = frame_max_bits(&cpi->rc, &cpi->oxcf); int64_t total_group_bits = gf_group_bits; double modified_err = 0.0; double err_fraction; + int mid_boost_bits = 0; + int mid_frame_idx; + unsigned char arf_buffer_indices[MAX_ACTIVE_ARFS]; key_frame = cpi->common.frame_type == KEY_FRAME || vp9_is_upper_layer_key_frame(cpi); + get_arf_buffer_indices(arf_buffer_indices); + // For key frames the frame target rate is already set and it // is also the golden frame. - // NOTE: We dont bother to check for the special case of ARF overlay - // frames here, as there is clamping code for this in the function - // vp9_rc_clamp_pframe_target_size(), which applies to one and two pass - // encodes. if (!key_frame) { - twopass->gf_group_bit_allocation[0] = gf_arf_bits; + if (rc->source_alt_ref_active) { + twopass->gf_group.update_type[0] = OVERLAY_UPDATE; + twopass->gf_group.rf_level[0] = INTER_NORMAL; + twopass->gf_group.bit_allocation[0] = 0; + twopass->gf_group.arf_update_idx[0] = arf_buffer_indices[0]; + twopass->gf_group.arf_ref_idx[0] = arf_buffer_indices[0]; + } else { + twopass->gf_group.update_type[0] = GF_UPDATE; + twopass->gf_group.rf_level[0] = GF_ARF_STD; + twopass->gf_group.bit_allocation[0] = gf_arf_bits; + twopass->gf_group.arf_update_idx[0] = arf_buffer_indices[0]; + twopass->gf_group.arf_ref_idx[0] = arf_buffer_indices[0]; + } // Step over the golden frame / overlay frame if (EOF == input_stats(twopass, &frame_stats)) return; } - // Store the bits to spend on the ARF if there is one. - if (rc->source_alt_ref_pending) { - twopass->gf_group_bit_allocation[group_frame_index++] = gf_arf_bits; - } - - // Deduct the boost bits for arf or gf if it is not a key frame. + // Deduct the boost bits for arf (or gf if it is not a key frame) + // from the group total. if (rc->source_alt_ref_pending || !key_frame) total_group_bits -= gf_arf_bits; + // Store the bits to spend on the ARF if there is one. + if (rc->source_alt_ref_pending) { + if (cpi->multi_arf_enabled) { + // A portion of the gf / arf extra bits are set asside for lower level + // boosted frames in the middle of the group. + mid_boost_bits += gf_arf_bits >> 5; + gf_arf_bits -= (gf_arf_bits >> 5); + } + + twopass->gf_group.update_type[frame_index] = ARF_UPDATE; + twopass->gf_group.rf_level[frame_index] = GF_ARF_STD; + twopass->gf_group.bit_allocation[frame_index] = gf_arf_bits; + twopass->gf_group.arf_src_offset[frame_index] = + (unsigned char)(rc->baseline_gf_interval - 1); + twopass->gf_group.arf_update_idx[frame_index] = arf_buffer_indices[0]; + twopass->gf_group.arf_ref_idx[frame_index] = arf_buffer_indices[0]; + ++frame_index; + + if (cpi->multi_arf_enabled) { + // Set aside a slot for a level 1 arf. + twopass->gf_group.update_type[frame_index] = ARF_UPDATE; + twopass->gf_group.rf_level[frame_index] = GF_ARF_LOW; + twopass->gf_group.arf_src_offset[frame_index] = + (unsigned char)((rc->baseline_gf_interval >> 1) - 1); + twopass->gf_group.arf_update_idx[frame_index] = arf_buffer_indices[1]; + twopass->gf_group.arf_ref_idx[frame_index] = arf_buffer_indices[0]; + ++frame_index; + } + } + + // Define middle frame + mid_frame_idx = frame_index + (rc->baseline_gf_interval >> 1) - 1; + // Allocate bits to the other frames in the group. for (i = 0; i < rc->baseline_gf_interval - 1; ++i) { + int arf_idx = 0; if (EOF == input_stats(twopass, &frame_stats)) break; @@ -1482,10 +1392,48 @@ static void allocate_gf_group_bits(VP9_COMP *cpi, int64_t gf_group_bits, err_fraction = 0.0; target_frame_size = (int)((double)total_group_bits * err_fraction); + + if (rc->source_alt_ref_pending && cpi->multi_arf_enabled) { + mid_boost_bits += (target_frame_size >> 4); + target_frame_size -= (target_frame_size >> 4); + + if (frame_index <= mid_frame_idx) + arf_idx = 1; + } + twopass->gf_group.arf_update_idx[frame_index] = arf_buffer_indices[arf_idx]; + twopass->gf_group.arf_ref_idx[frame_index] = arf_buffer_indices[arf_idx]; + target_frame_size = clamp(target_frame_size, 0, MIN(max_bits, (int)total_group_bits)); - twopass->gf_group_bit_allocation[group_frame_index++] = target_frame_size; + twopass->gf_group.update_type[frame_index] = LF_UPDATE; + twopass->gf_group.rf_level[frame_index] = INTER_NORMAL; + + twopass->gf_group.bit_allocation[frame_index] = target_frame_size; + ++frame_index; + } + + // Note: + // We need to configure the frame at the end of the sequence + 1 that will be + // the start frame for the next group. Otherwise prior to the call to + // vp9_rc_get_second_pass_params() the data will be undefined. + twopass->gf_group.arf_update_idx[frame_index] = arf_buffer_indices[0]; + twopass->gf_group.arf_ref_idx[frame_index] = arf_buffer_indices[0]; + + if (rc->source_alt_ref_pending) { + twopass->gf_group.update_type[frame_index] = OVERLAY_UPDATE; + twopass->gf_group.rf_level[frame_index] = INTER_NORMAL; + + // Final setup for second arf and its overlay. + if (cpi->multi_arf_enabled) { + twopass->gf_group.bit_allocation[2] = + twopass->gf_group.bit_allocation[mid_frame_idx] + mid_boost_bits; + twopass->gf_group.update_type[mid_frame_idx] = OVERLAY_UPDATE; + twopass->gf_group.bit_allocation[mid_frame_idx] = 0; + } + } else { + twopass->gf_group.update_type[frame_index] = GF_UPDATE; + twopass->gf_group.rf_level[frame_index] = GF_ARF_STD; } } @@ -1528,8 +1476,7 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { // Reset the GF group data structures unless this is a key // frame in which case it will already have been done. if (cpi->common.frame_type != KEY_FRAME) { - twopass->gf_group_index = 0; - vp9_zero(twopass->gf_group_bit_allocation); + vp9_zero(twopass->gf_group); } vp9_clear_system_state(); @@ -1651,24 +1598,14 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { } } -#if CONFIG_MULTIPLE_ARF - if (cpi->multi_arf_enabled) { - // Initialize frame coding order variables. - cpi->new_frame_coding_order_period = 0; - cpi->next_frame_in_order = 0; - cpi->arf_buffered = 0; - vp9_zero(cpi->frame_coding_order); - vp9_zero(cpi->arf_buffer_idx); - vpx_memset(cpi->arf_weight, -1, sizeof(cpi->arf_weight)); - } -#endif - // Set the interval until the next gf. if (cpi->common.frame_type == KEY_FRAME || rc->source_alt_ref_active) rc->baseline_gf_interval = i - 1; else rc->baseline_gf_interval = i; + rc->frames_till_gf_update_due = rc->baseline_gf_interval; + // Should we use the alternate reference frame. if (allow_alt_ref && (i < cpi->oxcf.lag_in_frames) && @@ -1681,62 +1618,11 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { &b_boost); rc->source_alt_ref_pending = 1; -#if CONFIG_MULTIPLE_ARF - // Set the ARF schedule. - if (cpi->multi_arf_enabled) { - schedule_frames(cpi, 0, -(rc->baseline_gf_interval - 1), 2, 1, 0); - } -#endif } else { rc->gfu_boost = (int)boost_score; rc->source_alt_ref_pending = 0; -#if CONFIG_MULTIPLE_ARF - // Set the GF schedule. - if (cpi->multi_arf_enabled) { - schedule_frames(cpi, 0, rc->baseline_gf_interval - 1, 2, 0, 0); - assert(cpi->new_frame_coding_order_period == - rc->baseline_gf_interval); - } -#endif } -#if CONFIG_MULTIPLE_ARF - if (cpi->multi_arf_enabled && (cpi->common.frame_type != KEY_FRAME)) { - int max_level = INT_MIN; - // Replace level indicator of -1 with correct level. - for (i = 0; i < cpi->frame_coding_order_period; ++i) { - if (cpi->arf_weight[i] > max_level) { - max_level = cpi->arf_weight[i]; - } - } - ++max_level; - for (i = 0; i < cpi->frame_coding_order_period; ++i) { - if (cpi->arf_weight[i] == -1) { - cpi->arf_weight[i] = max_level; - } - } - cpi->max_arf_level = max_level; - } -#if 0 - if (cpi->multi_arf_enabled) { - printf("\nSchedule: "); - for (i = 0; i < cpi->new_frame_coding_order_period; ++i) { - printf("%4d ", cpi->frame_coding_order[i]); - } - printf("\n"); - printf("ARFref: "); - for (i = 0; i < cpi->new_frame_coding_order_period; ++i) { - printf("%4d ", cpi->arf_buffer_idx[i]); - } - printf("\n"); - printf("Weight: "); - for (i = 0; i < cpi->new_frame_coding_order_period; ++i) { - printf("%4d ", cpi->arf_weight[i]); - } - printf("\n"); - } -#endif -#endif // Reset the file position. reset_fpf_position(twopass, start_pos); @@ -1886,8 +1772,7 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { cpi->common.frame_type = KEY_FRAME; // Reset the GF group data structures. - twopass->gf_group_index = 0; - vp9_zero(twopass->gf_group_bit_allocation); + vp9_zero(twopass->gf_group); // Is this a forced key frame by interval. rc->this_key_frame_forced = rc->next_key_frame_forced; @@ -2078,7 +1963,9 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { twopass->kf_group_bits -= kf_bits; // Save the bits to spend on the key frame. - twopass->gf_group_bit_allocation[0] = kf_bits; + twopass->gf_group.bit_allocation[0] = kf_bits; + twopass->gf_group.update_type[0] = KF_UPDATE; + twopass->gf_group.rf_level[0] = KF_STD; // Note the total error score of the kf group minus the key frame itself. twopass->kf_group_error_left = (int)(kf_group_err - kf_mod_err); @@ -2106,6 +1993,44 @@ void vbr_rate_correction(int * this_frame_target, } } +// Define the reference buffers that will be updated post encode. +void configure_buffer_updates(VP9_COMP *cpi) { + TWO_PASS *const twopass = &cpi->twopass; + + cpi->rc.is_src_frame_alt_ref = 0; + switch (twopass->gf_group.update_type[twopass->gf_group.index]) { + case KF_UPDATE: + cpi->refresh_last_frame = 1; + cpi->refresh_golden_frame = 1; + cpi->refresh_alt_ref_frame = 1; + break; + case LF_UPDATE: + cpi->refresh_last_frame = 1; + cpi->refresh_golden_frame = 0; + cpi->refresh_alt_ref_frame = 0; + break; + case GF_UPDATE: + cpi->refresh_last_frame = 1; + cpi->refresh_golden_frame = 1; + cpi->refresh_alt_ref_frame = 0; + break; + case OVERLAY_UPDATE: + cpi->refresh_last_frame = 0; + cpi->refresh_golden_frame = 1; + cpi->refresh_alt_ref_frame = 0; + cpi->rc.is_src_frame_alt_ref = 1; + break; + case ARF_UPDATE: + cpi->refresh_last_frame = 0; + cpi->refresh_golden_frame = 0; + cpi->refresh_alt_ref_frame = 1; + break; + default: + assert(0); + } +} + + void vp9_rc_get_second_pass_params(VP9_COMP *cpi) { VP9_COMMON *const cm = &cpi->common; RATE_CONTROL *const rc = &cpi->rc; @@ -2130,14 +2055,12 @@ void vp9_rc_get_second_pass_params(VP9_COMP *cpi) { if (!twopass->stats_in) return; - // Increment the gf group index. - ++twopass->gf_group_index; - // If this is an arf frame then we dont want to read the stats file or // advance the input pointer as we already have what we need. - if (cpi->refresh_alt_ref_frame) { + if (twopass->gf_group.update_type[twopass->gf_group.index] == ARF_UPDATE) { int target_rate; - target_rate = twopass->gf_group_bit_allocation[twopass->gf_group_index]; + configure_buffer_updates(cpi); + target_rate = twopass->gf_group.bit_allocation[twopass->gf_group.index]; target_rate = vp9_rc_clamp_pframe_target_size(cpi, target_rate); rc->base_frame_target = target_rate; #ifdef LONG_TERM_VBR_CORRECTION @@ -2201,15 +2124,7 @@ void vp9_rc_get_second_pass_params(VP9_COMP *cpi) { // Define a new GF/ARF group. (Should always enter here for key frames). if (rc->frames_till_gf_update_due == 0) { -#if CONFIG_MULTIPLE_ARF - if (cpi->multi_arf_enabled) { - define_fixed_arf_period(cpi); - } else { -#endif - define_gf_group(cpi, &this_frame_copy); -#if CONFIG_MULTIPLE_ARF - } -#endif + define_gf_group(cpi, &this_frame_copy); if (twopass->gf_zeromotion_pct > 995) { // As long as max_thresh for encode breakout is small enough, it is ok @@ -2233,7 +2148,9 @@ void vp9_rc_get_second_pass_params(VP9_COMP *cpi) { } } - target_rate = twopass->gf_group_bit_allocation[twopass->gf_group_index]; + configure_buffer_updates(cpi); + + target_rate = twopass->gf_group.bit_allocation[twopass->gf_group.index]; if (cpi->common.frame_type == KEY_FRAME) target_rate = vp9_rc_clamp_iframe_target_size(cpi, target_rate); else @@ -2296,4 +2213,7 @@ void vp9_twopass_postencode_update(VP9_COMP *cpi) { twopass->kf_group_bits -= bits_used; } twopass->kf_group_bits = MAX(twopass->kf_group_bits, 0); + + // Increment the gf group index ready for the next frame. + ++twopass->gf_group.index; } diff --git a/vp9/encoder/vp9_firstpass.h b/vp9/encoder/vp9_firstpass.h index 82065213e..1ee56a3a7 100644 --- a/vp9/encoder/vp9_firstpass.h +++ b/vp9/encoder/vp9_firstpass.h @@ -12,6 +12,7 @@ #define VP9_ENCODER_VP9_FIRSTPASS_H_ #include "vp9/encoder/vp9_lookahead.h" +#include "vp9/encoder/vp9_ratectrl.h" #ifdef __cplusplus extern "C" { @@ -39,6 +40,25 @@ typedef struct { int64_t spatial_layer_id; } FIRSTPASS_STATS; +typedef enum { + KF_UPDATE = 0, + LF_UPDATE = 1, + GF_UPDATE = 2, + ARF_UPDATE = 3, + OVERLAY_UPDATE = 4, + FRAME_UPDATE_TYPES = 5 +} FRAME_UPDATE_TYPE; + +typedef struct { + unsigned char index; + RATE_FACTOR_LEVEL rf_level[(MAX_LAG_BUFFERS * 2) + 1]; + FRAME_UPDATE_TYPE update_type[(MAX_LAG_BUFFERS * 2) + 1]; + unsigned char arf_src_offset[(MAX_LAG_BUFFERS * 2) + 1]; + unsigned char arf_update_idx[(MAX_LAG_BUFFERS * 2) + 1]; + unsigned char arf_ref_idx[(MAX_LAG_BUFFERS * 2) + 1]; + int bit_allocation[(MAX_LAG_BUFFERS * 2) + 1]; +} GF_GROUP; + typedef struct { unsigned int section_intra_rating; unsigned int next_iiratio; @@ -68,8 +88,7 @@ typedef struct { int active_worst_quality; - int gf_group_index; - int gf_group_bit_allocation[MAX_LAG_BUFFERS * 2]; + GF_GROUP gf_group; } TWO_PASS; struct VP9_COMP; diff --git a/vp9/encoder/vp9_lookahead.c b/vp9/encoder/vp9_lookahead.c index abe71e681..e7435170e 100644 --- a/vp9/encoder/vp9_lookahead.c +++ b/vp9/encoder/vp9_lookahead.c @@ -18,18 +18,6 @@ #include "vp9/encoder/vp9_extend.h" #include "vp9/encoder/vp9_lookahead.h" -// The max of past frames we want to keep in the queue. -#define MAX_PRE_FRAMES 1 - -struct lookahead_ctx { - unsigned int max_sz; /* Absolute size of the queue */ - unsigned int sz; /* Number of buffers currently in the queue */ - unsigned int read_idx; /* Read index */ - unsigned int write_idx; /* Write index */ - struct lookahead_entry *buf; /* Buffer list */ -}; - - /* Return the buffer at the given absolute index and increment the index */ static struct lookahead_entry *pop(struct lookahead_ctx *ctx, unsigned int *idx) { diff --git a/vp9/encoder/vp9_lookahead.h b/vp9/encoder/vp9_lookahead.h index ff63c0d0d..f9cc3c8db 100644 --- a/vp9/encoder/vp9_lookahead.h +++ b/vp9/encoder/vp9_lookahead.h @@ -14,6 +14,11 @@ #include "vpx_scale/yv12config.h" #include "vpx/vpx_integer.h" +#ifdef CONFIG_SPATIAL_SVC +#include "vpx/vp8cx.h" +#include "vpx/vpx_encoder.h" +#endif + #ifdef __cplusplus extern "C" { #endif @@ -25,10 +30,22 @@ struct lookahead_entry { int64_t ts_start; int64_t ts_end; unsigned int flags; + +#ifdef CONFIG_SPATIAL_SVC + vpx_svc_parameters_t svc_params[VPX_SS_MAX_LAYERS]; +#endif }; +// The max of past frames we want to keep in the queue. +#define MAX_PRE_FRAMES 1 -struct lookahead_ctx; +struct lookahead_ctx { + unsigned int max_sz; /* Absolute size of the queue */ + unsigned int sz; /* Number of buffers currently in the queue */ + unsigned int read_idx; /* Read index */ + unsigned int write_idx; /* Write index */ + struct lookahead_entry *buf; /* Buffer list */ +}; /**\brief Initializes the lookahead stage * diff --git a/vp9/encoder/vp9_pickmode.c b/vp9/encoder/vp9_pickmode.c index 3f4fcd1e1..4c340ea0b 100644 --- a/vp9/encoder/vp9_pickmode.c +++ b/vp9/encoder/vp9_pickmode.c @@ -23,9 +23,89 @@ #include "vp9/common/vp9_reconintra.h" #include "vp9/encoder/vp9_encoder.h" +#include "vp9/encoder/vp9_pickmode.h" #include "vp9/encoder/vp9_ratectrl.h" #include "vp9/encoder/vp9_rdopt.h" +static int mv_refs_rt(const VP9_COMMON *cm, const MACROBLOCKD *xd, + const TileInfo *const tile, + MODE_INFO *mi, MV_REFERENCE_FRAME ref_frame, + int_mv *mv_ref_list, + int mi_row, int mi_col) { + const int *ref_sign_bias = cm->ref_frame_sign_bias; + int i, refmv_count = 0; + + const POSITION *const mv_ref_search = mv_ref_blocks[mi->mbmi.sb_type]; + + int different_ref_found = 0; + int context_counter = 0; + int const_motion = 0; + + // Blank the reference vector list + vpx_memset(mv_ref_list, 0, sizeof(*mv_ref_list) * MAX_MV_REF_CANDIDATES); + + // The nearest 2 blocks are treated differently + // if the size < 8x8 we get the mv from the bmi substructure, + // and we also need to keep a mode count. + for (i = 0; i < 2; ++i) { + const POSITION *const mv_ref = &mv_ref_search[i]; + if (is_inside(tile, mi_col, mi_row, cm->mi_rows, mv_ref)) { + const MODE_INFO *const candidate_mi = xd->mi[mv_ref->col + mv_ref->row * + xd->mi_stride]; + const MB_MODE_INFO *const candidate = &candidate_mi->mbmi; + // Keep counts for entropy encoding. + context_counter += mode_2_counter[candidate->mode]; + different_ref_found = 1; + + if (candidate->ref_frame[0] == ref_frame) + ADD_MV_REF_LIST(get_sub_block_mv(candidate_mi, 0, mv_ref->col, -1)); + } + } + + const_motion = 1; + + // Check the rest of the neighbors in much the same way + // as before except we don't need to keep track of sub blocks or + // mode counts. + for (; i < MVREF_NEIGHBOURS && !refmv_count; ++i) { + const POSITION *const mv_ref = &mv_ref_search[i]; + if (is_inside(tile, mi_col, mi_row, cm->mi_rows, mv_ref)) { + const MB_MODE_INFO *const candidate = &xd->mi[mv_ref->col + mv_ref->row * + xd->mi_stride]->mbmi; + different_ref_found = 1; + + if (candidate->ref_frame[0] == ref_frame) + ADD_MV_REF_LIST(candidate->mv[0]); + } + } + + // Since we couldn't find 2 mvs from the same reference frame + // go back through the neighbors and find motion vectors from + // different reference frames. + if (different_ref_found && !refmv_count) { + for (i = 0; i < MVREF_NEIGHBOURS; ++i) { + const POSITION *mv_ref = &mv_ref_search[i]; + if (is_inside(tile, mi_col, mi_row, cm->mi_rows, mv_ref)) { + const MB_MODE_INFO *const candidate = &xd->mi[mv_ref->col + mv_ref->row + * xd->mi_stride]->mbmi; + + // If the candidate is INTRA we don't want to consider its mv. + IF_DIFF_REF_FRAME_ADD_MV(candidate); + } + } + } + + Done: + + mi->mbmi.mode_context[ref_frame] = counter_to_context[context_counter]; + + // Clamp vectors + for (i = 0; i < MAX_MV_REF_CANDIDATES; ++i) + clamp_mv_ref(&mv_ref_list[i].as_mv, xd); + + return const_motion; +} + static void full_pixel_motion_search(VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize, int mi_row, int mi_col, int_mv *tmp_mv, int *rate_mv) { @@ -172,15 +252,31 @@ static void model_rd_for_sb_y(VP9_COMP *cpi, BLOCK_SIZE bsize, else x->skip_txfm = 0; - // TODO(jingning) This is a temporary solution to account for frames with - // light changes. Need to customize the rate-distortion modeling for non-RD - // mode decision. - if ((sse >> 3) > var) - sse = var; - vp9_model_rd_from_var_lapndz(var + sse, 1 << num_pels_log2_lookup[bsize], - ac_quant >> 3, &rate, &dist); - *out_rate_sum = rate; + vp9_model_rd_from_var_lapndz(sse - var, 1 << num_pels_log2_lookup[bsize], + dc_quant >> 3, &rate, &dist); + *out_rate_sum = rate >> 1; *out_dist_sum = dist << 3; + + vp9_model_rd_from_var_lapndz(var, 1 << num_pels_log2_lookup[bsize], + ac_quant >> 3, &rate, &dist); + *out_rate_sum += rate; + *out_dist_sum += dist << 4; +} + +static int get_pred_buffer(PRED_BUFFER *p, int len) { + int i; + + for (i = 0; i < len; i++) { + if (!p[i].in_use) { + p[i].in_use = 1; + return i; + } + } + return -1; +} + +static void free_pred_buffer(PRED_BUFFER *p) { + p->in_use = 0; } // TODO(jingning) placeholder for inter-frame non-RD mode decision. @@ -228,6 +324,32 @@ int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, int bsl = mi_width_log2_lookup[bsize]; const int pred_filter_search = (((mi_row + mi_col) >> bsl) + get_chessboard_index(cm)) % 2; + int const_motion[MAX_REF_FRAMES] = { 0 }; + + // For speed 6, the result of interp filter is reused later in actual encoding + // process. + int bh = num_4x4_blocks_high_lookup[bsize] << 2; + int bw = num_4x4_blocks_wide_lookup[bsize] << 2; + int pixels_in_block = bh * bw; + // tmp[3] points to dst buffer, and the other 3 point to allocated buffers. + PRED_BUFFER tmp[4]; + DECLARE_ALIGNED_ARRAY(16, uint8_t, pred_buf, 3 * 64 * 64); + struct buf_2d orig_dst = pd->dst; + PRED_BUFFER *best_pred = NULL; + PRED_BUFFER *this_mode_pred = NULL; + int i; + + if (cpi->sf.reuse_inter_pred_sby) { + for (i = 0; i < 3; i++) { + tmp[i].data = &pred_buf[pixels_in_block * i]; + tmp[i].stride = bw; + tmp[i].in_use = 0; + } + + tmp[3].data = pd->dst.buf; + tmp[3].stride = pd->dst.stride; + tmp[3].in_use = 0; + } x->skip_encode = cpi->sf.skip_encode_frame && x->q_index < QIDX_SKIP_THRESH; @@ -241,18 +363,36 @@ int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, mbmi->ref_frame[0] = NONE; mbmi->ref_frame[1] = NONE; mbmi->tx_size = MIN(max_txsize_lookup[bsize], - tx_mode_to_biggest_tx_size[cpi->common.tx_mode]); - mbmi->interp_filter = cpi->common.interp_filter == SWITCHABLE ? - EIGHTTAP : cpi->common.interp_filter; + tx_mode_to_biggest_tx_size[cm->tx_mode]); + mbmi->interp_filter = cm->interp_filter == SWITCHABLE ? + EIGHTTAP : cm->interp_filter; mbmi->skip = 0; mbmi->segment_id = segment_id; for (ref_frame = LAST_FRAME; ref_frame <= LAST_FRAME ; ++ref_frame) { x->pred_mv_sad[ref_frame] = INT_MAX; if (cpi->ref_frame_flags & flag_list[ref_frame]) { - vp9_setup_buffer_inter(cpi, x, tile, - ref_frame, bsize, mi_row, mi_col, - frame_mv[NEARESTMV], frame_mv[NEARMV], yv12_mb); + const YV12_BUFFER_CONFIG *yv12 = get_ref_frame_buffer(cpi, ref_frame); + int_mv *const candidates = mbmi->ref_mvs[ref_frame]; + const struct scale_factors *const sf = &cm->frame_refs[ref_frame - 1].sf; + vp9_setup_pred_block(xd, yv12_mb[ref_frame], yv12, mi_row, mi_col, + sf, sf); + + if (cm->coding_use_prev_mi) + vp9_find_mv_refs(cm, xd, tile, xd->mi[0], ref_frame, + candidates, mi_row, mi_col); + else + const_motion[ref_frame] = mv_refs_rt(cm, xd, tile, xd->mi[0], + ref_frame, candidates, + mi_row, mi_col); + + vp9_find_best_ref_mvs(xd, cm->allow_high_precision_mv, candidates, + &frame_mv[NEARESTMV][ref_frame], + &frame_mv[NEARMV][ref_frame]); + + if (!vp9_is_scaled(sf) && bsize >= BLOCK_8X8) + vp9_mv_pred(cpi, x, yv12_mb[ref_frame][0].buf, yv12->y_stride, + ref_frame, bsize); } frame_mv[NEWMV][ref_frame].as_int = INVALID_MV; frame_mv[ZEROMV][ref_frame].as_int = 0; @@ -286,6 +426,10 @@ int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, for (this_mode = NEARESTMV; this_mode <= NEWMV; ++this_mode) { int rate_mv = 0; + if (const_motion[ref_frame] && + (this_mode == NEARMV || this_mode == ZEROMV)) + continue; + if (!(cpi->sf.inter_mode_mask[bsize] & (1 << this_mode))) continue; @@ -324,6 +468,16 @@ int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, // Search for the best prediction filter type, when the resulting // motion vector is at sub-pixel accuracy level for luma component, i.e., // the last three bits are all zeros. + if (cpi->sf.reuse_inter_pred_sby) { + if (this_mode == NEARESTMV) { + this_mode_pred = &tmp[3]; + } else { + this_mode_pred = &tmp[get_pred_buffer(tmp, 3)]; + pd->dst.buf = this_mode_pred->data; + pd->dst.stride = bw; + } + } + if ((this_mode == NEWMV || filter_ref == SWITCHABLE) && pred_filter_search && ((mbmi->mv[0].as_mv.row & 0x07) != 0 || @@ -334,6 +488,7 @@ int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, unsigned int pf_sse[3]; int64_t best_cost = INT64_MAX; INTERP_FILTER best_filter = SWITCHABLE, filter; + PRED_BUFFER *current_pred = this_mode_pred; for (filter = EIGHTTAP; filter <= EIGHTTAP_SHARP; ++filter) { int64_t cost; @@ -345,12 +500,28 @@ int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, vp9_get_switchable_rate(cpi) + pf_rate[filter], pf_dist[filter]); if (cost < best_cost) { - best_filter = filter; - best_cost = cost; - skip_txfm = x->skip_txfm; + best_filter = filter; + best_cost = cost; + skip_txfm = x->skip_txfm; + + if (cpi->sf.reuse_inter_pred_sby) { + if (this_mode_pred != current_pred) { + free_pred_buffer(this_mode_pred); + this_mode_pred = current_pred; + } + + if (filter < EIGHTTAP_SHARP) { + current_pred = &tmp[get_pred_buffer(tmp, 3)]; + pd->dst.buf = current_pred->data; + pd->dst.stride = bw; + } + } } } + if (cpi->sf.reuse_inter_pred_sby && this_mode_pred != current_pred) + free_pred_buffer(current_pred); + mbmi->interp_filter = best_filter; rate = pf_rate[mbmi->interp_filter]; dist = pf_dist[mbmi->interp_filter]; @@ -370,31 +541,35 @@ int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, // Skipping checking: test to see if this block can be reconstructed by // prediction only. - if (!x->in_active_map) { - x->skip = 1; - } else if (cpi->allow_encode_breakout && x->encode_breakout) { + if (cpi->allow_encode_breakout) { const BLOCK_SIZE uv_size = get_plane_block_size(bsize, &xd->plane[1]); unsigned int var = var_y, sse = sse_y; // Skipping threshold for ac. unsigned int thresh_ac; // Skipping threshold for dc. unsigned int thresh_dc; - // Set a maximum for threshold to avoid big PSNR loss in low bit rate - // case. Use extreme low threshold for static frames to limit skipping. - const unsigned int max_thresh = 36000; - // The encode_breakout input - const unsigned int min_thresh = - MIN(((unsigned int)x->encode_breakout << 4), max_thresh); + if (x->encode_breakout > 0) { + // Set a maximum for threshold to avoid big PSNR loss in low bit rate + // case. Use extreme low threshold for static frames to limit + // skipping. + const unsigned int max_thresh = 36000; + // The encode_breakout input + const unsigned int min_thresh = + MIN(((unsigned int)x->encode_breakout << 4), max_thresh); - // Calculate threshold according to dequant value. - thresh_ac = (xd->plane[0].dequant[1] * xd->plane[0].dequant[1]) / 9; - thresh_ac = clamp(thresh_ac, min_thresh, max_thresh); + // Calculate threshold according to dequant value. + thresh_ac = (xd->plane[0].dequant[1] * xd->plane[0].dequant[1]) / 9; + thresh_ac = clamp(thresh_ac, min_thresh, max_thresh); - // Adjust ac threshold according to partition size. - thresh_ac >>= 8 - (b_width_log2_lookup[bsize] + - b_height_log2_lookup[bsize]); + // Adjust ac threshold according to partition size. + thresh_ac >>= + 8 - (b_width_log2_lookup[bsize] + b_height_log2_lookup[bsize]); - thresh_dc = (xd->plane[0].dequant[0] * xd->plane[0].dequant[0] >> 6); + thresh_dc = (xd->plane[0].dequant[0] * xd->plane[0].dequant[0] >> 6); + } else { + thresh_ac = 0; + thresh_dc = 0; + } // Y skipping condition checking for ac and dc. if (var <= thresh_ac && (sse - var) <= thresh_dc) { @@ -451,6 +626,16 @@ int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, best_pred_filter = mbmi->interp_filter; best_ref_frame = ref_frame; skip_txfm = x->skip_txfm; + + if (cpi->sf.reuse_inter_pred_sby) { + if (best_pred != NULL) + free_pred_buffer(best_pred); + + best_pred = this_mode_pred; + } + } else { + if (cpi->sf.reuse_inter_pred_sby) + free_pred_buffer(this_mode_pred); } if (x->skip) @@ -458,6 +643,19 @@ int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, } } + // If best prediction is not in dst buf, then copy the prediction block from + // temp buf to dst buf. + if (cpi->sf.reuse_inter_pred_sby && best_pred->data != orig_dst.buf) { + uint8_t *copy_from, *copy_to; + + pd->dst = orig_dst; + copy_to = pd->dst.buf; + + copy_from = best_pred->data; + + vp9_convolve_copy(copy_from, bw, copy_to, pd->dst.stride, NULL, 0, NULL, 0, + bw, bh); + } mbmi->mode = best_mode; mbmi->interp_filter = best_pred_filter; @@ -469,18 +667,49 @@ int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, // Perform intra prediction search, if the best SAD is above a certain // threshold. if (!x->skip && best_rd > inter_mode_thresh && - bsize < cpi->sf.max_intra_bsize) { - for (this_mode = DC_PRED; this_mode <= DC_PRED; ++this_mode) { - vp9_predict_intra_block(xd, 0, b_width_log2(bsize), - mbmi->tx_size, this_mode, - &p->src.buf[0], p->src.stride, - &pd->dst.buf[0], pd->dst.stride, 0, 0, 0); + bsize <= cpi->sf.max_intra_bsize) { + int i, j; + const int step = 1 << mbmi->tx_size; + const int width = num_4x4_blocks_wide_lookup[bsize]; + const int height = num_4x4_blocks_high_lookup[bsize]; + + int rate2 = 0; + int64_t dist2 = 0; + const int dst_stride = pd->dst.stride; + const int src_stride = p->src.stride; + int block_idx = 0; + + for (this_mode = DC_PRED; this_mode <= DC_PRED; ++this_mode) { + if (cpi->sf.reuse_inter_pred_sby) { + pd->dst.buf = tmp[0].data; + pd->dst.stride = bw; + } + + for (j = 0; j < height; j += step) { + for (i = 0; i < width; i += step) { + vp9_predict_intra_block(xd, block_idx, b_width_log2(bsize), + mbmi->tx_size, this_mode, + &p->src.buf[4 * (j * dst_stride + i)], + src_stride, + &pd->dst.buf[4 * (j * dst_stride + i)], + dst_stride, i, j, 0); + model_rd_for_sb_y(cpi, bsize, x, xd, &rate, &dist, &var_y, &sse_y); + rate2 += rate; + dist2 += dist; + ++block_idx; + } + } + + rate = rate2; + dist = dist2; - model_rd_for_sb_y(cpi, bsize, x, xd, &rate, &dist, &var_y, &sse_y); rate += cpi->mbmode_cost[this_mode]; rate += intra_cost_penalty; this_rd = RDCOST(x->rdmult, x->rddiv, rate, dist); + if (cpi->sf.reuse_inter_pred_sby) + pd->dst = orig_dst; + if (this_rd + intra_mode_cost < best_rd) { best_rd = this_rd; *returnrate = rate; @@ -494,9 +723,9 @@ int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, } } } + #if CONFIG_DENOISING - vp9_denoiser_denoise(&cpi->denoiser, x, cpi->common.mi_grid_visible, mi_row, - mi_col, bsize); + vp9_denoiser_denoise(&cpi->denoiser, x, mi_row, mi_col, bsize); #endif return INT64_MAX; diff --git a/vp9/encoder/vp9_pickmode.h b/vp9/encoder/vp9_pickmode.h index a9c948d31..3d89974fc 100644 --- a/vp9/encoder/vp9_pickmode.h +++ b/vp9/encoder/vp9_pickmode.h @@ -17,6 +17,12 @@ extern "C" { #endif +typedef struct { + uint8_t *data; + int stride; + int in_use; +} PRED_BUFFER; + int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, const struct TileInfo *const tile, int mi_row, int mi_col, diff --git a/vp9/encoder/vp9_ratectrl.c b/vp9/encoder/vp9_ratectrl.c index f775003e9..e1109838c 100644 --- a/vp9/encoder/vp9_ratectrl.c +++ b/vp9/encoder/vp9_ratectrl.c @@ -186,6 +186,8 @@ static void update_buffer_level(VP9_COMP *cpi, int encoded_frame_size) { } void vp9_rc_init(const VP9EncoderConfig *oxcf, int pass, RATE_CONTROL *rc) { + int i; + if (pass == 0 && oxcf->rc_mode == VPX_CBR) { rc->avg_frame_qindex[KEY_FRAME] = oxcf->worst_allowed_q; rc->avg_frame_qindex[INTER_FRAME] = oxcf->worst_allowed_q; @@ -227,9 +229,9 @@ void vp9_rc_init(const VP9EncoderConfig *oxcf, int pass, RATE_CONTROL *rc) { rc->tot_q = 0.0; rc->avg_q = vp9_convert_qindex_to_q(oxcf->worst_allowed_q); - rc->rate_correction_factor = 1.0; - rc->key_frame_rate_correction_factor = 1.0; - rc->gf_rate_correction_factor = 1.0; + for (i = 0; i < RATE_FACTOR_LEVELS; ++i) { + rc->rate_correction_factors[i] = 1.0; + } } int vp9_rc_drop_frame(VP9_COMP *cpi) { @@ -271,28 +273,40 @@ int vp9_rc_drop_frame(VP9_COMP *cpi) { } static double get_rate_correction_factor(const VP9_COMP *cpi) { + const RATE_CONTROL *const rc = &cpi->rc; + if (cpi->common.frame_type == KEY_FRAME) { - return cpi->rc.key_frame_rate_correction_factor; + return rc->rate_correction_factors[KF_STD]; + } else if (cpi->pass == 2) { + RATE_FACTOR_LEVEL rf_lvl = + cpi->twopass.gf_group.rf_level[cpi->twopass.gf_group.index]; + return rc->rate_correction_factors[rf_lvl]; } else { if ((cpi->refresh_alt_ref_frame || cpi->refresh_golden_frame) && - !cpi->rc.is_src_frame_alt_ref && + !rc->is_src_frame_alt_ref && !(cpi->use_svc && cpi->oxcf.rc_mode == VPX_CBR)) - return cpi->rc.gf_rate_correction_factor; + return rc->rate_correction_factors[GF_ARF_STD]; else - return cpi->rc.rate_correction_factor; + return rc->rate_correction_factors[INTER_NORMAL]; } } static void set_rate_correction_factor(VP9_COMP *cpi, double factor) { + RATE_CONTROL *const rc = &cpi->rc; + if (cpi->common.frame_type == KEY_FRAME) { - cpi->rc.key_frame_rate_correction_factor = factor; + rc->rate_correction_factors[KF_STD] = factor; + } else if (cpi->pass == 2) { + RATE_FACTOR_LEVEL rf_lvl = + cpi->twopass.gf_group.rf_level[cpi->twopass.gf_group.index]; + rc->rate_correction_factors[rf_lvl] = factor; } else { if ((cpi->refresh_alt_ref_frame || cpi->refresh_golden_frame) && - !cpi->rc.is_src_frame_alt_ref && + !rc->is_src_frame_alt_ref && !(cpi->use_svc && cpi->oxcf.rc_mode == VPX_CBR)) - cpi->rc.gf_rate_correction_factor = factor; + rc->rate_correction_factors[GF_ARF_STD] = factor; else - cpi->rc.rate_correction_factor = factor; + rc->rate_correction_factors[INTER_NORMAL] = factor; } } @@ -628,8 +642,8 @@ static int rc_pick_q_and_bounds_one_pass_vbr(const VP9_COMP *cpi, if (frame_is_intra_only(cm)) { active_best_quality = rc->best_quality; -#if !CONFIG_MULTIPLE_ARF - // Handle the special case for key frames forced when we have75 reached + + // Handle the special case for key frames forced when we have reached // the maximum key frame interval. Here force the Q to a range // based on the ambient Q to reduce the risk of popping. if (rc->this_key_frame_forced) { @@ -660,13 +674,6 @@ static int rc_pick_q_and_bounds_one_pass_vbr(const VP9_COMP *cpi, active_best_quality += vp9_compute_qdelta(rc, q_val, q_val * q_adj_factor); } -#else - double current_q; - // Force the KF quantizer to be 30% of the active_worst_quality. - current_q = vp9_convert_qindex_to_q(active_worst_quality); - active_best_quality = active_worst_quality - + vp9_compute_qdelta(rc, current_q, current_q * 0.3); -#endif } else if (!rc->is_src_frame_alt_ref && (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame)) { // Use the lower of active_worst_quality and recent @@ -768,23 +775,7 @@ static int rc_pick_q_and_bounds_one_pass_vbr(const VP9_COMP *cpi, q = *top_index; } } -#if CONFIG_MULTIPLE_ARF - // Force the quantizer determined by the coding order pattern. - if (cpi->multi_arf_enabled && (cm->frame_type != KEY_FRAME) && - cpi->oxcf.rc_mode != VPX_Q) { - double new_q; - double current_q = vp9_convert_qindex_to_q(active_worst_quality); - int level = cpi->this_frame_weight; - assert(level >= 0); - new_q = current_q * (1.0 - (0.2 * (cpi->max_arf_level - level))); - q = active_worst_quality + - vp9_compute_qdelta(rc, current_q, new_q); - *bottom_index = q; - *top_index = q; - printf("frame:%d q:%d\n", cm->current_video_frame, q); - } -#endif assert(*top_index <= rc->worst_quality && *top_index >= rc->best_quality); assert(*bottom_index <= rc->worst_quality && @@ -805,7 +796,6 @@ static int rc_pick_q_and_bounds_two_pass(const VP9_COMP *cpi, int q; if (frame_is_intra_only(cm) || vp9_is_upper_layer_key_frame(cpi)) { -#if !CONFIG_MULTIPLE_ARF // Handle the special case for key frames forced when we have75 reached // the maximum key frame interval. Here force the Q to a range // based on the ambient Q to reduce the risk of popping. @@ -840,13 +830,6 @@ static int rc_pick_q_and_bounds_two_pass(const VP9_COMP *cpi, active_best_quality += vp9_compute_qdelta(rc, q_val, q_val * q_adj_factor); } -#else - double current_q; - // Force the KF quantizer to be 30% of the active_worst_quality. - current_q = vp9_convert_qindex_to_q(active_worst_quality); - active_best_quality = active_worst_quality - + vp9_compute_qdelta(rc, current_q, current_q * 0.3); -#endif } else if (!rc->is_src_frame_alt_ref && (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame)) { // Use the lower of active_worst_quality and recent @@ -909,21 +892,20 @@ static int rc_pick_q_and_bounds_two_pass(const VP9_COMP *cpi, *bottom_index = active_best_quality; #if LIMIT_QRANGE_FOR_ALTREF_AND_KEY + vp9_clear_system_state(); { - int qdelta = 0; - vp9_clear_system_state(); - - // Limit Q range for the adaptive loop. - if ((cm->frame_type == KEY_FRAME || vp9_is_upper_layer_key_frame(cpi)) && - !rc->this_key_frame_forced) { - qdelta = vp9_compute_qdelta_by_rate(&cpi->rc, cm->frame_type, - active_worst_quality, 2.0); - } else if (!rc->is_src_frame_alt_ref && - (oxcf->rc_mode != VPX_CBR) && - (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame)) { - qdelta = vp9_compute_qdelta_by_rate(&cpi->rc, cm->frame_type, - active_worst_quality, 1.75); - } + const GF_GROUP *const gf_group = &cpi->twopass.gf_group; + const double rate_factor_deltas[RATE_FACTOR_LEVELS] = { + 1.00, // INTER_NORMAL + 1.00, // INTER_HIGH + 1.50, // GF_ARF_LOW + 1.75, // GF_ARF_STD + 2.00, // KF_STD + }; + const double rate_factor = + rate_factor_deltas[gf_group->rf_level[gf_group->index]]; + int qdelta = vp9_compute_qdelta_by_rate(&cpi->rc, cm->frame_type, + active_worst_quality, rate_factor); *top_index = active_worst_quality + qdelta; *top_index = (*top_index > *bottom_index) ? *top_index : *bottom_index; } @@ -945,23 +927,7 @@ static int rc_pick_q_and_bounds_two_pass(const VP9_COMP *cpi, q = *top_index; } } -#if CONFIG_MULTIPLE_ARF - // Force the quantizer determined by the coding order pattern. - if (cpi->multi_arf_enabled && (cm->frame_type != KEY_FRAME) && - cpi->oxcf.rc_mode != VPX_Q) { - double new_q; - double current_q = vp9_convert_qindex_to_q(active_worst_quality); - int level = cpi->this_frame_weight; - assert(level >= 0); - new_q = current_q * (1.0 - (0.2 * (cpi->max_arf_level - level))); - q = active_worst_quality + - vp9_compute_qdelta(rc, current_q, new_q); - *bottom_index = q; - *top_index = q; - printf("frame:%d q:%d\n", cm->current_video_frame, q); - } -#endif assert(*top_index <= rc->worst_quality && *top_index >= rc->best_quality); assert(*bottom_index <= rc->worst_quality && @@ -1026,11 +992,8 @@ static void update_alt_ref_frame_stats(VP9_COMP *cpi) { RATE_CONTROL *const rc = &cpi->rc; rc->frames_since_golden = 0; -#if CONFIG_MULTIPLE_ARF - if (!cpi->multi_arf_enabled) -#endif - // Clear the alternate reference update pending flag. - rc->source_alt_ref_pending = 0; + // Mark the alt ref as done (setting to 0 means no further alt refs pending). + rc->source_alt_ref_pending = 0; // Set the alternate reference frame active flag rc->source_alt_ref_active = 1; @@ -1044,8 +1007,13 @@ static void update_golden_frame_stats(VP9_COMP *cpi) { // this frame refreshes means next frames don't unless specified by user rc->frames_since_golden = 0; - if (!rc->source_alt_ref_pending) + if (cpi->pass == 2) { + if (!rc->source_alt_ref_pending && + cpi->twopass.gf_group.rf_level[0] == GF_ARF_STD) rc->source_alt_ref_active = 0; + } else if (!rc->source_alt_ref_pending) { + rc->source_alt_ref_active = 0; + } // Decrement count down till next gf if (rc->frames_till_gf_update_due > 0) @@ -1388,6 +1356,8 @@ void vp9_rc_set_gf_max_interval(const VP9EncoderConfig *const oxcf, // Extended interval for genuinely static scenes rc->static_scene_max_gf_interval = oxcf->key_freq >> 1; + if (rc->static_scene_max_gf_interval > (MAX_LAG_BUFFERS * 2)) + rc->static_scene_max_gf_interval = MAX_LAG_BUFFERS * 2; if (is_altref_enabled(oxcf)) { if (rc->static_scene_max_gf_interval > oxcf->lag_in_frames - 1) diff --git a/vp9/encoder/vp9_ratectrl.h b/vp9/encoder/vp9_ratectrl.h index f1a4a3f6d..a15235c92 100644 --- a/vp9/encoder/vp9_ratectrl.h +++ b/vp9/encoder/vp9_ratectrl.h @@ -23,6 +23,15 @@ extern "C" { // Bits Per MB at different Q (Multiplied by 512) #define BPER_MB_NORMBITS 9 +typedef enum { + INTER_NORMAL = 0, + INTER_HIGH = 1, + GF_ARF_LOW = 2, + GF_ARF_STD = 3, + KF_STD = 4, + RATE_FACTOR_LEVELS = 5 +} RATE_FACTOR_LEVEL; + typedef struct { // Rate targetting variables int base_frame_target; // A baseline frame target before adjustment @@ -37,9 +46,7 @@ typedef struct { int last_boost; int kf_boost; - double rate_correction_factor; - double key_frame_rate_correction_factor; - double gf_rate_correction_factor; + double rate_correction_factors[RATE_FACTOR_LEVELS]; int frames_since_golden; int frames_till_gf_update_due; diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c index 429dcb1c0..9402d4a4e 100644 --- a/vp9/encoder/vp9_rdopt.c +++ b/vp9/encoder/vp9_rdopt.c @@ -1277,9 +1277,6 @@ static int64_t rd_pick_intra_sby_mode(VP9_COMP *cpi, MACROBLOCK *x, MODE_INFO *above_mi = xd->mi[-xd->mi_stride]; MODE_INFO *left_mi = xd->left_available ? xd->mi[-1] : NULL; - if (!(cpi->sf.intra_y_mode_mask[max_txsize_lookup[bsize]] & (1 << mode))) - continue; - if (cpi->common.frame_type == KEY_FRAME) { const PREDICTION_MODE A = vp9_above_block_mode(mic, above_mi, 0); const PREDICTION_MODE L = vp9_left_block_mode(mic, left_mi, 0); @@ -1450,16 +1447,8 @@ static void choose_intra_uv_mode(VP9_COMP *cpi, PICK_MODE_CONTEXT *ctx, static int cost_mv_ref(const VP9_COMP *cpi, PREDICTION_MODE mode, int mode_context) { - const MACROBLOCK *const x = &cpi->mb; - const int segment_id = x->e_mbd.mi[0]->mbmi.segment_id; - - // Don't account for mode here if segment skip is enabled. - if (!vp9_segfeature_active(&cpi->common.seg, segment_id, SEG_LVL_SKIP)) { - assert(is_inter_mode(mode)); - return cpi->inter_mode_cost[mode_context][INTER_OFFSET(mode)]; - } else { - return 0; - } + assert(is_inter_mode(mode)); + return cpi->inter_mode_cost[mode_context][INTER_OFFSET(mode)]; } static void joint_motion_search(VP9_COMP *cpi, MACROBLOCK *x, @@ -2077,9 +2066,9 @@ static int64_t rd_pick_best_sub8x8_mode(VP9_COMP *cpi, MACROBLOCK *x, return bsi->segment_rd; } -static void mv_pred(VP9_COMP *cpi, MACROBLOCK *x, - uint8_t *ref_y_buffer, int ref_y_stride, - int ref_frame, BLOCK_SIZE block_size ) { +void vp9_mv_pred(VP9_COMP *cpi, MACROBLOCK *x, + uint8_t *ref_y_buffer, int ref_y_stride, + int ref_frame, BLOCK_SIZE block_size) { MACROBLOCKD *xd = &x->e_mbd; MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi; int_mv this_mv; @@ -2218,12 +2207,12 @@ static void store_coding_context(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx, sizeof(*best_filter_diff) * SWITCHABLE_FILTER_CONTEXTS); } -static void setup_pred_block(const MACROBLOCKD *xd, - struct buf_2d dst[MAX_MB_PLANE], - const YV12_BUFFER_CONFIG *src, - int mi_row, int mi_col, - const struct scale_factors *scale, - const struct scale_factors *scale_uv) { +void vp9_setup_pred_block(const MACROBLOCKD *xd, + struct buf_2d dst[MAX_MB_PLANE], + const YV12_BUFFER_CONFIG *src, + int mi_row, int mi_col, + const struct scale_factors *scale, + const struct scale_factors *scale_uv) { int i; dst[0].buf = src->y_buffer; @@ -2261,7 +2250,7 @@ void vp9_setup_buffer_inter(VP9_COMP *cpi, MACROBLOCK *x, // TODO(jkoleszar): Is the UV buffer ever used here? If so, need to make this // use the UV scaling factors. - setup_pred_block(xd, yv12_mb[ref_frame], yv12, mi_row, mi_col, sf, sf); + vp9_setup_pred_block(xd, yv12_mb[ref_frame], yv12, mi_row, mi_col, sf, sf); // Gets an initial list of candidate vectors from neighbours and orders them vp9_find_mv_refs(cm, xd, tile, mi, ref_frame, candidates, mi_row, mi_col); @@ -2275,8 +2264,8 @@ void vp9_setup_buffer_inter(VP9_COMP *cpi, MACROBLOCK *x, // in full and choose the best as the centre point for subsequent searches. // The current implementation doesn't support scaling. if (!vp9_is_scaled(sf) && block_size >= BLOCK_8X8) - mv_pred(cpi, x, yv12_mb[ref_frame][0].buf, yv12->y_stride, - ref_frame, block_size); + vp9_mv_pred(cpi, x, yv12_mb[ref_frame][0].buf, yv12->y_stride, + ref_frame, block_size); } const YV12_BUFFER_CONFIG *vp9_get_scaled_ref_frame(const VP9_COMP *cpi, @@ -2802,13 +2791,7 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, *rate2 += vp9_get_switchable_rate(cpi); if (!is_comp_pred) { - if (!x->in_active_map || - vp9_segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) { - if (psse) - *psse = 0; - *distortion = 0; - x->skip = 1; - } else if (cpi->allow_encode_breakout && x->encode_breakout) { + if (cpi->allow_encode_breakout && x->encode_breakout) { const BLOCK_SIZE y_size = get_plane_block_size(bsize, &xd->plane[0]); const BLOCK_SIZE uv_size = get_plane_block_size(bsize, &xd->plane[1]); unsigned int var, sse; @@ -3117,13 +3100,6 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, } } - // If the segment skip feature is enabled.... - // then do nothing if the current mode is not allowed.. - if (vp9_segfeature_active(seg, segment_id, SEG_LVL_SKIP)) { - mode_skip_mask = ~(1 << THR_ZEROMV); - inter_mode_mask = (1 << ZEROMV); - } - // Disable this drop out case if the ref frame // segment level feature is enabled for this segment. This is to // prevent the possibility that we end up unable to pick any mode. @@ -3162,21 +3138,6 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, mode_skip_mask |= all_intra_modes; } - if (!x->in_active_map) { - int mode_index; - assert(cpi->ref_frame_flags & VP9_LAST_FLAG); - if (frame_mv[NEARESTMV][LAST_FRAME].as_int == 0) - mode_index = THR_NEARESTMV; - else if (frame_mv[NEARMV][LAST_FRAME].as_int == 0) - mode_index = THR_NEARMV; - else - mode_index = THR_ZEROMV; - mode_skip_mask = ~(1 << mode_index); - mode_skip_start = MAX_MODES; - inter_mode_mask = (1 << NEARESTMV) | (1 << NEARMV) | (1 << ZEROMV) | - (1 << NEWMV); - } - for (mode_index = 0; mode_index < MAX_MODES; ++mode_index) { int mode_excluded = 0; int64_t this_rd = INT64_MAX; @@ -3266,17 +3227,14 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, } } } else { - if (x->in_active_map && - !vp9_segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) { - const MV_REFERENCE_FRAME ref_frames[2] = {ref_frame, second_ref_frame}; - if (!check_best_zero_mv(cpi, mbmi->mode_context, frame_mv, - inter_mode_mask, this_mode, ref_frames)) - continue; - } + const MV_REFERENCE_FRAME ref_frames[2] = {ref_frame, second_ref_frame}; + if (!check_best_zero_mv(cpi, mbmi->mode_context, frame_mv, + inter_mode_mask, this_mode, ref_frames)) + continue; } mbmi->mode = this_mode; - mbmi->uv_mode = x->in_active_map ? DC_PRED : this_mode; + mbmi->uv_mode = DC_PRED; mbmi->ref_frame[0] = ref_frame; mbmi->ref_frame[1] = second_ref_frame; // Evaluate all sub-pel filters irrespective of whether we can use @@ -3348,31 +3306,20 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, } if (!disable_skip) { - // Test for the condition where skip block will be activated - // because there are no non zero coefficients and make any - // necessary adjustment for rate. Ignore if skip is coded at - // segment level as the cost wont have been added in. - // Is Mb level skip allowed (i.e. not coded at segment level). - const int mb_skip_allowed = !vp9_segfeature_active(seg, segment_id, - SEG_LVL_SKIP); - if (skippable) { + vp9_prob skip_prob = vp9_get_skip_prob(cm, xd); + // Back out the coefficient coding costs rate2 -= (rate_y + rate_uv); // for best yrd calculation rate_uv = 0; - if (mb_skip_allowed) { - int prob_skip_cost; - - // Cost the skip mb case - vp9_prob skip_prob = vp9_get_skip_prob(cm, xd); - if (skip_prob) { - prob_skip_cost = vp9_cost_bit(skip_prob, 1); - rate2 += prob_skip_cost; - } + // Cost the skip mb case + if (skip_prob) { + int prob_skip_cost = vp9_cost_bit(skip_prob, 1); + rate2 += prob_skip_cost; } - } else if (mb_skip_allowed && ref_frame != INTRA_FRAME && !xd->lossless) { + } else if (ref_frame != INTRA_FRAME && !xd->lossless) { if (RDCOST(x->rdmult, x->rddiv, rate_y + rate_uv, distortion2) < RDCOST(x->rdmult, x->rddiv, 0, total_sse)) { // Add in the cost of the no skip flag. @@ -3387,7 +3334,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, rate_uv = 0; this_skip2 = 1; } - } else if (mb_skip_allowed) { + } else { // Add in the cost of the no skip flag. rate2 += vp9_cost_bit(vp9_get_skip_prob(cm, xd), 0); } @@ -3596,16 +3543,6 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, vp9_zero(best_tx_diff); } - if (!x->in_active_map) { - assert(mbmi->ref_frame[0] == LAST_FRAME); - assert(mbmi->ref_frame[1] == NONE); - assert(mbmi->mode == NEARESTMV || - mbmi->mode == NEARMV || - mbmi->mode == ZEROMV); - assert(frame_mv[mbmi->mode][LAST_FRAME].as_int == 0); - assert(mbmi->mode == mbmi->uv_mode); - } - set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]); store_coding_context(x, ctx, best_mode_index, best_pred_diff, best_tx_diff, best_filter_diff); @@ -3613,6 +3550,113 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, return best_rd; } +int64_t vp9_rd_pick_inter_mode_sb_seg_skip(VP9_COMP *cpi, MACROBLOCK *x, + const TileInfo *const tile, + int mi_row, int mi_col, + int *returnrate, + int64_t *returndistortion, + BLOCK_SIZE bsize, + PICK_MODE_CONTEXT *ctx, + int64_t best_rd_so_far) { + VP9_COMMON *const cm = &cpi->common; + RD_OPT *const rd_opt = &cpi->rd; + MACROBLOCKD *const xd = &x->e_mbd; + MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi; + const struct segmentation *const seg = &cm->seg; + unsigned char segment_id = mbmi->segment_id; + const int comp_pred = 0; + int i; + int64_t best_tx_diff[TX_MODES]; + int64_t best_pred_diff[REFERENCE_MODES]; + int64_t best_filter_diff[SWITCHABLE_FILTER_CONTEXTS]; + unsigned int ref_costs_single[MAX_REF_FRAMES], ref_costs_comp[MAX_REF_FRAMES]; + vp9_prob comp_mode_p; + INTERP_FILTER best_filter = SWITCHABLE; + int64_t this_rd = INT64_MAX; + int rate2 = 0; + const int64_t distortion2 = 0; + + x->skip_encode = cpi->sf.skip_encode_frame && x->q_index < QIDX_SKIP_THRESH; + + estimate_ref_frame_costs(cm, xd, segment_id, ref_costs_single, ref_costs_comp, + &comp_mode_p); + + for (i = 0; i < MAX_REF_FRAMES; ++i) + x->pred_sse[i] = INT_MAX; + for (i = LAST_FRAME; i < MAX_REF_FRAMES; ++i) + x->pred_mv_sad[i] = INT_MAX; + + *returnrate = INT_MAX; + + assert(vp9_segfeature_active(seg, segment_id, SEG_LVL_SKIP)); + + mbmi->mode = ZEROMV; + mbmi->uv_mode = DC_PRED; + mbmi->ref_frame[0] = LAST_FRAME; + mbmi->ref_frame[1] = NONE; + mbmi->mv[0].as_int = 0; + x->skip = 1; + + // Search for best switchable filter by checking the variance of + // pred error irrespective of whether the filter will be used + rd_opt->mask_filter = 0; + for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i) + rd_opt->filter_cache[i] = INT64_MAX; + + if (cm->interp_filter != BILINEAR) { + best_filter = EIGHTTAP; + if (cm->interp_filter == SWITCHABLE && + x->source_variance >= cpi->sf.disable_filter_search_var_thresh) { + int rs; + int best_rs = INT_MAX; + for (i = 0; i < SWITCHABLE_FILTERS; ++i) { + mbmi->interp_filter = i; + rs = vp9_get_switchable_rate(cpi); + if (rs < best_rs) { + best_rs = rs; + best_filter = mbmi->interp_filter; + } + } + } + } + // Set the appropriate filter + if (cm->interp_filter == SWITCHABLE) { + mbmi->interp_filter = best_filter; + rate2 += vp9_get_switchable_rate(cpi); + } else { + mbmi->interp_filter = cm->interp_filter; + } + + if (cm->reference_mode == REFERENCE_MODE_SELECT) + rate2 += vp9_cost_bit(comp_mode_p, comp_pred); + + // Estimate the reference frame signaling cost and add it + // to the rolling cost variable. + rate2 += ref_costs_single[LAST_FRAME]; + this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2); + + *returnrate = rate2; + *returndistortion = distortion2; + + if (this_rd >= best_rd_so_far) + return INT64_MAX; + + assert((cm->interp_filter == SWITCHABLE) || + (cm->interp_filter == mbmi->interp_filter)); + + update_rd_thresh_fact(cpi, bsize, THR_ZEROMV); + + vp9_zero(best_pred_diff); + vp9_zero(best_filter_diff); + vp9_zero(best_tx_diff); + + if (!x->select_tx_size) + swap_block_ptr(x, ctx, 1, 0, 0, MAX_MB_PLANE); + store_coding_context(x, ctx, THR_ZEROMV, + best_pred_diff, best_tx_diff, best_filter_diff); + + return this_rd; +} int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, const TileInfo *const tile, diff --git a/vp9/encoder/vp9_rdopt.h b/vp9/encoder/vp9_rdopt.h index 6e5631795..3dfe2d07f 100644 --- a/vp9/encoder/vp9_rdopt.h +++ b/vp9/encoder/vp9_rdopt.h @@ -13,7 +13,10 @@ #include -#include "vp9/encoder/vp9_encoder.h" +#include "vp9/common/vp9_blockd.h" + +#include "vp9/encoder/vp9_block.h" +#include "vp9/encoder/vp9_context_tree.h" #ifdef __cplusplus extern "C" { @@ -30,21 +33,104 @@ extern "C" { #define INVALID_MV 0x80008000 +#define MAX_MODES 30 +#define MAX_REFS 6 + +// This enumerator type needs to be kept aligned with the mode order in +// const MODE_DEFINITION vp9_mode_order[MAX_MODES] used in the rd code. +typedef enum { + THR_NEARESTMV, + THR_NEARESTA, + THR_NEARESTG, + + THR_DC, + + THR_NEWMV, + THR_NEWA, + THR_NEWG, + + THR_NEARMV, + THR_NEARA, + THR_COMP_NEARESTLA, + THR_COMP_NEARESTGA, + + THR_TM, + + THR_COMP_NEARLA, + THR_COMP_NEWLA, + THR_NEARG, + THR_COMP_NEARGA, + THR_COMP_NEWGA, + + THR_ZEROMV, + THR_ZEROG, + THR_ZEROA, + THR_COMP_ZEROLA, + THR_COMP_ZEROGA, + + THR_H_PRED, + THR_V_PRED, + THR_D135_PRED, + THR_D207_PRED, + THR_D153_PRED, + THR_D63_PRED, + THR_D117_PRED, + THR_D45_PRED, +} THR_MODES; + +typedef enum { + THR_LAST, + THR_GOLD, + THR_ALTR, + THR_COMP_LA, + THR_COMP_GA, + THR_INTRA, +} THR_MODES_SUB8X8; + +typedef struct RD_OPT { + // Thresh_mult is used to set a threshold for the rd score. A higher value + // means that we will accept the best mode so far more often. This number + // is used in combination with the current block size, and thresh_freq_fact + // to pick a threshold. + int thresh_mult[MAX_MODES]; + int thresh_mult_sub8x8[MAX_REFS]; + + int threshes[MAX_SEGMENTS][BLOCK_SIZES][MAX_MODES]; + int thresh_freq_fact[BLOCK_SIZES][MAX_MODES]; + + int64_t comp_pred_diff[REFERENCE_MODES]; + int64_t prediction_type_threshes[MAX_REF_FRAMES][REFERENCE_MODES]; + int64_t tx_select_diff[TX_MODES]; + // FIXME(rbultje) can this overflow? + int tx_select_threshes[MAX_REF_FRAMES][TX_MODES]; + + int64_t filter_diff[SWITCHABLE_FILTER_CONTEXTS]; + int64_t filter_threshes[MAX_REF_FRAMES][SWITCHABLE_FILTER_CONTEXTS]; + int64_t filter_cache[SWITCHABLE_FILTER_CONTEXTS]; + int64_t mask_filter; + + int RDMULT; + int RDDIV; +} RD_OPT; + + struct TileInfo; +struct VP9_COMP; +struct macroblock; -int vp9_compute_rd_mult(const VP9_COMP *cpi, int qindex); +int vp9_compute_rd_mult(const struct VP9_COMP *cpi, int qindex); -void vp9_initialize_rd_consts(VP9_COMP *cpi); +void vp9_initialize_rd_consts(struct VP9_COMP *cpi); -void vp9_initialize_me_consts(VP9_COMP *cpi, int qindex); +void vp9_initialize_me_consts(struct VP9_COMP *cpi, int qindex); void vp9_model_rd_from_var_lapndz(unsigned int var, unsigned int n, unsigned int qstep, int *rate, int64_t *dist); -int vp9_get_switchable_rate(const VP9_COMP *cpi); +int vp9_get_switchable_rate(const struct VP9_COMP *cpi); -void vp9_setup_buffer_inter(VP9_COMP *cpi, MACROBLOCK *x, +void vp9_setup_buffer_inter(struct VP9_COMP *cpi, struct macroblock *x, const TileInfo *const tile, MV_REFERENCE_FRAME ref_frame, BLOCK_SIZE block_size, @@ -53,14 +139,14 @@ void vp9_setup_buffer_inter(VP9_COMP *cpi, MACROBLOCK *x, int_mv frame_near_mv[MAX_REF_FRAMES], struct buf_2d yv12_mb[4][MAX_MB_PLANE]); -const YV12_BUFFER_CONFIG *vp9_get_scaled_ref_frame(const VP9_COMP *cpi, +const YV12_BUFFER_CONFIG *vp9_get_scaled_ref_frame(const struct VP9_COMP *cpi, int ref_frame); -void vp9_rd_pick_intra_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, +void vp9_rd_pick_intra_mode_sb(struct VP9_COMP *cpi, struct macroblock *x, int *r, int64_t *d, BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx, int64_t best_rd); -int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, +int64_t vp9_rd_pick_inter_mode_sb(struct VP9_COMP *cpi, struct macroblock *x, const struct TileInfo *const tile, int mi_row, int mi_col, int *returnrate, @@ -69,7 +155,18 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, PICK_MODE_CONTEXT *ctx, int64_t best_rd_so_far); -int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, +int64_t vp9_rd_pick_inter_mode_sb_seg_skip(struct VP9_COMP *cpi, + struct macroblock *x, + const TileInfo *const tile, + int mi_row, int mi_col, + int *returnrate, + int64_t *returndistortion, + BLOCK_SIZE bsize, + PICK_MODE_CONTEXT *ctx, + int64_t best_rd_so_far); + +int64_t vp9_rd_pick_inter_mode_sub8x8(struct VP9_COMP *cpi, + struct macroblock *x, const struct TileInfo *const tile, int mi_row, int mi_col, int *returnrate, @@ -85,15 +182,25 @@ void vp9_get_entropy_contexts(BLOCK_SIZE bsize, TX_SIZE tx_size, ENTROPY_CONTEXT t_above[16], ENTROPY_CONTEXT t_left[16]); -void vp9_set_rd_speed_thresholds(VP9_COMP *cpi); +void vp9_set_rd_speed_thresholds(struct VP9_COMP *cpi); -void vp9_set_rd_speed_thresholds_sub8x8(VP9_COMP *cpi); +void vp9_set_rd_speed_thresholds_sub8x8(struct VP9_COMP *cpi); static INLINE int rd_less_than_thresh(int64_t best_rd, int thresh, int thresh_fact) { return best_rd < ((int64_t)thresh * thresh_fact >> 5) || thresh == INT_MAX; } +void vp9_mv_pred(struct VP9_COMP *cpi, MACROBLOCK *x, + uint8_t *ref_y_buffer, int ref_y_stride, + int ref_frame, BLOCK_SIZE block_size); + +void vp9_setup_pred_block(const MACROBLOCKD *xd, + struct buf_2d dst[MAX_MB_PLANE], + const YV12_BUFFER_CONFIG *src, + int mi_row, int mi_col, + const struct scale_factors *scale, + const struct scale_factors *scale_uv); #ifdef __cplusplus } // extern "C" #endif diff --git a/vp9/encoder/vp9_speed_features.c b/vp9/encoder/vp9_speed_features.c index 6beb87234..1a14da3c8 100644 --- a/vp9/encoder/vp9_speed_features.c +++ b/vp9/encoder/vp9_speed_features.c @@ -84,16 +84,17 @@ static void set_good_speed_feature(VP9_COMP *cpi, VP9_COMMON *cm, if (speed >= 2) { if (MIN(cm->width, cm->height) >= 720) { - sf->lf_motion_threshold = LOW_MOITION_THRESHOLD; + sf->lf_motion_threshold = LOW_MOTION_THRESHOLD; sf->last_partitioning_redo_frequency = 3; sf->disable_split_mask = cm->show_frame ? DISABLE_ALL_SPLIT : DISABLE_ALL_INTER_SPLIT; } else { sf->disable_split_mask = LAST_AND_INTRA_SPLIT_ONLY; sf->last_partitioning_redo_frequency = 2; - sf->lf_motion_threshold = NO_MOITION_THRESHOLD; + sf->lf_motion_threshold = NO_MOTION_THRESHOLD; } - sf->adaptive_pred_interp_filter = 2; + + sf->adaptive_pred_interp_filter = 0; sf->reference_masking = 1; sf->mode_search_skip_flags = FLAG_SKIP_INTRA_DIRMISMATCH | FLAG_SKIP_INTRA_BESTINTER | @@ -114,7 +115,7 @@ static void set_good_speed_feature(VP9_COMP *cpi, VP9_COMMON *cm, else sf->disable_split_mask = DISABLE_ALL_INTER_SPLIT; - sf->lf_motion_threshold = LOW_MOITION_THRESHOLD; + sf->lf_motion_threshold = LOW_MOTION_THRESHOLD; sf->last_partitioning_redo_frequency = 3; sf->recode_loop = ALLOW_RECODE_KFMAXBW; sf->adaptive_rd_thresh = 3; @@ -198,7 +199,7 @@ static void set_rt_speed_feature(VP9_COMP *cpi, SPEED_FEATURES *sf, sf->comp_inter_joint_search_thresh = BLOCK_SIZES; sf->auto_min_max_partition_size = RELAXED_NEIGHBORING_MIN_MAX; sf->use_lastframe_partitioning = LAST_FRAME_PARTITION_LOW_MOTION; - sf->lf_motion_threshold = LOW_MOITION_THRESHOLD; + sf->lf_motion_threshold = LOW_MOTION_THRESHOLD; sf->adjust_partitioning_from_last_frame = 1; sf->last_partitioning_redo_frequency = 3; sf->use_lp32x32fdct = 1; @@ -249,6 +250,8 @@ static void set_rt_speed_feature(VP9_COMP *cpi, SPEED_FEATURES *sf, } if (speed >= 5) { + sf->auto_min_max_partition_size = (cm->frame_type == KEY_FRAME) ? + RELAXED_NEIGHBORING_MIN_MAX : STRICT_NEIGHBORING_MIN_MAX; sf->max_partition_size = BLOCK_32X32; sf->min_partition_size = BLOCK_8X8; sf->partition_check = @@ -270,6 +273,10 @@ static void set_rt_speed_feature(VP9_COMP *cpi, SPEED_FEATURES *sf, sf->source_var_thresh = 360; sf->tx_size_search_method = USE_TX_8X8; + sf->max_intra_bsize = BLOCK_8X8; + + // This feature is only enabled when partition search is disabled. + sf->reuse_inter_pred_sby = 1; } if (speed >= 7) { @@ -335,6 +342,7 @@ void vp9_set_speed_features(VP9_COMP *cpi) { for (i = 0; i < BLOCK_SIZES; ++i) sf->inter_mode_mask[i] = INTER_ALL; sf->max_intra_bsize = BLOCK_64X64; + sf->reuse_inter_pred_sby = 0; // This setting only takes effect when partition_search_type is set // to FIXED_PARTITION. sf->always_this_block_size = BLOCK_16X16; diff --git a/vp9/encoder/vp9_speed_features.h b/vp9/encoder/vp9_speed_features.h index c796421db..75070a70f 100644 --- a/vp9/encoder/vp9_speed_features.h +++ b/vp9/encoder/vp9_speed_features.h @@ -44,8 +44,8 @@ typedef enum { } SUBPEL_SEARCH_METHODS; typedef enum { - NO_MOITION_THRESHOLD = 0, - LOW_MOITION_THRESHOLD = 7 + NO_MOTION_THRESHOLD = 0, + LOW_MOTION_THRESHOLD = 7 } MOTION_THRESHOLD; typedef enum { @@ -353,6 +353,11 @@ typedef struct SPEED_FEATURES { // The threshold used in SOURCE_VAR_BASED_PARTITION search type. unsigned int source_var_thresh; + + // When partition is pre-set, the inter prediction result from pick_inter_mode + // can be reused in final block encoding process. It is enabled only for real- + // time mode speed 6. + int reuse_inter_pred_sby; } SPEED_FEATURES; struct VP9_COMP; diff --git a/vp9/encoder/vp9_svc_layercontext.c b/vp9/encoder/vp9_svc_layercontext.c index 1b995757a..07c17b22a 100644 --- a/vp9/encoder/vp9_svc_layercontext.c +++ b/vp9/encoder/vp9_svc_layercontext.c @@ -12,6 +12,7 @@ #include "vp9/encoder/vp9_encoder.h" #include "vp9/encoder/vp9_svc_layercontext.h" +#include "vp9/encoder/vp9_extend.h" void vp9_init_layer_context(VP9_COMP *const cpi) { SVC *const svc = &cpi->svc; @@ -31,6 +32,7 @@ void vp9_init_layer_context(VP9_COMP *const cpi) { for (layer = 0; layer < layer_end; ++layer) { LAYER_CONTEXT *const lc = &svc->layer_context[layer]; RATE_CONTROL *const lrc = &lc->rc; + int i; lc->current_video_frame_in_layer = 0; lrc->avg_frame_qindex[INTER_FRAME] = oxcf->worst_allowed_q; lrc->ni_av_qi = oxcf->worst_allowed_q; @@ -42,8 +44,10 @@ void vp9_init_layer_context(VP9_COMP *const cpi) { lrc->ni_frames = 0; lrc->decimation_count = 0; lrc->decimation_factor = 0; - lrc->rate_correction_factor = 1.0; - lrc->key_frame_rate_correction_factor = 1.0; + + for (i = 0; i < RATE_FACTOR_LEVELS; ++i) { + lrc->rate_correction_factors[i] = 1.0; + } if (svc->number_temporal_layers > 1) { lc->target_bandwidth = oxcf->ts_target_bitrate[layer]; @@ -206,3 +210,101 @@ int vp9_is_upper_layer_key_frame(const VP9_COMP *const cpi) { cpi->svc.spatial_layer_id > 0 && cpi->svc.layer_context[cpi->svc.spatial_layer_id].is_key_frame; } + +int vp9_svc_lookahead_push(const VP9_COMP *const cpi, struct lookahead_ctx *ctx, + YV12_BUFFER_CONFIG *src, int64_t ts_start, + int64_t ts_end, unsigned int flags) { + struct lookahead_entry *buf; + int i, index; + + if (vp9_lookahead_push(ctx, src, ts_start, ts_end, flags)) + return 1; + + index = ctx->write_idx - 1; + if (index < 0) + index += ctx->max_sz; + + buf = ctx->buf + index; + + if (buf == NULL) + return 1; + + // Store svc parameters for each layer + for (i = 0; i < cpi->svc.number_spatial_layers; ++i) + buf->svc_params[i] = cpi->svc.layer_context[i].svc_params_received; + + return 0; +} + +static int copy_svc_params(VP9_COMP *const cpi, struct lookahead_entry *buf) { + int layer_id; + vpx_svc_parameters_t *layer_param; + vpx_enc_frame_flags_t flags; + + // Find the next layer to be encoded + for (layer_id = 0; layer_id < cpi->svc.number_spatial_layers; ++layer_id) { + if (buf->svc_params[layer_id].spatial_layer >=0) + break; + } + + if (layer_id == cpi->svc.number_spatial_layers) + return 1; + + layer_param = &buf->svc_params[layer_id]; + buf->flags = flags = layer_param->flags; + cpi->svc.spatial_layer_id = layer_param->spatial_layer; + cpi->svc.temporal_layer_id = layer_param->temporal_layer; + cpi->lst_fb_idx = layer_param->lst_fb_idx; + cpi->gld_fb_idx = layer_param->gld_fb_idx; + cpi->alt_fb_idx = layer_param->alt_fb_idx; + + if (vp9_set_size_literal(cpi, layer_param->width, layer_param->height) != 0) + return VPX_CODEC_INVALID_PARAM; + + cpi->oxcf.worst_allowed_q = + vp9_quantizer_to_qindex(layer_param->max_quantizer); + cpi->oxcf.best_allowed_q = + vp9_quantizer_to_qindex(layer_param->min_quantizer); + + vp9_change_config(cpi, &cpi->oxcf); + + vp9_set_high_precision_mv(cpi, 1); + + // Retrieve the encoding flags for each layer and apply it to encoder. + // It includes reference frame flags and update frame flags. + vp9_apply_encoding_flags(cpi, flags); + + return 0; +} + +struct lookahead_entry *vp9_svc_lookahead_peek(VP9_COMP *const cpi, + struct lookahead_ctx *ctx, + int index, int copy_params) { + struct lookahead_entry *buf = vp9_lookahead_peek(ctx, index); + + if (buf != NULL && copy_params != 0) { + if (copy_svc_params(cpi, buf) != 0) + return NULL; + } + return buf; +} + +struct lookahead_entry *vp9_svc_lookahead_pop(VP9_COMP *const cpi, + struct lookahead_ctx *ctx, + int drain) { + struct lookahead_entry *buf = NULL; + + if (ctx->sz && (drain || ctx->sz == ctx->max_sz - MAX_PRE_FRAMES)) { + buf = vp9_svc_lookahead_peek(cpi, ctx, 0, 1); + if (buf != NULL) { + // Only remove the buffer when pop the highest layer. Simply set the + // spatial_layer to -1 for lower layers. + buf->svc_params[cpi->svc.spatial_layer_id].spatial_layer = -1; + if (cpi->svc.spatial_layer_id == cpi->svc.number_spatial_layers - 1) { + vp9_lookahead_pop(ctx, drain); + } + } + } + + return buf; +} diff --git a/vp9/encoder/vp9_svc_layercontext.h b/vp9/encoder/vp9_svc_layercontext.h index 36e2027fd..3ebb831b5 100644 --- a/vp9/encoder/vp9_svc_layercontext.h +++ b/vp9/encoder/vp9_svc_layercontext.h @@ -28,6 +28,7 @@ typedef struct { struct vpx_fixed_buf rc_twopass_stats_in; unsigned int current_video_frame_in_layer; int is_key_frame; + vpx_svc_parameters_t svc_params_received; } LAYER_CONTEXT; typedef struct { @@ -74,6 +75,23 @@ void vp9_inc_frame_in_layer(SVC *svc); // Check if current layer is key frame in spatial upper layer int vp9_is_upper_layer_key_frame(const struct VP9_COMP *const cpi); +// Copy the source image, flags and svc parameters into a new framebuffer +// with the expected stride/border +int vp9_svc_lookahead_push(const struct VP9_COMP *const cpi, + struct lookahead_ctx *ctx, YV12_BUFFER_CONFIG *src, + int64_t ts_start, int64_t ts_end, + unsigned int flags); + +// Get the next source buffer to encode +struct lookahead_entry *vp9_svc_lookahead_pop(struct VP9_COMP *const cpi, + struct lookahead_ctx *ctx, + int drain); + +// Get a future source buffer to encode +struct lookahead_entry *vp9_svc_lookahead_peek(struct VP9_COMP *const cpi, + struct lookahead_ctx *ctx, + int index, int copy_params); + #ifdef __cplusplus } // extern "C" #endif diff --git a/vp9/vp9_cx_iface.c b/vp9/vp9_cx_iface.c index edd59ab74..b1501619e 100644 --- a/vp9/vp9_cx_iface.c +++ b/vp9/vp9_cx_iface.c @@ -88,8 +88,8 @@ struct vpx_codec_alg_priv { size_t pending_frame_magnitude; vpx_image_t preview_img; vp8_postproc_cfg_t preview_ppcfg; - vpx_codec_pkt_list_decl(64) pkt_list; - unsigned int fixed_kf_cntr; + vpx_codec_pkt_list_decl(128) pkt_list; + unsigned int fixed_kf_cntr; }; static VP9_REFFRAME ref_frame_to_vp9_reframe(vpx_ref_frame_type_t frame) { @@ -795,42 +795,7 @@ static vpx_codec_err_t encoder_encode(vpx_codec_alg_priv_t *ctx, return VPX_CODEC_INVALID_PARAM; } - if (flags & (VP8_EFLAG_NO_REF_LAST | VP8_EFLAG_NO_REF_GF | - VP8_EFLAG_NO_REF_ARF)) { - int ref = 7; - - if (flags & VP8_EFLAG_NO_REF_LAST) - ref ^= VP9_LAST_FLAG; - - if (flags & VP8_EFLAG_NO_REF_GF) - ref ^= VP9_GOLD_FLAG; - - if (flags & VP8_EFLAG_NO_REF_ARF) - ref ^= VP9_ALT_FLAG; - - vp9_use_as_reference(ctx->cpi, ref); - } - - if (flags & (VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_GF | - VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_FORCE_GF | - VP8_EFLAG_FORCE_ARF)) { - int upd = 7; - - if (flags & VP8_EFLAG_NO_UPD_LAST) - upd ^= VP9_LAST_FLAG; - - if (flags & VP8_EFLAG_NO_UPD_GF) - upd ^= VP9_GOLD_FLAG; - - if (flags & VP8_EFLAG_NO_UPD_ARF) - upd ^= VP9_ALT_FLAG; - - vp9_update_reference(ctx->cpi, upd); - } - - if (flags & VP8_EFLAG_NO_UPD_ENTROPY) { - vp9_update_entropy(ctx->cpi, 0); - } + vp9_apply_encoding_flags(ctx->cpi, flags); // Handle fixed keyframe intervals if (ctx->cfg.kf_mode == VPX_KF_AUTO && @@ -843,7 +808,7 @@ static vpx_codec_err_t encoder_encode(vpx_codec_alg_priv_t *ctx, // Initialize the encoder instance on the first frame. if (res == VPX_CODEC_OK && ctx->cpi != NULL) { - unsigned int lib_flags; + unsigned int lib_flags = 0; YV12_BUFFER_CONFIG sd; int64_t dst_time_stamp, dst_end_time_stamp; size_t size, cx_data_sz; @@ -853,9 +818,6 @@ static vpx_codec_err_t encoder_encode(vpx_codec_alg_priv_t *ctx, if (ctx->base.init_flags & VPX_CODEC_USE_PSNR) ((VP9_COMP *)ctx->cpi)->b_calculate_psnr = 1; - // Convert API flags to internal codec lib flags - lib_flags = (flags & VPX_EFLAG_FORCE_KF) ? FRAMEFLAGS_KEY : 0; - /* vp9 use 10,000,000 ticks/second as time stamp */ dst_time_stamp = (pts * 10000000 * ctx->cfg.g_timebase.num) / ctx->cfg.g_timebase.den; @@ -865,7 +827,9 @@ static vpx_codec_err_t encoder_encode(vpx_codec_alg_priv_t *ctx, if (img != NULL) { res = image2yuvconfig(img, &sd); - if (vp9_receive_raw_frame(ctx->cpi, lib_flags, + // Store the original flags in to the frame buffer. Will extract the + // key frame flag when we actually encode this frame. + if (vp9_receive_raw_frame(ctx->cpi, flags, &sd, dst_time_stamp, dst_end_time_stamp)) { VP9_COMP *cpi = (VP9_COMP *)ctx->cpi; res = update_error_state(ctx, &cpi->common.error); @@ -874,7 +838,6 @@ static vpx_codec_err_t encoder_encode(vpx_codec_alg_priv_t *ctx, cx_data = ctx->cx_data; cx_data_sz = ctx->cx_data_sz; - lib_flags = 0; /* Any pending invisible frames? */ if (ctx->pending_cx_data) { @@ -902,7 +865,12 @@ static vpx_codec_err_t encoder_encode(vpx_codec_alg_priv_t *ctx, VP9_COMP *const cpi = (VP9_COMP *)ctx->cpi; // Pack invisible frames with the next visible frame - if (cpi->common.show_frame == 0) { + if (cpi->common.show_frame == 0 +#ifdef CONFIG_SPATIAL_SVC + || (cpi->use_svc && cpi->svc.number_temporal_layers == 1 && + cpi->svc.spatial_layer_id < cpi->svc.number_spatial_layers - 1) +#endif + ) { if (ctx->pending_cx_data == 0) ctx->pending_cx_data = cx_data; ctx->pending_cx_data_sz += size; @@ -925,7 +893,12 @@ static vpx_codec_err_t encoder_encode(vpx_codec_alg_priv_t *ctx, / ctx->cfg.g_timebase.num / 10000000); pkt.data.frame.flags = lib_flags << 16; - if (lib_flags & FRAMEFLAGS_KEY) + if (lib_flags & FRAMEFLAGS_KEY +#ifdef CONFIG_SPATIAL_SVC + || (cpi->use_svc && cpi->svc.number_temporal_layers == 1 && + cpi->svc.layer_context[0].is_key_frame) +#endif + ) pkt.data.frame.flags |= VPX_FRAME_IS_KEY; if (cpi->common.show_frame == 0) { @@ -1165,24 +1138,19 @@ static vpx_codec_err_t ctrl_set_svc_parameters(vpx_codec_alg_priv_t *ctx, VP9_COMP *const cpi = ctx->cpi; vpx_svc_parameters_t *const params = va_arg(args, vpx_svc_parameters_t *); - if (params == NULL) + if (params == NULL || params->spatial_layer < 0 || + params->spatial_layer >= cpi->svc.number_spatial_layers) return VPX_CODEC_INVALID_PARAM; - cpi->svc.spatial_layer_id = params->spatial_layer; - cpi->svc.temporal_layer_id = params->temporal_layer; + if (params->spatial_layer == 0) { + int i; + for (i = 0; i < cpi->svc.number_spatial_layers; ++i) { + cpi->svc.layer_context[i].svc_params_received.spatial_layer = -1; + } + } - cpi->lst_fb_idx = params->lst_fb_idx; - cpi->gld_fb_idx = params->gld_fb_idx; - cpi->alt_fb_idx = params->alt_fb_idx; - - if (vp9_set_size_literal(ctx->cpi, params->width, params->height) != 0) - return VPX_CODEC_INVALID_PARAM; - - ctx->cfg.rc_max_quantizer = params->max_quantizer; - ctx->cfg.rc_min_quantizer = params->min_quantizer; - - set_encoder_config(&ctx->oxcf, &ctx->cfg, &ctx->extra_cfg); - vp9_change_config(ctx->cpi, &ctx->oxcf); + cpi->svc.layer_context[params->spatial_layer].svc_params_received = + *params; return VPX_CODEC_OK; } diff --git a/vp9/vp9_dx_iface.c b/vp9/vp9_dx_iface.c index 3b5d4bf71..fd868ae73 100644 --- a/vp9/vp9_dx_iface.c +++ b/vp9/vp9_dx_iface.c @@ -32,7 +32,6 @@ struct vpx_codec_alg_priv { vpx_codec_priv_t base; vpx_codec_dec_cfg_t cfg; vp9_stream_info_t si; - struct VP9Decoder *pbi; int postproc_cfg_set; vp8_postproc_cfg_t postproc_cfg; vpx_decrypt_cb decrypt_cb; @@ -42,6 +41,11 @@ struct vpx_codec_alg_priv { int frame_parallel_decode; // frame-based threading. int last_show_frame; // Index of last output frame. + VP9Worker *frame_workers; + int num_frame_workers; + int next_submit_thread_id; + int next_output_thread_id; + // External frame buffer info to save for VP9 common. void *ext_priv; // Private data associated with the external frame buffers. vpx_get_frame_buffer_cb_fn_t get_ext_fb_cb; @@ -85,11 +89,17 @@ static vpx_codec_err_t decoder_init(vpx_codec_ctx_t *ctx, } static vpx_codec_err_t decoder_destroy(vpx_codec_alg_priv_t *ctx) { - if (ctx->pbi) { - vp9_decoder_remove(ctx->pbi); - ctx->pbi = NULL; + if (ctx->frame_workers != NULL) { + int i; + for (i = 0; i < ctx->num_frame_workers; ++i) { + VP9Worker *const worker = &ctx->frame_workers[i]; + FrameWorkerData *const worker_data = (FrameWorkerData *)worker->data1; + vp9_decoder_remove(worker_data->pbi); + vpx_free(worker_data); + } } + vpx_free(ctx->frame_workers); vpx_free(ctx); return VPX_CODEC_OK; @@ -102,9 +112,6 @@ static vpx_codec_err_t decoder_peek_si_internal(const uint8_t *data, void *decrypt_state) { uint8_t clear_buffer[9]; - if (data_sz <= 8) - return VPX_CODEC_UNSUP_BITSTREAM; - if (data + data_sz <= data) return VPX_CODEC_INVALID_PARAM; @@ -125,12 +132,16 @@ static vpx_codec_err_t decoder_peek_si_internal(const uint8_t *data, if (frame_marker != VP9_FRAME_MARKER) return VPX_CODEC_UNSUP_BITSTREAM; + if (version > 1) return VPX_CODEC_UNSUP_BITSTREAM; if (vp9_rb_read_bit(&rb)) { // show an existing frame return VPX_CODEC_OK; } + if (data_sz <= 8) + return VPX_CODEC_UNSUP_BITSTREAM; + si->is_kf = !vp9_rb_read_bit(&rb); if (si->is_kf) { const int sRGB = 7; @@ -187,32 +198,42 @@ static vpx_codec_err_t decoder_get_si(vpx_codec_alg_priv_t *ctx, return VPX_CODEC_OK; } +static void set_error_detail(vpx_codec_alg_priv_t *ctx, + const char *const error) { + ctx->base.err_detail = error; +} + static vpx_codec_err_t update_error_state(vpx_codec_alg_priv_t *ctx, const struct vpx_internal_error_info *error) { if (error->error_code) - ctx->base.err_detail = error->has_detail ? error->detail : NULL; + set_error_detail(ctx, error->has_detail ? error->detail : NULL); return error->error_code; } static void init_buffer_callbacks(vpx_codec_alg_priv_t *ctx) { - VP9_COMMON *const cm = &ctx->pbi->common; + int i; - cm->new_fb_idx = -1; + for (i = 0; i < ctx->num_frame_workers; ++i) { + VP9Worker *const worker = &ctx->frame_workers[i]; + FrameWorkerData *const worker_data = (FrameWorkerData *)worker->data1; + VP9_COMMON *const cm = &worker_data->pbi->common; - if (ctx->get_ext_fb_cb != NULL && ctx->release_ext_fb_cb != NULL) { - cm->get_fb_cb = ctx->get_ext_fb_cb; - cm->release_fb_cb = ctx->release_ext_fb_cb; - cm->cb_priv = ctx->ext_priv; - } else { - cm->get_fb_cb = vp9_get_frame_buffer; - cm->release_fb_cb = vp9_release_frame_buffer; + cm->new_fb_idx = -1; + if (ctx->get_ext_fb_cb != NULL && ctx->release_ext_fb_cb != NULL) { + cm->get_fb_cb = ctx->get_ext_fb_cb; + cm->release_fb_cb = ctx->release_ext_fb_cb; + cm->cb_priv = ctx->ext_priv; + } else { + cm->get_fb_cb = vp9_get_frame_buffer; + cm->release_fb_cb = vp9_release_frame_buffer; - if (vp9_alloc_internal_frame_buffers(&cm->int_frame_buffers)) - vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR, - "Failed to initialize internal frame buffers"); + if (vp9_alloc_internal_frame_buffers(&cm->int_frame_buffers)) + vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR, + "Failed to initialize internal frame buffers"); - cm->cb_priv = &cm->int_frame_buffers; + cm->cb_priv = &cm->int_frame_buffers; + } } } @@ -231,15 +252,58 @@ static void set_ppflags(const vpx_codec_alg_priv_t *ctx, flags->noise_level = ctx->postproc_cfg.noise_level; } -static void init_decoder(vpx_codec_alg_priv_t *ctx) { - ctx->pbi = vp9_decoder_create(); - if (ctx->pbi == NULL) - return; +static int frame_worker_hook(void *arg1, void *arg2) { + FrameWorkerData *const worker_data = (FrameWorkerData *)arg1; + const uint8_t *data = worker_data->data; + (void)arg2; + worker_data->result = vp9_receive_compressed_data(worker_data->pbi, + worker_data->data_size, + &data); + worker_data->data_end = data; + return !worker_data->result; +} + +static vpx_codec_err_t init_decoder(vpx_codec_alg_priv_t *ctx) { + int i; - ctx->pbi->max_threads = ctx->cfg.threads; - ctx->pbi->inv_tile_order = ctx->invert_tile_order; - ctx->pbi->frame_parallel_decode = ctx->frame_parallel_decode; ctx->last_show_frame = -1; + ctx->next_submit_thread_id = 0; + ctx->next_output_thread_id = 0; + ctx->num_frame_workers = + (ctx->frame_parallel_decode == 1) ? ctx->cfg.threads: 1; + + ctx->frame_workers = (VP9Worker *) + vpx_malloc(ctx->num_frame_workers * sizeof(*ctx->frame_workers)); + if (ctx->frame_workers == NULL) { + set_error_detail(ctx, "Failed to allocate frame_workers"); + return VPX_CODEC_MEM_ERROR; + } + + for (i = 0; i < ctx->num_frame_workers; ++i) { + VP9Worker *const worker = &ctx->frame_workers[i]; + FrameWorkerData *worker_data = NULL; + vp9_worker_init(worker); + worker->data1 = vpx_memalign(32, sizeof(FrameWorkerData)); + if (worker->data1 == NULL) { + set_error_detail(ctx, "Failed to allocate worker_data"); + return VPX_CODEC_MEM_ERROR; + } + worker_data = (FrameWorkerData *)worker->data1; + worker_data->pbi = vp9_decoder_create(); + if (worker_data->pbi == NULL) { + set_error_detail(ctx, "Failed to allocate worker_data"); + return VPX_CODEC_MEM_ERROR; + } + + // If decoding in serial mode, FrameWorker thread could create tile worker + // thread or loopfilter thread. + worker_data->pbi->max_threads = + (ctx->frame_parallel_decode == 0) ? ctx->cfg.threads : 0; + + worker_data->pbi->inv_tile_order = ctx->invert_tile_order; + worker_data->pbi->frame_parallel_decode = ctx->frame_parallel_decode; + worker->hook = (VP9WorkerHook)frame_worker_hook; + } // If postprocessing was enabled by the application and a // configuration has not been provided, default it. @@ -248,14 +312,14 @@ static void init_decoder(vpx_codec_alg_priv_t *ctx) { set_default_ppflags(&ctx->postproc_cfg); init_buffer_callbacks(ctx); + + return VPX_CODEC_OK; } static vpx_codec_err_t decode_one(vpx_codec_alg_priv_t *ctx, const uint8_t **data, unsigned int data_sz, void *user_priv, int64_t deadline) { vp9_ppflags_t flags = {0}; - VP9_COMMON *cm = NULL; - (void)deadline; // Determine the stream parameters. Note that we rely on peek_si to @@ -272,22 +336,35 @@ static vpx_codec_err_t decode_one(vpx_codec_alg_priv_t *ctx, return VPX_CODEC_ERROR; } - // Initialize the decoder instance on the first frame - if (ctx->pbi == NULL) { - init_decoder(ctx); - if (ctx->pbi == NULL) - return VPX_CODEC_ERROR; + // Initialize the decoder workers on the first frame + if (ctx->frame_workers == NULL) { + const vpx_codec_err_t res = init_decoder(ctx); + if (res != VPX_CODEC_OK) + return res; } - // Set these even if already initialized. The caller may have changed the - // decrypt config between frames. - ctx->pbi->decrypt_cb = ctx->decrypt_cb; - ctx->pbi->decrypt_state = ctx->decrypt_state; + if (!ctx->frame_parallel_decode) { + VP9Worker *const worker = ctx->frame_workers; + FrameWorkerData *const worker_data = (FrameWorkerData *)worker->data1; + worker_data->data = *data; + worker_data->data_size = data_sz; + worker_data->user_priv = user_priv; - cm = &ctx->pbi->common; + // Set these even if already initialized. The caller may have changed the + // decrypt config between frames. + worker_data->pbi->decrypt_cb = ctx->decrypt_cb; + worker_data->pbi->decrypt_state = ctx->decrypt_state; - if (vp9_receive_compressed_data(ctx->pbi, data_sz, data)) - return update_error_state(ctx, &cm->error); + vp9_worker_execute(worker); + if (worker->had_error) + return update_error_state(ctx, &worker_data->pbi->common.error); + + // Update data pointer after decode. + *data = worker_data->data_end; + } else { + // TODO(hkuang): Implement frame parallel decode. + return VPX_CODEC_INCAPABLE; + } if (ctx->base.init_flags & VPX_CODEC_USE_POSTPROC) set_ppflags(ctx, &flags); @@ -306,10 +383,17 @@ static INLINE uint8_t read_marker(vpx_decrypt_cb decrypt_cb, return *data; } -static void parse_superframe_index(const uint8_t *data, size_t data_sz, - uint32_t sizes[8], int *count, - vpx_decrypt_cb decrypt_cb, - void *decrypt_state) { +static vpx_codec_err_t parse_superframe_index(const uint8_t *data, + size_t data_sz, + uint32_t sizes[8], int *count, + vpx_decrypt_cb decrypt_cb, + void *decrypt_state) { + // A chunk ending with a byte matching 0xc0 is an invalid chunk unless + // it is a super frame index. If the last byte of real video compression + // data is 0xc0 the encoder must add a 0 byte. If we have the marker but + // not the associated matching marker byte at the front of the index we have + // an invalid bitstream and need to return an error. + uint8_t marker; assert(data_sz); @@ -321,56 +405,45 @@ static void parse_superframe_index(const uint8_t *data, size_t data_sz, const uint32_t mag = ((marker >> 3) & 0x3) + 1; const size_t index_sz = 2 + mag * frames; - if (data_sz >= index_sz) { - uint8_t marker2 = read_marker(decrypt_cb, decrypt_state, - data + data_sz - index_sz); + // This chunk is marked as having a superframe index but doesn't have + // enough data for it, thus it's an invalid superframe index. + if (data_sz < index_sz) + return VPX_CODEC_CORRUPT_FRAME; - if (marker == marker2) { - // Found a valid superframe index. - uint32_t i, j; - const uint8_t *x = &data[data_sz - index_sz + 1]; + { + const uint8_t marker2 = read_marker(decrypt_cb, decrypt_state, + data + data_sz - index_sz); - // Frames has a maximum of 8 and mag has a maximum of 4. - uint8_t clear_buffer[32]; - assert(sizeof(clear_buffer) >= frames * mag); - if (decrypt_cb) { - decrypt_cb(decrypt_state, x, clear_buffer, frames * mag); - x = clear_buffer; - } + // This chunk is marked as having a superframe index but doesn't have + // the matching marker byte at the front of the index therefore it's an + // invalid chunk. + if (marker != marker2) + return VPX_CODEC_CORRUPT_FRAME; + } - for (i = 0; i < frames; ++i) { - uint32_t this_sz = 0; + { + // Found a valid superframe index. + uint32_t i, j; + const uint8_t *x = &data[data_sz - index_sz + 1]; - for (j = 0; j < mag; ++j) - this_sz |= (*x++) << (j * 8); - sizes[i] = this_sz; - } - - *count = frames; + // Frames has a maximum of 8 and mag has a maximum of 4. + uint8_t clear_buffer[32]; + assert(sizeof(clear_buffer) >= frames * mag); + if (decrypt_cb) { + decrypt_cb(decrypt_state, x, clear_buffer, frames * mag); + x = clear_buffer; } + + for (i = 0; i < frames; ++i) { + uint32_t this_sz = 0; + + for (j = 0; j < mag; ++j) + this_sz |= (*x++) << (j * 8); + sizes[i] = this_sz; + } + *count = frames; } } -} - -static vpx_codec_err_t decode_one_iter(vpx_codec_alg_priv_t *ctx, - const uint8_t **data_start_ptr, - const uint8_t *data_end, - uint32_t frame_size, void *user_priv, - long deadline) { - const vpx_codec_err_t res = decode_one(ctx, data_start_ptr, frame_size, - user_priv, deadline); - if (res != VPX_CODEC_OK) - return res; - - // Account for suboptimal termination by the encoder. - while (*data_start_ptr < data_end) { - const uint8_t marker = read_marker(ctx->decrypt_cb, ctx->decrypt_state, - *data_start_ptr); - if (marker) - break; - (*data_start_ptr)++; - } - return VPX_CODEC_OK; } @@ -378,7 +451,7 @@ static vpx_codec_err_t decoder_decode(vpx_codec_alg_priv_t *ctx, const uint8_t *data, unsigned int data_sz, void *user_priv, long deadline) { const uint8_t *data_start = data; - const uint8_t *const data_end = data + data_sz; + const uint8_t * const data_end = data + data_sz; vpx_codec_err_t res; uint32_t frame_sizes[8]; int frame_count; @@ -386,32 +459,86 @@ static vpx_codec_err_t decoder_decode(vpx_codec_alg_priv_t *ctx, if (data == NULL || data_sz == 0) return VPX_CODEC_INVALID_PARAM; - parse_superframe_index(data, data_sz, frame_sizes, &frame_count, - ctx->decrypt_cb, ctx->decrypt_state); + res = parse_superframe_index(data, data_sz, frame_sizes, &frame_count, + ctx->decrypt_cb, ctx->decrypt_state); + if (res != VPX_CODEC_OK) + return res; - if (frame_count > 0) { - int i; + if (ctx->frame_parallel_decode) { + // Decode in frame parallel mode. When decoding in this mode, the frame + // passed to the decoder must be either a normal frame or a superframe with + // superframe index so the decoder could get each frame's start position + // in the superframe. + if (frame_count > 0) { + int i; - for (i = 0; i < frame_count; ++i) { - const uint32_t frame_size = frame_sizes[i]; - if (data_start < data || - frame_size > (uint32_t)(data_end - data_start)) { - ctx->base.err_detail = "Invalid frame size in index"; - return VPX_CODEC_CORRUPT_FRAME; + for (i = 0; i < frame_count; ++i) { + const uint8_t *data_start_copy = data_start; + const uint32_t frame_size = frame_sizes[i]; + vpx_codec_err_t res; + if (data_start < data + || frame_size > (uint32_t) (data_end - data_start)) { + set_error_detail(ctx, "Invalid frame size in index"); + return VPX_CODEC_CORRUPT_FRAME; + } + + res = decode_one(ctx, &data_start_copy, frame_size, user_priv, + deadline); + if (res != VPX_CODEC_OK) + return res; + + data_start += frame_size; } - - res = decode_one_iter(ctx, &data_start, data_end, frame_size, - user_priv, deadline); + } else { + res = decode_one(ctx, &data_start, data_sz, user_priv, deadline); if (res != VPX_CODEC_OK) return res; + + // Extra data detected after the frame. + if (data_start < data_end - 1) { + set_error_detail(ctx, "Fail to decode frame in parallel mode"); + return VPX_CODEC_INCAPABLE; + } } } else { - while (data_start < data_end) { - res = decode_one_iter(ctx, &data_start, data_end, - (uint32_t)(data_end - data_start), - user_priv, deadline); - if (res != VPX_CODEC_OK) - return res; + // Decode in serial mode. + if (frame_count > 0) { + int i; + + for (i = 0; i < frame_count; ++i) { + const uint8_t *data_start_copy = data_start; + const uint32_t frame_size = frame_sizes[i]; + vpx_codec_err_t res; + if (data_start < data + || frame_size > (uint32_t) (data_end - data_start)) { + set_error_detail(ctx, "Invalid frame size in index"); + return VPX_CODEC_CORRUPT_FRAME; + } + + res = decode_one(ctx, &data_start_copy, frame_size, user_priv, + deadline); + if (res != VPX_CODEC_OK) + return res; + + data_start += frame_size; + } + } else { + while (data_start < data_end) { + const uint32_t frame_size = (uint32_t) (data_end - data_start); + const vpx_codec_err_t res = decode_one(ctx, &data_start, frame_size, + user_priv, deadline); + if (res != VPX_CODEC_OK) + return res; + + // Account for suboptimal termination by the encoder. + while (data_start < data_end) { + const uint8_t marker = read_marker(ctx->decrypt_cb, + ctx->decrypt_state, data_start); + if (marker) + break; + ++data_start; + } + } } } @@ -424,13 +551,15 @@ static vpx_image_t *decoder_get_frame(vpx_codec_alg_priv_t *ctx, // iter acts as a flip flop, so an image is only returned on the first // call to get_frame. - if (*iter == NULL && ctx->pbi != NULL) { + if (*iter == NULL && ctx->frame_workers != NULL) { YV12_BUFFER_CONFIG sd; vp9_ppflags_t flags = {0, 0, 0}; - if (vp9_get_raw_frame(ctx->pbi, &sd, &flags) == 0) { - VP9_COMMON *cm = &ctx->pbi->common; - yuvconfig2image(&ctx->img, &sd, NULL); + VP9Worker *const worker = &ctx->frame_workers[ctx->next_output_thread_id]; + FrameWorkerData *const worker_data = (FrameWorkerData *)worker->data1; + if (vp9_get_raw_frame(worker_data->pbi, &sd, &flags) == 0) { + VP9_COMMON *const cm = &worker_data->pbi->common; + yuvconfig2image(&ctx->img, &sd, worker_data->user_priv); ctx->img.fb_priv = cm->frame_bufs[cm->new_fb_idx].raw_frame_buffer.priv; img = &ctx->img; *iter = img; @@ -442,7 +571,7 @@ static vpx_image_t *decoder_get_frame(vpx_codec_alg_priv_t *ctx, &cm->frame_bufs[ctx->last_show_frame].raw_frame_buffer); } } - ctx->last_show_frame = ctx->pbi->common.new_fb_idx; + ctx->last_show_frame = worker_data->pbi->common.new_fb_idx; } } @@ -455,7 +584,7 @@ static vpx_codec_err_t decoder_set_fb_fn( vpx_release_frame_buffer_cb_fn_t cb_release, void *cb_priv) { if (cb_get == NULL || cb_release == NULL) { return VPX_CODEC_INVALID_PARAM; - } else if (ctx->pbi == NULL) { + } else if (ctx->frame_workers == NULL) { // If the decoder has already been initialized, do not accept changes to // the frame buffer functions. ctx->get_ext_fb_cb = cb_get; @@ -471,12 +600,19 @@ static vpx_codec_err_t ctrl_set_reference(vpx_codec_alg_priv_t *ctx, va_list args) { vpx_ref_frame_t *const data = va_arg(args, vpx_ref_frame_t *); + // Only support this function in serial decode. + if (ctx->frame_parallel_decode) { + set_error_detail(ctx, "Not supported in frame parallel decode"); + return VPX_CODEC_INCAPABLE; + } + if (data) { vpx_ref_frame_t *const frame = (vpx_ref_frame_t *)data; YV12_BUFFER_CONFIG sd; - + VP9Worker *const worker = ctx->frame_workers; + FrameWorkerData *const worker_data = (FrameWorkerData *)worker->data1; image2yuvconfig(&frame->img, &sd); - return vp9_set_reference_dec(&ctx->pbi->common, + return vp9_set_reference_dec(&worker_data->pbi->common, (VP9_REFFRAME)frame->frame_type, &sd); } else { return VPX_CODEC_INVALID_PARAM; @@ -487,13 +623,19 @@ static vpx_codec_err_t ctrl_copy_reference(vpx_codec_alg_priv_t *ctx, va_list args) { vpx_ref_frame_t *data = va_arg(args, vpx_ref_frame_t *); + // Only support this function in serial decode. + if (ctx->frame_parallel_decode) { + set_error_detail(ctx, "Not supported in frame parallel decode"); + return VPX_CODEC_INCAPABLE; + } + if (data) { - vpx_ref_frame_t *frame = (vpx_ref_frame_t *)data; + vpx_ref_frame_t *frame = (vpx_ref_frame_t *) data; YV12_BUFFER_CONFIG sd; - + VP9Worker *const worker = ctx->frame_workers; + FrameWorkerData *const worker_data = (FrameWorkerData *)worker->data1; image2yuvconfig(&frame->img, &sd); - - return vp9_copy_reference_dec(ctx->pbi, + return vp9_copy_reference_dec(worker_data->pbi, (VP9_REFFRAME)frame->frame_type, &sd); } else { return VPX_CODEC_INVALID_PARAM; @@ -504,11 +646,18 @@ static vpx_codec_err_t ctrl_get_reference(vpx_codec_alg_priv_t *ctx, va_list args) { vp9_ref_frame_t *data = va_arg(args, vp9_ref_frame_t *); + // Only support this function in serial decode. + if (ctx->frame_parallel_decode) { + set_error_detail(ctx, "Not supported in frame parallel decode"); + return VPX_CODEC_INCAPABLE; + } + if (data) { YV12_BUFFER_CONFIG* fb; - - vp9_get_reference_dec(ctx->pbi, data->idx, &fb); - yuvconfig2image(&data->img, fb, NULL); + VP9Worker *const worker = ctx->frame_workers; + FrameWorkerData *const worker_data = (FrameWorkerData *)worker->data1; + vp9_get_reference_dec(worker_data->pbi, data->idx, &fb); + yuvconfig2image(&data->img, fb, worker_data->user_priv); return VPX_CODEC_OK; } else { return VPX_CODEC_INVALID_PARAM; @@ -545,11 +694,20 @@ static vpx_codec_err_t ctrl_get_last_ref_updates(vpx_codec_alg_priv_t *ctx, va_list args) { int *const update_info = va_arg(args, int *); + // Only support this function in serial decode. + if (ctx->frame_parallel_decode) { + set_error_detail(ctx, "Not supported in frame parallel decode"); + return VPX_CODEC_INCAPABLE; + } + if (update_info) { - if (ctx->pbi) - *update_info = ctx->pbi->refresh_frame_flags; - else + if (ctx->frame_workers) { + VP9Worker *const worker = ctx->frame_workers; + FrameWorkerData *const worker_data = (FrameWorkerData *)worker->data1; + *update_info = worker_data->pbi->refresh_frame_flags; + } else { return VPX_CODEC_ERROR; + } return VPX_CODEC_OK; } else { return VPX_CODEC_INVALID_PARAM; @@ -561,11 +719,20 @@ static vpx_codec_err_t ctrl_get_frame_corrupted(vpx_codec_alg_priv_t *ctx, va_list args) { int *corrupted = va_arg(args, int *); + // Only support this function in serial decode. + if (ctx->frame_parallel_decode) { + set_error_detail(ctx, "Not supported in frame parallel decode"); + return VPX_CODEC_INCAPABLE; + } + if (corrupted) { - if (ctx->pbi) - *corrupted = ctx->pbi->common.frame_to_show->corrupted; - else + if (ctx->frame_workers) { + VP9Worker *const worker = ctx->frame_workers; + FrameWorkerData *const worker_data = (FrameWorkerData *)worker->data1; + *corrupted = worker_data->pbi->common.frame_to_show->corrupted; + } else { return VPX_CODEC_ERROR; + } return VPX_CODEC_OK; } else { return VPX_CODEC_INVALID_PARAM; @@ -576,9 +743,17 @@ static vpx_codec_err_t ctrl_get_display_size(vpx_codec_alg_priv_t *ctx, va_list args) { int *const display_size = va_arg(args, int *); + // Only support this function in serial decode. + if (ctx->frame_parallel_decode) { + set_error_detail(ctx, "Not supported in frame parallel decode"); + return VPX_CODEC_INCAPABLE; + } + if (display_size) { - if (ctx->pbi) { - const VP9_COMMON *const cm = &ctx->pbi->common; + if (ctx->frame_workers) { + VP9Worker *const worker = ctx->frame_workers; + FrameWorkerData *const worker_data = (FrameWorkerData *)worker->data1; + const VP9_COMMON *const cm = &worker_data->pbi->common; display_size[0] = cm->display_width; display_size[1] = cm->display_height; } else { diff --git a/vp9/vp9cx.mk b/vp9/vp9cx.mk index 9dbb67810..6a34f7e0f 100644 --- a/vp9/vp9cx.mk +++ b/vp9/vp9cx.mk @@ -105,11 +105,9 @@ VP9_CX_SRCS-$(HAVE_SSE3) += encoder/x86/vp9_sad_sse3.asm ifeq ($(CONFIG_USE_X86INC),yes) VP9_CX_SRCS-$(HAVE_MMX) += encoder/x86/vp9_dct_mmx.asm VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_error_sse2.asm -VP9_CX_SRCS-$(HAVE_AVX2) += encoder/x86/vp9_error_intrin_avx2.c VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_sad_sse2.asm VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_subtract_sse2.asm VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_variance_sse2.c -VP9_CX_SRCS-$(HAVE_AVX2) += encoder/x86/vp9_variance_avx2.c VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_subpel_variance.asm endif @@ -124,7 +122,9 @@ VP9_CX_SRCS-$(ARCH_X86_64) += encoder/x86/vp9_ssim_opt_x86_64.asm VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_dct_sse2.c VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_dct32x32_sse2.c -VP9_CX_SRCS-$(HAVE_AVX2) += encoder/x86/vp9_dct_avx2.c VP9_CX_SRCS-$(HAVE_AVX2) += encoder/x86/vp9_dct32x32_avx2.c +VP9_CX_SRCS-$(HAVE_AVX2) += encoder/x86/vp9_dct_avx2.c +VP9_CX_SRCS-$(HAVE_AVX2) += encoder/x86/vp9_error_intrin_avx2.c +VP9_CX_SRCS-$(HAVE_AVX2) += encoder/x86/vp9_variance_avx2.c VP9_CX_SRCS-yes := $(filter-out $(VP9_CX_SRCS_REMOVE-yes),$(VP9_CX_SRCS-yes)) diff --git a/vpx/src/svc_encodeframe.c b/vpx/src/svc_encodeframe.c index 17e165bfb..4efba9c00 100644 --- a/vpx/src/svc_encodeframe.c +++ b/vpx/src/svc_encodeframe.c @@ -24,6 +24,7 @@ #include "vpx/svc_context.h" #include "vpx/vp8cx.h" #include "vpx/vpx_encoder.h" +#include "vpx_mem/vpx_mem.h" #ifdef __MINGW32__ #define strtok_r strtok_s @@ -47,6 +48,14 @@ _CRTIMP char *__cdecl strtok_s(char *str, const char *delim, char **context); static const char *DEFAULT_QUANTIZER_VALUES = "60,53,39,33,27"; static const char *DEFAULT_SCALE_FACTORS = "4/16,5/16,7/16,11/16,16/16"; +// One encoded frame +typedef struct FrameData { + void *buf; // compressed data buffer + size_t size; // length of compressed data + vpx_codec_frame_flags_t flags; /**< flags for this frame */ + struct FrameData *next; +} FrameData; + typedef struct SvcInternal { char options[OPTION_BUFFER_SIZE]; // set by vpx_svc_set_options char quantizers[OPTION_BUFFER_SIZE]; // set by vpx_svc_set_quantizers @@ -72,15 +81,15 @@ typedef struct SvcInternal { // state variables int encode_frame_count; + int frame_received; int frame_within_gop; vpx_enc_frame_flags_t enc_frame_flags; int layers; int layer; int is_keyframe; - size_t frame_size; - size_t buffer_size; - void *buffer; + FrameData *frame_list; + FrameData *frame_temp; char *rc_stats_buf; size_t rc_stats_buf_size; @@ -90,128 +99,54 @@ typedef struct SvcInternal { vpx_codec_ctx_t *codec_ctx; } SvcInternal; -// Superframe is used to generate an index of individual frames (i.e., layers) -struct Superframe { - int count; - uint32_t sizes[SUPERFRAME_SLOTS]; - uint32_t magnitude; - uint8_t buffer[SUPERFRAME_BUFFER_SIZE]; - size_t index_size; -}; - -// One encoded frame layer -struct LayerData { - void *buf; // compressed data buffer - size_t size; // length of compressed data - struct LayerData *next; -}; - -// create LayerData from encoder output -static struct LayerData *ld_create(void *buf, size_t size) { - struct LayerData *const layer_data = - (struct LayerData *)malloc(sizeof(*layer_data)); - if (layer_data == NULL) { +// create FrameData from encoder output +static struct FrameData *fd_create(void *buf, size_t size, + vpx_codec_frame_flags_t flags) { + struct FrameData *const frame_data = + (struct FrameData *)vpx_malloc(sizeof(*frame_data)); + if (frame_data == NULL) { return NULL; } - layer_data->buf = malloc(size); - if (layer_data->buf == NULL) { - free(layer_data); + frame_data->buf = vpx_malloc(size); + if (frame_data->buf == NULL) { + vpx_free(frame_data); return NULL; } - memcpy(layer_data->buf, buf, size); - layer_data->size = size; - return layer_data; + vpx_memcpy(frame_data->buf, buf, size); + frame_data->size = size; + frame_data->flags = flags; + return frame_data; } -// free LayerData -static void ld_free(struct LayerData *layer_data) { - if (layer_data) { - if (layer_data->buf) { - free(layer_data->buf); - layer_data->buf = NULL; - } - free(layer_data); +// free FrameData +static void fd_free(struct FrameData *p) { + if (p) { + if (p->buf) + vpx_free(p->buf); + vpx_free(p); } } -// add layer data to list -static void ld_list_add(struct LayerData **list, struct LayerData *layer_data) { - struct LayerData **p = list; +// add FrameData to list +static void fd_list_add(struct FrameData **list, struct FrameData *layer_data) { + struct FrameData **p = list; while (*p != NULL) p = &(*p)->next; *p = layer_data; layer_data->next = NULL; } -// get accumulated size of layer data -static size_t ld_list_get_buffer_size(struct LayerData *list) { - struct LayerData *p; - size_t size = 0; - - for (p = list; p != NULL; p = p->next) { - size += p->size; - } - return size; -} - -// copy layer data to buffer -static void ld_list_copy_to_buffer(struct LayerData *list, uint8_t *buffer) { - struct LayerData *p; - - for (p = list; p != NULL; p = p->next) { - buffer[0] = 1; - memcpy(buffer, p->buf, p->size); - buffer += p->size; - } -} - -// free layer data list -static void ld_list_free(struct LayerData *list) { - struct LayerData *p = list; +// free FrameData list +static void fd_free_list(struct FrameData *list) { + struct FrameData *p = list; while (p) { list = list->next; - ld_free(p); + fd_free(p); p = list; } } -static void sf_create_index(struct Superframe *sf) { - uint8_t marker = 0xc0; - int i; - uint32_t mag, mask; - uint8_t *bufp; - - if (sf->count == 0 || sf->count >= 8) return; - - // Add the number of frames to the marker byte - marker |= sf->count - 1; - - // Choose the magnitude - for (mag = 0, mask = 0xff; mag < 4; ++mag) { - if (sf->magnitude < mask) break; - mask <<= 8; - mask |= 0xff; - } - marker |= mag << 3; - - // Write the index - sf->index_size = 2 + (mag + 1) * sf->count; - bufp = sf->buffer; - - *bufp++ = marker; - for (i = 0; i < sf->count; ++i) { - int this_sz = sf->sizes[i]; - uint32_t j; - - for (j = 0; j <= mag; ++j) { - *bufp++ = this_sz & 0xff; - this_sz >>= 8; - } - } - *bufp++ = marker; -} - static SvcInternal *get_svc_internal(SvcContext *svc_ctx) { if (svc_ctx == NULL) return NULL; if (svc_ctx->internal == NULL) { @@ -574,8 +509,6 @@ vpx_codec_err_t vpx_svc_init(SvcContext *svc_ctx, vpx_codec_ctx_t *codec_ctx, // modify encoder configuration enc_cfg->ss_number_layers = si->layers; enc_cfg->ts_number_layers = 1; // Temporal layers not used in this encoder. - // Lag in frames not currently supported - enc_cfg->g_lag_in_frames = 0; // TODO(ivanmaltz): determine if these values need to be set explicitly for // svc, or if the normal default/override mechanism can be used @@ -608,6 +541,34 @@ vpx_codec_err_t vpx_svc_init(SvcContext *svc_ctx, vpx_codec_ctx_t *codec_ctx, return VPX_CODEC_OK; } +static void accumulate_frame_size_for_each_layer(SvcInternal *const si, + const uint8_t *const buf, + const size_t size) { + uint8_t marker = buf[size - 1]; + if ((marker & 0xe0) == 0xc0) { + const uint32_t frames = (marker & 0x7) + 1; + const uint32_t mag = ((marker >> 3) & 0x3) + 1; + const size_t index_sz = 2 + mag * frames; + + uint8_t marker2 = buf[size - index_sz]; + + if (size >= index_sz && marker2 == marker) { + // found a valid superframe index + uint32_t i, j; + const uint8_t *x = &buf[size - index_sz + 1]; + + // frames has a maximum of 8 and mag has a maximum of 4. + for (i = 0; i < frames; i++) { + uint32_t this_sz = 0; + + for (j = 0; j < mag; j++) + this_sz |= (*x++) << (j * 8); + si->bytes_sum[i] += this_sz; + } + } + } +} + // SVC Algorithm flags - these get mapped to VP8_EFLAG_* defined in vp8cx.h // encoder should reference the last frame @@ -846,15 +807,12 @@ vpx_codec_err_t vpx_svc_encode(SvcContext *svc_ctx, vpx_codec_ctx_t *codec_ctx, vpx_codec_err_t res; vpx_codec_iter_t iter; const vpx_codec_cx_pkt_t *cx_pkt; - struct LayerData *cx_layer_list = NULL; - struct LayerData *layer_data; - struct Superframe superframe; + int layer_for_psnr = 0; SvcInternal *const si = get_svc_internal(svc_ctx); if (svc_ctx == NULL || codec_ctx == NULL || si == NULL) { return VPX_CODEC_INVALID_PARAM; } - memset(&superframe, 0, sizeof(superframe)); svc_log_reset(svc_ctx); si->rc_stats_buf_used = 0; @@ -863,7 +821,6 @@ vpx_codec_err_t vpx_svc_encode(SvcContext *svc_ctx, vpx_codec_ctx_t *codec_ctx, si->frame_within_gop = 0; } si->is_keyframe = (si->frame_within_gop == 0); - si->frame_size = 0; if (rawimg != NULL) { svc_log(svc_ctx, SVC_LOG_DEBUG, @@ -872,124 +829,90 @@ vpx_codec_err_t vpx_svc_encode(SvcContext *svc_ctx, vpx_codec_ctx_t *codec_ctx, si->frame_within_gop); } - // encode each layer - for (si->layer = 0; si->layer < si->layers; ++si->layer) { - if (svc_ctx->encoding_mode == ALT_INTER_LAYER_PREDICTION_IP && - si->is_keyframe && (si->layer == 1 || si->layer == 3)) { - svc_log(svc_ctx, SVC_LOG_DEBUG, "Skip encoding layer %d\n", si->layer); - continue; - } - - if (rawimg != NULL) { + if (rawimg != NULL) { + // encode each layer + for (si->layer = 0; si->layer < si->layers; ++si->layer) { + if (svc_ctx->encoding_mode == ALT_INTER_LAYER_PREDICTION_IP && + si->is_keyframe && (si->layer == 1 || si->layer == 3)) { + svc_log(svc_ctx, SVC_LOG_DEBUG, "Skip encoding layer %d\n", si->layer); + continue; + } calculate_enc_frame_flags(svc_ctx); set_svc_parameters(svc_ctx, codec_ctx); } + } - res = vpx_codec_encode(codec_ctx, rawimg, pts, (uint32_t)duration, - si->enc_frame_flags, deadline); - if (res != VPX_CODEC_OK) { - return res; - } - // save compressed data - iter = NULL; - while ((cx_pkt = vpx_codec_get_cx_data(codec_ctx, &iter))) { - switch (cx_pkt->kind) { - case VPX_CODEC_CX_FRAME_PKT: { - const uint32_t frame_pkt_size = (uint32_t)(cx_pkt->data.frame.sz); - si->bytes_sum[si->layer] += frame_pkt_size; - svc_log(svc_ctx, SVC_LOG_DEBUG, - "SVC frame: %d, layer: %d, size: %u\n", - si->encode_frame_count, si->layer, frame_pkt_size); - layer_data = - ld_create(cx_pkt->data.frame.buf, (size_t)frame_pkt_size); - if (layer_data == NULL) { - svc_log(svc_ctx, SVC_LOG_ERROR, "Error allocating LayerData\n"); - return VPX_CODEC_OK; + res = vpx_codec_encode(codec_ctx, rawimg, pts, (uint32_t)duration, 0, + deadline); + if (res != VPX_CODEC_OK) { + return res; + } + // save compressed data + iter = NULL; + while ((cx_pkt = vpx_codec_get_cx_data(codec_ctx, &iter))) { + switch (cx_pkt->kind) { + case VPX_CODEC_CX_FRAME_PKT: { + fd_list_add(&si->frame_list, fd_create(cx_pkt->data.frame.buf, + cx_pkt->data.frame.sz, + cx_pkt->data.frame.flags)); + accumulate_frame_size_for_each_layer(si, cx_pkt->data.frame.buf, + cx_pkt->data.frame.sz); + + svc_log(svc_ctx, SVC_LOG_DEBUG, "SVC frame: %d, kf: %d, size: %d, " + "pts: %d\n", si->frame_received, + (cx_pkt->data.frame.flags & VPX_FRAME_IS_KEY) ? 1 : 0, + (int)cx_pkt->data.frame.sz, (int)cx_pkt->data.frame.pts); + + ++si->frame_received; + layer_for_psnr = 0; + break; + } + case VPX_CODEC_PSNR_PKT: { + int i; + svc_log(svc_ctx, SVC_LOG_DEBUG, + "SVC frame: %d, layer: %d, PSNR(Total/Y/U/V): " + "%2.3f %2.3f %2.3f %2.3f \n", + si->frame_received, layer_for_psnr, + cx_pkt->data.psnr.psnr[0], cx_pkt->data.psnr.psnr[1], + cx_pkt->data.psnr.psnr[2], cx_pkt->data.psnr.psnr[3]); + svc_log(svc_ctx, SVC_LOG_DEBUG, + "SVC frame: %d, layer: %d, SSE(Total/Y/U/V): " + "%2.3f %2.3f %2.3f %2.3f \n", + si->frame_received, layer_for_psnr, + cx_pkt->data.psnr.sse[0], cx_pkt->data.psnr.sse[1], + cx_pkt->data.psnr.sse[2], cx_pkt->data.psnr.sse[3]); + for (i = 0; i < COMPONENTS; i++) { + si->psnr_sum[layer_for_psnr][i] += cx_pkt->data.psnr.psnr[i]; + si->sse_sum[layer_for_psnr][i] += cx_pkt->data.psnr.sse[i]; + } + ++layer_for_psnr; + break; + } + case VPX_CODEC_STATS_PKT: { + size_t new_size = si->rc_stats_buf_used + + cx_pkt->data.twopass_stats.sz; + + if (new_size > si->rc_stats_buf_size) { + char *p = (char*)realloc(si->rc_stats_buf, new_size); + if (p == NULL) { + svc_log(svc_ctx, SVC_LOG_ERROR, "Error allocating stats buf\n"); + return VPX_CODEC_MEM_ERROR; } - ld_list_add(&cx_layer_list, layer_data); + si->rc_stats_buf = p; + si->rc_stats_buf_size = new_size; + } - // save layer size in superframe index - superframe.sizes[superframe.count++] = frame_pkt_size; - superframe.magnitude |= frame_pkt_size; - break; - } - case VPX_CODEC_PSNR_PKT: { - int i; - svc_log(svc_ctx, SVC_LOG_DEBUG, - "SVC frame: %d, layer: %d, PSNR(Total/Y/U/V): " - "%2.3f %2.3f %2.3f %2.3f \n", - si->encode_frame_count, si->layer, - cx_pkt->data.psnr.psnr[0], cx_pkt->data.psnr.psnr[1], - cx_pkt->data.psnr.psnr[2], cx_pkt->data.psnr.psnr[3]); - svc_log(svc_ctx, SVC_LOG_DEBUG, - "SVC frame: %d, layer: %d, SSE(Total/Y/U/V): " - "%2.3f %2.3f %2.3f %2.3f \n", - si->encode_frame_count, si->layer, - cx_pkt->data.psnr.sse[0], cx_pkt->data.psnr.sse[1], - cx_pkt->data.psnr.sse[2], cx_pkt->data.psnr.sse[3]); - for (i = 0; i < COMPONENTS; i++) { - si->psnr_sum[si->layer][i] += cx_pkt->data.psnr.psnr[i]; - si->sse_sum[si->layer][i] += cx_pkt->data.psnr.sse[i]; - } - break; - } - case VPX_CODEC_STATS_PKT: { - size_t new_size = si->rc_stats_buf_used + - cx_pkt->data.twopass_stats.sz; - - if (new_size > si->rc_stats_buf_size) { - char *p = (char*)realloc(si->rc_stats_buf, new_size); - if (p == NULL) { - svc_log(svc_ctx, SVC_LOG_ERROR, "Error allocating stats buf\n"); - break; - } - si->rc_stats_buf = p; - si->rc_stats_buf_size = new_size; - } - - memcpy(si->rc_stats_buf + si->rc_stats_buf_used, - cx_pkt->data.twopass_stats.buf, cx_pkt->data.twopass_stats.sz); - si->rc_stats_buf_used += cx_pkt->data.twopass_stats.sz; - break; - } - default: { - break; - } + memcpy(si->rc_stats_buf + si->rc_stats_buf_used, + cx_pkt->data.twopass_stats.buf, cx_pkt->data.twopass_stats.sz); + si->rc_stats_buf_used += cx_pkt->data.twopass_stats.sz; + break; + } + default: { + break; } } - if (rawimg == NULL) { - break; - } } - if (codec_ctx->config.enc->g_pass != VPX_RC_FIRST_PASS) { - // add superframe index to layer data list - sf_create_index(&superframe); - layer_data = ld_create(superframe.buffer, superframe.index_size); - ld_list_add(&cx_layer_list, layer_data); - // get accumulated size of layer data - si->frame_size = ld_list_get_buffer_size(cx_layer_list); - if (si->frame_size > 0) { - // all layers encoded, create single buffer with concatenated layers - if (si->frame_size > si->buffer_size) { - free(si->buffer); - si->buffer = malloc(si->frame_size); - if (si->buffer == NULL) { - ld_list_free(cx_layer_list); - return VPX_CODEC_MEM_ERROR; - } - si->buffer_size = si->frame_size; - } - // copy layer data into packet - ld_list_copy_to_buffer(cx_layer_list, (uint8_t *)si->buffer); - - ld_list_free(cx_layer_list); - - svc_log(svc_ctx, SVC_LOG_DEBUG, "SVC frame: %d, kf: %d, size: %d, " - "pts: %d\n", si->encode_frame_count, si->is_keyframe, - (int)si->frame_size, (int)pts); - } - } if (rawimg != NULL) { ++si->frame_within_gop; ++si->encode_frame_count; @@ -1004,16 +927,27 @@ const char *vpx_svc_get_message(const SvcContext *svc_ctx) { return si->message_buffer; } -void *vpx_svc_get_buffer(const SvcContext *svc_ctx) { - const SvcInternal *const si = get_const_svc_internal(svc_ctx); - if (svc_ctx == NULL || si == NULL) return NULL; - return si->buffer; +// We will maintain a list of output frame buffers since with lag_in_frame +// we need to output all frame buffers at the end. vpx_svc_get_buffer() will +// remove a frame buffer from the list the put it to a temporal pointer, which +// will be removed at the next vpx_svc_get_buffer() or when closing encoder. +void *vpx_svc_get_buffer(SvcContext *svc_ctx) { + SvcInternal *const si = get_svc_internal(svc_ctx); + if (svc_ctx == NULL || si == NULL || si->frame_list == NULL) return NULL; + + if (si->frame_temp) + fd_free(si->frame_temp); + + si->frame_temp = si->frame_list; + si->frame_list = si->frame_list->next; + + return si->frame_temp->buf; } size_t vpx_svc_get_frame_size(const SvcContext *svc_ctx) { const SvcInternal *const si = get_const_svc_internal(svc_ctx); - if (svc_ctx == NULL || si == NULL) return 0; - return si->frame_size; + if (svc_ctx == NULL || si == NULL || si->frame_list == NULL) return 0; + return si->frame_list->size; } int vpx_svc_get_encode_frame_count(const SvcContext *svc_ctx) { @@ -1024,8 +958,8 @@ int vpx_svc_get_encode_frame_count(const SvcContext *svc_ctx) { int vpx_svc_is_keyframe(const SvcContext *svc_ctx) { const SvcInternal *const si = get_const_svc_internal(svc_ctx); - if (svc_ctx == NULL || si == NULL) return 0; - return si->is_keyframe; + if (svc_ctx == NULL || si == NULL || si->frame_list == NULL) return 0; + return (si->frame_list->flags & VPX_FRAME_IS_KEY) != 0; } void vpx_svc_set_keyframe(SvcContext *svc_ctx) { @@ -1112,7 +1046,8 @@ void vpx_svc_release(SvcContext *svc_ctx) { // SvcInternal if it was not already allocated si = (SvcInternal *)svc_ctx->internal; if (si != NULL) { - free(si->buffer); + fd_free(si->frame_temp); + fd_free_list(si->frame_list); if (si->rc_stats_buf) { free(si->rc_stats_buf); } diff --git a/vpx/svc_context.h b/vpx/svc_context.h index 5d0fbbd77..058ee2094 100644 --- a/vpx/svc_context.h +++ b/vpx/svc_context.h @@ -104,14 +104,16 @@ const char *vpx_svc_dump_statistics(SvcContext *svc_ctx); const char *vpx_svc_get_message(const SvcContext *svc_ctx); /** - * return size of encoded data to be returned by vpx_svc_get_buffer + * return size of encoded data to be returned by vpx_svc_get_buffer. + * it needs to be called before vpx_svc_get_buffer. */ size_t vpx_svc_get_frame_size(const SvcContext *svc_ctx); /** - * return buffer with encoded data + * return buffer with encoded data. encoder will maintain a list of frame + * buffers. each call of vpx_svc_get_buffer() will return one frame. */ -void *vpx_svc_get_buffer(const SvcContext *svc_ctx); +void *vpx_svc_get_buffer(SvcContext *svc_ctx); /** * return size of two pass rate control stats data to be returned by