diff --git a/examples/vp9_spatial_svc_encoder.c b/examples/vp9_spatial_svc_encoder.c index 223f37eb5..81d380045 100644 --- a/examples/vp9_spatial_svc_encoder.c +++ b/examples/vp9_spatial_svc_encoder.c @@ -38,8 +38,10 @@ static const arg_def_t timebase_arg = ARG_DEF("t", "timebase", 1, "timebase (num/den)"); static const arg_def_t bitrate_arg = ARG_DEF( "b", "target-bitrate", 1, "encoding bitrate, in kilobits per second"); -static const arg_def_t layers_arg = - ARG_DEF("l", "layers", 1, "number of SVC layers"); +static const arg_def_t spatial_layers_arg = + ARG_DEF("sl", "spatial-layers", 1, "number of spatial SVC layers"); +static const arg_def_t temporal_layers_arg = + ARG_DEF("tl", "temporal-layers", 1, "number of temporal SVC layers"); static const arg_def_t kf_dist_arg = ARG_DEF("k", "kf-dist", 1, "number of frames between keyframes"); static const arg_def_t scale_factors_arg = @@ -65,10 +67,11 @@ static const arg_def_t max_bitrate_arg = static const arg_def_t *svc_args[] = { &frames_arg, &width_arg, &height_arg, - &timebase_arg, &bitrate_arg, &skip_frames_arg, &layers_arg, + &timebase_arg, &bitrate_arg, &skip_frames_arg, &spatial_layers_arg, &kf_dist_arg, &scale_factors_arg, &quantizers_arg, &passes_arg, &pass_arg, &fpf_name_arg, &min_q_arg, &max_q_arg, - &min_bitrate_arg, &max_bitrate_arg, NULL + &min_bitrate_arg, &max_bitrate_arg, &temporal_layers_arg, + NULL }; static const uint32_t default_frames_to_skip = 0; @@ -79,6 +82,7 @@ static const uint32_t default_timebase_num = 1; static const uint32_t default_timebase_den = 60; static const uint32_t default_bitrate = 1000; static const uint32_t default_spatial_layers = 5; +static const uint32_t default_temporal_layers = 1; static const uint32_t default_kf_dist = 100; typedef struct { @@ -119,6 +123,7 @@ static void parse_command_line(int argc, const char **argv_, // initialize SvcContext with parameters that will be passed to vpx_svc_init svc_ctx->log_level = SVC_LOG_DEBUG; svc_ctx->spatial_layers = 
default_spatial_layers; + svc_ctx->temporal_layers = default_temporal_layers; // start with default encoder configuration res = vpx_codec_enc_config_default(vpx_codec_vp9_cx(), enc_cfg, 0); @@ -156,8 +161,10 @@ static void parse_command_line(int argc, const char **argv_, enc_cfg->rc_target_bitrate = arg_parse_uint(&arg); } else if (arg_match(&arg, &skip_frames_arg, argi)) { app_input->frames_to_skip = arg_parse_uint(&arg); - } else if (arg_match(&arg, &layers_arg, argi)) { + } else if (arg_match(&arg, &spatial_layers_arg, argi)) { svc_ctx->spatial_layers = arg_parse_uint(&arg); + } else if (arg_match(&arg, &temporal_layers_arg, argi)) { + svc_ctx->temporal_layers = arg_parse_uint(&arg); } else if (arg_match(&arg, &kf_dist_arg, argi)) { enc_cfg->kf_min_dist = arg_parse_uint(&arg); enc_cfg->kf_max_dist = enc_cfg->kf_min_dist; diff --git a/test/svc_test.cc b/test/svc_test.cc index e219488b0..218f53db7 100644 --- a/test/svc_test.cc +++ b/test/svc_test.cc @@ -212,13 +212,13 @@ class SvcTest : public ::testing::Test { EXPECT_EQ(received_frames, n); } - void DropEnhancementLayers(struct vpx_fixed_buf *const inputs, - const int num_super_frames, - const int remained_layers, - const bool is_multiple_frame_context) { + void DropLayersAndMakeItVP9Comaptible(struct vpx_fixed_buf *const inputs, + const int num_super_frames, + const int remained_spatial_layers, + const bool is_multiple_frame_contexts) { ASSERT_TRUE(inputs != NULL); ASSERT_GT(num_super_frames, 0); - ASSERT_GT(remained_layers, 0); + ASSERT_GT(remained_spatial_layers, 0); for (int i = 0; i < num_super_frames; ++i) { uint32_t frame_sizes[8] = {0}; @@ -234,64 +234,110 @@ class SvcTest : public ::testing::Test { NULL, NULL); ASSERT_EQ(VPX_CODEC_OK, res); - uint8_t *frame_data = static_cast<uint8_t *>(inputs[i].buf); - uint8_t *frame_start = frame_data; - for (frame = 0; frame < frame_count; ++frame) { - // Looking for a visible frame. 
- if (frame_data[0] & 0x02) { - ++frames_found; - if (frames_found == remained_layers) - break; + if (frame_count == 0) { + // There's no super frame but only a single frame. + ASSERT_EQ(1, remained_spatial_layers); + if (is_multiple_frame_contexts) { + // Make a new super frame. + uint8_t marker = 0xc1; + unsigned int mask; + int mag; + + // Choose the magnitude. + for (mag = 0, mask = 0xff; mag < 4; ++mag) { + if (inputs[i].sz < mask) + break; + mask <<= 8; + mask |= 0xff; + } + marker |= mag << 3; + int index_sz = 2 + (mag + 1) * 2; + + inputs[i].buf = realloc(inputs[i].buf, inputs[i].sz + index_sz + 16); + ASSERT_TRUE(inputs[i].buf != NULL); + uint8_t *frame_data = static_cast<uint8_t *>(inputs[i].buf); + frame_data[0] &= ~2; // Set the show_frame flag to 0. + frame_data += inputs[i].sz; + // Add a one byte frame with show_existing_frame. + *frame_data++ = 0x88; + + // Write the super frame index. + *frame_data++ = marker; + + frame_sizes[0] = inputs[i].sz; + frame_sizes[1] = 1; + for (int j = 0; j < 2; ++j) { + unsigned int this_sz = frame_sizes[j]; + for (int k = 0; k <= mag; k++) { + *frame_data++ = this_sz & 0xff; + this_sz >>= 8; + } + } + *frame_data++ = marker; + inputs[i].sz += index_sz + 1; } + } else { + // Found a super frame. + uint8_t *frame_data = static_cast<uint8_t *>(inputs[i].buf); + uint8_t *frame_start = frame_data; + for (frame = 0; frame < frame_count; ++frame) { + // Looking for a visible frame. + if (frame_data[0] & 0x02) { + ++frames_found; + if (frames_found == remained_spatial_layers) + break; + } + frame_data += frame_sizes[frame]; + } + ASSERT_LT(frame, frame_count) << "Couldn't find a visible frame. " + << "remained_spatial_layers: " << remained_spatial_layers + << " super_frame: " << i + << " is_multiple_frame_context: " << is_multiple_frame_contexts; + if (frame == frame_count - 1 && !is_multiple_frame_contexts) + continue; + frame_data += frame_sizes[frame]; - } - ASSERT_LT(frame, frame_count) << "Couldn't find a visible frame. 
" - << "remaining_layers: " << remained_layers - << " super_frame: " << i - << " is_multiple_frame_context: " << is_multiple_frame_context; - if (frame == frame_count - 1 && !is_multiple_frame_context) - continue; - frame_data += frame_sizes[frame]; - // We need to add one more frame for multiple frame context. - if (is_multiple_frame_context) - ++frame; - uint8_t marker = - static_cast(inputs[i].buf)[inputs[i].sz - 1]; - const uint32_t mag = ((marker >> 3) & 0x3) + 1; - const size_t index_sz = 2 + mag * frame_count; - const size_t new_index_sz = 2 + mag * (frame + 1); - marker &= 0x0f8; - marker |= frame; + // We need to add one more frame for multiple frame contexts. + if (is_multiple_frame_contexts) + ++frame; + uint8_t marker = + static_cast(inputs[i].buf)[inputs[i].sz - 1]; + const uint32_t mag = ((marker >> 3) & 0x3) + 1; + const size_t index_sz = 2 + mag * frame_count; + const size_t new_index_sz = 2 + mag * (frame + 1); + marker &= 0x0f8; + marker |= frame; - // Copy existing frame sizes. - memmove(frame_data + (is_multiple_frame_context ? 2 : 1), - frame_start + inputs[i].sz - index_sz + 1, new_index_sz - 2); - if (is_multiple_frame_context) { - // Add a one byte frame with flag show_existing frame. - *frame_data++ = 0x88 | (remained_layers - 1); - } - // New marker. - frame_data[0] = marker; - frame_data += (mag * (frame + 1) + 1); - - if (is_multiple_frame_context) { - // Write the frame size for the one byte frame. - frame_data -= mag; - *frame_data++ = 1; - for (uint32_t j = 1; j < mag; ++j) { - *frame_data++ = 0; + // Copy existing frame sizes. + memmove(frame_data + (is_multiple_frame_contexts ? 2 : 1), + frame_start + inputs[i].sz - index_sz + 1, new_index_sz - 2); + if (is_multiple_frame_contexts) { + // Add a one byte frame with flag show_existing_frame. + *frame_data++ = 0x88 | (remained_spatial_layers - 1); } - } + // New marker. 
+ frame_data[0] = marker; + frame_data += (mag * (frame + 1) + 1); - *frame_data++ = marker; - inputs[i].sz = frame_data - frame_start; + if (is_multiple_frame_contexts) { + // Write the frame size for the one byte frame. + frame_data -= mag; + *frame_data++ = 1; + for (uint32_t j = 1; j < mag; ++j) { + *frame_data++ = 0; + } + } - if (is_multiple_frame_context) { - // Change the show frame flag to 0 for all frames. - for (int j = 0; j < frame; ++j) { - frame_start[0] &= ~2; - frame_start += frame_sizes[j]; + *frame_data++ = marker; + inputs[i].sz = frame_data - frame_start; + + if (is_multiple_frame_contexts) { + // Change the show frame flag to 0 for all frames. + for (int j = 0; j < frame; ++j) { + frame_start[0] &= ~2; + frame_start += frame_sizes[j]; + } } } } @@ -359,7 +405,7 @@ TEST_F(SvcTest, InvalidOptions) { } TEST_F(SvcTest, SetLayersOption) { - vpx_codec_err_t res = vpx_svc_set_options(&svc_, "layers=3"); + vpx_codec_err_t res = vpx_svc_set_options(&svc_, "spatial-layers=3"); EXPECT_EQ(VPX_CODEC_OK, res); InitializeEncoder(); EXPECT_EQ(3, svc_.spatial_layers); @@ -367,7 +413,7 @@ TEST_F(SvcTest, SetLayersOption) { TEST_F(SvcTest, SetMultipleOptions) { vpx_codec_err_t res = - vpx_svc_set_options(&svc_, "layers=2 scale-factors=1/3,2/3"); + vpx_svc_set_options(&svc_, "spatial-layers=2 scale-factors=1/3,2/3"); EXPECT_EQ(VPX_CODEC_OK, res); InitializeEncoder(); EXPECT_EQ(2, svc_.spatial_layers); @@ -529,7 +575,7 @@ TEST_F(SvcTest, TwoPassEncode20FramesWithAltRef) { FreeBitstreamBuffers(&outputs[0], 20); } -TEST_F(SvcTest, TwoPassEncode2LayersDecodeBaseLayerOnly) { +TEST_F(SvcTest, TwoPassEncode2SpatialLayersDecodeBaseLayerOnly) { // First pass encode std::string stats_buf; Pass1EncodeNFrames(10, 2, &stats_buf); @@ -540,12 +586,12 @@ TEST_F(SvcTest, TwoPassEncode2LayersDecodeBaseLayerOnly) { vpx_fixed_buf outputs[10]; memset(&outputs[0], 0, sizeof(outputs)); Pass2EncodeNFrames(&stats_buf, 10, 2, &outputs[0]); - DropEnhancementLayers(&outputs[0], 10, 1, 
false); + DropLayersAndMakeItVP9Comaptible(&outputs[0], 10, 1, false); DecodeNFrames(&outputs[0], 10); FreeBitstreamBuffers(&outputs[0], 10); } -TEST_F(SvcTest, TwoPassEncode5LayersDecode54321Layers) { +TEST_F(SvcTest, TwoPassEncode5SpatialLayersDecode54321Layers) { // First pass encode std::string stats_buf; Pass1EncodeNFrames(10, 5, &stats_buf); @@ -558,13 +604,13 @@ TEST_F(SvcTest, TwoPassEncode5LayersDecode54321Layers) { Pass2EncodeNFrames(&stats_buf, 10, 5, &outputs[0]); DecodeNFrames(&outputs[0], 10); - DropEnhancementLayers(&outputs[0], 10, 4, false); + DropLayersAndMakeItVP9Comaptible(&outputs[0], 10, 4, false); DecodeNFrames(&outputs[0], 10); - DropEnhancementLayers(&outputs[0], 10, 3, false); + DropLayersAndMakeItVP9Comaptible(&outputs[0], 10, 3, false); DecodeNFrames(&outputs[0], 10); - DropEnhancementLayers(&outputs[0], 10, 2, false); + DropLayersAndMakeItVP9Comaptible(&outputs[0], 10, 2, false); DecodeNFrames(&outputs[0], 10); - DropEnhancementLayers(&outputs[0], 10, 1, false); + DropLayersAndMakeItVP9Comaptible(&outputs[0], 10, 1, false); DecodeNFrames(&outputs[0], 10); FreeBitstreamBuffers(&outputs[0], 10); @@ -601,15 +647,15 @@ TEST_F(SvcTest, TwoPassEncode3SNRLayersDecode321Layers) { memset(&outputs[0], 0, sizeof(outputs)); Pass2EncodeNFrames(&stats_buf, 20, 3, &outputs[0]); DecodeNFrames(&outputs[0], 20); - DropEnhancementLayers(&outputs[0], 20, 2, false); + DropLayersAndMakeItVP9Comaptible(&outputs[0], 20, 2, false); DecodeNFrames(&outputs[0], 20); - DropEnhancementLayers(&outputs[0], 20, 1, false); + DropLayersAndMakeItVP9Comaptible(&outputs[0], 20, 1, false); DecodeNFrames(&outputs[0], 20); FreeBitstreamBuffers(&outputs[0], 20); } -TEST_F(SvcTest, SetMultipleFrameContextOption) { +TEST_F(SvcTest, SetMultipleFrameContextsOption) { svc_.spatial_layers = 5; vpx_codec_err_t res = vpx_svc_set_options(&svc_, "multi-frame-contexts=1"); @@ -622,7 +668,7 @@ TEST_F(SvcTest, SetMultipleFrameContextOption) { InitializeEncoder(); } -TEST_F(SvcTest, 
TwoPassEncode2LayersWithMultipleFrameContext) { +TEST_F(SvcTest, TwoPassEncode2SpatialLayersWithMultipleFrameContexts) { // First pass encode std::string stats_buf; Pass1EncodeNFrames(10, 2, &stats_buf); @@ -634,12 +680,13 @@ TEST_F(SvcTest, TwoPassEncode2LayersWithMultipleFrameContext) { vpx_fixed_buf outputs[10]; memset(&outputs[0], 0, sizeof(outputs)); Pass2EncodeNFrames(&stats_buf, 10, 2, &outputs[0]); - DropEnhancementLayers(&outputs[0], 10, 2, true); + DropLayersAndMakeItVP9Comaptible(&outputs[0], 10, 2, true); DecodeNFrames(&outputs[0], 10); FreeBitstreamBuffers(&outputs[0], 10); } -TEST_F(SvcTest, TwoPassEncode2LayersWithMultipleFrameContextDecodeBaselayer) { +TEST_F(SvcTest, + TwoPassEncode2SpatialLayersWithMultipleFrameContextsDecodeBaselayer) { // First pass encode std::string stats_buf; Pass1EncodeNFrames(10, 2, &stats_buf); @@ -651,12 +698,12 @@ TEST_F(SvcTest, TwoPassEncode2LayersWithMultipleFrameContextDecodeBaselayer) { vpx_fixed_buf outputs[10]; memset(&outputs[0], 0, sizeof(outputs)); Pass2EncodeNFrames(&stats_buf, 10, 2, &outputs[0]); - DropEnhancementLayers(&outputs[0], 10, 1, true); + DropLayersAndMakeItVP9Comaptible(&outputs[0], 10, 1, true); DecodeNFrames(&outputs[0], 10); FreeBitstreamBuffers(&outputs[0], 10); } -TEST_F(SvcTest, TwoPassEncode2SNRLayersWithMultipleFrameContext) { +TEST_F(SvcTest, TwoPassEncode2SNRLayersWithMultipleFrameContexts) { // First pass encode std::string stats_buf; vpx_svc_set_options(&svc_, "scale-factors=1/1,1/1"); @@ -670,12 +717,13 @@ TEST_F(SvcTest, TwoPassEncode2SNRLayersWithMultipleFrameContext) { vpx_fixed_buf outputs[10]; memset(&outputs[0], 0, sizeof(outputs)); Pass2EncodeNFrames(&stats_buf, 10, 2, &outputs[0]); - DropEnhancementLayers(&outputs[0], 10, 2, true); + DropLayersAndMakeItVP9Comaptible(&outputs[0], 10, 2, true); DecodeNFrames(&outputs[0], 10); FreeBitstreamBuffers(&outputs[0], 10); } -TEST_F(SvcTest, TwoPassEncode3SNRLayersWithMultipleFrameContextDecode321Layer) { +TEST_F(SvcTest, + 
TwoPassEncode3SNRLayersWithMultipleFrameContextsDecode321Layer) { // First pass encode std::string stats_buf; vpx_svc_set_options(&svc_, "scale-factors=1/1,1/1,1/1"); @@ -697,25 +745,114 @@ TEST_F(SvcTest, TwoPassEncode3SNRLayersWithMultipleFrameContextDecode321Layer) { memcpy(outputs_new[i].buf, outputs[i].buf, outputs[i].sz); outputs_new[i].sz = outputs[i].sz; } - DropEnhancementLayers(&outputs_new[0], 10, 3, true); + DropLayersAndMakeItVP9Comaptible(&outputs_new[0], 10, 3, true); DecodeNFrames(&outputs_new[0], 10); for (int i = 0; i < 10; ++i) { memcpy(outputs_new[i].buf, outputs[i].buf, outputs[i].sz); outputs_new[i].sz = outputs[i].sz; } - DropEnhancementLayers(&outputs_new[0], 10, 2, true); + DropLayersAndMakeItVP9Comaptible(&outputs_new[0], 10, 2, true); DecodeNFrames(&outputs_new[0], 10); for (int i = 0; i < 10; ++i) { memcpy(outputs_new[i].buf, outputs[i].buf, outputs[i].sz); outputs_new[i].sz = outputs[i].sz; } - DropEnhancementLayers(&outputs_new[0], 10, 1, true); + DropLayersAndMakeItVP9Comaptible(&outputs_new[0], 10, 1, true); DecodeNFrames(&outputs_new[0], 10); FreeBitstreamBuffers(&outputs[0], 10); FreeBitstreamBuffers(&outputs_new[0], 10); } +TEST_F(SvcTest, TwoPassEncode2TemporalLayers) { + // First pass encode + std::string stats_buf; + vpx_svc_set_options(&svc_, "scale-factors=1/1"); + svc_.temporal_layers = 2; + Pass1EncodeNFrames(10, 1, &stats_buf); + + // Second pass encode + codec_enc_.g_pass = VPX_RC_LAST_PASS; + svc_.temporal_layers = 2; + vpx_svc_set_options(&svc_, "auto-alt-refs=1 scale-factors=1/1"); + vpx_fixed_buf outputs[10]; + memset(&outputs[0], 0, sizeof(outputs)); + Pass2EncodeNFrames(&stats_buf, 10, 1, &outputs[0]); + DecodeNFrames(&outputs[0], 10); + FreeBitstreamBuffers(&outputs[0], 10); +} + +TEST_F(SvcTest, TwoPassEncode2TemporalLayersWithMultipleFrameContexts) { + // First pass encode + std::string stats_buf; + vpx_svc_set_options(&svc_, "scale-factors=1/1"); + svc_.temporal_layers = 2; + Pass1EncodeNFrames(10, 1, 
&stats_buf); + + // Second pass encode + codec_enc_.g_pass = VPX_RC_LAST_PASS; + svc_.temporal_layers = 2; + codec_enc_.g_error_resilient = 0; + vpx_svc_set_options(&svc_, "auto-alt-refs=1 scale-factors=1/1 " + "multi-frame-contexts=1"); + vpx_fixed_buf outputs[10]; + memset(&outputs[0], 0, sizeof(outputs)); + Pass2EncodeNFrames(&stats_buf, 10, 1, &outputs[0]); + DropLayersAndMakeItVP9Comaptible(&outputs[0], 10, 1, true); + DecodeNFrames(&outputs[0], 10); + FreeBitstreamBuffers(&outputs[0], 10); +} + +TEST_F(SvcTest, TwoPassEncode2TemporalLayersDecodeBaseLayer) { + // First pass encode + std::string stats_buf; + vpx_svc_set_options(&svc_, "scale-factors=1/1"); + svc_.temporal_layers = 2; + Pass1EncodeNFrames(10, 1, &stats_buf); + + // Second pass encode + codec_enc_.g_pass = VPX_RC_LAST_PASS; + svc_.temporal_layers = 2; + vpx_svc_set_options(&svc_, "auto-alt-refs=1 scale-factors=1/1"); + vpx_fixed_buf outputs[10]; + memset(&outputs[0], 0, sizeof(outputs)); + Pass2EncodeNFrames(&stats_buf, 10, 1, &outputs[0]); + + vpx_fixed_buf base_layer[5]; + for (int i = 0; i < 5; ++i) + base_layer[i] = outputs[i * 2]; + + DecodeNFrames(&base_layer[0], 5); + FreeBitstreamBuffers(&outputs[0], 10); +} + +TEST_F(SvcTest, + TwoPassEncode2TemporalLayersWithMultipleFrameContextsDecodeBaseLayer) { + // First pass encode + std::string stats_buf; + vpx_svc_set_options(&svc_, "scale-factors=1/1"); + svc_.temporal_layers = 2; + Pass1EncodeNFrames(10, 1, &stats_buf); + + // Second pass encode + codec_enc_.g_pass = VPX_RC_LAST_PASS; + svc_.temporal_layers = 2; + codec_enc_.g_error_resilient = 0; + vpx_svc_set_options(&svc_, "auto-alt-refs=1 scale-factors=1/1 " + "multi-frame-contexts=1"); + vpx_fixed_buf outputs[10]; + memset(&outputs[0], 0, sizeof(outputs)); + Pass2EncodeNFrames(&stats_buf, 10, 1, &outputs[0]); + DropLayersAndMakeItVP9Comaptible(&outputs[0], 10, 1, true); + + vpx_fixed_buf base_layer[5]; + for (int i = 0; i < 5; ++i) + base_layer[i] = outputs[i * 2]; + + 
DecodeNFrames(&base_layer[0], 5); + FreeBitstreamBuffers(&outputs[0], 10); +} + } // namespace diff --git a/vp9/encoder/vp9_bitstream.c b/vp9/encoder/vp9_bitstream.c index a23d4b762..e47b91dda 100644 --- a/vp9/encoder/vp9_bitstream.c +++ b/vp9/encoder/vp9_bitstream.c @@ -999,8 +999,10 @@ static void write_frame_size_with_refs(VP9_COMP *cpi, // Set "found" to 0 for temporal svc and for spatial svc key frame if (cpi->use_svc && - (cpi->svc.number_spatial_layers == 1 || - cpi->svc.layer_context[cpi->svc.spatial_layer_id].is_key_frame)) { + ((cpi->svc.number_temporal_layers > 1 && + cpi->oxcf.rc_mode == VPX_CBR) || + (cpi->svc.number_spatial_layers > 1 && + cpi->svc.layer_context[cpi->svc.spatial_layer_id].is_key_frame))) { found = 0; } vp9_wb_write_bit(wb, found); @@ -1093,7 +1095,7 @@ static void write_uncompressed_header(VP9_COMP *cpi, // show_existing_frame flag which tells the decoder which frame we want to // show. if (!cm->show_frame || - (is_spatial_svc(cpi) && cm->error_resilient_mode == 0)) + (is_two_pass_svc(cpi) && cm->error_resilient_mode == 0)) vp9_wb_write_bit(wb, cm->intra_only); if (!cm->error_resilient_mode) diff --git a/vp9/encoder/vp9_bitstream.h b/vp9/encoder/vp9_bitstream.h index 8e82d1c97..b48826140 100644 --- a/vp9/encoder/vp9_bitstream.h +++ b/vp9/encoder/vp9_bitstream.h @@ -26,7 +26,7 @@ static INLINE int vp9_preserve_existing_gf(VP9_COMP *cpi) { return !cpi->multi_arf_allowed && cpi->refresh_golden_frame && cpi->rc.is_src_frame_alt_ref && (!cpi->use_svc || // Add spatial svc base layer case here - (is_spatial_svc(cpi) && + (is_two_pass_svc(cpi) && cpi->svc.spatial_layer_id == 0 && cpi->svc.layer_context[0].gold_ref_idx >=0 && cpi->oxcf.ss_play_alternate[0])); diff --git a/vp9/encoder/vp9_encoder.c b/vp9/encoder/vp9_encoder.c index 8a74aec5f..0026ce8c2 100644 --- a/vp9/encoder/vp9_encoder.c +++ b/vp9/encoder/vp9_encoder.c @@ -128,7 +128,7 @@ static void setup_frame(VP9_COMP *cpi) { } if (cm->frame_type == KEY_FRAME) { - if 
(!is_spatial_svc(cpi)) + if (!is_two_pass_svc(cpi)) cpi->refresh_golden_frame = 1; cpi->refresh_alt_ref_frame = 1; vp9_zero(cpi->interp_filter_selected); @@ -525,7 +525,7 @@ static void update_frame_size(VP9_COMP *cpi) { vp9_init_context_buffers(cm); init_macroblockd(cm, xd); - if (is_spatial_svc(cpi)) { + if (is_two_pass_svc(cpi)) { if (vp9_realloc_frame_buffer(&cpi->alt_ref_buffer, cm->width, cm->height, cm->subsampling_x, cm->subsampling_y, @@ -580,7 +580,9 @@ static void init_config(struct VP9_COMP *cpi, VP9EncoderConfig *oxcf) { cpi->svc.number_temporal_layers = oxcf->ts_number_layers; if ((cpi->svc.number_temporal_layers > 1 && cpi->oxcf.rc_mode == VPX_CBR) || - (cpi->svc.number_spatial_layers > 1 && cpi->oxcf.pass == 2)) { + ((cpi->svc.number_temporal_layers > 1 || + cpi->svc.number_spatial_layers > 1) && + cpi->oxcf.pass == 2)) { vp9_init_layer_context(cpi); } @@ -672,7 +674,9 @@ void vp9_change_config(struct VP9_COMP *cpi, const VP9EncoderConfig *oxcf) { if ((cpi->svc.number_temporal_layers > 1 && cpi->oxcf.rc_mode == VPX_CBR) || - (cpi->svc.number_spatial_layers > 1 && cpi->oxcf.pass == 2)) { + ((cpi->svc.number_temporal_layers > 1 || + cpi->svc.number_spatial_layers > 1) && + cpi->oxcf.pass == 2)) { vp9_update_layer_context_change_config(cpi, (int)cpi->oxcf.target_bandwidth); } @@ -923,7 +927,7 @@ VP9_COMP *vp9_create_compressor(VP9EncoderConfig *oxcf) { const int packets = (int)(oxcf->two_pass_stats_in.sz / packet_sz); if (cpi->svc.number_spatial_layers > 1 - && cpi->svc.number_temporal_layers == 1) { + || cpi->svc.number_temporal_layers > 1) { FIRSTPASS_STATS *const stats = oxcf->two_pass_stats_in.buf; FIRSTPASS_STATS *stats_copy[VPX_SS_MAX_LAYERS] = {0}; int i; @@ -1531,7 +1535,7 @@ void vp9_update_reference_frames(VP9_COMP *cpi) { cpi->alt_fb_idx = cpi->gld_fb_idx; cpi->gld_fb_idx = tmp; - if (is_spatial_svc(cpi)) { + if (is_two_pass_svc(cpi)) { cpi->svc.layer_context[0].gold_ref_idx = cpi->gld_fb_idx; cpi->svc.layer_context[0].alt_ref_idx = 
cpi->alt_fb_idx; } @@ -1960,8 +1964,7 @@ static int get_ref_frame_flags(const VP9_COMP *cpi) { if (gold_is_last) flags &= ~VP9_GOLD_FLAG; - if (cpi->rc.frames_till_gf_update_due == INT_MAX && - !is_spatial_svc(cpi)) + if (cpi->rc.frames_till_gf_update_due == INT_MAX && !is_two_pass_svc(cpi)) flags &= ~VP9_GOLD_FLAG; if (alt_is_last) @@ -2008,7 +2011,7 @@ static int is_skippable_frame(const VP9_COMP *cpi) { // can be skipped for partition check, and the partition size is assigned // according to the variance const SVC *const svc = &cpi->svc; - const TWO_PASS *const twopass = is_spatial_svc(cpi) ? + const TWO_PASS *const twopass = is_two_pass_svc(cpi) ? &svc->layer_context[svc->spatial_layer_id].twopass : &cpi->twopass; return (!frame_is_intra_only(&cpi->common) && @@ -2160,18 +2163,34 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, cm->reset_frame_context = 2; } } - if (is_spatial_svc(cpi) && cm->error_resilient_mode == 0) { - cm->frame_context_idx = cpi->svc.spatial_layer_id; + if (is_two_pass_svc(cpi) && cm->error_resilient_mode == 0) { + cm->frame_context_idx = + cpi->svc.spatial_layer_id * cpi->svc.number_temporal_layers + + cpi->svc.temporal_layer_id; // The probs will be updated based on the frame type of its previous // frame if frame_parallel_decoding_mode is 0. The type may vary for // the frame after a key frame in base layer since we may drop enhancement // layers. So set frame_parallel_decoding_mode to 1 in this case. 
- if (cpi->svc.spatial_layer_id == 0 && - cpi->svc.layer_context[0].last_frame_type == KEY_FRAME) - cm->frame_parallel_decoding_mode = 1; - else - cm->frame_parallel_decoding_mode = 0; + if (cpi->svc.number_temporal_layers == 1) { + if (cpi->svc.spatial_layer_id == 0 && + cpi->svc.layer_context[0].last_frame_type == KEY_FRAME) + cm->frame_parallel_decoding_mode = 1; + else + cm->frame_parallel_decoding_mode = 0; + } else if (cpi->svc.spatial_layer_id == 0) { + // Find the 2nd frame in temporal base layer and 1st frame in temporal + // enhancement layers from the key frame. + int i; + for (i = 0; i < cpi->svc.number_temporal_layers; ++i) { + if (cpi->svc.layer_context[0].frames_from_key_frame == 1 << i) { + cm->frame_parallel_decoding_mode = 1; + break; + } + } + if (i == cpi->svc.number_temporal_layers) + cm->frame_parallel_decoding_mode = 0; + } } // Configure experimental use of segmentation for enhanced coding of @@ -2184,7 +2203,7 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, // Check if the current frame is skippable for the partition search in the // second pass according to the first pass stats if (oxcf->pass == 2 && - (!cpi->use_svc || is_spatial_svc(cpi))) { + (!cpi->use_svc || is_two_pass_svc(cpi))) { cpi->skippable_frame = is_skippable_frame(cpi); } @@ -2330,7 +2349,7 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, // reset to normal state now that we are done. 
if (!cm->show_existing_frame) { - if (is_spatial_svc(cpi) && cm->error_resilient_mode == 0) + if (is_two_pass_svc(cpi) && cm->error_resilient_mode == 0) cm->last_show_frame = 0; else cm->last_show_frame = cm->show_frame; @@ -2343,10 +2362,10 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, // update not a real frame ++cm->current_video_frame; if (cpi->use_svc) - vp9_inc_frame_in_layer(&cpi->svc); + vp9_inc_frame_in_layer(cpi); } - if (is_spatial_svc(cpi)) + if (is_two_pass_svc(cpi)) cpi->svc.layer_context[cpi->svc.spatial_layer_id].last_frame_type = cm->frame_type; } @@ -2421,7 +2440,7 @@ int vp9_receive_raw_frame(VP9_COMP *cpi, unsigned int frame_flags, vpx_usec_timer_start(&timer); #if CONFIG_SPATIAL_SVC - if (is_spatial_svc(cpi)) + if (is_two_pass_svc(cpi)) res = vp9_svc_lookahead_push(cpi, cpi->lookahead, sd, time_stamp, end_time, frame_flags); else @@ -2557,7 +2576,7 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags, MV_REFERENCE_FRAME ref_frame; int arf_src_index; - if (is_spatial_svc(cpi) && oxcf->pass == 2) { + if (is_two_pass_svc(cpi) && oxcf->pass == 2) { #if CONFIG_SPATIAL_SVC vp9_svc_lookahead_peek(cpi, cpi->lookahead, 0, 1); #endif @@ -2581,7 +2600,7 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags, assert(arf_src_index <= rc->frames_to_key); #if CONFIG_SPATIAL_SVC - if (is_spatial_svc(cpi)) + if (is_two_pass_svc(cpi)) source = vp9_svc_lookahead_peek(cpi, cpi->lookahead, arf_src_index, 0); else #endif @@ -2590,7 +2609,7 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags, cpi->alt_ref_source = source; #if CONFIG_SPATIAL_SVC - if (is_spatial_svc(cpi) && cpi->svc.spatial_layer_id > 0) { + if (is_two_pass_svc(cpi) && cpi->svc.spatial_layer_id > 0) { int i; // Reference a hidden frame from a lower layer for (i = cpi->svc.spatial_layer_id - 1; i >= 0; --i) { @@ -2625,7 +2644,7 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags, // Get last frame source. 
if (cm->current_video_frame > 0) { #if CONFIG_SPATIAL_SVC - if (is_spatial_svc(cpi)) + if (is_two_pass_svc(cpi)) last_source = vp9_svc_lookahead_peek(cpi, cpi->lookahead, -1, 0); else #endif @@ -2636,7 +2655,7 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags, // Read in the source frame. #if CONFIG_SPATIAL_SVC - if (is_spatial_svc(cpi)) + if (is_two_pass_svc(cpi)) source = vp9_svc_lookahead_pop(cpi, cpi->lookahead, flush); else #endif @@ -2750,13 +2769,13 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags, } if (oxcf->pass == 1 && - (!cpi->use_svc || is_spatial_svc(cpi))) { + (!cpi->use_svc || is_two_pass_svc(cpi))) { const int lossless = is_lossless_requested(oxcf); cpi->mb.fwd_txm4x4 = lossless ? vp9_fwht4x4 : vp9_fdct4x4; cpi->mb.itxm_add = lossless ? vp9_iwht4x4_add : vp9_idct4x4_add; vp9_first_pass(cpi, source); } else if (oxcf->pass == 2 && - (!cpi->use_svc || is_spatial_svc(cpi))) { + (!cpi->use_svc || is_two_pass_svc(cpi))) { Pass2Encode(cpi, size, dest, frame_flags); } else if (cpi->use_svc) { SvcEncode(cpi, size, dest, frame_flags); @@ -2779,8 +2798,10 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags, // Save layer specific state. 
if ((cpi->svc.number_temporal_layers > 1 && - oxcf->rc_mode == VPX_CBR) || - (cpi->svc.number_spatial_layers > 1 && oxcf->pass == 2)) { + oxcf->rc_mode == VPX_CBR) || + ((cpi->svc.number_temporal_layers > 1 || + cpi->svc.number_spatial_layers > 1) && + oxcf->pass == 2)) { vp9_save_layer_context(cpi); } diff --git a/vp9/encoder/vp9_encoder.h b/vp9/encoder/vp9_encoder.h index 9730831de..0d3c4c19a 100644 --- a/vp9/encoder/vp9_encoder.h +++ b/vp9/encoder/vp9_encoder.h @@ -495,16 +495,17 @@ YV12_BUFFER_CONFIG *vp9_scale_if_required(VP9_COMMON *cm, void vp9_apply_encoding_flags(VP9_COMP *cpi, vpx_enc_frame_flags_t flags); -static INLINE int is_spatial_svc(const struct VP9_COMP *const cpi) { +static INLINE int is_two_pass_svc(const struct VP9_COMP *const cpi) { return cpi->use_svc && - cpi->svc.number_temporal_layers == 1 && - cpi->svc.number_spatial_layers > 1; + (cpi->svc.number_temporal_layers > 1 || + cpi->svc.number_spatial_layers > 1) && + (cpi->oxcf.pass == 1 || cpi->oxcf.pass == 2); } static INLINE int is_altref_enabled(const VP9_COMP *const cpi) { return cpi->oxcf.mode != REALTIME && cpi->oxcf.lag_in_frames > 0 && (cpi->oxcf.play_alternate && - (!is_spatial_svc(cpi) || + (!is_two_pass_svc(cpi) || cpi->oxcf.ss_play_alternate[cpi->svc.spatial_layer_id])); } diff --git a/vp9/encoder/vp9_firstpass.c b/vp9/encoder/vp9_firstpass.c index c2c2d284c..8041b59cf 100644 --- a/vp9/encoder/vp9_firstpass.c +++ b/vp9/encoder/vp9_firstpass.c @@ -246,7 +246,7 @@ void vp9_init_first_pass(VP9_COMP *cpi) { } void vp9_end_first_pass(VP9_COMP *cpi) { - if (is_spatial_svc(cpi)) { + if (is_two_pass_svc(cpi)) { int i; for (i = 0; i < cpi->svc.number_spatial_layers; ++i) { output_stats(&cpi->svc.layer_context[i].twopass.total_stats, @@ -422,8 +422,8 @@ void vp9_first_pass(VP9_COMP *cpi, const struct lookahead_entry *source) { TWO_PASS *twopass = &cpi->twopass; const MV zero_mv = {0, 0}; const YV12_BUFFER_CONFIG *first_ref_buf = lst_yv12; - LAYER_CONTEXT *const lc = is_spatial_svc(cpi) ? 
- &cpi->svc.layer_context[cpi->svc.spatial_layer_id] : 0; + LAYER_CONTEXT *const lc = is_two_pass_svc(cpi) ? + &cpi->svc.layer_context[cpi->svc.spatial_layer_id] : NULL; #if CONFIG_FP_MB_STATS if (cpi->use_fp_mb_stats) { @@ -438,13 +438,13 @@ void vp9_first_pass(VP9_COMP *cpi, const struct lookahead_entry *source) { if (lc != NULL) { MV_REFERENCE_FRAME ref_frame = LAST_FRAME; - const YV12_BUFFER_CONFIG *scaled_ref_buf = NULL; twopass = &lc->twopass; if (cpi->common.current_video_frame == 0) { cpi->ref_frame_flags = 0; } else { - if (lc->current_video_frame_in_layer == 0) + if (lc->current_video_frame_in_layer < + (unsigned int)cpi->svc.number_temporal_layers) cpi->ref_frame_flags = VP9_GOLD_FLAG; else cpi->ref_frame_flags = VP9_LAST_FLAG | VP9_GOLD_FLAG; @@ -454,16 +454,17 @@ void vp9_first_pass(VP9_COMP *cpi, const struct lookahead_entry *source) { // Use either last frame or alt frame for motion search. if (cpi->ref_frame_flags & VP9_LAST_FLAG) { - scaled_ref_buf = vp9_get_scaled_ref_frame(cpi, LAST_FRAME); + first_ref_buf = vp9_get_scaled_ref_frame(cpi, LAST_FRAME); ref_frame = LAST_FRAME; + if (first_ref_buf == NULL) + first_ref_buf = get_ref_frame_buffer(cpi, LAST_FRAME); } else if (cpi->ref_frame_flags & VP9_GOLD_FLAG) { - scaled_ref_buf = vp9_get_scaled_ref_frame(cpi, GOLDEN_FRAME); + first_ref_buf = vp9_get_scaled_ref_frame(cpi, GOLDEN_FRAME); ref_frame = GOLDEN_FRAME; + if (first_ref_buf == NULL) + first_ref_buf = get_ref_frame_buffer(cpi, GOLDEN_FRAME); } - if (scaled_ref_buf != NULL) - first_ref_buf = scaled_ref_buf; - recon_y_stride = new_yv12->y_stride; recon_uv_stride = new_yv12->uv_stride; uv_mb_height = 16 >> (new_yv12->y_height > new_yv12->uv_height); @@ -914,7 +915,7 @@ void vp9_first_pass(VP9_COMP *cpi, const struct lookahead_entry *source) { ++cm->current_video_frame; if (cpi->use_svc) - vp9_inc_frame_in_layer(&cpi->svc); + vp9_inc_frame_in_layer(cpi); } static double calc_correction_factor(double err_per_mb, @@ -952,7 +953,7 @@ static int 
get_twopass_worst_quality(const VP9_COMP *cpi, BPER_MB_NORMBITS) / num_mbs; int q; int is_svc_upper_layer = 0; - if (is_spatial_svc(cpi) && cpi->svc.spatial_layer_id > 0) + if (is_two_pass_svc(cpi) && cpi->svc.spatial_layer_id > 0) is_svc_upper_layer = 1; // Try and pick a max Q that will be high enough to encode the @@ -980,9 +981,9 @@ extern void vp9_new_framerate(VP9_COMP *cpi, double framerate); void vp9_init_second_pass(VP9_COMP *cpi) { SVC *const svc = &cpi->svc; const VP9EncoderConfig *const oxcf = &cpi->oxcf; - const int is_spatial_svc = (svc->number_spatial_layers > 1) && - (svc->number_temporal_layers == 1); - TWO_PASS *const twopass = is_spatial_svc ? + const int is_two_pass_svc = (svc->number_spatial_layers > 1) || + (svc->number_temporal_layers > 1); + TWO_PASS *const twopass = is_two_pass_svc ? &svc->layer_context[svc->spatial_layer_id].twopass : &cpi->twopass; double frame_rate; FIRSTPASS_STATS *stats; @@ -1005,7 +1006,7 @@ void vp9_init_second_pass(VP9_COMP *cpi) { // It is calculated based on the actual durations of all frames from the // first pass. - if (is_spatial_svc) { + if (is_two_pass_svc) { vp9_update_spatial_layer_framerate(cpi, frame_rate); twopass->bits_left = (int64_t)(stats->duration * svc->layer_context[svc->spatial_layer_id].target_bandwidth / @@ -1020,7 +1021,7 @@ void vp9_init_second_pass(VP9_COMP *cpi) { // scores used in the second pass. We have this minimum to make sure // that clips that are static but "low complexity" in the intra domain // are still boosted appropriately for KF/GF/ARF. - if (!is_spatial_svc) { + if (!is_two_pass_svc) { // We don't know the number of MBs for each layer at this point. // So we will do it later. 
twopass->kf_intra_err_min = KF_MB_INTRA_MIN * cpi->common.MBs; @@ -1368,6 +1369,13 @@ static void allocate_gf_group_bits(VP9_COMP *cpi, int64_t gf_group_bits, int mid_boost_bits = 0; int mid_frame_idx; unsigned char arf_buffer_indices[MAX_ACTIVE_ARFS]; + int alt_frame_index = frame_index; + int has_temporal_layers = is_two_pass_svc(cpi) && + cpi->svc.number_temporal_layers > 1; + + // Only encode alt reference frame in temporal base layer. + if (has_temporal_layers) + alt_frame_index = cpi->svc.number_temporal_layers; key_frame = cpi->common.frame_type == KEY_FRAME || vp9_is_upper_layer_key_frame(cpi); @@ -1403,16 +1411,24 @@ static void allocate_gf_group_bits(VP9_COMP *cpi, int64_t gf_group_bits, // Store the bits to spend on the ARF if there is one. if (rc->source_alt_ref_pending) { - gf_group->update_type[frame_index] = ARF_UPDATE; - gf_group->rf_level[frame_index] = GF_ARF_STD; - gf_group->bit_allocation[frame_index] = gf_arf_bits; - gf_group->arf_src_offset[frame_index] = - (unsigned char)(rc->baseline_gf_interval - 1); - gf_group->arf_update_idx[frame_index] = arf_buffer_indices[0]; - gf_group->arf_ref_idx[frame_index] = + gf_group->update_type[alt_frame_index] = ARF_UPDATE; + gf_group->rf_level[alt_frame_index] = GF_ARF_STD; + gf_group->bit_allocation[alt_frame_index] = gf_arf_bits; + + if (has_temporal_layers) + gf_group->arf_src_offset[alt_frame_index] = + (unsigned char)(rc->baseline_gf_interval - + cpi->svc.number_temporal_layers); + else + gf_group->arf_src_offset[alt_frame_index] = + (unsigned char)(rc->baseline_gf_interval - 1); + + gf_group->arf_update_idx[alt_frame_index] = arf_buffer_indices[0]; + gf_group->arf_ref_idx[alt_frame_index] = arf_buffer_indices[cpi->multi_arf_last_grp_enabled && rc->source_alt_ref_active]; - ++frame_index; + if (!has_temporal_layers) + ++frame_index; if (cpi->multi_arf_enabled) { // Set aside a slot for a level 1 arf. 
@@ -1435,6 +1451,10 @@ static void allocate_gf_group_bits(VP9_COMP *cpi, int64_t gf_group_bits, if (EOF == input_stats(twopass, &frame_stats)) break; + if (has_temporal_layers && frame_index == alt_frame_index) { + ++frame_index; + } + modified_err = calculate_modified_err(twopass, oxcf, &frame_stats); if (group_error > 0) @@ -1656,6 +1676,21 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { else rc->baseline_gf_interval = i; + // Only encode alt reference frame in temporal base layer. So + // baseline_gf_interval should be multiple of a temporal layer group + // (typically the frame distance between two base layer frames) + if (is_two_pass_svc(cpi) && cpi->svc.number_temporal_layers > 1) { + int count = (1 << (cpi->svc.number_temporal_layers - 1)) - 1; + int new_gf_interval = (rc->baseline_gf_interval + count) & (~count); + int j; + for (j = 0; j < new_gf_interval - rc->baseline_gf_interval; ++j) { + if (EOF == input_stats(twopass, this_frame)) + break; + gf_group_err += calculate_modified_err(twopass, oxcf, this_frame); + } + rc->baseline_gf_interval = new_gf_interval; + } + rc->frames_till_gf_update_due = rc->baseline_gf_interval; // Should we use the alternate reference frame. @@ -1928,6 +1963,18 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { rc->next_key_frame_forced = 0; } + if (is_two_pass_svc(cpi) && cpi->svc.number_temporal_layers > 1) { + int count = (1 << (cpi->svc.number_temporal_layers - 1)) - 1; + int new_frame_to_key = (rc->frames_to_key + count) & (~count); + int j; + for (j = 0; j < new_frame_to_key - rc->frames_to_key; ++j) { + if (EOF == input_stats(twopass, this_frame)) + break; + kf_group_err += calculate_modified_err(twopass, oxcf, this_frame); + } + rc->frames_to_key = new_frame_to_key; + } + // Special case for the last key frame of the file. if (twopass->stats_in >= twopass->stats_in_end) { // Accumulate kf group error. 
@@ -2086,7 +2133,7 @@ void configure_buffer_updates(VP9_COMP *cpi) { assert(0); break; } - if (is_spatial_svc(cpi)) { + if (is_two_pass_svc(cpi)) { if (cpi->svc.layer_context[cpi->svc.spatial_layer_id].gold_ref_idx < 0) cpi->refresh_golden_frame = 0; if (cpi->alt_ref_source == NULL) @@ -2105,7 +2152,7 @@ void vp9_rc_get_second_pass_params(VP9_COMP *cpi) { FIRSTPASS_STATS this_frame_copy; int target_rate; - LAYER_CONTEXT *const lc = is_spatial_svc(cpi) ? + LAYER_CONTEXT *const lc = is_two_pass_svc(cpi) ? &cpi->svc.layer_context[cpi->svc.spatial_layer_id] : 0; if (lc != NULL) { @@ -2188,15 +2235,18 @@ void vp9_rc_get_second_pass_params(VP9_COMP *cpi) { if (lc != NULL) { if (cpi->svc.spatial_layer_id == 0) { lc->is_key_frame = (cm->frame_type == KEY_FRAME); - if (lc->is_key_frame) + if (lc->is_key_frame) { cpi->ref_frame_flags &= (~VP9_LAST_FLAG & ~VP9_GOLD_FLAG & ~VP9_ALT_FLAG); + lc->frames_from_key_frame = 0; + } } else { cm->frame_type = INTER_FRAME; lc->is_key_frame = cpi->svc.layer_context[0].is_key_frame; if (lc->is_key_frame) { cpi->ref_frame_flags &= (~VP9_LAST_FLAG); + lc->frames_from_key_frame = 0; } } } diff --git a/vp9/encoder/vp9_ratectrl.c b/vp9/encoder/vp9_ratectrl.c index b926a58f4..b607c8559 100644 --- a/vp9/encoder/vp9_ratectrl.c +++ b/vp9/encoder/vp9_ratectrl.c @@ -1235,7 +1235,7 @@ void vp9_rc_get_svc_params(VP9_COMP *cpi) { cm->frame_type = KEY_FRAME; rc->source_alt_ref_active = 0; - if (is_spatial_svc(cpi)) { + if (is_two_pass_svc(cpi)) { cpi->svc.layer_context[cpi->svc.spatial_layer_id].is_key_frame = 1; cpi->ref_frame_flags &= (~VP9_LAST_FLAG & ~VP9_GOLD_FLAG & ~VP9_ALT_FLAG); @@ -1247,7 +1247,7 @@ void vp9_rc_get_svc_params(VP9_COMP *cpi) { } else { cm->frame_type = INTER_FRAME; - if (is_spatial_svc(cpi)) { + if (is_two_pass_svc(cpi)) { LAYER_CONTEXT *lc = &cpi->svc.layer_context[cpi->svc.spatial_layer_id]; if (cpi->svc.spatial_layer_id == 0) { lc->is_key_frame = 0; diff --git a/vp9/encoder/vp9_svc_layercontext.c 
b/vp9/encoder/vp9_svc_layercontext.c index 9bd9792bd..7545d87b3 100644 --- a/vp9/encoder/vp9_svc_layercontext.c +++ b/vp9/encoder/vp9_svc_layercontext.c @@ -19,12 +19,12 @@ void vp9_init_layer_context(VP9_COMP *const cpi) { const VP9EncoderConfig *const oxcf = &cpi->oxcf; int layer; int layer_end; - int alt_ref_idx = svc->number_spatial_layers; + int alt_ref_idx = svc->number_spatial_layers * svc->number_temporal_layers; svc->spatial_layer_id = 0; svc->temporal_layer_id = 0; - if (svc->number_temporal_layers > 1) { + if (svc->number_temporal_layers > 1 && cpi->oxcf.rc_mode == VPX_CBR) { layer_end = svc->number_temporal_layers; } else { layer_end = svc->number_spatial_layers; @@ -36,6 +36,7 @@ void vp9_init_layer_context(VP9_COMP *const cpi) { int i; lc->current_video_frame_in_layer = 0; lc->layer_size = 0; + lc->frames_from_key_frame = 0; lc->last_frame_type = FRAME_TYPES; lrc->ni_av_qi = oxcf->worst_allowed_q; lrc->total_actual_bits = 0; @@ -51,7 +52,7 @@ void vp9_init_layer_context(VP9_COMP *const cpi) { lrc->rate_correction_factors[i] = 1.0; } - if (svc->number_temporal_layers > 1) { + if (svc->number_temporal_layers > 1 && cpi->oxcf.rc_mode == VPX_CBR) { lc->target_bandwidth = oxcf->ts_target_bitrate[layer]; lrc->last_q[INTER_FRAME] = oxcf->worst_allowed_q; lrc->avg_frame_qindex[INTER_FRAME] = oxcf->worst_allowed_q; @@ -76,7 +77,8 @@ void vp9_init_layer_context(VP9_COMP *const cpi) { } // Still have extra buffer for base layer golden frame - if (svc->number_spatial_layers > 1 && alt_ref_idx < REF_FRAMES) + if (!(svc->number_temporal_layers > 1 && cpi->oxcf.rc_mode == VPX_CBR) + && alt_ref_idx < REF_FRAMES) svc->layer_context[0].gold_ref_idx = alt_ref_idx; } @@ -90,7 +92,7 @@ void vp9_update_layer_context_change_config(VP9_COMP *const cpi, int layer_end; float bitrate_alloc = 1.0; - if (svc->number_temporal_layers > 1) { + if (svc->number_temporal_layers > 1 && cpi->oxcf.rc_mode == VPX_CBR) { layer_end = svc->number_temporal_layers; } else { layer_end = 
svc->number_spatial_layers; @@ -100,7 +102,7 @@ void vp9_update_layer_context_change_config(VP9_COMP *const cpi, LAYER_CONTEXT *const lc = &svc->layer_context[layer]; RATE_CONTROL *const lrc = &lc->rc; - if (svc->number_temporal_layers > 1) { + if (svc->number_temporal_layers > 1 && cpi->oxcf.rc_mode == VPX_CBR) { lc->target_bandwidth = oxcf->ts_target_bitrate[layer]; } else { lc->target_bandwidth = oxcf->ss_target_bitrate[layer]; @@ -116,7 +118,7 @@ void vp9_update_layer_context_change_config(VP9_COMP *const cpi, lrc->bits_off_target = MIN(lrc->bits_off_target, lrc->maximum_buffer_size); lrc->buffer_level = MIN(lrc->buffer_level, lrc->maximum_buffer_size); // Update framerate-related quantities. - if (svc->number_temporal_layers > 1) { + if (svc->number_temporal_layers > 1 && cpi->oxcf.rc_mode == VPX_CBR) { lc->framerate = cpi->framerate / oxcf->ts_rate_decimator[layer]; } else { lc->framerate = cpi->framerate; @@ -129,16 +131,16 @@ void vp9_update_layer_context_change_config(VP9_COMP *const cpi, } } -static LAYER_CONTEXT *get_layer_context(SVC *svc) { - return svc->number_temporal_layers > 1 ? - &svc->layer_context[svc->temporal_layer_id] : - &svc->layer_context[svc->spatial_layer_id]; +static LAYER_CONTEXT *get_layer_context(VP9_COMP *const cpi) { + return (cpi->svc.number_temporal_layers > 1 && cpi->oxcf.rc_mode == VPX_CBR) ? 
+ &cpi->svc.layer_context[cpi->svc.temporal_layer_id] : + &cpi->svc.layer_context[cpi->svc.spatial_layer_id]; } void vp9_update_temporal_layer_framerate(VP9_COMP *const cpi) { SVC *const svc = &cpi->svc; const VP9EncoderConfig *const oxcf = &cpi->oxcf; - LAYER_CONTEXT *const lc = get_layer_context(svc); + LAYER_CONTEXT *const lc = get_layer_context(cpi); RATE_CONTROL *const lrc = &lc->rc; const int layer = svc->temporal_layer_id; @@ -160,7 +162,7 @@ void vp9_update_temporal_layer_framerate(VP9_COMP *const cpi) { void vp9_update_spatial_layer_framerate(VP9_COMP *const cpi, double framerate) { const VP9EncoderConfig *const oxcf = &cpi->oxcf; - LAYER_CONTEXT *const lc = get_layer_context(&cpi->svc); + LAYER_CONTEXT *const lc = get_layer_context(cpi); RATE_CONTROL *const lrc = &lc->rc; lc->framerate = framerate; @@ -173,7 +175,7 @@ void vp9_update_spatial_layer_framerate(VP9_COMP *const cpi, double framerate) { } void vp9_restore_layer_context(VP9_COMP *const cpi) { - LAYER_CONTEXT *const lc = get_layer_context(&cpi->svc); + LAYER_CONTEXT *const lc = get_layer_context(cpi); const int old_frame_since_key = cpi->rc.frames_since_key; const int old_frame_to_key = cpi->rc.frames_to_key; @@ -191,7 +193,7 @@ void vp9_restore_layer_context(VP9_COMP *const cpi) { void vp9_save_layer_context(VP9_COMP *const cpi) { const VP9EncoderConfig *const oxcf = &cpi->oxcf; - LAYER_CONTEXT *const lc = get_layer_context(&cpi->svc); + LAYER_CONTEXT *const lc = get_layer_context(cpi); lc->rc = cpi->rc; lc->twopass = cpi->twopass; @@ -215,15 +217,17 @@ void vp9_init_second_pass_spatial_svc(VP9_COMP *cpi) { svc->spatial_layer_id = 0; } -void vp9_inc_frame_in_layer(SVC *svc) { - LAYER_CONTEXT *const lc = (svc->number_temporal_layers > 1) - ? &svc->layer_context[svc->temporal_layer_id] - : &svc->layer_context[svc->spatial_layer_id]; +void vp9_inc_frame_in_layer(VP9_COMP *const cpi) { + LAYER_CONTEXT *const lc = + (cpi->svc.number_temporal_layers > 1 && cpi->oxcf.rc_mode == VPX_CBR) ? 
+ &cpi->svc.layer_context[cpi->svc.temporal_layer_id] : + &cpi->svc.layer_context[cpi->svc.spatial_layer_id]; ++lc->current_video_frame_in_layer; + ++lc->frames_from_key_frame; } int vp9_is_upper_layer_key_frame(const VP9_COMP *const cpi) { - return is_spatial_svc(cpi) && + return is_two_pass_svc(cpi) && cpi->svc.spatial_layer_id > 0 && cpi->svc.layer_context[cpi->svc.spatial_layer_id].is_key_frame; } @@ -258,6 +262,7 @@ static int copy_svc_params(VP9_COMP *const cpi, struct lookahead_entry *buf) { int layer_id; vpx_svc_parameters_t *layer_param; LAYER_CONTEXT *lc; + int count = 1 << (cpi->svc.number_temporal_layers - 1); // Find the next layer to be encoded for (layer_id = 0; layer_id < cpi->svc.number_spatial_layers; ++layer_id) { @@ -275,17 +280,36 @@ static int copy_svc_params(VP9_COMP *const cpi, struct lookahead_entry *buf) { lc = &cpi->svc.layer_context[cpi->svc.spatial_layer_id]; - cpi->lst_fb_idx = cpi->svc.spatial_layer_id; + cpi->svc.temporal_layer_id = 0; + while ((lc->current_video_frame_in_layer % count) != 0) { + ++cpi->svc.temporal_layer_id; + count >>= 1; + } - if (cpi->svc.spatial_layer_id < 1) + cpi->lst_fb_idx = + cpi->svc.spatial_layer_id * cpi->svc.number_temporal_layers + + cpi->svc.temporal_layer_id; + if (lc->frames_from_key_frame < cpi->svc.number_temporal_layers) + cpi->ref_frame_flags &= ~VP9_LAST_FLAG; + + if (cpi->svc.spatial_layer_id == 0) { + if (cpi->svc.temporal_layer_id == 0) cpi->gld_fb_idx = lc->gold_ref_idx >= 0 ? 
lc->gold_ref_idx : cpi->lst_fb_idx; - else - cpi->gld_fb_idx = cpi->svc.spatial_layer_id - 1; + else + cpi->gld_fb_idx = cpi->lst_fb_idx - 1; + } else { + if (cpi->svc.temporal_layer_id == 0) + cpi->gld_fb_idx = cpi->svc.spatial_layer_id - + cpi->svc.number_temporal_layers; + else + cpi->gld_fb_idx = cpi->lst_fb_idx - 1; + } if (lc->current_video_frame_in_layer == 0) { if (cpi->svc.spatial_layer_id >= 2) { - cpi->alt_fb_idx = cpi->svc.spatial_layer_id - 2; + cpi->alt_fb_idx = + cpi->svc.spatial_layer_id - 2 * cpi->svc.number_temporal_layers; } else { cpi->alt_fb_idx = cpi->lst_fb_idx; cpi->ref_frame_flags &= (~VP9_LAST_FLAG & ~VP9_ALT_FLAG); @@ -307,7 +331,8 @@ static int copy_svc_params(VP9_COMP *const cpi, struct lookahead_entry *buf) { lc_lower->alt_ref_source != NULL) cpi->alt_fb_idx = lc_lower->alt_ref_idx; else if (cpi->svc.spatial_layer_id >= 2) - cpi->alt_fb_idx = cpi->svc.spatial_layer_id - 2; + cpi->alt_fb_idx = + cpi->svc.spatial_layer_id - 2 * cpi->svc.number_temporal_layers; else cpi->alt_fb_idx = cpi->lst_fb_idx; } @@ -326,7 +351,7 @@ static int copy_svc_params(VP9_COMP *const cpi, struct lookahead_entry *buf) { vp9_set_high_precision_mv(cpi, 1); - cpi->alt_ref_source = get_layer_context(&cpi->svc)->alt_ref_source; + cpi->alt_ref_source = get_layer_context(cpi)->alt_ref_source; return 0; } diff --git a/vp9/encoder/vp9_svc_layercontext.h b/vp9/encoder/vp9_svc_layercontext.h index d475d5fcd..1fc43a427 100644 --- a/vp9/encoder/vp9_svc_layercontext.h +++ b/vp9/encoder/vp9_svc_layercontext.h @@ -28,6 +28,7 @@ typedef struct { vpx_fixed_buf_t rc_twopass_stats_in; unsigned int current_video_frame_in_layer; int is_key_frame; + int frames_from_key_frame; FRAME_TYPE last_frame_type; vpx_svc_parameters_t svc_params_received; struct lookahead_entry *alt_ref_source; @@ -81,7 +82,7 @@ void vp9_save_layer_context(struct VP9_COMP *const cpi); void vp9_init_second_pass_spatial_svc(struct VP9_COMP *cpi); // Increment number of video frames in layer -void 
vp9_inc_frame_in_layer(SVC *svc); +void vp9_inc_frame_in_layer(struct VP9_COMP *const cpi); // Check if current layer is key frame in spatial upper layer int vp9_is_upper_layer_key_frame(const struct VP9_COMP *const cpi); diff --git a/vp9/encoder/vp9_temporal_filter.c b/vp9/encoder/vp9_temporal_filter.c index cc901b573..18a6a91d8 100644 --- a/vp9/encoder/vp9_temporal_filter.c +++ b/vp9/encoder/vp9_temporal_filter.c @@ -450,7 +450,7 @@ void vp9_temporal_filter(VP9_COMP *cpi, int distance) { } // Setup scaling factors. Scaling on each of the arnr frames is not supported - if (is_spatial_svc(cpi)) { + if (is_two_pass_svc(cpi)) { // In spatial svc the scaling factors might be less then 1/2. So we will use // non-normative scaling. int frame_used = 0; diff --git a/vp9/vp9_cx_iface.c b/vp9/vp9_cx_iface.c index f49eb5803..31a0cd51d 100644 --- a/vp9/vp9_cx_iface.c +++ b/vp9/vp9_cx_iface.c @@ -163,22 +163,8 @@ static vpx_codec_err_t validate_config(vpx_codec_alg_priv_t *ctx, } RANGE_CHECK(cfg, ss_number_layers, 1, VPX_SS_MAX_LAYERS); - -#if CONFIG_SPATIAL_SVC - if (cfg->ss_number_layers > 1) { - unsigned int i, alt_ref_sum = 0; - for (i = 0; i < cfg->ss_number_layers; ++i) { - if (cfg->ss_enable_auto_alt_ref[i]) - ++alt_ref_sum; - } - if (alt_ref_sum > REF_FRAMES - cfg->ss_number_layers) - ERROR("Not enough ref buffers for svc alt ref frames"); - } - if (cfg->ss_number_layers > 3 && cfg->g_error_resilient == 0) - ERROR("Multiple frame contexts are not supported for more than 3 layers"); -#endif - RANGE_CHECK(cfg, ts_number_layers, 1, VPX_TS_MAX_LAYERS); + if (cfg->ts_number_layers > 1) { unsigned int i; for (i = 1; i < cfg->ts_number_layers; ++i) @@ -191,6 +177,28 @@ static vpx_codec_err_t validate_config(vpx_codec_alg_priv_t *ctx, ERROR("ts_rate_decimator factors are not powers of 2"); } +#if CONFIG_SPATIAL_SVC + if (cfg->ss_number_layers * cfg->ts_number_layers > REF_FRAMES) + ERROR("Too many layers. 
Maximum 8 layers could be set"); + + if ((cfg->ss_number_layers > 1 || cfg->ts_number_layers > 1) && + cfg->g_pass == VPX_RC_LAST_PASS) { + unsigned int i, alt_ref_sum = 0; + for (i = 0; i < cfg->ss_number_layers; ++i) { + if (cfg->ss_enable_auto_alt_ref[i]) + ++alt_ref_sum; + } + if (alt_ref_sum > + REF_FRAMES - cfg->ss_number_layers * cfg->ts_number_layers) + ERROR("Not enough ref buffers for svc alt ref frames"); + if ((cfg->ss_number_layers > 3 || + cfg->ss_number_layers * cfg->ts_number_layers > 4) && + cfg->g_error_resilient == 0) + ERROR("Multiple frame context are not supported for more than 3 spatial " + "layers or more than 4 spatial x temporal layers"); + } +#endif + // VP9 does not support a lower bound on the keyframe interval in // automatic keyframe placement mode. if (cfg->kf_mode != VPX_KF_DISABLED && @@ -228,7 +236,7 @@ static vpx_codec_err_t validate_config(vpx_codec_alg_priv_t *ctx, if (cfg->rc_twopass_stats_in.sz % packet_sz) ERROR("rc_twopass_stats_in.sz indicates truncated packet."); - if (cfg->ss_number_layers > 1) { + if (cfg->ss_number_layers > 1 || cfg->ts_number_layers > 1) { int i; unsigned int n_packets_per_layer[VPX_SS_MAX_LAYERS] = {0}; @@ -423,6 +431,9 @@ static vpx_codec_err_t set_encoder_config( } } else if (oxcf->ss_number_layers == 1) { oxcf->ss_target_bitrate[0] = (int)oxcf->target_bandwidth; +#if CONFIG_SPATIAL_SVC + oxcf->ss_play_alternate[0] = extra_cfg->enable_auto_alt_ref; +#endif } oxcf->ts_number_layers = cfg->ts_number_layers; @@ -809,7 +820,7 @@ static vpx_codec_frame_flags_t get_frame_pkt_flags(const VP9_COMP *cpi, if (lib_flags & FRAMEFLAGS_KEY #if CONFIG_SPATIAL_SVC - || (is_spatial_svc(cpi) && cpi->svc.layer_context[0].is_key_frame) + || (is_two_pass_svc(cpi) && cpi->svc.layer_context[0].is_key_frame) #endif ) flags |= VPX_FRAME_IS_KEY; @@ -923,14 +934,14 @@ static vpx_codec_err_t encoder_encode(vpx_codec_alg_priv_t *ctx, vpx_codec_cx_pkt_t pkt; #if CONFIG_SPATIAL_SVC - if (is_spatial_svc(cpi)) + if 
(is_two_pass_svc(cpi)) cpi->svc.layer_context[cpi->svc.spatial_layer_id].layer_size += size; #endif // Pack invisible frames with the next visible frame if (!cpi->common.show_frame #if CONFIG_SPATIAL_SVC - || (is_spatial_svc(cpi) && + || (is_two_pass_svc(cpi) && cpi->svc.spatial_layer_id < cpi->svc.number_spatial_layers - 1) #endif ) { @@ -972,7 +983,7 @@ static vpx_codec_err_t encoder_encode(vpx_codec_alg_priv_t *ctx, cx_data += size; cx_data_sz -= size; #if CONFIG_SPATIAL_SVC - if (is_spatial_svc(cpi)) { + if (is_two_pass_svc(cpi)) { vpx_codec_cx_pkt_t pkt; int i; vp9_zero(pkt); diff --git a/vpx/src/svc_encodeframe.c b/vpx/src/svc_encodeframe.c index 45b0dca5c..8911e8304 100644 --- a/vpx/src/svc_encodeframe.c +++ b/vpx/src/svc_encodeframe.c @@ -384,8 +384,10 @@ static vpx_codec_err_t parse_options(SvcContext *svc_ctx, const char *options) { res = VPX_CODEC_INVALID_PARAM; break; } - if (strcmp("layers", option_name) == 0) { + if (strcmp("spatial-layers", option_name) == 0) { svc_ctx->spatial_layers = atoi(option_value); + } else if (strcmp("temporal-layers", option_name) == 0) { + svc_ctx->temporal_layers = atoi(option_value); } else if (strcmp("scale-factors", option_name) == 0) { res = parse_scale_factors(svc_ctx, option_value); if (res != VPX_CODEC_OK) break; @@ -406,7 +408,9 @@ static vpx_codec_err_t parse_options(SvcContext *svc_ctx, const char *options) { } free(input_string); - if (si->use_multiple_frame_contexts && svc_ctx->spatial_layers > 3) + if (si->use_multiple_frame_contexts && + (svc_ctx->spatial_layers > 3 || + svc_ctx->spatial_layers * svc_ctx->temporal_layers > 4)) res = VPX_CODEC_INVALID_PARAM; return res; @@ -488,6 +492,16 @@ vpx_codec_err_t vpx_svc_init(SvcContext *svc_ctx, vpx_codec_ctx_t *codec_ctx, res = parse_options(svc_ctx, si->options); if (res != VPX_CODEC_OK) return res; + if (svc_ctx->spatial_layers < 1) + svc_ctx->spatial_layers = 1; + if (svc_ctx->spatial_layers > VPX_SS_MAX_LAYERS) + svc_ctx->spatial_layers = VPX_SS_MAX_LAYERS; + 
+ if (svc_ctx->temporal_layers < 1) + svc_ctx->temporal_layers = 1; + if (svc_ctx->temporal_layers > VPX_TS_MAX_LAYERS) + svc_ctx->temporal_layers = VPX_TS_MAX_LAYERS; + si->layers = svc_ctx->spatial_layers; // Assign target bitrate for each layer. We calculate the ratio @@ -523,9 +537,18 @@ vpx_codec_err_t vpx_svc_init(SvcContext *svc_ctx, vpx_codec_ctx_t *codec_ctx, enc_cfg->ss_enable_auto_alt_ref[i] = si->enable_auto_alt_ref[i]; #endif + if (svc_ctx->temporal_layers > 1) { + int i; + for (i = 0; i < svc_ctx->temporal_layers; ++i) { + enc_cfg->ts_target_bitrate[i] = enc_cfg->rc_target_bitrate / + svc_ctx->temporal_layers; + enc_cfg->ts_rate_decimator[i] = 1 << (svc_ctx->temporal_layers - 1 - i); + } + } + // modify encoder configuration enc_cfg->ss_number_layers = si->layers; - enc_cfg->ts_number_layers = 1; // Temporal layers not used in this encoder. + enc_cfg->ts_number_layers = svc_ctx->temporal_layers; // TODO(ivanmaltz): determine if these values need to be set explicitly for // svc, or if the normal default/override mechanism can be used diff --git a/vpx/svc_context.h b/vpx/svc_context.h index e0de2630a..eea3b131a 100644 --- a/vpx/svc_context.h +++ b/vpx/svc_context.h @@ -31,7 +31,8 @@ typedef enum SVC_LOG_LEVEL { typedef struct { // public interface to svc_command options - int spatial_layers; // number of layers + int spatial_layers; // number of spatial layers + int temporal_layers; // number of temporal layers SVC_LOG_LEVEL log_level; // amount of information to display int log_print; // when set, printf log messages instead of returning the // message with svc_get_message