vp9-svc: Frame dropper for SVC.

If a given spatial layer decides to drop, due to the buffer/overshoot conditions for that layer, then drop that current spatial layer and all spatial layers above it. In the current implementation the svc frame counter (and hence the pattern for the non-flexible SVC case) is updated on frame drops. Also add the last spatial layer encoded to the pkt. This is useful for RTC applications that enable frame dropping for SVC. Update to the SVC datarate tests: enabled frame dropper on all SVC datarate tests, and made a fix to properly set the temporal_layer_id, which now works even on frame drops. Change-Id: If828c193f3cb6b1839803fd52fe9fbbda5b5a039
2018-03-14 10:03:31 -07:00 · 2018-03-14 10:03:31 -07:00 · 2640f25072
commit 2640f25072
parent d07a5bfbf8
10 changed files with 76 additions and 38 deletions
--- a/test/datarate_test.cc
+++ b/test/datarate_test.cc
@ -1366,6 +1366,8 @@ class DatarateOnePassCbrSvc
    last_pts_ref_ = 0;
    middle_bitrate_ = 0;
    top_bitrate_ = 0;
+    superframe_count_ = -1;
+    key_frame_spacing_ = 9999;
  }
  virtual void BeginPassHook(unsigned int /*pass*/) {}

@ -1450,6 +1452,17 @@ class DatarateOnePassCbrSvc
      encoder->Control(VP9E_SET_TUNE_CONTENT, tune_content_);
    }

+    superframe_count_++;
+    temporal_layer_id_ = 0;
+    if (number_temporal_layers_ == 2)
+      temporal_layer_id_ = (superframe_count_ % 2 != 0);
+    else if (number_temporal_layers_ == 3) {
+      if (superframe_count_ % 2 != 0) temporal_layer_id_ = 2;
+      if (superframe_count_ > 1) {
+        if ((superframe_count_ - 2) % 4 == 0) temporal_layer_id_ = 1;
+      }
+    }
+
    if (update_pattern_ && video->frame() >= 100) {
      vpx_svc_layer_id_t layer_id;
      if (video->frame() == 100) {
@ -1459,6 +1472,7 @@ class DatarateOnePassCbrSvc
      // Set layer id since the pattern changed.
      layer_id.spatial_layer_id = 0;
      layer_id.temporal_layer_id = (video->frame() % 2 != 0);
+      temporal_layer_id_ = layer_id.temporal_layer_id;
      encoder->Control(VP9E_SET_SVC_LAYER_ID, &layer_id);
      set_frame_flags_bypass_mode(layer_id.temporal_layer_id,
                                  number_spatial_layers_, 0, &ref_frame_config);
@ -1528,14 +1542,7 @@ class DatarateOnePassCbrSvc
    duration_ = 0;
  }

-  virtual void PostEncodeFrameHook(::libvpx_test::Encoder *encoder) {
-    vpx_svc_layer_id_t layer_id;
-    encoder->Control(VP9E_GET_SVC_LAYER_ID, &layer_id);
-    spatial_layer_id_ = layer_id.spatial_layer_id;
-    temporal_layer_id_ = layer_id.temporal_layer_id;
-    // Update buffer with per-layer target frame bandwidth, this is done
-    // for every frame passed to the encoder (encoded or dropped).
-    // For temporal layers, update the cumulative buffer level.
+  virtual void PostEncodeFrameHook() {
    for (int sl = 0; sl < number_spatial_layers_; ++sl) {
      for (int tl = temporal_layer_id_; tl < number_temporal_layers_; ++tl) {
        const int layer = sl * number_temporal_layers_ + tl;
@ -1585,9 +1592,14 @@ class DatarateOnePassCbrSvc
    last_pts_ = pkt->data.frame.pts;
    const bool key_frame =
        (pkt->data.frame.flags & VPX_FRAME_IS_KEY) ? true : false;
+    if (key_frame) {
+      temporal_layer_id_ = 0;
+      superframe_count_ = 0;
+    }
    parse_superframe_index(static_cast<const uint8_t *>(pkt->data.frame.buf),
                           pkt->data.frame.sz, sizes, &count);
-    if (!dynamic_drop_layer_) ASSERT_EQ(count, number_spatial_layers_);
+    // Count may be less than number of spatial layers because of frame drops.
+    ASSERT_LE(count, number_spatial_layers_);
    for (int sl = 0; sl < number_spatial_layers_; ++sl) {
      sizes[sl] = sizes[sl] << 3;
      // Update the total encoded bits per layer.
@ -1599,7 +1611,8 @@ class DatarateOnePassCbrSvc
        bits_in_buffer_model_[layer] -= static_cast<int64_t>(sizes[sl]);
        // There should be no buffer underrun, except on the base
        // temporal layer, since there may be key frames there.
-        if (!key_frame && tl > 0) {
+        // For short key frame spacing, buffer can underrun on individual frames.
+        if (!key_frame && tl > 0 && key_frame_spacing_ < 100) {
          ASSERT_GE(bits_in_buffer_model_[layer], 0)
              << "Buffer Underrun at frame " << pkt->data.frame.pts;
        }
@ -1663,6 +1676,8 @@ class DatarateOnePassCbrSvc
  vpx_codec_pts_t last_pts_ref_;
  int middle_bitrate_;
  int top_bitrate_;
+  int superframe_count_;
+  int key_frame_spacing_;
 };

 // Check basic rate targeting for 1 pass CBR SVC: 2 spatial layers and 1
@ -1728,7 +1743,7 @@ TEST_P(DatarateOnePassCbrSvc, OnePassCbrSvc2SL3TL) {
  svc_params_.scaling_factor_den[0] = 288;
  svc_params_.scaling_factor_num[1] = 288;
  svc_params_.scaling_factor_den[1] = 288;
-  cfg_.rc_dropframe_thresh = 0;
+  cfg_.rc_dropframe_thresh = 30;
  cfg_.kf_max_dist = 9999;
  number_spatial_layers_ = cfg_.ss_number_layers;
  number_temporal_layers_ = cfg_.ts_number_layers;
@ -1780,7 +1795,7 @@ TEST_P(DatarateOnePassCbrSvc, OnePassCbrSvc2SL3TLDenoiserOn) {
  svc_params_.scaling_factor_den[0] = 288;
  svc_params_.scaling_factor_num[1] = 288;
  svc_params_.scaling_factor_den[1] = 288;
-  cfg_.rc_dropframe_thresh = 0;
+  cfg_.rc_dropframe_thresh = 30;
  cfg_.kf_max_dist = 9999;
  number_spatial_layers_ = cfg_.ss_number_layers;
  number_temporal_layers_ = cfg_.ts_number_layers;
@ -1850,6 +1865,7 @@ TEST_P(DatarateOnePassCbrSvc, OnePassCbrSvc2SL3TLSmallKf) {
  // 4 key neighboring key frame periods (so key frame will land on 0-2-1-2).
  for (int j = 64; j <= 67; j++) {
    cfg_.kf_max_dist = j;
+    key_frame_spacing_ = j;
    ResetModel();
    assign_layer_bitrates(&cfg_, &svc_params_, cfg_.ss_number_layers,
                          cfg_.ts_number_layers, cfg_.temporal_layering_mode,
@ -1883,7 +1899,7 @@ TEST_P(DatarateOnePassCbrSvc, OnePassCbrSvc2SL3TL4Threads) {
  svc_params_.scaling_factor_den[0] = 288;
  svc_params_.scaling_factor_num[1] = 288;
  svc_params_.scaling_factor_den[1] = 288;
-  cfg_.rc_dropframe_thresh = 0;
+  cfg_.rc_dropframe_thresh = 30;
  cfg_.kf_max_dist = 9999;
  number_spatial_layers_ = cfg_.ss_number_layers;
  number_temporal_layers_ = cfg_.ts_number_layers;
@ -1931,7 +1947,7 @@ TEST_P(DatarateOnePassCbrSvc, OnePassCbrSvc3SL3TL) {
  svc_params_.scaling_factor_den[1] = 288;
  svc_params_.scaling_factor_num[2] = 288;
  svc_params_.scaling_factor_den[2] = 288;
-  cfg_.rc_dropframe_thresh = 0;
+  cfg_.rc_dropframe_thresh = 30;
  cfg_.kf_max_dist = 9999;
  number_spatial_layers_ = cfg_.ss_number_layers;
  number_temporal_layers_ = cfg_.ts_number_layers;
@ -1980,7 +1996,7 @@ TEST_P(DatarateOnePassCbrSvc, OnePassCbrSvc3SL3TLDynamicBitrateChange) {
  svc_params_.scaling_factor_den[1] = 288;
  svc_params_.scaling_factor_num[2] = 288;
  svc_params_.scaling_factor_den[2] = 288;
-  cfg_.rc_dropframe_thresh = 0;
+  cfg_.rc_dropframe_thresh = 30;
  cfg_.kf_max_dist = 9999;
  number_spatial_layers_ = cfg_.ss_number_layers;
  number_temporal_layers_ = cfg_.ts_number_layers;
@ -2031,7 +2047,7 @@ TEST_P(DatarateOnePassCbrSvc, OnePassCbrSvc3SL2TLDynamicPatternChange) {
  svc_params_.scaling_factor_den[1] = 288;
  svc_params_.scaling_factor_num[2] = 288;
  svc_params_.scaling_factor_den[2] = 288;
-  cfg_.rc_dropframe_thresh = 0;
+  cfg_.rc_dropframe_thresh = 30;
  cfg_.kf_max_dist = 9999;
  number_spatial_layers_ = cfg_.ss_number_layers;
  number_temporal_layers_ = cfg_.ts_number_layers;
@ -2062,7 +2078,7 @@ TEST_P(DatarateOnePassCbrSvc, OnePassCbrSvc3SL2TLDynamicPatternChange) {
 // the fly switching to 1 and then 2 and back to 3 spatial layers. This switch
 // is done by setting spatial layer bitrates to 0, and then back to non-zero,
 // during the sequence.
-TEST_P(DatarateOnePassCbrSvc, OnePassCbrSvc3SL_dynamic) {
+TEST_P(DatarateOnePassCbrSvc, OnePassCbrSvc3SL_DisableEnableLayers) {
  cfg_.rc_buf_initial_sz = 500;
  cfg_.rc_buf_optimal_sz = 500;
  cfg_.rc_buf_sz = 1000;
@ -2082,7 +2098,7 @@ TEST_P(DatarateOnePassCbrSvc, OnePassCbrSvc3SL_dynamic) {
  svc_params_.scaling_factor_den[1] = 288;
  svc_params_.scaling_factor_num[2] = 288;
  svc_params_.scaling_factor_den[2] = 288;
-  cfg_.rc_dropframe_thresh = 0;
+  cfg_.rc_dropframe_thresh = 30;
  cfg_.kf_max_dist = 9999;
  number_spatial_layers_ = cfg_.ss_number_layers;
  number_temporal_layers_ = cfg_.ts_number_layers;
@ -2139,6 +2155,7 @@ TEST_P(DatarateOnePassCbrSvc, OnePassCbrSvc3SL3TLSmallKf) {
  // 4 key neighboring key frame periods (so key frame will land on 0-2-1-2).
  for (int j = 32; j <= 35; j++) {
    cfg_.kf_max_dist = j;
+    key_frame_spacing_ = j;
    ResetModel();
    assign_layer_bitrates(&cfg_, &svc_params_, cfg_.ss_number_layers,
                          cfg_.ts_number_layers, cfg_.temporal_layering_mode,
@ -2174,7 +2191,7 @@ TEST_P(DatarateOnePassCbrSvc, OnePassCbrSvc3SL3TL4threads) {
  svc_params_.scaling_factor_den[1] = 288;
  svc_params_.scaling_factor_num[2] = 288;
  svc_params_.scaling_factor_den[2] = 288;
-  cfg_.rc_dropframe_thresh = 0;
+  cfg_.rc_dropframe_thresh = 30;
  cfg_.kf_max_dist = 9999;
  number_spatial_layers_ = cfg_.ss_number_layers;
  number_temporal_layers_ = cfg_.ts_number_layers;
--- a/test/encode_test_driver.cc
+++ b/test/encode_test_driver.cc
@ -201,7 +201,7 @@ void EncoderTest::RunLoop(VideoSource *video) {
      PreEncodeFrameHook(video, encoder.get());
      encoder->EncodeFrame(video, frame_flags_);

-      PostEncodeFrameHook(encoder.get());
+      PostEncodeFrameHook();

      CxDataIterator iter = encoder->GetCxData();

--- a/test/encode_test_driver.h
+++ b/test/encode_test_driver.h
@ -221,7 +221,7 @@ class EncoderTest {
  virtual void PreEncodeFrameHook(VideoSource * /*video*/,
                                  Encoder * /*encoder*/) {}

-  virtual void PostEncodeFrameHook(Encoder * /*encoder*/) {}
+  virtual void PostEncodeFrameHook() {}

  // Hook to be called on every compressed data packet.
  virtual void FramePktHook(const vpx_codec_cx_pkt_t * /*pkt*/) {}
--- a/vp8/vp8_cx_iface.c
+++ b/vp8/vp8_cx_iface.c
@ -917,6 +917,7 @@ static vpx_codec_err_t vp8e_encode(vpx_codec_alg_priv_t *ctx,
        pkt.data.frame.flags = lib_flags << 16;
        pkt.data.frame.width[0] = cpi->common.Width;
        pkt.data.frame.height[0] = cpi->common.Height;
+        pkt.data.frame.last_spatial_layer_encoded = 0;

        if (lib_flags & FRAMEFLAGS_KEY) {
          pkt.data.frame.flags |= VPX_FRAME_IS_KEY;
--- a/vp9/encoder/vp9_encoder.c
+++ b/vp9/encoder/vp9_encoder.c
@ -4499,7 +4499,7 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, size_t *size,

  // SVC: skip encoding of enhancement layer if the layer target bandwidth = 0.
  if (cpi->use_svc && cpi->svc.spatial_layer_id > 0 &&
-      !cpi->svc.rc_drop_superframe && cpi->oxcf.target_bandwidth == 0) {
+      cpi->oxcf.target_bandwidth == 0) {
    cpi->svc.skip_enhancement_layer = 1;
    vp9_rc_postencode_update_drop_frame(cpi);
    vp9_inc_frame_in_layer(cpi);
@ -4591,23 +4591,29 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, size_t *size,
  }

  // For 1 pass CBR, check if we are dropping this frame.
-  // For spatial layers, for now only check for frame-dropping on first spatial
-  // layer, and if decision is to drop, we drop whole super-frame.
+  // For spatial layers, for now if we decide to drop current spatial
+  // layer then we will also drop all upper spatial layers.
+  // TODO(marpan): Allow for the case of dropping single layer only without
+  // dropping all upper layers.
  if (oxcf->pass == 0 && oxcf->rc_mode == VPX_CBR &&
      cm->frame_type != KEY_FRAME) {
    if (vp9_rc_drop_frame(cpi) ||
-        (is_one_pass_cbr_svc(cpi) && cpi->svc.rc_drop_superframe == 1)) {
+        (is_one_pass_cbr_svc(cpi) &&
+         cpi->svc.rc_drop_spatial_layer[cpi->svc.spatial_layer_id] == 1)) {
      vp9_rc_postencode_update_drop_frame(cpi);
      cpi->ext_refresh_frame_flags_pending = 0;
-      cpi->svc.rc_drop_superframe = 1;
      cpi->last_frame_dropped = 1;
-      // TODO(marpan): Advancing the svc counters on dropped frames can break
-      // the referencing scheme for the fixed svc patterns defined in
-      // vp9_one_pass_cbr_svc_start_layer(). Look into fixing this issue, but
-      // for now, don't advance the svc frame counters on dropped frame.
-      // if (cpi->use_svc)
-      //   vp9_inc_frame_in_layer(cpi);
-
+      if (cpi->use_svc) {
+        int i;
+        // If we are dropping this spatial layer, then we will drop all
+        // upper spatial layers.
+        for (i = cpi->svc.spatial_layer_id; i < cpi->svc.number_spatial_layers;
+             i++)
+          cpi->svc.rc_drop_spatial_layer[i] = 1;
+        vp9_inc_frame_in_layer(cpi);
+        if (cpi->svc.rc_drop_spatial_layer[0] == 0)
+          cpi->svc.skip_enhancement_layer = 1;
+      }
      return;
    }
  }
@ -4626,6 +4632,7 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, size_t *size,
  }

  cpi->last_frame_dropped = 0;
+  cpi->svc.last_layer_encoded = cpi->svc.spatial_layer_id;

  // Disable segmentation if it decrease rate/distortion ratio
  if (cpi->oxcf.aq_mode == LOOKAHEAD_AQ)
--- a/vp9/encoder/vp9_ratectrl.c
+++ b/vp9/encoder/vp9_ratectrl.c
@ -398,7 +398,7 @@ int vp9_rc_drop_frame(VP9_COMP *cpi) {
  RATE_CONTROL *const rc = &cpi->rc;
  if (!oxcf->drop_frames_water_mark ||
      (is_one_pass_cbr_svc(cpi) &&
-       cpi->svc.spatial_layer_id > cpi->svc.first_spatial_layer_to_encode)) {
+       cpi->svc.rc_drop_spatial_layer[cpi->svc.spatial_layer_id] == 1)) {
    return 0;
  } else {
    if (rc->buffer_level < 0) {
--- a/vp9/encoder/vp9_svc_layercontext.c
+++ b/vp9/encoder/vp9_svc_layercontext.c
@ -30,7 +30,6 @@ void vp9_init_layer_context(VP9_COMP *const cpi) {
  svc->spatial_layer_id = 0;
  svc->temporal_layer_id = 0;
  svc->first_spatial_layer_to_encode = 0;
-  svc->rc_drop_superframe = 0;
  svc->force_zero_mode_spatial_ref = 0;
  svc->use_base_mv = 0;
  svc->use_partition_reuse = 0;
@ -39,9 +38,11 @@ void vp9_init_layer_context(VP9_COMP *const cpi) {
  svc->current_superframe = 0;
  svc->non_reference_frame = 0;
  svc->skip_enhancement_layer = 0;
+  svc->last_layer_encoded = 0;

  for (i = 0; i < REF_FRAMES; ++i) svc->ref_frame_index[i] = -1;
  for (sl = 0; sl < oxcf->ss_number_layers; ++sl) {
+    svc->rc_drop_spatial_layer[sl] = 0;
    svc->ext_frame_flags[sl] = 0;
    svc->ext_lst_fb_idx[sl] = 0;
    svc->ext_gld_fb_idx[sl] = 1;
@ -648,8 +649,12 @@ int vp9_one_pass_cbr_svc_start_layer(VP9_COMP *const cpi) {
    }
  }

-  if (cpi->svc.spatial_layer_id == cpi->svc.first_spatial_layer_to_encode)
-    cpi->svc.rc_drop_superframe = 0;
+  // Reset the drop flags for all spatial layers, on the base layer.
+  if (cpi->svc.spatial_layer_id == 0) {
+    int i;
+    for (i = 0; i < cpi->svc.number_spatial_layers; i++)
+      cpi->svc.rc_drop_spatial_layer[i] = 0;
+  }

  lc = &cpi->svc.layer_context[cpi->svc.spatial_layer_id *
                                   cpi->svc.number_temporal_layers +
--- a/vp9/encoder/vp9_svc_layercontext.h
+++ b/vp9/encoder/vp9_svc_layercontext.h
@ -57,7 +57,7 @@ typedef struct SVC {

  int spatial_layer_to_encode;
  int first_spatial_layer_to_encode;
-  int rc_drop_superframe;
+  int rc_drop_spatial_layer[VPX_MAX_LAYERS];

  // Workaround for multiple frame contexts
  enum { ENCODED = 0, ENCODING, NEED_TO_ENCODE } encode_empty_frame_state;
@ -106,6 +106,8 @@ typedef struct SVC {
  int skip_enhancement_layer;

  int lower_layer_qindex;
+
+  int last_layer_encoded;
 } SVC;

 struct VP9_COMP;
--- a/vp9/vp9_cx_iface.c
+++ b/vp9/vp9_cx_iface.c
@ -1205,6 +1205,8 @@ static vpx_codec_err_t encoder_encode(vpx_codec_alg_priv_t *ctx,
          cx_data_sz -= size;
          pkt.data.frame.width[cpi->svc.spatial_layer_id] = cpi->common.width;
          pkt.data.frame.height[cpi->svc.spatial_layer_id] = cpi->common.height;
+          pkt.data.frame.last_spatial_layer_encoded =
+              cpi->svc.last_layer_encoded;

          if (ctx->output_cx_pkt_cb.output_cx_pkt) {
            pkt.kind = VPX_CODEC_CX_FRAME_PKT;
@ -1233,6 +1235,7 @@ static vpx_codec_err_t encoder_encode(vpx_codec_alg_priv_t *ctx,
        pkt.data.frame.flags = get_frame_pkt_flags(cpi, lib_flags);
        pkt.data.frame.width[cpi->svc.spatial_layer_id] = cpi->common.width;
        pkt.data.frame.height[cpi->svc.spatial_layer_id] = cpi->common.height;
+        pkt.data.frame.last_spatial_layer_encoded = cpi->svc.last_layer_encoded;

        if (ctx->pending_cx_data) {
          if (size) ctx->pending_frame_sizes[ctx->pending_frame_count++] = size;
--- a/vpx/vpx_encoder.h
+++ b/vpx/vpx_encoder.h
@ -181,6 +181,9 @@ typedef struct vpx_codec_cx_pkt {
       * first one.*/
      unsigned int width[VPX_SS_MAX_LAYERS];  /**< frame width */
      unsigned int height[VPX_SS_MAX_LAYERS]; /**< frame height */
+      /*!\brief Last spatial layer frame in this packet. For VP8 this is always
+       * set to 0.*/
+      unsigned int last_spatial_layer_encoded;
    } frame;                            /**< data for compressed frame packet */
    vpx_fixed_buf_t twopass_stats;      /**< data for two-pass packet */
    vpx_fixed_buf_t firstpass_mb_stats; /**< first pass mb packet */