vp9: Skip encoding of enhancement layers on the fly.

For SVC: if an enhancement layer (spatial_layer > 0) has 0 bandwidth, skip/drop the encoding of that layer. This allows the application to dynamically disable higher layers for SVC. Add a flag to signal the skipped encoding; it is needed to modify the packing of the superframe when the top layer is skipped/dropped. Also move some updates (the current_video_frame counter and last_avg_frame_bandwidth) into vp9_rc_postencode_update_drop_frame(). Add a datarate unit test that dynamically goes from 3 to 2 and then back to 3 spatial layers. Change-Id: Idaccdb4aca25ba1d822ed1b4219f94e2e8640d43
2018-01-09 16:35:29 -08:00 · 2018-01-09 16:35:29 -08:00 · f8639b1554
commit f8639b1554
parent 1633786bfb
6 changed files with 79 additions and 4 deletions
--- a/test/datarate_test.cc
+++ b/test/datarate_test.cc
@@ -1227,6 +1227,7 @@ class DatarateOnePassCbrSvc
    memset(bits_in_buffer_model_, 0, sizeof(bits_in_buffer_model_));
    memset(bits_total_, 0, sizeof(bits_total_));
    memset(layer_target_avg_bandwidth_, 0, sizeof(layer_target_avg_bandwidth_));
+    dynamic_drop_layer_ = false;
  }
  virtual void BeginPassHook(unsigned int /*pass*/) {}
  virtual void PreEncodeFrameHook(::libvpx_test::VideoSource *video,
@@ -1253,6 +1254,22 @@ class DatarateOnePassCbrSvc
      encoder->Control(VP8E_SET_STATIC_THRESHOLD, 1);
      encoder->Control(VP9E_SET_TUNE_CONTENT, tune_content_);
    }
+
+    if (dynamic_drop_layer_) {
+      if (video->frame() == 100) {
+        // Change layer bitrates to set top layer to 0. This will trigger skip
+        // encoding/dropping of top spatial layer.
+        cfg_.rc_target_bitrate -= cfg_.layer_target_bitrate[2];
+        cfg_.layer_target_bitrate[2] = 0;
+        encoder->Config(&cfg_);
+      } else if (video->frame() == 300) {
+        // Change layer bitrate on top layer to non-zero to start encoding it
+        // again.
+        cfg_.layer_target_bitrate[2] = 500;
+        cfg_.rc_target_bitrate += cfg_.layer_target_bitrate[2];
+        encoder->Config(&cfg_);
+      }
+    }
    const vpx_rational_t tb = video->timebase();
    timebase_ = static_cast<double>(tb.num) / tb.den;
    duration_ = 0;
@@ -1317,7 +1334,7 @@ class DatarateOnePassCbrSvc
        (pkt->data.frame.flags & VPX_FRAME_IS_KEY) ? true : false;
    parse_superframe_index(static_cast<const uint8_t *>(pkt->data.frame.buf),
                           pkt->data.frame.sz, sizes, &count);
-    ASSERT_EQ(count, number_spatial_layers_);
+    if (!dynamic_drop_layer_) ASSERT_EQ(count, number_spatial_layers_);
    for (int sl = 0; sl < number_spatial_layers_; ++sl) {
      sizes[sl] = sizes[sl] << 3;
      // Update the total encoded bits per layer.
@@ -1375,6 +1392,7 @@ class DatarateOnePassCbrSvc
  int number_spatial_layers_;
  int number_temporal_layers_;
  int layer_target_avg_bandwidth_[VPX_MAX_LAYERS];
+  bool dynamic_drop_layer_;
 };
 static void assign_layer_bitrates(vpx_codec_enc_cfg_t *const enc_cfg,
                                  const vpx_svc_extra_cfg_t *svc_params,
@@ -1721,6 +1739,49 @@ TEST_P(DatarateOnePassCbrSvc, OnePassCbrSvc3SL3TL) {
 #endif
 }

+// Check basic rate targeting for 1 pass CBR SVC with 3 spatial layers and on
+// the fly switching to 2 spatial layers and then back to 3. This switch is done
+// by setting top spatial layer bitrate to 0, and then back to non-zero, during
+// the sequence.
+TEST_P(DatarateOnePassCbrSvc, OnePassCbrSvc3SL_to_2SL_dynamic) {
+  cfg_.rc_buf_initial_sz = 500;
+  cfg_.rc_buf_optimal_sz = 500;
+  cfg_.rc_buf_sz = 1000;
+  cfg_.rc_min_quantizer = 0;
+  cfg_.rc_max_quantizer = 63;
+  cfg_.rc_end_usage = VPX_CBR;
+  cfg_.g_lag_in_frames = 0;
+  cfg_.ss_number_layers = 3;
+  cfg_.ts_number_layers = 1;
+  cfg_.ts_rate_decimator[0] = 1;
+  cfg_.g_error_resilient = 1;
+  cfg_.g_threads = 1;
+  cfg_.temporal_layering_mode = 0;
+  svc_params_.scaling_factor_num[0] = 72;
+  svc_params_.scaling_factor_den[0] = 288;
+  svc_params_.scaling_factor_num[1] = 144;
+  svc_params_.scaling_factor_den[1] = 288;
+  svc_params_.scaling_factor_num[2] = 288;
+  svc_params_.scaling_factor_den[2] = 288;
+  cfg_.rc_dropframe_thresh = 0;
+  cfg_.kf_max_dist = 9999;
+  number_spatial_layers_ = cfg_.ss_number_layers;
+  number_temporal_layers_ = cfg_.ts_number_layers;
+  ::libvpx_test::I420VideoSource video("niklas_640_480_30.yuv", 640, 480, 30, 1,
+                                       0, 400);
+  cfg_.rc_target_bitrate = 800;
+  ResetModel();
+  dynamic_drop_layer_ = true;
+  assign_layer_bitrates(&cfg_, &svc_params_, cfg_.ss_number_layers,
+                        cfg_.ts_number_layers, cfg_.temporal_layering_mode,
+                        layer_target_avg_bandwidth_, bits_in_buffer_model_);
+  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+  // Don't check rate targeting on top spatial layer since it will be skipped
+  // for part of the sequence.
+  CheckLayerRateTargeting(&cfg_, number_spatial_layers_ - 1,
+                          number_temporal_layers_, file_datarate_, 0.78, 1.15);
+}
+
 // Check basic rate targeting for 1 pass CBR SVC: 3 spatial layers and 3
 // temporal layers. Run CIF clip with 1 thread, and few short key frame periods.
 TEST_P(DatarateOnePassCbrSvc, OnePassCbrSvc3SL3TLSmallKf) {
--- a/vp9/encoder/vp9_encoder.c
+++ b/vp9/encoder/vp9_encoder.c
@@ -4360,6 +4360,15 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, size_t *size,
  struct segmentation *const seg = &cm->seg;
  TX_SIZE t;

+  // SVC: skip encoding of enhancement layer if the layer target bandwidth = 0.
+  if (cpi->use_svc && cpi->svc.spatial_layer_id > 0 &&
+      !cpi->svc.rc_drop_superframe && cpi->oxcf.target_bandwidth == 0) {
+    cpi->svc.skip_enhancement_layer = 1;
+    vp9_rc_postencode_update_drop_frame(cpi);
+    cpi->ext_refresh_frame_flags_pending = 0;
+    return;
+  }
+
  set_ext_overrides(cpi);
  vpx_clear_system_state();

@@ -4451,7 +4460,6 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, size_t *size,
    if (vp9_rc_drop_frame(cpi) ||
        (is_one_pass_cbr_svc(cpi) && cpi->svc.rc_drop_superframe == 1)) {
      vp9_rc_postencode_update_drop_frame(cpi);
-      ++cm->current_video_frame;
      cpi->ext_refresh_frame_flags_pending = 0;
      cpi->svc.rc_drop_superframe = 1;
      cpi->last_frame_dropped = 1;
--- a/vp9/encoder/vp9_ratectrl.c
+++ b/vp9/encoder/vp9_ratectrl.c
@@ -1495,10 +1495,12 @@ void vp9_rc_postencode_update(VP9_COMP *cpi, uint64_t bytes_used) {
 void vp9_rc_postencode_update_drop_frame(VP9_COMP *cpi) {
  // Update buffer level with zero size, update frame counters, and return.
  update_buffer_level(cpi, 0);
+  cpi->common.current_video_frame++;
  cpi->rc.frames_since_key++;
  cpi->rc.frames_to_key--;
  cpi->rc.rc_2_frame = 0;
  cpi->rc.rc_1_frame = 0;
+  cpi->rc.last_avg_frame_bandwidth = cpi->rc.avg_frame_bandwidth;
 }

 static int calc_pframe_target_size_one_pass_vbr(const VP9_COMP *const cpi) {
--- a/vp9/encoder/vp9_svc_layercontext.c
+++ b/vp9/encoder/vp9_svc_layercontext.c
@@ -37,6 +37,7 @@ void vp9_init_layer_context(VP9_COMP *const cpi) {
  svc->scaled_one_half = 0;
  svc->current_superframe = 0;
  svc->non_reference_frame = 0;
+  svc->skip_enhancement_layer = 0;

  for (i = 0; i < REF_FRAMES; ++i) svc->ref_frame_index[i] = -1;
  for (sl = 0; sl < oxcf->ss_number_layers; ++sl) {
@@ -605,6 +606,7 @@ static void set_flags_and_fb_idx_for_temporal_mode_noLayering(
 int vp9_one_pass_cbr_svc_start_layer(VP9_COMP *const cpi) {
  int width = 0, height = 0;
  LAYER_CONTEXT *lc = NULL;
+  cpi->svc.skip_enhancement_layer = 0;
  if (cpi->svc.number_spatial_layers > 1) cpi->svc.use_base_mv = 1;
  cpi->svc.force_zero_mode_spatial_ref = 1;
  cpi->svc.mi_stride[cpi->svc.spatial_layer_id] = cpi->common.mi_stride;
--- a/vp9/encoder/vp9_svc_layercontext.h
+++ b/vp9/encoder/vp9_svc_layercontext.h
@@ -101,6 +101,8 @@ typedef struct SVC {
  int mi_stride[VPX_MAX_LAYERS];

  int first_layer_denoise;
+
+  int skip_enhancement_layer;
 } SVC;

 struct VP9_COMP;
--- a/vp9/vp9_cx_iface.c
+++ b/vp9/vp9_cx_iface.c
@@ -1213,7 +1213,7 @@ static vpx_codec_err_t encoder_encode(vpx_codec_alg_priv_t *ctx,
           -1 != vp9_get_compressed_data(cpi, &lib_flags, &size, cx_data,
                                         &dst_time_stamp, &dst_end_time_stamp,
                                         !img)) {
-      if (size) {
+      if (size || (cpi->use_svc && cpi->svc.skip_enhancement_layer)) {
        vpx_codec_cx_pkt_t pkt;

 #if CONFIG_SPATIAL_SVC
@@ -1264,7 +1264,7 @@ static vpx_codec_err_t encoder_encode(vpx_codec_alg_priv_t *ctx,
        pkt.data.frame.height = cpi->common.height;

        if (ctx->pending_cx_data) {
-          ctx->pending_frame_sizes[ctx->pending_frame_count++] = size;
+          if (size) ctx->pending_frame_sizes[ctx->pending_frame_count++] = size;
          ctx->pending_frame_magnitude |= size;
          ctx->pending_cx_data_sz += size;
          // write the superframe only for the case when