Merge "[svc] Finalize spatial svc first pass rate control"

2014-03-20 15:12:14 -07:00
parent 03781ff22d d205335060
commit 03d75182f7
9 changed files with 173 additions and 65 deletions
--- a/examples/vp9_spatial_scalable_encoder.c
+++ b/examples/vp9_spatial_scalable_encoder.c
@@ -264,6 +264,7 @@ int main(int argc, const char **argv) {
  int pts = 0;            /* PTS starts at 0 */
  int frame_duration = 1; /* 1 timebase tick per frame */
  FILE *infile = NULL;
+  int end_of_stream = 0;

  memset(&svc_ctx, 0, sizeof(svc_ctx));
  svc_ctx.log_print = 1;
@@ -305,12 +306,15 @@ int main(int argc, const char **argv) {
    vpx_img_read(&raw, infile);

  // Encode frames
-  while (frame_cnt < app_input.frames_to_code) {
-    if (!vpx_img_read(&raw, infile))
-      break;
+  while (!end_of_stream) {
+    if (frame_cnt >= app_input.frames_to_code || !vpx_img_read(&raw, infile)) {
+      // We need one extra vpx_svc_encode call at end of stream to flush
+      // encoder and get remaining data
+      end_of_stream = 1;
+    }

-    res = vpx_svc_encode(&svc_ctx, &codec, &raw, pts, frame_duration,
-                         VPX_DL_REALTIME);
+    res = vpx_svc_encode(&svc_ctx, &codec, (end_of_stream ? NULL : &raw),
+                         pts, frame_duration, VPX_DL_REALTIME);
    printf("%s", vpx_svc_get_message(&svc_ctx));
    if (res != VPX_CODEC_OK) {
      die_codec(&codec, "Failed to encode frame");
@@ -328,9 +332,11 @@ int main(int argc, const char **argv) {
                  vpx_svc_get_rc_stats_buffer(&svc_ctx),
                  vpx_svc_get_rc_stats_buffer_size(&svc_ctx));
    }
+    if (!end_of_stream) {
      ++frame_cnt;
      pts += frame_duration;
    }
+  }

  printf("Processed %d frames\n", frame_cnt);

--- a/test/svc_test.cc
+++ b/test/svc_test.cc
@@ -362,4 +362,39 @@ TEST_F(SvcTest, GetLayerResolution) {
  EXPECT_EQ(kHeight * 8 / 16, layer_height);
 }

+TEST_F(SvcTest, FirstPassEncode) {  // First-pass SVC encode of two frames followed by a flush call.
+  svc_.spatial_layers = 2;
+  codec_enc_.g_pass = VPX_RC_FIRST_PASS;  // select first-pass mode before vpx_svc_init()
+  vpx_svc_set_scale_factors(&svc_, "4/16,16/16");
+  vpx_svc_set_quantizers(&svc_, "40,30", 0);
+
+  vpx_codec_err_t res =
+      vpx_svc_init(&svc_, &codec_, vpx_codec_vp9_cx(), &codec_enc_);
+  ASSERT_EQ(VPX_CODEC_OK, res);
+  codec_initialized_ = true;  // NOTE(review): presumably read by the fixture's teardown -- confirm in SvcTest
+
+  libvpx_test::I420VideoSource video(test_file_name_, kWidth, kHeight,
+                                     codec_enc_.g_timebase.den,
+                                     codec_enc_.g_timebase.num, 0, 30);
+  // FRAME 0: the encode call must succeed and leave a non-empty rc stats buffer.
+  video.Begin();
+  res = vpx_svc_encode(&svc_, &codec_, video.img(), video.pts(),
+                       video.duration(), VPX_DL_GOOD_QUALITY);
+  ASSERT_EQ(VPX_CODEC_OK, res);
+  EXPECT_GT(vpx_svc_get_rc_stats_buffer_size(&svc_), 0U);
+
+  // FRAME 1: same expectations as frame 0 after advancing the source.
+  video.Next();
+  res = vpx_svc_encode(&svc_, &codec_, video.img(), video.pts(),
+                       video.duration(), VPX_DL_GOOD_QUALITY);
+  ASSERT_EQ(VPX_CODEC_OK, res);
+  EXPECT_GT(vpx_svc_get_rc_stats_buffer_size(&svc_), 0U);
+
+  // Flush encoder and test EOS packet: a NULL image is passed (pts/duration are still those of frame 1) and rc stats must still be non-empty.
+  res = vpx_svc_encode(&svc_, &codec_, NULL, video.pts(),
+                       video.duration(), VPX_DL_GOOD_QUALITY);
+  ASSERT_EQ(VPX_CODEC_OK, res);
+  EXPECT_GT(vpx_svc_get_rc_stats_buffer_size(&svc_), 0U);
+}
+
 }  // namespace
--- a/vp9/encoder/vp9_firstpass.c
+++ b/vp9/encoder/vp9_firstpass.c
@@ -181,11 +181,13 @@ static void zero_stats(FIRSTPASS_STATS *section) {
  section->new_mv_count = 0.0;
  section->count      = 0.0;
  section->duration   = 1.0;
+  section->spatial_layer_id = 0;
 }

 static void accumulate_stats(FIRSTPASS_STATS *section,
                             const FIRSTPASS_STATS *frame) {
  section->frame += frame->frame;
+  section->spatial_layer_id = frame->spatial_layer_id;
  section->intra_error += frame->intra_error;
  section->coded_error += frame->coded_error;
  section->sr_coded_error += frame->sr_coded_error;
@@ -342,7 +344,15 @@ void vp9_init_first_pass(VP9_COMP *cpi) {
 }

 void vp9_end_first_pass(VP9_COMP *cpi) {
+  if (cpi->use_svc && cpi->svc.number_temporal_layers == 1) {  // SVC with exactly one temporal layer: stats are kept per spatial layer
+    int i;
+    for (i = 0; i < cpi->svc.number_spatial_layers; ++i) {  // output the accumulated total_stats of every spatial layer in turn
+      output_stats(&cpi->svc.layer_context[i].twopass.total_stats,
+                   cpi->output_pkt_list);
+    }
+  } else {  // otherwise output the single encoder-wide total_stats
    output_stats(&cpi->twopass.total_stats, cpi->output_pkt_list);
+  }
 }

 static vp9_variance_fn_t get_block_variance_fn(BLOCK_SIZE bsize) {
@@ -464,11 +474,11 @@ void vp9_first_pass(VP9_COMP *cpi) {

  int recon_yoffset, recon_uvoffset;
  YV12_BUFFER_CONFIG *const lst_yv12 = get_ref_frame_buffer(cpi, LAST_FRAME);
-  YV12_BUFFER_CONFIG *const gld_yv12 = get_ref_frame_buffer(cpi, GOLDEN_FRAME);
+  YV12_BUFFER_CONFIG *gld_yv12 = get_ref_frame_buffer(cpi, GOLDEN_FRAME);
  YV12_BUFFER_CONFIG *const new_yv12 = get_frame_new_buffer(cm);
-  const int recon_y_stride = lst_yv12->y_stride;
-  const int recon_uv_stride = lst_yv12->uv_stride;
-  const int uv_mb_height = 16 >> (lst_yv12->y_height > lst_yv12->uv_height);
+  int recon_y_stride = lst_yv12->y_stride;
+  int recon_uv_stride = lst_yv12->uv_stride;
+  int uv_mb_height = 16 >> (lst_yv12->y_height > lst_yv12->uv_height);
  int64_t intra_error = 0;
  int64_t coded_error = 0;
  int64_t sr_coded_error = 0;
@@ -484,13 +494,43 @@ void vp9_first_pass(VP9_COMP *cpi) {
  int new_mv_count = 0;
  int sum_in_vectors = 0;
  uint32_t lastmv_as_int = 0;
-  struct twopass_rc *const twopass = &cpi->twopass;
+  struct twopass_rc *twopass = &cpi->twopass;
  const MV zero_mv = {0, 0};
+  const YV12_BUFFER_CONFIG *first_ref_buf = lst_yv12;

  vp9_clear_system_state();

+  if (cpi->use_svc && cpi->svc.number_temporal_layers == 1) {
+    MV_REFERENCE_FRAME ref_frame = LAST_FRAME;
+    const YV12_BUFFER_CONFIG *scaled_ref_buf = NULL;
+    twopass = &cpi->svc.layer_context[cpi->svc.spatial_layer_id].twopass;
+
+    vp9_scale_references(cpi);
+
+    // Use either last frame or alt frame for motion search.
+    if (cpi->ref_frame_flags & VP9_LAST_FLAG) {
+      scaled_ref_buf = vp9_get_scaled_ref_frame(cpi, LAST_FRAME);
+      ref_frame = LAST_FRAME;
+    } else if (cpi->ref_frame_flags & VP9_ALT_FLAG) {
+      scaled_ref_buf = vp9_get_scaled_ref_frame(cpi, ALTREF_FRAME);
+      ref_frame = ALTREF_FRAME;
+    }
+
+    if (scaled_ref_buf != NULL) {
+      // Update the stride since we are using scaled reference buffer
+      first_ref_buf = scaled_ref_buf;
+      recon_y_stride = first_ref_buf->y_stride;
+      recon_uv_stride = first_ref_buf->uv_stride;
+      uv_mb_height = 16 >> (first_ref_buf->y_height > first_ref_buf->uv_height);
+    }
+
+    // Disable golden frame for svc first pass for now.
+    gld_yv12 = NULL;
+    set_ref_ptrs(cm, xd, ref_frame, NONE);
+  }
+
  vp9_setup_src_planes(x, cpi->Source, 0, 0);
-  vp9_setup_pre_planes(xd, 0, lst_yv12, 0, 0, NULL);
+  vp9_setup_pre_planes(xd, 0, first_ref_buf, 0, 0, NULL);
  vp9_setup_dst_planes(xd, new_yv12, 0, 0);

  xd->mi_8x8 = cm->mi_grid_visible;
@@ -583,7 +623,7 @@ void vp9_first_pass(VP9_COMP *cpi) {
        int tmp_err, motion_error;
        int_mv mv, tmp_mv;

-        xd->plane[0].pre[0].buf = lst_yv12->y_buffer + recon_yoffset;
+        xd->plane[0].pre[0].buf = first_ref_buf->y_buffer + recon_yoffset;
        motion_error = zz_motion_search(x);
        // Assume 0,0 motion with no mv overhead.
        mv.as_int = tmp_mv.as_int = 0;
@@ -615,7 +655,7 @@ void vp9_first_pass(VP9_COMP *cpi) {
        }

        // Search in an older reference frame.
-        if (cm->current_video_frame > 1) {
+        if (cm->current_video_frame > 1 && gld_yv12 != NULL) {
          // Assume 0,0 motion with no mv overhead.
          int gf_motion_error;

@@ -633,9 +673,9 @@ void vp9_first_pass(VP9_COMP *cpi) {
            ++second_ref_count;

          // Reset to last frame as reference buffer.
-          xd->plane[0].pre[0].buf = lst_yv12->y_buffer + recon_yoffset;
-          xd->plane[1].pre[0].buf = lst_yv12->u_buffer + recon_uvoffset;
-          xd->plane[2].pre[0].buf = lst_yv12->v_buffer + recon_uvoffset;
+          xd->plane[0].pre[0].buf = first_ref_buf->y_buffer + recon_yoffset;
+          xd->plane[1].pre[0].buf = first_ref_buf->u_buffer + recon_uvoffset;
+          xd->plane[2].pre[0].buf = first_ref_buf->v_buffer + recon_uvoffset;

          // In accumulating a score for the older reference frame take the
          // best of the motion predicted score and the intra coded error
@@ -743,6 +783,7 @@ void vp9_first_pass(VP9_COMP *cpi) {
    FIRSTPASS_STATS fps;

    fps.frame = cm->current_video_frame;
+    fps.spatial_layer_id = cpi->svc.spatial_layer_id;
    fps.intra_error = (double)(intra_error >> 8);
    fps.coded_error = (double)(coded_error >> 8);
    fps.sr_coded_error = (double)(sr_coded_error >> 8);
@@ -792,20 +833,28 @@ void vp9_first_pass(VP9_COMP *cpi) {
       (twopass->this_frame_stats.pcnt_inter > 0.20) &&
       ((twopass->this_frame_stats.intra_error /
         DOUBLE_DIVIDE_CHECK(twopass->this_frame_stats.coded_error)) > 2.0))) {
+    if (gld_yv12 != NULL) {
      vp8_yv12_copy_frame(lst_yv12, gld_yv12);
+    }
    twopass->sr_update_lag = 1;
  } else {
    ++twopass->sr_update_lag;
  }
+
+  if (cpi->use_svc && cpi->svc.number_temporal_layers == 1) {
+    vp9_update_reference_frames(cpi);
+  } else {
    // Swap frame pointers so last frame refers to the frame we just compressed.
    swap_yv12(lst_yv12, new_yv12);
+  }

  vp9_extend_frame_borders(lst_yv12);

  // Special case for the first frame. Copy into the GF buffer as a second
  // reference.
-  if (cm->current_video_frame == 0)
+  if (cm->current_video_frame == 0 && gld_yv12 != NULL) {
    vp8_yv12_copy_frame(lst_yv12, gld_yv12);
+  }

  // Use this to see what the first pass reconstruction looks like.
  if (0) {
--- a/vp9/encoder/vp9_firstpass.h
+++ b/vp9/encoder/vp9_firstpass.h
@@ -35,6 +35,7 @@ typedef struct {
  double new_mv_count;
  double duration;
  double count;
+  int spatial_layer_id;
 } FIRSTPASS_STATS;

 struct twopass_rc {
--- a/vp9/encoder/vp9_onyx_if.c
+++ b/vp9/encoder/vp9_onyx_if.c
@@ -2450,7 +2450,7 @@ static int recode_loop_test(const VP9_COMP *cpi,
  return force_recode;
 }

-static void update_reference_frames(VP9_COMP * const cpi) {
+void vp9_update_reference_frames(VP9_COMP *cpi) {
  VP9_COMMON * const cm = &cpi->common;

  // At this point the new frame has been encoded.
@@ -2534,7 +2534,7 @@ static void loopfilter_frame(VP9_COMP *cpi, VP9_COMMON *cm) {
  vp9_extend_frame_inner_borders(cm->frame_to_show);
 }

-static void scale_references(VP9_COMP *cpi) {
+void vp9_scale_references(VP9_COMP *cpi) {
  VP9_COMMON *cm = &cpi->common;
  MV_REFERENCE_FRAME ref_frame;

@@ -2971,7 +2971,7 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi,
  } else {
    cpi->Source = cpi->un_scaled_source;
  }
-  scale_references(cpi);
+  vp9_scale_references(cpi);

  vp9_clear_system_state();

@@ -3149,7 +3149,7 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi,
    update_reference_segmentation_map(cpi);

  release_scaled_references(cpi);
-  update_reference_frames(cpi);
+  vp9_update_reference_frames(cpi);

  for (t = TX_4X4; t <= TX_32X32; t++)
    full_to_model_counts(cm->counts.coef[t], cpi->coef_counts[t]);
@@ -3575,12 +3575,13 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags,
    vp9_vaq_init();
  }

-  if (cpi->use_svc) {
-    SvcEncode(cpi, size, dest, frame_flags);
-  } else if (cpi->pass == 1) {
+  if (cpi->pass == 1 &&
+      (!cpi->use_svc || cpi->svc.number_temporal_layers == 1)) {
    Pass1Encode(cpi, size, dest, frame_flags);
-  } else if (cpi->pass == 2) {
+  } else if (cpi->pass == 2 && !cpi->use_svc) {
    Pass2Encode(cpi, size, dest, frame_flags);
+  } else if (cpi->use_svc) {
+    SvcEncode(cpi, size, dest, frame_flags);
  } else {
    // One pass encode
    Pass0Encode(cpi, size, dest, frame_flags);
--- a/vp9/encoder/vp9_onyx_int.h
+++ b/vp9/encoder/vp9_onyx_int.h
@@ -933,6 +933,10 @@ int vp9_compute_qdelta(const VP9_COMP *cpi, double qstart, double qtarget);
 int vp9_compute_qdelta_by_rate(VP9_COMP *cpi, int base_q_index,
                               double rate_target_ratio);

+void vp9_scale_references(VP9_COMP *cpi);
+
+void vp9_update_reference_frames(VP9_COMP *cpi);
+
 static int get_token_alloc(int mb_rows, int mb_cols) {
  return mb_rows * mb_cols * (48 * 16 + 4);
 }
--- a/vp9/encoder/vp9_svc_layercontext.h
+++ b/vp9/encoder/vp9_svc_layercontext.h
@@ -27,6 +27,7 @@ typedef struct {
  int64_t maximum_buffer_size;
  double framerate;
  int avg_frame_size;
+  struct twopass_rc twopass;
 } LAYER_CONTEXT;

 typedef struct {
@@ -34,9 +35,10 @@ typedef struct {
  int temporal_layer_id;
  int number_spatial_layers;
  int number_temporal_layers;
-  // Layer context used for rate control in CBR mode, only defined for
-  // temporal layers for now.
-  LAYER_CONTEXT layer_context[VPX_TS_MAX_LAYERS];
+  // Layer context used for rate control in temporal CBR mode or spatial
+  // two pass mode. Defined for temporal or spatial layers for now.
+  // Does not support temporal combined with spatial RC.
+  LAYER_CONTEXT layer_context[MAX(VPX_TS_MAX_LAYERS, VPX_SS_MAX_LAYERS)];
 } SVC;

 struct VP9_COMP;
--- a/vpx/src/svc_encodeframe.c
+++ b/vpx/src/svc_encodeframe.c
@@ -850,7 +850,7 @@ vpx_codec_err_t vpx_svc_encode(SvcContext *svc_ctx, vpx_codec_ctx_t *codec_ctx,
  struct LayerData *layer_data;
  struct Superframe superframe;
  SvcInternal *const si = get_svc_internal(svc_ctx);
-  if (svc_ctx == NULL || codec_ctx == NULL || rawimg == NULL || si == NULL) {
+  if (svc_ctx == NULL || codec_ctx == NULL || si == NULL) {
    return VPX_CODEC_INVALID_PARAM;
  }

@@ -866,9 +866,12 @@ vpx_codec_err_t vpx_svc_encode(SvcContext *svc_ctx, vpx_codec_ctx_t *codec_ctx,
  si->is_keyframe = (si->frame_within_gop == 0);
  si->frame_size = 0;

+  if (rawimg != NULL) {
    svc_log(svc_ctx, SVC_LOG_DEBUG,
-          "vpx_svc_encode  layers: %d, frame_count: %d, frame_within_gop: %d\n",
-          si->layers, si->encode_frame_count, si->frame_within_gop);
+            "vpx_svc_encode  layers: %d, frame_count: %d, "
+            "frame_within_gop: %d\n", si->layers, si->encode_frame_count,
+            si->frame_within_gop);
+  }

  // encode each layer
  for (si->layer = 0; si->layer < si->layers; ++si->layer) {
@@ -877,9 +880,11 @@ vpx_codec_err_t vpx_svc_encode(SvcContext *svc_ctx, vpx_codec_ctx_t *codec_ctx,
      svc_log(svc_ctx, SVC_LOG_DEBUG, "Skip encoding layer %d\n", si->layer);
      continue;
    }
-    calculate_enc_frame_flags(svc_ctx);

+    if (rawimg != NULL) {
+      calculate_enc_frame_flags(svc_ctx);
      set_svc_parameters(svc_ctx, codec_ctx);
+    }

    res = vpx_codec_encode(codec_ctx, rawimg, pts, (uint32_t)duration,
                           si->enc_frame_flags, deadline);
@@ -953,7 +958,11 @@ vpx_codec_err_t vpx_svc_encode(SvcContext *svc_ctx, vpx_codec_ctx_t *codec_ctx,
        }
      }
    }
+    if (rawimg == NULL) {
+      break;
    }
+  }
+  if (codec_ctx->config.enc->g_pass != VPX_RC_FIRST_PASS) {
    // add superframe index to layer data list
    sf_create_index(&superframe);
    layer_data = ld_create(superframe.buffer, superframe.index_size);
@@ -961,8 +970,7 @@ vpx_codec_err_t vpx_svc_encode(SvcContext *svc_ctx, vpx_codec_ctx_t *codec_ctx,

    // get accumulated size of layer data
    si->frame_size = ld_list_get_buffer_size(cx_layer_list);
-  if (si->frame_size == 0) return VPX_CODEC_ERROR;
-
+    if (si->frame_size > 0) {
      // all layers encoded, create single buffer with concatenated layers
      if (si->frame_size > si->buffer_size) {
        free(si->buffer);
@@ -978,9 +986,11 @@ vpx_codec_err_t vpx_svc_encode(SvcContext *svc_ctx, vpx_codec_ctx_t *codec_ctx,

      ld_list_free(cx_layer_list);

-  svc_log(svc_ctx, SVC_LOG_DEBUG, "SVC frame: %d, kf: %d, size: %d, pts: %d\n",
-          si->encode_frame_count, si->is_keyframe, (int)si->frame_size,
-          (int)pts);
+      svc_log(svc_ctx, SVC_LOG_DEBUG, "SVC frame: %d, kf: %d, size: %d, "
+              "pts: %d\n", si->encode_frame_count, si->is_keyframe,
+              (int)si->frame_size, (int)pts);
+    }
+  }
  ++si->frame_within_gop;
  ++si->encode_frame_count;

--- a/vpx/vpx_encoder.h
+++ b/vpx/vpx_encoder.h
@@ -49,7 +49,7 @@ extern "C" {
 #define VPX_SS_MAX_LAYERS       5

 /*! Spatial Scalability: Default number of coding layers */
-#define VPX_SS_DEFAULT_LAYERS       3
+#define VPX_SS_DEFAULT_LAYERS       1

  /*!\brief Current ABI version number
   *