From 110a2ddc9b6c0c53ceb037a42bf24b4a4ff4597d Mon Sep 17 00:00:00 2001 From: JackyChen Date: Mon, 11 Jul 2016 17:06:10 -0700 Subject: [PATCH] vp9 svc: Reuse scaled_temp in two stage downscaling. This change eliminates redundant computation in the two stage downscaling, which saves ~1% encoding time in 3-layer svc encoding. Change-Id: Ib4b218811b68499a740af1f9b7b5a5445e28d671 --- vp9/encoder/vp9_encoder.c | 32 ++++++++++++++++++++++++++++++ vp9/encoder/vp9_svc_layercontext.c | 22 ++------------------ vp9/encoder/vp9_svc_layercontext.h | 2 ++ 3 files changed, 36 insertions(+), 20 deletions(-) diff --git a/vp9/encoder/vp9_encoder.c b/vp9/encoder/vp9_encoder.c index 9413a436f..25501f254 100644 --- a/vp9/encoder/vp9_encoder.c +++ b/vp9/encoder/vp9_encoder.c @@ -753,6 +753,26 @@ static void alloc_util_frame_buffers(VP9_COMP *cpi) { vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR, "Failed to allocate scaled source buffer"); + // For 1 pass cbr: allocate scaled_temp that may be used as an intermediate + // buffer for a 2 stage down-sampling: two stages of 1:2 down-sampling for a + // target of 1/4x1/4. 
+ if (is_one_pass_cbr_svc(cpi) && !cpi->svc.scaled_temp_is_alloc) { + cpi->svc.scaled_temp_is_alloc = 1; + if (vpx_realloc_frame_buffer(&cpi->svc.scaled_temp, + cm->width >> 1, + cm->height >> 1, + cm->subsampling_x, + cm->subsampling_y, +#if CONFIG_VP9_HIGHBITDEPTH + cm->use_highbitdepth, +#endif + VP9_ENC_BORDER_IN_PIXELS, + cm->byte_alignment, + NULL, NULL, NULL)) + vpx_internal_error(&cpi->common.error, VPX_CODEC_MEM_ERROR, + "Failed to allocate scaled_frame for svc "); + } + if (vpx_realloc_frame_buffer(&cpi->scaled_last_source, cm->width, cm->height, cm->subsampling_x, cm->subsampling_y, @@ -3262,10 +3282,22 @@ static void encode_without_recode_loop(VP9_COMP *cpi, cpi->un_scaled_source->y_height == cm->height << 2 && cpi->svc.scaled_temp.y_width == cm->width << 1 && cpi->svc.scaled_temp.y_height == cm->height << 1) { + // For svc, if it is a 1/4x1/4 downscaling, do a two-stage scaling to take + // advantage of the 1:2 optimized scaler. In the process, the 1/2x1/2 + // result will be saved in scaled_temp and might be used later. cpi->Source = vp9_svc_twostage_scale(cm, cpi->un_scaled_source, &cpi->scaled_source, &cpi->svc.scaled_temp); + cpi->svc.scaled_one_half = 1; + } else if (is_one_pass_cbr_svc(cpi) && + cpi->un_scaled_source->y_width == cm->width << 1 && + cpi->un_scaled_source->y_height == cm->height << 1 && + cpi->svc.scaled_one_half) { + // If the spatial layer is 1/2x1/2 and the scaling is already done in the + // two-stage scaling, use the result directly. 
+ cpi->Source = &cpi->svc.scaled_temp; + cpi->svc.scaled_one_half = 0; } else { cpi->Source = vp9_scale_if_required(cm, cpi->un_scaled_source, diff --git a/vp9/encoder/vp9_svc_layercontext.c b/vp9/encoder/vp9_svc_layercontext.c index 1814a32c9..824146fe7 100644 --- a/vp9/encoder/vp9_svc_layercontext.c +++ b/vp9/encoder/vp9_svc_layercontext.c @@ -33,6 +33,8 @@ void vp9_init_layer_context(VP9_COMP *const cpi) { svc->rc_drop_superframe = 0; svc->force_zero_mode_spatial_ref = 0; svc->use_base_mv = 0; + svc->scaled_temp_is_alloc = 0; + svc->scaled_one_half = 0; svc->current_superframe = 0; for (i = 0; i < REF_FRAMES; ++i) svc->ref_frame_index[i] = -1; @@ -43,26 +45,6 @@ void vp9_init_layer_context(VP9_COMP *const cpi) { cpi->svc.ext_alt_fb_idx[sl] = 2; } - // For 1 pass cbr: allocate scaled_frame that may be used as an intermediate - // buffer for a 2 stage down-sampling: two stages of 1:2 down-sampling for a - // target of 1/4x1/4. - if (cpi->oxcf.pass == 0 && cpi->oxcf.rc_mode == VPX_CBR) { - if (vpx_realloc_frame_buffer(&cpi->svc.scaled_temp, - cpi->common.width >> 1, - cpi->common.height >> 1, - cpi->common.subsampling_x, - cpi->common.subsampling_y, -#if CONFIG_VP9_HIGHBITDEPTH - cpi->common.use_highbitdepth, -#endif - VP9_ENC_BORDER_IN_PIXELS, - cpi->common.byte_alignment, - NULL, NULL, NULL)) - vpx_internal_error(&cpi->common.error, VPX_CODEC_MEM_ERROR, - "Failed to allocate scaled_frame for svc "); - } - - if (cpi->oxcf.error_resilient_mode == 0 && cpi->oxcf.pass == 2) { if (vpx_realloc_frame_buffer(&cpi->svc.empty_frame.img, SMALL_FRAME_WIDTH, SMALL_FRAME_HEIGHT, diff --git a/vp9/encoder/vp9_svc_layercontext.h b/vp9/encoder/vp9_svc_layercontext.h index 9f386fb08..39094fe02 100644 --- a/vp9/encoder/vp9_svc_layercontext.h +++ b/vp9/encoder/vp9_svc_layercontext.h @@ -72,6 +72,8 @@ typedef struct { YV12_BUFFER_CONFIG scaled_frames[MAX_LAG_BUFFERS]; // Temp buffer used for 2-stage down-sampling, for real-time mode. 
YV12_BUFFER_CONFIG scaled_temp; + int scaled_one_half; + int scaled_temp_is_alloc; // Layer context used for rate control in one pass temporal CBR mode or // two pass spatial mode.