diff --git a/examples/vp9_spatial_svc_encoder.c b/examples/vp9_spatial_svc_encoder.c
index d2b368856..271ab704b 100644
--- a/examples/vp9_spatial_svc_encoder.c
+++ b/examples/vp9_spatial_svc_encoder.c
@@ -30,6 +30,7 @@
 #include "vpx/vp8cx.h"
 #include "vpx/vpx_encoder.h"
 #include "../vpxstats.h"
+#include "vp9/encoder/vp9_encoder.h"
 #define OUTPUT_RC_STATS 1
 
 static const arg_def_t skip_frames_arg =
@@ -749,6 +750,7 @@ int main(int argc, const char **argv) {
       cx_time += vpx_usec_timer_elapsed(&timer);
 
       printf("%s", vpx_svc_get_message(&svc_ctx));
+      fflush(stdout);
       if (res != VPX_CODEC_OK) {
         die_codec(&codec, "Failed to encode frame");
       }
@@ -756,6 +758,7 @@ int main(int argc, const char **argv) {
       while ((cx_pkt = vpx_codec_get_cx_data(&codec, &iter)) != NULL) {
         switch (cx_pkt->kind) {
           case VPX_CODEC_CX_FRAME_PKT: {
+            SvcInternal_t *const si = (SvcInternal_t *)svc_ctx.internal;
            if (cx_pkt->data.frame.sz > 0) {
 #if OUTPUT_RC_STATS
              uint32_t sizes[8];
@@ -851,6 +854,8 @@ int main(int argc, const char **argv) {
            printf("SVC frame: %d, kf: %d, size: %d, pts: %d\n", frames_received,
                   !!(cx_pkt->data.frame.flags & VPX_FRAME_IS_KEY),
                   (int)cx_pkt->data.frame.sz, (int)cx_pkt->data.frame.pts);
+           if (enc_cfg.ss_number_layers == 1 && enc_cfg.ts_number_layers == 1)
+             si->bytes_sum[0] += (int)cx_pkt->data.frame.sz;
            ++frames_received;
            break;
          }
diff --git a/examples/vpx_temporal_svc_encoder.c b/examples/vpx_temporal_svc_encoder.c
index 5adda9eeb..16abb9deb 100644
--- a/examples/vpx_temporal_svc_encoder.c
+++ b/examples/vpx_temporal_svc_encoder.c
@@ -41,7 +41,7 @@ enum denoiserState {
   kDenoiserOnAdaptive
 };
 
-static int mode_to_num_layers[12] = {1, 2, 2, 3, 3, 3, 3, 5, 2, 3, 3, 3};
+static int mode_to_num_layers[13] = {1, 2, 2, 3, 3, 3, 3, 5, 2, 3, 3, 3, 3};
 
 // For rate control encoding stats.
 struct RateControlMetrics {
@@ -432,7 +432,32 @@ static void set_temporal_layer_pattern(int layering_mode,
       layer_flags[7] = layer_flags[3];
       break;
     }
-    case 11:
+    case 11: {
+      // 3-layers structure with one reference frame.
+      // This works the same as temporal_layering_mode 3.
+      // This was added to compare with vp9_spatial_svc_encoder.
+
+      // 3-layers, 4-frame period.
+      int ids[4] = {0, 2, 1, 2};
+      cfg->ts_periodicity = 4;
+      *flag_periodicity = 4;
+      cfg->ts_number_layers = 3;
+      cfg->ts_rate_decimator[0] = 4;
+      cfg->ts_rate_decimator[1] = 2;
+      cfg->ts_rate_decimator[2] = 1;
+      memcpy(cfg->ts_layer_id, ids, sizeof(ids));
+      // 0=L, 1=GF, 2=ARF, Intra-layer prediction disabled.
+      layer_flags[0] = VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF |
+                       VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF;
+      layer_flags[2] = VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF |
+                       VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_UPD_LAST;
+      layer_flags[1] = VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF |
+                       VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_GF;
+      layer_flags[3] = VP8_EFLAG_NO_REF_LAST | VP8_EFLAG_NO_REF_ARF |
+                       VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_GF;
+      break;
+    }
+    case 12:
     default: {
       // 3-layers structure as in case 10, but no sync/refresh points for
       // layer 1 and 2.
@@ -530,7 +555,7 @@ int main(int argc, char **argv) {
   }
 
   layering_mode = strtol(argv[10], NULL, 0);
-  if (layering_mode < 0 || layering_mode > 12) {
+  if (layering_mode < 0 || layering_mode > 13) {
     die("Invalid layering mode (0..12) %s", argv[10]);
   }
 
diff --git a/test/datarate_test.cc b/test/datarate_test.cc
index cec6d090d..b98f8c846 100644
--- a/test/datarate_test.cc
+++ b/test/datarate_test.cc
@@ -905,9 +905,7 @@ TEST_P(DatarateOnePassCbrSvc, OnePassCbrSvc) {
   svc_params_.scaling_factor_den[0] = 288;
   svc_params_.scaling_factor_num[1] = 288;
   svc_params_.scaling_factor_den[1] = 288;
-  // TODO(wonkap/marpan): No frame drop for now, we need to implement correct
-  // frame dropping for SVC.
-  cfg_.rc_dropframe_thresh = 0;
+  cfg_.rc_dropframe_thresh = 10;
   ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
                                        30, 1, 0, 200);
   // TODO(wonkap/marpan): Check that effective_datarate for each layer hits the
@@ -949,9 +947,7 @@ TEST_P(DatarateOnePassCbrSvc, OnePassCbrSvc4threads) {
   svc_params_.scaling_factor_den[0] = 288;
   svc_params_.scaling_factor_num[1] = 288;
   svc_params_.scaling_factor_den[1] = 288;
-  // TODO(wonkap/marpan): No frame drop for now, we need to implement correct
-  // frame dropping for SVC.
-  cfg_.rc_dropframe_thresh = 0;
+  cfg_.rc_dropframe_thresh = 10;
   ::libvpx_test::I420VideoSource video("niklas_1280_720_30.y4m", 1280, 720,
                                        30, 1, 0, 300);
   cfg_.rc_target_bitrate = 800;
diff --git a/vp10/vp10_cx_iface.c b/vp10/vp10_cx_iface.c
index c90b93637..65a216e36 100644
--- a/vp10/vp10_cx_iface.c
+++ b/vp10/vp10_cx_iface.c
@@ -1222,7 +1222,7 @@ static vpx_codec_err_t ctrl_set_render_size(vpx_codec_alg_priv_t *ctx,
   struct vp10_extracfg extra_cfg = ctx->extra_cfg;
   int *const render_size = va_arg(args, int *);
   extra_cfg.render_width = render_size[0];
-  extra_cfg.render_height = render_size[0];
+  extra_cfg.render_height = render_size[1];
   return update_extra_cfg(ctx, &extra_cfg);
 }
 
diff --git a/vp8/common/threading.h b/vp8/common/threading.h
index b2e6ded3d..bc99dbb93 100644
--- a/vp8/common/threading.h
+++ b/vp8/common/threading.h
@@ -12,6 +12,7 @@
 #ifndef VP8_COMMON_THREADING_H_
 #define VP8_COMMON_THREADING_H_
 
+
 #ifdef __cplusplus
 extern "C" {
 #endif
@@ -28,8 +29,6 @@ extern "C" {
 #define THREAD_SPECIFIC_INDEX DWORD
 #define pthread_t HANDLE
 #define pthread_attr_t DWORD
-#define pthread_create(thhandle,attr,thfunc,tharg) (int)((*thhandle=(HANDLE)_beginthreadex(NULL,0,(unsigned int (__stdcall *)(void *))thfunc,tharg,0,NULL))==NULL)
-#define pthread_join(thread, result) ((WaitForSingleObject((thread),INFINITE)!=WAIT_OBJECT_0) || !CloseHandle(thread))
 #define pthread_detach(thread) if(thread!=NULL)CloseHandle(thread)
 #define thread_sleep(nms) Sleep(nms)
 #define pthread_cancel(thread) terminate_thread(thread,0)
@@ -49,9 +48,6 @@ extern "C" {
 #define THREAD_SPECIFIC_INDEX PULONG
 #define pthread_t TID
 #define pthread_attr_t ULONG
-#define pthread_create(thhandle,attr,thfunc,tharg) \
-  ((int)((*(thhandle)=_beginthread(thfunc,NULL,1024*1024,tharg))==-1))
-#define pthread_join(thread, result) ((int)DosWaitThread(&(thread),0))
 #define pthread_detach(thread) 0
 #define thread_sleep(nms) DosSleep(nms)
 #define pthread_cancel(thread) DosKillThread(thread)
diff --git a/vp8/encoder/denoising.c b/vp8/encoder/denoising.c
index d197f8f81..2a21943fe 100644
--- a/vp8/encoder/denoising.c
+++ b/vp8/encoder/denoising.c
@@ -604,10 +604,9 @@ void vp8_denoiser_denoise_mb(VP8_DENOISER *denoiser,
                          NOISE_MOTION_THRESHOLD;
 
     // If block is considered to be skin area, lower the motion threshold.
-    // In current version set threshold = 1, so only denoise very low
-    // (i.e., zero) mv on skin.
+    // In current version set threshold = 0, so only denoise zero mv on skin.
     if (x->is_skin)
-      motion_threshold = 1;
+      motion_threshold = 0;
 
     if (motion_magnitude2 <
         denoiser->denoise_pars.scale_increase_filter * NOISE_MOTION_THRESHOLD)
diff --git a/vp9/encoder/vp9_aq_cyclicrefresh.c b/vp9/encoder/vp9_aq_cyclicrefresh.c
index d3febb4f8..1b3024af3 100644
--- a/vp9/encoder/vp9_aq_cyclicrefresh.c
+++ b/vp9/encoder/vp9_aq_cyclicrefresh.c
@@ -59,6 +59,8 @@ void vp9_cyclic_refresh_free(CYCLIC_REFRESH *cr) {
 }
 
 // Check if we should turn off cyclic refresh based on bitrate condition.
+// TODO(marpan): May be better in some cases to just reduce the amount/delta-qp
+// instead of completely shutting off.
 static int apply_cyclic_refresh_bitrate(const VP9_COMMON *cm,
                                         const RATE_CONTROL *rc) {
   // Turn off cyclic refresh if bits available per frame is not sufficiently
@@ -66,10 +68,10 @@ static int apply_cyclic_refresh_bitrate(const VP9_COMMON *cm,
   // with number of seg blocks, so compare available bits to number of blocks.
   // Average bits available per frame = avg_frame_bandwidth
   // Number of (8x8) blocks in frame = mi_rows * mi_cols;
-  const float factor = 0.25;
+  const float factor = 0.15f;
   const int number_blocks = cm->mi_rows * cm->mi_cols;
   // The condition below corresponds to turning off at target bitrates:
-  // (at 30fps), ~12kbps for CIF, 36kbps for VGA, 100kps for HD/720p.
+  // (at 30fps), ~8kbps for CIF, 20kbps for VGA, 60kbps for HD/720p.
   // Also turn off at very small frame sizes, to avoid too large fraction of
   // superblocks to be refreshed per frame. Threshold below is less than QCIF.
   if (rc->avg_frame_bandwidth < factor * number_blocks ||
diff --git a/vp9/encoder/vp9_cost.h b/vp9/encoder/vp9_cost.h
index eac74c40b..d8bf23f1e 100644
--- a/vp9/encoder/vp9_cost.h
+++ b/vp9/encoder/vp9_cost.h
@@ -19,6 +19,9 @@ extern "C" {
 
 extern const unsigned int vp9_prob_cost[256];
 
+// The factor to scale from cost in bits to cost in vp9_prob_cost units.
+#define VP9_PROB_COST_SHIFT 8
+
 #define vp9_cost_zero(prob) (vp9_prob_cost[prob])
 #define vp9_cost_one(prob) vp9_cost_zero(vpx_complement(prob))
 
diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c
index b1d8dc33d..916f6f9bf 100644
--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c
@@ -1104,7 +1104,7 @@ static void update_state(VP9_COMP *cpi, ThreadData *td,
       THR_D63_PRED /*D63_PRED*/,
       THR_TM /*TM_PRED*/,
     };
-    ++cpi->mode_chosen_counts[kf_mode_index[xdmi->mode]];
+    ++cpi->mode_chosen_counts[kf_mode_index[mi->mode]];
   } else {
     // Note how often each mode chosen as best
     ++cpi->mode_chosen_counts[ctx->best_mode_index];
@@ -1754,7 +1754,9 @@ static void update_state_rt(VP9_COMP *cpi, ThreadData *td,
     }
   }
 
-  if (cm->use_prev_frame_mvs) {
+  if (cm->use_prev_frame_mvs ||
+      (cpi->svc.use_base_mv && cpi->svc.number_spatial_layers > 1
+       && cpi->svc.spatial_layer_id != cpi->svc.number_spatial_layers - 1)) {
     MV_REF *const frame_mvs =
         cm->cur_frame->mvs + mi_row * cm->mi_cols + mi_col;
     int w, h;
diff --git a/vp9/encoder/vp9_encodemb.c b/vp9/encoder/vp9_encodemb.c
index 42e265287..028d1da4d 100644
--- a/vp9/encoder/vp9_encodemb.c
+++ b/vp9/encoder/vp9_encodemb.c
@@ -50,7 +50,9 @@ void vp9_subtract_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane) {
                      pd->dst.buf, pd->dst.stride);
 }
 
-#define RDTRUNC(RM, DM, R, D) ((128 + (R) * (RM)) & 0xFF)
+#define RDTRUNC(RM, DM, R, D)                        \
+  (((1 << (VP9_PROB_COST_SHIFT - 1)) + (R) * (RM)) & \
+   ((1 << VP9_PROB_COST_SHIFT) - 1))
 
 typedef struct vp9_token_state {
   int rate;
diff --git a/vp9/encoder/vp9_encodemv.c b/vp9/encoder/vp9_encodemv.c
index f31ada919..8f4d80cbb 100644
--- a/vp9/encoder/vp9_encodemv.c
+++ b/vp9/encoder/vp9_encodemv.c
@@ -138,7 +138,8 @@ static int update_mv(vpx_writer *w, const unsigned int ct[2],
                      vpx_prob *cur_p, vpx_prob upd_p) {
   const vpx_prob new_p = get_binary_prob(ct[0], ct[1]) | 1;
   const int update = cost_branch256(ct, *cur_p) + vp9_cost_zero(upd_p) >
-                     cost_branch256(ct, new_p) + vp9_cost_one(upd_p) + 7 * 256;
+                     cost_branch256(ct, new_p) + vp9_cost_one(upd_p) +
+                     (7 << VP9_PROB_COST_SHIFT);
   vpx_write(w, update, upd_p);
   if (update) {
     *cur_p = new_p;
diff --git a/vp9/encoder/vp9_encoder.c b/vp9/encoder/vp9_encoder.c
index 039ed3347..8c646938c 100644
--- a/vp9/encoder/vp9_encoder.c
+++ b/vp9/encoder/vp9_encoder.c
@@ -3318,6 +3318,11 @@ static void encode_without_recode_loop(VP9_COMP *cpi,
       cpi->oxcf.content == VP9E_CONTENT_SCREEN)
     vp9_avg_source_sad(cpi);
 
+  // TODO(wonkap/marpan): For 1 pass SVC, since only ZEROMV is allowed for
+  // upsampled reference frame (i.e., svc->force_zero_mode_spatial_ref = 0),
+  // we should be able to avoid this frame-level upsampling.
+  // Keeping it for now as there is an asan error in the multi-threaded SVC
+  // rate control test if this upsampling is removed.
   if (frame_is_intra_only(cm) == 0) {
     vp9_scale_references(cpi);
   }
diff --git a/vp9/encoder/vp9_pickmode.c b/vp9/encoder/vp9_pickmode.c
index d00e6b965..755323dc6 100644
--- a/vp9/encoder/vp9_pickmode.c
+++ b/vp9/encoder/vp9_pickmode.c
@@ -40,12 +40,13 @@ typedef struct {
   int in_use;
 } PRED_BUFFER;
 
-static int mv_refs_rt(const VP9_COMMON *cm, const MACROBLOCK *x,
+static int mv_refs_rt(VP9_COMP *cpi, const VP9_COMMON *cm,
+                      const MACROBLOCK *x,
                       const MACROBLOCKD *xd, const TileInfo *const tile,
                       MODE_INFO *mi, MV_REFERENCE_FRAME ref_frame,
-                      int_mv *mv_ref_list,
-                      int mi_row, int mi_col) {
+                      int_mv *mv_ref_list, int_mv *base_mv,
+                      int mi_row, int mi_col, int use_base_mv) {
   const int *ref_sign_bias = cm->ref_frame_sign_bias;
   int i, refmv_count = 0;
 
@@ -109,6 +110,20 @@ static int mv_refs_rt(const VP9_COMMON *cm, const MACROBLOCK *x,
       }
     }
   }
+  if (use_base_mv &&
+      !cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame &&
+      ref_frame == LAST_FRAME) {
+    // Get base layer mv.
+    MV_REF *candidate =
+        &cm->prev_frame->mvs[(mi_col>>1) + (mi_row>>1) * (cm->mi_cols>>1)];
+    if (candidate->mv[0].as_int != INVALID_MV) {
+      base_mv->as_mv.row = (candidate->mv[0].as_mv.row * 2);
+      base_mv->as_mv.col = (candidate->mv[0].as_mv.col * 2);
+      clamp_mv_ref(&base_mv->as_mv, xd);
+    } else {
+      base_mv->as_int = INVALID_MV;
+    }
+  }
 
  Done:
@@ -124,7 +139,7 @@ static int mv_refs_rt(const VP9_COMMON *cm, const MACROBLOCK *x,
 static int combined_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
                                   BLOCK_SIZE bsize, int mi_row, int mi_col,
                                   int_mv *tmp_mv, int *rate_mv,
-                                  int64_t best_rd_sofar) {
+                                  int64_t best_rd_sofar, int use_base_mv) {
   MACROBLOCKD *xd = &x->e_mbd;
   MODE_INFO *mi = xd->mi[0];
   struct buf_2d backup_yv12[MAX_MB_PLANE] = {{0, 0}};
@@ -133,6 +148,7 @@ static int combined_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
   MV mvp_full;
   const int ref = mi->ref_frame[0];
   const MV ref_mv = x->mbmi_ext->ref_mvs[ref][0].as_mv;
+  MV center_mv;
   int dis;
   int rate_mode;
   const int tmp_col_min = x->mv_col_min;
@@ -163,9 +179,14 @@ static int combined_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
   mvp_full.col >>= 3;
   mvp_full.row >>= 3;
 
+  if (!use_base_mv)
+    center_mv = ref_mv;
+  else
+    center_mv = tmp_mv->as_mv;
+
   vp9_full_pixel_search(cpi, x, bsize, &mvp_full, step_param, sadpb,
                         cond_cost_list(cpi, cost_list),
-                        &ref_mv, &tmp_mv->as_mv, INT_MAX, 0);
+                        &center_mv, &tmp_mv->as_mv, INT_MAX, 0);
 
   x->mv_col_min = tmp_col_min;
   x->mv_col_max = tmp_col_max;
@@ -681,8 +702,8 @@ static void block_yrd(VP9_COMP *cpi, MACROBLOCK *x, int *rate, int64_t *dist,
   }
 
   if (*skippable == 0) {
-    *rate <<= 10;
-    *rate += (eob_cost << 8);
+    *rate <<= (2 + VP9_PROB_COST_SHIFT);
+    *rate += (eob_cost << VP9_PROB_COST_SHIFT);
   }
 }
 #endif
@@ -1085,8 +1106,50 @@ int set_intra_cost_penalty(const VP9_COMP *const cpi, BLOCK_SIZE bsize) {
          cm->base_qindex, cm->y_dc_delta_q, cm->bit_depth) >>
       reduction_fac;
 }
+static INLINE void find_predictors(VP9_COMP *cpi, MACROBLOCK *x,
+                                   MV_REFERENCE_FRAME ref_frame,
+                                   int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES],
+                                   int const_motion[MAX_REF_FRAMES],
+                                   int *ref_frame_skip_mask,
+                                   const int flag_list[4],
+                                   TileDataEnc *tile_data,
+                                   int mi_row, int mi_col,
+                                   struct buf_2d yv12_mb[4][MAX_MB_PLANE],
+                                   BLOCK_SIZE bsize) {
+  VP9_COMMON *const cm = &cpi->common;
+  MACROBLOCKD *const xd = &x->e_mbd;
+  const YV12_BUFFER_CONFIG *yv12 = get_ref_frame_buffer(cpi, ref_frame);
+  TileInfo *const tile_info = &tile_data->tile_info;
 // TODO(jingning) placeholder for inter-frame non-RD mode decision.
+  x->pred_mv_sad[ref_frame] = INT_MAX;
+  frame_mv[NEWMV][ref_frame].as_int = INVALID_MV;
+  frame_mv[ZEROMV][ref_frame].as_int = 0;
 // this needs various further optimizations. to be continued..
+  if ((cpi->ref_frame_flags & flag_list[ref_frame]) && (yv12 != NULL)) {
+    int_mv *const candidates = x->mbmi_ext->ref_mvs[ref_frame];
+    const struct scale_factors *const sf = &cm->frame_refs[ref_frame - 1].sf;
+    vp9_setup_pred_block(xd, yv12_mb[ref_frame], yv12, mi_row, mi_col,
+                         sf, sf);
+    if (cm->use_prev_frame_mvs)
+      vp9_find_mv_refs(cm, xd, xd->mi[0], ref_frame,
+                       candidates, mi_row, mi_col,
+                       x->mbmi_ext->mode_context);
+    else
+      const_motion[ref_frame] =
+          mv_refs_rt(cpi, cm, x, xd, tile_info, xd->mi[0], ref_frame,
+                     candidates, &frame_mv[NEWMV][ref_frame], mi_row, mi_col,
+                     (int)(cpi->svc.use_base_mv && cpi->svc.spatial_layer_id));
+    vp9_find_best_ref_mvs(xd, cm->allow_high_precision_mv, candidates,
+                          &frame_mv[NEARESTMV][ref_frame],
+                          &frame_mv[NEARMV][ref_frame]);
+    if (!vp9_is_scaled(sf) && bsize >= BLOCK_8X8) {
+      vp9_mv_pred(cpi, x, yv12_mb[ref_frame][0].buf, yv12->y_stride,
+                  ref_frame, bsize);
+    }
+  } else {
+    *ref_frame_skip_mask |= (1 << ref_frame);
+  }
+}
 void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
                          TileDataEnc *tile_data,
                          int mi_row, int mi_col, RD_COST *rd_cost,
@@ -1094,7 +1157,6 @@
   VP9_COMMON *const cm = &cpi->common;
   SPEED_FEATURES *const sf = &cpi->sf;
   const SVC *const svc = &cpi->svc;
-  TileInfo *const tile_info = &tile_data->tile_info;
   MACROBLOCKD *const xd = &x->e_mbd;
   MODE_INFO *const mi = xd->mi[0];
   struct macroblockd_plane *const pd = &xd->plane[0];
@@ -1113,7 +1175,7 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
   unsigned int var_y = UINT_MAX;
   unsigned int sse_y = UINT_MAX;
   const int intra_cost_penalty = set_intra_cost_penalty(cpi, bsize);
-  const int64_t inter_mode_thresh = RDCOST(x->rdmult, x->rddiv,
+  int64_t inter_mode_thresh = RDCOST(x->rdmult, x->rddiv,
                                            intra_cost_penalty, 0);
   const int *const rd_threshes = cpi->rd.threshes[mi->segment_id][bsize];
   const int *const rd_thresh_freq_fact = tile_data->thresh_freq_fact[bsize];
@@ -1144,6 +1206,7 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
   int best_early_term = 0;
   int ref_frame_cost[MAX_REF_FRAMES];
   int svc_force_zero_mode[3] = {0};
+  int perform_intra_pred = 1;
 #if CONFIG_VP9_TEMPORAL_DENOISING
   int64_t zero_last_cost_orig = INT64_MAX;
 #endif
@@ -1209,38 +1272,9 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
   }
 
   for (ref_frame = LAST_FRAME; ref_frame <= usable_ref_frame; ++ref_frame) {
-    const YV12_BUFFER_CONFIG *yv12 = get_ref_frame_buffer(cpi, ref_frame);
-
-    x->pred_mv_sad[ref_frame] = INT_MAX;
-    frame_mv[NEWMV][ref_frame].as_int = INVALID_MV;
-    frame_mv[ZEROMV][ref_frame].as_int = 0;
-
-    if ((cpi->ref_frame_flags & flag_list[ref_frame]) && (yv12 != NULL)) {
-      int_mv *const candidates = x->mbmi_ext->ref_mvs[ref_frame];
-      const struct scale_factors *const sf = &cm->frame_refs[ref_frame - 1].sf;
-
-      vp9_setup_pred_block(xd, yv12_mb[ref_frame], yv12, mi_row, mi_col,
-                           sf, sf);
-
-      if (cm->use_prev_frame_mvs)
-        vp9_find_mv_refs(cm, xd, xd->mi[0], ref_frame,
-                         candidates, mi_row, mi_col, x->mbmi_ext->mode_context);
-      else
-        const_motion[ref_frame] = mv_refs_rt(cm, x, xd, tile_info,
-                                             xd->mi[0],
-                                             ref_frame, candidates,
-                                             mi_row, mi_col);
-
-      vp9_find_best_ref_mvs(xd, cm->allow_high_precision_mv, candidates,
-                            &frame_mv[NEARESTMV][ref_frame],
-                            &frame_mv[NEARMV][ref_frame]);
-
-      if (!vp9_is_scaled(sf) && bsize >= BLOCK_8X8)
-        vp9_mv_pred(cpi, x, yv12_mb[ref_frame][0].buf, yv12->y_stride,
-                    ref_frame, bsize);
-    } else {
-      ref_frame_skip_mask |= (1 << ref_frame);
-    }
+    find_predictors(cpi, x, ref_frame, frame_mv, const_motion,
+                    &ref_frame_skip_mask, flag_list, tile_data, mi_row, mi_col,
+                    yv12_mb, bsize);
   }
 
   for (idx = 0; idx < RT_INTER_MODES; ++idx) {
@@ -1330,8 +1364,36 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
                                      cond_cost_list(cpi, cost_list),
                                      x->nmvjointcost, x->mvcost,
                                      &dis, &x->pred_sse[ref_frame], NULL, 0, 0);
+      } else if (svc->use_base_mv && svc->spatial_layer_id) {
+        if (frame_mv[NEWMV][ref_frame].as_int != INVALID_MV &&
+            frame_mv[NEWMV][ref_frame].as_int != 0) {
+          const int pre_stride = xd->plane[0].pre[0].stride;
+          int base_mv_sad = INT_MAX;
+          const uint8_t * const pre_buf = xd->plane[0].pre[0].buf +
+              (frame_mv[NEWMV][ref_frame].as_mv.row >> 3) * pre_stride +
+              (frame_mv[NEWMV][ref_frame].as_mv.col >> 3);
+          base_mv_sad = cpi->fn_ptr[bsize].sdf(x->plane[0].src.buf,
+                                               x->plane[0].src.stride,
+                                               pre_buf, pre_stride);
+
+          // TODO(wonkap): make the decision to use base layer mv on RD;
+          // not just SAD.
+          if (base_mv_sad < x->pred_mv_sad[ref_frame]) {
+            // Base layer mv is good.
+            if (!combined_motion_search(cpi, x, bsize, mi_row, mi_col,
+                &frame_mv[NEWMV][ref_frame], &rate_mv, best_rdc.rdcost, 1)) {
+              continue;
+            }
+          } else if (!combined_motion_search(cpi, x, bsize, mi_row, mi_col,
+              &frame_mv[NEWMV][ref_frame], &rate_mv, best_rdc.rdcost, 0)) {
+            continue;
+          }
+        } else if (!combined_motion_search(cpi, x, bsize, mi_row, mi_col,
+            &frame_mv[NEWMV][ref_frame], &rate_mv, best_rdc.rdcost, 0)) {
+          continue;
+        }
       } else if (!combined_motion_search(cpi, x, bsize, mi_row, mi_col,
-          &frame_mv[NEWMV][ref_frame], &rate_mv, best_rdc.rdcost)) {
+          &frame_mv[NEWMV][ref_frame], &rate_mv, best_rdc.rdcost, 0)) {
         continue;
       }
     }
@@ -1593,11 +1655,20 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
       xd->mi[0]->bmi[0].as_mv[0].as_int = mi->mv[0].as_int;
 
   x->skip_txfm[0] = best_mode_skip_txfm;
+  // Perform intra prediction only if base layer is chosen as the reference.
+  if (cpi->svc.spatial_layer_id) {
+    perform_intra_pred =
+        cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame ||
+        (!cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame
+         && svc_force_zero_mode[best_ref_frame]);
+    inter_mode_thresh = (inter_mode_thresh << 1) + inter_mode_thresh;
+  }
   // Perform intra prediction search, if the best SAD is above a certain
   // threshold.
-  if (best_rdc.rdcost == INT64_MAX ||
+  if (perform_intra_pred &&
+      ((best_rdc.rdcost == INT64_MAX ||
       (!x->skip && best_rdc.rdcost > inter_mode_thresh &&
-       bsize <= cpi->sf.max_intra_bsize)) {
+       bsize <= cpi->sf.max_intra_bsize)))) {
     struct estimate_block_intra_args args = { cpi, x, DC_PRED, 0, 0 };
     int i;
     TX_SIZE best_intra_tx_size = TX_SIZES;
diff --git a/vp9/encoder/vp9_rd.c b/vp9/encoder/vp9_rd.c
index 78718fe53..ad2027c1c 100644
--- a/vp9/encoder/vp9_rd.c
+++ b/vp9/encoder/vp9_rd.c
@@ -409,7 +409,7 @@ void vp9_model_rd_from_var_lapndz(unsigned int var, unsigned int n_log2,
         (((uint64_t)qstep * qstep << (n_log2 + 10)) + (var >> 1)) / var;
     const int xsq_q10 = (int)VPXMIN(xsq_q10_64, MAX_XSQ_Q10);
     model_rd_norm(xsq_q10, &r_q10, &d_q10);
-    *rate = ((r_q10 << n_log2) + 2) >> 2;
+    *rate = ROUND_POWER_OF_TWO(r_q10 << n_log2, 10 - VP9_PROB_COST_SHIFT);
    *dist = (var * (int64_t)d_q10 + 512) >> 10;
   }
 }
diff --git a/vp9/encoder/vp9_rd.h b/vp9/encoder/vp9_rd.h
index 28385c981..5e6e773a1 100644
--- a/vp9/encoder/vp9_rd.h
+++ b/vp9/encoder/vp9_rd.h
@@ -17,6 +17,7 @@
 
 #include "vp9/encoder/vp9_block.h"
 #include "vp9/encoder/vp9_context_tree.h"
+#include "vp9/encoder/vp9_cost.h"
 
 #ifdef __cplusplus
 extern "C" {
@@ -25,7 +26,7 @@ extern "C" {
 #define RDDIV_BITS 7
 
 #define RDCOST(RM, DM, R, D) \
-  (((128 + ((int64_t)R) * (RM)) >> 8) + (D << DM))
+  (ROUND_POWER_OF_TWO(((int64_t)R) * (RM), VP9_PROB_COST_SHIFT) + (D << DM))
 #define QIDX_SKIP_THRESH 115
 
 #define MV_COST_WEIGHT 108
diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c
index c48b4ecc5..ff3112ce9 100644
--- a/vp9/encoder/vp9_rdopt.c
+++ b/vp9/encoder/vp9_rdopt.c
@@ -248,7 +248,7 @@ static void model_rd_for_sb(VP9_COMP *cpi, BLOCK_SIZE bsize,
       int quantizer = (pd->dequant[1] >> dequant_shift);
 
       if (quantizer < 120)
-        rate = (square_error * (280 - quantizer)) >> 8;
+        rate = (square_error * (280 - quantizer)) >> (16 - VP9_PROB_COST_SHIFT);
       else
         rate = 0;
       dist = (square_error * quantizer) >> 8;
diff --git a/vp9/encoder/vp9_subexp.c b/vp9/encoder/vp9_subexp.c
index 7aa8fc3f6..1a8719940 100644
--- a/vp9/encoder/vp9_subexp.c
+++ b/vp9/encoder/vp9_subexp.c
@@ -80,7 +80,7 @@ static int remap_prob(int v, int m) {
 
 static int prob_diff_update_cost(vpx_prob newp, vpx_prob oldp) {
   int delp = remap_prob(newp, oldp);
-  return update_bits[delp] * 256;
+  return update_bits[delp] << VP9_PROB_COST_SHIFT;
 }
 
 static void encode_uniform(vpx_writer *w, int v) {
diff --git a/vp9/encoder/vp9_svc_layercontext.c b/vp9/encoder/vp9_svc_layercontext.c
index 30a7d1013..e0236aa6e 100644
--- a/vp9/encoder/vp9_svc_layercontext.c
+++ b/vp9/encoder/vp9_svc_layercontext.c
@@ -33,6 +33,7 @@ void vp9_init_layer_context(VP9_COMP *const cpi) {
   svc->first_spatial_layer_to_encode = 0;
   svc->rc_drop_superframe = 0;
   svc->force_zero_mode_spatial_ref = 0;
+  svc->use_base_mv = 0;
   svc->current_superframe = 0;
   for (i = 0; i < REF_FRAMES; ++i)
     svc->ref_frame_index[i] = -1;
@@ -416,7 +417,9 @@ static void set_flags_and_fb_idx_for_temporal_mode3(VP9_COMP *const cpi) {
     cpi->ref_frame_flags = VP9_LAST_FLAG;
   } else if (cpi->svc.layer_context[temporal_id].is_key_frame) {
     // base layer is a key frame.
-    cpi->ref_frame_flags = VP9_GOLD_FLAG;
+    cpi->ref_frame_flags = VP9_LAST_FLAG;
+    cpi->ext_refresh_last_frame = 0;
+    cpi->ext_refresh_golden_frame = 1;
   } else {
     cpi->ref_frame_flags = VP9_LAST_FLAG | VP9_GOLD_FLAG;
   }
@@ -431,7 +434,13 @@ static void set_flags_and_fb_idx_for_temporal_mode3(VP9_COMP *const cpi) {
   } else {
     if (frame_num_within_temporal_struct == 1) {
       // the first tl2 picture
-      if (!spatial_id) {
+      if (spatial_id == cpi->svc.number_spatial_layers - 1) {  // top layer
+        cpi->ext_refresh_frame_flags_pending = 1;
+        if (!spatial_id)
+          cpi->ref_frame_flags = VP9_LAST_FLAG;
+        else
+          cpi->ref_frame_flags = VP9_LAST_FLAG | VP9_GOLD_FLAG;
+      } else if (!spatial_id) {
         cpi->ext_refresh_frame_flags_pending = 1;
         cpi->ext_refresh_alt_ref_frame = 1;
         cpi->ref_frame_flags = VP9_LAST_FLAG;
@@ -439,32 +448,38 @@ static void set_flags_and_fb_idx_for_temporal_mode3(VP9_COMP *const cpi) {
         cpi->ext_refresh_frame_flags_pending = 1;
         cpi->ext_refresh_alt_ref_frame = 1;
         cpi->ref_frame_flags = VP9_LAST_FLAG | VP9_GOLD_FLAG;
-      } else {  // Top layer
-        cpi->ext_refresh_frame_flags_pending = 0;
-        cpi->ref_frame_flags = VP9_LAST_FLAG | VP9_GOLD_FLAG;
       }
     } else {
       // The second tl2 picture
-      if (!spatial_id) {
+      if (spatial_id == cpi->svc.number_spatial_layers - 1) {  // top layer
+        cpi->ext_refresh_frame_flags_pending = 1;
+        if (!spatial_id)
+          cpi->ref_frame_flags = VP9_LAST_FLAG;
+        else
+          cpi->ref_frame_flags = VP9_LAST_FLAG | VP9_GOLD_FLAG;
+      } else if (!spatial_id) {
        cpi->ext_refresh_frame_flags_pending = 1;
        cpi->ref_frame_flags = VP9_LAST_FLAG;
-        cpi->ext_refresh_last_frame = 1;
-      } else if (spatial_id < cpi->svc.number_spatial_layers - 1) {
+        cpi->ext_refresh_alt_ref_frame = 1;
+      } else {  // top layer
        cpi->ext_refresh_frame_flags_pending = 1;
        cpi->ref_frame_flags = VP9_LAST_FLAG | VP9_GOLD_FLAG;
-        cpi->ext_refresh_last_frame = 1;
-      } else {  // top layer
-        cpi->ext_refresh_frame_flags_pending = 0;
-        cpi->ref_frame_flags = VP9_LAST_FLAG | VP9_GOLD_FLAG;
+        cpi->ext_refresh_alt_ref_frame = 1;
       }
     }
   }
   if (temporal_id == 0) {
     cpi->lst_fb_idx = spatial_id;
-    if (spatial_id)
+    if (spatial_id) {
+      if (cpi->svc.layer_context[temporal_id].is_key_frame) {
+        cpi->lst_fb_idx = spatial_id - 1;
+        cpi->gld_fb_idx = spatial_id;
+      } else {
       cpi->gld_fb_idx = spatial_id - 1;
-    else
+      }
+    } else {
       cpi->gld_fb_idx = 0;
+    }
     cpi->alt_fb_idx = 0;
   } else if (temporal_id == 1) {
     cpi->lst_fb_idx = spatial_id;
@@ -477,7 +492,7 @@ static void set_flags_and_fb_idx_for_temporal_mode3(VP9_COMP *const cpi) {
   } else {
     cpi->lst_fb_idx = cpi->svc.number_spatial_layers + spatial_id;
     cpi->gld_fb_idx = cpi->svc.number_spatial_layers + spatial_id - 1;
-    cpi->alt_fb_idx = 0;
+    cpi->alt_fb_idx = cpi->svc.number_spatial_layers + spatial_id;
   }
 }
 
@@ -499,7 +514,9 @@ static void set_flags_and_fb_idx_for_temporal_mode2(VP9_COMP *const cpi) {
     cpi->ref_frame_flags = VP9_LAST_FLAG;
   } else if (cpi->svc.layer_context[temporal_id].is_key_frame) {
     // base layer is a key frame.
-    cpi->ref_frame_flags = VP9_GOLD_FLAG;
+    cpi->ref_frame_flags = VP9_LAST_FLAG;
+    cpi->ext_refresh_last_frame = 0;
+    cpi->ext_refresh_golden_frame = 1;
   } else {
     cpi->ref_frame_flags = VP9_LAST_FLAG | VP9_GOLD_FLAG;
   }
@@ -515,10 +532,16 @@ static void set_flags_and_fb_idx_for_temporal_mode2(VP9_COMP *const cpi) {
 
   if (temporal_id == 0) {
     cpi->lst_fb_idx = spatial_id;
-    if (spatial_id)
+    if (spatial_id) {
+      if (cpi->svc.layer_context[temporal_id].is_key_frame) {
+        cpi->lst_fb_idx = spatial_id - 1;
+        cpi->gld_fb_idx = spatial_id;
+      } else {
       cpi->gld_fb_idx = spatial_id - 1;
-    else
+      }
+    } else {
       cpi->gld_fb_idx = 0;
+    }
     cpi->alt_fb_idx = 0;
   } else if (temporal_id == 1) {
     cpi->lst_fb_idx = spatial_id;
@@ -540,20 +563,30 @@ static void set_flags_and_fb_idx_for_temporal_mode_noLayering(
   if (!spatial_id) {
     cpi->ref_frame_flags = VP9_LAST_FLAG;
   } else if (cpi->svc.layer_context[0].is_key_frame) {
-    cpi->ref_frame_flags = VP9_GOLD_FLAG;
+    cpi->ref_frame_flags = VP9_LAST_FLAG;
+    cpi->ext_refresh_last_frame = 0;
+    cpi->ext_refresh_golden_frame = 1;
   } else {
     cpi->ref_frame_flags = VP9_LAST_FLAG | VP9_GOLD_FLAG;
   }
   cpi->lst_fb_idx = spatial_id;
-  if (spatial_id)
+  if (spatial_id) {
+    if (cpi->svc.layer_context[0].is_key_frame) {
+      cpi->lst_fb_idx = spatial_id - 1;
+      cpi->gld_fb_idx = spatial_id;
+    } else {
     cpi->gld_fb_idx = spatial_id - 1;
-  else
+    }
+  } else {
     cpi->gld_fb_idx = 0;
+  }
 }
 
 int vp9_one_pass_cbr_svc_start_layer(VP9_COMP *const cpi) {
   int width = 0, height = 0;
   LAYER_CONTEXT *lc = NULL;
+  if (cpi->svc.number_spatial_layers > 1)
+    cpi->svc.use_base_mv = 1;
   cpi->svc.force_zero_mode_spatial_ref = 1;
   if (cpi->svc.temporal_layering_mode == VP9E_TEMPORAL_LAYERING_MODE_0212) {
diff --git a/vp9/encoder/vp9_svc_layercontext.h b/vp9/encoder/vp9_svc_layercontext.h
index 1f446d743..4e186401f 100644
--- a/vp9/encoder/vp9_svc_layercontext.h
+++ b/vp9/encoder/vp9_svc_layercontext.h
@@ -86,6 +86,7 @@ typedef struct {
   int ref_frame_index[REF_FRAMES];
   int force_zero_mode_spatial_ref;
   int current_superframe;
+  int use_base_mv;
 } SVC;
 
 struct VP9_COMP;
diff --git a/vp9/encoder/vp9_tokenize.c b/vp9/encoder/vp9_tokenize.c
index 666f68a15..a6dd3c59f 100644
--- a/vp9/encoder/vp9_tokenize.c
+++ b/vp9/encoder/vp9_tokenize.c
@@ -428,25 +428,21 @@ static void set_entropy_context_b(int plane, int block, BLOCK_SIZE plane_bsize,
 }
 
 static INLINE void add_token(TOKENEXTRA **t, const vpx_prob *context_tree,
-                             int32_t extra, uint8_t token,
-                             uint8_t skip_eob_node,
+                             int16_t token, EXTRABIT extra,
                              unsigned int *counts) {
+  (*t)->context_tree = context_tree;
   (*t)->token = token;
   (*t)->extra = extra;
-  (*t)->context_tree = context_tree;
-  (*t)->skip_eob_node = skip_eob_node;
   (*t)++;
   ++counts[token];
 }
 
 static INLINE void add_token_no_extra(TOKENEXTRA **t,
                                       const vpx_prob *context_tree,
-                                      uint8_t token,
-                                      uint8_t skip_eob_node,
+                                      int16_t token,
                                       unsigned int *counts) {
-  (*t)->token = token;
   (*t)->context_tree = context_tree;
-  (*t)->skip_eob_node = skip_eob_node;
+  (*t)->token = token;
   (*t)++;
   ++counts[token];
 }
@@ -501,15 +497,13 @@ static void tokenize_b(int plane, int block, BLOCK_SIZE plane_bsize,
 
   while (c < eob) {
     int v = 0;
-    int skip_eob = 0;
     v = qcoeff[scan[c]];
+    ++eob_branch[band[c]][pt];
 
     while (!v) {
-      add_token_no_extra(&t, coef_probs[band[c]][pt], ZERO_TOKEN, skip_eob,
+      add_token_no_extra(&t, coef_probs[band[c]][pt], ZERO_TOKEN,
                          counts[band[c]][pt]);
-      eob_branch[band[c]][pt] += !skip_eob;
-      skip_eob = 1;
       token_cache[scan[c]] = 0;
       ++c;
       pt = get_coef_context(nb, token_cache, c);
@@ -518,18 +512,17 @@ static void tokenize_b(int plane, int block, BLOCK_SIZE plane_bsize,
     vp9_get_token_extra(v, &token, &extra);
 
-    add_token(&t, coef_probs[band[c]][pt], extra, (uint8_t)token,
-              (uint8_t)skip_eob, counts[band[c]][pt]);
-    eob_branch[band[c]][pt] += !skip_eob;
+    add_token(&t, coef_probs[band[c]][pt], token, extra,
+              counts[band[c]][pt]);
 
     token_cache[scan[c]] = vp9_pt_energy_class[token];
     ++c;
     pt = get_coef_context(nb, token_cache, c);
   }
  if (c < seg_eob) {
-    add_token_no_extra(&t, coef_probs[band[c]][pt], EOB_TOKEN, 0,
-                       counts[band[c]][pt]);
     ++eob_branch[band[c]][pt];
+    add_token_no_extra(&t, coef_probs[band[c]][pt], EOB_TOKEN,
+                       counts[band[c]][pt]);
   }
 
   *tp = t;
diff --git a/vp9/encoder/vp9_tokenize.h b/vp9/encoder/vp9_tokenize.h
index c0f09c7b2..26da6fef1 100644
--- a/vp9/encoder/vp9_tokenize.h
+++ b/vp9/encoder/vp9_tokenize.h
@@ -36,9 +36,8 @@ typedef struct {
 
 typedef struct {
   const vpx_prob *context_tree;
+  int16_t token;
   EXTRABIT extra;
-  uint8_t token;
-  uint8_t skip_eob_node;
 } TOKENEXTRA;
 
 extern const vpx_tree_index vp9_coef_tree[];
diff --git a/vpx/src/svc_encodeframe.c b/vpx/src/svc_encodeframe.c
index 5c3fe93fd..628afca31 100644
--- a/vpx/src/svc_encodeframe.c
+++ b/vpx/src/svc_encodeframe.c
@@ -322,8 +322,7 @@ void assign_layer_bitrates(const SvcContext *svc_ctx,
 
     for (sl = 0; sl < svc_ctx->spatial_layers; ++sl) {
       if (si->svc_params.scaling_factor_den[sl] > 0) {
-        alloc_ratio[sl] = (float)(si->svc_params.scaling_factor_num[sl] *
-            1.0 / si->svc_params.scaling_factor_den[sl]);
+        alloc_ratio[sl] = (float)( (sl+1) );
        total += alloc_ratio[sl];
       }
     }
@@ -334,9 +333,9 @@ void assign_layer_bitrates(const SvcContext *svc_ctx,
               alloc_ratio[sl] / total);
         if (svc_ctx->temporal_layering_mode == 3) {
           enc_cfg->layer_target_bitrate[sl * svc_ctx->temporal_layers] =
-              spatial_layer_target >> 1;
+              (spatial_layer_target*6)/10;  // 60%
           enc_cfg->layer_target_bitrate[sl * svc_ctx->temporal_layers + 1] =
-              (spatial_layer_target >> 1) + (spatial_layer_target >> 2);
+              (spatial_layer_target*8)/10;  // 80%
           enc_cfg->layer_target_bitrate[sl * svc_ctx->temporal_layers + 2] =
               spatial_layer_target;
         } else if (svc_ctx->temporal_layering_mode == 2 ||
@@ -398,11 +397,13 @@ vpx_codec_err_t vpx_svc_init(SvcContext *svc_ctx, vpx_codec_ctx_t *codec_ctx,
   si->width = enc_cfg->g_w;
   si->height = enc_cfg->g_h;
 
-  if (enc_cfg->kf_max_dist < 2) {
+// wonkap: why is this necessary?
+  /*if (enc_cfg->kf_max_dist < 2) {
     svc_log(svc_ctx, SVC_LOG_ERROR, "key frame distance too small: %d\n",
             enc_cfg->kf_max_dist);
     return VPX_CODEC_INVALID_PARAM;
-  }
+  }*/
+
   si->kf_dist = enc_cfg->kf_max_dist;
 
   if (svc_ctx->spatial_layers == 0)
@@ -577,6 +578,27 @@ vpx_codec_err_t vpx_svc_encode(SvcContext *svc_ctx,
       }
 #endif
 #endif
+      case VPX_CODEC_PSNR_PKT:
+      {
+#if VPX_ENCODER_ABI_VERSION > (5 + VPX_CODEC_ABI_VERSION)
+        int j;
+        svc_log(svc_ctx, SVC_LOG_DEBUG,
+                "frame: %d, layer: %d, PSNR(Total/Y/U/V): "
+                "%2.3f %2.3f %2.3f %2.3f \n",
+                si->psnr_pkt_received, 0,
+                cx_pkt->data.layer_psnr[0].psnr[0],
+                cx_pkt->data.layer_psnr[0].psnr[1],
+                cx_pkt->data.layer_psnr[0].psnr[2],
+                cx_pkt->data.layer_psnr[0].psnr[3]);
+        for (j = 0; j < COMPONENTS; ++j) {
+          si->psnr_sum[0][j] +=
+              cx_pkt->data.layer_psnr[0].psnr[j];
+          si->sse_sum[0][j] += cx_pkt->data.layer_psnr[0].sse[j];
+        }
+#endif
+      }
+      ++si->psnr_pkt_received;
+      break;
     default: {
       break;
    }
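
Reviewer note (not part of the patch): the recurring change in vp9_cost.h, vp9_rd.h, vp9_encodemb.c, vp9_encodemv.c, vp9_subexp.c and vp9_rdopt.c replaces the hard-coded 256 / 128 / >>8 constants with VP9_PROB_COST_SHIFT, i.e. rate terms are kept in units of 1/(1 << VP9_PROB_COST_SHIFT) bits. The standalone C sketch below (ROUND_POWER_OF_TWO is re-defined locally for illustration and the input values are made up) checks that the new RDCOST form reproduces the old rounding when the shift is 8:

/* Standalone illustration of the cost-scaling convention; not part of the patch. */
#include <stdint.h>
#include <stdio.h>

#define VP9_PROB_COST_SHIFT 8
/* Local re-definition for this sketch; libvpx provides an equivalent macro. */
#define ROUND_POWER_OF_TWO(value, n) (((value) + (1 << ((n) - 1))) >> (n))

/* Mirrors the new RDCOST form from vp9_rd.h. */
#define RDCOST(RM, DM, R, D) \
  (ROUND_POWER_OF_TWO(((int64_t)(R)) * (RM), VP9_PROB_COST_SHIFT) + ((D) << (DM)))

int main(void) {
  const int rdmult = 70, rddiv = 7;  /* example lambda terms */
  const int rate = 1000;             /* rate cost in 1/256-bit units */
  const int64_t dist = 50;
  /* Old form: ((128 + R * RM) >> 8) + (D << DM) -- identical when the shift is 8. */
  const int64_t old_cost =
      ((128 + (int64_t)rate * rdmult) >> 8) + (dist << rddiv);
  const int64_t new_cost = RDCOST(rdmult, rddiv, rate, dist);
  printf("old=%lld new=%lld\n", (long long)old_cost, (long long)new_cost);
  return 0;
}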