diff --git a/vp9/encoder/vp9_block.h b/vp9/encoder/vp9_block.h
index ae9f0aaa7..b4c06f5dd 100644
--- a/vp9/encoder/vp9_block.h
+++ b/vp9/encoder/vp9_block.h
@@ -143,6 +143,11 @@ struct macroblock {
   int rd_search;
   int skip_encode;
 
+  // Used to store the sub-partitions' choices.
+  int fast_ms;
+  int_mv pred_mv;
+  int subblock_ref;
+
   // TODO(jingning): Need to refactor the structure arrays that buffers the
   // coding mode decisions of each partition type.
   PICK_MODE_CONTEXT ab4x4_context[4][4][4];
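Note: a minimal sketch of what the three new macroblock fields carry, for readers of the patch. The int_mv_sketch stand-in and the small_partition_hint wrapper are illustrative only; in the patch the fields live directly in struct macroblock and use libvpx's int_mv.

#include <stdint.h>

/* Stand-in for libvpx's int_mv union (row/col in 1/8-pel units). */
typedef union {
  uint32_t as_int;
  struct { int16_t row, col; } as_mv;
} int_mv_sketch;

/* Roles of the added fields:
 *   fast_ms      0 = no hint; 1..3 = increasingly strong agreement between
 *                the four subblocks' motion search results
 *   pred_mv      average (sum >> 2) of the four subblock MVs, stored
 *                sign-normalized and used to seed the larger partition's
 *                motion search
 *   subblock_ref the subblocks' common reference frame, only trusted when
 *                fast_ms == 3 */
typedef struct {
  int fast_ms;
  int_mv_sketch pred_mv;
  int subblock_ref;
} small_partition_hint;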
diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c
index 48c1b3361..3dd235a1f 100644
--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c
@@ -1466,6 +1466,138 @@ static void rd_pick_partition(VP9_COMP *cpi, TOKENEXTRA **tp, int mi_row,
       restore_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize);
     }
   }
+
+  x->fast_ms = 0;
+  x->pred_mv.as_int = 0;
+  x->subblock_ref = 0;
+
+  // Use 4 subblocks' motion estimation results to speed up the current
+  // partition's checking.
+  if (cpi->sf.using_small_partition_info) {
+    // Only use the 8x8 result for non-HD videos.
+    // int use_8x8 = (MIN(cpi->common.width, cpi->common.height) < 720) ? 1 : 0;
+    int use_8x8 = 1;
+
+    if (cm->frame_type && !cpi->is_src_frame_alt_ref &&
+        ((use_8x8 && bsize == BLOCK_SIZE_MB16X16) ||
+        bsize == BLOCK_SIZE_SB32X32 || bsize == BLOCK_SIZE_SB64X64)) {
+      int ref0 = 0, ref1 = 0, ref2 = 0, ref3 = 0;
+
+      if (bsize == BLOCK_SIZE_MB16X16) {
+        ref0 = x->sb8x8_context[xd->sb_index][xd->mb_index][0].mic.mbmi.
+            ref_frame[0];
+        ref1 = x->sb8x8_context[xd->sb_index][xd->mb_index][1].mic.mbmi.
+            ref_frame[0];
+        ref2 = x->sb8x8_context[xd->sb_index][xd->mb_index][2].mic.mbmi.
+            ref_frame[0];
+        ref3 = x->sb8x8_context[xd->sb_index][xd->mb_index][3].mic.mbmi.
+            ref_frame[0];
+      } else if (bsize == BLOCK_SIZE_SB32X32) {
+        ref0 = x->mb_context[xd->sb_index][0].mic.mbmi.ref_frame[0];
+        ref1 = x->mb_context[xd->sb_index][1].mic.mbmi.ref_frame[0];
+        ref2 = x->mb_context[xd->sb_index][2].mic.mbmi.ref_frame[0];
+        ref3 = x->mb_context[xd->sb_index][3].mic.mbmi.ref_frame[0];
+      } else if (bsize == BLOCK_SIZE_SB64X64) {
+        ref0 = x->sb32_context[0].mic.mbmi.ref_frame[0];
+        ref1 = x->sb32_context[1].mic.mbmi.ref_frame[0];
+        ref2 = x->sb32_context[2].mic.mbmi.ref_frame[0];
+        ref3 = x->sb32_context[3].mic.mbmi.ref_frame[0];
+      }
+
+      // Currently, only consider 4 inter ref frames.
+      if (ref0 && ref1 && ref2 && ref3) {
+        int16_t mvr0 = 0, mvc0 = 0, mvr1 = 0, mvc1 = 0, mvr2 = 0, mvc2 = 0,
+            mvr3 = 0, mvc3 = 0;
+        int d01, d23, d02, d13;  // motion vector distance between 2 blocks
+
+        // Get each subblock's motion vectors.
+        if (bsize == BLOCK_SIZE_MB16X16) {
+          mvr0 = x->sb8x8_context[xd->sb_index][xd->mb_index][0].mic.mbmi.mv[0].
+              as_mv.row;
+          mvc0 = x->sb8x8_context[xd->sb_index][xd->mb_index][0].mic.mbmi.mv[0].
+              as_mv.col;
+          mvr1 = x->sb8x8_context[xd->sb_index][xd->mb_index][1].mic.mbmi.mv[0].
+              as_mv.row;
+          mvc1 = x->sb8x8_context[xd->sb_index][xd->mb_index][1].mic.mbmi.mv[0].
+              as_mv.col;
+          mvr2 = x->sb8x8_context[xd->sb_index][xd->mb_index][2].mic.mbmi.mv[0].
+              as_mv.row;
+          mvc2 = x->sb8x8_context[xd->sb_index][xd->mb_index][2].mic.mbmi.mv[0].
+              as_mv.col;
+          mvr3 = x->sb8x8_context[xd->sb_index][xd->mb_index][3].mic.mbmi.mv[0].
+              as_mv.row;
+          mvc3 = x->sb8x8_context[xd->sb_index][xd->mb_index][3].mic.mbmi.mv[0].
+              as_mv.col;
+        } else if (bsize == BLOCK_SIZE_SB32X32) {
+          mvr0 = x->mb_context[xd->sb_index][0].mic.mbmi.mv[0].as_mv.row;
+          mvc0 = x->mb_context[xd->sb_index][0].mic.mbmi.mv[0].as_mv.col;
+          mvr1 = x->mb_context[xd->sb_index][1].mic.mbmi.mv[0].as_mv.row;
+          mvc1 = x->mb_context[xd->sb_index][1].mic.mbmi.mv[0].as_mv.col;
+          mvr2 = x->mb_context[xd->sb_index][2].mic.mbmi.mv[0].as_mv.row;
+          mvc2 = x->mb_context[xd->sb_index][2].mic.mbmi.mv[0].as_mv.col;
+          mvr3 = x->mb_context[xd->sb_index][3].mic.mbmi.mv[0].as_mv.row;
+          mvc3 = x->mb_context[xd->sb_index][3].mic.mbmi.mv[0].as_mv.col;
+        } else if (bsize == BLOCK_SIZE_SB64X64) {
+          mvr0 = x->sb32_context[0].mic.mbmi.mv[0].as_mv.row;
+          mvc0 = x->sb32_context[0].mic.mbmi.mv[0].as_mv.col;
+          mvr1 = x->sb32_context[1].mic.mbmi.mv[0].as_mv.row;
+          mvc1 = x->sb32_context[1].mic.mbmi.mv[0].as_mv.col;
+          mvr2 = x->sb32_context[2].mic.mbmi.mv[0].as_mv.row;
+          mvc2 = x->sb32_context[2].mic.mbmi.mv[0].as_mv.col;
+          mvr3 = x->sb32_context[3].mic.mbmi.mv[0].as_mv.row;
+          mvc3 = x->sb32_context[3].mic.mbmi.mv[0].as_mv.col;
+        }
+
+        // Adjust sign if ref is alt_ref.
+        if (cm->ref_frame_sign_bias[ref0]) {
+          mvr0 *= -1;
+          mvc0 *= -1;
+        }
+
+        if (cm->ref_frame_sign_bias[ref1]) {
+          mvr1 *= -1;
+          mvc1 *= -1;
+        }
+
+        if (cm->ref_frame_sign_bias[ref2]) {
+          mvr2 *= -1;
+          mvc2 *= -1;
+        }
+
+        if (cm->ref_frame_sign_bias[ref3]) {
+          mvr3 *= -1;
+          mvc3 *= -1;
+        }
+
+        // Calculate mv distances.
+        d01 = MAX(abs(mvr0 - mvr1), abs(mvc0 - mvc1));
+        d23 = MAX(abs(mvr2 - mvr3), abs(mvc2 - mvc3));
+        d02 = MAX(abs(mvr0 - mvr2), abs(mvc0 - mvc2));
+        d13 = MAX(abs(mvr1 - mvr3), abs(mvc1 - mvc3));
+
+        if (d01 < 24 && d23 < 24 && d02 < 24 && d13 < 24) {
+          // Set fast motion search level.
+          x->fast_ms = 1;
+
+          // Calculate prediction MV.
+          x->pred_mv.as_mv.row = (mvr0 + mvr1 + mvr2 + mvr3) >> 2;
+          x->pred_mv.as_mv.col = (mvc0 + mvc1 + mvc2 + mvc3) >> 2;
+
+          if (ref0 == ref1 && ref1 == ref2 && ref2 == ref3 &&
+              d01 < 2 && d23 < 2 && d02 < 2 && d13 < 2) {
+            // Set fast motion search level.
+            x->fast_ms = 2;
+
+            if (!d01 && !d23 && !d02 && !d13) {
+              x->fast_ms = 3;
+              x->subblock_ref = ref0;
+            }
+          }
+        }
+      }
+    }
+  }
+
   if (!cpi->sf.use_partitions_less_than
       || (cpi->sf.use_partitions_less_than
           && bsize <= cpi->sf.less_than_block_size)) {
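Note: the heuristic above is easier to audit as a pure function. The sketch below mirrors its classification logic, assuming all four subblocks are inter coded and their MVs already sign-normalized; mv_dist() and classify_subblocks() are hypothetical names, while the 24 and 2 thresholds and the four compared pairs (0-1, 2-3, 0-2, 1-3) are taken from the diff.

#include <stdint.h>
#include <stdlib.h>

typedef struct { int16_t row, col; } mv_sketch;

/* Chebyshev distance between two MVs, i.e. MAX(|drow|, |dcol|). */
static int mv_dist(mv_sketch a, mv_sketch b) {
  const int dr = abs(a.row - b.row), dc = abs(a.col - b.col);
  return dr > dc ? dr : dc;
}

/* Returns the fast_ms level for four subblock search results. */
static int classify_subblocks(const mv_sketch mv[4], const int ref[4]) {
  const int d01 = mv_dist(mv[0], mv[1]), d23 = mv_dist(mv[2], mv[3]);
  const int d02 = mv_dist(mv[0], mv[2]), d13 = mv_dist(mv[1], mv[3]);
  if (d01 >= 24 || d23 >= 24 || d02 >= 24 || d13 >= 24)
    return 0;  /* subblocks disagree: keep the normal full search */
  if (ref[0] != ref[1] || ref[1] != ref[2] || ref[2] != ref[3] ||
      d01 >= 2 || d23 >= 2 || d02 >= 2 || d13 >= 2)
    return 1;  /* roughly coherent: seed the search with the mean MV */
  return (d01 | d23 | d02 | d13) ? 2 : 3;  /* near-exact or exact match */
}

Level 1 only reseeds the search; level 2 additionally skips the intra checks in vp9_rd_pick_inter_mode_sb(); level 3 further pins the mode loop to subblock_ref, which is what the vp9_rdopt.c hunks below implement.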
diff --git a/vp9/encoder/vp9_onyx_if.c b/vp9/encoder/vp9_onyx_if.c
index 5ba9a41de..9042111f8 100644
--- a/vp9/encoder/vp9_onyx_if.c
+++ b/vp9/encoder/vp9_onyx_if.c
@@ -723,7 +723,7 @@ void vp9_set_speed_features(VP9_COMP *cpi) {
   sf->use_rd_breakout = 0;
   sf->skip_encode_sb = 0;
   sf->use_uv_intra_rd_estimate = 0;
-
+  sf->using_small_partition_info = 0;
   // Skip any mode not chosen at size < X for all sizes > X
   // Hence BLOCK_SIZE_SB64X64 (skip is off)
   sf->unused_mode_skip_lvl = BLOCK_SIZE_SB64X64;
@@ -795,6 +795,7 @@ void vp9_set_speed_features(VP9_COMP *cpi) {
     sf->use_rd_breakout = 1;
     sf->skip_encode_sb = 1;
     sf->use_uv_intra_rd_estimate = 1;
+    sf->using_small_partition_info = 1;
   }
   if (speed == 3) {
     sf->comp_inter_joint_search_thresh = BLOCK_SIZE_TYPES;
diff --git a/vp9/encoder/vp9_onyx_int.h b/vp9/encoder/vp9_onyx_int.h
index f63800c8d..19b1e3af5 100644
--- a/vp9/encoder/vp9_onyx_int.h
+++ b/vp9/encoder/vp9_onyx_int.h
@@ -268,6 +268,7 @@ typedef struct {
   int adjust_partitioning_from_last_frame;
   int last_partitioning_redo_frequency;
  int disable_splitmv;
+  int using_small_partition_info;
 
   // Implements various heuristics to skip searching modes
   // The heuristics selected are based on flags
diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c
index 7d44250a2..eac7b2a19 100644
--- a/vp9/encoder/vp9_rdopt.c
+++ b/vp9/encoder/vp9_rdopt.c
@@ -2334,6 +2334,7 @@ static void single_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
                                  int mi_row, int mi_col,
                                  int_mv *tmp_mv, int *rate_mv) {
   MACROBLOCKD *xd = &x->e_mbd;
+  VP9_COMMON *cm = &cpi->common;
   MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi;
   struct buf_2d backup_yv12[MAX_MB_PLANE] = {{0}};
   int bestsme = INT_MAX;
@@ -2364,18 +2365,37 @@
 
   vp9_clamp_mv_min_max(x, &ref_mv);
 
-  // Work out the size of the first step in the mv step search.
-  // 0 here is maximum length first step. 1 is MAX >> 1 etc.
-  if (cpi->sf.auto_mv_step_size && cpi->common.show_frame) {
-    step_param = vp9_init_search_range(cpi, cpi->max_mv_magnitude);
-  } else {
-    step_param = vp9_init_search_range(
-        cpi, MIN(cpi->common.width, cpi->common.height));
-  }
+  // Adjust search parameters based on small partitions' result.
+  if (x->fast_ms) {
+    // && abs(mvp_full.as_mv.row - x->pred_mv.as_mv.row) < 24 &&
+    // abs(mvp_full.as_mv.col - x->pred_mv.as_mv.col) < 24) {
+    // Adjust search range.
+    step_param = 6;
+    if (x->fast_ms > 1)
+      step_param = 8;
 
-  // mvp_full.as_int = ref_mv[0].as_int;
-  mvp_full.as_int =
-      mbmi->ref_mvs[ref][x->mv_best_ref_index[ref]].as_int;
+    // Get prediction MV.
+    mvp_full.as_int = x->pred_mv.as_int;
+
+    // Adjust MV sign if needed.
+    if (cm->ref_frame_sign_bias[ref]) {
+      mvp_full.as_mv.col *= -1;
+      mvp_full.as_mv.row *= -1;
+    }
+  } else {
+    // Work out the size of the first step in the mv step search.
+    // 0 here is maximum length first step. 1 is MAX >> 1 etc.
+    if (cpi->sf.auto_mv_step_size && cpi->common.show_frame) {
+      step_param = vp9_init_search_range(cpi, cpi->max_mv_magnitude);
+    } else {
+      step_param = vp9_init_search_range(
+          cpi, MIN(cpi->common.width, cpi->common.height));
+    }
+
+    // mvp_full.as_int = ref_mv[0].as_int;
+    mvp_full.as_int =
+        mbmi->ref_mvs[ref][x->mv_best_ref_index[ref]].as_int;
+  }
 
   mvp_full.as_mv.col >>= 3;
   mvp_full.as_mv.row >>= 3;
@@ -3113,9 +3133,9 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
   }
 
   // If intra is not masked off then get uv intra mode rd.
-  if (!cpi->sf.use_avoid_tested_higherror
+  if (x->fast_ms < 2 && (!cpi->sf.use_avoid_tested_higherror
       || (cpi->sf.use_avoid_tested_higherror
-          && (ref_frame_mask & (1 << INTRA_FRAME)))) {
+          && (ref_frame_mask & (1 << INTRA_FRAME))))) {
     // Note that the enumerator TXFM_MODE "matches" TX_SIZE.
     // Eg. ONLY_4X4 = TX_4X4, ALLOW_8X8 = TX_8X8 etc such that the MIN
     // operation below correctly constrains max_uvtxfm_size.
@@ -3194,6 +3214,12 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
 
     x->skip = 0;
 
+    // Skip some checking based on small partitions' result.
+    if (x->fast_ms > 1 && !ref_frame)
+      continue;
+    if (x->fast_ms > 2 && ref_frame != x->subblock_ref)
+      continue;
+
     if (cpi->sf.use_avoid_tested_higherror && bsize >= BLOCK_SIZE_SB8X8) {
       if (!(ref_frame_mask & (1 << ref_frame))) {
         continue;
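Note: taken together, the vp9_rdopt.c changes turn the full motion search into a seeded refinement. Below is a minimal compilable sketch of just the seeding step. It assumes, per the comment moved into the else branch, that step_param N makes the first diamond-search step MAX >> N, so the values 6 and 8 start far tighter than the auto-sized default; choose_search_seed() is a hypothetical helper, not a libvpx function.

#include <stdint.h>

typedef union {
  uint32_t as_int;
  struct { int16_t row, col; } as_mv;
} int_mv_sketch;

/* Pick the starting MV and first-step size for the large partition's
 * search from the subblock hint. */
static int_mv_sketch choose_search_seed(int fast_ms, int_mv_sketch pred_mv,
                                        int ref_sign_bias, int *step_param) {
  int_mv_sketch seed = pred_mv;
  *step_param = (fast_ms > 1) ? 8 : 6;  /* tighter when subblocks agree */
  if (ref_sign_bias) {
    /* pred_mv was sign-normalized in rd_pick_partition(); flip it back
     * for sign-biased (e.g. alt-ref) references. */
    seed.as_mv.row = (int16_t)-seed.as_mv.row;
    seed.as_mv.col = (int16_t)-seed.as_mv.col;
  }
  return seed;
}

As in the existing code path, the caller still shifts the seed's row and col right by 3 to convert 1/8-pel MV units to the full-pel units the search expects.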