Enable sub8x8 inter block search for RTC coding mode

This commit enables sub8x8 inter block coding for RTC mode. The
use of sub8x8 blocks can be turned on by allowing the
choose_partitioning function to select 4x4/4x8/8x4 block sizes.

Change-Id: Ifbf1fb3888fe4c094fc85158ac3aa89867d8494a
Jingning Han 2014-12-22 13:38:34 -08:00
parent d0f2377027
commit dad89d5ca1
6 changed files with 316 additions and 42 deletions
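
For context, the heart of the change is a block-size dispatch in the non-RD
mode decision: blocks of 8x8 and above keep the existing path, while sub8x8
blocks are routed to the new picker. A minimal sketch of that control flow,
using the functions touched by this patch (the wrapper name pick_mode_sketch
is ours, and the encoder's internal headers are assumed):

/* Sketch of the dispatch added in nonrd_pick_sb_modes (simplified: the
 * intra-only and segment-skip branches are elided). */
static void pick_mode_sketch(VP9_COMP *cpi, MACROBLOCK *x,
                             TileDataEnc *tile_data, int mi_row, int mi_col,
                             RD_COST *rd_cost, BLOCK_SIZE bsize,
                             PICK_MODE_CONTEXT *ctx) {
  if (bsize >= BLOCK_8X8)
    vp9_pick_inter_mode(cpi, x, tile_data, mi_row, mi_col, rd_cost, bsize,
                        ctx);
  else  /* 4x4, 4x8, or 8x4: per-sub-block motion search and mode choice. */
    vp9_pick_inter_mode_sub8x8(cpi, x, tile_data, mi_row, mi_col, rd_cost,
                               bsize, ctx);
}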

vp9/encoder/vp9_encodeframe.c

@@ -1410,6 +1410,11 @@ static void update_state_rt(VP9_COMP *cpi, ThreadData *td,
const int pred_ctx = vp9_get_pred_context_switchable_interp(xd);
++td->counts->switchable_interp[pred_ctx][mbmi->interp_filter];
}
if (mbmi->sb_type < BLOCK_8X8) {
mbmi->mv[0].as_int = mi->bmi[3].as_mv[0].as_int;
mbmi->mv[1].as_int = mi->bmi[3].as_mv[1].as_int;
}
}
if (cm->use_prev_frame_mvs) {
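
The update_state_rt hunk above depends on VP9's sub8x8 storage convention:
each 4x4 sub-block keeps its own motion vector in mi->bmi[0..3] in raster
order, and the block-level mbmi->mv[] must mirror the last entry, bmi[3]
(the bottom-right sub-block), since that is what later blocks read as an MV
reference. A minimal restatement of that rule (the helper name sync_block_mv
is ours):

/* Raster layout of the four 4x4 sub-blocks in an 8x8 unit:
 *   bmi[0] | bmi[1]
 *   -------+-------
 *   bmi[2] | bmi[3]   <- representative MV for the whole block */
static void sync_block_mv(MODE_INFO *mi) {
  MB_MODE_INFO *const mbmi = &mi->mbmi;
  if (mbmi->sb_type < BLOCK_8X8) {
    mbmi->mv[0].as_int = mi->bmi[3].as_mv[0].as_int;
    mbmi->mv[1].as_int = mi->bmi[3].as_mv[1].as_int;
  }
}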
@@ -2705,9 +2710,12 @@ static void nonrd_pick_sb_modes(VP9_COMP *cpi,
hybrid_intra_mode_search(cpi, x, rd_cost, bsize, ctx);
else if (vp9_segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP))
set_mode_info_seg_skip(x, cm->tx_mode, rd_cost, bsize);
else
else if (bsize >= BLOCK_8X8)
vp9_pick_inter_mode(cpi, x, tile_data, mi_row, mi_col,
rd_cost, bsize, ctx);
else
vp9_pick_inter_mode_sub8x8(cpi, x, tile_data, mi_row, mi_col,
rd_cost, bsize, ctx);
duplicate_mode_info_in_sb(cm, xd, mi_row, mi_col, bsize);
@@ -3312,9 +3320,10 @@ static void encode_nonrd_sb_row(VP9_COMP *cpi,
// Set the partition type of the 64X64 block
switch (sf->partition_search_type) {
case VAR_BASED_PARTITION:
// TODO(jingning) Only key frame coding supports sub8x8 block at this
// point. To be continued to enable sub8x8 block mode decision for
// P frames.
// TODO(jingning, marpan): The mode decision and encoding process
// support both intra and inter sub8x8 block coding for RTC mode.
// Tune the thresholds accordingly to use sub8x8 block coding for
// coding performance improvement.
choose_partitioning(cpi, tile_info, x, mi_row, mi_col);
nonrd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col,
BLOCK_64X64, 1, &dummy_rdc, td->pc_root);
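
The TODO above says the variance-based partitioner's thresholds still need
tuning before sub8x8 partitions reliably pay off in P frames. As a purely
hypothetical illustration of the kind of gate involved (none of these
identifiers or threshold values come from this patch):

/* Hypothetical sketch: only descend below 8x8 when the 8x8 region is highly
 * non-uniform, so the extra mode/MV signaling cost is likely to be recovered
 * in distortion. Threshold values are made up. */
static BLOCK_SIZE pick_leaf_size(unsigned int block_var, int is_key_frame) {
  const unsigned int sub8x8_var_thresh = is_key_frame ? 500000 : 1000000;
  return (block_var > sub8x8_var_thresh) ? BLOCK_4X4 : BLOCK_8X8;
}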

vp9/encoder/vp9_pickmode.c

@@ -946,3 +946,253 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
*rd_cost = best_rdc;
}
void vp9_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x,
TileDataEnc *tile_data,
int mi_row, int mi_col, RD_COST *rd_cost,
BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx) {
VP9_COMMON *const cm = &cpi->common;
TileInfo *const tile_info = &tile_data->tile_info;
SPEED_FEATURES *const sf = &cpi->sf;
MACROBLOCKD *const xd = &x->e_mbd;
MB_MODE_INFO *const mbmi = &xd->mi[0].src_mi->mbmi;
const struct segmentation *const seg = &cm->seg;
MV_REFERENCE_FRAME ref_frame, second_ref_frame = NONE;
MV_REFERENCE_FRAME best_ref_frame = NONE;
unsigned char segment_id = mbmi->segment_id;
int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES];
struct buf_2d yv12_mb[4][MAX_MB_PLANE];
static const int flag_list[4] = { 0, VP9_LAST_FLAG, VP9_GOLD_FLAG,
VP9_ALT_FLAG };
int64_t best_rd = INT64_MAX;
b_mode_info bsi[MAX_REF_FRAMES][4];
int ref_frame_skip_mask = 0;
const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize];
const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize];
int idx, idy;
x->skip_encode = sf->skip_encode_frame && x->q_index < QIDX_SKIP_THRESH;
ctx->pred_pixel_ready = 0;
for (ref_frame = LAST_FRAME; ref_frame <= GOLDEN_FRAME; ref_frame++) {
x->pred_mv_sad[ref_frame] = INT_MAX;
frame_mv[NEWMV][ref_frame].as_int = INVALID_MV;
frame_mv[ZEROMV][ref_frame].as_int = 0;
if (cpi->ref_frame_flags & flag_list[ref_frame]) {
const YV12_BUFFER_CONFIG *yv12 = get_ref_frame_buffer(cpi, ref_frame);
int_mv *const candidates = mbmi->ref_mvs[ref_frame];
const struct scale_factors *const sf =
&cm->frame_refs[ref_frame - 1].sf;
vp9_setup_pred_block(xd, yv12_mb[ref_frame], yv12, mi_row, mi_col,
sf, sf);
vp9_find_mv_refs(cm, xd, tile_info, xd->mi[0].src_mi, ref_frame,
candidates, mi_row, mi_col);
vp9_find_best_ref_mvs(xd, cm->allow_high_precision_mv, candidates,
&frame_mv[NEARESTMV][ref_frame],
&frame_mv[NEARMV][ref_frame]);
} else {
ref_frame_skip_mask |= (1 << ref_frame);
}
}
mbmi->sb_type = bsize;
mbmi->tx_size = TX_4X4;
mbmi->uv_mode = DC_PRED;
mbmi->ref_frame[0] = LAST_FRAME;
mbmi->ref_frame[1] = NONE;
mbmi->interp_filter = cm->interp_filter == SWITCHABLE ? EIGHTTAP
: cm->interp_filter;
for (ref_frame = LAST_FRAME; ref_frame <= GOLDEN_FRAME; ++ref_frame) {
int64_t this_rd = 0;
int plane;
if (ref_frame_skip_mask & (1 << ref_frame))
continue;
// TODO(jingning, agrange): Scaling reference frame not supported for
// sub8x8 blocks.
if (ref_frame > INTRA_FRAME &&
vp9_is_scaled(&cm->frame_refs[ref_frame - 1].sf))
continue;
// If the segment reference frame feature is enabled,
// then do nothing if the current ref frame is not allowed.
if (vp9_segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME) &&
vp9_get_segdata(seg, segment_id, SEG_LVL_REF_FRAME) != (int)ref_frame)
continue;
mbmi->ref_frame[0] = ref_frame;
x->skip = 0;
set_ref_ptrs(cm, xd, ref_frame, second_ref_frame);
// Select prediction reference frames.
for (plane = 0; plane < MAX_MB_PLANE; plane++)
xd->plane[plane].pre[0] = yv12_mb[ref_frame][plane];
for (idy = 0; idy < 2; idy += num_4x4_blocks_high) {
for (idx = 0; idx < 2; idx += num_4x4_blocks_wide) {
int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES];
int64_t b_best_rd = INT64_MAX;
const int i = idy * 2 + idx;
PREDICTION_MODE this_mode;
int b_rate = 0;
int64_t b_dist = 0;
RD_COST this_rdc;
unsigned int var_y, sse_y;
struct macroblock_plane *p = &x->plane[0];
struct macroblockd_plane *pd = &xd->plane[0];
const struct buf_2d orig_src = p->src;
const struct buf_2d orig_dst = pd->dst;
struct buf_2d orig_pre[2];
vpx_memcpy(orig_pre, xd->plane[0].pre, sizeof(orig_pre));
// Set buffer pointers for sub8x8 motion search.
p->src.buf =
&p->src.buf[vp9_raster_block_offset(BLOCK_8X8, i, p->src.stride)];
pd->dst.buf =
&pd->dst.buf[vp9_raster_block_offset(BLOCK_8X8, i, pd->dst.stride)];
pd->pre[0].buf =
&pd->pre[0].buf[vp9_raster_block_offset(BLOCK_8X8,
i, pd->pre[0].stride)];
frame_mv[ZEROMV][ref_frame].as_int = 0;
frame_mv[NEWMV][ref_frame].as_int = INVALID_MV;
vp9_append_sub8x8_mvs_for_idx(cm, xd, tile_info, i, 0, mi_row, mi_col,
&frame_mv[NEARESTMV][ref_frame],
&frame_mv[NEARMV][ref_frame]);
for (this_mode = NEARESTMV; this_mode <= NEWMV; ++this_mode) {
xd->mi[0].bmi[i].as_mv[0].as_int =
frame_mv[this_mode][ref_frame].as_int;
if (this_mode == NEWMV) {
const int step_param = cpi->sf.mv.fullpel_search_step_param;
MV mvp_full;
MV tmp_mv;
int cost_list[5];
const int tmp_col_min = x->mv_col_min;
const int tmp_col_max = x->mv_col_max;
const int tmp_row_min = x->mv_row_min;
const int tmp_row_max = x->mv_row_max;
int dummy_dist;
if (i == 0) {
mvp_full.row = frame_mv[NEARESTMV][ref_frame].as_mv.row >> 3;
mvp_full.col = frame_mv[NEARESTMV][ref_frame].as_mv.col >> 3;
} else {
mvp_full.row = xd->mi[0].bmi[0].as_mv[0].as_mv.row >> 3;
mvp_full.col = xd->mi[0].bmi[0].as_mv[0].as_mv.col >> 3;
}
vp9_set_mv_search_range(x, &mbmi->ref_mvs[ref_frame][0].as_mv);
vp9_full_pixel_search(
cpi, x, bsize, &mvp_full, step_param, x->sadperbit4,
cond_cost_list(cpi, cost_list),
&mbmi->ref_mvs[ref_frame][0].as_mv, &tmp_mv,
INT_MAX, 0);
x->mv_col_min = tmp_col_min;
x->mv_col_max = tmp_col_max;
x->mv_row_min = tmp_row_min;
x->mv_row_max = tmp_row_max;
// Calculate the bit cost of the motion vector.
mvp_full.row = tmp_mv.row * 8;
mvp_full.col = tmp_mv.col * 8;
b_rate += vp9_mv_bit_cost(&mvp_full,
&mbmi->ref_mvs[ref_frame][0].as_mv,
x->nmvjointcost, x->mvcost,
MV_COST_WEIGHT);
b_rate += cpi->inter_mode_cost[mbmi->mode_context[ref_frame]]
[INTER_OFFSET(NEWMV)];
if (RDCOST(x->rdmult, x->rddiv, b_rate, 0) > b_best_rd)
continue;
cpi->find_fractional_mv_step(x, &tmp_mv,
&mbmi->ref_mvs[ref_frame][0].as_mv,
cpi->common.allow_high_precision_mv,
x->errorperbit,
&cpi->fn_ptr[bsize],
cpi->sf.mv.subpel_force_stop,
cpi->sf.mv.subpel_iters_per_step,
cond_cost_list(cpi, cost_list),
x->nmvjointcost, x->mvcost,
&dummy_dist,
&x->pred_sse[ref_frame], NULL, 0, 0);
xd->mi[0].bmi[i].as_mv[0].as_mv = tmp_mv;
}
vp9_build_inter_predictor(pd->pre[0].buf, pd->pre[0].stride,
pd->dst.buf, pd->dst.stride,
&xd->mi[0].bmi[i].as_mv[0].as_mv,
&xd->block_refs[0]->sf,
4 * num_4x4_blocks_wide,
4 * num_4x4_blocks_high, 0,
vp9_get_interp_kernel(mbmi->interp_filter),
MV_PRECISION_Q3,
mi_col * MI_SIZE + 4 * (i & 0x01),
mi_row * MI_SIZE + 4 * (i >> 1));
model_rd_for_sb_y(cpi, bsize, x, xd, &this_rdc.rate, &this_rdc.dist,
&var_y, &sse_y);
this_rdc.rate += b_rate;
this_rdc.dist += b_dist;
this_rdc.rdcost = RDCOST(x->rdmult, x->rddiv,
this_rdc.rate, this_rdc.dist);
if (this_rdc.rdcost < b_best_rd) {
b_best_rd = this_rdc.rdcost;
bsi[ref_frame][i].as_mode = this_mode;
bsi[ref_frame][i].as_mv[0].as_mv = xd->mi[0].bmi[i].as_mv[0].as_mv;
}
} // mode search
// restore source and prediction buffer pointers.
p->src = orig_src;
pd->pre[0] = orig_pre[0];
pd->dst = orig_dst;
this_rd += b_best_rd;
xd->mi[0].bmi[i] = bsi[ref_frame][i];
if (num_4x4_blocks_wide > 1)
xd->mi[0].bmi[i + 1] = xd->mi[0].bmi[i];
if (num_4x4_blocks_high > 1)
xd->mi[0].bmi[i + 2] = xd->mi[0].bmi[i];
}
} // loop through sub8x8 blocks
if (this_rd < best_rd) {
best_rd = this_rd;
best_ref_frame = ref_frame;
}
} // reference frames
mbmi->tx_size = TX_4X4;
mbmi->ref_frame[0] = best_ref_frame;
for (idy = 0; idy < 2; idy += num_4x4_blocks_high) {
for (idx = 0; idx < 2; idx += num_4x4_blocks_wide) {
const int block = idy * 2 + idx;
xd->mi[0].bmi[block] = bsi[best_ref_frame][block];
if (num_4x4_blocks_wide > 1)
xd->mi[0].bmi[block + 1] = bsi[best_ref_frame][block];
if (num_4x4_blocks_high > 1)
xd->mi[0].bmi[block + 2] = bsi[best_ref_frame][block];
}
}
mbmi->mode = xd->mi[0].bmi[3].as_mode;
ctx->mic = *(xd->mi[0].src_mi);
ctx->skip_txfm[0] = 0;
ctx->skip = 0;
// Dummy assignment for speed 5. No effect at speed 6.
rd_cost->rdcost = best_rd;
}
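
Throughout the new function, candidates are compared by
RDCOST(x->rdmult, x->rddiv, rate, dist). For reference, vp9/encoder/vp9_rd.h
defines this macro essentially as below (modulo exact parenthesization): the
rate term is scaled by the Lagrange multiplier rdmult with rounding, and the
distortion term is scaled by 2^rddiv.

#define RDCOST(RM, DM, R, D) \
  (((128 + ((int64_t)(R)) * (RM)) >> 8) + ((D) << (DM)))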

vp9/encoder/vp9_pickmode.h

@@ -26,6 +26,12 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
BLOCK_SIZE bsize,
PICK_MODE_CONTEXT *ctx);
void vp9_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x,
TileDataEnc *tile_data,
int mi_row, int mi_col, RD_COST *rd_cost,
BLOCK_SIZE bsize,
PICK_MODE_CONTEXT *ctx);
#ifdef __cplusplus
} // extern "C"
#endif

vp9/encoder/vp9_rd.c

@@ -516,6 +516,20 @@ void vp9_setup_pred_block(const MACROBLOCKD *xd,
}
}
int vp9_raster_block_offset(BLOCK_SIZE plane_bsize,
int raster_block, int stride) {
const int bw = b_width_log2_lookup[plane_bsize];
const int y = 4 * (raster_block >> bw);
const int x = 4 * (raster_block & ((1 << bw) - 1));
return y * stride + x;
}
int16_t* vp9_raster_block_offset_int16(BLOCK_SIZE plane_bsize,
int raster_block, int16_t *base) {
const int stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
return base + vp9_raster_block_offset(plane_bsize, raster_block, stride);
}
const YV12_BUFFER_CONFIG *vp9_get_scaled_ref_frame(const VP9_COMP *cpi,
int ref_frame) {
const VP9_COMMON *const cm = &cpi->common;

vp9/encoder/vp9_rd.h

@@ -141,6 +141,12 @@ void vp9_model_rd_from_var_lapndz(unsigned int var, unsigned int n,
int vp9_get_switchable_rate(const struct VP9_COMP *cpi,
const MACROBLOCKD *const xd);
int vp9_raster_block_offset(BLOCK_SIZE plane_bsize,
int raster_block, int stride);
int16_t* vp9_raster_block_offset_int16(BLOCK_SIZE plane_bsize,
int raster_block, int16_t *base);
const YV12_BUFFER_CONFIG *vp9_get_scaled_ref_frame(const struct VP9_COMP *cpi,
int ref_frame);

vp9/encoder/vp9_rdopt.c

@@ -129,19 +129,6 @@ static const REF_DEFINITION vp9_ref_order[MAX_REFS] = {
{{INTRA_FRAME, NONE}},
};
static int raster_block_offset(BLOCK_SIZE plane_bsize,
int raster_block, int stride) {
const int bw = b_width_log2_lookup[plane_bsize];
const int y = 4 * (raster_block >> bw);
const int x = 4 * (raster_block & ((1 << bw) - 1));
return y * stride + x;
}
static int16_t* raster_block_offset_int16(BLOCK_SIZE plane_bsize,
int raster_block, int16_t *base) {
const int stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
return base + raster_block_offset(plane_bsize, raster_block, stride);
}
static void swap_block_ptr(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx,
int m, int n, int min_plane, int max_plane) {
int i;
@@ -761,10 +748,10 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib,
struct macroblockd_plane *pd = &xd->plane[0];
const int src_stride = p->src.stride;
const int dst_stride = pd->dst.stride;
const uint8_t *src_init = &p->src.buf[raster_block_offset(BLOCK_8X8, ib,
src_stride)];
uint8_t *dst_init = &pd->dst.buf[raster_block_offset(BLOCK_8X8, ib,
dst_stride)];
const uint8_t *src_init = &p->src.buf[vp9_raster_block_offset(BLOCK_8X8, ib,
src_stride)];
uint8_t *dst_init = &pd->dst.buf[vp9_raster_block_offset(BLOCK_8X8, ib,
dst_stride)];
ENTROPY_CONTEXT ta[2], tempa[2];
ENTROPY_CONTEXT tl[2], templ[2];
@@ -808,8 +795,9 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib,
const int block = ib + idy * 2 + idx;
const uint8_t *const src = &src_init[idx * 4 + idy * 4 * src_stride];
uint8_t *const dst = &dst_init[idx * 4 + idy * 4 * dst_stride];
int16_t *const src_diff = raster_block_offset_int16(BLOCK_8X8, block,
p->src_diff);
int16_t *const src_diff = vp9_raster_block_offset_int16(BLOCK_8X8,
block,
p->src_diff);
tran_low_t *const coeff = BLOCK_OFFSET(x->plane[0].coeff, block);
xd->mi[0].src_mi->bmi[block].as_mode = mode;
vp9_predict_intra_block(xd, block, 1,
@@ -908,8 +896,8 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib,
const int block = ib + idy * 2 + idx;
const uint8_t *const src = &src_init[idx * 4 + idy * 4 * src_stride];
uint8_t *const dst = &dst_init[idx * 4 + idy * 4 * dst_stride];
int16_t *const src_diff = raster_block_offset_int16(BLOCK_8X8, block,
p->src_diff);
int16_t *const src_diff =
vp9_raster_block_offset_int16(BLOCK_8X8, block, p->src_diff);
tran_low_t *const coeff = BLOCK_OFFSET(x->plane[0].coeff, block);
xd->mi[0].src_mi->bmi[block].as_mode = mode;
vp9_predict_intra_block(xd, block, 1,
@@ -1341,10 +1329,10 @@ static int64_t encode_inter_mb_segment(VP9_COMP *cpi,
const int height = 4 * num_4x4_blocks_high_lookup[plane_bsize];
int idx, idy;
const uint8_t *const src = &p->src.buf[raster_block_offset(BLOCK_8X8, i,
p->src.stride)];
uint8_t *const dst = &pd->dst.buf[raster_block_offset(BLOCK_8X8, i,
pd->dst.stride)];
const uint8_t *const src =
&p->src.buf[vp9_raster_block_offset(BLOCK_8X8, i, p->src.stride)];
uint8_t *const dst = &pd->dst.buf[vp9_raster_block_offset(BLOCK_8X8, i,
pd->dst.stride)];
int64_t thisdistortion = 0, thissse = 0;
int thisrate = 0, ref;
const scan_order *so = &vp9_default_scan_orders[TX_4X4];
@@ -1352,7 +1340,7 @@ static int64_t encode_inter_mb_segment(VP9_COMP *cpi,
const InterpKernel *kernel = vp9_get_interp_kernel(mi->mbmi.interp_filter);
for (ref = 0; ref < 1 + is_compound; ++ref) {
const uint8_t *pre = &pd->pre[ref].buf[raster_block_offset(BLOCK_8X8, i,
const uint8_t *pre = &pd->pre[ref].buf[vp9_raster_block_offset(BLOCK_8X8, i,
pd->pre[ref].stride)];
#if CONFIG_VP9_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
@@ -1386,17 +1374,17 @@ static int64_t encode_inter_mb_segment(VP9_COMP *cpi,
#if CONFIG_VP9_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
vp9_highbd_subtract_block(
height, width, raster_block_offset_int16(BLOCK_8X8, i, p->src_diff), 8,
src, p->src.stride, dst, pd->dst.stride, xd->bd);
height, width, vp9_raster_block_offset_int16(BLOCK_8X8, i, p->src_diff),
8, src, p->src.stride, dst, pd->dst.stride, xd->bd);
} else {
vp9_subtract_block(
height, width, raster_block_offset_int16(BLOCK_8X8, i, p->src_diff), 8,
src, p->src.stride, dst, pd->dst.stride);
height, width, vp9_raster_block_offset_int16(BLOCK_8X8, i, p->src_diff),
8, src, p->src.stride, dst, pd->dst.stride);
}
#else
vp9_subtract_block(height, width,
raster_block_offset_int16(BLOCK_8X8, i, p->src_diff), 8,
src, p->src.stride, dst, pd->dst.stride);
vp9_raster_block_offset_int16(BLOCK_8X8, i, p->src_diff),
8, src, p->src.stride, dst, pd->dst.stride);
#endif // CONFIG_VP9_HIGHBITDEPTH
k = i;
@@ -1407,7 +1395,7 @@ static int64_t encode_inter_mb_segment(VP9_COMP *cpi,
k += (idy * 2 + idx);
coeff = BLOCK_OFFSET(p->coeff, k);
x->fwd_txm4x4(raster_block_offset_int16(BLOCK_8X8, k, p->src_diff),
x->fwd_txm4x4(vp9_raster_block_offset_int16(BLOCK_8X8, k, p->src_diff),
coeff, 8);
vp9_regular_quantize_b_4x4(x, 0, k, so->scan, so->iscan);
#if CONFIG_VP9_HIGHBITDEPTH
@@ -1480,13 +1468,14 @@ static INLINE void mi_buf_shift(MACROBLOCK *x, int i) {
struct macroblock_plane *const p = &x->plane[0];
struct macroblockd_plane *const pd = &x->e_mbd.plane[0];
p->src.buf = &p->src.buf[raster_block_offset(BLOCK_8X8, i, p->src.stride)];
p->src.buf = &p->src.buf[vp9_raster_block_offset(BLOCK_8X8, i,
p->src.stride)];
assert(((intptr_t)pd->pre[0].buf & 0x7) == 0);
pd->pre[0].buf = &pd->pre[0].buf[raster_block_offset(BLOCK_8X8, i,
pd->pre[0].stride)];
pd->pre[0].buf = &pd->pre[0].buf[vp9_raster_block_offset(BLOCK_8X8, i,
pd->pre[0].stride)];
if (has_second_ref(mbmi))
pd->pre[1].buf = &pd->pre[1].buf[raster_block_offset(BLOCK_8X8, i,
pd->pre[1].stride)];
pd->pre[1].buf = &pd->pre[1].buf[vp9_raster_block_offset(BLOCK_8X8, i,
pd->pre[1].stride)];
}
static INLINE void mi_buf_restore(MACROBLOCK *x, struct buf_2d orig_src,