Enable bit-stream support for 8x4 and 4x8 partitions

The recursive partition type search is enabled down to 4x4, 4x8 and
8x4, followed by the corresponding rate-distortion optimization for
the per-partition encoding mode decisions.

The bit-stream writing and reading are synchronized to support the
rectangular partitions of an 8x8 block.

This provides a coding performance gain of more than 1% on derf.

To do next:
1. re-design the rate-distortion loop for inter prediction below 8x8.
2. re-design the rate-distortion loop for intra prediction below 4x4.
3. make the loop-filter aware of rectangular partitions of an 8x8 block.
4. clean the unused probability models.
5. update default probability values.

Change-Id: Idd41a315b16879db08f045a322241f46f1d53f20
This commit is contained in:
Jingning Han 2013-05-15 22:28:36 -07:00
parent 5f3612c35e
commit 810b612c23
9 changed files with 626 additions and 312 deletions

View File

@ -417,6 +417,7 @@ typedef struct macroblockd {
static int *get_sb_index(MACROBLOCKD *xd, BLOCK_SIZE_TYPE subsize) {
switch (subsize) {
case BLOCK_SIZE_SB64X64:
case BLOCK_SIZE_SB64X32:
case BLOCK_SIZE_SB32X64:
case BLOCK_SIZE_SB32X32:
@ -444,10 +445,10 @@ static int *get_sb_index(MACROBLOCKD *xd, BLOCK_SIZE_TYPE subsize) {
static INLINE void update_partition_context(MACROBLOCKD *xd,
BLOCK_SIZE_TYPE sb_type,
BLOCK_SIZE_TYPE sb_size) {
int bsl = mi_width_log2(sb_size), bs = 1 << bsl;
int bwl = mi_width_log2(sb_type);
int bhl = mi_height_log2(sb_type);
int boffset = mi_width_log2(BLOCK_SIZE_SB64X64) - bsl;
int bsl = b_width_log2(sb_size), bs = (1 << bsl) / 2;
int bwl = b_width_log2(sb_type);
int bhl = b_height_log2(sb_type);
int boffset = b_width_log2(BLOCK_SIZE_SB64X64) - bsl;
int i;
#if !CONFIG_AB4X4

View File

@ -107,10 +107,10 @@ const vp9_prob vp9_partition_probs[NUM_PARTITION_CONTEXTS]
[PARTITION_TYPES - 1] = {
// FIXME(jingning,rbultje) put real probabilities here
#if CONFIG_AB4X4
{202, 162, 107},
{16, 2, 169},
{3, 246, 19},
{104, 90, 134},
{105, 88, 252},
{113, 88, 249},
{113, 106, 251},
{126, 105, 107},
#endif
{202, 162, 107},
{16, 2, 169},

View File

@ -540,8 +540,9 @@ static void read_mb_modes_mv(VP9D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi,
int_mv *const mv0 = &mbmi->mv[0];
int_mv *const mv1 = &mbmi->mv[1];
const int bw = 1 << mi_width_log2(mi->mbmi.sb_type);
const int bh = 1 << mi_height_log2(mi->mbmi.sb_type);
BLOCK_SIZE_TYPE bsize = mi->mbmi.sb_type;
int bw = 1 << b_width_log2(bsize);
int bh = 1 << b_height_log2(bsize);
const int use_prev_in_find_mv_refs = cm->width == cm->last_width &&
cm->height == cm->last_height &&
@ -549,6 +550,7 @@ static void read_mb_modes_mv(VP9D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi,
cm->last_show_frame;
int mb_to_left_edge, mb_to_right_edge, mb_to_top_edge, mb_to_bottom_edge;
int j, idx, idy;
mbmi->need_to_clamp_mvs = 0;
mbmi->need_to_clamp_secondmv = 0;
@ -562,7 +564,8 @@ static void read_mb_modes_mv(VP9D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi,
// Distance of Mb to the various image edges.
// These specified to 8th pel as they are always compared to MV values
// that are in 1/8th pel units
set_mi_row_col(cm, xd, mi_row, bh, mi_col, bw);
set_mi_row_col(cm, xd, mi_row, 1 << mi_height_log2(bsize),
mi_col, 1 << mi_width_log2(bsize));
mb_to_top_edge = xd->mb_to_top_edge - LEFT_TOP_MARGIN;
mb_to_bottom_edge = xd->mb_to_bottom_edge + RIGHT_BOTTOM_MARGIN;
@ -613,12 +616,12 @@ static void read_mb_modes_mv(VP9D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi,
mbmi->mode = ZEROMV;
} else {
#if CONFIG_AB4X4
if (mbmi->sb_type >= BLOCK_SIZE_SB8X8)
if (bsize >= BLOCK_SIZE_SB8X8)
mbmi->mode = read_sb_mv_ref(r, mv_ref_p);
else
mbmi->mode = SPLITMV;
#else
mbmi->mode = mbmi->sb_type > BLOCK_SIZE_SB8X8 ?
mbmi->mode = bsize > BLOCK_SIZE_SB8X8 ?
read_sb_mv_ref(r, mv_ref_p)
: read_mv_ref(r, mv_ref_p);
#endif
@ -685,17 +688,20 @@ static void read_mb_modes_mv(VP9D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi,
mbmi->uv_mode = DC_PRED;
switch (mbmi->mode) {
case SPLITMV: {
const int num_p = 4;
int j = 0;
case SPLITMV:
#if !CONFIG_AB4X4
bw = 1, bh = 1;
#endif
mbmi->need_to_clamp_mvs = 0;
do { // for each subset j
for (idy = 0; idy < 2; idy += bh) {
for (idx = 0; idx < 2; idx += bw) {
int_mv leftmv, abovemv, second_leftmv, second_abovemv;
int_mv blockmv, secondmv;
int mv_contz;
int blockmode;
int k = j;
int i, k;
j = idy * 2 + idx;
k = j;
leftmv.as_int = left_block_mv(xd, mi, k);
abovemv.as_int = above_block_mv(mi, k, mis);
@ -718,33 +724,33 @@ static void read_mb_modes_mv(VP9D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi,
decode_mv(r, &secondmv.as_mv, &best_mv_second.as_mv, nmvc,
&cm->fc.NMVcount, xd->allow_high_precision_mv);
#ifdef VPX_MODE_COUNT
#ifdef VPX_MODE_COUNT
vp9_mv_cont_count[mv_contz][3]++;
#endif
#endif
break;
case LEFT4X4:
blockmv.as_int = leftmv.as_int;
if (mbmi->second_ref_frame > 0)
secondmv.as_int = second_leftmv.as_int;
#ifdef VPX_MODE_COUNT
#ifdef VPX_MODE_COUNT
vp9_mv_cont_count[mv_contz][0]++;
#endif
#endif
break;
case ABOVE4X4:
blockmv.as_int = abovemv.as_int;
if (mbmi->second_ref_frame > 0)
secondmv.as_int = second_abovemv.as_int;
#ifdef VPX_MODE_COUNT
#ifdef VPX_MODE_COUNT
vp9_mv_cont_count[mv_contz][1]++;
#endif
#endif
break;
case ZERO4X4:
blockmv.as_int = 0;
if (mbmi->second_ref_frame > 0)
secondmv.as_int = 0;
#ifdef VPX_MODE_COUNT
#ifdef VPX_MODE_COUNT
vp9_mv_cont_count[mv_contz][2]++;
#endif
#endif
break;
default:
break;
@ -752,12 +758,16 @@ static void read_mb_modes_mv(VP9D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi,
mi->bmi[j].as_mv[0].as_int = blockmv.as_int;
if (mbmi->second_ref_frame > 0)
mi->bmi[j].as_mv[1].as_int = secondmv.as_int;
} while (++j < num_p);
for (i = 1; i < bh; ++i)
vpx_memcpy(&mi->bmi[j + i * 2], &mi->bmi[j], sizeof(mi->bmi[j]));
for (i = 1; i < bw; ++i)
vpx_memcpy(&mi->bmi[j + i], &mi->bmi[j], sizeof(mi->bmi[j]));
}
}
mv0->as_int = mi->bmi[3].as_mv[0].as_int;
mv1->as_int = mi->bmi[3].as_mv[1].as_int;
break; /* done with SPLITMV */
case NEARMV:
@ -822,14 +832,14 @@ static void read_mb_modes_mv(VP9D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi,
mv0->as_int = 0;
#if CONFIG_AB4X4
if (mbmi->sb_type >= BLOCK_SIZE_SB8X8) {
if (bsize >= BLOCK_SIZE_SB8X8) {
mbmi->mode = read_sb_ymode(r, cm->fc.sb_ymode_prob);
cm->fc.sb_ymode_counts[mbmi->mode]++;
} else {
mbmi->mode = I4X4_PRED;
}
#else
if (mbmi->sb_type > BLOCK_SIZE_SB8X8) {
if (bsize > BLOCK_SIZE_SB8X8) {
mbmi->mode = read_sb_ymode(r, cm->fc.sb_ymode_prob);
cm->fc.sb_ymode_counts[mbmi->mode]++;
} else {
@ -840,7 +850,7 @@ static void read_mb_modes_mv(VP9D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi,
// If MB mode is I4X4_PRED read the block modes
#if CONFIG_AB4X4
if (mbmi->sb_type < BLOCK_SIZE_SB8X8) {
if (bsize < BLOCK_SIZE_SB8X8) {
#else
if (mbmi->mode == I4X4_PRED) {
#endif
@ -858,20 +868,20 @@ static void read_mb_modes_mv(VP9D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi,
#if CONFIG_AB4X4
if (cm->txfm_mode == TX_MODE_SELECT && mbmi->mb_skip_coeff == 0 &&
mbmi->sb_type >= BLOCK_SIZE_SB8X8) {
bsize >= BLOCK_SIZE_SB8X8) {
#else
if (cm->txfm_mode == TX_MODE_SELECT && mbmi->mb_skip_coeff == 0 &&
((mbmi->ref_frame == INTRA_FRAME && mbmi->mode != I4X4_PRED) ||
(mbmi->ref_frame != INTRA_FRAME && mbmi->mode != SPLITMV))) {
#endif
const int allow_16x16 = mbmi->sb_type >= BLOCK_SIZE_MB16X16;
const int allow_32x32 = mbmi->sb_type >= BLOCK_SIZE_SB32X32;
const int allow_16x16 = bsize >= BLOCK_SIZE_MB16X16;
const int allow_32x32 = bsize >= BLOCK_SIZE_SB32X32;
mbmi->txfm_size = select_txfm_size(cm, r, allow_16x16, allow_32x32);
} else if (mbmi->sb_type >= BLOCK_SIZE_SB32X32 &&
} else if (bsize >= BLOCK_SIZE_SB32X32 &&
cm->txfm_mode >= ALLOW_32X32) {
mbmi->txfm_size = TX_32X32;
} else if (cm->txfm_mode >= ALLOW_16X16 &&
mbmi->sb_type >= BLOCK_SIZE_MB16X16
bsize >= BLOCK_SIZE_MB16X16
#if !CONFIG_AB4X4
&& ((mbmi->ref_frame == INTRA_FRAME && mbmi->mode <= TM_PRED) ||
(mbmi->ref_frame != INTRA_FRAME && mbmi->mode != SPLITMV))
@ -880,7 +890,7 @@ static void read_mb_modes_mv(VP9D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi,
mbmi->txfm_size = TX_16X16;
} else if (cm->txfm_mode >= ALLOW_8X8 &&
#if CONFIG_AB4X4
(mbmi->sb_type >= BLOCK_SIZE_SB8X8))
(bsize >= BLOCK_SIZE_SB8X8))
#else
(!(mbmi->ref_frame == INTRA_FRAME && mbmi->mode == I4X4_PRED) &&
!(mbmi->ref_frame != INTRA_FRAME && mbmi->mode == SPLITMV)))

View File

@ -413,6 +413,11 @@ static void decode_modes_b(VP9D_COMP *pbi, int mi_row, int mi_col,
vp9_reader *r, BLOCK_SIZE_TYPE bsize) {
MACROBLOCKD *const xd = &pbi->mb;
#if CONFIG_AB4X4
if (bsize < BLOCK_SIZE_SB8X8)
if (xd->ab_index > 0)
return;
#endif
set_offsets(pbi, bsize, mi_row, mi_col);
vp9_decode_mb_mode_mv(pbi, xd, mi_row, mi_col, r);
set_refs(pbi, mi_row, mi_col);
@ -465,6 +470,7 @@ static void decode_modes_sb(VP9D_COMP *pbi, int mi_row, int mi_col,
}
subsize = get_subsize(bsize, partition);
*(get_sb_index(xd, subsize)) = 0;
switch (partition) {
case PARTITION_NONE:
@ -472,11 +478,13 @@ static void decode_modes_sb(VP9D_COMP *pbi, int mi_row, int mi_col,
break;
case PARTITION_HORZ:
decode_modes_b(pbi, mi_row, mi_col, r, subsize);
*(get_sb_index(xd, subsize)) = 1;
if (mi_row + bs < pc->mi_rows)
decode_modes_b(pbi, mi_row + bs, mi_col, r, subsize);
break;
case PARTITION_VERT:
decode_modes_b(pbi, mi_row, mi_col, r, subsize);
*(get_sb_index(xd, subsize)) = 1;
if (mi_col + bs < pc->mi_cols)
decode_modes_b(pbi, mi_row, mi_col + bs, r, subsize);
break;

View File

@ -34,7 +34,7 @@
static void recon_write_yuv_frame(const char *name,
const YV12_BUFFER_CONFIG *s,
int w, int _h) {
FILE *yuv_file = fopen((char *)name, "ab");
FILE *yuv_file = fopen(name, "ab");
const uint8_t *src = s->y_buffer;
int h = _h;

View File

@ -696,15 +696,21 @@ static void pack_inter_mode_mvs(VP9_COMP *cpi, MODE_INFO *m,
nmvc, xd->allow_high_precision_mv);
break;
case SPLITMV: {
int j = 0;
do {
int j;
B_PREDICTION_MODE blockmode;
int_mv blockmv;
int k = -1; /* first block in subset j */
int mv_contz;
int_mv leftmv, abovemv;
int bwl = b_width_log2(mi->sb_type), bw = 1 << bwl;
int bhl = b_height_log2(mi->sb_type), bh = 1 << bhl;
int idx, idy;
#if !CONFIG_AB4X4
bw = 1, bh = 1;
#endif
for (idy = 0; idy < 2; idy += bh) {
for (idx = 0; idx < 2; idx += bw) {
j = idy * 2 + idx;
blockmode = cpi->mb.partition_info->bmi[j].mode;
blockmv = cpi->mb.partition_info->bmi[j].mv;
k = j;
@ -728,7 +734,12 @@ static void pack_inter_mode_mvs(VP9_COMP *cpi, MODE_INFO *m,
&mi->best_second_mv.as_mv,
nmvc, xd->allow_high_precision_mv);
}
} while (++j < cpi->mb.partition_info->count);
}
}
#ifdef MODE_STATS
++count_mb_seg[mi->partitioning];
#endif
break;
}
default:
@ -837,6 +848,11 @@ static void write_modes_b(VP9_COMP *cpi, MODE_INFO *m, vp9_writer *bc,
VP9_COMMON *const cm = &cpi->common;
MACROBLOCKD *const xd = &cpi->mb.e_mbd;
#if CONFIG_AB4X4
if (m->mbmi.sb_type < BLOCK_SIZE_SB8X8)
if (xd->ab_index > 0)
return;
#endif
xd->mode_info_context = m;
set_mi_row_col(&cpi->common, xd, mi_row,
1 << mi_height_log2(m->mbmi.sb_type),
@ -891,7 +907,7 @@ static void write_modes_sb(VP9_COMP *cpi, MODE_INFO *m, vp9_writer *bc,
#if CONFIG_AB4X4
if (bsize < BLOCK_SIZE_SB8X8)
if (xd->ab_index != 0)
if (xd->ab_index > 0)
return;
#endif
@ -910,6 +926,7 @@ static void write_modes_sb(VP9_COMP *cpi, MODE_INFO *m, vp9_writer *bc,
}
subsize = get_subsize(bsize, partition);
*(get_sb_index(xd, subsize)) = 0;
switch (partition) {
case PARTITION_NONE:
@ -917,11 +934,13 @@ static void write_modes_sb(VP9_COMP *cpi, MODE_INFO *m, vp9_writer *bc,
break;
case PARTITION_HORZ:
write_modes_b(cpi, m, bc, tok, tok_end, mi_row, mi_col);
*(get_sb_index(xd, subsize)) = 1;
if ((mi_row + bs) < cm->mi_rows)
write_modes_b(cpi, m + bs * mis, bc, tok, tok_end, mi_row + bs, mi_col);
break;
case PARTITION_VERT:
write_modes_b(cpi, m, bc, tok, tok_end, mi_row, mi_col);
*(get_sb_index(xd, subsize)) = 1;
if ((mi_col + bs) < cm->mi_cols)
write_modes_b(cpi, m + bs, bc, tok, tok_end, mi_row, mi_col + bs);
break;

View File

@ -786,6 +786,12 @@ static void encode_b(VP9_COMP *cpi, TOKENEXTRA **tp,
if (sub_index != -1)
*(get_sb_index(xd, bsize)) = sub_index;
#if CONFIG_AB4X4
if (bsize < BLOCK_SIZE_SB8X8)
if (xd->ab_index > 0)
return;
#endif
set_offsets(cpi, mi_row, mi_col, bsize);
update_state(cpi, get_block_context(x, bsize), bsize, output_enabled);
encode_superblock(cpi, tp, output_enabled, mi_row, mi_col, bsize);
@ -828,13 +834,8 @@ static void encode_sb(VP9_COMP *cpi, TOKENEXTRA **tp,
if (bsl == bwl && bsl == bhl) {
#if CONFIG_AB4X4
if (output_enabled && bsize >= BLOCK_SIZE_SB8X8) {
if (bsize > BLOCK_SIZE_SB8X8 ||
(bsize == BLOCK_SIZE_SB8X8 && c1 == bsize))
if (output_enabled && bsize >= BLOCK_SIZE_SB8X8)
cpi->partition_count[pl][PARTITION_NONE]++;
else
cpi->partition_count[pl][PARTITION_SPLIT]++;
}
#else
if (output_enabled && bsize > BLOCK_SIZE_SB8X8)
cpi->partition_count[pl][PARTITION_NONE]++;
@ -909,7 +910,6 @@ static void rd_pick_partition(VP9_COMP *cpi, TOKENEXTRA **tp,
return;
}
#endif
assert(mi_height_log2(bsize) == mi_width_log2(bsize));
// buffer the above/left context information of the block in search.
@ -939,7 +939,7 @@ static void rd_pick_partition(VP9_COMP *cpi, TOKENEXTRA **tp,
for (i = 0; i < 4; ++i) {
int x_idx = (i & 1) * (ms >> 1);
int y_idx = (i >> 1) * (ms >> 1);
int r, d;
int r = 0, d = 0;
if ((mi_row + y_idx >= cm->mi_rows) || (mi_col + x_idx >= cm->mi_cols))
continue;
@ -966,10 +966,13 @@ static void rd_pick_partition(VP9_COMP *cpi, TOKENEXTRA **tp,
restore_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize);
}
// TODO(jingning): need to enable 4x8 and 8x4 partition coding
// PARTITION_HORZ
if ((mi_col + ms <= cm->mi_cols) && (mi_row + (ms >> 1) <= cm->mi_rows) &&
#if CONFIG_AB4X4
(bsize >= BLOCK_SIZE_SB8X8)) {
#else
(bsize >= BLOCK_SIZE_MB16X16)) {
#endif
int r2, d2;
int mb_skip = 0;
subsize = get_subsize(bsize, PARTITION_HORZ);
@ -978,7 +981,7 @@ static void rd_pick_partition(VP9_COMP *cpi, TOKENEXTRA **tp,
get_block_context(x, subsize));
if (mi_row + ms <= cm->mi_rows) {
int r, d;
int r = 0, d = 0;
update_state(cpi, get_block_context(x, subsize), subsize, 0);
encode_superblock(cpi, tp, 0, mi_row, mi_col, subsize);
*(get_sb_index(xd, subsize)) = 1;
@ -992,8 +995,12 @@ static void rd_pick_partition(VP9_COMP *cpi, TOKENEXTRA **tp,
}
set_partition_seg_context(cm, xd, mi_row, mi_col);
pl = partition_plane_context(xd, bsize);
#if CONFIG_AB4X4
if (r2 < INT_MAX)
r2 += x->partition_cost[pl][PARTITION_HORZ];
#else
r2 += x->partition_cost[pl][PARTITION_HORZ];
#endif
if ((RDCOST(x->rdmult, x->rddiv, r2, d2) <
RDCOST(x->rdmult, x->rddiv, srate, sdist)) && !mb_skip) {
srate = r2;
@ -1005,7 +1012,11 @@ static void rd_pick_partition(VP9_COMP *cpi, TOKENEXTRA **tp,
// PARTITION_VERT
if ((mi_row + ms <= cm->mi_rows) && (mi_col + (ms >> 1) <= cm->mi_cols) &&
#if CONFIG_AB4X4
(bsize >= BLOCK_SIZE_SB8X8)) {
#else
(bsize >= BLOCK_SIZE_MB16X16)) {
#endif
int r2, d2;
int mb_skip = 0;
subsize = get_subsize(bsize, PARTITION_VERT);
@ -1013,7 +1024,7 @@ static void rd_pick_partition(VP9_COMP *cpi, TOKENEXTRA **tp,
pick_sb_modes(cpi, mi_row, mi_col, tp, &r2, &d2, subsize,
get_block_context(x, subsize));
if (mi_col + ms <= cm->mi_cols) {
int r, d;
int r = 0, d = 0;
update_state(cpi, get_block_context(x, subsize), subsize, 0);
encode_superblock(cpi, tp, 0, mi_row, mi_col, subsize);
*(get_sb_index(xd, subsize)) = 1;
@ -1027,8 +1038,12 @@ static void rd_pick_partition(VP9_COMP *cpi, TOKENEXTRA **tp,
}
set_partition_seg_context(cm, xd, mi_row, mi_col);
pl = partition_plane_context(xd, bsize);
#if CONFIG_AB4X4
if (r2 < INT_MAX)
r2 += x->partition_cost[pl][PARTITION_VERT];
#else
r2 += x->partition_cost[pl][PARTITION_VERT];
#endif
if ((RDCOST(x->rdmult, x->rddiv, r2, d2) <
RDCOST(x->rdmult, x->rddiv, srate, sdist)) && !mb_skip) {
srate = r2;

View File

@ -573,7 +573,11 @@ void vp9_update_nmv_count(VP9_COMP *cpi, MACROBLOCK *x,
int bhl = b_height_log2(mbmi->sb_type), bh = 1 << bhl;
int idx, idy;
#if CONFIG_AB4X4
if (mbmi->sb_type < BLOCK_SIZE_SB8X8) {
#else
if (mbmi->mode == SPLITMV) {
#endif
int i;
PARTITION_INFO *pi = x->partition_info;
#if !CONFIG_AB4X4

View File

@ -910,6 +910,11 @@ static int labels2mode(MACROBLOCK *x,
MB_MODE_INFO * mbmi = &mic->mbmi;
const int mis = xd->mode_info_stride;
int i, cost = 0, thismvcost = 0;
#if CONFIG_AB4X4
int idx, idy;
int bw = 1 << b_width_log2(mbmi->sb_type);
int bh = 1 << b_height_log2(mbmi->sb_type);
#endif
/* We have to be careful retrieving previously-encoded motion vectors.
Ones from this macroblock have to be pulled from the BLOCKD array
@ -993,6 +998,17 @@ static int labels2mode(MACROBLOCK *x,
x->partition_info->bmi[i].mv.as_int = this_mv->as_int;
if (mbmi->second_ref_frame > 0)
x->partition_info->bmi[i].second_mv.as_int = this_second_mv->as_int;
#if CONFIG_AB4X4
for (idy = 0; idy < bh; ++idy) {
for (idx = 0; idx < bw; ++idx) {
vpx_memcpy(&mic->bmi[i + idy * 2 + idx],
&mic->bmi[i], sizeof(mic->bmi[i]));
vpx_memcpy(&x->partition_info->bmi[i + idy * 2 + idx],
&x->partition_info->bmi[i],
sizeof(x->partition_info->bmi[i]));
}
}
#endif
}
cost += thismvcost;
@ -1007,8 +1023,15 @@ static int64_t encode_inter_mb_segment(VP9_COMMON *const cm,
int *distortion,
ENTROPY_CONTEXT *ta,
ENTROPY_CONTEXT *tl) {
int i;
int i, k;
MACROBLOCKD *xd = &x->e_mbd;
BLOCK_SIZE_TYPE bsize = xd->mode_info_context->mbmi.sb_type;
int bwl = b_width_log2(bsize), bw = 1 << bwl;
int bhl = b_height_log2(bsize), bh = 1 << bhl;
int idx, idy;
#if !CONFIG_AB4X4
bw = 1, bh = 1;
#endif
*labelyrate = 0;
*distortion = 0;
@ -1018,10 +1041,10 @@ static int64_t encode_inter_mb_segment(VP9_COMMON *const cm,
uint8_t* const src =
raster_block_offset_uint8(xd, BLOCK_SIZE_SB8X8, 0, i,
x->plane[0].src.buf, src_stride);
int16_t* const src_diff =
int16_t* src_diff =
raster_block_offset_int16(xd, BLOCK_SIZE_SB8X8, 0, i,
x->plane[0].src_diff);
int16_t* const coeff = BLOCK_OFFSET(x->plane[0].coeff, 16, i);
int16_t* coeff = BLOCK_OFFSET(x->plane[0].coeff, 16, i);
uint8_t* const pre =
raster_block_offset_uint8(xd, BLOCK_SIZE_SB8X8, 0, i,
xd->plane[0].pre[0].buf,
@ -1030,7 +1053,8 @@ static int64_t encode_inter_mb_segment(VP9_COMMON *const cm,
raster_block_offset_uint8(xd, BLOCK_SIZE_SB8X8, 0, i,
xd->plane[0].dst.buf,
xd->plane[0].dst.stride);
int thisdistortion;
int thisdistortion = 0;
int thisrate = 0;
vp9_build_inter_predictor(pre,
xd->plane[0].pre[0].stride,
@ -1038,7 +1062,7 @@ static int64_t encode_inter_mb_segment(VP9_COMMON *const cm,
xd->plane[0].dst.stride,
&xd->mode_info_context->bmi[i].as_mv[0],
&xd->scale_factor[0],
4, 4, 0 /* no avg */, &xd->subpix);
4 * bw, 4 * bh, 0 /* no avg */, &xd->subpix);
// TODO(debargha): Make this work properly with the
// implicit-compoundinter-weight experiment when implicit
@ -1051,22 +1075,33 @@ static int64_t encode_inter_mb_segment(VP9_COMMON *const cm,
vp9_build_inter_predictor(second_pre, xd->plane[0].pre[1].stride,
dst, xd->plane[0].dst.stride,
&xd->mode_info_context->bmi[i].as_mv[1],
&xd->scale_factor[1], 4, 4, 1,
&xd->scale_factor[1], 4 * bw, 4 * bh, 1,
&xd->subpix);
}
vp9_subtract_block(4, 4, src_diff, 8,
vp9_subtract_block(4 * bh, 4 * bw, src_diff, 8,
src, src_stride,
dst, xd->plane[0].dst.stride);
k = i;
for (idy = 0; idy < bh; ++idy) {
for (idx = 0; idx < bw; ++idx) {
k += (idy * 2 + idx);
src_diff = raster_block_offset_int16(xd, BLOCK_SIZE_SB8X8, 0, k,
x->plane[0].src_diff);
coeff = BLOCK_OFFSET(x->plane[0].coeff, 16, k);
x->fwd_txm4x4(src_diff, coeff, 16);
x->quantize_b_4x4(x, i, DCT_DCT, 16);
thisdistortion = vp9_block_error(coeff,
x->quantize_b_4x4(x, k, DCT_DCT, 16);
thisdistortion += vp9_block_error(coeff,
BLOCK_OFFSET(xd->plane[0].dqcoeff,
i, 16), 16);
k, 16), 16);
thisrate += cost_coeffs(cm, x, 0, k, PLANE_TYPE_Y_WITH_DC,
ta + (k & 1),
tl + (k >> 1), TX_4X4, 16);
}
}
*distortion += thisdistortion;
*labelyrate += cost_coeffs(cm, x, 0, i, PLANE_TYPE_Y_WITH_DC,
ta + (i & 1),
tl + (i >> 1), TX_4X4, 16);
*labelyrate += thisrate;
}
}
*distortion >>= 2;
@ -1155,15 +1190,18 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x,
int sbr = 0, sbd = 0;
int segmentyrate = 0;
int best_eobs[4] = { 0 };
#if CONFIG_AB4X4
BLOCK_SIZE_TYPE bsize = mbmi->sb_type;
int bwl = b_width_log2(bsize), bhl = b_height_log2(bsize);
#endif
int bwl = b_width_log2(bsize), bw = 1 << bwl;
int bhl = b_height_log2(bsize), bh = 1 << bhl;
int idx, idy;
vp9_variance_fn_ptr_t *v_fn_ptr;
ENTROPY_CONTEXT t_above[2], t_left[2];
ENTROPY_CONTEXT t_above_b[2], t_left_b[2];
ENTROPY_CONTEXT t_above[4], t_left[4];
ENTROPY_CONTEXT t_above_b[4], t_left_b[4];
#if !CONFIG_AB4X4
bh = 1, bw = 1;
#endif
vpx_memcpy(t_above, x->e_mbd.plane[0].above_context, sizeof(t_above));
vpx_memcpy(t_left, x->e_mbd.plane[0].left_context, sizeof(t_left));
@ -1181,17 +1219,190 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x,
label_mv_thresh = 1 * bsi->mvthresh / label_count;
// Segmentation method overheads
#if !CONFIG_AB4X4
rate += vp9_cost_mv_ref(cpi, SPLITMV,
mbmi->mb_mode_context[mbmi->ref_frame]);
this_segment_rd += RDCOST(x->rdmult, x->rddiv, rate, 0);
br += rate;
#endif
other_segment_rd = this_segment_rd;
for (i = 0; i < label_count && this_segment_rd < bsi->segment_rd; i++) {
for (idy = 0; idy < 2; idy += bh) {
for (idx = 0; idx < 2; idx += bw) {
// TODO(jingning,rbultje): rewrite the rate-distortion optimization
// loop for 4x4/4x8/8x4 block coding
#if CONFIG_AB4X4
int_mv mode_mv[B_MODE_COUNT], second_mode_mv[B_MODE_COUNT];
int64_t best_label_rd = INT64_MAX, best_other_rd = INT64_MAX;
B_PREDICTION_MODE mode_selected = ZERO4X4;
int bestlabelyrate = 0;
i = idy * 2 + idx;
// search for the best motion vector on this segment
for (this_mode = LEFT4X4; this_mode <= NEW4X4; ++this_mode) {
int64_t this_rd;
int distortion;
int labelyrate;
ENTROPY_CONTEXT t_above_s[4], t_left_s[4];
vpx_memcpy(t_above_s, t_above, sizeof(t_above_s));
vpx_memcpy(t_left_s, t_left, sizeof(t_left_s));
// motion search for newmv (single predictor case only)
if (mbmi->second_ref_frame <= 0 && this_mode == NEW4X4) {
int sseshift, n;
int step_param = 0;
int further_steps;
int thissme, bestsme = INT_MAX;
const struct buf_2d orig_src = x->plane[0].src;
const struct buf_2d orig_pre = x->e_mbd.plane[0].pre[0];
/* Is the best so far sufficiently good that we can't justify doing
* a new motion search. */
if (best_label_rd < label_mv_thresh)
break;
if (cpi->compressor_speed) {
// use previous block's result as next block's MV predictor.
if (i > 0) {
bsi->mvp.as_int =
x->e_mbd.mode_info_context->bmi[i - 1].as_mv[0].as_int;
if (i == 2)
bsi->mvp.as_int =
x->e_mbd.mode_info_context->bmi[i - 2].as_mv[0].as_int;
step_param = 2;
}
}
further_steps = (MAX_MVSEARCH_STEPS - 1) - step_param;
{
int sadpb = x->sadperbit4;
int_mv mvp_full;
mvp_full.as_mv.row = bsi->mvp.as_mv.row >> 3;
mvp_full.as_mv.col = bsi->mvp.as_mv.col >> 3;
// find first label
n = i;
// adjust src pointer for this segment
x->plane[0].src.buf =
raster_block_offset_uint8(&x->e_mbd, BLOCK_SIZE_SB8X8, 0, n,
x->plane[0].src.buf,
x->plane[0].src.stride);
assert(((intptr_t)x->e_mbd.plane[0].pre[0].buf & 0x7) == 0);
x->e_mbd.plane[0].pre[0].buf =
raster_block_offset_uint8(&x->e_mbd, BLOCK_SIZE_SB8X8, 0, n,
x->e_mbd.plane[0].pre[0].buf,
x->e_mbd.plane[0].pre[0].stride);
bestsme = vp9_full_pixel_diamond(cpi, x, &mvp_full, step_param,
sadpb, further_steps, 0, v_fn_ptr,
bsi->ref_mv, &mode_mv[NEW4X4]);
sseshift = 0;
// Should we do a full search (best quality only)
if ((cpi->compressor_speed == 0) && (bestsme >> sseshift) > 4000) {
/* Check if mvp_full is within the range. */
clamp_mv(&mvp_full, x->mv_col_min, x->mv_col_max,
x->mv_row_min, x->mv_row_max);
thissme = cpi->full_search_sad(x, &mvp_full,
sadpb, 16, v_fn_ptr,
x->nmvjointcost, x->mvcost,
bsi->ref_mv,
n);
if (thissme < bestsme) {
bestsme = thissme;
mode_mv[NEW4X4].as_int =
x->e_mbd.mode_info_context->bmi[n].as_mv[0].as_int;
} else {
/* The full search result is actually worse so re-instate the
* previous best vector */
x->e_mbd.mode_info_context->bmi[n].as_mv[0].as_int =
mode_mv[NEW4X4].as_int;
}
}
}
if (bestsme < INT_MAX) {
int distortion;
unsigned int sse;
cpi->find_fractional_mv_step(x, &mode_mv[NEW4X4],
bsi->ref_mv, x->errorperbit, v_fn_ptr,
x->nmvjointcost, x->mvcost,
&distortion, &sse);
// save motion search result for use in compound prediction
seg_mvs[i][mbmi->ref_frame - 1].as_int = mode_mv[NEW4X4].as_int;
}
// restore src pointers
x->plane[0].src = orig_src;
x->e_mbd.plane[0].pre[0] = orig_pre;
} else if (mbmi->second_ref_frame > 0 && this_mode == NEW4X4) {
/* NEW4X4 */
/* motion search not completed? Then skip newmv for this block with
* comppred */
if (seg_mvs[i][mbmi->second_ref_frame - 1].as_int == INVALID_MV ||
seg_mvs[i][mbmi->ref_frame - 1].as_int == INVALID_MV) {
continue;
}
}
rate = labels2mode(x, labels, i, this_mode, &mode_mv[this_mode],
&second_mode_mv[this_mode], seg_mvs[i],
bsi->ref_mv, bsi->second_ref_mv, x->nmvjointcost,
x->mvcost, cpi);
// Trap vectors that reach beyond the UMV borders
if (((mode_mv[this_mode].as_mv.row >> 3) < x->mv_row_min) ||
((mode_mv[this_mode].as_mv.row >> 3) > x->mv_row_max) ||
((mode_mv[this_mode].as_mv.col >> 3) < x->mv_col_min) ||
((mode_mv[this_mode].as_mv.col >> 3) > x->mv_col_max)) {
continue;
}
if (mbmi->second_ref_frame > 0 &&
mv_check_bounds(x, &second_mode_mv[this_mode]))
continue;
this_rd = encode_inter_mb_segment(&cpi->common,
x, labels, i, &labelyrate,
&distortion, t_above_s, t_left_s);
this_rd += RDCOST(x->rdmult, x->rddiv, rate, 0);
rate += labelyrate;
if (this_rd < best_label_rd) {
sbr = rate;
sbd = distortion;
bestlabelyrate = labelyrate;
mode_selected = this_mode;
best_label_rd = this_rd;
for (j = 0; j < 4; j++)
if (labels[j] == i)
best_eobs[j] = x->e_mbd.plane[0].eobs[j];
vpx_memcpy(t_above_b, t_above_s, sizeof(t_above_s));
vpx_memcpy(t_left_b, t_left_s, sizeof(t_left_s));
}
} /*for each 4x4 mode*/
vpx_memcpy(t_above, t_above_b, sizeof(t_above));
vpx_memcpy(t_left, t_left_b, sizeof(t_left));
labels2mode(x, labels, i, mode_selected, &mode_mv[mode_selected],
&second_mode_mv[mode_selected], seg_mvs[i],
bsi->ref_mv, bsi->second_ref_mv, x->nmvjointcost,
x->mvcost, cpi);
#else
int_mv mode_mv[B_MODE_COUNT], second_mode_mv[B_MODE_COUNT];
int64_t best_label_rd = INT64_MAX, best_other_rd = INT64_MAX;
B_PREDICTION_MODE mode_selected = ZERO4X4;
int bestlabelyrate = 0;
i = idy * 2 + idx;
// search for the best motion vector on this segment
for (this_mode = LEFT4X4; this_mode <= NEW4X4; this_mode ++) {
@ -1352,12 +1563,23 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x,
&second_mode_mv[mode_selected], seg_mvs[i],
bsi->ref_mv, bsi->second_ref_mv, x->nmvjointcost,
x->mvcost, cpi);
#endif
br += sbr;
bd += sbd;
segmentyrate += bestlabelyrate;
this_segment_rd += best_label_rd;
other_segment_rd += best_other_rd;
for (j = 1; j < bh; ++j)
vpx_memcpy(&x->partition_info->bmi[i + j * 2],
&x->partition_info->bmi[i],
sizeof(x->partition_info->bmi[i]));
for (j = 1; j < bw; ++j)
vpx_memcpy(&x->partition_info->bmi[i + j],
&x->partition_info->bmi[i],
sizeof(x->partition_info->bmi[i]));
}
} /* for each label */
if (this_segment_rd < bsi->segment_rd) {
@ -2504,12 +2726,23 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
int distortion2 = 0, distortion_y = 0, distortion_uv = 0;
int skippable;
int64_t txfm_cache[NB_TXFM_MODES];
int i;
for (i = 0; i < NB_TXFM_MODES; ++i)
txfm_cache[i] = INT64_MAX;
// Test best rd so far against threshold for trying this mode.
#if CONFIG_AB4X4
if (bsize >= BLOCK_SIZE_SB8X8 &&
(best_rd < cpi->rd_threshes[mode_index] ||
cpi->rd_threshes[mode_index] == INT_MAX))
continue;
#else
if (best_rd <= cpi->rd_threshes[mode_index] ||
cpi->rd_threshes[mode_index] == INT_MAX) {
continue;
}
#endif
x->skip = 0;
this_mode = vp9_mode_order[mode_index].mode;
@ -2520,7 +2753,11 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
continue;
}
#if CONFIG_AB4X4
if (cpi->speed > 0 && bsize >= BLOCK_SIZE_SB8X8) {
#else
if (cpi->speed > 0) {
#endif
if (!(ref_frame_mask & (1 << ref_frame))) {
continue;
}
@ -2652,6 +2889,11 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
distortion2 += dist_uv[TX_4X4];
distortion_uv = dist_uv[TX_4X4];
mbmi->uv_mode = mode_uv[TX_4X4];
#if CONFIG_AB4X4
txfm_cache[ONLY_4X4] = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
for (i = 0; i < NB_TXFM_MODES; ++i)
txfm_cache[i] = txfm_cache[ONLY_4X4];
#endif
} else if (ref_frame == INTRA_FRAME) {
TX_SIZE uv_tx;
vp9_build_intra_predictors_sby_s(xd, bsize);
@ -2785,6 +3027,12 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
distortion2 += distortion_uv;
skippable = skippable && uv_skippable;
#if CONFIG_AB4X4
txfm_cache[ONLY_4X4] = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
for (i = 0; i < NB_TXFM_MODES; ++i)
txfm_cache[i] = txfm_cache[ONLY_4X4];
#endif
if (!mode_excluded) {
if (is_comp_pred)
mode_excluded = cpi->common.comp_pred_mode == SINGLE_PREDICTION_ONLY;
@ -2855,7 +3103,11 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
// Is Mb level skip allowed (i.e. not coded at segment level).
mb_skip_allowed = !vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP);
#if CONFIG_AB4X4
if (skippable && bsize >= BLOCK_SIZE_SB8X8) {
#else
if (skippable) {
#endif
// Back out the coefficient coding costs
rate2 -= (rate_y + rate_uv);
// for best_yrd calculation
@ -3001,12 +3253,13 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
}
if (!mode_excluded && this_rd != INT64_MAX) {
for (i = 0; i < NB_TXFM_MODES; i++) {
int64_t adj_rd;
int64_t adj_rd = INT64_MAX;
if (this_mode != I4X4_PRED) {
adj_rd = this_rd + txfm_cache[i] - txfm_cache[cm->txfm_mode];
} else {
adj_rd = this_rd;
}
if (adj_rd < best_txfm_rd[i])
best_txfm_rd[i] = adj_rd;
}
@ -3073,7 +3326,11 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
if (!vp9_segfeature_active(xd, segment_id, SEG_LVL_REF_FRAME) &&
cpi->is_src_frame_alt_ref &&
(cpi->oxcf.arnr_max_frames == 0) &&
(best_mbmode.mode != ZEROMV || best_mbmode.ref_frame != ALTREF_FRAME)) {
(best_mbmode.mode != ZEROMV || best_mbmode.ref_frame != ALTREF_FRAME)
#if CONFIG_AB4X4
&& bsize >= BLOCK_SIZE_SB8X8
#endif
) {
mbmi->mode = ZEROMV;
mbmi->ref_frame = ALTREF_FRAME;
mbmi->second_ref_frame = NONE;