Optimize partition search order

This commit changes the partition search order so that rectangular
partitions are checked after the square partitions. It also adds a
speed feature that skips the rectangular partition check when
NONE is better than SPLIT in the rate-distortion (RD) sense.

This feature speeds up the encoder by roughly 1.5X, with a compression loss of:
-0.91% on the cif set
-0.56% on the stdhd set

Change-Id: I0d2d06993041aa9ea9073fcc39c54f73a127dfa4
This commit is contained in:
Yaowu Xu
2013-06-27 12:07:07 -07:00
parent 5ec57c91b7
commit 1374a06bd8
3 changed files with 78 additions and 69 deletions

View File

@@ -1351,71 +1351,7 @@ static void rd_pick_partition(VP9_COMP *cpi, TOKENEXTRA **tp, int mi_row,
if (!cpi->sf.use_partitions_less_than
|| (cpi->sf.use_partitions_less_than
&& bsize <= cpi->sf.less_than_block_size)) {
// PARTITION_HORZ
if (bsize >= BLOCK_SIZE_SB8X8 && mi_col + (ms >> 1) < cm->mi_cols) {
int r2, r = 0;
int64_t d2, d = 0;
subsize = get_subsize(bsize, PARTITION_HORZ);
*(get_sb_index(xd, subsize)) = 0;
pick_sb_modes(cpi, mi_row, mi_col, tp, &r2, &d2, subsize,
get_block_context(x, subsize));
if (mi_row + (ms >> 1) < cm->mi_rows) {
update_state(cpi, get_block_context(x, subsize), subsize, 0);
encode_superblock(cpi, tp, 0, mi_row, mi_col, subsize);
*(get_sb_index(xd, subsize)) = 1;
pick_sb_modes(cpi, mi_row + (ms >> 1), mi_col, tp, &r, &d, subsize,
get_block_context(x, subsize));
r2 += r;
d2 += d;
}
set_partition_seg_context(cm, xd, mi_row, mi_col);
pl = partition_plane_context(xd, bsize);
if (r2 < INT_MAX)
r2 += x->partition_cost[pl][PARTITION_HORZ];
if (RDCOST(x->rdmult, x->rddiv, r2, d2)
< RDCOST(x->rdmult, x->rddiv, srate, sdist)) {
srate = r2;
sdist = d2;
*(get_sb_partitioning(x, bsize)) = subsize;
}
restore_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize);
}
// PARTITION_VERT
if (bsize >= BLOCK_SIZE_SB8X8 && mi_row + (ms >> 1) < cm->mi_rows) {
int r2;
int64_t d2;
subsize = get_subsize(bsize, PARTITION_VERT);
*(get_sb_index(xd, subsize)) = 0;
pick_sb_modes(cpi, mi_row, mi_col, tp, &r2, &d2, subsize,
get_block_context(x, subsize));
if (mi_col + (ms >> 1) < cm->mi_cols) {
int r = 0;
int64_t d = 0;
update_state(cpi, get_block_context(x, subsize), subsize, 0);
encode_superblock(cpi, tp, 0, mi_row, mi_col, subsize);
*(get_sb_index(xd, subsize)) = 1;
pick_sb_modes(cpi, mi_row, mi_col + (ms >> 1), tp, &r, &d, subsize,
get_block_context(x, subsize));
r2 += r;
d2 += d;
}
set_partition_seg_context(cm, xd, mi_row, mi_col);
pl = partition_plane_context(xd, bsize);
if (r2 < INT_MAX)
r2 += x->partition_cost[pl][PARTITION_VERT];
if (RDCOST(x->rdmult, x->rddiv, r2, d2)
< RDCOST(x->rdmult, x->rddiv, srate, sdist)) {
srate = r2;
sdist = d2;
*(get_sb_partitioning(x, bsize)) = subsize;
}
restore_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize);
}
int larger_is_better = 0;
// PARTITION_NONE
if ((mi_row + (ms >> 1) < cm->mi_rows) &&
(mi_col + (ms >> 1) < cm->mi_cols)) {
@@ -1433,10 +1369,77 @@ static void rd_pick_partition(VP9_COMP *cpi, TOKENEXTRA **tp, int mi_row,
< RDCOST(x->rdmult, x->rddiv, srate, sdist)) {
srate = r;
sdist = d;
larger_is_better = 1;
if (bsize >= BLOCK_SIZE_SB8X8)
*(get_sb_partitioning(x, bsize)) = bsize;
}
}
if (!cpi->sf.less_rectangular_check || !larger_is_better) {
// PARTITION_HORZ
if (bsize >= BLOCK_SIZE_SB8X8 && mi_col + (ms >> 1) < cm->mi_cols) {
int r2, r = 0;
int64_t d2, d = 0;
subsize = get_subsize(bsize, PARTITION_HORZ);
*(get_sb_index(xd, subsize)) = 0;
pick_sb_modes(cpi, mi_row, mi_col, tp, &r2, &d2, subsize,
get_block_context(x, subsize));
if (mi_row + (ms >> 1) < cm->mi_rows) {
update_state(cpi, get_block_context(x, subsize), subsize, 0);
encode_superblock(cpi, tp, 0, mi_row, mi_col, subsize);
*(get_sb_index(xd, subsize)) = 1;
pick_sb_modes(cpi, mi_row + (ms >> 1), mi_col, tp, &r, &d, subsize,
get_block_context(x, subsize));
r2 += r;
d2 += d;
}
set_partition_seg_context(cm, xd, mi_row, mi_col);
pl = partition_plane_context(xd, bsize);
if (r2 < INT_MAX)
r2 += x->partition_cost[pl][PARTITION_HORZ];
if (RDCOST(x->rdmult, x->rddiv, r2, d2)
< RDCOST(x->rdmult, x->rddiv, srate, sdist)) {
srate = r2;
sdist = d2;
*(get_sb_partitioning(x, bsize)) = subsize;
}
restore_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize);
}
// PARTITION_VERT
if (bsize >= BLOCK_SIZE_SB8X8 && mi_row + (ms >> 1) < cm->mi_rows) {
int r2;
int64_t d2;
subsize = get_subsize(bsize, PARTITION_VERT);
*(get_sb_index(xd, subsize)) = 0;
pick_sb_modes(cpi, mi_row, mi_col, tp, &r2, &d2, subsize,
get_block_context(x, subsize));
if (mi_col + (ms >> 1) < cm->mi_cols) {
int r = 0;
int64_t d = 0;
update_state(cpi, get_block_context(x, subsize), subsize, 0);
encode_superblock(cpi, tp, 0, mi_row, mi_col, subsize);
*(get_sb_index(xd, subsize)) = 1;
pick_sb_modes(cpi, mi_row, mi_col + (ms >> 1), tp, &r, &d, subsize,
get_block_context(x, subsize));
r2 += r;
d2 += d;
}
set_partition_seg_context(cm, xd, mi_row, mi_col);
pl = partition_plane_context(xd, bsize);
if (r2 < INT_MAX)
r2 += x->partition_cost[pl][PARTITION_VERT];
if (RDCOST(x->rdmult, x->rddiv, r2, d2)
< RDCOST(x->rdmult, x->rddiv, srate, sdist)) {
srate = r2;
sdist = d2;
*(get_sb_partitioning(x, bsize)) = subsize;
}
restore_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize);
}
}
}
*rate = srate;
*dist = sdist;