Enable per transformed block zero coeffs forcing
This commit enables forcing all coefficients of a transformed block to zero when doing so gives a lower rate-distortion cost than regular coefficient quantization.

The overall performance improvement (including its parent patch on calculating the rd cost per transformed block) at speed 1:
derf: 0.298%
yt: 0.452%
hd: 0.741%
stdhd: 0.006%

Change-Id: I66005fe0fd7af192c3eba32e02fd6d77952accb5
This commit is contained in:
@@ -34,6 +34,7 @@ typedef struct {
|
|||||||
typedef struct {
|
typedef struct {
|
||||||
MODE_INFO mic;
|
MODE_INFO mic;
|
||||||
PARTITION_INFO partition_info;
|
PARTITION_INFO partition_info;
|
||||||
|
unsigned char zcoeff_blk[256];
|
||||||
int skip;
|
int skip;
|
||||||
int_mv best_ref_mv;
|
int_mv best_ref_mv;
|
||||||
int_mv second_best_ref_mv;
|
int_mv second_best_ref_mv;
|
||||||
@@ -136,6 +137,7 @@ struct macroblock {
|
|||||||
int mv_row_min;
|
int mv_row_min;
|
||||||
int mv_row_max;
|
int mv_row_max;
|
||||||
|
|
||||||
|
unsigned char zcoeff_blk[TX_SIZES][256];
|
||||||
int skip;
|
int skip;
|
||||||
|
|
||||||
int encode_breakout;
|
int encode_breakout;
|
||||||
|
|||||||
@@ -390,6 +390,9 @@ static void update_state(VP9_COMP *cpi, PICK_MODE_CONTEXT *ctx,
|
|||||||
}
|
}
|
||||||
|
|
||||||
x->skip = ctx->skip;
|
x->skip = ctx->skip;
|
||||||
|
vpx_memcpy(x->zcoeff_blk[mbmi->tx_size], ctx->zcoeff_blk,
|
||||||
|
sizeof(ctx->zcoeff_blk));
|
||||||
|
|
||||||
if (!output_enabled)
|
if (!output_enabled)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
@@ -2744,7 +2747,6 @@ static void encode_superblock(VP9_COMP *cpi, TOKENEXTRA **t, int output_enabled,
|
|||||||
setup_pre_planes(xd, 1, second_ref_fb, mi_row, mi_col,
|
setup_pre_planes(xd, 1, second_ref_fb, mi_row, mi_col,
|
||||||
&xd->scale_factor[1]);
|
&xd->scale_factor[1]);
|
||||||
|
|
||||||
|
|
||||||
vp9_build_inter_predictors_sb(xd, mi_row, mi_col, MAX(bsize, BLOCK_8X8));
|
vp9_build_inter_predictors_sb(xd, mi_row, mi_col, MAX(bsize, BLOCK_8X8));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -482,6 +482,14 @@ static void encode_block(int plane, int block, BLOCK_SIZE plane_bsize,
|
|||||||
int16_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
|
int16_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
|
||||||
uint8_t *const dst = raster_block_offset_uint8(plane_bsize, raster_block,
|
uint8_t *const dst = raster_block_offset_uint8(plane_bsize, raster_block,
|
||||||
pd->dst.buf, pd->dst.stride);
|
pd->dst.buf, pd->dst.stride);
|
||||||
|
|
||||||
|
// TODO(jingning): per transformed block zero forcing only enabled for
|
||||||
|
// luma component. will integrate chroma components as well.
|
||||||
|
if (x->zcoeff_blk[tx_size][block] && plane == 0) {
|
||||||
|
pd->eobs[block] = 0;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
vp9_xform_quant(plane, block, plane_bsize, tx_size, arg);
|
vp9_xform_quant(plane, block, plane_bsize, tx_size, arg);
|
||||||
|
|
||||||
if (x->optimize)
|
if (x->optimize)
|
||||||
|
|||||||
@@ -624,7 +624,12 @@ static void block_yrd_txfm(int plane, int block, BLOCK_SIZE plane_bsize,
|
|||||||
rate_block(plane, block, plane_bsize, tx_size, args);
|
rate_block(plane, block, plane_bsize, tx_size, args);
|
||||||
rd1 = RDCOST(x->rdmult, x->rddiv, args->rate[block], args->dist[block]);
|
rd1 = RDCOST(x->rdmult, x->rddiv, args->rate[block], args->dist[block]);
|
||||||
rd2 = RDCOST(x->rdmult, x->rddiv, 0, args->sse[block]);
|
rd2 = RDCOST(x->rdmult, x->rddiv, 0, args->sse[block]);
|
||||||
|
|
||||||
|
// TODO(jingning): temporarily enabled only for luma component
|
||||||
rd = MIN(rd1, rd2);
|
rd = MIN(rd1, rd2);
|
||||||
|
if (plane == 0)
|
||||||
|
x->zcoeff_blk[tx_size][block] = rd1 > rd2;
|
||||||
|
|
||||||
args->this_rate += args->rate[block];
|
args->this_rate += args->rate[block];
|
||||||
args->this_dist += args->dist[block];
|
args->this_dist += args->dist[block];
|
||||||
args->this_sse += args->sse[block];
|
args->this_sse += args->sse[block];
|
||||||
@@ -2234,6 +2239,9 @@ static void store_coding_context(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx,
|
|||||||
ctx->comp_pred_diff = (int)comp_pred_diff[COMP_PREDICTION_ONLY];
|
ctx->comp_pred_diff = (int)comp_pred_diff[COMP_PREDICTION_ONLY];
|
||||||
ctx->hybrid_pred_diff = (int)comp_pred_diff[HYBRID_PREDICTION];
|
ctx->hybrid_pred_diff = (int)comp_pred_diff[HYBRID_PREDICTION];
|
||||||
|
|
||||||
|
vpx_memcpy(ctx->zcoeff_blk, x->zcoeff_blk[xd->this_mi->mbmi.tx_size],
|
||||||
|
sizeof(ctx->zcoeff_blk));
|
||||||
|
|
||||||
// FIXME(rbultje) does this memcpy the whole array? I believe sizeof()
|
// FIXME(rbultje) does this memcpy the whole array? I believe sizeof()
|
||||||
// doesn't actually work this way
|
// doesn't actually work this way
|
||||||
memcpy(ctx->tx_rd_diff, tx_size_diff, sizeof(ctx->tx_rd_diff));
|
memcpy(ctx->tx_rd_diff, tx_size_diff, sizeof(ctx->tx_rd_diff));
|
||||||
@@ -3153,8 +3161,11 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
|
|||||||
const int bws = num_8x8_blocks_wide_lookup[bsize] / 2;
|
const int bws = num_8x8_blocks_wide_lookup[bsize] / 2;
|
||||||
const int bhs = num_8x8_blocks_high_lookup[bsize] / 2;
|
const int bhs = num_8x8_blocks_high_lookup[bsize] / 2;
|
||||||
int best_skip2 = 0;
|
int best_skip2 = 0;
|
||||||
|
unsigned char best_zcoeff_blk[256] = { 0 };
|
||||||
|
|
||||||
x->skip_encode = cpi->sf.skip_encode_frame && xd->q_index < QIDX_SKIP_THRESH;
|
x->skip_encode = cpi->sf.skip_encode_frame && xd->q_index < QIDX_SKIP_THRESH;
|
||||||
|
vpx_memset(x->zcoeff_blk, 0, sizeof(x->zcoeff_blk));
|
||||||
|
vpx_memset(ctx->zcoeff_blk, 0, sizeof(ctx->zcoeff_blk));
|
||||||
|
|
||||||
for (i = 0; i < 4; i++) {
|
for (i = 0; i < 4; i++) {
|
||||||
int j;
|
int j;
|
||||||
@@ -3826,6 +3837,8 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
|
|||||||
best_mbmode = *mbmi;
|
best_mbmode = *mbmi;
|
||||||
best_skip2 = this_skip2;
|
best_skip2 = this_skip2;
|
||||||
best_partition = *x->partition_info;
|
best_partition = *x->partition_info;
|
||||||
|
vpx_memcpy(best_zcoeff_blk, x->zcoeff_blk[mbmi->tx_size],
|
||||||
|
sizeof(best_zcoeff_blk));
|
||||||
|
|
||||||
if (this_mode == RD_I4X4_PRED || this_mode == RD_SPLITMV)
|
if (this_mode == RD_I4X4_PRED || this_mode == RD_SPLITMV)
|
||||||
for (i = 0; i < 4; i++)
|
for (i = 0; i < 4; i++)
|
||||||
@@ -4021,6 +4034,9 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
|
|||||||
mbmi->mv[1].as_int = xd->this_mi->bmi[3].as_mv[1].as_int;
|
mbmi->mv[1].as_int = xd->this_mi->bmi[3].as_mv[1].as_int;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
vpx_memcpy(x->zcoeff_blk[mbmi->tx_size], best_zcoeff_blk,
|
||||||
|
sizeof(best_zcoeff_blk));
|
||||||
|
|
||||||
for (i = 0; i < NB_PREDICTION_TYPES; ++i) {
|
for (i = 0; i < NB_PREDICTION_TYPES; ++i) {
|
||||||
if (best_pred_rd[i] == INT64_MAX)
|
if (best_pred_rd[i] == INT64_MAX)
|
||||||
best_pred_diff[i] = INT_MIN;
|
best_pred_diff[i] = INT_MIN;
|
||||||
|
|||||||
Reference in New Issue
Block a user