Shares one set of RD costs tables between all encoding threads

RD costs were local to the MACROBLOCK data and had to be copied to each
thread's MACROBLOCK data every time. The tables were moved to a common place,
and only pointers are set up for each encoding thread.

vp8_cost_tokens() generates 'int' costs, so all cost table types were changed
to int (i.e. 'unsigned' was removed).

NOTE: Could do some more cleaning in vp8cx_init_mbrthread_data().

Change-Id: Ifa4de4c6286dffaca7ed3082041fe5af1345ddc0
This commit is contained in:
Attila Nagy 2012-04-17 10:40:56 +03:00 committed by Yunqing Wang
parent 11876faa11
commit b41c17d625
8 changed files with 90 additions and 65 deletions

View File

@ -90,16 +90,17 @@ typedef struct macroblock
signed int act_zbin_adj;
signed int last_act_zbin_adj;
int mvcosts[2][MVvals+1];
int *mvcost[2];
int mvsadcosts[2][MVfpvals+1];
int *mvsadcost[2];
int mbmode_cost[2][MB_MODE_COUNT];
int intra_uv_mode_cost[2][MB_MODE_COUNT];
unsigned int bmode_costs[10][10][10];
unsigned int inter_bmode_costs[B_MODE_COUNT];
int (*mbmode_cost)[MB_MODE_COUNT];
int (*intra_uv_mode_cost)[MB_MODE_COUNT];
int (*bmode_costs)[10][10];
int *inter_bmode_costs;
int (*token_costs)[COEF_BANDS][PREV_COEF_CONTEXTS]
[MAX_ENTROPY_TOKENS];
// These define limits to motion vector components to prevent them from extending outside the UMV borders
// These define limits to motion vector components to prevent
// them from extending outside the UMV borders
int mv_col_min;
int mv_col_max;
int mv_row_min;
@ -115,7 +116,6 @@ typedef struct macroblock
unsigned char *active_ptr;
MV_CONTEXT *mvc;
unsigned int token_costs[BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS];
int optimize;
int q_index;

View File

@ -337,21 +337,16 @@ static void setup_mbby_copy(MACROBLOCK *mbdst, MACROBLOCK *mbsrc)
z->src.v_buffer = x->src.v_buffer;
*/
z->mvcost[0] = x->mvcost[0];
z->mvcost[1] = x->mvcost[1];
z->mvsadcost[0] = x->mvsadcost[0];
z->mvsadcost[1] = x->mvsadcost[1];
vpx_memcpy(z->mvcosts, x->mvcosts, sizeof(x->mvcosts));
z->mvcost[0] = &z->mvcosts[0][mv_max+1];
z->mvcost[1] = &z->mvcosts[1][mv_max+1];
z->mvsadcost[0] = &z->mvsadcosts[0][mvfp_max+1];
z->mvsadcost[1] = &z->mvsadcosts[1][mvfp_max+1];
vpx_memcpy(z->token_costs, x->token_costs, sizeof(x->token_costs));
vpx_memcpy(z->inter_bmode_costs, x->inter_bmode_costs, sizeof(x->inter_bmode_costs));
//memcpy(z->mvcosts, x->mvcosts, sizeof(x->mvcosts));
//memcpy(z->mvcost, x->mvcost, sizeof(x->mvcost));
vpx_memcpy(z->mbmode_cost, x->mbmode_cost, sizeof(x->mbmode_cost));
vpx_memcpy(z->intra_uv_mode_cost, x->intra_uv_mode_cost, sizeof(x->intra_uv_mode_cost));
vpx_memcpy(z->bmode_costs, x->bmode_costs, sizeof(x->bmode_costs));
z->token_costs = x->token_costs;
z->inter_bmode_costs = x->inter_bmode_costs;
z->mbmode_cost = x->mbmode_cost;
z->intra_uv_mode_cost = x->intra_uv_mode_cost;
z->bmode_costs = x->bmode_costs;
for (i = 0; i < 25; i++)
{
@ -359,17 +354,15 @@ static void setup_mbby_copy(MACROBLOCK *mbdst, MACROBLOCK *mbsrc)
z->block[i].quant_fast = x->block[i].quant_fast;
z->block[i].quant_shift = x->block[i].quant_shift;
z->block[i].zbin = x->block[i].zbin;
z->block[i].zrun_zbin_boost = x->block[i].zrun_zbin_boost;
z->block[i].zrun_zbin_boost = x->block[i].zrun_zbin_boost;
z->block[i].round = x->block[i].round;
z->q_index = x->q_index;
z->act_zbin_adj = x->act_zbin_adj;
z->last_act_zbin_adj = x->last_act_zbin_adj;
/*
z->block[i].src = x->block[i].src;
*/
z->block[i].src_stride = x->block[i].src_stride;
z->block[i].src_stride = x->block[i].src_stride;
}
z->q_index = x->q_index;
z->act_zbin_adj = x->act_zbin_adj;
z->last_act_zbin_adj = x->last_act_zbin_adj;
{
MACROBLOCKD *xd = &x->e_mbd;
MACROBLOCKD *zd = &z->e_mbd;
@ -401,9 +394,11 @@ static void setup_mbby_copy(MACROBLOCK *mbdst, MACROBLOCK *mbsrc)
zd->subpixel_predict16x16 = xd->subpixel_predict16x16;
zd->segmentation_enabled = xd->segmentation_enabled;
zd->mb_segement_abs_delta = xd->mb_segement_abs_delta;
vpx_memcpy(zd->segment_feature_data, xd->segment_feature_data, sizeof(xd->segment_feature_data));
vpx_memcpy(zd->segment_feature_data, xd->segment_feature_data,
sizeof(xd->segment_feature_data));
vpx_memcpy(zd->dequant_y1_dc, xd->dequant_y1_dc, sizeof(xd->dequant_y1_dc));
vpx_memcpy(zd->dequant_y1_dc, xd->dequant_y1_dc,
sizeof(xd->dequant_y1_dc));
vpx_memcpy(zd->dequant_y1, xd->dequant_y1, sizeof(xd->dequant_y1));
vpx_memcpy(zd->dequant_y2, xd->dequant_y2, sizeof(xd->dequant_y2));
vpx_memcpy(zd->dequant_uv, xd->dequant_uv, sizeof(xd->dequant_uv));

View File

@ -18,6 +18,8 @@
void vp8_init_mode_costs(VP8_COMP *c)
{
VP8_COMMON *x = &c->common;
struct rd_costs_struct *rd_costs = &c->rd_costs;
{
const vp8_tree_p T = vp8_bmode_tree;
@ -29,19 +31,24 @@ void vp8_init_mode_costs(VP8_COMP *c)
do
{
vp8_cost_tokens((int *)c->mb.bmode_costs[i][j], vp8_kf_bmode_prob[i][j], T);
vp8_cost_tokens(rd_costs->bmode_costs[i][j],
vp8_kf_bmode_prob[i][j], T);
}
while (++j < VP8_BINTRAMODES);
}
while (++i < VP8_BINTRAMODES);
vp8_cost_tokens((int *)c->mb.inter_bmode_costs, x->fc.bmode_prob, T);
vp8_cost_tokens(rd_costs->inter_bmode_costs, x->fc.bmode_prob, T);
}
vp8_cost_tokens((int *)c->mb.inter_bmode_costs, x->fc.sub_mv_ref_prob, vp8_sub_mv_ref_tree);
vp8_cost_tokens(rd_costs->inter_bmode_costs, x->fc.sub_mv_ref_prob,
vp8_sub_mv_ref_tree);
vp8_cost_tokens(c->mb.mbmode_cost[1], x->fc.ymode_prob, vp8_ymode_tree);
vp8_cost_tokens(c->mb.mbmode_cost[0], vp8_kf_ymode_prob, vp8_kf_ymode_tree);
vp8_cost_tokens(rd_costs->mbmode_cost[1], x->fc.ymode_prob, vp8_ymode_tree);
vp8_cost_tokens(rd_costs->mbmode_cost[0], vp8_kf_ymode_prob,
vp8_kf_ymode_tree);
vp8_cost_tokens(c->mb.intra_uv_mode_cost[1], x->fc.uv_mode_prob, vp8_uv_mode_tree);
vp8_cost_tokens(c->mb.intra_uv_mode_cost[0], vp8_kf_uv_mode_prob, vp8_uv_mode_tree);
vp8_cost_tokens(rd_costs->intra_uv_mode_cost[1], x->fc.uv_mode_prob,
vp8_uv_mode_tree);
vp8_cost_tokens(rd_costs->intra_uv_mode_cost[0], vp8_kf_uv_mode_prob,
vp8_uv_mode_tree);
}

View File

@ -1883,13 +1883,6 @@ struct VP8_COMP* vp8_create_compressor(VP8_CONFIG *oxcf)
cpi->gf_rate_correction_factor = 1.0;
cpi->twopass.est_max_qcorrection_factor = 1.0;
cpi->mb.mvcost[0] = &cpi->mb.mvcosts[0][mv_max+1];
cpi->mb.mvcost[1] = &cpi->mb.mvcosts[1][mv_max+1];
cpi->mb.mvsadcost[0] = &cpi->mb.mvsadcosts[0][mvfp_max+1];
cpi->mb.mvsadcost[1] = &cpi->mb.mvsadcosts[1][mvfp_max+1];
cal_mvsadcosts(cpi->mb.mvsadcost);
for (i = 0; i < KEY_FRAME_CONTEXT; i++)
{
cpi->prior_key_frame_distance[i] = (int)cpi->output_frame_rate;
@ -2023,13 +2016,29 @@ struct VP8_COMP* vp8_create_compressor(VP8_CONFIG *oxcf)
cpi->common.error.setjmp = 0;
#if CONFIG_MULTI_RES_ENCODING
/* Calculate # of MBs in a row in lower-resolution level image. */
if (cpi->oxcf.mr_encoder_id > 0)
vp8_cal_low_res_mb_cols(cpi);
#endif
return cpi;
/* setup RD costs to MACROBLOCK struct */
cpi->mb.mvcost[0] = &cpi->rd_costs.mvcosts[0][mv_max+1];
cpi->mb.mvcost[1] = &cpi->rd_costs.mvcosts[1][mv_max+1];
cpi->mb.mvsadcost[0] = &cpi->rd_costs.mvsadcosts[0][mvfp_max+1];
cpi->mb.mvsadcost[1] = &cpi->rd_costs.mvsadcosts[1][mvfp_max+1];
cal_mvsadcosts(cpi->mb.mvsadcost);
cpi->mb.mbmode_cost = cpi->rd_costs.mbmode_cost;
cpi->mb.intra_uv_mode_cost = cpi->rd_costs.intra_uv_mode_cost;
cpi->mb.bmode_costs = cpi->rd_costs.bmode_costs;
cpi->mb.inter_bmode_costs = cpi->rd_costs.inter_bmode_costs;
cpi->mb.token_costs = cpi->rd_costs.token_costs;
return cpi;
}

View File

@ -693,6 +693,17 @@ typedef struct VP8_COMP
int mr_low_res_mb_cols;
#endif
/* Single set of rate-distortion cost tables owned by the compressor instance.
 * vp8_create_compressor() points the cost pointers in cpi->mb at these tables,
 * and setup_mbby_copy() copies those pointers from one MACROBLOCK to another,
 * so the tables themselves are not duplicated per MACROBLOCK.
 */
struct rd_costs_struct
{
/* Motion vector costs; MACROBLOCK.mvcost[i] points at
 * &mvcosts[i][mv_max+1]. Saved/restored by the coding-context helpers. */
int mvcosts[2][MVvals+1];
/* Motion vector SAD costs; MACROBLOCK.mvsadcost[i] points at
 * &mvsadcosts[i][mvfp_max+1] and is passed to cal_mvsadcosts(). */
int mvsadcosts[2][MVfpvals+1];
/* Macroblock mode costs: row [0] is built from vp8_kf_ymode_prob,
 * row [1] from the frame context's ymode_prob. */
int mbmode_cost[2][MB_MODE_COUNT];
/* Intra chroma mode costs: row [0] is built from vp8_kf_uv_mode_prob,
 * row [1] from the frame context's uv_mode_prob. */
int intra_uv_mode_cost[2][MB_MODE_COUNT];
/* Block mode costs built from vp8_kf_bmode_prob[i][j] for each (i, j). */
int bmode_costs[10][10][10];
/* NOTE(review): vp8_init_mode_costs() writes this table from fc.bmode_prob
 * and then again from fc.sub_mv_ref_prob -- confirm which one is intended
 * to remain. */
int inter_bmode_costs[B_MODE_COUNT];
/* Coefficient token costs; same dimensions as the table taken by
 * fill_token_costs() -- presumably filled by it, confirm at the call site. */
int token_costs[BLOCK_TYPES][COEF_BANDS]
[PREV_COEF_CONTEXTS][MAX_ENTROPY_TOKENS];
} rd_costs;
} VP8_COMP;
void control_data_rate(VP8_COMP *cpi);

View File

@ -132,7 +132,7 @@ static int pick_intra4x4block(
MACROBLOCK *x,
int ib,
B_PREDICTION_MODE *best_mode,
unsigned int *mode_costs,
const int *mode_costs,
int *bestrate,
int *bestdistortion)
@ -185,7 +185,7 @@ static int pick_intra4x4mby_modes
int cost = mb->mbmode_cost [xd->frame_type] [B_PRED];
int error;
int distortion = 0;
unsigned int *bmode_costs;
const int *bmode_costs;
intra_prediction_down_copy(xd, xd->dst.y_buffer - xd->dst.y_stride + 16);

View File

@ -235,7 +235,7 @@ void vp8_save_coding_context(VP8_COMP *cpi)
cc->frames_since_golden = cpi->common.frames_since_golden;
vp8_copy(cc->mvc, cpi->common.fc.mvc);
vp8_copy(cc->mvcosts, cpi->mb.mvcosts);
vp8_copy(cc->mvcosts, cpi->rd_costs.mvcosts);
vp8_copy(cc->ymode_prob, cpi->common.fc.ymode_prob);
vp8_copy(cc->uv_mode_prob, cpi->common.fc.uv_mode_prob);
@ -272,7 +272,7 @@ void vp8_restore_coding_context(VP8_COMP *cpi)
vp8_copy(cpi->common.fc.mvc, cc->mvc);
vp8_copy(cpi->mb.mvcosts, cc->mvcosts);
vp8_copy(cpi->rd_costs.mvcosts, cc->mvcosts);
vp8_copy(cpi->common.fc.ymode_prob, cc->ymode_prob);
vp8_copy(cpi->common.fc.uv_mode_prob, cc->uv_mode_prob);

View File

@ -149,8 +149,8 @@ const int vp8_ref_frame_order[MAX_MODES] =
};
static void fill_token_costs(
unsigned int c [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS],
const vp8_prob p [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES]
int c[BLOCK_TYPES][COEF_BANDS][PREV_COEF_CONTEXTS][MAX_ENTROPY_TOKENS],
const vp8_prob p[BLOCK_TYPES][COEF_BANDS][PREV_COEF_CONTEXTS][ENTROPY_NODES]
)
{
int i, j, k;
@ -159,21 +159,24 @@ static void fill_token_costs(
for (i = 0; i < BLOCK_TYPES; i++)
for (j = 0; j < COEF_BANDS; j++)
for (k = 0; k < PREV_COEF_CONTEXTS; k++)
// check for pt=0 and band > 1 if block type 0 and 0 if blocktype 1
if(k==0 && j>(i==0) )
vp8_cost_tokens2((int *)(c [i][j][k]), p [i][j][k], vp8_coef_tree,2);
if (k == 0 && j > (i == 0))
vp8_cost_tokens2(c[i][j][k], p [i][j][k], vp8_coef_tree, 2);
else
vp8_cost_tokens((int *)(c [i][j][k]), p [i][j][k], vp8_coef_tree);
vp8_cost_tokens(c[i][j][k], p [i][j][k], vp8_coef_tree);
}
static int rd_iifactor [ 32 ] = { 4, 4, 3, 2, 1, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
};
static const int rd_iifactor[32] =
{
4, 4, 3, 2, 1, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0
};
/* values are now correlated to quantizer */
static int sad_per_bit16lut[QINDEX_RANGE] =
static const int sad_per_bit16lut[QINDEX_RANGE] =
{
2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2,
@ -192,7 +195,7 @@ static int sad_per_bit16lut[QINDEX_RANGE] =
11, 11, 11, 11, 12, 12, 12, 12,
12, 12, 13, 13, 13, 13, 14, 14
};
static int sad_per_bit4lut[QINDEX_RANGE] =
static const int sad_per_bit4lut[QINDEX_RANGE] =
{
2, 2, 2, 2, 2, 2, 3, 3,
3, 3, 3, 3, 3, 3, 3, 3,
@ -637,7 +640,7 @@ static int rd_pick_intra4x4block(
BLOCK *be,
BLOCKD *b,
B_PREDICTION_MODE *best_mode,
unsigned int *bmode_costs,
const int *bmode_costs,
ENTROPY_CONTEXT *a,
ENTROPY_CONTEXT *l,
@ -717,7 +720,7 @@ static int rd_pick_intra4x4mby_modes(VP8_COMP *cpi, MACROBLOCK *mb, int *Rate,
ENTROPY_CONTEXT_PLANES t_above, t_left;
ENTROPY_CONTEXT *ta;
ENTROPY_CONTEXT *tl;
unsigned int *bmode_costs;
const int *bmode_costs;
vpx_memcpy(&t_above, mb->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES));
vpx_memcpy(&t_left, mb->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES));