Merge changes from topic 'tile-thread-cleanup'

* changes:
  vp9/decode_tiles_mt: move frame count accum from loop
  VP9Decoder: remove duplicate tile_worker_info
  vp9/decode_tiles_mt: move some inits from inner loop
  vp9_accumulate_frame_counts: pass counts directly
This commit is contained in:
James Zern 2015-09-17 22:00:23 +00:00 committed by Gerrit Code Review
commit 9d8decc162
6 changed files with 45 additions and 48 deletions

View File

@ -318,21 +318,21 @@ void vp9_loop_filter_dealloc(VP9LfSync *lf_sync) {
}
// Accumulate frame counts.
void vp9_accumulate_frame_counts(VP9_COMMON *cm, FRAME_COUNTS *counts,
int is_dec) {
void vp9_accumulate_frame_counts(FRAME_COUNTS *accum,
const FRAME_COUNTS *counts, int is_dec) {
int i, j, k, l, m;
for (i = 0; i < BLOCK_SIZE_GROUPS; i++)
for (j = 0; j < INTRA_MODES; j++)
cm->counts.y_mode[i][j] += counts->y_mode[i][j];
accum->y_mode[i][j] += counts->y_mode[i][j];
for (i = 0; i < INTRA_MODES; i++)
for (j = 0; j < INTRA_MODES; j++)
cm->counts.uv_mode[i][j] += counts->uv_mode[i][j];
accum->uv_mode[i][j] += counts->uv_mode[i][j];
for (i = 0; i < PARTITION_CONTEXTS; i++)
for (j = 0; j < PARTITION_TYPES; j++)
cm->counts.partition[i][j] += counts->partition[i][j];
accum->partition[i][j] += counts->partition[i][j];
if (is_dec) {
int n;
@ -341,10 +341,10 @@ void vp9_accumulate_frame_counts(VP9_COMMON *cm, FRAME_COUNTS *counts,
for (k = 0; k < REF_TYPES; k++)
for (l = 0; l < COEF_BANDS; l++)
for (m = 0; m < COEFF_CONTEXTS; m++) {
cm->counts.eob_branch[i][j][k][l][m] +=
accum->eob_branch[i][j][k][l][m] +=
counts->eob_branch[i][j][k][l][m];
for (n = 0; n < UNCONSTRAINED_NODES + 1; n++)
cm->counts.coef[i][j][k][l][m][n] +=
accum->coef[i][j][k][l][m][n] +=
counts->coef[i][j][k][l][m][n];
}
} else {
@ -353,64 +353,64 @@ void vp9_accumulate_frame_counts(VP9_COMMON *cm, FRAME_COUNTS *counts,
for (k = 0; k < REF_TYPES; k++)
for (l = 0; l < COEF_BANDS; l++)
for (m = 0; m < COEFF_CONTEXTS; m++)
cm->counts.eob_branch[i][j][k][l][m] +=
accum->eob_branch[i][j][k][l][m] +=
counts->eob_branch[i][j][k][l][m];
// In the encoder, cm->counts.coef is only updated at frame
// In the encoder, coef is only updated at frame
// level, so there is no need to accumulate it here.
// for (n = 0; n < UNCONSTRAINED_NODES + 1; n++)
// cm->counts.coef[i][j][k][l][m][n] +=
// accum->coef[i][j][k][l][m][n] +=
// counts->coef[i][j][k][l][m][n];
}
for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++)
for (j = 0; j < SWITCHABLE_FILTERS; j++)
cm->counts.switchable_interp[i][j] += counts->switchable_interp[i][j];
accum->switchable_interp[i][j] += counts->switchable_interp[i][j];
for (i = 0; i < INTER_MODE_CONTEXTS; i++)
for (j = 0; j < INTER_MODES; j++)
cm->counts.inter_mode[i][j] += counts->inter_mode[i][j];
accum->inter_mode[i][j] += counts->inter_mode[i][j];
for (i = 0; i < INTRA_INTER_CONTEXTS; i++)
for (j = 0; j < 2; j++)
cm->counts.intra_inter[i][j] += counts->intra_inter[i][j];
accum->intra_inter[i][j] += counts->intra_inter[i][j];
for (i = 0; i < COMP_INTER_CONTEXTS; i++)
for (j = 0; j < 2; j++)
cm->counts.comp_inter[i][j] += counts->comp_inter[i][j];
accum->comp_inter[i][j] += counts->comp_inter[i][j];
for (i = 0; i < REF_CONTEXTS; i++)
for (j = 0; j < 2; j++)
for (k = 0; k < 2; k++)
cm->counts.single_ref[i][j][k] += counts->single_ref[i][j][k];
accum->single_ref[i][j][k] += counts->single_ref[i][j][k];
for (i = 0; i < REF_CONTEXTS; i++)
for (j = 0; j < 2; j++)
cm->counts.comp_ref[i][j] += counts->comp_ref[i][j];
accum->comp_ref[i][j] += counts->comp_ref[i][j];
for (i = 0; i < TX_SIZE_CONTEXTS; i++) {
for (j = 0; j < TX_SIZES; j++)
cm->counts.tx.p32x32[i][j] += counts->tx.p32x32[i][j];
accum->tx.p32x32[i][j] += counts->tx.p32x32[i][j];
for (j = 0; j < TX_SIZES - 1; j++)
cm->counts.tx.p16x16[i][j] += counts->tx.p16x16[i][j];
accum->tx.p16x16[i][j] += counts->tx.p16x16[i][j];
for (j = 0; j < TX_SIZES - 2; j++)
cm->counts.tx.p8x8[i][j] += counts->tx.p8x8[i][j];
accum->tx.p8x8[i][j] += counts->tx.p8x8[i][j];
}
for (i = 0; i < TX_SIZES; i++)
cm->counts.tx.tx_totals[i] += counts->tx.tx_totals[i];
accum->tx.tx_totals[i] += counts->tx.tx_totals[i];
for (i = 0; i < SKIP_CONTEXTS; i++)
for (j = 0; j < 2; j++)
cm->counts.skip[i][j] += counts->skip[i][j];
accum->skip[i][j] += counts->skip[i][j];
for (i = 0; i < MV_JOINTS; i++)
cm->counts.mv.joints[i] += counts->mv.joints[i];
accum->mv.joints[i] += counts->mv.joints[i];
for (k = 0; k < 2; k++) {
nmv_component_counts *comps = &cm->counts.mv.comps[k];
nmv_component_counts *comps_t = &counts->mv.comps[k];
nmv_component_counts *const comps = &accum->mv.comps[k];
const nmv_component_counts *const comps_t = &counts->mv.comps[k];
for (i = 0; i < 2; i++) {
comps->sign[i] += comps_t->sign[i];

View File

@ -55,8 +55,8 @@ void vp9_loop_filter_frame_mt(YV12_BUFFER_CONFIG *frame,
VPxWorker *workers, int num_workers,
VP9LfSync *lf_sync);
void vp9_accumulate_frame_counts(struct VP9Common *cm,
struct FRAME_COUNTS *counts, int is_dec);
void vp9_accumulate_frame_counts(struct FRAME_COUNTS *accum,
const struct FRAME_COUNTS *counts, int is_dec);
#ifdef __cplusplus
} // extern "C"

View File

@ -1563,9 +1563,10 @@ static const uint8_t *decode_tiles(VP9Decoder *pbi,
return vpx_reader_find_end(&tile_data->bit_reader);
}
static int tile_worker_hook(TileWorkerData *const tile_data,
const TileInfo *const tile) {
static int tile_worker_hook(TileWorkerData *const tile_data, void *unused) {
const TileInfo *const tile = &tile_data->xd.tile;
int mi_row, mi_col;
(void)unused;
if (setjmp(tile_data->error_info.jmp)) {
tile_data->error_info.setjmp = 0;
@ -1628,8 +1629,6 @@ static const uint8_t *decode_tiles_mt(VP9Decoder *pbi,
CHECK_MEM_ERROR(cm, pbi->tile_worker_data,
vpx_memalign(32, num_threads *
sizeof(*pbi->tile_worker_data)));
CHECK_MEM_ERROR(cm, pbi->tile_worker_info,
vpx_malloc(num_threads * sizeof(*pbi->tile_worker_info)));
for (i = 0; i < num_threads; ++i) {
VPxWorker *const worker = &pbi->tile_workers[i];
++pbi->num_tile_workers;
@ -1645,10 +1644,15 @@ static const uint8_t *decode_tiles_mt(VP9Decoder *pbi,
// Reset tile decoding hook
for (n = 0; n < num_workers; ++n) {
VPxWorker *const worker = &pbi->tile_workers[n];
TileWorkerData *const tile_data = &pbi->tile_worker_data[n];
winterface->sync(worker);
tile_data->pbi = pbi;
tile_data->xd = pbi->mb;
tile_data->xd.counts =
cm->frame_parallel_decoding_mode ? NULL : &tile_data->counts;
worker->hook = (VPxWorkerHook)tile_worker_hook;
worker->data1 = &pbi->tile_worker_data[n];
worker->data2 = &pbi->tile_worker_info[n];
worker->data1 = tile_data;
worker->data2 = NULL;
}
// Note: this memset assumes above_context[0], [1] and [2]
@ -1698,16 +1702,10 @@ static const uint8_t *decode_tiles_mt(VP9Decoder *pbi,
for (i = 0; i < num_workers && n < tile_cols; ++i) {
VPxWorker *const worker = &pbi->tile_workers[i];
TileWorkerData *const tile_data = (TileWorkerData*)worker->data1;
TileInfo *const tile = (TileInfo*)worker->data2;
TileBuffer *const buf = &tile_buffers[0][n];
tile_data->pbi = pbi;
tile_data->xd = pbi->mb;
tile_data->xd.corrupted = 0;
tile_data->xd.counts = cm->frame_parallel_decoding_mode ?
0 : &tile_data->counts;
vp9_zero(tile_data->dqcoeff);
vp9_tile_init(tile, cm, 0, buf->col);
vp9_tile_init(&tile_data->xd.tile, cm, 0, buf->col);
setup_token_decoder(buf->data, data_end, buf->size, &cm->error,
&tile_data->bit_reader, pbi->decrypt_cb,
@ -1742,14 +1740,15 @@ static const uint8_t *decode_tiles_mt(VP9Decoder *pbi,
bit_reader_end = vpx_reader_find_end(&tile_data->bit_reader);
final_worker = -1;
}
}
// Accumulate thread frame counts.
if (n >= tile_cols && !cm->frame_parallel_decoding_mode) {
for (i = 0; i < num_workers; ++i) {
TileWorkerData *const tile_data =
(TileWorkerData*)pbi->tile_workers[i].data1;
vp9_accumulate_frame_counts(cm, &tile_data->counts, 1);
}
// Accumulate thread frame counts.
if (!cm->frame_parallel_decoding_mode) {
int i;
for (i = 0; i < num_workers; ++i) {
TileWorkerData *const tile_data =
(TileWorkerData*)pbi->tile_workers[i].data1;
vp9_accumulate_frame_counts(&cm->counts, &tile_data->counts, 1);
}
}

View File

@ -134,7 +134,6 @@ void vp9_decoder_remove(VP9Decoder *pbi) {
vpx_get_worker_interface()->end(worker);
}
vpx_free(pbi->tile_worker_data);
vpx_free(pbi->tile_worker_info);
vpx_free(pbi->tile_workers);
if (pbi->num_tile_workers > 0) {

View File

@ -65,7 +65,6 @@ typedef struct VP9Decoder {
VPxWorker lf_worker;
VPxWorker *tile_workers;
TileWorkerData *tile_worker_data;
TileInfo *tile_worker_info;
int num_tile_workers;
TileData *tile_data;

View File

@ -192,7 +192,7 @@ void vp9_encode_tiles_mt(VP9_COMP *cpi) {
// Accumulate counters.
if (i < cpi->num_workers - 1) {
vp9_accumulate_frame_counts(cm, thread_data->td->counts, 0);
vp9_accumulate_frame_counts(&cm->counts, thread_data->td->counts, 0);
accumulate_rd_opt(&cpi->td, thread_data->td);
}
}