Merge changes from topic 'tile-thread-cleanup'
* changes:
  - vp9/decode_tiles_mt: move frame count accum from loop
  - VP9Decoder: remove duplicate tile_worker_info
  - vp9/decode_tiles_mt: move some inits from inner loop
  - vp9_accumulate_frame_counts: pass counts directly
This commit is contained in:
commit
9d8decc162
@ -318,21 +318,21 @@ void vp9_loop_filter_dealloc(VP9LfSync *lf_sync) {
|
||||
}
|
||||
|
||||
// Accumulate frame counts.
|
||||
void vp9_accumulate_frame_counts(VP9_COMMON *cm, FRAME_COUNTS *counts,
|
||||
int is_dec) {
|
||||
void vp9_accumulate_frame_counts(FRAME_COUNTS *accum,
|
||||
const FRAME_COUNTS *counts, int is_dec) {
|
||||
int i, j, k, l, m;
|
||||
|
||||
for (i = 0; i < BLOCK_SIZE_GROUPS; i++)
|
||||
for (j = 0; j < INTRA_MODES; j++)
|
||||
cm->counts.y_mode[i][j] += counts->y_mode[i][j];
|
||||
accum->y_mode[i][j] += counts->y_mode[i][j];
|
||||
|
||||
for (i = 0; i < INTRA_MODES; i++)
|
||||
for (j = 0; j < INTRA_MODES; j++)
|
||||
cm->counts.uv_mode[i][j] += counts->uv_mode[i][j];
|
||||
accum->uv_mode[i][j] += counts->uv_mode[i][j];
|
||||
|
||||
for (i = 0; i < PARTITION_CONTEXTS; i++)
|
||||
for (j = 0; j < PARTITION_TYPES; j++)
|
||||
cm->counts.partition[i][j] += counts->partition[i][j];
|
||||
accum->partition[i][j] += counts->partition[i][j];
|
||||
|
||||
if (is_dec) {
|
||||
int n;
|
||||
@ -341,10 +341,10 @@ void vp9_accumulate_frame_counts(VP9_COMMON *cm, FRAME_COUNTS *counts,
|
||||
for (k = 0; k < REF_TYPES; k++)
|
||||
for (l = 0; l < COEF_BANDS; l++)
|
||||
for (m = 0; m < COEFF_CONTEXTS; m++) {
|
||||
cm->counts.eob_branch[i][j][k][l][m] +=
|
||||
accum->eob_branch[i][j][k][l][m] +=
|
||||
counts->eob_branch[i][j][k][l][m];
|
||||
for (n = 0; n < UNCONSTRAINED_NODES + 1; n++)
|
||||
cm->counts.coef[i][j][k][l][m][n] +=
|
||||
accum->coef[i][j][k][l][m][n] +=
|
||||
counts->coef[i][j][k][l][m][n];
|
||||
}
|
||||
} else {
|
||||
@ -353,64 +353,64 @@ void vp9_accumulate_frame_counts(VP9_COMMON *cm, FRAME_COUNTS *counts,
|
||||
for (k = 0; k < REF_TYPES; k++)
|
||||
for (l = 0; l < COEF_BANDS; l++)
|
||||
for (m = 0; m < COEFF_CONTEXTS; m++)
|
||||
cm->counts.eob_branch[i][j][k][l][m] +=
|
||||
accum->eob_branch[i][j][k][l][m] +=
|
||||
counts->eob_branch[i][j][k][l][m];
|
||||
// In the encoder, cm->counts.coef is only updated at frame
|
||||
// In the encoder, coef is only updated at frame
|
||||
// level, so no need to accumulate it here.
|
||||
// for (n = 0; n < UNCONSTRAINED_NODES + 1; n++)
|
||||
// cm->counts.coef[i][j][k][l][m][n] +=
|
||||
// accum->coef[i][j][k][l][m][n] +=
|
||||
// counts->coef[i][j][k][l][m][n];
|
||||
}
|
||||
|
||||
for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++)
|
||||
for (j = 0; j < SWITCHABLE_FILTERS; j++)
|
||||
cm->counts.switchable_interp[i][j] += counts->switchable_interp[i][j];
|
||||
accum->switchable_interp[i][j] += counts->switchable_interp[i][j];
|
||||
|
||||
for (i = 0; i < INTER_MODE_CONTEXTS; i++)
|
||||
for (j = 0; j < INTER_MODES; j++)
|
||||
cm->counts.inter_mode[i][j] += counts->inter_mode[i][j];
|
||||
accum->inter_mode[i][j] += counts->inter_mode[i][j];
|
||||
|
||||
for (i = 0; i < INTRA_INTER_CONTEXTS; i++)
|
||||
for (j = 0; j < 2; j++)
|
||||
cm->counts.intra_inter[i][j] += counts->intra_inter[i][j];
|
||||
accum->intra_inter[i][j] += counts->intra_inter[i][j];
|
||||
|
||||
for (i = 0; i < COMP_INTER_CONTEXTS; i++)
|
||||
for (j = 0; j < 2; j++)
|
||||
cm->counts.comp_inter[i][j] += counts->comp_inter[i][j];
|
||||
accum->comp_inter[i][j] += counts->comp_inter[i][j];
|
||||
|
||||
for (i = 0; i < REF_CONTEXTS; i++)
|
||||
for (j = 0; j < 2; j++)
|
||||
for (k = 0; k < 2; k++)
|
||||
cm->counts.single_ref[i][j][k] += counts->single_ref[i][j][k];
|
||||
accum->single_ref[i][j][k] += counts->single_ref[i][j][k];
|
||||
|
||||
for (i = 0; i < REF_CONTEXTS; i++)
|
||||
for (j = 0; j < 2; j++)
|
||||
cm->counts.comp_ref[i][j] += counts->comp_ref[i][j];
|
||||
accum->comp_ref[i][j] += counts->comp_ref[i][j];
|
||||
|
||||
for (i = 0; i < TX_SIZE_CONTEXTS; i++) {
|
||||
for (j = 0; j < TX_SIZES; j++)
|
||||
cm->counts.tx.p32x32[i][j] += counts->tx.p32x32[i][j];
|
||||
accum->tx.p32x32[i][j] += counts->tx.p32x32[i][j];
|
||||
|
||||
for (j = 0; j < TX_SIZES - 1; j++)
|
||||
cm->counts.tx.p16x16[i][j] += counts->tx.p16x16[i][j];
|
||||
accum->tx.p16x16[i][j] += counts->tx.p16x16[i][j];
|
||||
|
||||
for (j = 0; j < TX_SIZES - 2; j++)
|
||||
cm->counts.tx.p8x8[i][j] += counts->tx.p8x8[i][j];
|
||||
accum->tx.p8x8[i][j] += counts->tx.p8x8[i][j];
|
||||
}
|
||||
|
||||
for (i = 0; i < TX_SIZES; i++)
|
||||
cm->counts.tx.tx_totals[i] += counts->tx.tx_totals[i];
|
||||
accum->tx.tx_totals[i] += counts->tx.tx_totals[i];
|
||||
|
||||
for (i = 0; i < SKIP_CONTEXTS; i++)
|
||||
for (j = 0; j < 2; j++)
|
||||
cm->counts.skip[i][j] += counts->skip[i][j];
|
||||
accum->skip[i][j] += counts->skip[i][j];
|
||||
|
||||
for (i = 0; i < MV_JOINTS; i++)
|
||||
cm->counts.mv.joints[i] += counts->mv.joints[i];
|
||||
accum->mv.joints[i] += counts->mv.joints[i];
|
||||
|
||||
for (k = 0; k < 2; k++) {
|
||||
nmv_component_counts *comps = &cm->counts.mv.comps[k];
|
||||
nmv_component_counts *comps_t = &counts->mv.comps[k];
|
||||
nmv_component_counts *const comps = &accum->mv.comps[k];
|
||||
const nmv_component_counts *const comps_t = &counts->mv.comps[k];
|
||||
|
||||
for (i = 0; i < 2; i++) {
|
||||
comps->sign[i] += comps_t->sign[i];
|
||||
|
@ -55,8 +55,8 @@ void vp9_loop_filter_frame_mt(YV12_BUFFER_CONFIG *frame,
|
||||
VPxWorker *workers, int num_workers,
|
||||
VP9LfSync *lf_sync);
|
||||
|
||||
void vp9_accumulate_frame_counts(struct VP9Common *cm,
|
||||
struct FRAME_COUNTS *counts, int is_dec);
|
||||
void vp9_accumulate_frame_counts(struct FRAME_COUNTS *accum,
|
||||
const struct FRAME_COUNTS *counts, int is_dec);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
|
@ -1563,9 +1563,10 @@ static const uint8_t *decode_tiles(VP9Decoder *pbi,
|
||||
return vpx_reader_find_end(&tile_data->bit_reader);
|
||||
}
|
||||
|
||||
static int tile_worker_hook(TileWorkerData *const tile_data,
|
||||
const TileInfo *const tile) {
|
||||
static int tile_worker_hook(TileWorkerData *const tile_data, void *unused) {
|
||||
const TileInfo *const tile = &tile_data->xd.tile;
|
||||
int mi_row, mi_col;
|
||||
(void)unused;
|
||||
|
||||
if (setjmp(tile_data->error_info.jmp)) {
|
||||
tile_data->error_info.setjmp = 0;
|
||||
@ -1628,8 +1629,6 @@ static const uint8_t *decode_tiles_mt(VP9Decoder *pbi,
|
||||
CHECK_MEM_ERROR(cm, pbi->tile_worker_data,
|
||||
vpx_memalign(32, num_threads *
|
||||
sizeof(*pbi->tile_worker_data)));
|
||||
CHECK_MEM_ERROR(cm, pbi->tile_worker_info,
|
||||
vpx_malloc(num_threads * sizeof(*pbi->tile_worker_info)));
|
||||
for (i = 0; i < num_threads; ++i) {
|
||||
VPxWorker *const worker = &pbi->tile_workers[i];
|
||||
++pbi->num_tile_workers;
|
||||
@ -1645,10 +1644,15 @@ static const uint8_t *decode_tiles_mt(VP9Decoder *pbi,
|
||||
// Reset tile decoding hook
|
||||
for (n = 0; n < num_workers; ++n) {
|
||||
VPxWorker *const worker = &pbi->tile_workers[n];
|
||||
TileWorkerData *const tile_data = &pbi->tile_worker_data[n];
|
||||
winterface->sync(worker);
|
||||
tile_data->pbi = pbi;
|
||||
tile_data->xd = pbi->mb;
|
||||
tile_data->xd.counts =
|
||||
cm->frame_parallel_decoding_mode ? NULL : &tile_data->counts;
|
||||
worker->hook = (VPxWorkerHook)tile_worker_hook;
|
||||
worker->data1 = &pbi->tile_worker_data[n];
|
||||
worker->data2 = &pbi->tile_worker_info[n];
|
||||
worker->data1 = tile_data;
|
||||
worker->data2 = NULL;
|
||||
}
|
||||
|
||||
// Note: this memset assumes above_context[0], [1] and [2]
|
||||
@ -1698,16 +1702,10 @@ static const uint8_t *decode_tiles_mt(VP9Decoder *pbi,
|
||||
for (i = 0; i < num_workers && n < tile_cols; ++i) {
|
||||
VPxWorker *const worker = &pbi->tile_workers[i];
|
||||
TileWorkerData *const tile_data = (TileWorkerData*)worker->data1;
|
||||
TileInfo *const tile = (TileInfo*)worker->data2;
|
||||
TileBuffer *const buf = &tile_buffers[0][n];
|
||||
|
||||
tile_data->pbi = pbi;
|
||||
tile_data->xd = pbi->mb;
|
||||
tile_data->xd.corrupted = 0;
|
||||
tile_data->xd.counts = cm->frame_parallel_decoding_mode ?
|
||||
0 : &tile_data->counts;
|
||||
vp9_zero(tile_data->dqcoeff);
|
||||
vp9_tile_init(tile, cm, 0, buf->col);
|
||||
vp9_tile_init(&tile_data->xd.tile, cm, 0, buf->col);
|
||||
setup_token_decoder(buf->data, data_end, buf->size, &cm->error,
|
||||
&tile_data->bit_reader, pbi->decrypt_cb,
|
||||
@ -1742,14 +1740,15 @@ static const uint8_t *decode_tiles_mt(VP9Decoder *pbi,
|
||||
bit_reader_end = vpx_reader_find_end(&tile_data->bit_reader);
|
||||
final_worker = -1;
|
||||
}
|
||||
}
|
||||
|
||||
// Accumulate thread frame counts.
|
||||
if (n >= tile_cols && !cm->frame_parallel_decoding_mode) {
|
||||
for (i = 0; i < num_workers; ++i) {
|
||||
TileWorkerData *const tile_data =
|
||||
(TileWorkerData*)pbi->tile_workers[i].data1;
|
||||
vp9_accumulate_frame_counts(cm, &tile_data->counts, 1);
|
||||
}
|
||||
// Accumulate thread frame counts.
|
||||
if (!cm->frame_parallel_decoding_mode) {
|
||||
int i;
|
||||
for (i = 0; i < num_workers; ++i) {
|
||||
TileWorkerData *const tile_data =
|
||||
(TileWorkerData*)pbi->tile_workers[i].data1;
|
||||
vp9_accumulate_frame_counts(&cm->counts, &tile_data->counts, 1);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -134,7 +134,6 @@ void vp9_decoder_remove(VP9Decoder *pbi) {
|
||||
vpx_get_worker_interface()->end(worker);
|
||||
}
|
||||
vpx_free(pbi->tile_worker_data);
|
||||
vpx_free(pbi->tile_worker_info);
|
||||
vpx_free(pbi->tile_workers);
|
||||
|
||||
if (pbi->num_tile_workers > 0) {
|
||||
|
@ -65,7 +65,6 @@ typedef struct VP9Decoder {
|
||||
VPxWorker lf_worker;
|
||||
VPxWorker *tile_workers;
|
||||
TileWorkerData *tile_worker_data;
|
||||
TileInfo *tile_worker_info;
|
||||
int num_tile_workers;
|
||||
|
||||
TileData *tile_data;
|
||||
|
@ -192,7 +192,7 @@ void vp9_encode_tiles_mt(VP9_COMP *cpi) {
|
||||
|
||||
// Accumulate counters.
|
||||
if (i < cpi->num_workers - 1) {
|
||||
vp9_accumulate_frame_counts(cm, thread_data->td->counts, 0);
|
||||
vp9_accumulate_frame_counts(&cm->counts, thread_data->td->counts, 0);
|
||||
accumulate_rd_opt(&cpi->td, thread_data->td);
|
||||
}
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user