diff --git a/vp9/decoder/vp9_decodeframe.c b/vp9/decoder/vp9_decodeframe.c index 7fc573333..01f7b6284 100644 --- a/vp9/decoder/vp9_decodeframe.c +++ b/vp9/decoder/vp9_decodeframe.c @@ -1460,7 +1460,7 @@ static const uint8_t *decode_tiles(VP9Decoder *pbi, TileBuffer tile_buffers[4][1 << 6]; int tile_row, tile_col; int mi_row, mi_col; - TileData *tile_data = NULL; + TileWorkerData *tile_data = NULL; if (cm->lf.filter_level && !cm->skip_loop_filter && pbi->lf_worker.data1 == NULL) { @@ -1496,28 +1496,17 @@ static const uint8_t *decode_tiles(VP9Decoder *pbi, get_tile_buffers(pbi, data, data_end, tile_cols, tile_rows, tile_buffers); - if (pbi->tile_data == NULL || - (tile_cols * tile_rows) != pbi->total_tiles) { - vpx_free(pbi->tile_data); - CHECK_MEM_ERROR( - cm, - pbi->tile_data, - vpx_memalign(32, tile_cols * tile_rows * (sizeof(*pbi->tile_data)))); - pbi->total_tiles = tile_rows * tile_cols; - } - // Load all tile information into tile_data. for (tile_row = 0; tile_row < tile_rows; ++tile_row) { for (tile_col = 0; tile_col < tile_cols; ++tile_col) { const TileBuffer *const buf = &tile_buffers[tile_row][tile_col]; - tile_data = pbi->tile_data + tile_cols * tile_row + tile_col; - tile_data->cm = cm; + tile_data = pbi->tile_worker_data + tile_cols * tile_row + tile_col; tile_data->xd = pbi->mb; tile_data->xd.corrupted = 0; - tile_data->xd.counts = cm->frame_parallel_decoding_mode ? - NULL : &cm->counts; + tile_data->xd.counts = + cm->frame_parallel_decoding_mode ? NULL : &cm->counts; vp9_zero(tile_data->dqcoeff); - vp9_tile_init(&tile_data->xd.tile, tile_data->cm, tile_row, tile_col); + vp9_tile_init(&tile_data->xd.tile, cm, tile_row, tile_col); setup_token_decoder(buf->data, data_end, buf->size, &cm->error, &tile_data->bit_reader, pbi->decrypt_cb, pbi->decrypt_state); @@ -1533,8 +1522,8 @@ static const uint8_t *decode_tiles(VP9Decoder *pbi, for (tile_col = 0; tile_col < tile_cols; ++tile_col) { const int col = pbi->inv_tile_order ? tile_cols - tile_col - 1 : tile_col; - tile_data = pbi->tile_data + tile_cols * tile_row + col; - vp9_tile_set_col(&tile, tile_data->cm, col); + tile_data = pbi->tile_worker_data + tile_cols * tile_row + col; + vp9_tile_set_col(&tile, cm, col); vp9_zero(tile_data->xd.left_context); vp9_zero(tile_data->xd.left_seg_context); for (mi_col = tile.mi_col_start; mi_col < tile.mi_col_end; @@ -1586,7 +1575,7 @@ static const uint8_t *decode_tiles(VP9Decoder *pbi, } // Get last tile data. - tile_data = pbi->tile_data + tile_cols * tile_rows - 1; + tile_data = pbi->tile_worker_data + tile_cols * tile_rows - 1; if (pbi->frame_parallel_decode) vp9_frameworker_broadcast(pbi->cur_buf, INT_MAX); @@ -1671,12 +1660,6 @@ static const uint8_t *decode_tiles_mt(VP9Decoder *pbi, const int num_threads = pbi->max_threads; CHECK_MEM_ERROR(cm, pbi->tile_workers, vpx_malloc(num_threads * sizeof(*pbi->tile_workers))); - // Ensure tile data offsets will be properly aligned. This may fail on - // platforms without DECLARE_ALIGNED(). - assert((sizeof(*pbi->tile_worker_data) % 16) == 0); - CHECK_MEM_ERROR(cm, pbi->tile_worker_data, - vpx_memalign(32, num_threads * - sizeof(*pbi->tile_worker_data))); for (n = 0; n < num_threads; ++n) { VPxWorker *const worker = &pbi->tile_workers[n]; ++pbi->num_tile_workers; @@ -1692,7 +1675,8 @@ static const uint8_t *decode_tiles_mt(VP9Decoder *pbi, // Reset tile decoding hook for (n = 0; n < num_workers; ++n) { VPxWorker *const worker = &pbi->tile_workers[n]; - TileWorkerData *const tile_data = &pbi->tile_worker_data[n]; + TileWorkerData *const tile_data = + &pbi->tile_worker_data[n + pbi->total_tiles]; winterface->sync(worker); tile_data->xd = pbi->mb; tile_data->xd.counts = @@ -2221,6 +2205,19 @@ void vp9_decode_frame(VP9Decoder *pbi, vp9_frameworker_unlock_stats(worker); } + if (pbi->tile_worker_data == NULL || + (tile_cols * tile_rows) != pbi->total_tiles) { + const int num_tile_workers = tile_cols * tile_rows + + ((pbi->max_threads > 1) ? pbi->max_threads : 0); + const size_t twd_size = num_tile_workers * sizeof(*pbi->tile_worker_data); + // Ensure tile data offsets will be properly aligned. This may fail on + // platforms without DECLARE_ALIGNED(). + assert((sizeof(*pbi->tile_worker_data) % 16) == 0); + vpx_free(pbi->tile_worker_data); + CHECK_MEM_ERROR(cm, pbi->tile_worker_data, vpx_memalign(32, twd_size)); + pbi->total_tiles = tile_rows * tile_cols; + } + if (pbi->max_threads > 1 && tile_rows == 1 && tile_cols > 1) { // Multi-threaded tile decoder *p_data_end = decode_tiles_mt(pbi, data + first_partition_size, data_end); diff --git a/vp9/decoder/vp9_decoder.c b/vp9/decoder/vp9_decoder.c index f5da07ea0..68dfabeaf 100644 --- a/vp9/decoder/vp9_decoder.c +++ b/vp9/decoder/vp9_decoder.c @@ -131,11 +131,12 @@ void vp9_decoder_remove(VP9Decoder *pbi) { vpx_get_worker_interface()->end(&pbi->lf_worker); vpx_free(pbi->lf_worker.data1); - vpx_free(pbi->tile_data); + for (i = 0; i < pbi->num_tile_workers; ++i) { VPxWorker *const worker = &pbi->tile_workers[i]; vpx_get_worker_interface()->end(worker); } + vpx_free(pbi->tile_worker_data); vpx_free(pbi->tile_workers); diff --git a/vp9/decoder/vp9_decoder.h b/vp9/decoder/vp9_decoder.h index afa400941..7111a36d3 100644 --- a/vp9/decoder/vp9_decoder.h +++ b/vp9/decoder/vp9_decoder.h @@ -27,15 +27,6 @@ extern "C" { #endif -// TODO(hkuang): combine this with TileWorkerData. -typedef struct TileData { - VP9_COMMON *cm; - vpx_reader bit_reader; - DECLARE_ALIGNED(16, MACROBLOCKD, xd); - /* dqcoeff are shared by all the planes. So planes must be decoded serially */ - DECLARE_ALIGNED(16, tran_low_t, dqcoeff[32 * 32]); -} TileData; - typedef struct TileBuffer { const uint8_t *data; size_t size; @@ -74,8 +65,6 @@ typedef struct VP9Decoder { TileWorkerData *tile_worker_data; TileBuffer tile_buffers[64]; int num_tile_workers; - - TileData *tile_data; int total_tiles; VP9LfSync lf_row_sync;