vp9: add multi-threaded tile decoder
Tiles are decoded in parallel within a single frame.

Change-Id: I7aca87cb1c239b74eceef72bdc9f672faebac373
This commit is contained in:
parent
6b00202f1b
commit
fb484524bd
@ -8,16 +8,19 @@
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "vp9/decoder/vp9_thread.h"
|
||||
#include <string>
|
||||
|
||||
#include "third_party/googletest/src/include/gtest/gtest.h"
|
||||
#include "test/codec_factory.h"
|
||||
#include "test/decode_test_driver.h"
|
||||
#include "test/md5_helper.h"
|
||||
#include "test/webm_video_source.h"
|
||||
#include "vp9/decoder/vp9_thread.h"
|
||||
|
||||
namespace {
|
||||
|
||||
using std::string;
|
||||
|
||||
class VP9WorkerThreadTest : public ::testing::TestWithParam<bool> {
|
||||
protected:
|
||||
virtual ~VP9WorkerThreadTest() {}
|
||||
@ -91,19 +94,26 @@ TEST_P(VP9WorkerThreadTest, HookFailure) {
|
||||
EXPECT_FALSE(worker_.had_error);
|
||||
}
|
||||
|
||||
TEST(VP9DecodeMTTest, MTDecode) {
|
||||
libvpx_test::WebMVideoSource video("vp90-2-03-size-226x226.webm");
|
||||
// -----------------------------------------------------------------------------
|
||||
// Multi-threaded decode tests
|
||||
|
||||
// Decodes |filename| with |num_threads|. Returns the md5 of the decoded frames.
|
||||
string DecodeFile(const string& filename, int num_threads) {
|
||||
libvpx_test::WebMVideoSource video(filename);
|
||||
video.Init();
|
||||
|
||||
vpx_codec_dec_cfg_t cfg = {0};
|
||||
cfg.threads = 2;
|
||||
cfg.threads = num_threads;
|
||||
libvpx_test::VP9Decoder decoder(cfg, 0);
|
||||
|
||||
libvpx_test::MD5 md5;
|
||||
for (video.Begin(); video.cxdata(); video.Next()) {
|
||||
const vpx_codec_err_t res =
|
||||
decoder.DecodeFrame(video.cxdata(), video.frame_size());
|
||||
ASSERT_EQ(VPX_CODEC_OK, res) << decoder.DecodeError();
|
||||
if (res != VPX_CODEC_OK) {
|
||||
EXPECT_EQ(VPX_CODEC_OK, res) << decoder.DecodeError();
|
||||
break;
|
||||
}
|
||||
|
||||
libvpx_test::DxDataIterator dec_iter = decoder.GetDxData();
|
||||
const vpx_image_t *img = NULL;
|
||||
@ -113,7 +123,32 @@ TEST(VP9DecodeMTTest, MTDecode) {
|
||||
md5.Add(img);
|
||||
}
|
||||
}
|
||||
EXPECT_STREQ("b35a1b707b28e82be025d960aba039bc", md5.Get());
|
||||
return string(md5.Get());
|
||||
}
|
||||
|
||||
// Decodes a single clip with two threads and checks the md5 of its output.
TEST(VP9DecodeMTTest, MTDecode) {
  // This clip has no tiles and is not frame parallel, so the extra thread only
  // exercises loop filter threading.
  static const char kFilename[] = "vp90-2-03-size-226x226.webm";
  static const char kExpectedMd5[] = "b35a1b707b28e82be025d960aba039bc";
  const int kNumThreads = 2;

  EXPECT_STREQ(kExpectedMd5, DecodeFile(kFilename, kNumThreads).c_str());
}
|
||||
|
||||
// Decodes each tiled clip with 2, 3 and 4 threads; every run must produce the
// md5 recorded for that clip, regardless of the thread count.
TEST(VP9DecodeMTTest, MTDecode2) {
  struct TestVector {
    const char *name;          // input file handed to DecodeFile()
    const char *expected_md5;  // md5 expected for the decoded frames
  };
  static const TestVector kVectors[] = {
    { "vp90-2-08-tile_1x2_frame_parallel.webm",
      "68ede6abd66bae0a2edf2eb9232241b6" },
    { "vp90-2-08-tile_1x4_frame_parallel.webm",
      "368ebc6ebf3a5e478d85b2c3149b2848" },
  };
  const int kNumVectors =
      static_cast<int>(sizeof(kVectors) / sizeof(kVectors[0]));
  const int kMinThreads = 2;
  const int kMaxThreads = 4;

  for (int v = 0; v < kNumVectors; ++v) {
    const TestVector &vector = kVectors[v];
    for (int threads = kMinThreads; threads <= kMaxThreads; ++threads) {
      EXPECT_STREQ(vector.expected_md5,
                   DecodeFile(vector.name, threads).c_str())
          << "threads = " << threads;
    }
  }
}
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(Synchronous, VP9WorkerThreadTest, ::testing::Bool());
|
||||
|
@ -37,6 +37,12 @@
|
||||
#include "vp9/decoder/vp9_thread.h"
|
||||
#include "vp9/decoder/vp9_treereader.h"
|
||||
|
||||
typedef struct TileWorkerData {
|
||||
VP9_COMMON *cm;
|
||||
vp9_reader bit_reader;
|
||||
DECLARE_ALIGNED(16, MACROBLOCKD, xd);
|
||||
} TileWorkerData;
|
||||
|
||||
// Reads a 32-bit big-endian value from the four bytes starting at |p|.
//
// Each byte is widened to uint32_t before it is shifted. A plain
// `p[0] << 24` promotes the byte to int and shifts into the sign bit whenever
// p[0] >= 0x80, which is undefined behavior. The return type stays int for
// existing callers, so inputs with the top bit set still come back negative.
static int read_be32(const uint8_t *p) {
  const uint32_t value = ((uint32_t)p[0] << 24) | ((uint32_t)p[1] << 16) |
                         ((uint32_t)p[2] << 8) | (uint32_t)p[3];
  return (int)value;
}
|
||||
@ -917,6 +923,106 @@ static const uint8_t *decode_tiles(VP9D_COMP *pbi, const uint8_t *data) {
|
||||
return vp9_reader_find_end(&residual_bc);
|
||||
}
|
||||
|
||||
static int tile_worker_hook(void *arg1, void *arg2) {
|
||||
TileWorkerData *const tile_data = (TileWorkerData*)arg1;
|
||||
const TileInfo *const tile = (TileInfo*)arg2;
|
||||
int mi_row, mi_col;
|
||||
|
||||
for (mi_row = tile->mi_row_start; mi_row < tile->mi_row_end;
|
||||
mi_row += MI_BLOCK_SIZE) {
|
||||
vp9_zero(tile_data->xd.left_context);
|
||||
vp9_zero(tile_data->xd.left_seg_context);
|
||||
for (mi_col = tile->mi_col_start; mi_col < tile->mi_col_end;
|
||||
mi_col += MI_BLOCK_SIZE)
|
||||
decode_modes_sb(tile_data->cm, &tile_data->xd, tile,
|
||||
mi_row, mi_col, &tile_data->bit_reader, BLOCK_64X64, 0);
|
||||
}
|
||||
return !tile_data->xd.corrupted;
|
||||
}
|
||||
|
||||
// Decodes every tile column of the current frame, spreading the columns over
// the workers in pbi->tile_workers. Only a single tile row is supported.
//
// pbi:  decoder instance; owns the worker pool and the above-context buffers.
// data: start of the tile data (vp9_decode_frame passes the position just
//       past the first partition).
// Returns vp9_reader_find_end() for the bit reader that decoded the last tile
// column.
//
// vp9_decode_frame only takes this path when oxcf.max_threads > 1 and
// tile_cols > 1, so at least two workers are always available.
//
// The worker pool is built once, on the first call, and is sized from
// oxcf.max_threads rather than from this frame's tile count. Growing the pool
// per frame would (a) vpx_realloc an array of workers that have already been
// passed to vp9_worker_reset(), and (b) later vp9_worker_launch() the worker
// that used to be last in the pool, which is deliberately never reset.
// NOTE(review): the exact failure mode of (a)/(b) depends on vp9_thread
// internals that are not visible here -- confirm against vp9_thread.c. The
// trade-off is that up to max_workers - 1 threads are created even when the
// first frame has fewer tile columns.
static const uint8_t *decode_tiles_mt(VP9D_COMP *pbi, const uint8_t *data) {
  VP9_COMMON *const cm = &pbi->common;
  const uint8_t *const data_end = pbi->source + pbi->source_sz;
  const int aligned_mi_cols = mi_cols_aligned_to_sb(cm->mi_cols);
  const int tile_cols = 1 << cm->log2_tile_cols;
  const int tile_rows = 1 << cm->log2_tile_rows;
  // Largest number of workers any frame can use (thread count rounded down to
  // an even value).
  const int max_workers = pbi->oxcf.max_threads & ~1;
  const int num_workers = MIN(max_workers, tile_cols);
  int tile_col = 0;

  assert(tile_rows == 1);
  (void)tile_rows;
  // A zero worker count would make the dispatch loop below spin forever and
  // the final modulo divide by zero.
  assert(num_workers > 0);

  if (pbi->num_tile_workers == 0) {
    int i;
    CHECK_MEM_ERROR(cm, pbi->tile_workers,
                    vpx_malloc(max_workers * sizeof(*pbi->tile_workers)));
    for (i = 0; i < max_workers; ++i) {
      VP9Worker *const worker = &pbi->tile_workers[i];
      // Counted before the allocations below so that
      // vp9_remove_decompressor() also cleans up a partially set up worker.
      ++pbi->num_tile_workers;

      vp9_worker_init(worker);
      worker->hook = (VP9WorkerHook)tile_worker_hook;
      CHECK_MEM_ERROR(cm, worker->data1, vpx_malloc(sizeof(TileWorkerData)));
      CHECK_MEM_ERROR(cm, worker->data2, vpx_malloc(sizeof(TileInfo)));
      // The last worker in the pool is only ever run through
      // vp9_worker_execute(), so it is not reset.
      if (i < max_workers - 1 && !vp9_worker_reset(worker)) {
        vpx_internal_error(&cm->error, VPX_CODEC_ERROR,
                           "Tile decoder thread creation failed");
      }
    }
  }

  // Note: this memset assumes above_context[0], [1] and [2]
  // are allocated as part of the same buffer.
  vpx_memset(pbi->above_context[0], 0,
             sizeof(*pbi->above_context[0]) * MAX_MB_PLANE *
             2 * aligned_mi_cols);
  vpx_memset(pbi->above_seg_context, 0,
             sizeof(*pbi->above_seg_context) * aligned_mi_cols);

  // Tile columns are handed out in batches of up to num_workers; each batch is
  // fully synced before the next one starts.
  while (tile_col < tile_cols) {
    int i;
    for (i = 0; i < num_workers && tile_col < tile_cols; ++i) {
      VP9Worker *const worker = &pbi->tile_workers[i];
      TileWorkerData *const tile_data = (TileWorkerData*)worker->data1;
      TileInfo *const tile = (TileInfo*)worker->data2;
      const size_t size =
          get_tile(data_end, tile_col == tile_cols - 1, &cm->error, &data);

      // Every worker decodes with its own copy of the macroblock state.
      tile_data->cm = cm;
      tile_data->xd = pbi->mb;
      tile_data->xd.corrupted = 0;
      vp9_tile_init(tile, tile_data->cm, 0, tile_col);

      setup_token_decoder(data, data_end, size, &cm->error,
                          &tile_data->bit_reader);
      setup_tile_context(pbi, &tile_data->xd, tile_col);

      worker->had_error = 0;
      // The final slot of a batch, and the slot holding the last tile, use
      // vp9_worker_execute() instead of vp9_worker_launch() (presumably so the
      // calling thread decodes a tile itself -- see vp9_thread.h).
      if (i == num_workers - 1 || tile_col == tile_cols - 1) {
        vp9_worker_execute(worker);
      } else {
        vp9_worker_launch(worker);
      }

      data += size;
      ++tile_col;
    }

    // |i| is the number of workers started in this batch; wait for each of
    // them and fold any failure into the frame-level corruption flag.
    for (; i > 0; --i) {
      VP9Worker *const worker = &pbi->tile_workers[i - 1];
      pbi->mb.corrupted |= !vp9_worker_sync(worker);
    }
  }

  {
    // Index of the worker that decoded the last tile column.
    const int final_worker = (tile_cols + num_workers - 1) % num_workers;
    TileWorkerData *const tile_data =
        (TileWorkerData*)pbi->tile_workers[final_worker].data1;
    return vp9_reader_find_end(&tile_data->bit_reader);
  }
}
|
||||
|
||||
static void check_sync_code(VP9_COMMON *cm, struct vp9_read_bit_buffer *rb) {
|
||||
if (vp9_rb_read_literal(rb, 8) != VP9_SYNC_CODE_0 ||
|
||||
vp9_rb_read_literal(rb, 8) != VP9_SYNC_CODE_1 ||
|
||||
@ -1157,6 +1263,7 @@ int vp9_decode_frame(VP9D_COMP *pbi, const uint8_t **p_data_end) {
|
||||
struct vp9_read_bit_buffer rb = { data, data_end, 0, cm, error_handler };
|
||||
const size_t first_partition_size = read_uncompressed_header(pbi, &rb);
|
||||
const int keyframe = cm->frame_type == KEY_FRAME;
|
||||
const int tile_rows = 1 << cm->log2_tile_rows;
|
||||
const int tile_cols = 1 << cm->log2_tile_cols;
|
||||
YV12_BUFFER_CONFIG *const new_fb = get_frame_new_buffer(cm);
|
||||
|
||||
@ -1208,7 +1315,14 @@ int vp9_decode_frame(VP9D_COMP *pbi, const uint8_t **p_data_end) {
|
||||
xd->corrupted = 0;
|
||||
new_fb->corrupted = read_compressed_header(pbi, data, first_partition_size);
|
||||
|
||||
*p_data_end = decode_tiles(pbi, data + first_partition_size);
|
||||
// TODO(jzern): remove frame_parallel_decoding_mode restriction for
|
||||
// single-frame tile decoding.
|
||||
if (pbi->oxcf.max_threads > 1 && tile_rows == 1 && tile_cols > 1 &&
|
||||
cm->frame_parallel_decoding_mode) {
|
||||
*p_data_end = decode_tiles_mt(pbi, data + first_partition_size);
|
||||
} else {
|
||||
*p_data_end = decode_tiles(pbi, data + first_partition_size);
|
||||
}
|
||||
|
||||
cm->last_width = cm->width;
|
||||
cm->last_height = cm->height;
|
||||
|
@ -147,6 +147,7 @@ VP9D_PTR vp9_create_decompressor(VP9D_CONFIG *oxcf) {
|
||||
}
|
||||
|
||||
void vp9_remove_decompressor(VP9D_PTR ptr) {
|
||||
int i;
|
||||
VP9D_COMP *const pbi = (VP9D_COMP *)ptr;
|
||||
|
||||
if (!pbi)
|
||||
@ -155,6 +156,13 @@ void vp9_remove_decompressor(VP9D_PTR ptr) {
|
||||
vp9_remove_common(&pbi->common);
|
||||
vp9_worker_end(&pbi->lf_worker);
|
||||
vpx_free(pbi->lf_worker.data1);
|
||||
for (i = 0; i < pbi->num_tile_workers; ++i) {
|
||||
VP9Worker *const worker = &pbi->tile_workers[i];
|
||||
vp9_worker_end(worker);
|
||||
vpx_free(worker->data1);
|
||||
vpx_free(worker->data2);
|
||||
}
|
||||
vpx_free(pbi->tile_workers);
|
||||
vpx_free(pbi->mi_streams);
|
||||
vpx_free(pbi->above_context[0]);
|
||||
vpx_free(pbi->above_seg_context);
|
||||
|
@ -40,6 +40,9 @@ typedef struct VP9Decompressor {
|
||||
int do_loopfilter_inline; // apply loopfilter to available rows immediately
|
||||
VP9Worker lf_worker;
|
||||
|
||||
VP9Worker *tile_workers;
|
||||
int num_tile_workers;
|
||||
|
||||
/* Each tile column has its own MODE_INFO stream. This array indexes them by
|
||||
tile column index. */
|
||||
MODE_INFO **mi_streams;
|
||||
|
Loading…
Reference in New Issue
Block a user