vp9/decoder: threaded row-based loop filter

This is currently the only threaded option for vp9 decode. It is enabled
when the decoder config thread count is > 1.

Change-Id: I082959abac9e31aa4a38ed9fd68b94680e57f4df
This commit is contained in:
James Zern 2013-07-31 16:15:10 -07:00
parent 183b77d5ab
commit a0ffa2794b
6 changed files with 93 additions and 4 deletions

View File

@ -11,6 +11,10 @@
#include "vp9/decoder/vp9_thread.h"
#include "third_party/googletest/src/include/gtest/gtest.h"
#include "test/codec_factory.h"
#include "test/decode_test_driver.h"
#include "test/md5_helper.h"
#include "test/webm_video_source.h"
namespace {
@ -77,4 +81,29 @@ TEST_F(VP9WorkerThreadTest, HookFailure) {
EXPECT_FALSE(worker_.had_error);
}
// Decodes a WebM clip with the decoder configured for two threads and checks
// that the MD5 accumulated over every decoded image matches the expected
// digest.
TEST(VP9DecodeMTTest, MTDecode) {
  libvpx_test::WebMVideoSource source("vp90-2-03-size-226x226.webm");
  source.Init();

  // Zero the whole config, then request two threads.
  vpx_codec_dec_cfg_t dec_cfg = {0};
  dec_cfg.threads = 2;
  libvpx_test::VP9Decoder dec(dec_cfg, 0);

  libvpx_test::MD5 digest;
  for (source.Begin(); source.cxdata(); source.Next()) {
    const vpx_codec_err_t status =
        dec.DecodeFrame(source.cxdata(), source.frame_size());
    // Abort the test on the first frame that fails to decode.
    ASSERT_EQ(VPX_CODEC_OK, status) << dec.DecodeError();

    // Fold every image produced for this compressed frame into the digest.
    libvpx_test::DxDataIterator frames = dec.GetDxData();
    for (const vpx_image_t *frame = frames.Next(); frame != NULL;
         frame = frames.Next()) {
      digest.Add(frame);
    }
  }

  EXPECT_STREQ("b35a1b707b28e82be025d960aba039bc", digest.Get());
}
} // namespace

View File

@ -376,3 +376,11 @@ void vp9_loop_filter_frame(VP9_COMMON *cm, MACROBLOCKD *xd,
vp9_loop_filter_rows(cm->frame_to_show, cm, xd,
0, cm->mi_rows, y_only);
}
// Worker hook that runs the loop filter over one span of rows.
//   arg1: pointer to an LFWorkerData holding the arguments that are forwarded
//         to vp9_loop_filter_rows().
//   arg2: unused.
// Always returns 1 (presumably the worker framework's "no error" value --
// confirm against vp9_thread.h).
int vp9_loop_filter_worker(void *arg1, void *arg2) {
  LFWorkerData *const job = (LFWorkerData *)arg1;
  (void)arg2;  // the hook signature takes two arguments; only one is needed.

  vp9_loop_filter_rows(job->frame_buffer, job->cm, &job->xd,
                       job->start, job->stop, job->y_only);
  return 1;
}

View File

@ -64,4 +64,18 @@ void vp9_loop_filter_frame(struct VP9Common *cm,
void vp9_loop_filter_rows(const YV12_BUFFER_CONFIG *frame_buffer,
struct VP9Common *cm, struct macroblockd *xd,
int start, int stop, int y_only);
// Argument bundle consumed by vp9_loop_filter_worker(). Each field is
// forwarded to the matching parameter of vp9_loop_filter_rows().
typedef struct LoopFilterWorkerData {
const YV12_BUFFER_CONFIG *frame_buffer;  // passed as 'frame_buffer'
struct VP9Common *cm;  // passed as 'cm'
// Held by value (not by pointer); its address is passed as 'xd'.
struct macroblockd xd; // TODO(jzern): most of this is unnecessary to the
// loopfilter. the planes are necessary as their state
// is changed during decode.
int start;  // passed as 'start'
int stop;  // passed as 'stop'
int y_only;  // passed as 'y_only'
} LFWorkerData;
// Operates on the rows described by LFWorkerData passed as 'arg1'.
// 'arg1' must point to an LFWorkerData; its fields are forwarded to
// vp9_loop_filter_rows(). 'arg2' is ignored.
int vp9_loop_filter_worker(void *arg1, void *arg2);
#endif // VP9_COMMON_VP9_LOOPFILTER_H_

View File

@ -34,6 +34,7 @@
#include "vp9/decoder/vp9_idct_blk.h"
#include "vp9/decoder/vp9_onyxd_int.h"
#include "vp9/decoder/vp9_read_bit_buffer.h"
#include "vp9/decoder/vp9_thread.h"
#include "vp9/decoder/vp9_treereader.h"
static int read_be32(const uint8_t *p) {
@ -585,10 +586,18 @@ static void setup_frame_size_with_refs(VP9D_COMP *pbi,
}
static void decode_tile(VP9D_COMP *pbi, vp9_reader *r) {
const int num_threads = pbi->oxcf.max_threads;
VP9_COMMON *const pc = &pbi->common;
int mi_row, mi_col;
if (pbi->do_loopfilter_inline) {
if (num_threads > 1) {
LFWorkerData *const lf_data = (LFWorkerData*)pbi->lf_worker.data1;
lf_data->frame_buffer = &pbi->common.yv12_fb[pbi->common.new_fb_idx];
lf_data->cm = pc;
lf_data->xd = pbi->mb;
lf_data->y_only = 0;
}
vp9_loop_filter_frame_init(pc, &pbi->mb, pbi->mb.lf.filter_level);
}
@ -603,17 +612,33 @@ static void decode_tile(VP9D_COMP *pbi, vp9_reader *r) {
}
if (pbi->do_loopfilter_inline) {
YV12_BUFFER_CONFIG *const fb =
&pbi->common.yv12_fb[pbi->common.new_fb_idx];
// delay the loopfilter by 1 macroblock row.
const int lf_start = mi_row - MI_BLOCK_SIZE;
if (lf_start < 0) continue;
vp9_loop_filter_rows(fb, pc, &pbi->mb, lf_start, mi_row, 0);
if (num_threads > 1) {
LFWorkerData *const lf_data = (LFWorkerData*)pbi->lf_worker.data1;
vp9_worker_sync(&pbi->lf_worker);
lf_data->start = lf_start;
lf_data->stop = mi_row;
pbi->lf_worker.hook = vp9_loop_filter_worker;
vp9_worker_launch(&pbi->lf_worker);
} else {
YV12_BUFFER_CONFIG *const fb =
&pbi->common.yv12_fb[pbi->common.new_fb_idx];
vp9_loop_filter_rows(fb, pc, &pbi->mb, lf_start, mi_row, 0);
}
}
}
if (pbi->do_loopfilter_inline) {
YV12_BUFFER_CONFIG *const fb = &pbi->common.yv12_fb[pbi->common.new_fb_idx];
if (num_threads > 1) {
// TODO(jzern): since the loop filter is delayed one mb row, this will be
// forced to wait for the last row scheduled in the for loop.
vp9_worker_sync(&pbi->lf_worker);
}
vp9_loop_filter_rows(fb, pc, &pbi->mb,
mi_row - MI_BLOCK_SIZE, pc->mi_rows, 0);
}

View File

@ -141,6 +141,16 @@ VP9D_PTR vp9_create_decompressor(VP9D_CONFIG *oxcf) {
pbi->common.error.setjmp = 0;
pbi->decoded_key_frame = 0;
if (pbi->oxcf.max_threads > 1) {
vp9_worker_init(&pbi->lf_worker);
pbi->lf_worker.data1 = vpx_malloc(sizeof(LFWorkerData));
pbi->lf_worker.hook = (VP9WorkerHook)vp9_loop_filter_worker;
if (pbi->lf_worker.data1 == NULL || !vp9_worker_reset(&pbi->lf_worker)) {
vp9_remove_decompressor(pbi);
return NULL;
}
}
return pbi;
}
@ -154,6 +164,8 @@ void vp9_remove_decompressor(VP9D_PTR ptr) {
vpx_free(pbi->common.last_frame_seg_map);
vp9_remove_common(&pbi->common);
vp9_worker_end(&pbi->lf_worker);
vpx_free(pbi->lf_worker.data1);
vpx_free(pbi);
}

View File

@ -14,8 +14,8 @@
#include "./vpx_config.h"
#include "vp9/common/vp9_onyxc_int.h"
#include "vp9/decoder/vp9_onyxd.h"
#include "vp9/decoder/vp9_thread.h"
typedef struct VP9Decompressor {
DECLARE_ALIGNED(16, MACROBLOCKD, mb);
@ -38,6 +38,7 @@ typedef struct VP9Decompressor {
int initial_height;
int do_loopfilter_inline; // apply loopfilter to available rows immediately
VP9Worker lf_worker;
} VP9D_COMP;
#endif // VP9_DECODER_VP9_TREEREADER_H_