vpx/vp9/decoder/vp9_onyxd_int.h
Yunqing Wang 903801f1ef vp9 decoder: row-based multi-threaded loopfilter
Implemented parallel loopfiltering, which uses the existing tile-
decoding threads. Each thread works on one row, and when that row
is loopfiltered, it moves to the next unattended row. To ensure the
correct filtering order, threads are synchronized and a
superblock is filtered only if the superblocks it depends on have
already been filtered.

To reduce synchronization overhead and speed up the decoder, we use
nsync > 1 for high-resolution video.

Performance tests:
1. on desktop:
8-tile 4k video using 8 threads, speedup: 70% - 80%
4-tile HD video using 4 threads, speedup: ~35%
2. on mobile device (Nexus 7):
4-tile 1080p video using 4 threads, speedup: 18% - 25%
4-tile 1080p video using 2 threads, speedup: 10% - 15%

Change-Id: If54b4a11960dd706c22d5ad145ad94156031f36a
2014-01-31 14:44:53 -08:00

68 lines
1.6 KiB
C

/*
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef VP9_DECODER_VP9_ONYXD_INT_H_
#define VP9_DECODER_VP9_ONYXD_INT_H_
#include "./vpx_config.h"
#include "vp9/common/vp9_onyxc_int.h"
#include "vp9/decoder/vp9_dthread.h"
#include "vp9/decoder/vp9_onyxd.h"
#include "vp9/decoder/vp9_thread.h"
#ifdef __cplusplus
extern "C" {
#endif
/* Decoder instance state (VP9D_COMP). Groups the per-decoder working data,
   the description of the current input buffer, frame bookkeeping flags, and
   the worker objects used for tile decoding and loop filtering.
   NOTE(review): member order determines the struct layout; do not reorder
   without checking every user of this type. */
typedef struct VP9Decompressor {
// The next three members are declared through DECLARE_ALIGNED with an
// alignment argument of 16 (macro defined elsewhere).
DECLARE_ALIGNED(16, MACROBLOCKD, mb);
DECLARE_ALIGNED(16, VP9_COMMON, common);
// One 64 * 64 int16_t buffer per plane (MAX_MB_PLANE planes); presumably holds
// dequantized coefficients -- confirm against the users of this field.
DECLARE_ALIGNED(16, int16_t, dqcoeff[MAX_MB_PLANE][64 * 64]);
// Decoder configuration (VP9D_CONFIG).
VP9D_CONFIG oxcf;
// Read-only input byte buffer and its size; presumably the compressed data
// currently being decoded -- TODO confirm ownership and lifetime with callers.
const uint8_t *source;
size_t source_sz;
// NOTE(review): units of the time stamp are not visible in this header.
int64_t last_time_stamp;
// The following ints appear to be used as flags or bitmasks; their exact
// semantics are defined by the code that reads and writes them.
int ready_for_new_data;
int refresh_frame_flags;
int decoded_key_frame;
// Presumably the frame dimensions seen when decoding started -- confirm.
int initial_width;
int initial_height;
int do_loopfilter_inline; // apply loopfilter to available rows immediately
// Worker dedicated to loop filtering.
VP9Worker lf_worker;
// Tile-decoding workers; presumably an array of num_tile_workers entries
// allocated elsewhere -- verify against the allocation site.
VP9Worker *tile_workers;
int num_tile_workers;
// Presumably the synchronization state for the row-based multi-threaded loop
// filter (VP9LfSync is declared outside this header) -- confirm.
VP9LfSync lf_row_sync;
/* Each tile column has its own MODE_INFO stream. This array indexes them by
tile column index. */
MODE_INFO **mi_streams;
// Above-neighbor context buffers: one ENTROPY_CONTEXT pointer per plane and a
// single PARTITION_CONTEXT pointer. Allocation and sizing are not visible here.
ENTROPY_CONTEXT *above_context[MAX_MB_PLANE];
PARTITION_CONTEXT *above_seg_context;
} VP9D_COMP;
#ifdef __cplusplus
} // extern "C"
#endif
#endif // VP9_DECODER_VP9_ONYXD_INT_H_