vpx/vp9/decoder/vp9_dthread.c
hkuang bde04ce503 Merge branch 'frame-parallel' to enable frame parallel decode in master branch.
In frame parallel decode, libvpx decoder decodes several frames on all
cpus in parallel fashion. If not being flushed, it will only return frame
when all the cpus are busy. If getting flushed, it will return all the
frames in the decoder. Compare with current serial decode mode in which
libvpx decoder is idle between decode calls, libvpx decoder is busy
between decode calls. VP9 frame parallel decode is >30% faster than serial
decode with tile parallel threading which will makes devices play 1080P
VP9 videos more easily.

* frame-parallel:
  Add error handling for frame parallel decode and unit test for that.
  Fix a bug in frame parallel decode and add a unit test for that.
  Add two test vectors to test frame parallel decode.
  Add key frame seeking to webmdec and webm_video_source.
  Implement frame parallel decode for VP9.
  Increase the thread test range to cover 5, 6, 7, 8 threads.
  Fix a bug in adding frame parallel unit test.
  Add VP9 frame-parallel unit test.
  Manually pick "Make the api behavior conform to api spec." from master branch.
  Move vp9_dec_build_inter_predictors_* to decoder folder.
  Add segmentation map array for current and last frame segmentation.
  Include the right header for VP9 worker thread.
  Move vp9_thread.* to common.
  ctrl_get_reference does not need user_priv.
  Seperate the frame buffers from VP9 encoder/decoder structure.
  Revert "Revert "Revert "Revert 3 patches from Hangyu to get Chrome to build:"""

 Conflicts:
       test/codec_factory.h
       test/decode_test_driver.cc
       test/decode_test_driver.h
       test/invalid_file_test.cc
       test/test-data.sha1
       test/test.mk
       test/test_vectors.cc
       vp8/vp8_dx_iface.c
       vp9/common/vp9_alloccommon.c
       vp9/common/vp9_entropymode.c
       vp9/common/vp9_loopfilter_thread.c
       vp9/common/vp9_loopfilter_thread.h
       vp9/common/vp9_mvref_common.c
       vp9/common/vp9_onyxc_int.h
       vp9/common/vp9_reconinter.c
       vp9/decoder/vp9_decodeframe.c
       vp9/decoder/vp9_decodeframe.h
       vp9/decoder/vp9_decodemv.c
       vp9/decoder/vp9_decoder.c
       vp9/decoder/vp9_decoder.h
       vp9/encoder/vp9_encoder.c
       vp9/encoder/vp9_pickmode.c
       vp9/encoder/vp9_rdopt.c
       vp9/vp9_cx_iface.c
       vp9/vp9_dx_iface.c

Change-Id: Ib92eb35851c172d0624970e312ed515054e5ca64
2015-01-22 18:18:53 -08:00

188 lines
6.6 KiB
C

/*
* Copyright (c) 2014 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "./vpx_config.h"
#include "vpx_mem/vpx_mem.h"
#include "vp9/common/vp9_reconinter.h"
#include "vp9/decoder/vp9_dthread.h"
#include "vp9/decoder/vp9_decoder.h"
// TODO(hkuang): Clean up all the #ifdef in this file.
void vp9_frameworker_lock_stats(VP9Worker *const worker) {
#if CONFIG_MULTITHREAD
FrameWorkerData *const worker_data = worker->data1;
pthread_mutex_lock(&worker_data->stats_mutex);
#else
(void)worker;
#endif
}
void vp9_frameworker_unlock_stats(VP9Worker *const worker) {
#if CONFIG_MULTITHREAD
FrameWorkerData *const worker_data = worker->data1;
pthread_mutex_unlock(&worker_data->stats_mutex);
#else
(void)worker;
#endif
}
void vp9_frameworker_signal_stats(VP9Worker *const worker) {
#if CONFIG_MULTITHREAD
FrameWorkerData *const worker_data = worker->data1;
// TODO(hkuang): Investigate using broadcast or signal.
pthread_cond_signal(&worker_data->stats_cond);
#else
(void)worker;
#endif
}
// TODO(hkuang): Remove worker parameter as it is only used in debug code.
void vp9_frameworker_wait(VP9Worker *const worker, RefCntBuffer *const ref_buf,
int row) {
#if CONFIG_MULTITHREAD
if (!ref_buf)
return;
// Enabling the following line of code will get harmless tsan error but
// will get best performance.
// if (ref_buf->row >= row && ref_buf->buf.corrupted != 1) return;
{
// Find the worker thread that owns the reference frame. If the reference
// frame has been fully decoded, it may not have owner.
VP9Worker *const ref_worker = ref_buf->frame_worker_owner;
FrameWorkerData *const ref_worker_data =
(FrameWorkerData *)ref_worker->data1;
const VP9Decoder *const pbi = ref_worker_data->pbi;
#ifdef DEBUG_THREAD
{
FrameWorkerData *const worker_data = (FrameWorkerData *)worker->data1;
printf("%d %p worker is waiting for %d %p worker (%d) ref %d \r\n",
worker_data->worker_id, worker, ref_worker_data->worker_id,
ref_buf->frame_worker_owner, row, ref_buf->row);
}
#endif
vp9_frameworker_lock_stats(ref_worker);
while (ref_buf->row < row && pbi->cur_buf == ref_buf &&
ref_buf->buf.corrupted != 1) {
pthread_cond_wait(&ref_worker_data->stats_cond,
&ref_worker_data->stats_mutex);
}
if (ref_buf->buf.corrupted == 1) {
FrameWorkerData *const worker_data = (FrameWorkerData *)worker->data1;
vp9_frameworker_unlock_stats(ref_worker);
vpx_internal_error(&worker_data->pbi->common.error,
VPX_CODEC_CORRUPT_FRAME,
"Worker %p failed to decode frame", worker);
}
vp9_frameworker_unlock_stats(ref_worker);
}
#else
(void)worker;
(void)ref_buf;
(void)row;
(void)ref_buf;
#endif // CONFIG_MULTITHREAD
}
void vp9_frameworker_broadcast(RefCntBuffer *const buf, int row) {
#if CONFIG_MULTITHREAD
VP9Worker *worker = buf->frame_worker_owner;
#ifdef DEBUG_THREAD
{
FrameWorkerData *const worker_data = (FrameWorkerData *)worker->data1;
printf("%d %p worker decode to (%d) \r\n", worker_data->worker_id,
buf->frame_worker_owner, row);
}
#endif
vp9_frameworker_lock_stats(worker);
buf->row = row;
vp9_frameworker_signal_stats(worker);
vp9_frameworker_unlock_stats(worker);
#else
(void)buf;
(void)row;
#endif // CONFIG_MULTITHREAD
}
void vp9_frameworker_copy_context(VP9Worker *const dst_worker,
VP9Worker *const src_worker) {
#if CONFIG_MULTITHREAD
FrameWorkerData *const src_worker_data = (FrameWorkerData *)src_worker->data1;
FrameWorkerData *const dst_worker_data = (FrameWorkerData *)dst_worker->data1;
VP9_COMMON *const src_cm = &src_worker_data->pbi->common;
VP9_COMMON *const dst_cm = &dst_worker_data->pbi->common;
int i;
// Wait until source frame's context is ready.
vp9_frameworker_lock_stats(src_worker);
while (!src_worker_data->frame_context_ready) {
pthread_cond_wait(&src_worker_data->stats_cond,
&src_worker_data->stats_mutex);
}
// src worker may have already finished decoding a frame and swapped the mi.
// TODO(hkuang): Remove following code after implenment no ModeInfo decoding.
if (src_worker_data->frame_decoded) {
dst_cm->prev_mip = src_cm->prev_mip;
dst_cm->prev_mi = src_cm->prev_mi;
dst_cm->last_frame_seg_map = src_cm->last_frame_seg_map;
} else {
dst_cm->prev_mip = src_cm->mip;
dst_cm->prev_mi = src_cm->mi;
dst_cm->last_frame_seg_map = src_cm->current_frame_seg_map;
}
dst_worker_data->pbi->need_resync = src_worker_data->pbi->need_resync;
vp9_frameworker_unlock_stats(src_worker);
dst_worker_data->pbi->prev_buf =
src_worker_data->pbi->common.show_existing_frame ?
NULL : src_worker_data->pbi->cur_buf;
dst_cm->last_width = !src_cm->show_existing_frame ?
src_cm->width : src_cm->last_width;
dst_cm->last_height = !src_cm->show_existing_frame ?
src_cm->height : src_cm->last_height;
dst_cm->display_width = src_cm->display_width;
dst_cm->display_height = src_cm->display_height;
dst_cm->subsampling_x = src_cm->subsampling_x;
dst_cm->subsampling_y = src_cm->subsampling_y;
dst_cm->last_show_frame = !src_cm->show_existing_frame ?
src_cm->show_frame : src_cm->last_show_frame;
dst_cm->last_frame_type = src_cm->last_frame_type;
dst_cm->frame_type = src_cm->frame_type;
dst_cm->y_dc_delta_q = src_cm->y_dc_delta_q;
dst_cm->uv_dc_delta_q = src_cm->uv_dc_delta_q;
dst_cm->uv_ac_delta_q = src_cm->uv_ac_delta_q;
dst_cm->base_qindex = src_cm->base_qindex;
for (i = 0; i < REF_FRAMES; ++i)
dst_cm->ref_frame_map[i] = src_cm->next_ref_frame_map[i];
memcpy(dst_cm->lf_info.lfthr, src_cm->lf_info.lfthr,
(MAX_LOOP_FILTER + 1) * sizeof(loop_filter_thresh));
dst_cm->lf.last_sharpness_level = src_cm->lf.sharpness_level;
dst_cm->lf.filter_level = src_cm->lf.filter_level;
memcpy(dst_cm->lf.ref_deltas, src_cm->lf.ref_deltas, MAX_REF_LF_DELTAS);
memcpy(dst_cm->lf.mode_deltas, src_cm->lf.mode_deltas, MAX_MODE_LF_DELTAS);
dst_cm->seg = src_cm->seg;
memcpy(dst_cm->frame_contexts, src_cm->frame_contexts,
FRAME_CONTEXTS * sizeof(dst_cm->frame_contexts[0]));
#else
(void) dst_worker;
(void) src_worker;
#endif // CONFIG_MULTITHREAD
}