Merge changes from topic 'rm-dec-frame-parallel'

* changes:
  VP9_COMMON: rm frame_parallel_decode
  VP9Decoder: rm frame_parallel_decode
  vp9_dx: rm worker thread creation
This commit is contained in:
James Zern
2017-07-05 23:53:22 +00:00
committed by Gerrit Code Review
8 changed files with 26 additions and 142 deletions

View File

@@ -62,8 +62,7 @@ static int alloc_seg_map(VP9_COMMON *cm, int seg_map_size) {
cm->prev_seg_map_idx = 1;
cm->current_frame_seg_map = cm->seg_map_array[cm->seg_map_idx];
if (!cm->frame_parallel_decode)
cm->last_frame_seg_map = cm->seg_map_array[cm->prev_seg_map_idx];
cm->last_frame_seg_map = cm->seg_map_array[cm->prev_seg_map_idx];
return 0;
}
@@ -77,10 +76,7 @@ static void free_seg_map(VP9_COMMON *cm) {
}
cm->current_frame_seg_map = NULL;
if (!cm->frame_parallel_decode) {
cm->last_frame_seg_map = NULL;
}
cm->last_frame_seg_map = NULL;
}
void vp9_free_ref_frame_buffers(BufferPool *pool) {
@@ -186,7 +182,7 @@ void vp9_remove_common(VP9_COMMON *cm) {
void vp9_init_context_buffers(VP9_COMMON *cm) {
cm->setup_mi(cm);
if (cm->last_frame_seg_map && !cm->frame_parallel_decode)
if (cm->last_frame_seg_map)
memset(cm->last_frame_seg_map, 0, cm->mi_rows * cm->mi_cols);
}

View File

@@ -428,7 +428,7 @@ void vp9_setup_past_independence(VP9_COMMON *cm) {
vp9_clearall_segfeatures(&cm->seg);
cm->seg.abs_delta = SEGMENT_DELTADATA;
if (cm->last_frame_seg_map && !cm->frame_parallel_decode)
if (cm->last_frame_seg_map)
memset(cm->last_frame_seg_map, 0, (cm->mi_rows * cm->mi_cols));
if (cm->current_frame_seg_map)
@@ -457,7 +457,7 @@ void vp9_setup_past_independence(VP9_COMMON *cm) {
}
// prev_mip will only be allocated in encoder.
if (frame_is_intra_only(cm) && cm->prev_mip && !cm->frame_parallel_decode)
if (frame_is_intra_only(cm) && cm->prev_mip)
memset(cm->prev_mip, 0,
cm->mi_stride * (cm->mi_rows + 1) * sizeof(*cm->prev_mip));

View File

@@ -235,10 +235,6 @@ typedef struct VP9Common {
struct loopfilter lf;
struct segmentation seg;
// TODO(hkuang): Remove this as it is the same as frame_parallel_decode
// in pbi.
int frame_parallel_decode; // frame-based threading.
// Context probabilities for reference frame prediction
MV_REFERENCE_FRAME comp_fixed_ref;
MV_REFERENCE_FRAME comp_var_ref[2];

View File

@@ -490,8 +490,8 @@ static void extend_and_predict(const uint8_t *buf_ptr1, int pre_buf_stride,
#endif // CONFIG_VP9_HIGHBITDEPTH
static void dec_build_inter_predictors(
VPxWorker *const worker, MACROBLOCKD *xd, int plane, int bw, int bh, int x,
int y, int w, int h, int mi_x, int mi_y, const InterpKernel *kernel,
MACROBLOCKD *xd, int plane, int bw, int bh, int x, int y, int w, int h,
int mi_x, int mi_y, const InterpKernel *kernel,
const struct scale_factors *sf, struct buf_2d *pre_buf,
struct buf_2d *dst_buf, const MV *mv, RefCntBuffer *ref_frame_buf,
int is_scaled, int ref) {
@@ -593,12 +593,6 @@ static void dec_build_inter_predictors(
y_pad = 1;
}
// Wait until reference block is ready. Pad 7 more pixels as last 7
// pixels of each superblock row can be changed by next superblock row.
if (worker != NULL)
vp9_frameworker_wait(worker, ref_frame_buf, VPXMAX(0, (y1 + 7))
<< (plane == 0 ? 0 : 1));
// Skip border extension if block is inside the frame.
if (x0 < 0 || x0 > frame_width - 1 || x1 < 0 || x1 > frame_width - 1 ||
y0 < 0 || y0 > frame_height - 1 || y1 < 0 || y1 > frame_height - 1) {
@@ -617,14 +611,6 @@ static void dec_build_inter_predictors(
w, h, ref, xs, ys);
return;
}
} else {
// Wait until reference block is ready. Pad 7 more pixels as last 7
// pixels of each superblock row can be changed by next superblock row.
if (worker != NULL) {
const int y1 = (y0_16 + (h - 1) * ys) >> SUBPEL_BITS;
vp9_frameworker_wait(worker, ref_frame_buf, VPXMAX(0, (y1 + 7))
<< (plane == 0 ? 0 : 1));
}
}
#if CONFIG_VP9_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
@@ -653,8 +639,6 @@ static void dec_build_inter_predictors_sb(VP9Decoder *const pbi,
const int is_compound = has_second_ref(mi);
int ref;
int is_scaled;
VPxWorker *const fwo =
pbi->frame_parallel_decode ? pbi->frame_worker_owner : NULL;
for (ref = 0; ref < 1 + is_compound; ++ref) {
const MV_REFERENCE_FRAME frame = mi->ref_frame[ref];
@@ -686,10 +670,10 @@ static void dec_build_inter_predictors_sb(VP9Decoder *const pbi,
for (y = 0; y < num_4x4_h; ++y) {
for (x = 0; x < num_4x4_w; ++x) {
const MV mv = average_split_mvs(pd, mi, ref, i++);
dec_build_inter_predictors(fwo, xd, plane, n4w_x4, n4h_x4, 4 * x,
4 * y, 4, 4, mi_x, mi_y, kernel, sf,
pre_buf, dst_buf, &mv, ref_frame_buf,
is_scaled, ref);
dec_build_inter_predictors(xd, plane, n4w_x4, n4h_x4, 4 * x, 4 * y,
4, 4, mi_x, mi_y, kernel, sf, pre_buf,
dst_buf, &mv, ref_frame_buf, is_scaled,
ref);
}
}
}
@@ -703,7 +687,7 @@ static void dec_build_inter_predictors_sb(VP9Decoder *const pbi,
const int n4w_x4 = 4 * num_4x4_w;
const int n4h_x4 = 4 * num_4x4_h;
struct buf_2d *const pre_buf = &pd->pre[ref];
dec_build_inter_predictors(fwo, xd, plane, n4w_x4, n4h_x4, 0, 0, n4w_x4,
dec_build_inter_predictors(xd, plane, n4w_x4, n4h_x4, 0, 0, n4w_x4,
n4h_x4, mi_x, mi_y, kernel, sf, pre_buf,
dst_buf, &mv, ref_frame_buf, is_scaled, ref);
}
@@ -1473,11 +1457,6 @@ static const uint8_t *decode_tiles(VP9Decoder *pbi, const uint8_t *data,
winterface->execute(&pbi->lf_worker);
}
}
// After loopfiltering, the last 7 row pixels in each superblock row may
// still be changed by the longest loopfilter of the next superblock
// row.
if (pbi->frame_parallel_decode)
vp9_frameworker_broadcast(pbi->cur_buf, mi_row << MI_BLOCK_SIZE_LOG2);
}
}
@@ -1493,8 +1472,6 @@ static const uint8_t *decode_tiles(VP9Decoder *pbi, const uint8_t *data,
// Get last tile data.
tile_data = pbi->tile_worker_data + tile_cols * tile_rows - 1;
if (pbi->frame_parallel_decode)
vp9_frameworker_broadcast(pbi->cur_buf, INT_MAX);
return vpx_reader_find_end(&tile_data->bit_reader);
}
@@ -1793,10 +1770,6 @@ static size_t read_uncompressed_header(VP9Decoder *pbi,
cm->lf.filter_level = 0;
cm->show_frame = 1;
if (pbi->frame_parallel_decode) {
for (i = 0; i < REF_FRAMES; ++i)
cm->next_ref_frame_map[i] = cm->ref_frame_map[i];
}
return 0;
}
@@ -2090,24 +2063,6 @@ void vp9_decode_frame(VP9Decoder *pbi, const uint8_t *data,
vp9_loop_filter_frame_init(cm, cm->lf.filter_level);
}
// If encoded in frame parallel mode, frame context is ready after decoding
// the frame header.
if (pbi->frame_parallel_decode && cm->frame_parallel_decoding_mode) {
VPxWorker *const worker = pbi->frame_worker_owner;
FrameWorkerData *const frame_worker_data = worker->data1;
if (cm->refresh_frame_context) {
context_updated = 1;
cm->frame_contexts[cm->frame_context_idx] = *cm->fc;
}
vp9_frameworker_lock_stats(worker);
pbi->cur_buf->row = -1;
pbi->cur_buf->col = -1;
frame_worker_data->frame_context_ready = 1;
// Signal the main thread that context is ready.
vp9_frameworker_signal_stats(worker);
vp9_frameworker_unlock_stats(worker);
}
if (pbi->tile_worker_data == NULL ||
(tile_cols * tile_rows) != pbi->total_tiles) {
const int num_tile_workers =

View File

@@ -455,12 +455,6 @@ static void dec_find_best_ref_mvs(int allow_hp, int_mv *mvlist, int_mv *best_mv,
}
}
static void fpm_sync(void *const data, int mi_row) {
VP9Decoder *const pbi = (VP9Decoder *)data;
vp9_frameworker_wait(pbi->frame_worker_owner, pbi->common.prev_frame,
mi_row << MI_BLOCK_SIZE_LOG2);
}
// This macro is used to add a motion vector mv_ref list if it isn't
// already in the list. If it's the second motion vector or early_break
// it will also skip all additional processing and jump to Done!
@@ -500,8 +494,7 @@ static int dec_find_mv_refs(const VP9_COMMON *cm, const MACROBLOCKD *xd,
PREDICTION_MODE mode, MV_REFERENCE_FRAME ref_frame,
const POSITION *const mv_ref_search,
int_mv *mv_ref_list, int mi_row, int mi_col,
int block, int is_sub8x8, find_mv_refs_sync sync,
void *const data) {
int block, int is_sub8x8) {
const int *ref_sign_bias = cm->ref_frame_sign_bias;
int i, refmv_count = 0;
int different_ref_found = 0;
@@ -557,23 +550,8 @@ static int dec_find_mv_refs(const VP9_COMMON *cm, const MACROBLOCKD *xd,
}
}
// TODO(hkuang): Remove this sync after fixing pthread_cond_broadcast
// on windows platform. The sync here is unnecessary if use_prev_frame_mvs
// is 0. But after removing it, there will be hang in the unit test on windows
// due to several threads waiting for a thread's signal.
#if defined(_WIN32) && !HAVE_PTHREAD_H
if (cm->frame_parallel_decode && sync != NULL) {
sync(data, mi_row);
}
#endif
// Check the last frame's mode and mv info.
if (prev_frame_mvs) {
// Synchronize here for frame parallel decode if sync function is provided.
if (cm->frame_parallel_decode && sync != NULL) {
sync(data, mi_row);
}
if (prev_frame_mvs->ref_frame[0] == ref_frame) {
ADD_MV_REF_LIST_EB(prev_frame_mvs->mv[0], refmv_count, mv_ref_list, Done);
} else if (prev_frame_mvs->ref_frame[1] == ref_frame) {
@@ -652,7 +630,7 @@ static void append_sub8x8_mvs_for_idx(VP9_COMMON *cm, MACROBLOCKD *xd,
refmv_count =
dec_find_mv_refs(cm, xd, b_mode, mi->ref_frame[ref], mv_ref_search,
mv_list, mi_row, mi_col, block, 1, NULL, NULL);
mv_list, mi_row, mi_col, block, 1);
switch (block) {
case 0: best_sub8x8->as_int = mv_list[refmv_count - 1].as_int; break;
@@ -750,9 +728,8 @@ static void read_inter_block_mode_info(VP9Decoder *const pbi,
const MV_REFERENCE_FRAME frame = mi->ref_frame[ref];
int refmv_count;
refmv_count =
dec_find_mv_refs(cm, xd, mi->mode, frame, mv_ref_search, tmp_mvs,
mi_row, mi_col, -1, 0, fpm_sync, (void *)pbi);
refmv_count = dec_find_mv_refs(cm, xd, mi->mode, frame, mv_ref_search,
tmp_mvs, mi_row, mi_col, -1, 0);
dec_find_best_ref_mvs(allow_hp, tmp_mvs, &best_ref_mvs[ref],
refmv_count);

View File

@@ -254,11 +254,9 @@ static void swap_frame_buffers(VP9Decoder *pbi) {
pbi->hold_ref_buf = 0;
cm->frame_to_show = get_frame_new_buffer(cm);
if (!pbi->frame_parallel_decode || !cm->show_frame) {
lock_buffer_pool(pool);
--frame_bufs[cm->new_fb_idx].ref_count;
unlock_buffer_pool(pool);
}
lock_buffer_pool(pool);
--frame_bufs[cm->new_fb_idx].ref_count;
unlock_buffer_pool(pool);
// Invalidate these references until the next frame starts.
for (ref_index = 0; ref_index < 3; ref_index++)
@@ -292,9 +290,7 @@ int vp9_receive_compressed_data(VP9Decoder *pbi, size_t size,
pbi->ready_for_new_data = 0;
// Check if the previous frame was a frame without any references to it.
// Release frame buffer if not decoding in frame parallel mode.
if (!pbi->frame_parallel_decode && cm->new_fb_idx >= 0 &&
frame_bufs[cm->new_fb_idx].ref_count == 0)
if (cm->new_fb_idx >= 0 && frame_bufs[cm->new_fb_idx].ref_count == 0)
pool->release_fb_cb(pool->cb_priv,
&frame_bufs[cm->new_fb_idx].raw_frame_buffer);
// Find a free frame buffer. Return error if can not find any.
@@ -309,18 +305,7 @@ int vp9_receive_compressed_data(VP9Decoder *pbi, size_t size,
cm->cur_frame = &pool->frame_bufs[cm->new_fb_idx];
pbi->hold_ref_buf = 0;
if (pbi->frame_parallel_decode) {
VPxWorker *const worker = pbi->frame_worker_owner;
vp9_frameworker_lock_stats(worker);
frame_bufs[cm->new_fb_idx].frame_worker_owner = worker;
// Reset decoding progress.
pbi->cur_buf = &frame_bufs[cm->new_fb_idx];
pbi->cur_buf->row = -1;
pbi->cur_buf->col = -1;
vp9_frameworker_unlock_stats(worker);
} else {
pbi->cur_buf = &frame_bufs[cm->new_fb_idx];
}
pbi->cur_buf = &frame_bufs[cm->new_fb_idx];
if (setjmp(cm->error.jmp)) {
const VPxWorkerInterface *const winterface = vpx_get_worker_interface();
@@ -377,31 +362,14 @@ int vp9_receive_compressed_data(VP9Decoder *pbi, size_t size,
if (!cm->show_existing_frame) {
cm->last_show_frame = cm->show_frame;
cm->prev_frame = cm->cur_frame;
if (cm->seg.enabled && !pbi->frame_parallel_decode)
vp9_swap_current_and_last_seg_map(cm);
if (cm->seg.enabled) vp9_swap_current_and_last_seg_map(cm);
}
// Update progress in frame parallel decode.
if (pbi->frame_parallel_decode) {
// Need to lock the mutex here as another thread may
// be accessing this buffer.
VPxWorker *const worker = pbi->frame_worker_owner;
FrameWorkerData *const frame_worker_data = worker->data1;
vp9_frameworker_lock_stats(worker);
if (cm->show_frame) {
cm->current_video_frame++;
}
frame_worker_data->frame_decoded = 1;
frame_worker_data->frame_context_ready = 1;
vp9_frameworker_signal_stats(worker);
vp9_frameworker_unlock_stats(worker);
} else {
cm->last_width = cm->width;
cm->last_height = cm->height;
if (cm->show_frame) {
cm->current_video_frame++;
}
cm->last_width = cm->width;
cm->last_height = cm->height;
if (cm->show_frame) {
cm->current_video_frame++;
}
cm->error.setjmp = 0;

View File

@@ -53,8 +53,6 @@ typedef struct VP9Decoder {
int refresh_frame_flags;
int frame_parallel_decode; // frame-based threading.
// TODO(hkuang): Combine this with cur_buf in macroblockd as they are
// the same.
RefCntBuffer *cur_buf; // Current decoding frame buffer.

View File

@@ -347,13 +347,7 @@ static vpx_codec_err_t init_decoder(vpx_codec_alg_priv_t *ctx) {
frame_worker_data->pbi->max_threads = ctx->cfg.threads;
frame_worker_data->pbi->inv_tile_order = ctx->invert_tile_order;
frame_worker_data->pbi->frame_parallel_decode = 0;
frame_worker_data->pbi->common.frame_parallel_decode = 0;
worker->hook = (VPxWorkerHook)frame_worker_hook;
if (!winterface->reset(worker)) {
set_error_detail(ctx, "Frame Worker thread creation failed");
return VPX_CODEC_MEM_ERROR;
}
}
// If postprocessing was enabled by the application and a