Merge changes from topic 'rm-dec-frame-parallel'

* changes:
  VP9_COMMON: rm frame_parallel_decode
  VP9Decoder: rm frame_parallel_decode
  vp9_dx: rm worker thread creation
This commit is contained in:
James Zern
2017-07-05 23:53:22 +00:00
committed by Gerrit Code Review
8 changed files with 26 additions and 142 deletions

View File

@@ -62,8 +62,7 @@ static int alloc_seg_map(VP9_COMMON *cm, int seg_map_size) {
cm->prev_seg_map_idx = 1; cm->prev_seg_map_idx = 1;
cm->current_frame_seg_map = cm->seg_map_array[cm->seg_map_idx]; cm->current_frame_seg_map = cm->seg_map_array[cm->seg_map_idx];
if (!cm->frame_parallel_decode) cm->last_frame_seg_map = cm->seg_map_array[cm->prev_seg_map_idx];
cm->last_frame_seg_map = cm->seg_map_array[cm->prev_seg_map_idx];
return 0; return 0;
} }
@@ -77,10 +76,7 @@ static void free_seg_map(VP9_COMMON *cm) {
} }
cm->current_frame_seg_map = NULL; cm->current_frame_seg_map = NULL;
cm->last_frame_seg_map = NULL;
if (!cm->frame_parallel_decode) {
cm->last_frame_seg_map = NULL;
}
} }
void vp9_free_ref_frame_buffers(BufferPool *pool) { void vp9_free_ref_frame_buffers(BufferPool *pool) {
@@ -186,7 +182,7 @@ void vp9_remove_common(VP9_COMMON *cm) {
void vp9_init_context_buffers(VP9_COMMON *cm) { void vp9_init_context_buffers(VP9_COMMON *cm) {
cm->setup_mi(cm); cm->setup_mi(cm);
if (cm->last_frame_seg_map && !cm->frame_parallel_decode) if (cm->last_frame_seg_map)
memset(cm->last_frame_seg_map, 0, cm->mi_rows * cm->mi_cols); memset(cm->last_frame_seg_map, 0, cm->mi_rows * cm->mi_cols);
} }

View File

@@ -428,7 +428,7 @@ void vp9_setup_past_independence(VP9_COMMON *cm) {
vp9_clearall_segfeatures(&cm->seg); vp9_clearall_segfeatures(&cm->seg);
cm->seg.abs_delta = SEGMENT_DELTADATA; cm->seg.abs_delta = SEGMENT_DELTADATA;
if (cm->last_frame_seg_map && !cm->frame_parallel_decode) if (cm->last_frame_seg_map)
memset(cm->last_frame_seg_map, 0, (cm->mi_rows * cm->mi_cols)); memset(cm->last_frame_seg_map, 0, (cm->mi_rows * cm->mi_cols));
if (cm->current_frame_seg_map) if (cm->current_frame_seg_map)
@@ -457,7 +457,7 @@ void vp9_setup_past_independence(VP9_COMMON *cm) {
} }
// prev_mip will only be allocated in encoder. // prev_mip will only be allocated in encoder.
if (frame_is_intra_only(cm) && cm->prev_mip && !cm->frame_parallel_decode) if (frame_is_intra_only(cm) && cm->prev_mip)
memset(cm->prev_mip, 0, memset(cm->prev_mip, 0,
cm->mi_stride * (cm->mi_rows + 1) * sizeof(*cm->prev_mip)); cm->mi_stride * (cm->mi_rows + 1) * sizeof(*cm->prev_mip));

View File

@@ -235,10 +235,6 @@ typedef struct VP9Common {
struct loopfilter lf; struct loopfilter lf;
struct segmentation seg; struct segmentation seg;
// TODO(hkuang): Remove this as it is the same as frame_parallel_decode
// in pbi.
int frame_parallel_decode; // frame-based threading.
// Context probabilities for reference frame prediction // Context probabilities for reference frame prediction
MV_REFERENCE_FRAME comp_fixed_ref; MV_REFERENCE_FRAME comp_fixed_ref;
MV_REFERENCE_FRAME comp_var_ref[2]; MV_REFERENCE_FRAME comp_var_ref[2];

View File

@@ -490,8 +490,8 @@ static void extend_and_predict(const uint8_t *buf_ptr1, int pre_buf_stride,
#endif // CONFIG_VP9_HIGHBITDEPTH #endif // CONFIG_VP9_HIGHBITDEPTH
static void dec_build_inter_predictors( static void dec_build_inter_predictors(
VPxWorker *const worker, MACROBLOCKD *xd, int plane, int bw, int bh, int x, MACROBLOCKD *xd, int plane, int bw, int bh, int x, int y, int w, int h,
int y, int w, int h, int mi_x, int mi_y, const InterpKernel *kernel, int mi_x, int mi_y, const InterpKernel *kernel,
const struct scale_factors *sf, struct buf_2d *pre_buf, const struct scale_factors *sf, struct buf_2d *pre_buf,
struct buf_2d *dst_buf, const MV *mv, RefCntBuffer *ref_frame_buf, struct buf_2d *dst_buf, const MV *mv, RefCntBuffer *ref_frame_buf,
int is_scaled, int ref) { int is_scaled, int ref) {
@@ -593,12 +593,6 @@ static void dec_build_inter_predictors(
y_pad = 1; y_pad = 1;
} }
// Wait until reference block is ready. Pad 7 more pixels as last 7
// pixels of each superblock row can be changed by next superblock row.
if (worker != NULL)
vp9_frameworker_wait(worker, ref_frame_buf, VPXMAX(0, (y1 + 7))
<< (plane == 0 ? 0 : 1));
// Skip border extension if block is inside the frame. // Skip border extension if block is inside the frame.
if (x0 < 0 || x0 > frame_width - 1 || x1 < 0 || x1 > frame_width - 1 || if (x0 < 0 || x0 > frame_width - 1 || x1 < 0 || x1 > frame_width - 1 ||
y0 < 0 || y0 > frame_height - 1 || y1 < 0 || y1 > frame_height - 1) { y0 < 0 || y0 > frame_height - 1 || y1 < 0 || y1 > frame_height - 1) {
@@ -617,14 +611,6 @@ static void dec_build_inter_predictors(
w, h, ref, xs, ys); w, h, ref, xs, ys);
return; return;
} }
} else {
// Wait until reference block is ready. Pad 7 more pixels as last 7
// pixels of each superblock row can be changed by next superblock row.
if (worker != NULL) {
const int y1 = (y0_16 + (h - 1) * ys) >> SUBPEL_BITS;
vp9_frameworker_wait(worker, ref_frame_buf, VPXMAX(0, (y1 + 7))
<< (plane == 0 ? 0 : 1));
}
} }
#if CONFIG_VP9_HIGHBITDEPTH #if CONFIG_VP9_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
@@ -653,8 +639,6 @@ static void dec_build_inter_predictors_sb(VP9Decoder *const pbi,
const int is_compound = has_second_ref(mi); const int is_compound = has_second_ref(mi);
int ref; int ref;
int is_scaled; int is_scaled;
VPxWorker *const fwo =
pbi->frame_parallel_decode ? pbi->frame_worker_owner : NULL;
for (ref = 0; ref < 1 + is_compound; ++ref) { for (ref = 0; ref < 1 + is_compound; ++ref) {
const MV_REFERENCE_FRAME frame = mi->ref_frame[ref]; const MV_REFERENCE_FRAME frame = mi->ref_frame[ref];
@@ -686,10 +670,10 @@ static void dec_build_inter_predictors_sb(VP9Decoder *const pbi,
for (y = 0; y < num_4x4_h; ++y) { for (y = 0; y < num_4x4_h; ++y) {
for (x = 0; x < num_4x4_w; ++x) { for (x = 0; x < num_4x4_w; ++x) {
const MV mv = average_split_mvs(pd, mi, ref, i++); const MV mv = average_split_mvs(pd, mi, ref, i++);
dec_build_inter_predictors(fwo, xd, plane, n4w_x4, n4h_x4, 4 * x, dec_build_inter_predictors(xd, plane, n4w_x4, n4h_x4, 4 * x, 4 * y,
4 * y, 4, 4, mi_x, mi_y, kernel, sf, 4, 4, mi_x, mi_y, kernel, sf, pre_buf,
pre_buf, dst_buf, &mv, ref_frame_buf, dst_buf, &mv, ref_frame_buf, is_scaled,
is_scaled, ref); ref);
} }
} }
} }
@@ -703,7 +687,7 @@ static void dec_build_inter_predictors_sb(VP9Decoder *const pbi,
const int n4w_x4 = 4 * num_4x4_w; const int n4w_x4 = 4 * num_4x4_w;
const int n4h_x4 = 4 * num_4x4_h; const int n4h_x4 = 4 * num_4x4_h;
struct buf_2d *const pre_buf = &pd->pre[ref]; struct buf_2d *const pre_buf = &pd->pre[ref];
dec_build_inter_predictors(fwo, xd, plane, n4w_x4, n4h_x4, 0, 0, n4w_x4, dec_build_inter_predictors(xd, plane, n4w_x4, n4h_x4, 0, 0, n4w_x4,
n4h_x4, mi_x, mi_y, kernel, sf, pre_buf, n4h_x4, mi_x, mi_y, kernel, sf, pre_buf,
dst_buf, &mv, ref_frame_buf, is_scaled, ref); dst_buf, &mv, ref_frame_buf, is_scaled, ref);
} }
@@ -1473,11 +1457,6 @@ static const uint8_t *decode_tiles(VP9Decoder *pbi, const uint8_t *data,
winterface->execute(&pbi->lf_worker); winterface->execute(&pbi->lf_worker);
} }
} }
// After loopfiltering, the last 7 row pixels in each superblock row may
// still be changed by the longest loopfilter of the next superblock
// row.
if (pbi->frame_parallel_decode)
vp9_frameworker_broadcast(pbi->cur_buf, mi_row << MI_BLOCK_SIZE_LOG2);
} }
} }
@@ -1493,8 +1472,6 @@ static const uint8_t *decode_tiles(VP9Decoder *pbi, const uint8_t *data,
// Get last tile data. // Get last tile data.
tile_data = pbi->tile_worker_data + tile_cols * tile_rows - 1; tile_data = pbi->tile_worker_data + tile_cols * tile_rows - 1;
if (pbi->frame_parallel_decode)
vp9_frameworker_broadcast(pbi->cur_buf, INT_MAX);
return vpx_reader_find_end(&tile_data->bit_reader); return vpx_reader_find_end(&tile_data->bit_reader);
} }
@@ -1793,10 +1770,6 @@ static size_t read_uncompressed_header(VP9Decoder *pbi,
cm->lf.filter_level = 0; cm->lf.filter_level = 0;
cm->show_frame = 1; cm->show_frame = 1;
if (pbi->frame_parallel_decode) {
for (i = 0; i < REF_FRAMES; ++i)
cm->next_ref_frame_map[i] = cm->ref_frame_map[i];
}
return 0; return 0;
} }
@@ -2090,24 +2063,6 @@ void vp9_decode_frame(VP9Decoder *pbi, const uint8_t *data,
vp9_loop_filter_frame_init(cm, cm->lf.filter_level); vp9_loop_filter_frame_init(cm, cm->lf.filter_level);
} }
// If encoded in frame parallel mode, frame context is ready after decoding
// the frame header.
if (pbi->frame_parallel_decode && cm->frame_parallel_decoding_mode) {
VPxWorker *const worker = pbi->frame_worker_owner;
FrameWorkerData *const frame_worker_data = worker->data1;
if (cm->refresh_frame_context) {
context_updated = 1;
cm->frame_contexts[cm->frame_context_idx] = *cm->fc;
}
vp9_frameworker_lock_stats(worker);
pbi->cur_buf->row = -1;
pbi->cur_buf->col = -1;
frame_worker_data->frame_context_ready = 1;
// Signal the main thread that context is ready.
vp9_frameworker_signal_stats(worker);
vp9_frameworker_unlock_stats(worker);
}
if (pbi->tile_worker_data == NULL || if (pbi->tile_worker_data == NULL ||
(tile_cols * tile_rows) != pbi->total_tiles) { (tile_cols * tile_rows) != pbi->total_tiles) {
const int num_tile_workers = const int num_tile_workers =

View File

@@ -455,12 +455,6 @@ static void dec_find_best_ref_mvs(int allow_hp, int_mv *mvlist, int_mv *best_mv,
} }
} }
static void fpm_sync(void *const data, int mi_row) {
VP9Decoder *const pbi = (VP9Decoder *)data;
vp9_frameworker_wait(pbi->frame_worker_owner, pbi->common.prev_frame,
mi_row << MI_BLOCK_SIZE_LOG2);
}
// This macro is used to add a motion vector mv_ref list if it isn't // This macro is used to add a motion vector mv_ref list if it isn't
// already in the list. If it's the second motion vector or early_break // already in the list. If it's the second motion vector or early_break
// it will also skip all additional processing and jump to Done! // it will also skip all additional processing and jump to Done!
@@ -500,8 +494,7 @@ static int dec_find_mv_refs(const VP9_COMMON *cm, const MACROBLOCKD *xd,
PREDICTION_MODE mode, MV_REFERENCE_FRAME ref_frame, PREDICTION_MODE mode, MV_REFERENCE_FRAME ref_frame,
const POSITION *const mv_ref_search, const POSITION *const mv_ref_search,
int_mv *mv_ref_list, int mi_row, int mi_col, int_mv *mv_ref_list, int mi_row, int mi_col,
int block, int is_sub8x8, find_mv_refs_sync sync, int block, int is_sub8x8) {
void *const data) {
const int *ref_sign_bias = cm->ref_frame_sign_bias; const int *ref_sign_bias = cm->ref_frame_sign_bias;
int i, refmv_count = 0; int i, refmv_count = 0;
int different_ref_found = 0; int different_ref_found = 0;
@@ -557,23 +550,8 @@ static int dec_find_mv_refs(const VP9_COMMON *cm, const MACROBLOCKD *xd,
} }
} }
// TODO(hkuang): Remove this sync after fixing pthread_cond_broadcast
// on windows platform. The sync here is unnecessary if use_prev_frame_mvs
// is 0. But after removing it, there will be hang in the unit test on windows
// due to several threads waiting for a thread's signal.
#if defined(_WIN32) && !HAVE_PTHREAD_H
if (cm->frame_parallel_decode && sync != NULL) {
sync(data, mi_row);
}
#endif
// Check the last frame's mode and mv info. // Check the last frame's mode and mv info.
if (prev_frame_mvs) { if (prev_frame_mvs) {
// Synchronize here for frame parallel decode if sync function is provided.
if (cm->frame_parallel_decode && sync != NULL) {
sync(data, mi_row);
}
if (prev_frame_mvs->ref_frame[0] == ref_frame) { if (prev_frame_mvs->ref_frame[0] == ref_frame) {
ADD_MV_REF_LIST_EB(prev_frame_mvs->mv[0], refmv_count, mv_ref_list, Done); ADD_MV_REF_LIST_EB(prev_frame_mvs->mv[0], refmv_count, mv_ref_list, Done);
} else if (prev_frame_mvs->ref_frame[1] == ref_frame) { } else if (prev_frame_mvs->ref_frame[1] == ref_frame) {
@@ -652,7 +630,7 @@ static void append_sub8x8_mvs_for_idx(VP9_COMMON *cm, MACROBLOCKD *xd,
refmv_count = refmv_count =
dec_find_mv_refs(cm, xd, b_mode, mi->ref_frame[ref], mv_ref_search, dec_find_mv_refs(cm, xd, b_mode, mi->ref_frame[ref], mv_ref_search,
mv_list, mi_row, mi_col, block, 1, NULL, NULL); mv_list, mi_row, mi_col, block, 1);
switch (block) { switch (block) {
case 0: best_sub8x8->as_int = mv_list[refmv_count - 1].as_int; break; case 0: best_sub8x8->as_int = mv_list[refmv_count - 1].as_int; break;
@@ -750,9 +728,8 @@ static void read_inter_block_mode_info(VP9Decoder *const pbi,
const MV_REFERENCE_FRAME frame = mi->ref_frame[ref]; const MV_REFERENCE_FRAME frame = mi->ref_frame[ref];
int refmv_count; int refmv_count;
refmv_count = refmv_count = dec_find_mv_refs(cm, xd, mi->mode, frame, mv_ref_search,
dec_find_mv_refs(cm, xd, mi->mode, frame, mv_ref_search, tmp_mvs, tmp_mvs, mi_row, mi_col, -1, 0);
mi_row, mi_col, -1, 0, fpm_sync, (void *)pbi);
dec_find_best_ref_mvs(allow_hp, tmp_mvs, &best_ref_mvs[ref], dec_find_best_ref_mvs(allow_hp, tmp_mvs, &best_ref_mvs[ref],
refmv_count); refmv_count);

View File

@@ -254,11 +254,9 @@ static void swap_frame_buffers(VP9Decoder *pbi) {
pbi->hold_ref_buf = 0; pbi->hold_ref_buf = 0;
cm->frame_to_show = get_frame_new_buffer(cm); cm->frame_to_show = get_frame_new_buffer(cm);
if (!pbi->frame_parallel_decode || !cm->show_frame) { lock_buffer_pool(pool);
lock_buffer_pool(pool); --frame_bufs[cm->new_fb_idx].ref_count;
--frame_bufs[cm->new_fb_idx].ref_count; unlock_buffer_pool(pool);
unlock_buffer_pool(pool);
}
// Invalidate these references until the next frame starts. // Invalidate these references until the next frame starts.
for (ref_index = 0; ref_index < 3; ref_index++) for (ref_index = 0; ref_index < 3; ref_index++)
@@ -292,9 +290,7 @@ int vp9_receive_compressed_data(VP9Decoder *pbi, size_t size,
pbi->ready_for_new_data = 0; pbi->ready_for_new_data = 0;
// Check if the previous frame was a frame without any references to it. // Check if the previous frame was a frame without any references to it.
// Release frame buffer if not decoding in frame parallel mode. if (cm->new_fb_idx >= 0 && frame_bufs[cm->new_fb_idx].ref_count == 0)
if (!pbi->frame_parallel_decode && cm->new_fb_idx >= 0 &&
frame_bufs[cm->new_fb_idx].ref_count == 0)
pool->release_fb_cb(pool->cb_priv, pool->release_fb_cb(pool->cb_priv,
&frame_bufs[cm->new_fb_idx].raw_frame_buffer); &frame_bufs[cm->new_fb_idx].raw_frame_buffer);
// Find a free frame buffer. Return error if can not find any. // Find a free frame buffer. Return error if can not find any.
@@ -309,18 +305,7 @@ int vp9_receive_compressed_data(VP9Decoder *pbi, size_t size,
cm->cur_frame = &pool->frame_bufs[cm->new_fb_idx]; cm->cur_frame = &pool->frame_bufs[cm->new_fb_idx];
pbi->hold_ref_buf = 0; pbi->hold_ref_buf = 0;
if (pbi->frame_parallel_decode) { pbi->cur_buf = &frame_bufs[cm->new_fb_idx];
VPxWorker *const worker = pbi->frame_worker_owner;
vp9_frameworker_lock_stats(worker);
frame_bufs[cm->new_fb_idx].frame_worker_owner = worker;
// Reset decoding progress.
pbi->cur_buf = &frame_bufs[cm->new_fb_idx];
pbi->cur_buf->row = -1;
pbi->cur_buf->col = -1;
vp9_frameworker_unlock_stats(worker);
} else {
pbi->cur_buf = &frame_bufs[cm->new_fb_idx];
}
if (setjmp(cm->error.jmp)) { if (setjmp(cm->error.jmp)) {
const VPxWorkerInterface *const winterface = vpx_get_worker_interface(); const VPxWorkerInterface *const winterface = vpx_get_worker_interface();
@@ -377,31 +362,14 @@ int vp9_receive_compressed_data(VP9Decoder *pbi, size_t size,
if (!cm->show_existing_frame) { if (!cm->show_existing_frame) {
cm->last_show_frame = cm->show_frame; cm->last_show_frame = cm->show_frame;
cm->prev_frame = cm->cur_frame; cm->prev_frame = cm->cur_frame;
if (cm->seg.enabled && !pbi->frame_parallel_decode) if (cm->seg.enabled) vp9_swap_current_and_last_seg_map(cm);
vp9_swap_current_and_last_seg_map(cm);
} }
// Update progress in frame parallel decode. // Update progress in frame parallel decode.
if (pbi->frame_parallel_decode) { cm->last_width = cm->width;
// Need to lock the mutex here as another thread may cm->last_height = cm->height;
// be accessing this buffer. if (cm->show_frame) {
VPxWorker *const worker = pbi->frame_worker_owner; cm->current_video_frame++;
FrameWorkerData *const frame_worker_data = worker->data1;
vp9_frameworker_lock_stats(worker);
if (cm->show_frame) {
cm->current_video_frame++;
}
frame_worker_data->frame_decoded = 1;
frame_worker_data->frame_context_ready = 1;
vp9_frameworker_signal_stats(worker);
vp9_frameworker_unlock_stats(worker);
} else {
cm->last_width = cm->width;
cm->last_height = cm->height;
if (cm->show_frame) {
cm->current_video_frame++;
}
} }
cm->error.setjmp = 0; cm->error.setjmp = 0;

View File

@@ -53,8 +53,6 @@ typedef struct VP9Decoder {
int refresh_frame_flags; int refresh_frame_flags;
int frame_parallel_decode; // frame-based threading.
// TODO(hkuang): Combine this with cur_buf in macroblockd as they are // TODO(hkuang): Combine this with cur_buf in macroblockd as they are
// the same. // the same.
RefCntBuffer *cur_buf; // Current decoding frame buffer. RefCntBuffer *cur_buf; // Current decoding frame buffer.

View File

@@ -347,13 +347,7 @@ static vpx_codec_err_t init_decoder(vpx_codec_alg_priv_t *ctx) {
frame_worker_data->pbi->max_threads = ctx->cfg.threads; frame_worker_data->pbi->max_threads = ctx->cfg.threads;
frame_worker_data->pbi->inv_tile_order = ctx->invert_tile_order; frame_worker_data->pbi->inv_tile_order = ctx->invert_tile_order;
frame_worker_data->pbi->frame_parallel_decode = 0;
frame_worker_data->pbi->common.frame_parallel_decode = 0;
worker->hook = (VPxWorkerHook)frame_worker_hook; worker->hook = (VPxWorkerHook)frame_worker_hook;
if (!winterface->reset(worker)) {
set_error_detail(ctx, "Frame Worker thread creation failed");
return VPX_CODEC_MEM_ERROR;
}
} }
// If postprocessing was enabled by the application and a // If postprocessing was enabled by the application and a