From b77fffa127663028169c5ed543956af4b9496c29 Mon Sep 17 00:00:00 2001 From: Anton Khirnov Date: Wed, 13 Apr 2016 13:52:36 +0200 Subject: [PATCH] h264: make slice threading work with deblocking_filter=1 In such a case, decode the MBs in parallel without the loop filter, then execute the filter serially. The ref2frm array was previously moved to H264SliceContext. That was incorrect, since it applies to all the slices and should properly be in H264Context (it did not actually break decoding, since this distinction only becomes relevant with slice threading and deblocking_filter=1, which was not implemented before this commit). The ref2frm array is thus moved back to H264Context. --- libavcodec/h264.c | 5 ---- libavcodec/h264.h | 13 ++++----- libavcodec/h264_slice.c | 62 ++++++++++++++++++++++++++++------------- 3 files changed, 48 insertions(+), 32 deletions(-) diff --git a/libavcodec/h264.c b/libavcodec/h264.c index c024d7e03a..27cbcd21aa 100644 --- a/libavcodec/h264.c +++ b/libavcodec/h264.c @@ -837,7 +837,6 @@ static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size) nal->ref_idc == 0 && nal->type != NAL_SEI) continue; -again: // FIXME these should stop being context-global variables h->nal_ref_idc = nal->ref_idc; h->nal_unit_type = nal->type; @@ -947,10 +946,6 @@ again: if (err < 0) { av_log(h->avctx, AV_LOG_ERROR, "decode_slice_header error\n"); sl->ref_count[0] = sl->ref_count[1] = sl->list_count = 0; - } else if (err == 1) { - /* Slice could not be decoded in parallel mode, restart. */ - sl = &h->slice_ctx[0]; - goto again; } } if (context_count) { diff --git a/libavcodec/h264.h b/libavcodec/h264.h index daad1be152..5c2c8109b2 100644 --- a/libavcodec/h264.h +++ b/libavcodec/h264.h @@ -392,7 +392,6 @@ typedef struct H264SliceContext { H264Ref ref_list[2][48]; /**< 0..15: frame refs, 16..47: mbaff field refs. * Reordered version of default_ref_list * according to picture reordering in slice header */ - int ref2frm[MAX_SLICES][2][64]; ///< reference to frame number lists, used in the loop filter, the first 2 are for -2,-1 const uint8_t *intra_pcm_ptr; int16_t *dc_val_base; @@ -470,6 +469,11 @@ typedef struct H264Context { int context_initialized; int flags; int workaround_bugs; + /* Set when slice threading is used and at least one slice uses deblocking + * mode 1 (i.e. across slice boundaries). Then we disable the loop filter + * during normal MB decoding and execute it serially at the end. + */ + int postpone_filter; int8_t(*intra4x4_pred_mode); H264PredContext hpc; @@ -591,12 +595,6 @@ typedef struct H264Context { int slice_context_count; - /** - * 1 if the single thread fallback warning has already been - * displayed, 0 otherwise. - */ - int single_decode_warning; - /** @} */ /** @@ -642,6 +640,7 @@ typedef struct H264Context { AVBufferPool *mb_type_pool; AVBufferPool *motion_val_pool; AVBufferPool *ref_index_pool; + int ref2frm[MAX_SLICES][2][64]; ///< reference to frame number lists, used in the loop filter, the first 2 are for -2,-1 } H264Context; extern const uint16_t ff_h264_mb_sizes[4]; diff --git a/libavcodec/h264_slice.c b/libavcodec/h264_slice.c index 9e08c0b64e..240feb9d0f 100644 --- a/libavcodec/h264_slice.c +++ b/libavcodec/h264_slice.c @@ -498,6 +498,8 @@ static int h264_frame_start(H264Context *h) h->next_output_pic = NULL; + h->postpone_filter = 0; + assert(h->cur_pic_ptr->long_ref == 0); return 0; @@ -920,7 +922,7 @@ static int h264_slice_header_init(H264Context *h) * * @param h h264context * - * @return 0 if okay, <0 if an error occurred, 1 if decoding must not be multithreaded + * @return 0 if okay, <0 if an error occurred */ int ff_h264_decode_slice_header(H264Context *h, H264SliceContext *sl) { @@ -1481,17 +1483,7 @@ int ff_h264_decode_slice_header(H264Context *h, H264SliceContext *sl) * Do not bother to deblock across slices. */ sl->deblocking_filter = 2; } else { - h->max_contexts = 1; - if (!h->single_decode_warning) { - av_log(h->avctx, AV_LOG_INFO, - "Cannot parallelize deblocking type 1, decoding such frames in sequential order\n"); - h->single_decode_warning = 1; - } - if (sl != h->slice_ctx) { - av_log(h->avctx, AV_LOG_ERROR, - "Deblocking switched inside frame.\n"); - return 1; - } + h->postpone_filter = 1; } } sl->qp_thresh = 15 - @@ -1509,7 +1501,7 @@ int ff_h264_decode_slice_header(H264Context *h, H264SliceContext *sl) for (j = 0; j < 2; j++) { int id_list[16]; - int *ref2frm = sl->ref2frm[sl->slice_num & (MAX_SLICES - 1)][j]; + int *ref2frm = h->ref2frm[sl->slice_num & (MAX_SLICES - 1)][j]; for (i = 0; i < 16; i++) { id_list[i] = 60; if (j < sl->list_count && i < sl->ref_count[j] && @@ -1597,7 +1589,7 @@ static av_always_inline void fill_filter_caches_inter(const H264Context *h, if (USES_LIST(top_type, list)) { const int b_xy = h->mb2b_xy[top_xy] + 3 * b_stride; const int b8_xy = 4 * top_xy + 2; - int (*ref2frm)[64] = sl->ref2frm[h->slice_table[top_xy] & (MAX_SLICES - 1)][0] + (MB_MBAFF(sl) ? 20 : 2); + int (*ref2frm)[64] = h->ref2frm[h->slice_table[top_xy] & (MAX_SLICES - 1)][0] + (MB_MBAFF(sl) ? 20 : 2); AV_COPY128(mv_dst - 1 * 8, h->cur_pic.motion_val[list][b_xy + 0]); ref_cache[0 - 1 * 8] = ref_cache[1 - 1 * 8] = ref2frm[list][h->cur_pic.ref_index[list][b8_xy + 0]]; @@ -1612,7 +1604,7 @@ static av_always_inline void fill_filter_caches_inter(const H264Context *h, if (USES_LIST(left_type[LTOP], list)) { const int b_xy = h->mb2b_xy[left_xy[LTOP]] + 3; const int b8_xy = 4 * left_xy[LTOP] + 1; - int (*ref2frm)[64] = sl->ref2frm[h->slice_table[left_xy[LTOP]] & (MAX_SLICES - 1)][0] + (MB_MBAFF(sl) ? 20 : 2); + int (*ref2frm)[64] = h->ref2frm[h->slice_table[left_xy[LTOP]] & (MAX_SLICES - 1)][0] + (MB_MBAFF(sl) ? 20 : 2); AV_COPY32(mv_dst - 1 + 0, h->cur_pic.motion_val[list][b_xy + b_stride * 0]); AV_COPY32(mv_dst - 1 + 8, h->cur_pic.motion_val[list][b_xy + b_stride * 1]); AV_COPY32(mv_dst - 1 + 16, h->cur_pic.motion_val[list][b_xy + b_stride * 2]); @@ -1645,7 +1637,7 @@ static av_always_inline void fill_filter_caches_inter(const H264Context *h, { int8_t *ref = &h->cur_pic.ref_index[list][4 * mb_xy]; - int (*ref2frm)[64] = sl->ref2frm[sl->slice_num & (MAX_SLICES - 1)][0] + (MB_MBAFF(sl) ? 20 : 2); + int (*ref2frm)[64] = h->ref2frm[sl->slice_num & (MAX_SLICES - 1)][0] + (MB_MBAFF(sl) ? 20 : 2); uint32_t ref01 = (pack16to32(ref2frm[list][ref[0]], ref2frm[list][ref[1]]) & 0x00FF00FF) * 0x0101; uint32_t ref23 = (pack16to32(ref2frm[list][ref[2]], ref2frm[list][ref[3]]) & 0x00FF00FF) * 0x0101; AV_WN32A(&ref_cache[0 * 8], ref01); @@ -1820,6 +1812,9 @@ static void loop_filter(const H264Context *h, H264SliceContext *sl, int start_x, const int pixel_shift = h->pixel_shift; const int block_h = 16 >> h->chroma_y_shift; + if (h->postpone_filter) + return; + if (sl->deblocking_filter) { for (mb_x = start_x; mb_x < end_x; mb_x++) for (mb_y = end_mb_y - FRAME_MBAFF(h); mb_y <= end_mb_y; mb_y++) { @@ -1944,6 +1939,7 @@ static int decode_slice(struct AVCodecContext *avctx, void *arg) H264SliceContext *sl = arg; const H264Context *h = sl->h264; int lf_x_start = sl->mb_x; + int orig_deblock = sl->deblocking_filter; int ret; sl->linesize = h->cur_pic_ptr->f->linesize[0]; @@ -1955,6 +1951,9 @@ static int decode_slice(struct AVCodecContext *avctx, void *arg) sl->mb_skip_run = -1; + if (h->postpone_filter) + sl->deblocking_filter = 0; + sl->is_complex = FRAME_MBAFF(h) || h->picture_structure != PICT_FRAME || avctx->codec_id != AV_CODEC_ID_H264 || (CONFIG_GRAY && (h->flags & AV_CODEC_FLAG_GRAY)); @@ -2004,7 +2003,7 @@ static int decode_slice(struct AVCodecContext *avctx, void *arg) sl->mb_y, ER_MB_END); if (sl->mb_x >= lf_x_start) loop_filter(h, sl, lf_x_start, sl->mb_x + 1); - return 0; + goto finish; } if (ret < 0 || sl->cabac.bytestream > sl->cabac.bytestream_end + 2) { av_log(h->avctx, AV_LOG_ERROR, @@ -2035,7 +2034,7 @@ static int decode_slice(struct AVCodecContext *avctx, void *arg) sl->mb_y, ER_MB_END); if (sl->mb_x > lf_x_start) loop_filter(h, sl, lf_x_start, sl->mb_x); - return 0; + goto finish; } } } else { @@ -2089,7 +2088,7 @@ static int decode_slice(struct AVCodecContext *avctx, void *arg) er_add_slice(sl, sl->resync_mb_x, sl->resync_mb_y, sl->mb_x - 1, sl->mb_y, ER_MB_END); - return 0; + goto finish; } else { er_add_slice(sl, sl->resync_mb_x, sl->resync_mb_y, sl->mb_x - 1, sl->mb_y, ER_MB_END); @@ -2109,7 +2108,7 @@ static int decode_slice(struct AVCodecContext *avctx, void *arg) if (sl->mb_x > lf_x_start) loop_filter(h, sl, lf_x_start, sl->mb_x); - return 0; + goto finish; } else { er_add_slice(sl, sl->resync_mb_x, sl->resync_mb_y, sl->mb_x, sl->mb_y, ER_MB_ERROR); @@ -2119,6 +2118,10 @@ static int decode_slice(struct AVCodecContext *avctx, void *arg) } } } + +finish: + sl->deblocking_filter = orig_deblock; + return 0; } /** @@ -2139,6 +2142,7 @@ int ff_h264_execute_decode_slices(H264Context *h, unsigned context_count) int ret; h->slice_ctx[0].next_slice_idx = h->mb_width * h->mb_height; + h->postpone_filter = 0; ret = decode_slice(avctx, &h->slice_ctx[0]); h->mb_y = h->slice_ctx[0].mb_y; @@ -2172,6 +2176,24 @@ int ff_h264_execute_decode_slices(H264Context *h, unsigned context_count) h->mb_y = sl->mb_y; for (i = 1; i < context_count; i++) h->slice_ctx[0].er.error_count += h->slice_ctx[i].er.error_count; + + if (h->postpone_filter) { + h->postpone_filter = 0; + + for (i = 0; i < context_count; i++) { + int y_end, x_end; + + sl = &h->slice_ctx[i]; + y_end = FFMIN(sl->mb_y + 1, h->mb_height); + x_end = (sl->mb_y >= h->mb_height) ? h->mb_width : sl->mb_x; + + for (j = sl->resync_mb_y; j < y_end; j += 1 + FIELD_OR_MBAFF_PICTURE(h)) { + sl->mb_y = j; + loop_filter(h, sl, j > sl->resync_mb_y ? 0 : sl->resync_mb_x, + j == y_end - 1 ? x_end : h->mb_width); + } + } + } } return 0;