From cb79881b49d81dd809dc59c94e002c1de1434cd1 Mon Sep 17 00:00:00 2001 From: Anton Khirnov Date: Sat, 11 May 2013 21:40:28 +0200 Subject: [PATCH 1/2] vf_yadif: support slice threading --- libavfilter/vf_yadif.c | 85 ++++++++++++++++++++++++++++-------------- 1 file changed, 56 insertions(+), 29 deletions(-) diff --git a/libavfilter/vf_yadif.c b/libavfilter/vf_yadif.c index 2b89048cde..56da9e0e26 100644 --- a/libavfilter/vf_yadif.c +++ b/libavfilter/vf_yadif.c @@ -32,6 +32,14 @@ #undef NDEBUG #include <assert.h> +typedef struct ThreadData { + AVFrame *frame; + int plane; + int w, h; + int parity; + int tff; +} ThreadData; + #define CHECK(j)\ { int score = FFABS(cur[mrefs - 1 + (j)] - cur[prefs - 1 - (j)])\ + FFABS(cur[mrefs +(j)] - cur[prefs -(j)])\ @@ -173,50 +181,67 @@ static void filter_edges_16bit(void *dst1, void *prev1, void *cur1, void *next1, FILTER(w - 3, w, 0) } +static int filter_slice(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs) +{ + YADIFContext *s = ctx->priv; + ThreadData *td = arg; + int refs = s->cur->linesize[td->plane]; + int df = (s->csp->comp[td->plane].depth_minus1 + 8) / 8; + int pix_3 = 3 * df; + int slice_h = td->h / nb_jobs; + int slice_start = jobnr * slice_h; + int slice_end = (jobnr == nb_jobs - 1) ? td->h : (jobnr + 1) * slice_h; + int y; + + /* filtering reads 3 pixels to the left/right; to avoid invalid reads, + * we need to call the c variant which avoids this for border pixels + */ + for (y = slice_start; y < slice_end; y++) { + if ((y ^ td->parity) & 1) { + uint8_t *prev = &s->prev->data[td->plane][y * refs]; + uint8_t *cur = &s->cur ->data[td->plane][y * refs]; + uint8_t *next = &s->next->data[td->plane][y * refs]; + uint8_t *dst = &td->frame->data[td->plane][y * td->frame->linesize[td->plane]]; + int mode = y == 1 || y + 2 == td->h ? 2 : s->mode; + s->filter_line(dst + pix_3, prev + pix_3, cur + pix_3, + next + pix_3, td->w - 6, + y + 1 < td->h ? refs : -refs, + y ? 
-refs : refs, + td->parity ^ td->tff, mode); + s->filter_edges(dst, prev, cur, next, td->w, + y + 1 < td->h ? refs : -refs, + y ? -refs : refs, + td->parity ^ td->tff, mode); + } else { + memcpy(&td->frame->data[td->plane][y * td->frame->linesize[td->plane]], + &s->cur->data[td->plane][y * refs], td->w * df); + } + } + return 0; +} + static void filter(AVFilterContext *ctx, AVFrame *dstpic, int parity, int tff) { YADIFContext *yadif = ctx->priv; - int y, i; + ThreadData td = { .frame = dstpic, .parity = parity, .tff = tff }; + int i; for (i = 0; i < yadif->csp->nb_components; i++) { int w = dstpic->width; int h = dstpic->height; - int refs = yadif->cur->linesize[i]; - int df = (yadif->csp->comp[i].depth_minus1 + 8) / 8; - int pix_3 = 3 * df; if (i == 1 || i == 2) { - /* Why is this not part of the per-plane description thing? */ w >>= yadif->csp->log2_chroma_w; h >>= yadif->csp->log2_chroma_h; } - /* filtering reads 3 pixels to the left/right; to avoid invalid reads, - * we need to call the c variant which avoids this for border pixels - */ - for (y = 0; y < h; y++) { - if ((y ^ parity) & 1) { - uint8_t *prev = &yadif->prev->data[i][y * refs]; - uint8_t *cur = &yadif->cur ->data[i][y * refs]; - uint8_t *next = &yadif->next->data[i][y * refs]; - uint8_t *dst = &dstpic->data[i][y * dstpic->linesize[i]]; - int mode = y == 1 || y + 2 == h ? 2 : yadif->mode; - yadif->filter_line(dst + pix_3, prev + pix_3, cur + pix_3, - next + pix_3, w - 6, - y + 1 < h ? refs : -refs, - y ? -refs : refs, - parity ^ tff, mode); - yadif->filter_edges(dst, prev, cur, next, w, - y + 1 < h ? refs : -refs, - y ? 
-refs : refs, - parity ^ tff, mode); - } else { - memcpy(&dstpic->data[i][y * dstpic->linesize[i]], - &yadif->cur->data[i][y * refs], w * df); - } - } + td.w = w; + td.h = h; + td.plane = i; + + ctx->internal->execute(ctx, filter_slice, &td, NULL, FFMIN(h, ctx->graph->nb_threads)); } emms_c(); @@ -504,4 +529,6 @@ AVFilter avfilter_vf_yadif = { .inputs = avfilter_vf_yadif_inputs, .outputs = avfilter_vf_yadif_outputs, + + .flags = AVFILTER_FLAG_SLICE_THREADS, }; From 8a994b7406b3ab24a6444037da9d1659f3125a01 Mon Sep 17 00:00:00 2001 From: Anton Khirnov Date: Sat, 11 May 2013 21:40:28 +0200 Subject: [PATCH 2/2] vf_fade: support slice threading --- libavfilter/vf_fade.c | 83 ++++++++++++++++++++++++++++++------------- 1 file changed, 59 insertions(+), 24 deletions(-) diff --git a/libavfilter/vf_fade.c b/libavfilter/vf_fade.c index 9f3a60206a..3704e8bf34 100644 --- a/libavfilter/vf_fade.c +++ b/libavfilter/vf_fade.c @@ -92,39 +92,73 @@ static int config_props(AVFilterLink *inlink) return 0; } -static int filter_frame(AVFilterLink *inlink, AVFrame *frame) +static int filter_slice_luma(AVFilterContext *ctx, void *arg, int jobnr, + int nb_jobs) { - FadeContext *s = inlink->dst->priv; - uint8_t *p; + FadeContext *s = ctx->priv; + AVFrame *frame = arg; + int slice_h = frame->height / nb_jobs; + int slice_start = jobnr * slice_h; + int slice_end = (jobnr == nb_jobs - 1) ? frame->height : (jobnr + 1) * slice_h; + int i, j; + + for (i = slice_start; i < slice_end; i++) { + uint8_t *p = frame->data[0] + i * frame->linesize[0]; + for (j = 0; j < frame->width * s->bpp; j++) { + /* s->factor is using 16 lower-order bits for decimal + * places. 32768 = 1 << 15, it is an integer representation + * of 0.5 and is for rounding. 
*/ + *p = (*p * s->factor + 32768) >> 16; + p++; + } + } + + return 0; +} + +static int filter_slice_chroma(AVFilterContext *ctx, void *arg, int jobnr, + int nb_jobs) +{ + FadeContext *s = ctx->priv; + AVFrame *frame = arg; + /* Aligning to the chroma subsampling can round slice_h up, so that + * jobnr * slice_h overshoots the frame for the later jobs; clamp both + * bounds to the frame height so that every job stays inside it. */ + int slice_h = FFALIGN(frame->height / nb_jobs, 1 << s->vsub); + int slice_start = FFMIN(jobnr * slice_h, frame->height); + int slice_end = (jobnr == nb_jobs - 1) ? frame->height : FFMIN((jobnr + 1) * slice_h, frame->height); int i, j, plane; - if (s->factor < UINT16_MAX) { - /* luma or rgb plane */ - for (i = 0; i < frame->height; i++) { - p = frame->data[0] + i * frame->linesize[0]; - for (j = 0; j < inlink->w * s->bpp; j++) { - /* s->factor is using 16 lower-order bits for decimal - * places. 32768 = 1 << 15, it is an integer representation - * of 0.5 and is for rounding. */ - *p = (*p * s->factor + 32768) >> 16; + for (plane = 1; plane < 3; plane++) { + for (i = slice_start; i < slice_end; i++) { + uint8_t *p = frame->data[plane] + (i >> s->vsub) * frame->linesize[plane]; + for (j = 0; j < frame->width >> s->hsub; j++) { + /* 8421367 = ((128 << 1) + 1) << 15. It is an integer + * representation of 128.5. The .5 is for rounding + * purposes. */ + *p = ((*p - 128) * s->factor + 8421367) >> 16; p++; } } + } + + return 0; +} + +static int filter_frame(AVFilterLink *inlink, AVFrame *frame) +{ + AVFilterContext *ctx = inlink->dst; + FadeContext *s = ctx->priv; + + if (s->factor < UINT16_MAX) { + /* luma or rgb plane */ + ctx->internal->execute(ctx, filter_slice_luma, frame, NULL, + FFMIN(frame->height, ctx->graph->nb_threads)); if (frame->data[1] && frame->data[2]) { /* chroma planes */ - for (plane = 1; plane < 3; plane++) { - for (i = 0; i < frame->height; i++) { - p = frame->data[plane] + (i >> s->vsub) * frame->linesize[plane]; - for (j = 0; j < inlink->w >> s->hsub; j++) { - /* 8421367 = ((128 << 1) + 1) << 15. It is an integer - * representation of 128.5. The .5 is for rounding - * purposes. 
*/ - *p = ((*p - 128) * s->factor + 8421367) >> 16; - p++; - } - } - } + ctx->internal->execute(ctx, filter_slice_chroma, frame, NULL, + FFMIN(frame->height, ctx->graph->nb_threads)); } } @@ -187,4 +221,5 @@ AVFilter avfilter_vf_fade = { .inputs = avfilter_vf_fade_inputs, .outputs = avfilter_vf_fade_outputs, + .flags = AVFILTER_FLAG_SLICE_THREADS, };