From 64f6570c6e2c5a0344383e89c7897809f0c6e1f1 Mon Sep 17 00:00:00 2001 From: Michael Niedermayer Date: Mon, 1 Jul 2013 10:01:32 +0200 Subject: [PATCH 1/6] jpeg2000: Use EBCOT's CAUSAL and BYPASS mode in decode_cblk() Speed it up a bit. Signed-off-by: Luca Barbato --- libavcodec/jpeg2000dec.c | 65 ++++++++++++++++++++++++---------------- 1 file changed, 40 insertions(+), 25 deletions(-) diff --git a/libavcodec/jpeg2000dec.c b/libavcodec/jpeg2000dec.c index 6e7808304e..08c8c4df97 100644 --- a/libavcodec/jpeg2000dec.c +++ b/libavcodec/jpeg2000dec.c @@ -828,32 +828,33 @@ static int jpeg2000_decode_packets(Jpeg2000DecoderContext *s, Jpeg2000Tile *tile /* TIER-1 routines */ static void decode_sigpass(Jpeg2000T1Context *t1, int width, int height, - int bpno, int bandno) + int bpno, int bandno, int bpass_csty_symbol, + int vert_causal_ctx_csty_symbol) { int mask = 3 << (bpno - 1), y0, x, y; for (y0 = 0; y0 < height; y0 += 4) for (x = 0; x < width; x++) - for (y = y0; y < height && y < y0 + 4; y++) - if ((t1->flags[y + 1][x + 1] & JPEG2000_T1_SIG_NB) - && !(t1->flags[y + 1][x + 1] & (JPEG2000_T1_SIG | JPEG2000_T1_VIS))) { - if (ff_mqc_decode(&t1->mqc, - t1->mqc.cx_states + - ff_jpeg2000_getsigctxno(t1->flags[y + 1][x + 1], - bandno))) { - int xorbit, ctxno = ff_jpeg2000_getsgnctxno(t1->flags[y + 1][x + 1], - &xorbit); - - t1->data[y][x] = - (ff_mqc_decode(&t1->mqc, - t1->mqc.cx_states + ctxno) ^ xorbit) - ? -mask : mask; + for (y = y0; y < height && y < y0 + 4; y++) { + if ((t1->flags[y+1][x+1] & JPEG2000_T1_SIG_NB) + && !(t1->flags[y+1][x+1] & (JPEG2000_T1_SIG | JPEG2000_T1_VIS))) { + int flags_mask = -1; + if (vert_causal_ctx_csty_symbol && y == y0 + 3) + flags_mask &= ~(JPEG2000_T1_SIG_S | JPEG2000_T1_SIG_SW | JPEG2000_T1_SIG_SE); + if (ff_mqc_decode(&t1->mqc, t1->mqc.cx_states + ff_jpeg2000_getsigctxno(t1->flags[y+1][x+1] & flags_mask, bandno))) { + int xorbit, ctxno = ff_jpeg2000_getsgnctxno(t1->flags[y+1][x+1], &xorbit); + if (bpass_csty_symbol) + t1->data[y][x] = ff_mqc_decode(&t1->mqc, t1->mqc.cx_states + ctxno) ? -mask : mask; + else + t1->data[y][x] = (ff_mqc_decode(&t1->mqc, t1->mqc.cx_states + ctxno) ^ xorbit) ? + -mask : mask; ff_jpeg2000_set_significance(t1, x, y, t1->data[y][x] < 0); } t1->flags[y + 1][x + 1] |= JPEG2000_T1_VIS; } + } } static void decode_refpass(Jpeg2000T1Context *t1, int width, int height, @@ -880,11 +881,11 @@ static void decode_refpass(Jpeg2000T1Context *t1, int width, int height, static void decode_clnpass(Jpeg2000DecoderContext *s, Jpeg2000T1Context *t1, int width, int height, int bpno, int bandno, - int seg_symbols) + int seg_symbols, int vert_causal_ctx_csty_symbol) { int mask = 3 << (bpno - 1), y0, x, y, runlen, dec; - for (y0 = 0; y0 < height; y0 += 4) + for (y0 = 0; y0 < height; y0 += 4) { for (x = 0; x < width; x++) { if (y0 + 3 < height && !((t1->flags[y0 + 1][x + 1] & (JPEG2000_T1_SIG_NB | JPEG2000_T1_VIS | JPEG2000_T1_SIG)) || @@ -906,11 +907,13 @@ static void decode_clnpass(Jpeg2000DecoderContext *s, Jpeg2000T1Context *t1, for (y = y0 + runlen; y < y0 + 4 && y < height; y++) { if (!dec) { - if (!(t1->flags[y + 1][x + 1] & (JPEG2000_T1_SIG | JPEG2000_T1_VIS))) - dec = ff_mqc_decode(&t1->mqc, - t1->mqc.cx_states + - ff_jpeg2000_getsigctxno(t1->flags[y + 1][x + 1], - bandno)); + if (!(t1->flags[y+1][x+1] & (JPEG2000_T1_SIG | JPEG2000_T1_VIS))) { + int flags_mask = -1; + if (vert_causal_ctx_csty_symbol && y == y0 + 3) + flags_mask &= ~(JPEG2000_T1_SIG_S | JPEG2000_T1_SIG_SW | JPEG2000_T1_SIG_SE); + dec = ff_mqc_decode(&t1->mqc, t1->mqc.cx_states + ff_jpeg2000_getsigctxno(t1->flags[y+1][x+1] & flags_mask, + bandno)); + } } if (dec) { int xorbit; @@ -926,6 +929,7 @@ static void decode_clnpass(Jpeg2000DecoderContext *s, Jpeg2000T1Context *t1, t1->flags[y + 1][x + 1] &= ~JPEG2000_T1_VIS; } } + } if (seg_symbols) { int val; val = ff_mqc_decode(&t1->mqc, t1->mqc.cx_states + MQC_CX_UNI); @@ -943,6 +947,9 @@ static int decode_cblk(Jpeg2000DecoderContext *s, Jpeg2000CodingStyle *codsty, int width, int height, int bandpos) { int passno = cblk->npasses, pass_t = 2, bpno = cblk->nonzerobits - 1, y; + int clnpass_cnt = 0; + int bpass_csty_symbol = codsty->cblk_style & JPEG2000_CBLK_BYPASS; + int vert_causal_ctx_csty_symbol = codsty->cblk_style & JPEG2000_CBLK_VSC; for (y = 0; y < height; y++) memset(t1->data[y], 0, width * sizeof(**t1->data)); @@ -960,14 +967,22 @@ static int decode_cblk(Jpeg2000DecoderContext *s, Jpeg2000CodingStyle *codsty, while (passno--) { switch (pass_t) { case 0: - decode_sigpass(t1, width, height, bpno + 1, bandpos); + decode_sigpass(t1, width, height, bpno + 1, bandpos, + bpass_csty_symbol && (clnpass_cnt >= 4), + vert_causal_ctx_csty_symbol); break; case 1: decode_refpass(t1, width, height, bpno + 1); + if (bpass_csty_symbol && clnpass_cnt >= 4) + ff_mqc_initdec(&t1->mqc, cblk->data); break; case 2: decode_clnpass(s, t1, width, height, bpno + 1, bandpos, - codsty->cblk_style & JPEG2000_CBLK_SEGSYM); + codsty->cblk_style & JPEG2000_CBLK_SEGSYM, + vert_causal_ctx_csty_symbol); + clnpass_cnt = clnpass_cnt + 1; + if (bpass_csty_symbol && clnpass_cnt >= 4) + ff_mqc_initdec(&t1->mqc, cblk->data); break; } @@ -1179,7 +1194,7 @@ static int jpeg2000_decode_tile(Jpeg2000DecoderContext *s, Jpeg2000Tile *tile, uint16_t *dst; x = tile->comp[compno].coord[0][0] - s->image_offset_x; dst = linel + (x * s->ncomponents + compno); - for (; x < s->avctx->width; x += s->cdx[compno]) { + for (; x < tile->comp[compno].coord[0][1] - s->image_offset_x; x += s-> cdx[compno]) { int val; /* DC level shift and clip see ISO 15444-1:2002 G.1.2 */ if (tile->codsty->transform == FF_DWT97) From 53d5d89c1b7534205e5f825918701dda56d18896 Mon Sep 17 00:00:00 2001 From: Michael Niedermayer Date: Mon, 1 Jul 2013 10:01:33 +0200 Subject: [PATCH 2/6] jpeg2000: Speed up jpeg2000_decode_tile() Skip processing bands with dimension set to 0. Signed-off-by: Luca Barbato --- libavcodec/jpeg2000dec.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/libavcodec/jpeg2000dec.c b/libavcodec/jpeg2000dec.c index 08c8c4df97..6a300bd8f5 100644 --- a/libavcodec/jpeg2000dec.c +++ b/libavcodec/jpeg2000dec.c @@ -1118,6 +1118,10 @@ static int jpeg2000_decode_tile(Jpeg2000DecoderContext *s, Jpeg2000Tile *tile, int cblkno = 0, bandpos; bandpos = bandno + (reslevelno > 0); + if (band->coord[0][0] == band->coord[0][1] || + band->coord[1][0] == band->coord[1][1]) + continue; + nb_precincts = rlevel->num_precincts_x * rlevel->num_precincts_y; /* Loop on precincts */ for (precno = 0; precno < nb_precincts; precno++) { From c1dcbc590d90199b989095a722319fbf8851dce7 Mon Sep 17 00:00:00 2001 From: Michael Niedermayer Date: Mon, 1 Jul 2013 10:01:34 +0200 Subject: [PATCH 3/6] jpeg2000: Do not assume a single tile In preparation of supporting multiple tiles. Signed-off-by: Luca Barbato --- libavcodec/jpeg2000dec.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/libavcodec/jpeg2000dec.c b/libavcodec/jpeg2000dec.c index 6a300bd8f5..d2974be432 100644 --- a/libavcodec/jpeg2000dec.c +++ b/libavcodec/jpeg2000dec.c @@ -1327,12 +1327,18 @@ static int jpeg2000_read_main_headers(Jpeg2000DecoderContext *s) static int jpeg2000_read_bitstream_packets(Jpeg2000DecoderContext *s) { int ret = 0; - Jpeg2000Tile *tile = s->tile + s->curtileno; + int tileno; - if (ret = init_tile(s, s->curtileno)) - return ret; - if (ret = jpeg2000_decode_packets(s, tile)) - return ret; + for (tileno = 0; tileno < s->numXtiles * s->numYtiles; tileno++) { + Jpeg2000Tile *tile = s->tile + tileno; + + if (ret = init_tile(s, tileno)) + return ret; + + s->g = tile->tile_part[0].tpg; + if (ret = jpeg2000_decode_packets(s, tile)) + return ret; + } return 0; } From e11099db203c46ddeb9ac5707a824c8ae01ee8f4 Mon Sep 17 00:00:00 2001 From: Michael Niedermayer Date: Mon, 1 Jul 2013 10:01:35 +0200 Subject: [PATCH 4/6] jpeg2000: Optimize dequantization Float: 4700 -> 2700 cycles Integer: 4400 -> 2800 cycles (sandybridge i7) Signed-off-by: Luca Barbato --- libavcodec/jpeg2000dec.c | 31 ++++++++++++++++--------------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/libavcodec/jpeg2000dec.c b/libavcodec/jpeg2000dec.c index d2974be432..589cbc12b1 100644 --- a/libavcodec/jpeg2000dec.c +++ b/libavcodec/jpeg2000dec.c @@ -1006,13 +1006,14 @@ static void dequantization_float(int x, int y, Jpeg2000Cblk *cblk, Jpeg2000Component *comp, Jpeg2000T1Context *t1, Jpeg2000Band *band) { - int i, j, idx; - float *datap = &comp->f_data[(comp->coord[0][1] - comp->coord[0][0]) * y + x]; - for (j = 0; j < (cblk->coord[1][1] - cblk->coord[1][0]); ++j) - for (i = 0; i < (cblk->coord[0][1] - cblk->coord[0][0]); ++i) { - idx = (comp->coord[0][1] - comp->coord[0][0]) * j + i; - datap[idx] = (float)(t1->data[j][i]) * band->f_stepsize; - } + int i, j; + int w = cblk->coord[0][1] - cblk->coord[0][0]; + for (j = 0; j < (cblk->coord[1][1] - cblk->coord[1][0]); ++j) { + float *datap = &comp->f_data[(comp->coord[0][1] - comp->coord[0][0]) * (y + j) + x]; + int *src = t1->data[j]; + for (i = 0; i < w; ++i) + datap[i] = src[i] * band->f_stepsize; + } } /* Integer dequantization of a codeblock.*/ @@ -1020,14 +1021,14 @@ static void dequantization_int(int x, int y, Jpeg2000Cblk *cblk, Jpeg2000Component *comp, Jpeg2000T1Context *t1, Jpeg2000Band *band) { - int i, j, idx; - int32_t *datap = &comp->i_data[(comp->coord[0][1] - comp->coord[0][0]) * y + x]; - for (j = 0; j < (cblk->coord[1][1] - cblk->coord[1][0]); ++j) - for (i = 0; i < (cblk->coord[0][1] - cblk->coord[0][0]); ++i) { - idx = (comp->coord[0][1] - comp->coord[0][0]) * j + i; - datap[idx] = - ((int32_t)(t1->data[j][i]) * band->i_stepsize + (1 << 15)) >> 16; - } + int i, j; + int w = cblk->coord[0][1] - cblk->coord[0][0]; + for (j = 0; j < (cblk->coord[1][1] - cblk->coord[1][0]); ++j) { + int32_t *datap = &comp->i_data[(comp->coord[0][1] - comp->coord[0][0]) * (y + j) + x]; + int *src = t1->data[j]; + for (i = 0; i < w; ++i) + datap[i] = (src[i] * band->i_stepsize + (1 << 15)) >> 16; + } } /* Inverse ICT parameters in float and integer. From f0552e63a669853ac7cd76f201356b8ea1608d6b Mon Sep 17 00:00:00 2001 From: Michael Niedermayer Date: Mon, 1 Jul 2013 10:01:36 +0200 Subject: [PATCH 5/6] jpeg2000: Reset s->numX/Ytiles on tile deallocation Keep the structure fields more consistent after cleanup. Signed-off-by: Luca Barbato --- libavcodec/jpeg2000dec.c | 1 + 1 file changed, 1 insertion(+) diff --git a/libavcodec/jpeg2000dec.c b/libavcodec/jpeg2000dec.c index 589cbc12b1..db7a0c89ba 100644 --- a/libavcodec/jpeg2000dec.c +++ b/libavcodec/jpeg2000dec.c @@ -1233,6 +1233,7 @@ static void jpeg2000_dec_cleanup(Jpeg2000DecoderContext *s) av_freep(&s->tile[tileno].comp); } av_freep(&s->tile); + s->numXtiles = s->numYtiles = 0; } static int jpeg2000_read_main_headers(Jpeg2000DecoderContext *s) From 22e18ea39e371030cc78973d1b46aae45a7ea215 Mon Sep 17 00:00:00 2001 From: Michael Niedermayer Date: Mon, 1 Jul 2013 10:01:37 +0200 Subject: [PATCH 6/6] jpeg2000: Optimize output sample conversion 67935 -> 29984 kcycles Reviewed-by: Nicolas BERTRAND Signed-off-by: Luca Barbato --- libavcodec/jpeg2000dec.c | 69 +++++++++++++++++++++++++--------------- 1 file changed, 44 insertions(+), 25 deletions(-) diff --git a/libavcodec/jpeg2000dec.c b/libavcodec/jpeg2000dec.c index db7a0c89ba..89cae5f2cb 100644 --- a/libavcodec/jpeg2000dec.c +++ b/libavcodec/jpeg2000dec.c @@ -1162,6 +1162,9 @@ static int jpeg2000_decode_tile(Jpeg2000DecoderContext *s, Jpeg2000Tile *tile, Jpeg2000Component *comp = tile->comp + compno; float *datap = comp->f_data; int32_t *i_datap = comp->i_data; + int cbps = s->cbps[compno]; + int w = tile->comp[compno].coord[0][1] - s->image_offset_x; + y = tile->comp[compno].coord[1][0] - s->image_offset_y; line = picture->data[0] + y * picture->linesize[0]; for (; y < tile->comp[compno].coord[1][1] - s->image_offset_y; y += s->cdy[compno]) { @@ -1170,18 +1173,24 @@ static int jpeg2000_decode_tile(Jpeg2000DecoderContext *s, Jpeg2000Tile *tile, x = tile->comp[compno].coord[0][0] - s->image_offset_x; dst = line + x * s->ncomponents + compno; - for (; x < tile->comp[compno].coord[0][1] - s->image_offset_x; x += s->cdx[compno]) { - int val; - /* DC level shift and clip see ISO 15444-1:2002 G.1.2 */ - if (tile->codsty->transform == FF_DWT97) - val = lrintf(*datap) + (1 << (s->cbps[compno] - 1)); - else - val = *i_datap + (1 << (s->cbps[compno] - 1)); - val = av_clip(val, 0, (1 << s->cbps[compno]) - 1); - *dst = val << (8 - s->cbps[compno]); - datap++; - i_datap++; - dst += s->ncomponents; + if (tile->codsty->transform == FF_DWT97) { + for (; x < w; x += s->cdx[compno]) { + int val = lrintf(*datap) + (1 << (cbps - 1)); + /* DC level shift and clip see ISO 15444-1:2002 G.1.2 */ + val = av_clip(val, 0, (1 << cbps) - 1); + *dst = val << (8 - cbps); + datap++; + dst += s->ncomponents; + } + } else { + for (; x < w; x += s->cdx[compno]) { + int val = *i_datap + (1 << (cbps - 1)); + /* DC level shift and clip see ISO 15444-1:2002 G.1.2 */ + val = av_clip(val, 0, (1 << cbps) - 1); + *dst = val << (8 - cbps); + i_datap++; + dst += s->ncomponents; + } } line += picture->linesize[0]; } @@ -1192,6 +1201,8 @@ static int jpeg2000_decode_tile(Jpeg2000DecoderContext *s, Jpeg2000Tile *tile, float *datap = comp->f_data; int32_t *i_datap = comp->i_data; uint16_t *linel; + int cbps = s->cbps[compno]; + int w = tile->comp[compno].coord[0][1] - s->image_offset_x; y = tile->comp[compno].coord[1][0] - s->image_offset_y; linel = (uint16_t *)picture->data[0] + y * (picture->linesize[0] >> 1); @@ -1199,24 +1210,32 @@ static int jpeg2000_decode_tile(Jpeg2000DecoderContext *s, Jpeg2000Tile *tile, uint16_t *dst; x = tile->comp[compno].coord[0][0] - s->image_offset_x; dst = linel + (x * s->ncomponents + compno); - for (; x < tile->comp[compno].coord[0][1] - s->image_offset_x; x += s-> cdx[compno]) { - int val; - /* DC level shift and clip see ISO 15444-1:2002 G.1.2 */ - if (tile->codsty->transform == FF_DWT97) - val = lrintf(*datap) + (1 << (s->cbps[compno] - 1)); - else - val = *i_datap + (1 << (s->cbps[compno] - 1)); - val = av_clip(val, 0, (1 << s->cbps[compno]) - 1); - /* align 12 bit values in little-endian mode */ - *dst = val << (16 - s->cbps[compno]); - datap++; - i_datap++; - dst += s->ncomponents; + if (tile->codsty->transform == FF_DWT97) { + for (; x < w; x += s-> cdx[compno]) { + int val = lrintf(*datap) + (1 << (cbps - 1)); + /* DC level shift and clip see ISO 15444-1:2002 G.1.2 */ + val = av_clip(val, 0, (1 << cbps) - 1); + /* align 12 bit values in little-endian mode */ + *dst = val << (16 - cbps); + datap++; + dst += s->ncomponents; + } + } else { + for (; x < w; x += s-> cdx[compno]) { + int val = *i_datap + (1 << (cbps - 1)); + /* DC level shift and clip see ISO 15444-1:2002 G.1.2 */ + val = av_clip(val, 0, (1 << cbps) - 1); + /* align 12 bit values in little-endian mode */ + *dst = val << (16 - cbps); + i_datap++; + dst += s->ncomponents; + } } linel += picture->linesize[0] >> 1; } } } + return 0; }