From 51ca70322296f59666dda6c59bfa35ff9bf17dd5 Mon Sep 17 00:00:00 2001
From: Donny Yang
Date: Wed, 19 Aug 2015 06:41:23 +0000
Subject: [PATCH] apng: Support inter-frame compression

The current algorithm is just "try all the combinations, and pick the best".
It's not very fast either, probably due to a lot of copying, but will do for
an initial implementation.

Signed-off-by: Donny Yang
Signed-off-by: Michael Niedermayer
---
Reviewer notes (not part of the commit message):
 * scratch/reference frames are now filled with av_frame_copy() instead of a
   single memcpy() of linesize * height, so frames whose linesizes differ are
   handled; the background-disposal memset() uses the destination linesize
 * temp_bytestream is checked before it is used in pointer arithmetic
 * added documentation comments for the new struct and functions

 libavcodec/pngenc.c | 462 ++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 425 insertions(+), 37 deletions(-)

diff --git a/libavcodec/pngenc.c b/libavcodec/pngenc.c
index af8ca4e7a3..f6ad830cd9 100644
--- a/libavcodec/pngenc.c
+++ b/libavcodec/pngenc.c
@@ -36,6 +36,18 @@
 
 #define IOBUF_SIZE 4096
 
+/**
+ * Fields of an APNG frame control (fcTL) chunk, held in host byte order until
+ * the chunk is serialized.
+ */
+typedef struct APNGFctlChunk {
+    uint32_t sequence_number;
+    uint32_t width, height;
+    uint32_t x_offset, y_offset;
+    uint16_t delay_num, delay_den;
+    uint8_t dispose_op, blend_op;
+} APNGFctlChunk;
+
 typedef struct PNGEncContext {
     AVClass *class;
     HuffYUVEncDSPContext hdsp;
@@ -59,6 +71,12 @@ typedef struct PNGEncContext {
     // APNG
     uint32_t palette_checksum;   // Used to ensure a single unique palette
     uint32_t sequence_number;
+
+    AVFrame *prev_frame;
+    AVFrame *last_frame;
+    APNGFctlChunk last_frame_fctl;
+    uint8_t *last_frame_packet;
+    size_t last_frame_packet_size;
 } PNGEncContext;
 
 static void png_get_interlaced_row(uint8_t *dst, int row_size,
@@ -403,7 +421,7 @@ static int encode_frame(AVCodecContext *avctx, const AVFrame *pict)
     uint8_t *progressive_buf = NULL;
     uint8_t *top_buf         = NULL;
 
-    row_size = (avctx->width * s->bits_per_pixel + 7) >> 3;
+    row_size = (pict->width * s->bits_per_pixel + 7) >> 3;
 
     crow_base = av_malloc((row_size + 32) << (s->filter_type == PNG_FILTER_VALUE_MIXED));
     if (!crow_base) {
@@ -430,16 +448,16 @@ static int encode_frame(AVCodecContext *avctx, const AVFrame *pict)
         for (pass = 0; pass < NB_PASSES; pass++) {
             /* NOTE: a pass is completely omitted if no pixels would be
              * output */
-            pass_row_size = ff_png_pass_row_size(pass, s->bits_per_pixel, avctx->width);
+            pass_row_size = ff_png_pass_row_size(pass, s->bits_per_pixel, pict->width);
             if (pass_row_size > 0) {
                 top = NULL;
-                for (y = 0; y < avctx->height; y++)
+                for (y = 0; y < pict->height; y++)
                     if ((ff_png_pass_ymask[pass] << (y & 7)) & 0x80) {
                         ptr = p->data[0] + y * p->linesize[0];
                         FFSWAP(uint8_t *, progressive_buf, top_buf);
                         png_get_interlaced_row(progressive_buf, pass_row_size,
                                                s->bits_per_pixel, pass,
-                                               ptr, avctx->width);
+                                               ptr, pict->width);
                         crow = png_choose_filter(s, crow_buf, progressive_buf,
                                                  top, pass_row_size, s->bits_per_pixel >> 3);
                         png_write_row(avctx, crow, pass_row_size + 1);
@@ -449,7 +467,7 @@ static int encode_frame(AVCodecContext *avctx, const AVFrame *pict)
         }
     } else {
         top = NULL;
-        for (y = 0; y < avctx->height; y++) {
+        for (y = 0; y < pict->height; y++) {
             ptr = p->data[0] + y * p->linesize[0];
             crow = png_choose_filter(s, crow_buf, ptr, top,
                                      row_size, s->bits_per_pixel >> 3);
@@ -530,6 +548,308 @@ static int encode_png(AVCodecContext *avctx, AVPacket *pkt,
     return 0;
 }
 
+/**
+ * Turn output (the canvas left after disposal) in place into the image that,
+ * blended back onto that canvas with fctl_chunk->blend_op, reproduces input.
+ *
+ * Only the bounding box of the pixels that differ between the two frames is
+ * produced (1x1 if nothing differs). It is packed at the top-left corner of
+ * output; output->width/height and the size and offset fields of fctl_chunk
+ * are updated to describe it.
+ *
+ * @param output     background on entry, frame data to encode on success
+ * @param input      foreground, i.e. the picture that has to be reproduced
+ * @param fctl_chunk blend_op is read; width, height, x_offset and y_offset
+ *                   are written on success
+ * @param bpp        bytes per pixel of both frames
+ * @return 0 on success, -1 if the requested blend operation cannot express
+ *         the change (output may already be partially overwritten then)
+ */
+static int apng_do_inverse_blend(AVFrame *output, const AVFrame *input,
+                                  APNGFctlChunk *fctl_chunk, uint8_t bpp)
+{
+    // output: background, input: foreground
+    // output the image such that when blended with the background, will produce the foreground
+
+    unsigned int x, y;
+    unsigned int leftmost_x = input->width;
+    unsigned int rightmost_x = 0;
+    unsigned int topmost_y = input->height;
+    unsigned int bottommost_y = 0;
+    const uint8_t *input_data = input->data[0];
+    uint8_t *output_data = output->data[0];
+    ptrdiff_t input_linesize = input->linesize[0];
+    ptrdiff_t output_linesize = output->linesize[0];
+
+    // Find bounding box of changes
+    for (y = 0; y < input->height; ++y) {
+        for (x = 0; x < input->width; ++x) {
+            if (!memcmp(input_data + bpp * x, output_data + bpp * x, bpp))
+                continue;
+
+            if (x < leftmost_x)
+                leftmost_x = x;
+            if (x >= rightmost_x)
+                rightmost_x = x + 1;
+            if (y < topmost_y)
+                topmost_y = y;
+            if (y >= bottommost_y)
+                bottommost_y = y + 1;
+        }
+
+        input_data += input_linesize;
+        output_data += output_linesize;
+    }
+
+    if (leftmost_x == input->width && rightmost_x == 0) {
+        // Empty frame
+        // APNG does not support empty frames, so we make it a 1x1 frame
+        leftmost_x = topmost_y = 0;
+        rightmost_x = bottommost_y = 1;
+    }
+
+    // Do actual inverse blending
+    if (fctl_chunk->blend_op == APNG_BLEND_OP_SOURCE) {
+        output_data = output->data[0];
+        for (y = topmost_y; y < bottommost_y; ++y) {
+            memcpy(output_data,
+                   input->data[0] + input_linesize * y + bpp * leftmost_x,
+                   bpp * (rightmost_x - leftmost_x));
+            output_data += output_linesize;
+        }
+    } else { // APNG_BLEND_OP_OVER
+        size_t transparent_palette_index;
+        uint32_t *palette;
+
+        switch (input->format) {
+        case AV_PIX_FMT_RGBA64BE:
+        case AV_PIX_FMT_YA16BE:
+        case AV_PIX_FMT_RGBA:
+        case AV_PIX_FMT_GRAY8A:
+            break;
+
+        case AV_PIX_FMT_PAL8:
+            palette = (uint32_t*)input->data[1];
+            for (transparent_palette_index = 0; transparent_palette_index < 256; ++transparent_palette_index)
+                if (palette[transparent_palette_index] >> 24 == 0)
+                    break;
+            break;
+
+        default:
+            // No alpha, so blending not possible
+            return -1;
+        }
+
+        for (y = topmost_y; y < bottommost_y; ++y) {
+            uint8_t *foreground = input->data[0] + input_linesize * y + bpp * leftmost_x;
+            uint8_t *background = output->data[0] + output_linesize * y + bpp * leftmost_x;
+            output_data = output->data[0] + output_linesize * (y - topmost_y);
+            for (x = leftmost_x; x < rightmost_x; ++x, foreground += bpp, background += bpp, output_data += bpp) {
+                if (!memcmp(foreground, background, bpp)) {
+                    if (input->format == AV_PIX_FMT_PAL8) {
+                        if (transparent_palette_index == 256) {
+                            // Need fully transparent colour, but none exists
+                            return -1;
+                        }
+
+                        *output_data = transparent_palette_index;
+                    } else {
+                        memset(output_data, 0, bpp);
+                    }
+                    continue;
+                }
+
+                // Check for special alpha values, since full inverse
+                // alpha-on-alpha blending is rarely possible, and when
+                // possible, doesn't compress much better than
+                // APNG_BLEND_OP_SOURCE blending
+                switch (input->format) {
+                case AV_PIX_FMT_RGBA64BE:
+                    if (((uint16_t*)foreground)[3] == 0xffff ||
+                        ((uint16_t*)background)[3] == 0)
+                        break;
+                    return -1;
+
+                case AV_PIX_FMT_YA16BE:
+                    if (((uint16_t*)foreground)[1] == 0xffff ||
+                        ((uint16_t*)background)[1] == 0)
+                        break;
+                    return -1;
+
+                case AV_PIX_FMT_RGBA:
+                    if (foreground[3] == 0xff || background[3] == 0)
+                        break;
+                    return -1;
+
+                case AV_PIX_FMT_GRAY8A:
+                    if (foreground[1] == 0xff || background[1] == 0)
+                        break;
+                    return -1;
+
+                case AV_PIX_FMT_PAL8:
+                    if (palette[*foreground] >> 24 == 0xff ||
+                        palette[*background] >> 24 == 0)
+                        break;
+                    return -1;
+                }
+
+                memmove(output_data, foreground, bpp);
+            }
+        }
+    }
+
+    output->width = rightmost_x - leftmost_x;
+    output->height = bottommost_y - topmost_y;
+    fctl_chunk->width = output->width;
+    fctl_chunk->height = output->height;
+    fctl_chunk->x_offset = leftmost_x;
+    fctl_chunk->y_offset = topmost_y;
+
+    return 0;
+}
+
+/**
+ * Encode pict into s->bytestream, trying every combination of dispose_op
+ * for the previous frame and blend_op for this one and keeping whichever
+ * produces the smallest output.
+ *
+ * The first frame is encoded as is, covering the full picture with
+ * APNG_BLEND_OP_SOURCE.
+ *
+ * @param best_fctl_chunk      receives the region and blend_op chosen for
+ *                             this frame
+ * @param best_last_fctl_chunk fcTL of the previous frame; its dispose_op is
+ *                             set to the value the chosen encoding relies on
+ * @return 0 on success, a negative error code on failure (for the first
+ *         frame, whatever encode_frame() returns)
+ */
+static int apng_encode_frame(AVCodecContext *avctx, const AVFrame *pict,
+                             APNGFctlChunk *best_fctl_chunk, APNGFctlChunk *best_last_fctl_chunk)
+{
+    PNGEncContext *s = avctx->priv_data;
+    int ret;
+    unsigned int y;
+    AVFrame *diffFrame;
+    uint8_t bpp = (s->bits_per_pixel + 7) >> 3;
+    uint8_t *original_bytestream, *original_bytestream_end;
+    uint8_t *temp_bytestream = NULL, *temp_bytestream_end;
+    uint32_t best_sequence_number = 0;
+    uint8_t *best_bytestream = NULL;
+    size_t best_bytestream_size = SIZE_MAX;
+    APNGFctlChunk last_fctl_chunk = *best_last_fctl_chunk;
+    APNGFctlChunk fctl_chunk = *best_fctl_chunk;
+
+    if (avctx->frame_number == 0) {
+        best_fctl_chunk->width = pict->width;
+        best_fctl_chunk->height = pict->height;
+        best_fctl_chunk->x_offset = 0;
+        best_fctl_chunk->y_offset = 0;
+        best_fctl_chunk->blend_op = APNG_BLEND_OP_SOURCE;
+        return encode_frame(avctx, pict);
+    }
+
+    diffFrame = av_frame_alloc();
+    if (!diffFrame)
+        return AVERROR(ENOMEM);
+
+    diffFrame->format = pict->format;
+    diffFrame->width = pict->width;
+    diffFrame->height = pict->height;
+    if ((ret = av_frame_get_buffer(diffFrame, 32)) < 0)
+        goto fail;
+
+    original_bytestream = s->bytestream;
+    original_bytestream_end = s->bytestream_end;
+
+    temp_bytestream = av_malloc(original_bytestream_end - original_bytestream);
+    if (!temp_bytestream) {
+        ret = AVERROR(ENOMEM);
+        goto fail;
+    }
+    temp_bytestream_end = temp_bytestream + (original_bytestream_end - original_bytestream);
+
+    for (last_fctl_chunk.dispose_op = 0; last_fctl_chunk.dispose_op < 3; ++last_fctl_chunk.dispose_op) {
+        // 0: APNG_DISPOSE_OP_NONE
+        // 1: APNG_DISPOSE_OP_BACKGROUND
+        // 2: APNG_DISPOSE_OP_PREVIOUS
+
+        for (fctl_chunk.blend_op = 0; fctl_chunk.blend_op < 2; ++fctl_chunk.blend_op) {
+            // 0: APNG_BLEND_OP_SOURCE
+            // 1: APNG_BLEND_OP_OVER
+
+            uint32_t original_sequence_number = s->sequence_number, sequence_number;
+            uint8_t *bytestream_start = s->bytestream;
+            size_t bytestream_size;
+
+            // Do disposal. apng_do_inverse_blend() shrinks diffFrame to the
+            // changed region, so restore the full size before every copy.
+            diffFrame->width  = pict->width;
+            diffFrame->height = pict->height;
+            if (last_fctl_chunk.dispose_op != APNG_DISPOSE_OP_PREVIOUS) {
+                // av_frame_copy() honours each frame's own linesize
+                if ((ret = av_frame_copy(diffFrame, s->last_frame)) < 0)
+                    goto fail;
+
+                if (last_fctl_chunk.dispose_op == APNG_DISPOSE_OP_BACKGROUND) {
+                    for (y = last_fctl_chunk.y_offset; y < last_fctl_chunk.y_offset + last_fctl_chunk.height; ++y) {
+                        size_t row_start = diffFrame->linesize[0] * y + bpp * last_fctl_chunk.x_offset;
+                        memset(diffFrame->data[0] + row_start, 0, bpp * last_fctl_chunk.width);
+                    }
+                }
+            } else {
+                if (!s->prev_frame)
+                    continue;
+
+                if ((ret = av_frame_copy(diffFrame, s->prev_frame)) < 0)
+                    goto fail;
+            }
+
+            // Do inverse blending
+            if (apng_do_inverse_blend(diffFrame, pict, &fctl_chunk, bpp) < 0)
+                continue;
+
+            // Do encoding
+            ret = encode_frame(avctx, diffFrame);
+            sequence_number = s->sequence_number;
+            s->sequence_number = original_sequence_number;
+            bytestream_size = s->bytestream - bytestream_start;
+            s->bytestream = bytestream_start;
+            if (ret < 0)
+                goto fail;
+
+            if (bytestream_size < best_bytestream_size) {
+                *best_fctl_chunk = fctl_chunk;
+                *best_last_fctl_chunk = last_fctl_chunk;
+
+                best_sequence_number = sequence_number;
+                best_bytestream = s->bytestream;
+                best_bytestream_size = bytestream_size;
+
+                if (best_bytestream == original_bytestream) {
+                    s->bytestream = temp_bytestream;
+                    s->bytestream_end = temp_bytestream_end;
+                } else {
+                    s->bytestream = original_bytestream;
+                    s->bytestream_end = original_bytestream_end;
+                }
+            }
+        }
+    }
+
+    s->sequence_number = best_sequence_number;
+    s->bytestream = original_bytestream + best_bytestream_size;
+    s->bytestream_end = original_bytestream_end;
+    if (best_bytestream != original_bytestream)
+        memcpy(original_bytestream, best_bytestream, best_bytestream_size);
+
+    ret = 0;
+
+fail:
+    av_freep(&temp_bytestream);
+    av_frame_free(&diffFrame);
+    return ret;
+}
+
 static int encode_apng(AVCodecContext *avctx, AVPacket *pkt,
                        const AVFrame *pict, int *got_packet)
 {
@@ -537,9 +857,9 @@ static int encode_apng(AVCodecContext *avctx, AVPacket *pkt,
     int ret;
     int enc_row_size;
     size_t max_packet_size;
-    uint8_t buf[26];
+    APNGFctlChunk fctl_chunk;
 
-    if (avctx->codec_id == AV_CODEC_ID_APNG && s->color_type == PNG_COLOR_TYPE_PALETTE) {
+    if (pict && avctx->codec_id == AV_CODEC_ID_APNG && s->color_type == PNG_COLOR_TYPE_PALETTE) {
         uint32_t checksum = ~av_crc(av_crc_get_table(AV_CRC_32_IEEE_LE), ~0U, pict->data[1], 256 * sizeof(uint32_t));
 
         if (avctx->frame_number == 0) {
@@ -560,47 +880,111 @@ static int encode_apng(AVCodecContext *avctx, AVPacket *pkt,
         );
     if (max_packet_size > INT_MAX)
         return AVERROR(ENOMEM);
-    ret = ff_alloc_packet2(avctx, pkt, max_packet_size, 0);
-    if (ret < 0)
-        return ret;
-
-    s->bytestream_start =
-    s->bytestream       = pkt->data;
-    s->bytestream_end   = pkt->data + pkt->size;
 
     if (avctx->frame_number == 0) {
+        s->bytestream = avctx->extradata = av_malloc(FF_MIN_BUFFER_SIZE);
+        if (!avctx->extradata)
+            return AVERROR(ENOMEM);
+
         ret = encode_headers(avctx, pict);
         if (ret < 0)
             return ret;
 
-        avctx->extradata = av_malloc(s->bytestream - s->bytestream_start);
-        if (!avctx->extradata)
-            return AVERROR(ENOMEM);
-        avctx->extradata_size = s->bytestream - s->bytestream_start;
-        memcpy(avctx->extradata, s->bytestream_start, s->bytestream - s->bytestream_start);
+        avctx->extradata_size = s->bytestream - avctx->extradata;
 
-        s->bytestream = s->bytestream_start;
+        s->last_frame_packet = av_malloc(max_packet_size);
+        if (!s->last_frame_packet)
+            return AVERROR(ENOMEM);
+    } else if (s->last_frame) {
+        ret = ff_alloc_packet2(avctx, pkt, max_packet_size, 0);
+        if (ret < 0)
+            return ret;
+
+        memcpy(pkt->data, s->last_frame_packet, s->last_frame_packet_size);
+        pkt->size = s->last_frame_packet_size;
+        pkt->pts = pkt->dts = s->last_frame->pts;
     }
 
-    AV_WB32(buf, s->sequence_number);
-    AV_WB32(buf + 4, avctx->width);
-    AV_WB32(buf + 8, avctx->height);
-    AV_WB32(buf + 12, 0); // x offset
-    AV_WB32(buf + 16, 0); // y offset
-    AV_WB16(buf + 20, 0); // delay numerator (filled in during muxing)
-    AV_WB16(buf + 22, 0); // delay denominator
-    buf[24] = APNG_DISPOSE_OP_BACKGROUND;
-    buf[25] = APNG_BLEND_OP_SOURCE;
-    png_write_chunk(&s->bytestream, MKTAG('f', 'c', 'T', 'L'), buf, 26);
-    ++s->sequence_number;
+    if (pict) {
+        s->bytestream_start =
+        s->bytestream       = s->last_frame_packet;
+        s->bytestream_end   = s->bytestream + max_packet_size;
 
-    ret = encode_frame(avctx, pict);
-    if (ret < 0)
-        return ret;
+        // We're encoding the frame first, so we have to do a bit of shuffling around
+        // to have the image data write to the correct place in the buffer
+        fctl_chunk.sequence_number = s->sequence_number;
+        ++s->sequence_number;
+        s->bytestream += 26 + 12;
 
-    pkt->size = s->bytestream - s->bytestream_start;
-    pkt->flags |= AV_PKT_FLAG_KEY;
-    *got_packet = 1;
+        ret = apng_encode_frame(avctx, pict, &fctl_chunk, &s->last_frame_fctl);
+        if (ret < 0)
+            return ret;
+
+        fctl_chunk.delay_num = 0; // delay filled in during muxing
+        fctl_chunk.delay_den = 0;
+    } else {
+        s->last_frame_fctl.dispose_op = APNG_DISPOSE_OP_NONE;
+    }
+
+    if (s->last_frame) {
+        uint8_t* last_fctl_chunk_start = pkt->data;
+        uint8_t buf[26];
+
+        AV_WB32(buf + 0, s->last_frame_fctl.sequence_number);
+        AV_WB32(buf + 4, s->last_frame_fctl.width);
+        AV_WB32(buf + 8, s->last_frame_fctl.height);
+        AV_WB32(buf + 12, s->last_frame_fctl.x_offset);
+        AV_WB32(buf + 16, s->last_frame_fctl.y_offset);
+        AV_WB16(buf + 20, s->last_frame_fctl.delay_num);
+        AV_WB16(buf + 22, s->last_frame_fctl.delay_den);
+        buf[24] = s->last_frame_fctl.dispose_op;
+        buf[25] = s->last_frame_fctl.blend_op;
+        png_write_chunk(&last_fctl_chunk_start, MKTAG('f', 'c', 'T', 'L'), buf, 26);
+
+        *got_packet = 1;
+    }
+
+    if (pict) {
+        if (!s->last_frame) {
+            s->last_frame = av_frame_alloc();
+            if (!s->last_frame)
+                return AVERROR(ENOMEM);
+        } else if (s->last_frame_fctl.dispose_op != APNG_DISPOSE_OP_PREVIOUS) {
+            if (!s->prev_frame) {
+                s->prev_frame = av_frame_alloc();
+                if (!s->prev_frame)
+                    return AVERROR(ENOMEM);
+
+                s->prev_frame->format = pict->format;
+                s->prev_frame->width = pict->width;
+                s->prev_frame->height = pict->height;
+                if ((ret = av_frame_get_buffer(s->prev_frame, 32)) < 0)
+                    return ret;
+            }
+
+            // Do disposal, but not blending (the frames may differ in linesize)
+            if ((ret = av_frame_copy(s->prev_frame, s->last_frame)) < 0)
+                return ret;
+            if (s->last_frame_fctl.dispose_op == APNG_DISPOSE_OP_BACKGROUND) {
+                uint32_t y;
+                uint8_t bpp = (s->bits_per_pixel + 7) >> 3;
+                for (y = s->last_frame_fctl.y_offset; y < s->last_frame_fctl.y_offset + s->last_frame_fctl.height; ++y) {
+                    size_t row_start = s->prev_frame->linesize[0] * y + bpp * s->last_frame_fctl.x_offset;
+                    memset(s->prev_frame->data[0] + row_start, 0, bpp * s->last_frame_fctl.width);
+                }
+            }
+        }
+
+        av_frame_unref(s->last_frame);
+        ret = av_frame_ref(s->last_frame, (AVFrame*)pict);
+        if (ret < 0)
+            return ret;
+
+        s->last_frame_fctl = fctl_chunk;
+        s->last_frame_packet_size = s->bytestream - s->bytestream_start;
+    } else {
+        av_frame_free(&s->last_frame);
+    }
 
     return 0;
 }
@@ -713,6 +1097,9 @@ static av_cold int png_enc_close(AVCodecContext *avctx)
     PNGEncContext *s = avctx->priv_data;
 
     deflateEnd(&s->zstream);
+    av_frame_free(&s->last_frame);
+    av_frame_free(&s->prev_frame);
+    av_freep(&s->last_frame_packet);
     return 0;
 }
 
@@ -768,6 +1155,7 @@ AVCodec ff_apng_encoder = {
     .init           = png_enc_init,
     .close          = png_enc_close,
     .encode2        = encode_apng,
+    .capabilities   = CODEC_CAP_DELAY,
     .pix_fmts       = (const enum AVPixelFormat[]) {
         AV_PIX_FMT_RGB24, AV_PIX_FMT_RGBA,
         AV_PIX_FMT_RGB48BE, AV_PIX_FMT_RGBA64BE,