apng: Support inter-frame compression

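The current algorithm is plain brute force: for every frame it tries each
combination of dispose_op (for the previous frame) and blend_op (for the
current frame), and keeps whichever one encodes to the fewest bytes.
It is not very fast either, probably because of the amount of copying
involved, but it will do for an initial implementation.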

Signed-off-by: Donny Yang <work@kota.moe>
Signed-off-by: Michael Niedermayer <michael@niedermayer.cc>
This commit is contained in:
Donny Yang 2015-08-19 06:41:23 +00:00 committed by Michael Niedermayer
parent 3322f0d415
commit 51ca703222

View File

@ -36,6 +36,14 @@
#define IOBUF_SIZE 4096
/**
 * In-memory copy of the fields of an APNG frame-control ('fcTL') chunk.
 *
 * The members are kept in the same order in which they are serialized
 * into the 26-byte chunk payload.
 */
typedef struct APNGFctlChunk {
    uint32_t sequence_number; // position of this chunk in the APNG chunk sequence
    uint32_t width;           // width of the frame's sub-image, in pixels
    uint32_t height;          // height of the frame's sub-image, in pixels
    uint32_t x_offset;        // column of the sub-image's left edge on the canvas
    uint32_t y_offset;        // row of the sub-image's top edge on the canvas
    uint16_t delay_num;       // frame delay numerator
    uint16_t delay_den;       // frame delay denominator
    uint8_t  dispose_op;      // one of the APNG_DISPOSE_OP_* values
    uint8_t  blend_op;        // one of the APNG_BLEND_OP_* values
} APNGFctlChunk;
typedef struct PNGEncContext {
AVClass *class;
HuffYUVEncDSPContext hdsp;
@ -59,6 +67,12 @@ typedef struct PNGEncContext {
// APNG
uint32_t palette_checksum; // Used to ensure a single unique palette
uint32_t sequence_number;
AVFrame *prev_frame;
AVFrame *last_frame;
APNGFctlChunk last_frame_fctl;
uint8_t *last_frame_packet;
size_t last_frame_packet_size;
} PNGEncContext;
static void png_get_interlaced_row(uint8_t *dst, int row_size,
@ -403,7 +417,7 @@ static int encode_frame(AVCodecContext *avctx, const AVFrame *pict)
uint8_t *progressive_buf = NULL;
uint8_t *top_buf = NULL;
row_size = (avctx->width * s->bits_per_pixel + 7) >> 3;
row_size = (pict->width * s->bits_per_pixel + 7) >> 3;
crow_base = av_malloc((row_size + 32) << (s->filter_type == PNG_FILTER_VALUE_MIXED));
if (!crow_base) {
@ -430,16 +444,16 @@ static int encode_frame(AVCodecContext *avctx, const AVFrame *pict)
for (pass = 0; pass < NB_PASSES; pass++) {
/* NOTE: a pass is completely omitted if no pixels would be
* output */
pass_row_size = ff_png_pass_row_size(pass, s->bits_per_pixel, avctx->width);
pass_row_size = ff_png_pass_row_size(pass, s->bits_per_pixel, pict->width);
if (pass_row_size > 0) {
top = NULL;
for (y = 0; y < avctx->height; y++)
for (y = 0; y < pict->height; y++)
if ((ff_png_pass_ymask[pass] << (y & 7)) & 0x80) {
ptr = p->data[0] + y * p->linesize[0];
FFSWAP(uint8_t *, progressive_buf, top_buf);
png_get_interlaced_row(progressive_buf, pass_row_size,
s->bits_per_pixel, pass,
ptr, avctx->width);
ptr, pict->width);
crow = png_choose_filter(s, crow_buf, progressive_buf,
top, pass_row_size, s->bits_per_pixel >> 3);
png_write_row(avctx, crow, pass_row_size + 1);
@ -449,7 +463,7 @@ static int encode_frame(AVCodecContext *avctx, const AVFrame *pict)
}
} else {
top = NULL;
for (y = 0; y < avctx->height; y++) {
for (y = 0; y < pict->height; y++) {
ptr = p->data[0] + y * p->linesize[0];
crow = png_choose_filter(s, crow_buf, ptr, top,
row_size, s->bits_per_pixel >> 3);
@ -530,6 +544,272 @@ static int encode_png(AVCodecContext *avctx, AVPacket *pkt,
return 0;
}
/**
 * Compute, in place, the sub-image that turns a background into a foreground.
 *
 * On entry `output` holds the background and `input` the foreground. On
 * success `output` holds (packed against its top-left corner, using its own
 * linesize) the smallest rectangle of pixels that, when composited onto the
 * background with fctl_chunk->blend_op, yields the foreground.
 *
 * @param output     background on entry; receives the sub-image, and its
 *                   width/height are overwritten with the sub-image size
 * @param input      foreground picture; only read
 * @param fctl_chunk blend_op is read; width, height, x_offset and y_offset
 *                   are written on success
 * @param bpp        bytes per pixel
 * @return 0 on success, -1 when the requested blend_op cannot express the
 *         change (no alpha channel, no fully transparent palette entry, or a
 *         pixel that would need real alpha-on-alpha un-blending). When -1 is
 *         returned `output` may already have been partially overwritten.
 */
static int apng_do_inverse_blend(AVFrame *output, const AVFrame *input,
                                 APNGFctlChunk *fctl_chunk, uint8_t bpp)
{
    const ptrdiff_t fg_stride = input->linesize[0];
    const ptrdiff_t bg_stride = output->linesize[0];
    unsigned int left   = input->width;
    unsigned int right  = 0;
    unsigned int top    = input->height;
    unsigned int bottom = 0;
    unsigned int col, row;
    const uint8_t *fg_row = input->data[0];
    const uint8_t *bg_row = output->data[0];

    // Pass 1: grow a bounding box around every pixel that differs
    for (row = 0; row < input->height; ++row, fg_row += fg_stride, bg_row += bg_stride) {
        for (col = 0; col < input->width; ++col) {
            if (memcmp(fg_row + bpp * col, bg_row + bpp * col, bpp)) {
                if (col < left)
                    left = col;
                if (col >= right)
                    right = col + 1;
                if (row < top)
                    top = row;
                if (row >= bottom)
                    bottom = row + 1;
            }
        }
    }

    if (right == 0 && left == input->width) {
        // Nothing changed. APNG has no notion of an empty frame, so emit
        // the top-left pixel as a 1x1 frame instead.
        left  = top    = 0;
        right = bottom = 1;
    }

    // Pass 2: produce the pixels of the sub-image
    if (fctl_chunk->blend_op == APNG_BLEND_OP_SOURCE) {
        // Source blending replaces the region outright: copy the foreground
        uint8_t *dst = output->data[0];

        for (row = top; row < bottom; ++row, dst += bg_stride)
            memcpy(dst,
                   input->data[0] + fg_stride * row + bpp * left,
                   bpp * (right - left));
    } else { // APNG_BLEND_OP_OVER
        size_t transparent_index;
        uint32_t *palette;

        switch (input->format) {
        case AV_PIX_FMT_PAL8:
            // Locate the first fully transparent palette entry (256 = none)
            palette = (uint32_t*)input->data[1];
            transparent_index = 0;
            while (transparent_index < 256 && palette[transparent_index] >> 24 != 0)
                ++transparent_index;
            break;

        case AV_PIX_FMT_RGBA64BE:
        case AV_PIX_FMT_YA16BE:
        case AV_PIX_FMT_RGBA:
        case AV_PIX_FMT_GRAY8A:
            break;

        default:
            // Without an alpha channel "over" blending cannot be inverted
            return -1;
        }

        for (row = top; row < bottom; ++row) {
            const uint8_t *fg = input->data[0] + fg_stride * row + bpp * left;
            const uint8_t *bg = output->data[0] + bg_stride * row + bpp * left;
            // The write cursor never runs ahead of the background read
            // cursor, so working inside `output` is safe.
            uint8_t *out = output->data[0] + bg_stride * (row - top);

            for (col = left; col < right; ++col, fg += bpp, bg += bpp, out += bpp) {
                int invertible = 1;

                if (!memcmp(fg, bg, bpp)) {
                    // Unchanged pixel: emit "fully transparent"
                    if (input->format != AV_PIX_FMT_PAL8) {
                        memset(out, 0, bpp);
                    } else if (transparent_index == 256) {
                        // Need fully transparent colour, but none exists
                        return -1;
                    } else {
                        *out = transparent_index;
                    }
                    continue;
                }

                // Only the trivial cases are handled: an opaque foreground
                // or a fully transparent background. General inverse
                // alpha-on-alpha blending is rarely possible and, when it
                // is, compresses no better than APNG_BLEND_OP_SOURCE.
                switch (input->format) {
                case AV_PIX_FMT_RGBA64BE:
                    invertible = ((const uint16_t*)fg)[3] == 0xffff ||
                                 ((const uint16_t*)bg)[3] == 0;
                    break;
                case AV_PIX_FMT_YA16BE:
                    invertible = ((const uint16_t*)fg)[1] == 0xffff ||
                                 ((const uint16_t*)bg)[1] == 0;
                    break;
                case AV_PIX_FMT_RGBA:
                    invertible = fg[3] == 0xff || bg[3] == 0;
                    break;
                case AV_PIX_FMT_GRAY8A:
                    invertible = fg[1] == 0xff || bg[1] == 0;
                    break;
                case AV_PIX_FMT_PAL8:
                    invertible = palette[*fg] >> 24 == 0xff ||
                                 palette[*bg] >> 24 == 0;
                    break;
                }
                if (!invertible)
                    return -1;

                memmove(out, fg, bpp);
            }
        }
    }

    // Publish the geometry of the sub-image
    output->width        = right - left;
    output->height       = bottom - top;
    fctl_chunk->width    = output->width;
    fctl_chunk->height   = output->height;
    fctl_chunk->x_offset = left;
    fctl_chunk->y_offset = top;

    return 0;
}
/**
 * Encode one APNG frame, picking the cheapest disposal/blend combination.
 *
 * The first frame (avctx->frame_number == 0) is encoded as-is, covering the
 * whole picture with APNG_BLEND_OP_SOURCE. For every later frame each
 * dispose_op for the previous frame is paired with each blend_op for the
 * current frame; the difference image for that pairing is encoded, and the
 * smallest result is kept at the bytestream position that was current on
 * entry.
 *
 * @param avctx                codec context; its PNGEncContext supplies the
 *                             bytestream, last_frame and prev_frame
 * @param pict                 picture to encode
 * @param best_fctl_chunk      in/out: frame control data for this frame; on
 *                             success holds the chosen blend_op and region
 * @param best_last_fctl_chunk in/out: frame control data of the previous
 *                             frame; on success (for non-first frames) holds
 *                             the chosen dispose_op
 * @return 0 on success (the result of encode_frame() for the first frame),
 *         a negative error code otherwise
 */
static int apng_encode_frame(AVCodecContext *avctx, const AVFrame *pict,
                             APNGFctlChunk *best_fctl_chunk, APNGFctlChunk *best_last_fctl_chunk)
{
    PNGEncContext *s = avctx->priv_data;
    int ret;
    unsigned int y;
    AVFrame *diff_frame;
    uint8_t bpp = (s->bits_per_pixel + 7) >> 3;
    uint8_t *original_bytestream     = s->bytestream;
    uint8_t *original_bytestream_end = s->bytestream_end;
    uint8_t *temp_bytestream = NULL, *temp_bytestream_end;
    size_t temp_bytestream_size;
    uint32_t best_sequence_number = s->sequence_number;
    uint8_t *best_bytestream = NULL;
    size_t best_bytestream_size = SIZE_MAX;
    APNGFctlChunk last_fctl_chunk = *best_last_fctl_chunk;
    APNGFctlChunk fctl_chunk = *best_fctl_chunk;

    if (avctx->frame_number == 0) {
        // Nothing to diff against: emit the full picture
        best_fctl_chunk->width = pict->width;
        best_fctl_chunk->height = pict->height;
        best_fctl_chunk->x_offset = 0;
        best_fctl_chunk->y_offset = 0;
        best_fctl_chunk->blend_op = APNG_BLEND_OP_SOURCE;
        return encode_frame(avctx, pict);
    }

    diff_frame = av_frame_alloc();
    if (!diff_frame)
        return AVERROR(ENOMEM);

    diff_frame->format = pict->format;
    diff_frame->width = pict->width;
    diff_frame->height = pict->height;
    if ((ret = av_frame_get_buffer(diff_frame, 32)) < 0)
        goto fail;

    // Scratch buffer of the same capacity as the real output, so that a new
    // candidate can be encoded without overwriting the best one so far
    temp_bytestream_size = original_bytestream_end - original_bytestream;
    temp_bytestream = av_malloc(temp_bytestream_size);
    if (!temp_bytestream) {
        ret = AVERROR(ENOMEM);
        goto fail;
    }
    temp_bytestream_end = temp_bytestream + temp_bytestream_size;

    for (last_fctl_chunk.dispose_op = 0; last_fctl_chunk.dispose_op < 3; ++last_fctl_chunk.dispose_op) {
        // 0: APNG_DISPOSE_OP_NONE
        // 1: APNG_DISPOSE_OP_BACKGROUND
        // 2: APNG_DISPOSE_OP_PREVIOUS

        for (fctl_chunk.blend_op = 0; fctl_chunk.blend_op < 2; ++fctl_chunk.blend_op) {
            // 0: APNG_BLEND_OP_SOURCE
            // 1: APNG_BLEND_OP_OVER

            uint32_t original_sequence_number = s->sequence_number, sequence_number;
            uint8_t *bytestream_start = s->bytestream;
            size_t bytestream_size;
            const AVFrame *canvas;

            // Do disposal
            if (last_fctl_chunk.dispose_op != APNG_DISPOSE_OP_PREVIOUS) {
                canvas = s->last_frame;
            } else {
                if (!s->prev_frame)
                    continue;
                canvas = s->prev_frame;
            }

            // apng_do_inverse_blend() shrinks diff_frame to the changed
            // region, so restore the full size before refilling it.
            // av_frame_copy() copies row by row and so copes with the two
            // frames having different linesizes.
            diff_frame->width = pict->width;
            diff_frame->height = pict->height;
            ret = av_frame_copy(diff_frame, canvas);
            if (ret < 0)
                goto fail;

            if (last_fctl_chunk.dispose_op == APNG_DISPOSE_OP_BACKGROUND) {
                // Clear the previous frame's region, addressed with the
                // destination's own linesize
                for (y = last_fctl_chunk.y_offset; y < last_fctl_chunk.y_offset + last_fctl_chunk.height; ++y) {
                    size_t row_start = diff_frame->linesize[0] * y + bpp * last_fctl_chunk.x_offset;
                    memset(diff_frame->data[0] + row_start, 0, bpp * last_fctl_chunk.width);
                }
            }

            // Do inverse blending
            if (apng_do_inverse_blend(diff_frame, pict, &fctl_chunk, bpp) < 0)
                continue;

            // Do encoding, then rewind so the next candidate starts from the
            // same sequence number and buffer position
            ret = encode_frame(avctx, diff_frame);
            sequence_number = s->sequence_number;
            s->sequence_number = original_sequence_number;
            bytestream_size = s->bytestream - bytestream_start;
            s->bytestream = bytestream_start;
            if (ret < 0)
                goto fail;

            if (bytestream_size < best_bytestream_size) {
                *best_fctl_chunk = fctl_chunk;
                *best_last_fctl_chunk = last_fctl_chunk;

                best_sequence_number = sequence_number;
                best_bytestream = s->bytestream;
                best_bytestream_size = bytestream_size;

                // Direct the next attempt at whichever buffer does not hold
                // the current best
                if (best_bytestream == original_bytestream) {
                    s->bytestream = temp_bytestream;
                    s->bytestream_end = temp_bytestream_end;
                } else {
                    s->bytestream = original_bytestream;
                    s->bytestream_end = original_bytestream_end;
                }
            }
        }
    }

    if (!best_bytestream) {
        // Defensive: no combination produced a candidate
        ret = AVERROR(EINVAL);
        goto fail;
    }

    s->sequence_number = best_sequence_number;
    s->bytestream = original_bytestream + best_bytestream_size;
    s->bytestream_end = original_bytestream_end;
    if (best_bytestream != original_bytestream)
        memcpy(original_bytestream, best_bytestream, best_bytestream_size);

    ret = 0;

fail:
    if (ret < 0) {
        // Never leave the context pointing into the scratch buffer freed below
        s->bytestream = original_bytestream;
        s->bytestream_end = original_bytestream_end;
    }
    av_freep(&temp_bytestream);
    av_frame_free(&diff_frame);
    return ret;
}
static int encode_apng(AVCodecContext *avctx, AVPacket *pkt,
const AVFrame *pict, int *got_packet)
{
@ -537,9 +817,9 @@ static int encode_apng(AVCodecContext *avctx, AVPacket *pkt,
int ret;
int enc_row_size;
size_t max_packet_size;
uint8_t buf[26];
APNGFctlChunk fctl_chunk;
if (avctx->codec_id == AV_CODEC_ID_APNG && s->color_type == PNG_COLOR_TYPE_PALETTE) {
if (pict && avctx->codec_id == AV_CODEC_ID_APNG && s->color_type == PNG_COLOR_TYPE_PALETTE) {
uint32_t checksum = ~av_crc(av_crc_get_table(AV_CRC_32_IEEE_LE), ~0U, pict->data[1], 256 * sizeof(uint32_t));
if (avctx->frame_number == 0) {
@ -560,47 +840,111 @@ static int encode_apng(AVCodecContext *avctx, AVPacket *pkt,
);
if (max_packet_size > INT_MAX)
return AVERROR(ENOMEM);
ret = ff_alloc_packet2(avctx, pkt, max_packet_size, 0);
if (ret < 0)
return ret;
s->bytestream_start =
s->bytestream = pkt->data;
s->bytestream_end = pkt->data + pkt->size;
if (avctx->frame_number == 0) {
s->bytestream = avctx->extradata = av_malloc(FF_MIN_BUFFER_SIZE);
if (!avctx->extradata)
return AVERROR(ENOMEM);
ret = encode_headers(avctx, pict);
if (ret < 0)
return ret;
avctx->extradata = av_malloc(s->bytestream - s->bytestream_start);
if (!avctx->extradata)
avctx->extradata_size = s->bytestream - avctx->extradata;
s->last_frame_packet = av_malloc(max_packet_size);
if (!s->last_frame_packet)
return AVERROR(ENOMEM);
avctx->extradata_size = s->bytestream - s->bytestream_start;
memcpy(avctx->extradata, s->bytestream_start, s->bytestream - s->bytestream_start);
s->bytestream = s->bytestream_start;
}
AV_WB32(buf, s->sequence_number);
AV_WB32(buf + 4, avctx->width);
AV_WB32(buf + 8, avctx->height);
AV_WB32(buf + 12, 0); // x offset
AV_WB32(buf + 16, 0); // y offset
AV_WB16(buf + 20, 0); // delay numerator (filled in during muxing)
AV_WB16(buf + 22, 0); // delay denominator
buf[24] = APNG_DISPOSE_OP_BACKGROUND;
buf[25] = APNG_BLEND_OP_SOURCE;
png_write_chunk(&s->bytestream, MKTAG('f', 'c', 'T', 'L'), buf, 26);
++s->sequence_number;
ret = encode_frame(avctx, pict);
} else if (s->last_frame) {
ret = ff_alloc_packet2(avctx, pkt, max_packet_size, 0);
if (ret < 0)
return ret;
pkt->size = s->bytestream - s->bytestream_start;
pkt->flags |= AV_PKT_FLAG_KEY;
memcpy(pkt->data, s->last_frame_packet, s->last_frame_packet_size);
pkt->size = s->last_frame_packet_size;
pkt->pts = pkt->dts = s->last_frame->pts;
}
if (pict) {
s->bytestream_start =
s->bytestream = s->last_frame_packet;
s->bytestream_end = s->bytestream + max_packet_size;
// We're encoding the frame first, so we have to do a bit of shuffling around
// to have the image data write to the correct place in the buffer
fctl_chunk.sequence_number = s->sequence_number;
++s->sequence_number;
s->bytestream += 26 + 12;
ret = apng_encode_frame(avctx, pict, &fctl_chunk, &s->last_frame_fctl);
if (ret < 0)
return ret;
fctl_chunk.delay_num = 0; // delay filled in during muxing
fctl_chunk.delay_den = 0;
} else {
s->last_frame_fctl.dispose_op = APNG_DISPOSE_OP_NONE;
}
if (s->last_frame) {
uint8_t* last_fctl_chunk_start = pkt->data;
uint8_t buf[26];
AV_WB32(buf + 0, s->last_frame_fctl.sequence_number);
AV_WB32(buf + 4, s->last_frame_fctl.width);
AV_WB32(buf + 8, s->last_frame_fctl.height);
AV_WB32(buf + 12, s->last_frame_fctl.x_offset);
AV_WB32(buf + 16, s->last_frame_fctl.y_offset);
AV_WB16(buf + 20, s->last_frame_fctl.delay_num);
AV_WB16(buf + 22, s->last_frame_fctl.delay_den);
buf[24] = s->last_frame_fctl.dispose_op;
buf[25] = s->last_frame_fctl.blend_op;
png_write_chunk(&last_fctl_chunk_start, MKTAG('f', 'c', 'T', 'L'), buf, 26);
*got_packet = 1;
}
if (pict) {
if (!s->last_frame) {
s->last_frame = av_frame_alloc();
if (!s->last_frame)
return AVERROR(ENOMEM);
} else if (s->last_frame_fctl.dispose_op != APNG_DISPOSE_OP_PREVIOUS) {
if (!s->prev_frame) {
s->prev_frame = av_frame_alloc();
if (!s->prev_frame)
return AVERROR(ENOMEM);
s->prev_frame->format = pict->format;
s->prev_frame->width = pict->width;
s->prev_frame->height = pict->height;
if ((ret = av_frame_get_buffer(s->prev_frame, 32)) < 0)
return ret;
}
// Do disposal, but not blending
memcpy(s->prev_frame->data[0], s->last_frame->data[0],
s->last_frame->linesize[0] * s->last_frame->height);
if (s->last_frame_fctl.dispose_op == APNG_DISPOSE_OP_BACKGROUND) {
uint32_t y;
uint8_t bpp = (s->bits_per_pixel + 7) >> 3;
for (y = s->last_frame_fctl.y_offset; y < s->last_frame_fctl.y_offset + s->last_frame_fctl.height; ++y) {
size_t row_start = s->last_frame->linesize[0] * y + bpp * s->last_frame_fctl.x_offset;
memset(s->prev_frame->data[0] + row_start, 0, bpp * s->last_frame_fctl.width);
}
}
}
av_frame_unref(s->last_frame);
ret = av_frame_ref(s->last_frame, (AVFrame*)pict);
if (ret < 0)
return ret;
s->last_frame_fctl = fctl_chunk;
s->last_frame_packet_size = s->bytestream - s->bytestream_start;
} else {
av_frame_free(&s->last_frame);
}
return 0;
}
@ -713,6 +1057,9 @@ static av_cold int png_enc_close(AVCodecContext *avctx)
PNGEncContext *s = avctx->priv_data;
deflateEnd(&s->zstream);
av_frame_free(&s->last_frame);
av_frame_free(&s->prev_frame);
av_freep(&s->last_frame_packet);
return 0;
}
@ -768,6 +1115,7 @@ AVCodec ff_apng_encoder = {
.init = png_enc_init,
.close = png_enc_close,
.encode2 = encode_apng,
.capabilities = CODEC_CAP_DELAY,
.pix_fmts = (const enum AVPixelFormat[]) {
AV_PIX_FMT_RGB24, AV_PIX_FMT_RGBA,
AV_PIX_FMT_RGB48BE, AV_PIX_FMT_RGBA64BE,