mp3enc: write full LAME frame

Most importantly, it contains the encoder delay and replaygain info.
This commit is contained in:
Anton Khirnov 2014-04-12 15:20:57 +02:00
parent 88b32673db
commit ef363ebd59
2 changed files with 129 additions and 34 deletions

View File

@ -370,7 +370,8 @@ to provide the pictures as soon as possible to avoid excessive buffering.
A Xing/LAME frame right after the ID3v2 header (if present). It is enabled by
default, but will be written only if the output is seekable. The
@code{write_xing} private option can be used to disable it. The frame contains
various information that may be useful to the decoder, like the audio duration
or encoder delay.
@item
A legacy ID3v1 tag at the end of the file (disabled by default). It may be

View File

@ -32,6 +32,9 @@
#include "libavutil/opt.h"
#include "libavutil/dict.h"
#include "libavutil/avassert.h"
#include "libavutil/crc.h"
#include "libavutil/mathematics.h"
#include "libavutil/replaygain.h"
static int id3v1_set_string(AVFormatContext *s, const char *key,
uint8_t *buf, int buf_size)
@ -76,8 +79,8 @@ static int id3v1_create_tag(AVFormatContext *s, uint8_t *buf)
#define XING_NUM_BAGS 400
#define XING_TOC_SIZE 100
// maximum size of the xing frame: offset/Xing/flags/frames/size/TOC
#define XING_MAX_SIZE (32 + 4 + 4 + 4 + 4 + XING_TOC_SIZE)
// size of the XING/LAME data, starting from the Xing tag
#define XING_SIZE 156
typedef struct MP3Context {
const AVClass *class;
@ -87,7 +90,18 @@ typedef struct MP3Context {
int write_xing;
/* xing header */
int64_t xing_offset;
// a buffer containing the whole XING/LAME frame
uint8_t *xing_frame;
int xing_frame_size;
AVCRC audio_crc; // CRC of the audio data
uint32_t audio_size; // total size of the audio data
// offset of the XING/LAME frame in the file
int64_t xing_frame_offset;
// offset of the XING/INFO tag in the frame
int xing_offset;
int32_t frames;
int32_t size;
uint32_t want;
@ -115,13 +129,15 @@ static void mp3_write_xing(AVFormatContext *s)
{
MP3Context *mp3 = s->priv_data;
AVCodecContext *codec = s->streams[mp3->audio_stream_idx]->codec;
AVDictionaryEntry *enc = av_dict_get(s->streams[mp3->audio_stream_idx]->metadata, "encoder", NULL, 0);
AVIOContext *dyn_ctx;
int32_t header;
MPADecodeHeader mpah;
int srate_idx, i, channels;
int bitrate_idx;
int best_bitrate_idx;
int best_bitrate_error = INT_MAX;
int xing_offset;
int ret;
int ver = 0;
int lsf, bytes_needed;
@ -161,14 +177,8 @@ static void mp3_write_xing(AVFormatContext *s)
lsf = !((header & (1 << 20) && header & (1 << 19)));
xing_offset = xing_offtbl[ver != 3][channels == 1];
bytes_needed = 4 // header
+ xing_offset
+ 4 // xing tag
+ 4 // frames/size/toc flags
+ 4 // frames
+ 4 // size
+ XING_TOC_SIZE; // toc
mp3->xing_offset = xing_offtbl[ver != 3][channels == 1] + 4;
bytes_needed = mp3->xing_offset + XING_SIZE;
for (bitrate_idx = 1; bitrate_idx < 15; bitrate_idx++) {
int bit_rate = 1000 * avpriv_mpa_bitrate_tab[lsf][3 - 1][bitrate_idx];
@ -192,28 +202,72 @@ static void mp3_write_xing(AVFormatContext *s)
header &= ~mask;
}
avio_wb32(s->pb, header);
ret = avio_open_dyn_buf(&dyn_ctx);
if (ret < 0)
return;
avio_wb32(dyn_ctx, header);
avpriv_mpegaudio_decode_header(&mpah, header);
av_assert0(mpah.frame_size >= XING_MAX_SIZE);
av_assert0(mpah.frame_size >= bytes_needed);
ffio_fill(s->pb, 0, xing_offset);
mp3->xing_offset = avio_tell(s->pb);
ffio_wfourcc(s->pb, "Xing");
avio_wb32(s->pb, 0x01 | 0x02 | 0x04); // frames / size / TOC
ffio_fill(dyn_ctx, 0, mp3->xing_offset - 4);
ffio_wfourcc(dyn_ctx, "Xing");
avio_wb32(dyn_ctx, 0x01 | 0x02 | 0x04 | 0x08); // frames / size / TOC / vbr scale
mp3->size = mpah.frame_size;
mp3->want = 1;
avio_wb32(s->pb, 0); // frames
avio_wb32(s->pb, 0); // size
avio_wb32(dyn_ctx, 0); // frames
avio_wb32(dyn_ctx, 0); // size
// TOC
for (i = 0; i < XING_TOC_SIZE; i++)
avio_w8(s->pb, 255 * i / XING_TOC_SIZE);
avio_w8(dyn_ctx, 255 * i / XING_TOC_SIZE);
ffio_fill(s->pb, 0, mpah.frame_size - bytes_needed);
// vbr quality
// we write it, because some (broken) tools always expect it to be present
avio_wb32(dyn_ctx, 0);
// encoder short version string
if (enc) {
uint8_t encoder_str[9] = { 0 };
memcpy(encoder_str, enc->value, FFMIN(strlen(enc->value), sizeof(encoder_str)));
avio_write(dyn_ctx, encoder_str, sizeof(encoder_str));
} else
ffio_fill(dyn_ctx, 0, 9);
avio_w8(dyn_ctx, 0); // tag revision 0 / unknown vbr method
avio_w8(dyn_ctx, 0); // unknown lowpass filter value
ffio_fill(dyn_ctx, 0, 8); // empty replaygain fields
avio_w8(dyn_ctx, 0); // unknown encoding flags
avio_w8(dyn_ctx, 0); // unknown abr/minimal bitrate
// encoder delay
if (codec->initial_padding >= 1 << 12) {
av_log(s, AV_LOG_WARNING, "Too many samples of initial padding.\n");
avio_wb24(dyn_ctx, 0);
} else {
avio_wb24(dyn_ctx, codec->initial_padding << 12);
}
avio_w8(dyn_ctx, 0); // misc
avio_w8(dyn_ctx, 0); // mp3gain
avio_wb16(dyn_ctx, 0); // preset
// audio length and CRCs (will be updated later)
avio_wb32(dyn_ctx, 0); // music length
avio_wb16(dyn_ctx, 0); // music crc
avio_wb16(dyn_ctx, 0); // tag crc
ffio_fill(dyn_ctx, 0, mpah.frame_size - bytes_needed);
mp3->xing_frame_size = avio_close_dyn_buf(dyn_ctx, &mp3->xing_frame);
mp3->xing_frame_offset = avio_tell(s->pb);
avio_write(s->pb, mp3->xing_frame, mp3->xing_frame_size);
mp3->audio_size = mp3->xing_frame_size;
}
/*
@ -264,6 +318,12 @@ static int mp3_write_audio_packet(AVFormatContext *s, AVPacket *pkt)
}
mp3_xing_add_frame(mp3, pkt);
if (mp3->xing_offset) {
mp3->audio_size += pkt->size;
mp3->audio_crc = av_crc(av_crc_get_table(AV_CRC_16_ANSI_LE),
mp3->audio_crc, pkt->data, pkt->size);
}
}
return ff_raw_write_packet(s, pkt);
@ -292,26 +352,58 @@ static int mp3_queue_flush(AVFormatContext *s)
/**
 * Finalize the XING/LAME frame kept in mp3->xing_frame and rewrite it in the
 * output file.
 *
 * The in-memory copy of the frame is patched with the final frame count,
 * stream size, seek table (TOC), replaygain values, audio length and CRCs.
 * The whole frame is then written back at mp3->xing_frame_offset and the
 * write position is moved to the end of the file.
 *
 * All offsets below are relative to mp3->xing_offset, i.e. the position of
 * the "Xing"/"Info" tag inside the frame buffer (not a file offset).
 *
 * @param s muxer context; s->priv_data is the MP3Context, s->pb is seeked
 */
static void mp3_update_xing(AVFormatContext *s)
{
    MP3Context   *mp3 = s->priv_data;
    AVReplayGain *rg;
    uint16_t      tag_crc;
    uint8_t      *toc;
    int           i, rg_size;

    /* replace "Xing" identification string with "Info" for CBR files. */
    if (!mp3->has_variable_bitrate)
        AV_WL32(mp3->xing_frame + mp3->xing_offset, MKTAG('I', 'n', 'f', 'o'));

    /* tag (4 bytes) and flags (4 bytes) precede the frames and size fields */
    AV_WB32(mp3->xing_frame + mp3->xing_offset + 8,  mp3->frames);
    AV_WB32(mp3->xing_frame + mp3->xing_offset + 12, mp3->size);

    toc    = mp3->xing_frame + mp3->xing_offset + 16;
    toc[0] = 0; // first toc entry has to be zero.
    for (i = 1; i < XING_TOC_SIZE; ++i) {
        int j          = i * mp3->pos / XING_TOC_SIZE;
        int seek_point = 256LL * mp3->bag[j] / mp3->size;
        toc[i] = FFMIN(seek_point, 255);
    }

    /* write replaygain */
    /* Read the side data from the audio stream rather than stream 0, like
     * the other per-stream lookups in this muxer. */
    rg = (AVReplayGain*)av_stream_get_side_data(s->streams[mp3->audio_stream_idx],
                                                AV_PKT_DATA_REPLAYGAIN,
                                                &rg_size);
    if (rg && rg_size >= sizeof(*rg)) {
        uint16_t val;

        /* The 8 replaygain bytes follow tag/flags/frames/size (16), the TOC
         * (100), the vbr scale (4), the encoder string (9), the revision
         * byte and the lowpass byte, hence offset 131: peak (4 bytes), then
         * two 16-bit gain fields at 135 and 137. */
        AV_WB32(mp3->xing_frame + mp3->xing_offset + 131,
                av_rescale(rg->track_peak, 1 << 23, 100000));

        /* INT32_MIN is treated as "no gain value"; the field then stays 0.
         * Each field holds the magnitude in the low 9 bits, a sign flag in
         * bit 9 and a type bit (13 for track, 14 for album).
         * NOTE(review): presumably the gain is stored in 0.1 dB steps and
         * the type bits are the LAME "name code" - confirm against the LAME
         * tag specification. */
        if (rg->track_gain != INT32_MIN) {
            val  = FFABS(rg->track_gain / 10000) & ((1 << 9) - 1);
            val |= (rg->track_gain < 0) << 9;
            val |= 1 << 13;
            AV_WB16(mp3->xing_frame + mp3->xing_offset + 135, val);
        }

        if (rg->album_gain != INT32_MIN) {
            val  = FFABS(rg->album_gain / 10000) & ((1 << 9) - 1);
            val |= (rg->album_gain < 0) << 9;
            val |= 1 << 14;
            AV_WB16(mp3->xing_frame + mp3->xing_offset + 137, val);
        }
    }

    /* The last 8 bytes of the tag: audio length (4), audio CRC (2),
     * tag CRC (2). */
    AV_WB32(mp3->xing_frame + mp3->xing_offset + XING_SIZE - 8, mp3->audio_size);
    AV_WB16(mp3->xing_frame + mp3->xing_offset + XING_SIZE - 4, mp3->audio_crc);

    /* NOTE(review): 190 equals xing_offset + XING_SIZE - 2 (all bytes before
     * the tag CRC field) only when xing_offset is 36; confirm whether the
     * CRC span should follow the tag offset for other frame layouts. */
    tag_crc = av_crc(av_crc_get_table(AV_CRC_16_ANSI_LE), 0, mp3->xing_frame, 190);
    AV_WB16(mp3->xing_frame + mp3->xing_offset + XING_SIZE - 2, tag_crc);

    /* Overwrite the placeholder frame in the file with the finished one,
     * then return to the end of the file. */
    avio_seek(s->pb, mp3->xing_frame_offset, SEEK_SET);
    avio_write(s->pb, mp3->xing_frame, mp3->xing_frame_size);
    avio_seek(s->pb, 0, SEEK_END);
}
@ -334,6 +426,8 @@ static int mp3_write_trailer(struct AVFormatContext *s)
if (mp3->xing_offset)
mp3_update_xing(s);
av_freep(&mp3->xing_frame);
return 0;
}