opusdec: properly handle mismatching configurations in multichannel streams

The substreams can have different resampling delays, so an additional level of buffering is needed to synchronize them. Bug-Id: 876
2015-07-27 11:13:53 +02:00
parent 859ce02c98
commit c49b88b93b
2 changed files with 99 additions and 14 deletions
--- a/libavcodec/opus.h
+++ b/libavcodec/opus.h
@@ -164,6 +164,16 @@ typedef struct ChannelMap {

 typedef struct OpusContext {
    OpusStreamContext *streams;
+
+    /* current output buffers for each streams */
+    float **out;
+    int   *out_size;
+    /* Buffers for synchronizing the streams when they have different
+     * resampling delays */
+    AVAudioFifo **sync_buffers;
+    /* number of decoded samples for each stream */
+    int         *decoded_samples;
+
    int             nb_streams;
    int      nb_stereo_streams;

--- a/libavcodec/opusdec.c
+++ b/libavcodec/opusdec.c
@@ -367,12 +367,17 @@ static int opus_decode_frame(OpusStreamContext *s, const uint8_t *data, int size

 static int opus_decode_subpacket(OpusStreamContext *s,
                                 const uint8_t *buf, int buf_size,
+                                 float **out, int out_size,
                                 int nb_samples)
 {
    int output_samples = 0;
    int flush_needed   = 0;
    int i, j, ret;

+    s->out[0]   = out[0];
+    s->out[1]   = out[1];
+    s->out_size = out_size;
+
    /* check if we need to flush the resampler */
    if (avresample_is_open(s->avr)) {
        if (buf) {
@@ -450,9 +455,16 @@ static int opus_decode_packet(AVCodecContext *avctx, void *data,
    const uint8_t *buf  = avpkt->data;
    int buf_size        = avpkt->size;
    int coded_samples   = 0;
-    int decoded_samples = 0;
+    int decoded_samples = INT_MAX;
+    int delayed_samples = 0;
    int i, ret;

+    /* calculate the number of delayed samples */
+    for (i = 0; i < c->nb_streams; i++) {
+        delayed_samples = FFMAX(delayed_samples,
+                                c->streams[i].delayed_samples + av_audio_fifo_size(c->sync_buffers[i]));
+    }
+
    /* decode the header of the first sub-packet to find out the sample count */
    if (buf) {
        OpusPacket *pkt = &c->streams[0].packet;
@@ -465,7 +477,7 @@ static int opus_decode_packet(AVCodecContext *avctx, void *data,
        c->streams[0].silk_samplerate = get_silk_samplerate(pkt->config);
    }

-    frame->nb_samples = coded_samples + c->streams[0].delayed_samples;
+    frame->nb_samples = coded_samples + delayed_samples;

    /* no input or buffered data => nothing to do */
    if (!frame->nb_samples) {
@@ -481,14 +493,43 @@ static int opus_decode_packet(AVCodecContext *avctx, void *data,
    }
    frame->nb_samples = 0;

+    memset(c->out, 0, c->nb_streams * 2 * sizeof(*c->out));
    for (i = 0; i < avctx->channels; i++) {
        ChannelMap *map = &c->channel_maps[i];
        if (!map->copy)
-            c->streams[map->stream_idx].out[map->channel_idx] = (float*)frame->extended_data[i];
+            c->out[2 * map->stream_idx + map->channel_idx] = (float*)frame->extended_data[i];
    }

-    for (i = 0; i < c->nb_streams; i++)
-        c->streams[i].out_size = frame->linesize[0];
+    /* read the data from the sync buffers */
+    for (i = 0; i < c->nb_streams; i++) {
+        float          **out = c->out + 2 * i;
+        int sync_size = av_audio_fifo_size(c->sync_buffers[i]);
+
+        float sync_dummy[32];
+        int out_dummy = (!out[0]) | ((!out[1]) << 1);
+
+        if (!out[0])
+            out[0] = sync_dummy;
+        if (!out[1])
+            out[1] = sync_dummy;
+        if (out_dummy && sync_size > FF_ARRAY_ELEMS(sync_dummy))
+            return AVERROR_BUG;
+
+        ret = av_audio_fifo_read(c->sync_buffers[i], (void**)out, sync_size);
+        if (ret < 0)
+            return ret;
+
+        if (out_dummy & 1)
+            out[0] = NULL;
+        else
+            out[0] += ret;
+        if (out_dummy & 2)
+            out[1] = NULL;
+        else
+            out[1] += ret;
+
+        c->out_size[i] = frame->linesize[0] - ret * sizeof(float);
+    }

    /* decode each sub-packet */
    for (i = 0; i < c->nb_streams; i++) {
@@ -509,20 +550,31 @@ static int opus_decode_packet(AVCodecContext *avctx, void *data,
            s->silk_samplerate = get_silk_samplerate(s->packet.config);
        }

-        ret = opus_decode_subpacket(&c->streams[i], buf,
-                                    s->packet.data_size, coded_samples);
+        ret = opus_decode_subpacket(&c->streams[i], buf, s->packet.data_size,
+                                    c->out + 2 * i, c->out_size[i], coded_samples);
        if (ret < 0)
            return ret;
-        if (decoded_samples && ret != decoded_samples) {
-            av_log(avctx, AV_LOG_ERROR, "Different numbers of decoded samples "
-                   "in a multi-channel stream\n");
-            return AVERROR_INVALIDDATA;
-        }
-        decoded_samples = ret;
+        c->decoded_samples[i] = ret;
+        decoded_samples       = FFMIN(decoded_samples, ret);
+
        buf      += s->packet.packet_size;
        buf_size -= s->packet.packet_size;
    }

+    /* buffer the extra samples */
+    for (i = 0; i < c->nb_streams; i++) {
+        int buffer_samples = c->decoded_samples[i] - decoded_samples;
+        if (buffer_samples) {
+            float *buf[2] = { c->out[2 * i + 0] ? c->out[2 * i + 0] : (float*)frame->extended_data[0],
+                              c->out[2 * i + 1] ? c->out[2 * i + 1] : (float*)frame->extended_data[0] };
+            buf[0] += buffer_samples;
+            buf[1] += buffer_samples;
+            ret = av_audio_fifo_write(c->sync_buffers[i], (void**)buf, buffer_samples);
+            if (ret < 0)
+                return ret;
+        }
+    }
+
    for (i = 0; i < avctx->channels; i++) {
        ChannelMap *map = &c->channel_maps[i];

@@ -563,6 +615,8 @@ static av_cold void opus_decode_flush(AVCodecContext *ctx)
            av_audio_fifo_drain(s->celt_delay, av_audio_fifo_size(s->celt_delay));
        avresample_close(s->avr);

+        av_audio_fifo_drain(c->sync_buffers[i], av_audio_fifo_size(c->sync_buffers[i]));
+
        ff_silk_flush(s->silk);
        ff_celt_flush(s->celt);
    }
@@ -587,6 +641,16 @@ static av_cold int opus_decode_close(AVCodecContext *avctx)
    }

    av_freep(&c->streams);
+
+    if (c->sync_buffers) {
+        for (i = 0; i < c->nb_streams; i++)
+            av_audio_fifo_free(c->sync_buffers[i]);
+    }
+    av_freep(&c->sync_buffers);
+    av_freep(&c->decoded_samples);
+    av_freep(&c->out);
+    av_freep(&c->out_size);
+
    c->nb_streams = 0;

    av_freep(&c->channel_maps);
@@ -611,7 +675,11 @@ static av_cold int opus_decode_init(AVCodecContext *avctx)

    /* allocate and init each independent decoder */
    c->streams = av_mallocz_array(c->nb_streams, sizeof(*c->streams));
-    if (!c->streams) {
+    c->out             = av_mallocz_array(c->nb_streams, 2 * sizeof(*c->out));
+    c->out_size        = av_mallocz_array(c->nb_streams, sizeof(*c->out_size));
+    c->sync_buffers    = av_mallocz_array(c->nb_streams, sizeof(*c->sync_buffers));
+    c->decoded_samples = av_mallocz_array(c->nb_streams, sizeof(*c->decoded_samples));
+    if (!c->streams || !c->sync_buffers || !c->decoded_samples || !c->out || !c->out_size) {
        c->nb_streams = 0;
        ret = AVERROR(ENOMEM);
        goto fail;
@@ -658,6 +726,13 @@ static av_cold int opus_decode_init(AVCodecContext *avctx)
            ret = AVERROR(ENOMEM);
            goto fail;
        }
+
+        c->sync_buffers[i] = av_audio_fifo_alloc(avctx->sample_fmt,
+                                                 s->output_channels, 32);
+        if (!c->sync_buffers[i]) {
+            ret = AVERROR(ENOMEM);
+            goto fail;
+        }
    }

    return 0;