Merge commit '8e134e5104e99a69cd4cea10540a7ce9c3682a2c'

* commit '8e134e5104e99a69cd4cea10540a7ce9c3682a2c':
  lavc: clarify get_buffer() documentation
  mpegaudiodec: use planar sample format for output unless packed is requested
  x86: h264 qpel: use the correct number of utilized xmm regs in cglobal

Merged-by: Michael Niedermayer <michaelni@gmx.at>
2012-11-26 14:24:14 +01:00 · 2012-11-26 14:24:14 +01:00 · a13148f633
commit a13148f633
parent 86270236d5 8e134e5104
4 changed files with 73 additions and 55 deletions
--- a/libavcodec/avcodec.h
+++ b/libavcodec/avcodec.h
@@ -2406,7 +2406,12 @@ typedef struct AVCodecContext {
     *
     * Decoders cannot use the buffer after returning from
     * avcodec_decode_audio4(), so they will not call release_buffer(), as it
-     * is assumed to be released immediately upon return.
+     * is assumed to be released immediately upon return. In some rare cases,
+     * a decoder may need to call get_buffer() more than once in a single
+     * call to avcodec_decode_audio4(). In that case, when get_buffer() is
+     * called again after it has already been called once, the previously
+     * acquired buffer is assumed to be released at that time and may not be
+     * reused by the decoder.
     *
     * As a convenience, av_samples_get_buffer_size() and
     * av_samples_fill_arrays() in libavutil may be used by custom get_buffer()
--- a/libavcodec/mpegaudiodec.c
+++ b/libavcodec/mpegaudiodec.c
@@ -96,6 +96,7 @@ typedef struct MPADecodeContext {
 #   define MULLx(x, y, s) ((y)*(x))
 #   define RENAME(a) a ## _float
 #   define OUT_FMT   AV_SAMPLE_FMT_FLT
+#   define OUT_FMT_P AV_SAMPLE_FMT_FLTP
 #else
 #   define SHR(a,b)       ((a)>>(b))
 /* WARNING: only correct for positive numbers */
@@ -106,6 +107,7 @@ typedef struct MPADecodeContext {
 #   define MULLx(x, y, s) MULL(x,y,s)
 #   define RENAME(a)      a ## _fixed
 #   define OUT_FMT   AV_SAMPLE_FMT_S16
+#   define OUT_FMT_P AV_SAMPLE_FMT_S16P
 #endif

 /****************/
@@ -441,7 +443,11 @@ static av_cold int decode_init(AVCodecContext * avctx)
    ff_mpadsp_init(&s->mpadsp);
    ff_dsputil_init(&s->dsp, avctx);

-    avctx->sample_fmt= OUT_FMT;
+    if (avctx->request_sample_fmt == OUT_FMT &&
+        avctx->codec_id != AV_CODEC_ID_MP3ON4)
+        avctx->sample_fmt = OUT_FMT;
+    else
+        avctx->sample_fmt = OUT_FMT_P;
    s->err_recognition = avctx->err_recognition;

    if (avctx->codec_id == AV_CODEC_ID_MP3ADU)
@@ -1564,7 +1570,7 @@ static int mp_decode_layer3(MPADecodeContext *s)
    return nb_granules * 18;
 }

-static int mp_decode_frame(MPADecodeContext *s, OUT_INT *samples,
+static int mp_decode_frame(MPADecodeContext *s, OUT_INT **samples,
                           const uint8_t *buf, int buf_size)
 {
    int i, nb_frames, ch, ret;
@@ -1627,20 +1633,26 @@ static int mp_decode_frame(MPADecodeContext *s, OUT_INT *samples,
            av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed\n");
            return ret;
        }
-        samples = (OUT_INT *)s->frame.data[0];
+        samples = (OUT_INT **)s->frame.extended_data;
    }

    /* apply the synthesis filter */
    for (ch = 0; ch < s->nb_channels; ch++) {
-        samples_ptr = samples + ch;
+        int sample_stride;
+        if (s->avctx->sample_fmt == OUT_FMT_P) {
+            samples_ptr   = samples[ch];
+            sample_stride = 1;
+        } else {
+            samples_ptr   = samples[0] + ch;
+            sample_stride = s->nb_channels;
+        }
        for (i = 0; i < nb_frames; i++) {
-            RENAME(ff_mpa_synth_filter)(
-                         &s->mpadsp,
-                         s->synth_buf[ch], &(s->synth_buf_offset[ch]),
-                         RENAME(ff_mpa_synth_window), &s->dither_state,
-                         samples_ptr, s->nb_channels,
-                         s->sb_samples[ch][i]);
-            samples_ptr += 32 * s->nb_channels;
+            RENAME(ff_mpa_synth_filter)(&s->mpadsp, s->synth_buf[ch],
+                                        &(s->synth_buf_offset[ch]),
+                                        RENAME(ff_mpa_synth_window),
+                                        &s->dither_state, samples_ptr,
+                                        sample_stride, s->sb_samples[ch][i]);
+            samples_ptr += 32 * sample_stride;
        }
    }

@@ -1789,7 +1801,6 @@ typedef struct MP3On4DecodeContext {
    int syncword;                   ///< syncword patch
    const uint8_t *coff;            ///< channel offsets in output buffer
    MPADecodeContext *mp3decctx[5]; ///< MPADecodeContext for every decoder instance
-    OUT_INT *decoded_buf;           ///< output buffer for decoded samples
 } MP3On4DecodeContext;

 #include "mpeg4audio.h"
@@ -1831,8 +1842,6 @@ static av_cold int decode_close_mp3on4(AVCodecContext * avctx)
    for (i = 0; i < s->frames; i++)
        av_free(s->mp3decctx[i]);

-    av_freep(&s->decoded_buf);
-
    return 0;
 }

@@ -1893,14 +1902,6 @@ static int decode_init_mp3on4(AVCodecContext * avctx)
        s->mp3decctx[i]->mpadsp = s->mp3decctx[0]->mpadsp;
    }

-    /* Allocate buffer for multi-channel output if needed */
-    if (s->frames > 1) {
-        s->decoded_buf = av_malloc(MPA_FRAME_SIZE * MPA_MAX_CHANNELS *
-                                   sizeof(*s->decoded_buf));
-        if (!s->decoded_buf)
-            goto alloc_fail;
-    }
-
    return 0;
 alloc_fail:
    decode_close_mp3on4(avctx);
@@ -1927,9 +1928,9 @@ static int decode_frame_mp3on4(AVCodecContext *avctx, void *data,
    MPADecodeContext *m;
    int fsize, len = buf_size, out_size = 0;
    uint32_t header;
-    OUT_INT *out_samples;
-    OUT_INT *outptr, *bp;
-    int fr, j, n, ch, ret;
+    OUT_INT **out_samples;
+    OUT_INT *outptr[2];
+    int fr, ch, ret;

    /* get output buffer */
    s->frame->nb_samples = s->frames * MPA_FRAME_SIZE;
@@ -1937,15 +1938,12 @@ static int decode_frame_mp3on4(AVCodecContext *avctx, void *data,
        av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
        return ret;
    }
-    out_samples = (OUT_INT *)s->frame->data[0];
+    out_samples = (OUT_INT **)s->frame->extended_data;

    // Discard too short frames
    if (buf_size < HEADER_SIZE)
        return AVERROR_INVALIDDATA;

-    // If only one decoder interleave is not needed
-    outptr = s->frames == 1 ? out_samples : s->decoded_buf;
-
    avctx->bit_rate = 0;

    ch = 0;
@@ -1973,6 +1971,10 @@ static int decode_frame_mp3on4(AVCodecContext *avctx, void *data,
        }
        ch += m->nb_channels;

+        outptr[0] = out_samples[s->coff[fr]];
+        if (m->nb_channels > 1)
+            outptr[1] = out_samples[s->coff[fr] + 1];
+
        if ((ret = mp_decode_frame(m, outptr, buf, fsize)) < 0)
            return ret;

@@ -1980,23 +1982,6 @@ static int decode_frame_mp3on4(AVCodecContext *avctx, void *data,
        buf      += fsize;
        len      -= fsize;

-        if (s->frames > 1) {
-            n = m->avctx->frame_size*m->nb_channels;
-            /* interleave output data */
-            bp = out_samples + s->coff[fr];
-            if (m->nb_channels == 1) {
-                for (j = 0; j < n; j++) {
-                    *bp = s->decoded_buf[j];
-                    bp += avctx->channels;
-                }
-            } else {
-                for (j = 0; j < n; j++) {
-                    bp[0] = s->decoded_buf[j++];
-                    bp[1] = s->decoded_buf[j];
-                    bp   += avctx->channels;
-                }
-            }
-        }
        avctx->bit_rate += m->bit_rate;
    }

@@ -2023,6 +2008,9 @@ AVCodec ff_mp1_decoder = {
    .capabilities   = CODEC_CAP_DR1,
    .flush          = flush,
    .long_name      = NULL_IF_CONFIG_SMALL("MP1 (MPEG audio layer 1)"),
+    .sample_fmts    = (const enum AVSampleFormat[]) { AV_SAMPLE_FMT_S16P,
+                                                      AV_SAMPLE_FMT_S16,
+                                                      AV_SAMPLE_FMT_NONE },
 };
 #endif
 #if CONFIG_MP2_DECODER
@@ -2036,6 +2024,9 @@ AVCodec ff_mp2_decoder = {
    .capabilities   = CODEC_CAP_DR1,
    .flush          = flush,
    .long_name      = NULL_IF_CONFIG_SMALL("MP2 (MPEG audio layer 2)"),
+    .sample_fmts    = (const enum AVSampleFormat[]) { AV_SAMPLE_FMT_S16P,
+                                                      AV_SAMPLE_FMT_S16,
+                                                      AV_SAMPLE_FMT_NONE },
 };
 #endif
 #if CONFIG_MP3_DECODER
@@ -2049,6 +2040,9 @@ AVCodec ff_mp3_decoder = {
    .capabilities   = CODEC_CAP_DR1,
    .flush          = flush,
    .long_name      = NULL_IF_CONFIG_SMALL("MP3 (MPEG audio layer 3)"),
+    .sample_fmts    = (const enum AVSampleFormat[]) { AV_SAMPLE_FMT_S16P,
+                                                      AV_SAMPLE_FMT_S16,
+                                                      AV_SAMPLE_FMT_NONE },
 };
 #endif
 #if CONFIG_MP3ADU_DECODER
@@ -2062,6 +2056,9 @@ AVCodec ff_mp3adu_decoder = {
    .capabilities   = CODEC_CAP_DR1,
    .flush          = flush,
    .long_name      = NULL_IF_CONFIG_SMALL("ADU (Application Data Unit) MP3 (MPEG audio layer 3)"),
+    .sample_fmts    = (const enum AVSampleFormat[]) { AV_SAMPLE_FMT_S16P,
+                                                      AV_SAMPLE_FMT_S16,
+                                                      AV_SAMPLE_FMT_NONE },
 };
 #endif
 #if CONFIG_MP3ON4_DECODER
@@ -2076,6 +2073,8 @@ AVCodec ff_mp3on4_decoder = {
    .capabilities   = CODEC_CAP_DR1,
    .flush          = flush_mp3on4,
    .long_name      = NULL_IF_CONFIG_SMALL("MP3onMP4"),
+    .sample_fmts    = (const enum AVSampleFormat[]) { AV_SAMPLE_FMT_S16P,
+                                                      AV_SAMPLE_FMT_NONE },
 };
 #endif
 #endif
--- a/libavcodec/mpegaudiodec_float.c
+++ b/libavcodec/mpegaudiodec_float.c
@@ -33,6 +33,9 @@ AVCodec ff_mp1float_decoder = {
    .capabilities   = CODEC_CAP_DR1,
    .flush          = flush,
    .long_name      = NULL_IF_CONFIG_SMALL("MP1 (MPEG audio layer 1)"),
+    .sample_fmts    = (const enum AVSampleFormat[]) { AV_SAMPLE_FMT_FLTP,
+                                                      AV_SAMPLE_FMT_FLT,
+                                                      AV_SAMPLE_FMT_NONE },
 };
 #endif
 #if CONFIG_MP2FLOAT_DECODER
@@ -46,6 +49,9 @@ AVCodec ff_mp2float_decoder = {
    .capabilities   = CODEC_CAP_DR1,
    .flush          = flush,
    .long_name      = NULL_IF_CONFIG_SMALL("MP2 (MPEG audio layer 2)"),
+    .sample_fmts    = (const enum AVSampleFormat[]) { AV_SAMPLE_FMT_FLTP,
+                                                      AV_SAMPLE_FMT_FLT,
+                                                      AV_SAMPLE_FMT_NONE },
 };
 #endif
 #if CONFIG_MP3FLOAT_DECODER
@@ -59,6 +65,9 @@ AVCodec ff_mp3float_decoder = {
    .capabilities   = CODEC_CAP_DR1,
    .flush          = flush,
    .long_name      = NULL_IF_CONFIG_SMALL("MP3 (MPEG audio layer 3)"),
+    .sample_fmts    = (const enum AVSampleFormat[]) { AV_SAMPLE_FMT_FLTP,
+                                                      AV_SAMPLE_FMT_FLT,
+                                                      AV_SAMPLE_FMT_NONE },
 };
 #endif
 #if CONFIG_MP3ADUFLOAT_DECODER
@@ -72,6 +81,9 @@ AVCodec ff_mp3adufloat_decoder = {
    .capabilities   = CODEC_CAP_DR1,
    .flush          = flush,
    .long_name      = NULL_IF_CONFIG_SMALL("ADU (Application Data Unit) MP3 (MPEG audio layer 3)"),
+    .sample_fmts    = (const enum AVSampleFormat[]) { AV_SAMPLE_FMT_FLTP,
+                                                      AV_SAMPLE_FMT_FLT,
+                                                      AV_SAMPLE_FMT_NONE },
 };
 #endif
 #if CONFIG_MP3ON4FLOAT_DECODER
@@ -86,5 +98,7 @@ AVCodec ff_mp3on4float_decoder = {
    .capabilities   = CODEC_CAP_DR1,
    .flush          = flush_mp3on4,
    .long_name      = NULL_IF_CONFIG_SMALL("MP3onMP4"),
+    .sample_fmts    = (const enum AVSampleFormat[]) { AV_SAMPLE_FMT_FLTP,
+                                                      AV_SAMPLE_FMT_NONE },
 };
 #endif
--- a/libavcodec/x86/h264_qpel_8bit.asm
+++ b/libavcodec/x86/h264_qpel_8bit.asm
@@ -157,7 +157,7 @@ QPEL8_H_LOWPASS_OP put
 QPEL8_H_LOWPASS_OP avg

 %macro QPEL8_H_LOWPASS_OP_XMM 1
-cglobal %1_h264_qpel8_h_lowpass, 4,5,7 ; dst, src, dstStride, srcStride
+cglobal %1_h264_qpel8_h_lowpass, 4,5,8 ; dst, src, dstStride, srcStride
    movsxdifnidn  r2, r2d
    movsxdifnidn  r3, r3d
    mov          r4d, 8
@@ -312,7 +312,7 @@ QPEL8_H_LOWPASS_L2_OP avg


 %macro QPEL8_H_LOWPASS_L2_OP_XMM 1
-cglobal %1_h264_qpel8_h_lowpass_l2, 5,6,7 ; dst, src, src2, dstStride, src2Stride
+cglobal %1_h264_qpel8_h_lowpass_l2, 5,6,8 ; dst, src, src2, dstStride, src2Stride
    movsxdifnidn  r3, r3d
    movsxdifnidn  r4, r4d
    mov          r5d, 8
@@ -415,13 +415,13 @@ QPEL4_V_LOWPASS_OP avg

 %macro QPEL8OR16_V_LOWPASS_OP 1
 %if cpuflag(sse2)
-cglobal %1_h264_qpel8or16_v_lowpass, 5,5,7 ; dst, src, dstStride, srcStride, h
+cglobal %1_h264_qpel8or16_v_lowpass, 5,5,8 ; dst, src, dstStride, srcStride, h
    movsxdifnidn  r2, r2d
    movsxdifnidn  r3, r3d
    sub           r1, r3
    sub           r1, r3
 %else
-cglobal %1_h264_qpel8or16_v_lowpass_op, 5,5,7 ; dst, src, dstStride, srcStride, h
+cglobal %1_h264_qpel8or16_v_lowpass_op, 5,5,8 ; dst, src, dstStride, srcStride, h
    movsxdifnidn  r2, r2d
    movsxdifnidn  r3, r3d
 %endif
@@ -543,7 +543,7 @@ QPEL4_HV1_LOWPASS_OP put
 QPEL4_HV1_LOWPASS_OP avg

 %macro QPEL8OR16_HV1_LOWPASS_OP 1
-cglobal %1_h264_qpel8or16_hv1_lowpass_op, 4,4,7 ; src, tmp, srcStride, size
+cglobal %1_h264_qpel8or16_hv1_lowpass_op, 4,4,8 ; src, tmp, srcStride, size
    movsxdifnidn  r2, r2d
    pxor          m7, m7
    movh          m0, [r0]
@@ -635,7 +635,7 @@ QPEL8OR16_HV2_LOWPASS_OP put
 QPEL8OR16_HV2_LOWPASS_OP avg

 %macro QPEL8OR16_HV2_LOWPASS_OP_XMM 1
-cglobal %1_h264_qpel8or16_hv2_lowpass, 5,5,7 ; dst, tmp, dstStride, tmpStride, size
+cglobal %1_h264_qpel8or16_hv2_lowpass, 5,5,8 ; dst, tmp, dstStride, tmpStride, size
    movsxdifnidn  r2, r2d
    movsxdifnidn  r3, r3d
    cmp          r4d, 16