From c4f5c2d6f4ffa3f4b56555059000208a6ba47b55 Mon Sep 17 00:00:00 2001
From: Mans Rullgard <mans@mansr.com>
Date: Mon, 16 May 2011 16:52:01 +0100
Subject: [PATCH 01/19] Move some mpegaudio functions to new mpegaudiodsp
 subsystem

This separation allows these functions to be used more cleanly by
other codecs (e.g. qdm2) and simplifies the creation of optimised
versions of them.

Signed-off-by: Mans Rullgard <mans@mansr.com>
---
 configure                             |  19 ++-
 libavcodec/Makefile                   |   3 +
 libavcodec/mpc.c                      |   4 +-
 libavcodec/mpc.h                      |   1 +
 libavcodec/mpc7.c                     |   3 +-
 libavcodec/mpc8.c                     |   3 +-
 libavcodec/mpegaudio.h                |  24 +--
 libavcodec/mpegaudiodec.c             | 197 +------------------------
 libavcodec/mpegaudiodec_float.c       |  19 ---
 libavcodec/mpegaudiodsp.c             |  40 +++++
 libavcodec/mpegaudiodsp.h             |  63 ++++++++
 libavcodec/mpegaudiodsp_fixed.c       |  20 +++
 libavcodec/mpegaudiodsp_float.c       |  20 +++
 libavcodec/mpegaudiodsp_template.c    | 205 ++++++++++++++++++++++++++
 libavcodec/ppc/mpegaudiodec_altivec.c |   9 +-
 libavcodec/qdm2.c                     |   6 +-
 libavcodec/x86/mpegaudiodec_mmx.c     |   9 +-
 17 files changed, 390 insertions(+), 255 deletions(-)
 create mode 100644 libavcodec/mpegaudiodsp.c
 create mode 100644 libavcodec/mpegaudiodsp.h
 create mode 100644 libavcodec/mpegaudiodsp_fixed.c
 create mode 100644 libavcodec/mpegaudiodsp_float.c
 create mode 100644 libavcodec/mpegaudiodsp_template.c

diff --git a/configure b/configure
index 5b81e0b599..6becb09724 100755
--- a/configure
+++ b/configure
@@ -952,6 +952,7 @@ CONFIG_LIST="
     mdct
     memalign_hack
     mlib
+    mpegaudiodsp
     network
     nonfree
     pic
@@ -1235,6 +1236,7 @@ symver_if_any="symver_asm_label symver_gnu_asm"
 dct_select="rdft"
 mdct_select="fft"
 rdft_select="fft"
+mpegaudiodsp_select="dct"
 
 # decoders / encoders / hardware accelerators
 aac_decoder_select="mdct sinewin"
@@ -1286,11 +1288,16 @@ ljpeg_encoder_select="aandct"
 loco_decoder_select="golomb"
 mjpeg_encoder_select="aandct"
 mlp_decoder_select="mlp_parser"
-mp1float_decoder_select="dct"
-mp2float_decoder_select="dct"
-mp3adufloat_decoder_select="dct"
-mp3float_decoder_select="dct"
-mp3on4float_decoder_select="dct"
+mp1_decoder_select="mpegaudiodsp"
+mp2_decoder_select="mpegaudiodsp"
+mp3adu_decoder_select="mpegaudiodsp"
+mp3_decoder_select="mpegaudiodsp"
+mp3on4_decoder_select="mpegaudiodsp"
+mp1float_decoder_select="mpegaudiodsp"
+mp2float_decoder_select="mpegaudiodsp"
+mp3adufloat_decoder_select="mpegaudiodsp"
+mp3float_decoder_select="mpegaudiodsp"
+mp3on4float_decoder_select="mpegaudiodsp"
 mpeg1video_encoder_select="aandct"
 mpeg2video_encoder_select="aandct"
 mpeg4_decoder_select="h263_decoder mpeg4video_parser"
@@ -1315,7 +1322,7 @@ nellymoser_encoder_select="mdct sinewin"
 png_decoder_select="zlib"
 png_encoder_select="zlib"
 qcelp_decoder_select="lsp"
-qdm2_decoder_select="mdct rdft"
+qdm2_decoder_select="mdct rdft mpegaudiodsp"
 ra_144_encoder_select="lpc"
 rv10_decoder_select="h263_decoder"
 rv10_encoder_select="h263_encoder"
diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index fa70216c9c..b26c33de63 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -40,6 +40,9 @@ OBJS-$(CONFIG_HUFFMAN)                 += huffman.o
 OBJS-$(CONFIG_LPC)                     += lpc.o
 OBJS-$(CONFIG_LSP)                     += lsp.o
 OBJS-$(CONFIG_MDCT)                    += mdct_fixed.o mdct_float.o
+OBJS-$(CONFIG_MPEGAUDIODSP)            += mpegaudiodsp.o                \
+                                          mpegaudiodsp_fixed.o          \
+                                          mpegaudiodsp_float.o
 RDFT-OBJS-$(CONFIG_HARDCODED_TABLES)   += sin_tables.o
 OBJS-$(CONFIG_RDFT)                    += rdft.o $(RDFT-OBJS-yes)
 OBJS-$(CONFIG_SINEWIN)                 += sinewin.o
diff --git a/libavcodec/mpc.c b/libavcodec/mpc.c
index 15febefe0b..4573860525 100644
--- a/libavcodec/mpc.c
+++ b/libavcodec/mpc.c
@@ -29,6 +29,7 @@
 #include "avcodec.h"
 #include "get_bits.h"
 #include "dsputil.h"
+#include "mpegaudiodsp.h"
 #include "mpegaudio.h"
 
 #include "mpc.h"
@@ -51,7 +52,8 @@ static void mpc_synth(MPCContext *c, int16_t *out, int channels)
     for(ch = 0;  ch < channels; ch++){
         samples_ptr = samples + ch;
         for(i = 0; i < SAMPLES_PER_BAND; i++) {
-            ff_mpa_synth_filter_fixed(c->synth_buf[ch], &(c->synth_buf_offset[ch]),
+            ff_mpa_synth_filter_fixed(&c->mpadsp,
+                                c->synth_buf[ch], &(c->synth_buf_offset[ch]),
                                 ff_mpa_synth_window_fixed, &dither_state,
                                 samples_ptr, channels,
                                 c->sb_samples[ch][i]);
diff --git a/libavcodec/mpc.h b/libavcodec/mpc.h
index 67fc7feed0..eea4b6df36 100644
--- a/libavcodec/mpc.h
+++ b/libavcodec/mpc.h
@@ -52,6 +52,7 @@ typedef struct {
 
 typedef struct {
     DSPContext dsp;
+    MPADSPContext mpadsp;
     GetBitContext gb;
     int IS, MSS, gapless;
     int lastframelen;
diff --git a/libavcodec/mpc7.c b/libavcodec/mpc7.c
index 6a4bf57043..dbfa3c8636 100644
--- a/libavcodec/mpc7.c
+++ b/libavcodec/mpc7.c
@@ -29,7 +29,7 @@
 #include "avcodec.h"
 #include "get_bits.h"
 #include "dsputil.h"
-#include "mpegaudio.h"
+#include "mpegaudiodsp.h"
 #include "libavutil/audioconvert.h"
 
 #include "mpc.h"
@@ -68,6 +68,7 @@ static av_cold int mpc7_decode_init(AVCodecContext * avctx)
     memset(c->oldDSCF, 0, sizeof(c->oldDSCF));
     av_lfg_init(&c->rnd, 0xDEADBEEF);
     dsputil_init(&c->dsp, avctx);
+    ff_mpadsp_init(&c->mpadsp);
     c->dsp.bswap_buf((uint32_t*)buf, (const uint32_t*)avctx->extradata, 4);
     ff_mpc_init();
     init_get_bits(&gb, buf, 128);
diff --git a/libavcodec/mpc8.c b/libavcodec/mpc8.c
index 5de8c15b4c..81de9cf500 100644
--- a/libavcodec/mpc8.c
+++ b/libavcodec/mpc8.c
@@ -29,7 +29,7 @@
 #include "avcodec.h"
 #include "get_bits.h"
 #include "dsputil.h"
-#include "mpegaudio.h"
+#include "mpegaudiodsp.h"
 #include "libavutil/audioconvert.h"
 
 #include "mpc.h"
@@ -120,6 +120,7 @@ static av_cold int mpc8_decode_init(AVCodecContext * avctx)
     memset(c->oldDSCF, 0, sizeof(c->oldDSCF));
     av_lfg_init(&c->rnd, 0xDEADBEEF);
     dsputil_init(&c->dsp, avctx);
+    ff_mpadsp_init(&c->mpadsp);
 
     ff_mpc_init();
 
diff --git a/libavcodec/mpegaudio.h b/libavcodec/mpegaudio.h
index 3422b6df68..c33960e987 100644
--- a/libavcodec/mpegaudio.h
+++ b/libavcodec/mpegaudio.h
@@ -33,7 +33,6 @@
 #include "avcodec.h"
 #include "get_bits.h"
 #include "dsputil.h"
-#include "dct.h"
 
 /* max frame size, in samples */
 #define MPA_FRAME_SIZE 1152
@@ -69,7 +68,6 @@
 typedef float OUT_INT;
 #else
 typedef int16_t OUT_INT;
-#define OUT_SHIFT (WFRAC_BITS + FRAC_BITS - 15)
 #endif
 
 #if CONFIG_FLOAT
@@ -142,11 +140,7 @@ typedef struct MPADecodeContext {
     int dither_state;
     int error_recognition;
     AVCodecContext* avctx;
-#if CONFIG_FLOAT
-    DCTContext dct;
-#endif
-    void (*apply_window_mp3)(MPA_INT *synth_buf, MPA_INT *window,
-                             int *dither_state, OUT_INT *samples, int incr);
+    MPADSPContext mpadsp;
 } MPADecodeContext;
 
 /* layer 3 huffman tables */
@@ -158,22 +152,6 @@ typedef struct HuffTable {
 
 int ff_mpa_l2_select_table(int bitrate, int nb_channels, int freq, int lsf);
 int ff_mpa_decode_header(AVCodecContext *avctx, uint32_t head, int *sample_rate, int *channels, int *frame_size, int *bitrate);
-extern MPA_INT ff_mpa_synth_window_fixed[];
-void ff_mpa_synth_init_fixed(MPA_INT *window);
-void ff_mpa_synth_filter_fixed(MPA_INT *synth_buf_ptr, int *synth_buf_offset,
-                         MPA_INT *window, int *dither_state,
-                         OUT_INT *samples, int incr,
-                         INTFLOAT sb_samples[SBLIMIT]);
-
-void ff_mpa_synth_init_float(MPA_INT *window);
-void ff_mpa_synth_filter_float(MPADecodeContext *s,
-                         MPA_INT *synth_buf_ptr, int *synth_buf_offset,
-                         MPA_INT *window, int *dither_state,
-                         OUT_INT *samples, int incr,
-                         INTFLOAT sb_samples[SBLIMIT]);
-
-void ff_mpegaudiodec_init_mmx(MPADecodeContext *s);
-void ff_mpegaudiodec_init_altivec(MPADecodeContext *s);
 
 /* fast header check for resync */
 static inline int ff_mpa_check_header(uint32_t header){
diff --git a/libavcodec/mpegaudiodec.c b/libavcodec/mpegaudiodec.c
index 4802a04bc8..cc193c68d0 100644
--- a/libavcodec/mpegaudiodec.c
+++ b/libavcodec/mpegaudiodec.c
@@ -29,7 +29,7 @@
 #include "get_bits.h"
 #include "dsputil.h"
 #include "mathops.h"
-#include "dct32.h"
+#include "mpegaudiodsp.h"
 
 /*
  * TODO:
@@ -68,8 +68,6 @@
 #include "mpegaudiodectab.h"
 
 static void RENAME(compute_antialias)(MPADecodeContext *s, GranuleDef *g);
-static void apply_window_mp3_c(MPA_INT *synth_buf, MPA_INT *window,
-                               int *dither_state, OUT_INT *samples, int incr);
 
 /* vlc structure for decoding layer 3 huffman tables */
 static VLC huff_vlc[16];
@@ -119,8 +117,6 @@ static const int32_t scale_factor_mult2[3][3] = {
     SCALE_GEN(4.0 / 9.0), /* 9 steps */
 };
 
-DECLARE_ALIGNED(16, MPA_INT, RENAME(ff_mpa_synth_window))[512+256];
-
 /**
  * Convert region offsets to region sizes and truncate
  * size to big_values.
@@ -259,14 +255,8 @@ static av_cold int decode_init(AVCodecContext * avctx)
     int i, j, k;
 
     s->avctx = avctx;
-    s->apply_window_mp3 = apply_window_mp3_c;
-#if HAVE_MMX && CONFIG_FLOAT
-    ff_mpegaudiodec_init_mmx(s);
-#endif
-#if CONFIG_FLOAT
-    ff_dct_init(&s->dct, 5, DCT_II);
-#endif
-    if (HAVE_ALTIVEC && CONFIG_FLOAT) ff_mpegaudiodec_init_altivec(s);
+
+    ff_mpadsp_init(&s->mpadsp);
 
     avctx->sample_fmt= OUT_FMT;
     s->error_recognition= avctx->error_recognition;
@@ -461,183 +451,6 @@ static av_cold int decode_init(AVCodecContext * avctx)
     return 0;
 }
 
-
-#if CONFIG_FLOAT
-static inline float round_sample(float *sum)
-{
-    float sum1=*sum;
-    *sum = 0;
-    return sum1;
-}
-
-/* signed 16x16 -> 32 multiply add accumulate */
-#define MACS(rt, ra, rb) rt+=(ra)*(rb)
-
-/* signed 16x16 -> 32 multiply */
-#define MULS(ra, rb) ((ra)*(rb))
-
-#define MLSS(rt, ra, rb) rt-=(ra)*(rb)
-
-#else
-
-static inline int round_sample(int64_t *sum)
-{
-    int sum1;
-    sum1 = (int)((*sum) >> OUT_SHIFT);
-    *sum &= (1<<OUT_SHIFT)-1;
-    return av_clip_int16(sum1);
-}
-
-#   define MULS(ra, rb) MUL64(ra, rb)
-#   define MACS(rt, ra, rb) MAC64(rt, ra, rb)
-#   define MLSS(rt, ra, rb) MLS64(rt, ra, rb)
-#endif
-
-#define SUM8(op, sum, w, p)               \
-{                                         \
-    op(sum, (w)[0 * 64], (p)[0 * 64]);    \
-    op(sum, (w)[1 * 64], (p)[1 * 64]);    \
-    op(sum, (w)[2 * 64], (p)[2 * 64]);    \
-    op(sum, (w)[3 * 64], (p)[3 * 64]);    \
-    op(sum, (w)[4 * 64], (p)[4 * 64]);    \
-    op(sum, (w)[5 * 64], (p)[5 * 64]);    \
-    op(sum, (w)[6 * 64], (p)[6 * 64]);    \
-    op(sum, (w)[7 * 64], (p)[7 * 64]);    \
-}
-
-#define SUM8P2(sum1, op1, sum2, op2, w1, w2, p) \
-{                                               \
-    INTFLOAT tmp;\
-    tmp = p[0 * 64];\
-    op1(sum1, (w1)[0 * 64], tmp);\
-    op2(sum2, (w2)[0 * 64], tmp);\
-    tmp = p[1 * 64];\
-    op1(sum1, (w1)[1 * 64], tmp);\
-    op2(sum2, (w2)[1 * 64], tmp);\
-    tmp = p[2 * 64];\
-    op1(sum1, (w1)[2 * 64], tmp);\
-    op2(sum2, (w2)[2 * 64], tmp);\
-    tmp = p[3 * 64];\
-    op1(sum1, (w1)[3 * 64], tmp);\
-    op2(sum2, (w2)[3 * 64], tmp);\
-    tmp = p[4 * 64];\
-    op1(sum1, (w1)[4 * 64], tmp);\
-    op2(sum2, (w2)[4 * 64], tmp);\
-    tmp = p[5 * 64];\
-    op1(sum1, (w1)[5 * 64], tmp);\
-    op2(sum2, (w2)[5 * 64], tmp);\
-    tmp = p[6 * 64];\
-    op1(sum1, (w1)[6 * 64], tmp);\
-    op2(sum2, (w2)[6 * 64], tmp);\
-    tmp = p[7 * 64];\
-    op1(sum1, (w1)[7 * 64], tmp);\
-    op2(sum2, (w2)[7 * 64], tmp);\
-}
-
-void av_cold RENAME(ff_mpa_synth_init)(MPA_INT *window)
-{
-    int i, j;
-
-    /* max = 18760, max sum over all 16 coefs : 44736 */
-    for(i=0;i<257;i++) {
-        INTFLOAT v;
-        v = ff_mpa_enwindow[i];
-#if CONFIG_FLOAT
-        v *= 1.0 / (1LL<<(16 + FRAC_BITS));
-#endif
-        window[i] = v;
-        if ((i & 63) != 0)
-            v = -v;
-        if (i != 0)
-            window[512 - i] = v;
-    }
-
-    // Needed for avoiding shuffles in ASM implementations
-    for(i=0; i < 8; i++)
-        for(j=0; j < 16; j++)
-            window[512+16*i+j] = window[64*i+32-j];
-
-    for(i=0; i < 8; i++)
-        for(j=0; j < 16; j++)
-            window[512+128+16*i+j] = window[64*i+48-j];
-}
-
-static void apply_window_mp3_c(MPA_INT *synth_buf, MPA_INT *window,
-                               int *dither_state, OUT_INT *samples, int incr)
-{
-    register const MPA_INT *w, *w2, *p;
-    int j;
-    OUT_INT *samples2;
-#if CONFIG_FLOAT
-    float sum, sum2;
-#else
-    int64_t sum, sum2;
-#endif
-
-    /* copy to avoid wrap */
-    memcpy(synth_buf + 512, synth_buf, 32 * sizeof(*synth_buf));
-
-    samples2 = samples + 31 * incr;
-    w = window;
-    w2 = window + 31;
-
-    sum = *dither_state;
-    p = synth_buf + 16;
-    SUM8(MACS, sum, w, p);
-    p = synth_buf + 48;
-    SUM8(MLSS, sum, w + 32, p);
-    *samples = round_sample(&sum);
-    samples += incr;
-    w++;
-
-    /* we calculate two samples at the same time to avoid one memory
-       access per two sample */
-    for(j=1;j<16;j++) {
-        sum2 = 0;
-        p = synth_buf + 16 + j;
-        SUM8P2(sum, MACS, sum2, MLSS, w, w2, p);
-        p = synth_buf + 48 - j;
-        SUM8P2(sum, MLSS, sum2, MLSS, w + 32, w2 + 32, p);
-
-        *samples = round_sample(&sum);
-        samples += incr;
-        sum += sum2;
-        *samples2 = round_sample(&sum);
-        samples2 -= incr;
-        w++;
-        w2--;
-    }
-
-    p = synth_buf + 32;
-    SUM8(MLSS, sum, w + 32, p);
-    *samples = round_sample(&sum);
-    *dither_state= sum;
-}
-
-
-/* 32 sub band synthesis filter. Input: 32 sub band samples, Output:
-   32 samples. */
-/* XXX: optimize by avoiding ring buffer usage */
-#if !CONFIG_FLOAT
-void ff_mpa_synth_filter_fixed(MPA_INT *synth_buf_ptr, int *synth_buf_offset,
-                         MPA_INT *window, int *dither_state,
-                         OUT_INT *samples, int incr,
-                         INTFLOAT sb_samples[SBLIMIT])
-{
-    register MPA_INT *synth_buf;
-    int offset;
-
-    offset = *synth_buf_offset;
-    synth_buf = synth_buf_ptr + offset;
-
-    ff_dct32_fixed(synth_buf, sb_samples);
-    apply_window_mp3_c(synth_buf, window, dither_state, samples, incr);
-
-    offset = (offset - 32) & 511;
-    *synth_buf_offset = offset;
-}
-#endif
-
 #define C3 FIXHR(0.86602540378443864676/2)
 
 /* 0.5 / cos(pi*(2*i+1)/36) */
@@ -1915,9 +1728,7 @@ static int mp_decode_frame(MPADecodeContext *s,
         samples_ptr = samples + ch;
         for(i=0;i<nb_frames;i++) {
             RENAME(ff_mpa_synth_filter)(
-#if CONFIG_FLOAT
-                         s,
-#endif
+                         &s->mpadsp,
                          s->synth_buf[ch], &(s->synth_buf_offset[ch]),
                          RENAME(ff_mpa_synth_window), &s->dither_state,
                          samples_ptr, s->nb_channels,
diff --git a/libavcodec/mpegaudiodec_float.c b/libavcodec/mpegaudiodec_float.c
index 0ef85d19c1..94463a824e 100644
--- a/libavcodec/mpegaudiodec_float.c
+++ b/libavcodec/mpegaudiodec_float.c
@@ -22,25 +22,6 @@
 #define CONFIG_FLOAT 1
 #include "mpegaudiodec.c"
 
-void ff_mpa_synth_filter_float(MPADecodeContext *s, float *synth_buf_ptr,
-                               int *synth_buf_offset,
-                               float *window, int *dither_state,
-                               float *samples, int incr,
-                               float sb_samples[SBLIMIT])
-{
-    float *synth_buf;
-    int offset;
-
-    offset = *synth_buf_offset;
-    synth_buf = synth_buf_ptr + offset;
-
-    s->dct.dct32(synth_buf, sb_samples);
-    s->apply_window_mp3(synth_buf, window, dither_state, samples, incr);
-
-    offset = (offset - 32) & 511;
-    *synth_buf_offset = offset;
-}
-
 static void compute_antialias_float(MPADecodeContext *s,
                               GranuleDef *g)
 {
diff --git a/libavcodec/mpegaudiodsp.c b/libavcodec/mpegaudiodsp.c
new file mode 100644
index 0000000000..57fe962b91
--- /dev/null
+++ b/libavcodec/mpegaudiodsp.c
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2011 Mans Rullgard
+ *
+ * This file is part of Libav.
+ *
+ * Libav is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * Libav is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Libav; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "config.h"
+#include "mpegaudiodsp.h"
+#include "dct.h"
+#include "dct32.h"
+
+void ff_mpadsp_init(MPADSPContext *s)
+{
+    DCTContext dct;
+
+    ff_dct_init(&dct, 5, DCT_II);
+
+    s->apply_window_float = ff_mpadsp_apply_window_float;
+    s->apply_window_fixed = ff_mpadsp_apply_window_fixed;
+
+    s->dct32_float = dct.dct32;
+    s->dct32_fixed = ff_dct32_fixed;
+
+    if (HAVE_MMX)     ff_mpadsp_init_mmx(s);
+    if (HAVE_ALTIVEC) ff_mpadsp_init_altivec(s);
+}
diff --git a/libavcodec/mpegaudiodsp.h b/libavcodec/mpegaudiodsp.h
new file mode 100644
index 0000000000..7b05b68eee
--- /dev/null
+++ b/libavcodec/mpegaudiodsp.h
@@ -0,0 +1,63 @@
+/*
+ * This file is part of Libav.
+ *
+ * Libav is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * Libav is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Libav; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_MPEGAUDIODSP_H
+#define AVCODEC_MPEGAUDIODSP_H
+
+#include <stdint.h>
+
+typedef struct MPADSPContext {
+    void (*apply_window_float)(float *synth_buf, float *window,
+                               int *dither_state, float *samples, int incr);
+    void (*apply_window_fixed)(int32_t *synth_buf, int32_t *window,
+                               int *dither_state, int16_t *samples, int incr);
+    void (*dct32_float)(float *dst, const float *src);
+    void (*dct32_fixed)(int *dst, const int *src);
+} MPADSPContext;
+
+void ff_mpadsp_init(MPADSPContext *s);
+
+extern int32_t ff_mpa_synth_window_fixed[];
+extern float   ff_mpa_synth_window_float[];
+
+void ff_mpa_synth_filter_fixed(MPADSPContext *s,
+                               int32_t *synth_buf_ptr, int *synth_buf_offset,
+                               int32_t *window, int *dither_state,
+                               int16_t *samples, int incr,
+                               int *sb_samples);
+
+void ff_mpa_synth_filter_float(MPADSPContext *s,
+                               float *synth_buf_ptr, int *synth_buf_offset,
+                               float *window, int *dither_state,
+                               float *samples, int incr,
+                               float *sb_samples);
+
+void ff_mpadsp_init_mmx(MPADSPContext *s);
+void ff_mpadsp_init_altivec(MPADSPContext *s);
+
+void ff_mpa_synth_init_float(float *window);
+void ff_mpa_synth_init_fixed(int32_t *window);
+
+void ff_mpadsp_apply_window_float(float *synth_buf, float *window,
+                                  int *dither_state, float *samples,
+                                  int incr);
+void ff_mpadsp_apply_window_fixed(int32_t *synth_buf, int32_t *window,
+                                  int *dither_state, int16_t *samples,
+                                  int incr);
+
+#endif
diff --git a/libavcodec/mpegaudiodsp_fixed.c b/libavcodec/mpegaudiodsp_fixed.c
new file mode 100644
index 0000000000..3c49a568b7
--- /dev/null
+++ b/libavcodec/mpegaudiodsp_fixed.c
@@ -0,0 +1,20 @@
+/*
+ * This file is part of Libav.
+ *
+ * Libav is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * Libav is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Libav; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#define CONFIG_FLOAT 0
+#include "mpegaudiodsp_template.c"
diff --git a/libavcodec/mpegaudiodsp_float.c b/libavcodec/mpegaudiodsp_float.c
new file mode 100644
index 0000000000..2d8d53ea26
--- /dev/null
+++ b/libavcodec/mpegaudiodsp_float.c
@@ -0,0 +1,20 @@
+/*
+ * This file is part of Libav.
+ *
+ * Libav is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * Libav is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Libav; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#define CONFIG_FLOAT 1
+#include "mpegaudiodsp_template.c"
diff --git a/libavcodec/mpegaudiodsp_template.c b/libavcodec/mpegaudiodsp_template.c
new file mode 100644
index 0000000000..5561c46135
--- /dev/null
+++ b/libavcodec/mpegaudiodsp_template.c
@@ -0,0 +1,205 @@
+/*
+ * Copyright (c) 2001, 2002 Fabrice Bellard
+ *
+ * This file is part of Libav.
+ *
+ * Libav is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * Libav is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Libav; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <stdint.h>
+
+#include "libavutil/mem.h"
+#include "dct32.h"
+#include "mathops.h"
+#include "mpegaudiodsp.h"
+#include "mpegaudio.h"
+#include "mpegaudiodata.h"
+
+#if CONFIG_FLOAT
+#define RENAME(n) n##_float
+
+static inline float round_sample(float *sum)
+{
+    float sum1=*sum;
+    *sum = 0;
+    return sum1;
+}
+
+#define MACS(rt, ra, rb) rt+=(ra)*(rb)
+#define MULS(ra, rb) ((ra)*(rb))
+#define MLSS(rt, ra, rb) rt-=(ra)*(rb)
+
+#else
+
+#define RENAME(n) n##_fixed
+#define OUT_SHIFT (WFRAC_BITS + FRAC_BITS - 15)
+
+static inline int round_sample(int64_t *sum)
+{
+    int sum1;
+    sum1 = (int)((*sum) >> OUT_SHIFT);
+    *sum &= (1<<OUT_SHIFT)-1;
+    return av_clip_int16(sum1);
+}
+
+#   define MULS(ra, rb) MUL64(ra, rb)
+#   define MACS(rt, ra, rb) MAC64(rt, ra, rb)
+#   define MLSS(rt, ra, rb) MLS64(rt, ra, rb)
+#endif
+
+DECLARE_ALIGNED(16, MPA_INT, RENAME(ff_mpa_synth_window))[512+256];
+
+#define SUM8(op, sum, w, p)               \
+{                                         \
+    op(sum, (w)[0 * 64], (p)[0 * 64]);    \
+    op(sum, (w)[1 * 64], (p)[1 * 64]);    \
+    op(sum, (w)[2 * 64], (p)[2 * 64]);    \
+    op(sum, (w)[3 * 64], (p)[3 * 64]);    \
+    op(sum, (w)[4 * 64], (p)[4 * 64]);    \
+    op(sum, (w)[5 * 64], (p)[5 * 64]);    \
+    op(sum, (w)[6 * 64], (p)[6 * 64]);    \
+    op(sum, (w)[7 * 64], (p)[7 * 64]);    \
+}
+
+#define SUM8P2(sum1, op1, sum2, op2, w1, w2, p) \
+{                                               \
+    INTFLOAT tmp;\
+    tmp = p[0 * 64];\
+    op1(sum1, (w1)[0 * 64], tmp);\
+    op2(sum2, (w2)[0 * 64], tmp);\
+    tmp = p[1 * 64];\
+    op1(sum1, (w1)[1 * 64], tmp);\
+    op2(sum2, (w2)[1 * 64], tmp);\
+    tmp = p[2 * 64];\
+    op1(sum1, (w1)[2 * 64], tmp);\
+    op2(sum2, (w2)[2 * 64], tmp);\
+    tmp = p[3 * 64];\
+    op1(sum1, (w1)[3 * 64], tmp);\
+    op2(sum2, (w2)[3 * 64], tmp);\
+    tmp = p[4 * 64];\
+    op1(sum1, (w1)[4 * 64], tmp);\
+    op2(sum2, (w2)[4 * 64], tmp);\
+    tmp = p[5 * 64];\
+    op1(sum1, (w1)[5 * 64], tmp);\
+    op2(sum2, (w2)[5 * 64], tmp);\
+    tmp = p[6 * 64];\
+    op1(sum1, (w1)[6 * 64], tmp);\
+    op2(sum2, (w2)[6 * 64], tmp);\
+    tmp = p[7 * 64];\
+    op1(sum1, (w1)[7 * 64], tmp);\
+    op2(sum2, (w2)[7 * 64], tmp);\
+}
+
+void RENAME(ff_mpadsp_apply_window)(MPA_INT *synth_buf, MPA_INT *window,
+                                  int *dither_state, OUT_INT *samples,
+                                  int incr)
+{
+    register const MPA_INT *w, *w2, *p;
+    int j;
+    OUT_INT *samples2;
+#if CONFIG_FLOAT
+    float sum, sum2;
+#else
+    int64_t sum, sum2;
+#endif
+
+    /* copy to avoid wrap */
+    memcpy(synth_buf + 512, synth_buf, 32 * sizeof(*synth_buf));
+
+    samples2 = samples + 31 * incr;
+    w = window;
+    w2 = window + 31;
+
+    sum = *dither_state;
+    p = synth_buf + 16;
+    SUM8(MACS, sum, w, p);
+    p = synth_buf + 48;
+    SUM8(MLSS, sum, w + 32, p);
+    *samples = round_sample(&sum);
+    samples += incr;
+    w++;
+
+    /* we calculate two samples at the same time to avoid one memory
+       access per two sample */
+    for(j=1;j<16;j++) {
+        sum2 = 0;
+        p = synth_buf + 16 + j;
+        SUM8P2(sum, MACS, sum2, MLSS, w, w2, p);
+        p = synth_buf + 48 - j;
+        SUM8P2(sum, MLSS, sum2, MLSS, w + 32, w2 + 32, p);
+
+        *samples = round_sample(&sum);
+        samples += incr;
+        sum += sum2;
+        *samples2 = round_sample(&sum);
+        samples2 -= incr;
+        w++;
+        w2--;
+    }
+
+    p = synth_buf + 32;
+    SUM8(MLSS, sum, w + 32, p);
+    *samples = round_sample(&sum);
+    *dither_state= sum;
+}
+
+/* 32 sub band synthesis filter. Input: 32 sub band samples, Output:
+   32 samples. */
+void RENAME(ff_mpa_synth_filter)(MPADSPContext *s, MPA_INT *synth_buf_ptr,
+                                 int *synth_buf_offset,
+                                 MPA_INT *window, int *dither_state,
+                                 OUT_INT *samples, int incr,
+                                 MPA_INT *sb_samples)
+{
+    MPA_INT *synth_buf;
+    int offset;
+
+    offset = *synth_buf_offset;
+    synth_buf = synth_buf_ptr + offset;
+
+    s->RENAME(dct32)(synth_buf, sb_samples);
+    s->RENAME(apply_window)(synth_buf, window, dither_state, samples, incr);
+
+    offset = (offset - 32) & 511;
+    *synth_buf_offset = offset;
+}
+
+void av_cold RENAME(ff_mpa_synth_init)(MPA_INT *window)
+{
+    int i, j;
+
+    /* max = 18760, max sum over all 16 coefs : 44736 */
+    for(i=0;i<257;i++) {
+        INTFLOAT v;
+        v = ff_mpa_enwindow[i];
+#if CONFIG_FLOAT
+        v *= 1.0 / (1LL<<(16 + FRAC_BITS));
+#endif
+        window[i] = v;
+        if ((i & 63) != 0)
+            v = -v;
+        if (i != 0)
+            window[512 - i] = v;
+    }
+
+    // Needed for avoiding shuffles in ASM implementations
+    for(i=0; i < 8; i++)
+        for(j=0; j < 16; j++)
+            window[512+16*i+j] = window[64*i+32-j];
+
+    for(i=0; i < 8; i++)
+        for(j=0; j < 16; j++)
+            window[512+128+16*i+j] = window[64*i+48-j];
+}
diff --git a/libavcodec/ppc/mpegaudiodec_altivec.c b/libavcodec/ppc/mpegaudiodec_altivec.c
index af94276e8a..5df0fdafe4 100644
--- a/libavcodec/ppc/mpegaudiodec_altivec.c
+++ b/libavcodec/ppc/mpegaudiodec_altivec.c
@@ -21,9 +21,8 @@
 
 #include "dsputil_altivec.h"
 #include "util_altivec.h"
-
-#define CONFIG_FLOAT 1
-#include "libavcodec/mpegaudio.h"
+#include "libavcodec/dsputil.h"
+#include "libavcodec/mpegaudiodsp.h"
 
 #define MACS(rt, ra, rb) rt+=(ra)*(rb)
 #define MLSS(rt, ra, rb) rt-=(ra)*(rb)
@@ -124,7 +123,7 @@ static void apply_window_mp3(float *in, float *win, int *unused, float *out,
     *out = sum;
 }
 
-void ff_mpegaudiodec_init_altivec(MPADecodeContext *s)
+void ff_mpadsp_init_altivec(MPADSPContext *s)
 {
-    s->apply_window_mp3 = apply_window_mp3;
+    s->apply_window_float = apply_window_mp3;
 }
diff --git a/libavcodec/qdm2.c b/libavcodec/qdm2.c
index 0f4dd18966..f74cfd9258 100644
--- a/libavcodec/qdm2.c
+++ b/libavcodec/qdm2.c
@@ -39,6 +39,7 @@
 #include "get_bits.h"
 #include "dsputil.h"
 #include "rdft.h"
+#include "mpegaudiodsp.h"
 #include "mpegaudio.h"
 
 #include "qdm2data.h"
@@ -170,6 +171,7 @@ typedef struct {
     float output_buffer[1024];
 
     /// Synthesis filter
+    MPADSPContext mpadsp;
     DECLARE_ALIGNED(16, MPA_INT, synth_buf)[MPA_MAX_CHANNELS][512*2];
     int synth_buf_offset[MPA_MAX_CHANNELS];
     DECLARE_ALIGNED(16, int32_t, sb_samples)[MPA_MAX_CHANNELS][128][SBLIMIT];
@@ -1616,7 +1618,8 @@ static void qdm2_synthesis_filter (QDM2Context *q, int index)
         OUT_INT *samples_ptr = samples + ch;
 
         for (i = 0; i < 8; i++) {
-            ff_mpa_synth_filter_fixed(q->synth_buf[ch], &(q->synth_buf_offset[ch]),
+            ff_mpa_synth_filter_fixed(&q->mpadsp,
+                q->synth_buf[ch], &(q->synth_buf_offset[ch]),
                 ff_mpa_synth_window_fixed, &dither_state,
                 samples_ptr, q->nb_channels,
                 q->sb_samples[ch][(8 * index) + i]);
@@ -1863,6 +1866,7 @@ static av_cold int qdm2_decode_init(AVCodecContext *avctx)
     }
 
     ff_rdft_init(&s->rdft_ctx, s->fft_order, IDFT_C2R);
+    ff_mpadsp_init(&s->mpadsp);
 
     qdm2_init(s);
 
diff --git a/libavcodec/x86/mpegaudiodec_mmx.c b/libavcodec/x86/mpegaudiodec_mmx.c
index ce5b7d6df8..b64461513e 100644
--- a/libavcodec/x86/mpegaudiodec_mmx.c
+++ b/libavcodec/x86/mpegaudiodec_mmx.c
@@ -21,9 +21,8 @@
 
 #include "libavutil/cpu.h"
 #include "libavutil/x86_cpu.h"
-
-#define CONFIG_FLOAT 1
-#include "libavcodec/mpegaudio.h"
+#include "libavcodec/dsputil.h"
+#include "libavcodec/mpegaudiodsp.h"
 
 #define MACS(rt, ra, rb) rt+=(ra)*(rb)
 #define MLSS(rt, ra, rb) rt-=(ra)*(rb)
@@ -148,11 +147,11 @@ static void apply_window_mp3(float *in, float *win, int *unused, float *out,
     *out = sum;
 }
 
-void ff_mpegaudiodec_init_mmx(MPADecodeContext *s)
+void ff_mpadsp_init_mmx(MPADSPContext *s)
 {
     int mm_flags = av_get_cpu_flags();
 
     if (mm_flags & AV_CPU_FLAG_SSE2) {
-        s->apply_window_mp3 = apply_window_mp3;
+        s->apply_window_float = apply_window_mp3;
     }
 }

From 918d0584a452bf76264e717006f2cfc40b0de21f Mon Sep 17 00:00:00 2001
From: Mans Rullgard <mans@mansr.com>
Date: Tue, 17 May 2011 14:22:25 +0100
Subject: [PATCH 02/19] mpegaudio: move some struct definitions from
 mpegaudio.h

These structs are only used in mpegaudiodec.c, so move them there
and remove no longer needed #include lines from mpegaudio.h.

Signed-off-by: Mans Rullgard <mans@mansr.com>
---
 libavcodec/mpegaudio.h       | 55 ------------------------------------
 libavcodec/mpegaudiodec.c    | 46 ++++++++++++++++++++++++++++++
 libavcodec/mpegaudiodectab.h |  7 +++++
 3 files changed, 53 insertions(+), 55 deletions(-)

diff --git a/libavcodec/mpegaudio.h b/libavcodec/mpegaudio.h
index c33960e987..8c6d6ef066 100644
--- a/libavcodec/mpegaudio.h
+++ b/libavcodec/mpegaudio.h
@@ -31,8 +31,6 @@
 #endif
 
 #include "avcodec.h"
-#include "get_bits.h"
-#include "dsputil.h"
 
 /* max frame size, in samples */
 #define MPA_FRAME_SIZE 1152
@@ -81,29 +79,6 @@ typedef int16_t MPA_INT;
 typedef int32_t MPA_INT;
 #endif
 
-#define BACKSTEP_SIZE 512
-#define EXTRABYTES 24
-
-/* layer 3 "granule" */
-typedef struct GranuleDef {
-    uint8_t scfsi;
-    int part2_3_length;
-    int big_values;
-    int global_gain;
-    int scalefac_compress;
-    uint8_t block_type;
-    uint8_t switch_point;
-    int table_select[3];
-    int subblock_gain[3];
-    uint8_t scalefac_scale;
-    uint8_t count1table_select;
-    int region_size[3]; /* number of huffman codes in each region */
-    int preflag;
-    int short_start, long_end; /* long/short band indexes */
-    uint8_t scale_factors[40];
-    INTFLOAT sb_hybrid[SBLIMIT * 18]; /* 576 samples */
-} GranuleDef;
-
 #define MPA_DECODE_HEADER \
     int frame_size; \
     int error_protection; \
@@ -120,36 +95,6 @@ typedef struct MPADecodeHeader {
   MPA_DECODE_HEADER
 } MPADecodeHeader;
 
-typedef struct MPADecodeContext {
-    MPA_DECODE_HEADER
-    uint8_t last_buf[2*BACKSTEP_SIZE + EXTRABYTES];
-    int last_buf_size;
-    /* next header (used in free format parsing) */
-    uint32_t free_format_next_header;
-    GetBitContext gb;
-    GetBitContext in_gb;
-    DECLARE_ALIGNED(16, MPA_INT, synth_buf)[MPA_MAX_CHANNELS][512 * 2];
-    int synth_buf_offset[MPA_MAX_CHANNELS];
-    DECLARE_ALIGNED(16, INTFLOAT, sb_samples)[MPA_MAX_CHANNELS][36][SBLIMIT];
-    INTFLOAT mdct_buf[MPA_MAX_CHANNELS][SBLIMIT * 18]; /* previous samples, for layer 3 MDCT */
-    GranuleDef granules[2][2]; /* Used in Layer 3 */
-#ifdef DEBUG
-    int frame_count;
-#endif
-    int adu_mode; ///< 0 for standard mp3, 1 for adu formatted mp3
-    int dither_state;
-    int error_recognition;
-    AVCodecContext* avctx;
-    MPADSPContext mpadsp;
-} MPADecodeContext;
-
-/* layer 3 huffman tables */
-typedef struct HuffTable {
-    int xsize;
-    const uint8_t *bits;
-    const uint16_t *codes;
-} HuffTable;
-
 int ff_mpa_l2_select_table(int bitrate, int nb_channels, int freq, int lsf);
 int ff_mpa_decode_header(AVCodecContext *avctx, uint32_t head, int *sample_rate, int *channels, int *frame_size, int *bitrate);
 
diff --git a/libavcodec/mpegaudiodec.c b/libavcodec/mpegaudiodec.c
index cc193c68d0..f0d9958d2b 100644
--- a/libavcodec/mpegaudiodec.c
+++ b/libavcodec/mpegaudiodec.c
@@ -39,6 +39,52 @@
 #include "mpegaudio.h"
 #include "mpegaudiodecheader.h"
 
+#define BACKSTEP_SIZE 512
+#define EXTRABYTES 24
+
+/* layer 3 "granule" */
+typedef struct GranuleDef {
+    uint8_t scfsi;
+    int part2_3_length;
+    int big_values;
+    int global_gain;
+    int scalefac_compress;
+    uint8_t block_type;
+    uint8_t switch_point;
+    int table_select[3];
+    int subblock_gain[3];
+    uint8_t scalefac_scale;
+    uint8_t count1table_select;
+    int region_size[3]; /* number of huffman codes in each region */
+    int preflag;
+    int short_start, long_end; /* long/short band indexes */
+    uint8_t scale_factors[40];
+    INTFLOAT sb_hybrid[SBLIMIT * 18]; /* 576 samples */
+} GranuleDef;
+
+typedef struct MPADecodeContext {
+    MPA_DECODE_HEADER
+    uint8_t last_buf[2*BACKSTEP_SIZE + EXTRABYTES];
+    int last_buf_size;
+    /* next header (used in free format parsing) */
+    uint32_t free_format_next_header;
+    GetBitContext gb;
+    GetBitContext in_gb;
+    DECLARE_ALIGNED(16, MPA_INT, synth_buf)[MPA_MAX_CHANNELS][512 * 2];
+    int synth_buf_offset[MPA_MAX_CHANNELS];
+    DECLARE_ALIGNED(16, INTFLOAT, sb_samples)[MPA_MAX_CHANNELS][36][SBLIMIT];
+    INTFLOAT mdct_buf[MPA_MAX_CHANNELS][SBLIMIT * 18]; /* previous samples, for layer 3 MDCT */
+    GranuleDef granules[2][2]; /* Used in Layer 3 */
+#ifdef DEBUG
+    int frame_count;
+#endif
+    int adu_mode; ///< 0 for standard mp3, 1 for adu formatted mp3
+    int dither_state;
+    int error_recognition;
+    AVCodecContext* avctx;
+    MPADSPContext mpadsp;
+} MPADecodeContext;
+
 #if CONFIG_FLOAT
 #   define SHR(a,b)       ((a)*(1.0f/(1<<(b))))
 #   define FIXR_OLD(a)    ((int)((a) * FRAC_ONE + 0.5))
diff --git a/libavcodec/mpegaudiodectab.h b/libavcodec/mpegaudiodectab.h
index fdcf83fb88..041d1860b7 100644
--- a/libavcodec/mpegaudiodectab.h
+++ b/libavcodec/mpegaudiodectab.h
@@ -33,6 +33,13 @@
 /*******************************************************/
 /* layer 3 tables */
 
+/* layer 3 huffman tables */
+typedef struct HuffTable {
+    int xsize;
+    const uint8_t *bits;
+    const uint16_t *codes;
+} HuffTable;
+
 /* layer3 scale factor size */
 static const uint8_t slen_table[2][16] = {
     { 0, 0, 0, 0, 3, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4 },

From c7bbc6cd7a7e26c6c6f26e1b06f8ce354c7a1a46 Mon Sep 17 00:00:00 2001
From: Mans Rullgard <mans@mansr.com>
Date: Mon, 16 May 2011 18:59:25 +0100
Subject: [PATCH 03/19] mpegaudio: merge two #if CONFIG_FLOAT blocks

Signed-off-by: Mans Rullgard <mans@mansr.com>
---
 libavcodec/mpegaudio.h | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/libavcodec/mpegaudio.h b/libavcodec/mpegaudio.h
index 8c6d6ef066..5929db8687 100644
--- a/libavcodec/mpegaudio.h
+++ b/libavcodec/mpegaudio.h
@@ -62,21 +62,18 @@
 
 #define FIX(a)   ((int)((a) * FRAC_ONE))
 
-#if CONFIG_FLOAT
-typedef float OUT_INT;
-#else
-typedef int16_t OUT_INT;
-#endif
-
 #if CONFIG_FLOAT
 #   define INTFLOAT float
 typedef float MPA_INT;
+typedef float OUT_INT;
 #elif FRAC_BITS <= 15
 #   define INTFLOAT int
 typedef int16_t MPA_INT;
+typedef int16_t OUT_INT;
 #else
 #   define INTFLOAT int
 typedef int32_t MPA_INT;
+typedef int16_t OUT_INT;
 #endif
 
 #define MPA_DECODE_HEADER \

From b122c651075814722ade6f93c46cb2ee08c45b49 Mon Sep 17 00:00:00 2001
From: Mans Rullgard <mans@mansr.com>
Date: Thu, 19 May 2011 12:31:05 +0100
Subject: [PATCH 04/19] asfdec: add missing #include for av_bswap32()

Signed-off-by: Mans Rullgard <mans@mansr.com>
---
 libavformat/asfdec.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/libavformat/asfdec.c b/libavformat/asfdec.c
index ed02d40fb9..30642a61fa 100644
--- a/libavformat/asfdec.c
+++ b/libavformat/asfdec.c
@@ -21,6 +21,7 @@
 
 //#define DEBUG
 
+#include "libavutil/bswap.h"
 #include "libavutil/common.h"
 #include "libavutil/avstring.h"
 #include "libavcodec/mpegaudio.h"

From d7d21c9f4befe8fea9596e41e691a9fba8f377e2 Mon Sep 17 00:00:00 2001
From: Mans Rullgard <mans@mansr.com>
Date: Thu, 19 May 2011 12:48:33 +0100
Subject: [PATCH 05/19] mpegaudio: remove useless #undef at end of file

Signed-off-by: Mans Rullgard <mans@mansr.com>
---
 libavcodec/mpegaudioenc.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/libavcodec/mpegaudioenc.c b/libavcodec/mpegaudioenc.c
index e6f2e3b9c2..50876ec2a4 100644
--- a/libavcodec/mpegaudioenc.c
+++ b/libavcodec/mpegaudioenc.c
@@ -776,5 +776,3 @@ AVCodec ff_mp2_encoder = {
     .supported_samplerates= (const int[]){44100, 48000,  32000, 22050, 24000, 16000, 0},
     .long_name = NULL_IF_CONFIG_SMALL("MP2 (MPEG audio layer 2)"),
 };
-
-#undef FIX

From c2a16e44f8ed130c2b492f1f3ce09f7f55a7d4a4 Mon Sep 17 00:00:00 2001
From: Mans Rullgard <mans@mansr.com>
Date: Thu, 19 May 2011 14:01:11 +0100
Subject: [PATCH 06/19] mpegaudio: remove unused version of SAME_HEADER_MASK

Signed-off-by: Mans Rullgard <mans@mansr.com>
---
 libavcodec/mpegaudio.h        | 4 ----
 libavcodec/mpegaudio_parser.c | 1 -
 2 files changed, 5 deletions(-)

diff --git a/libavcodec/mpegaudio.h b/libavcodec/mpegaudio.h
index 5929db8687..a46ecc5a9f 100644
--- a/libavcodec/mpegaudio.h
+++ b/libavcodec/mpegaudio.h
@@ -47,10 +47,6 @@
 #define MPA_DUAL    2
 #define MPA_MONO    3
 
-/* header + layer + bitrate + freq + lsf/mpeg25 */
-#define SAME_HEADER_MASK \
-   (0xffe00000 | (3 << 17) | (0xf << 12) | (3 << 10) | (3 << 19))
-
 #define MP3_MASK 0xFFFE0CCF
 
 #ifndef FRAC_BITS
diff --git a/libavcodec/mpegaudio_parser.c b/libavcodec/mpegaudio_parser.c
index 3bf1a18636..7cfd107d53 100644
--- a/libavcodec/mpegaudio_parser.c
+++ b/libavcodec/mpegaudio_parser.c
@@ -35,7 +35,6 @@ typedef struct MpegAudioParseContext {
 #define MPA_HEADER_SIZE 4
 
 /* header + layer + bitrate + freq + lsf/mpeg25 */
-#undef SAME_HEADER_MASK /* mpegaudio.h defines different version */
 #define SAME_HEADER_MASK \
    (0xffe00000 | (3 << 17) | (3 << 10) | (3 << 19))
 

From 429059f866259b447233c3fa23fffddce3e39508 Mon Sep 17 00:00:00 2001
From: Diego Biurrun <diego@biurrun.de>
Date: Mon, 16 May 2011 14:23:59 +0200
Subject: [PATCH 07/19] configure: Adjust AVX assembler check.

Older nasm versions have trouble assembling certain AVX instructions, but the
current AVX check did not detect this. Update the check to use an instruction
that triggers the nasm problem.
---
 configure | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/configure b/configure
index 6becb09724..96867a550e 100755
--- a/configure
+++ b/configure
@@ -2729,7 +2729,7 @@ EOF
 
         check_yasm "pextrd [eax], xmm0, 1" && enable yasm ||
             die "yasm not found, use --disable-yasm for a crippled build"
-        check_yasm "vpaddw xmm0, xmm0, xmm0" || disable avx
+        check_yasm "vextractf128 xmm0, ymm0, 0" || disable avx
     fi
 
     case "$cpu" in

From 0b5e44ed2922f2abe0de9670d099666ca3622a69 Mon Sep 17 00:00:00 2001
From: Mans Rullgard <mans@mansr.com>
Date: Thu, 19 May 2011 16:26:39 +0100
Subject: [PATCH 08/19] mpegaudiodsp: fix x86 and ppc makefiles

Signed-off-by: Mans Rullgard <mans@mansr.com>
---
 libavcodec/ppc/Makefile | 6 +-----
 libavcodec/x86/Makefile | 6 +-----
 2 files changed, 2 insertions(+), 10 deletions(-)

diff --git a/libavcodec/ppc/Makefile b/libavcodec/ppc/Makefile
index 35ea0c38f8..8e37fc791d 100644
--- a/libavcodec/ppc/Makefile
+++ b/libavcodec/ppc/Makefile
@@ -7,11 +7,7 @@ ALTIVEC-OBJS-$(CONFIG_VP5_DECODER)     += ppc/vp3dsp_altivec.o
 ALTIVEC-OBJS-$(CONFIG_VP6_DECODER)     += ppc/vp3dsp_altivec.o
 ALTIVEC-OBJS-$(CONFIG_VP8_DECODER)     += ppc/vp8dsp_altivec.o
 
-ALTIVEC-OBJS-$(CONFIG_MP1FLOAT_DECODER)    += ppc/mpegaudiodec_altivec.o
-ALTIVEC-OBJS-$(CONFIG_MP2FLOAT_DECODER)    += ppc/mpegaudiodec_altivec.o
-ALTIVEC-OBJS-$(CONFIG_MP3FLOAT_DECODER)    += ppc/mpegaudiodec_altivec.o
-ALTIVEC-OBJS-$(CONFIG_MP3ON4FLOAT_DECODER) += ppc/mpegaudiodec_altivec.o
-ALTIVEC-OBJS-$(CONFIG_MP3ADUFLOAT_DECODER) += ppc/mpegaudiodec_altivec.o
+ALTIVEC-OBJS-$(CONFIG_MPEGAUDIODSP)    += ppc/mpegaudiodec_altivec.o
 
 FFT-OBJS-$(HAVE_GNU_AS)                += ppc/fft_altivec_s.o           \
 
diff --git a/libavcodec/x86/Makefile b/libavcodec/x86/Makefile
index 1cde9517a5..5f428501e3 100644
--- a/libavcodec/x86/Makefile
+++ b/libavcodec/x86/Makefile
@@ -21,11 +21,7 @@ YASM-OBJS-$(CONFIG_VC1_DECODER)        += x86/vc1dsp_yasm.o
 MMX-OBJS-$(CONFIG_AC3DSP)              += x86/ac3dsp_mmx.o
 YASM-OBJS-$(CONFIG_AC3DSP)             += x86/ac3dsp.o
 MMX-OBJS-$(CONFIG_CAVS_DECODER)        += x86/cavsdsp_mmx.o
-MMX-OBJS-$(CONFIG_MP1FLOAT_DECODER)    += x86/mpegaudiodec_mmx.o
-MMX-OBJS-$(CONFIG_MP2FLOAT_DECODER)    += x86/mpegaudiodec_mmx.o
-MMX-OBJS-$(CONFIG_MP3FLOAT_DECODER)    += x86/mpegaudiodec_mmx.o
-MMX-OBJS-$(CONFIG_MP3ON4FLOAT_DECODER) += x86/mpegaudiodec_mmx.o
-MMX-OBJS-$(CONFIG_MP3ADUFLOAT_DECODER) += x86/mpegaudiodec_mmx.o
+MMX-OBJS-$(CONFIG_MPEGAUDIODSP)        += x86/mpegaudiodec_mmx.o
 MMX-OBJS-$(CONFIG_ENCODERS)            += x86/dsputilenc_mmx.o
 YASM-OBJS-$(CONFIG_ENCODERS)           += x86/dsputilenc_yasm.o
 MMX-OBJS-$(CONFIG_GPL)                 += x86/idct_mmx.o

From 7ed0a77ff4d19eb77671ce786ef515e5ffb1496e Mon Sep 17 00:00:00 2001
From: Diego Biurrun <diego@biurrun.de>
Date: Thu, 19 May 2011 17:39:50 +0200
Subject: [PATCH 09/19] Remove silly insults from avformat_version() Doxygen
 documentation.

---
 libavformat/avformat.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/libavformat/avformat.h b/libavformat/avformat.h
index aca246d95a..3d4cc68f89 100644
--- a/libavformat/avformat.h
+++ b/libavformat/avformat.h
@@ -23,8 +23,7 @@
 
 
 /**
- * I return the LIBAVFORMAT_VERSION_INT constant.  You got
- * a fucking problem with that, douchebag?
+ * Return the LIBAVFORMAT_VERSION_INT constant.
  */
 unsigned avformat_version(void);
 

From d9a69f730e180ab9b1cac8a4d53d07b1ec375a38 Mon Sep 17 00:00:00 2001
From: Diego Biurrun <diego@biurrun.de>
Date: Mon, 16 May 2011 00:54:36 +0200
Subject: [PATCH 10/19] Simplify CLEANFILES make variable by using wildcards.

Also ensures that generated file cos_fixed_tables.c is deleted on 'make clean'.
---
 libavcodec/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index b26c33de63..4765ceb413 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -670,7 +670,7 @@ HOSTPROGS = costablegen
 
 DIRS = alpha arm bfin mlib ppc ps2 sh4 sparc x86
 
-CLEANFILES = sin_tables.c cos_tables.c *_tables.h *_tablegen$(HOSTEXESUF)
+CLEANFILES = *_tables.c *_tables.h *_tablegen$(HOSTEXESUF)
 
 include $(SUBDIR)../subdir.mak
 

From 8a0572b05451b9a161989ab1ec2c03447a4c1ce6 Mon Sep 17 00:00:00 2001
From: Diego Biurrun <diego@biurrun.de>
Date: Mon, 16 May 2011 00:59:50 +0200
Subject: [PATCH 11/19] Ignore generated tables and generated table generator
 programs.

---
 .gitignore | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/.gitignore b/.gitignore
index 3d5e38b6bd..8887980bc0 100644
--- a/.gitignore
+++ b/.gitignore
@@ -15,6 +15,9 @@ ffmpeg
 ffplay
 ffprobe
 ffserver
+libavcodec/*_tablegen
+libavcodec/*_tables.c
+libavcodec/*_tables.h
 libavcodec/libavcodec*
 libavdevice/libavdevice*
 libavfilter/libavfilter*

From c98657a21536ef71a1d8d6a7ff4d69dd19d9e5b7 Mon Sep 17 00:00:00 2001
From: Diego Biurrun <diego@biurrun.de>
Date: Thu, 19 May 2011 18:12:17 +0200
Subject: [PATCH 12/19] Remove potentially unstable filenames from comments in
 generated files.

---
 libavcodec/costablegen.c | 2 +-
 libavcodec/tableprint.c  | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/libavcodec/costablegen.c b/libavcodec/costablegen.c
index 65c492696b..6bfb8eabf2 100644
--- a/libavcodec/costablegen.c
+++ b/libavcodec/costablegen.c
@@ -54,7 +54,7 @@ int main(int argc, char *argv[])
     int fixed  = argc > 2 && !strcmp(argv[2], "fixed");
     double (*func)(double) = do_sin ? sin : cos;
 
-    printf("/* This file was generated by libavcodec/costablegen */\n");
+    printf("/* This file was automatically generated. */\n");
     printf("#define CONFIG_FFT_FLOAT %d\n", !fixed);
     printf("#include \"libavcodec/%s\"\n", do_sin ? "rdft.h" : "fft.h");
     for (i = 4; i <= BITS; i++) {
diff --git a/libavcodec/tableprint.c b/libavcodec/tableprint.c
index da77525626..362dc24c50 100644
--- a/libavcodec/tableprint.c
+++ b/libavcodec/tableprint.c
@@ -36,6 +36,6 @@ WRITE_2D_FUNC(uint32_t)
 WRITE_2D_FUNC(float)
 
 void write_fileheader(void) {
-    printf("/* This file was generated by libavcodec/tableprint */\n");
+    printf("/* This file was automatically generated. */\n");
     printf("#include <stdint.h>\n");
 }

From 272874c9dbde5d48884c417b76f3c7f04938c92f Mon Sep 17 00:00:00 2001
From: Mans Rullgard <mans@mansr.com>
Date: Thu, 19 May 2011 18:34:40 +0100
Subject: [PATCH 13/19] Simplify trig table rules

This collapses the make rules for the trig tables into a pattern
rule.  Based on a patch by Diego, modified to avoid using fragile
make constructs and allow future addition of fixed-point sin tables.

Signed-off-by: Mans Rullgard <mans@mansr.com>
---
 libavcodec/Makefile      | 11 ++++-------
 libavcodec/costablegen.c |  2 +-
 2 files changed, 5 insertions(+), 8 deletions(-)

diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index 4765ceb413..7a9d897606 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -676,14 +676,11 @@ include $(SUBDIR)../subdir.mak
 
 $(SUBDIR)dct-test$(EXESUF): $(SUBDIR)dctref.o
 
-$(SUBDIR)cos_tables.c: $(SUBDIR)costablegen$(HOSTEXESUF)
-	$(M)./$< > $@
+TRIG_TABLES  = cos cos_fixed sin
+TRIG_TABLES := $(TRIG_TABLES:%=$(SUBDIR)%_tables.c)
 
-$(SUBDIR)cos_fixed_tables.c: $(SUBDIR)costablegen$(HOSTEXESUF)
-	$(M)./$< cos fixed > $@
-
-$(SUBDIR)sin_tables.c: $(SUBDIR)costablegen$(HOSTEXESUF)
-	$(M)./$< sin > $@
+$(TRIG_TABLES): $(SUBDIR)%_tables.c: $(SUBDIR)costablegen$(HOSTEXESUF)
+	$(M)./$< $* > $@
 
 ifdef CONFIG_SMALL
 $(SUBDIR)%_tablegen$(HOSTEXESUF): HOSTCFLAGS += -DCONFIG_SMALL=1
diff --git a/libavcodec/costablegen.c b/libavcodec/costablegen.c
index 6bfb8eabf2..5e52c482c6 100644
--- a/libavcodec/costablegen.c
+++ b/libavcodec/costablegen.c
@@ -51,7 +51,7 @@ int main(int argc, char *argv[])
 {
     int i, j;
     int do_sin = argc > 1 && !strcmp(argv[1], "sin");
-    int fixed  = argc > 2 && !strcmp(argv[2], "fixed");
+    int fixed  = argc > 1 &&  strstr(argv[1], "fixed");
     double (*func)(double) = do_sin ? sin : cos;
 
     printf("/* This file was automatically generated. */\n");

From 9f2405661d5bcc9416f4b3339f1139997467e1f5 Mon Sep 17 00:00:00 2001
From: Diego Biurrun <diego@biurrun.de>
Date: Thu, 19 May 2011 19:22:41 +0200
Subject: [PATCH 14/19] Collapse tableprint.c into tableprint.h.

tableprint.c serves little purpose on its own and removing it allows building
the table generator programs with the normal HOSTPROGS Makefile rules.
---
 libavcodec/Makefile     |  7 +++----
 libavcodec/tableprint.c | 41 -----------------------------------------
 libavcodec/tableprint.h | 24 ++++++++++++++++++++----
 3 files changed, 23 insertions(+), 49 deletions(-)
 delete mode 100644 libavcodec/tableprint.c

diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index 7a9d897606..d0abe69202 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -666,7 +666,9 @@ TESTPROGS = cabac dct eval fft fft-fixed h264 iirfilter rangecoder snow
 TESTPROGS-$(HAVE_MMX) += motion
 TESTOBJS = dctref.o
 
-HOSTPROGS = costablegen
+HOSTPROGS = aac_tablegen aacps_tablegen cbrt_tablegen costablegen       \
+            dv_tablegen motionpixels_tablegen mpegaudio_tablegen        \
+            pcm_tablegen qdm2_tablegen sinewin_tablegen
 
 DIRS = alpha arm bfin mlib ppc ps2 sh4 sparc x86
 
@@ -688,9 +690,6 @@ else
 $(SUBDIR)%_tablegen$(HOSTEXESUF): HOSTCFLAGS += -DCONFIG_SMALL=0
 endif
 
-$(SUBDIR)%_tablegen$(HOSTEXESUF): $(SUBDIR)%_tablegen.c $(SUBDIR)%_tablegen.h $(SUBDIR)tableprint.c
-	$(HOSTCC) $(HOSTCFLAGS) $(HOSTLDFLAGS) -o $@ $(filter %.c,$^) $(HOSTLIBS)
-
 GEN_HEADERS = cbrt_tables.h aacps_tables.h aac_tables.h dv_tables.h     \
               sinewin_tables.h mpegaudio_tables.h motionpixels_tables.h \
               pcm_tables.h qdm2_tables.h
diff --git a/libavcodec/tableprint.c b/libavcodec/tableprint.c
deleted file mode 100644
index 362dc24c50..0000000000
--- a/libavcodec/tableprint.c
+++ /dev/null
@@ -1,41 +0,0 @@
-/*
- * Generate a file for hardcoded tables
- *
- * Copyright (c) 2009 Reimar Döffinger <Reimar.Doeffinger@gmx.de>
- *
- * This file is part of Libav.
- *
- * Libav is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * Libav is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#include <stdio.h>
-#include <inttypes.h>
-#include "tableprint.h"
-
-WRITE_1D_FUNC(int8_t,   "%3"PRIi8, 15)
-WRITE_1D_FUNC(uint8_t,  "0x%02"PRIx8, 15)
-WRITE_1D_FUNC(uint16_t, "0x%08"PRIx16, 7)
-WRITE_1D_FUNC(uint32_t, "0x%08"PRIx32, 7)
-WRITE_1D_FUNC(float,    "%.18e", 3)
-
-WRITE_2D_FUNC(int8_t)
-WRITE_2D_FUNC(uint8_t)
-WRITE_2D_FUNC(uint32_t)
-WRITE_2D_FUNC(float)
-
-void write_fileheader(void) {
-    printf("/* This file was automatically generated. */\n");
-    printf("#include <stdint.h>\n");
-}
diff --git a/libavcodec/tableprint.h b/libavcodec/tableprint.h
index af69fe8580..ddf2635da0 100644
--- a/libavcodec/tableprint.h
+++ b/libavcodec/tableprint.h
@@ -23,8 +23,9 @@
 #ifndef AVCODEC_TABLEPRINT_H
 #define AVCODEC_TABLEPRINT_H
 
-#include <stdint.h>
+#include <inttypes.h>
 #include <stdio.h>
+
 #include "libavutil/common.h"
 
 #define WRITE_1D_FUNC_ARGV(type, linebrk, fmtstr, ...)\
@@ -70,9 +71,6 @@ void write_uint32_t_2d_array(const void *, int, int);
 void write_float_2d_array   (const void *, int, int);
 /** \} */ // end of printfuncs group
 
-/** Write a standard file header */
-void write_fileheader(void);
-
 #define WRITE_ARRAY(prefix, type, name)                 \
     do {                                                \
         const size_t array_size = FF_ARRAY_ELEMS(name); \
@@ -92,4 +90,22 @@ void write_fileheader(void);
         printf("};\n");                                                 \
     } while(0)
 
+
+WRITE_1D_FUNC(int8_t,   "%3"PRIi8, 15)
+WRITE_1D_FUNC(uint8_t,  "0x%02"PRIx8, 15)
+WRITE_1D_FUNC(uint16_t, "0x%08"PRIx16, 7)
+WRITE_1D_FUNC(uint32_t, "0x%08"PRIx32, 7)
+WRITE_1D_FUNC(float,    "%.18e", 3)
+
+WRITE_2D_FUNC(int8_t)
+WRITE_2D_FUNC(uint8_t)
+WRITE_2D_FUNC(uint32_t)
+WRITE_2D_FUNC(float)
+
+static inline void write_fileheader(void)
+{
+    printf("/* This file was automatically generated. */\n");
+    printf("#include <stdint.h>\n");
+}
+
 #endif /* AVCODEC_TABLEPRINT_H */

From 4887f8245c5dcd2a27817a678122eea4d1a0f31a Mon Sep 17 00:00:00 2001
From: Diego Biurrun <diego@biurrun.de>
Date: Mon, 16 May 2011 00:53:07 +0200
Subject: [PATCH 15/19] Rename costablegen.c ---> cos_tablegen.c.

This is consistent with how all other table generation programs are named.
Moreover this ensures that the cos table generation program is correctly
deleted when cleaning the tree.
---
 libavcodec/Makefile                          | 4 ++--
 libavcodec/{costablegen.c => cos_tablegen.c} | 0
 2 files changed, 2 insertions(+), 2 deletions(-)
 rename libavcodec/{costablegen.c => cos_tablegen.c} (100%)

diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index d0abe69202..6cb59a3c43 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -666,7 +666,7 @@ TESTPROGS = cabac dct eval fft fft-fixed h264 iirfilter rangecoder snow
 TESTPROGS-$(HAVE_MMX) += motion
 TESTOBJS = dctref.o
 
-HOSTPROGS = aac_tablegen aacps_tablegen cbrt_tablegen costablegen       \
+HOSTPROGS = aac_tablegen aacps_tablegen cbrt_tablegen cos_tablegen      \
             dv_tablegen motionpixels_tablegen mpegaudio_tablegen        \
             pcm_tablegen qdm2_tablegen sinewin_tablegen
 
@@ -681,7 +681,7 @@ $(SUBDIR)dct-test$(EXESUF): $(SUBDIR)dctref.o
 TRIG_TABLES  = cos cos_fixed sin
 TRIG_TABLES := $(TRIG_TABLES:%=$(SUBDIR)%_tables.c)
 
-$(TRIG_TABLES): $(SUBDIR)%_tables.c: $(SUBDIR)costablegen$(HOSTEXESUF)
+$(TRIG_TABLES): $(SUBDIR)%_tables.c: $(SUBDIR)cos_tablegen$(HOSTEXESUF)
 	$(M)./$< $* > $@
 
 ifdef CONFIG_SMALL
diff --git a/libavcodec/costablegen.c b/libavcodec/cos_tablegen.c
similarity index 100%
rename from libavcodec/costablegen.c
rename to libavcodec/cos_tablegen.c

From 89a20987355757be64c49fb714721c38902ac1cc Mon Sep 17 00:00:00 2001
From: Mans Rullgard <mans@mansr.com>
Date: Thu, 19 May 2011 20:46:24 +0100
Subject: [PATCH 16/19] Fix ff_mpa_synth_filter_fixed() prototype

The prototype should use the same typedefs as the definition, or it
will fail where int32_t is not int (DOS apparently).

Signed-off-by: Mans Rullgard <mans@mansr.com>
---
 libavcodec/mpegaudiodsp.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libavcodec/mpegaudiodsp.h b/libavcodec/mpegaudiodsp.h
index 7b05b68eee..597e2533f5 100644
--- a/libavcodec/mpegaudiodsp.h
+++ b/libavcodec/mpegaudiodsp.h
@@ -39,7 +39,7 @@ void ff_mpa_synth_filter_fixed(MPADSPContext *s,
                                int32_t *synth_buf_ptr, int *synth_buf_offset,
                                int32_t *window, int *dither_state,
                                int16_t *samples, int incr,
-                               int *sb_samples);
+                               int32_t *sb_samples);
 
 void ff_mpa_synth_filter_float(MPADSPContext *s,
                                float *synth_buf_ptr, int *synth_buf_offset,

From 0ffc84150599d15b66a3960202e07755f99fe0d0 Mon Sep 17 00:00:00 2001
From: "Ronald S. Bultje" <rsbultje@gmail.com>
Date: Wed, 18 May 2011 08:10:49 -0400
Subject: [PATCH 17/19] h264: fix loopfilter with threading at slice
 boundaries.

---
 libavcodec/h264.c | 17 ++++++++++-------
 1 file changed, 10 insertions(+), 7 deletions(-)

diff --git a/libavcodec/h264.c b/libavcodec/h264.c
index 6b262bc992..5338146499 100644
--- a/libavcodec/h264.c
+++ b/libavcodec/h264.c
@@ -2557,7 +2557,7 @@ static int fill_filter_caches(H264Context *h, int mb_type){
     return 0;
 }
 
-static void loop_filter(H264Context *h){
+static void loop_filter(H264Context *h, int start_x, int end_x){
     MpegEncContext * const s = &h->s;
     uint8_t  *dest_y, *dest_cb, *dest_cr;
     int linesize, uvlinesize, mb_x, mb_y;
@@ -2566,7 +2566,7 @@ static void loop_filter(H264Context *h){
     const int pixel_shift = h->pixel_shift;
 
     if(h->deblocking_filter) {
-        for(mb_x= 0; mb_x<s->mb_width; mb_x++){
+        for(mb_x= start_x; mb_x<end_x; mb_x++){
             for(mb_y=end_mb_y - FRAME_MBAFF; mb_y<= end_mb_y; mb_y++){
                 int mb_xy, mb_type;
                 mb_xy = h->mb_xy = mb_x + mb_y*s->mb_stride;
@@ -2632,6 +2632,7 @@ static int decode_slice(struct AVCodecContext *avctx, void *arg){
     H264Context *h = *(void**)arg;
     MpegEncContext * const s = &h->s;
     const int part_mask= s->partitioned_frame ? (AC_END|AC_ERROR) : 0x7F;
+    int lf_x_start = s->mb_x;
 
     s->mb_skip_run= -1;
 
@@ -2670,6 +2671,7 @@ static int decode_slice(struct AVCodecContext *avctx, void *arg){
 
             if((s->workaround_bugs & FF_BUG_TRUNCATED) && h->cabac.bytestream > h->cabac.bytestream_end + 2){
                 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
+                if (s->mb_x >= lf_x_start) loop_filter(h, lf_x_start, s->mb_x + 1);
                 return 0;
             }
             if( ret < 0 || h->cabac.bytestream > h->cabac.bytestream_end + 2) {
@@ -2679,8 +2681,8 @@ static int decode_slice(struct AVCodecContext *avctx, void *arg){
             }
 
             if( ++s->mb_x >= s->mb_width ) {
-                s->mb_x = 0;
-                loop_filter(h);
+                loop_filter(h, lf_x_start, s->mb_x);
+                s->mb_x = lf_x_start = 0;
                 ff_draw_horiz_band(s, 16*s->mb_y, 16);
                 ++s->mb_y;
                 if(FIELD_OR_MBAFF_PICTURE) {
@@ -2693,6 +2695,7 @@ static int decode_slice(struct AVCodecContext *avctx, void *arg){
             if( eos || s->mb_y >= s->mb_height ) {
                 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
                 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
+                if (s->mb_x > lf_x_start) loop_filter(h, lf_x_start, s->mb_x);
                 return 0;
             }
         }
@@ -2714,13 +2717,12 @@ static int decode_slice(struct AVCodecContext *avctx, void *arg){
             if(ret<0){
                 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
                 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
-
                 return -1;
             }
 
             if(++s->mb_x >= s->mb_width){
-                s->mb_x=0;
-                loop_filter(h);
+                loop_filter(h, lf_x_start, s->mb_x);
+                s->mb_x = lf_x_start = 0;
                 ff_draw_horiz_band(s, 16*s->mb_y, 16);
                 ++s->mb_y;
                 if(FIELD_OR_MBAFF_PICTURE) {
@@ -2747,6 +2749,7 @@ static int decode_slice(struct AVCodecContext *avctx, void *arg){
                 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
                 if(get_bits_count(&s->gb) == s->gb.size_in_bits ){
                     ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
+                    if (s->mb_x > lf_x_start) loop_filter(h, lf_x_start, s->mb_x);
 
                     return 0;
                 }else{

From 4e987f8282ff7658a6f804b9db39954bb59fa72e Mon Sep 17 00:00:00 2001
From: "Ronald S. Bultje" <rsbultje@gmail.com>
Date: Thu, 19 May 2011 16:47:59 -0400
Subject: [PATCH 18/19] h264: correct border check.

When backing up the top-left border, check that the top-left
(rather than left) MB indeed does belong to our slice. If it
doesn't, backing up has no positive effect but may accidentally
interfere with other threads writing in the same space.

Fixes occasional one-off effects when enabling slice-MT.
---
 libavcodec/h264.c | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/libavcodec/h264.c b/libavcodec/h264.c
index 5338146499..5fb303c82f 100644
--- a/libavcodec/h264.c
+++ b/libavcodec/h264.c
@@ -1034,7 +1034,7 @@ static inline void xchg_mb_border(H264Context *h, uint8_t *src_y,
                                   int linesize, int uvlinesize,
                                   int xchg, int simple, int pixel_shift){
     MpegEncContext * const s = &h->s;
-    int deblock_left;
+    int deblock_topleft;
     int deblock_top;
     int top_idx = 1;
     uint8_t *top_border_m1;
@@ -1050,11 +1050,11 @@ static inline void xchg_mb_border(H264Context *h, uint8_t *src_y,
     }
 
     if(h->deblocking_filter == 2) {
-        deblock_left = h->left_type[0];
-        deblock_top  = h->top_type;
+        deblock_topleft = h->slice_table[h->mb_xy - 1 - s->mb_stride] == h->slice_num;
+        deblock_top     = h->top_type;
     } else {
-        deblock_left = (s->mb_x > 0);
-        deblock_top =  (s->mb_y > !!MB_FIELD);
+        deblock_topleft = (s->mb_x > 0);
+        deblock_top     = (s->mb_y > !!MB_FIELD);
     }
 
     src_y  -=   linesize + 1 + pixel_shift;
@@ -1077,7 +1077,7 @@ if (xchg) AV_SWAP64(b,a);\
 else      AV_COPY64(b,a);
 
     if(deblock_top){
-        if(deblock_left){
+        if(deblock_topleft){
             XCHG(top_border_m1 + (8 << pixel_shift), src_y - (7 << pixel_shift), 1);
         }
         XCHG(top_border + (0 << pixel_shift), src_y + (1 << pixel_shift), xchg);
@@ -1088,7 +1088,7 @@ else      AV_COPY64(b,a);
     }
     if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
         if(deblock_top){
-            if(deblock_left){
+            if(deblock_topleft){
                 XCHG(top_border_m1 + (16 << pixel_shift), src_cb - (7 << pixel_shift), 1);
                 XCHG(top_border_m1 + (24 << pixel_shift), src_cr - (7 << pixel_shift), 1);
             }
@@ -2611,7 +2611,7 @@ static void loop_filter(H264Context *h, int start_x, int end_x){
         }
     }
     h->slice_type= old_slice_type;
-    s->mb_x= 0;
+    s->mb_x= end_x;
     s->mb_y= end_mb_y - FRAME_MBAFF;
     h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale);
     h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale);

From 984ece7503597d30e6f3bdeb67e337ea1616f880 Mon Sep 17 00:00:00 2001
From: Vitor Sessak <vitor1001@gmail.com>
Date: Thu, 19 May 2011 21:33:27 +0200
Subject: [PATCH 19/19] qdm2: Use floating point synthesis filter.

This avoids needless conversion from floating point to fixed point and back.

Signed-off-by: Ronald S. Bultje <rsbultje@gmail.com>
---
 libavcodec/qdm2.c | 29 ++++++++++++-----------------
 1 file changed, 12 insertions(+), 17 deletions(-)

diff --git a/libavcodec/qdm2.c b/libavcodec/qdm2.c
index f74cfd9258..53ee304a28 100644
--- a/libavcodec/qdm2.c
+++ b/libavcodec/qdm2.c
@@ -172,9 +172,9 @@ typedef struct {
 
     /// Synthesis filter
     MPADSPContext mpadsp;
-    DECLARE_ALIGNED(16, MPA_INT, synth_buf)[MPA_MAX_CHANNELS][512*2];
+    DECLARE_ALIGNED(32, float, synth_buf)[MPA_MAX_CHANNELS][512*2];
     int synth_buf_offset[MPA_MAX_CHANNELS];
-    DECLARE_ALIGNED(16, int32_t, sb_samples)[MPA_MAX_CHANNELS][128][SBLIMIT];
+    DECLARE_ALIGNED(32, float, sb_samples)[MPA_MAX_CHANNELS][128][SBLIMIT];
 
     /// Mixed temporary data used in decoding
     float tone_level[MPA_MAX_CHANNELS][30][64];
@@ -331,11 +331,6 @@ static av_cold void qdm2_init_vlc(void)
     }
 }
 
-
-/* for floating point to fixed point conversion */
-static const float f2i_scale = (float) (1 << (FRAC_BITS - 15));
-
-
 static int qdm2_get_vlc (GetBitContext *gb, VLC *vlc, int flag, int depth)
 {
     int value;
@@ -484,8 +479,8 @@ static void build_sb_samples_from_noise (QDM2Context *q, int sb)
 
     for (ch = 0; ch < q->nb_channels; ch++)
         for (j = 0; j < 64; j++) {
-            q->sb_samples[ch][j * 2][sb] = (int32_t)(f2i_scale * SB_DITHERING_NOISE(sb,q->noise_idx) * q->tone_level[ch][sb][j] + .5);
-            q->sb_samples[ch][j * 2 + 1][sb] = (int32_t)(f2i_scale * SB_DITHERING_NOISE(sb,q->noise_idx) * q->tone_level[ch][sb][j] + .5);
+            q->sb_samples[ch][j * 2][sb] = SB_DITHERING_NOISE(sb,q->noise_idx) * q->tone_level[ch][sb][j];
+            q->sb_samples[ch][j * 2 + 1][sb] = SB_DITHERING_NOISE(sb,q->noise_idx) * q->tone_level[ch][sb][j];
         }
 }
 
@@ -925,11 +920,11 @@ static void synthfilt_build_sb_samples (QDM2Context *q, GetBitContext *gb, int l
                     for (chs = 0; chs < q->nb_channels; chs++)
                         for (k = 0; k < run; k++)
                             if ((j + k) < 128)
-                                q->sb_samples[chs][j + k][sb] = (int32_t)(f2i_scale * q->tone_level[chs][sb][((j + k)/2)] * tmp[k][chs] + .5);
+                                q->sb_samples[chs][j + k][sb] = q->tone_level[chs][sb][((j + k)/2)] * tmp[k][chs];
                 } else {
                     for (k = 0; k < run; k++)
                         if ((j + k) < 128)
-                            q->sb_samples[ch][j + k][sb] = (int32_t)(f2i_scale * q->tone_level[ch][sb][(j + k)/2] * samples[k] + .5);
+                            q->sb_samples[ch][j + k][sb] = q->tone_level[ch][sb][(j + k)/2] * samples[k];
                 }
 
                 j += run;
@@ -1603,7 +1598,7 @@ static void qdm2_calculate_fft (QDM2Context *q, int channel, int sub_packet)
  */
 static void qdm2_synthesis_filter (QDM2Context *q, int index)
 {
-    OUT_INT samples[MPA_MAX_CHANNELS * MPA_FRAME_SIZE];
+    float samples[MPA_MAX_CHANNELS * MPA_FRAME_SIZE];
     int i, k, ch, sb_used, sub_sampling, dither_state = 0;
 
     /* copy sb_samples */
@@ -1615,12 +1610,12 @@ static void qdm2_synthesis_filter (QDM2Context *q, int index)
                 q->sb_samples[ch][(8 * index) + i][k] = 0;
 
     for (ch = 0; ch < q->nb_channels; ch++) {
-        OUT_INT *samples_ptr = samples + ch;
+        float *samples_ptr = samples + ch;
 
         for (i = 0; i < 8; i++) {
-            ff_mpa_synth_filter_fixed(&q->mpadsp,
+            ff_mpa_synth_filter_float(&q->mpadsp,
                 q->synth_buf[ch], &(q->synth_buf_offset[ch]),
-                ff_mpa_synth_window_fixed, &dither_state,
+                ff_mpa_synth_window_float, &dither_state,
                 samples_ptr, q->nb_channels,
                 q->sb_samples[ch][(8 * index) + i]);
             samples_ptr += 32 * q->nb_channels;
@@ -1632,7 +1627,7 @@ static void qdm2_synthesis_filter (QDM2Context *q, int index)
 
     for (ch = 0; ch < q->channels; ch++)
         for (i = 0; i < q->frame_size; i++)
-            q->output_buffer[q->channels * i + ch] += (float)(samples[q->nb_channels * sub_sampling * i + ch] >> (sizeof(OUT_INT)*8-16));
+            q->output_buffer[q->channels * i + ch] += (1 << 23) * samples[q->nb_channels * sub_sampling * i + ch];
 }
 
 
@@ -1649,7 +1644,7 @@ static av_cold void qdm2_init(QDM2Context *q) {
     initialized = 1;
 
     qdm2_init_vlc();
-    ff_mpa_synth_init_fixed(ff_mpa_synth_window_fixed);
+    ff_mpa_synth_init_float(ff_mpa_synth_window_float);
     softclip_table_init();
     rnd_table_init();
     init_noise_samples();