From 807aa2d8f398bf2b005fa1ecdf98b48be7cf54f9 Mon Sep 17 00:00:00 2001
From: Reinhard Tartler <siretart@tauware.de>
Date: Thu, 30 Jun 2011 21:57:43 +0200
Subject: [PATCH 01/12] doxygen: Help doxygen parser to understand the
 DECLARE_ALIGNED and offsetof macros

Without this, members that have been declared with the DECLARE_ALIGNED
macro don't show up at all in the generated documentation.
---
 Doxyfile | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Doxyfile b/Doxyfile
index 6f32da6540..bc9e7a1253 100644
--- a/Doxyfile
+++ b/Doxyfile
@@ -1160,6 +1160,7 @@ INCLUDE_FILE_PATTERNS  =
 
 PREDEFINED             = __attribute__(x)="" "RENAME(x)=x ## _TMPL" "DEF(x)=x ## _TMPL" \
                          HAVE_AV_CONFIG_H HAVE_MMX HAVE_MMX2 HAVE_AMD3DNOW \
+                         "DECLARE_ALIGNED(a,t,n)=t n" "offsetof(x,y)=0x42" \
 
 # If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then
 # this tag can be used to specify a list of macro names that should be expanded.

From 134557f3a47697a7b5e5da2bd7e5a4b8f8d56b1c Mon Sep 17 00:00:00 2001
From: Reinhard Tartler <siretart@tauware.de>
Date: Thu, 30 Jun 2011 23:00:46 +0200
Subject: [PATCH 02/12] doxygen: fix usage of @file directive in
 libavutil/{dict,file}.h

---
 libavutil/dict.h | 3 ++-
 libavutil/file.h | 3 ++-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/libavutil/dict.h b/libavutil/dict.h
index ff24b36f81..397ce3852f 100644
--- a/libavutil/dict.h
+++ b/libavutil/dict.h
@@ -18,7 +18,8 @@
  */
 
 /**
- * @file Public dictionary API.
+ * @file
+ * Public dictionary API.
  */
 
 #ifndef AVUTIL_DICT_H
diff --git a/libavutil/file.h b/libavutil/file.h
index 8b65bfb01d..c481c37f93 100644
--- a/libavutil/file.h
+++ b/libavutil/file.h
@@ -22,7 +22,8 @@
 #include "avutil.h"
 
 /**
- * @file misc file utilities
+ * @file
+ * Misc file utilities.
  */
 
 /**

From cdc2c1c57616956d975c57b4b69eb73865f513f5 Mon Sep 17 00:00:00 2001
From: John Stebbins <stebbins@jetheaddev.com>
Date: Fri, 1 Jul 2011 08:57:42 -0700
Subject: [PATCH 03/12] matroskadec: matroska_read_seek after EBML_STOP
 leads to failure.

EBML_STOP leaves matroska->current_id set. Then matroska_read_seek changes
the stream position without resetting current_id. The next
matroska_parse_cluster call fails because it calculates an incorrect pos.
So clear current_id whenever avio_seek happens in matroska_read_seek.

Signed-off-by: Ronald S. Bultje <rsbultje@gmail.com>
---
 libavformat/matroskadec.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/libavformat/matroskadec.c b/libavformat/matroskadec.c
index c5f8e673dc..a1e827f093 100644
--- a/libavformat/matroskadec.c
+++ b/libavformat/matroskadec.c
@@ -1904,6 +1904,7 @@ static int matroska_read_seek(AVFormatContext *s, int stream_index,
 
     if ((index = av_index_search_timestamp(st, timestamp, flags)) < 0) {
         avio_seek(s->pb, st->index_entries[st->nb_index_entries-1].pos, SEEK_SET);
+        matroska->current_id = 0;
         while ((index = av_index_search_timestamp(st, timestamp, flags)) < 0) {
             matroska_clear_queue(matroska);
             if (matroska_parse_cluster(matroska) < 0)
@@ -1932,6 +1933,7 @@ static int matroska_read_seek(AVFormatContext *s, int stream_index,
     }
 
     avio_seek(s->pb, st->index_entries[index_min].pos, SEEK_SET);
+    matroska->current_id = 0;
     matroska->skip_to_keyframe = !(flags & AVSEEK_FLAG_ANY);
     matroska->skip_to_timecode = st->index_entries[index].timestamp;
     matroska->done = 0;

From 8a8d0ce208b77f506759185ff580fa61b5c41f70 Mon Sep 17 00:00:00 2001
From: "Ronald S. Bultje" <rsbultje@gmail.com>
Date: Thu, 30 Jun 2011 17:35:13 -0700
Subject: [PATCH 04/12] swscale: for >8bit scaling, read in native bit-depth.

For 9/10bit input, this means we don't have to upscale to 16bit before
the actual scaling or pixel format conversion, which yields a performance
gain.
---
 libswscale/ppc/swscale_altivec.c  |  2 +-
 libswscale/swscale.c              | 99 ++++++++-----------------------
 libswscale/swscale_internal.h     |  2 +-
 libswscale/utils.c                |  1 -
 libswscale/x86/swscale_template.c |  2 +-
 tests/ref/lavfi/pixfmts_scale     |  8 +--
 6 files changed, 31 insertions(+), 83 deletions(-)

diff --git a/libswscale/ppc/swscale_altivec.c b/libswscale/ppc/swscale_altivec.c
index 6fb3de0ee7..369e93b85a 100644
--- a/libswscale/ppc/swscale_altivec.c
+++ b/libswscale/ppc/swscale_altivec.c
@@ -220,7 +220,7 @@ yuv2yuvX_altivec_real(SwsContext *c,
     }
 }
 
-static void hScale_altivec_real(int16_t *dst, int dstW,
+static void hScale_altivec_real(SwsContext *c, int16_t *dst, int dstW,
                                 const uint8_t *src, const int16_t *filter,
                                 const int16_t *filterPos, int filterSize)
 {
diff --git a/libswscale/swscale.c b/libswscale/swscale.c
index ef8fc39c48..dacf40ed8a 100644
--- a/libswscale/swscale.c
+++ b/libswscale/swscale.c
@@ -1766,59 +1766,6 @@ static void nv21ToUV_c(uint8_t *dstU, uint8_t *dstV,
 
 #define input_pixel(pos) (isBE(origin) ? AV_RB16(pos) : AV_RL16(pos))
 
-// FIXME Maybe dither instead.
-static av_always_inline void
-yuv9_OR_10ToUV_c_template(uint16_t *dstU, uint16_t *dstV,
-                          const uint16_t *srcU, const uint16_t *srcV,
-                          int width, enum PixelFormat origin, int depth)
-{
-    int i;
-
-    for (i = 0; i < width; i++) {
-        int upx = input_pixel(&srcU[i]);
-        int vpx = input_pixel(&srcV[i]);
-        dstU[i] =  (upx << (16 - depth)) | (upx >> (2 * depth - 16));
-        dstV[i] =  (vpx << (16 - depth)) | (vpx >> (2 * depth - 16));
-    }
-}
-
-static av_always_inline void
-yuv9_or_10ToY_c_template(uint16_t *dstY, const uint16_t *srcY,
-                         int width, enum PixelFormat origin, int depth)
-{
-    int i;
-
-    for (i = 0; i < width; i++) {
-        int px = input_pixel(&srcY[i]);
-        dstY[i] =  (px << (16 - depth)) | (px >> (2 * depth - 16));
-    }
-}
-
-#undef input_pixel
-
-#define YUV_NBPS(depth, BE_LE, origin) \
-static void BE_LE ## depth ## ToUV_c(uint8_t *_dstU, uint8_t *_dstV, \
-                                     const uint8_t *_srcU, const uint8_t *_srcV, \
-                                     int width, uint32_t *unused) \
-{ \
-    uint16_t *dstU = (uint16_t *) _dstU, *dstV = (uint16_t *) _dstV; \
-    const uint16_t *srcU = (const uint16_t *) _srcU, \
-                   *srcV = (const uint16_t *) _srcV; \
-    yuv9_OR_10ToUV_c_template(dstU, dstV, srcU, srcV, width, origin, depth); \
-} \
-static void BE_LE ## depth ## ToY_c(uint8_t *_dstY, const uint8_t *_srcY, \
-                                    int width, uint32_t *unused) \
-{ \
-    uint16_t *dstY = (uint16_t *) _dstY; \
-    const uint16_t *srcY = (const uint16_t *) _srcY; \
-    yuv9_or_10ToY_c_template(dstY, srcY, width, origin, depth); \
-}
-
-YUV_NBPS( 9, LE, PIX_FMT_YUV420P9LE);
-YUV_NBPS( 9, BE, PIX_FMT_YUV420P9BE);
-YUV_NBPS(10, LE, PIX_FMT_YUV420P10LE);
-YUV_NBPS(10, BE, PIX_FMT_YUV420P10BE);
-
 static void bgr24ToY_c(uint8_t *dst, const uint8_t *src,
                        int width, uint32_t *unused)
 {
@@ -1905,13 +1852,15 @@ static void rgb24ToUV_half_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
     }
 }
 
-static void hScale16_c(int16_t *_dst, int dstW, const uint8_t *_src,
+static void hScale16_c(SwsContext *c, int16_t *_dst, int dstW, const uint8_t *_src,
                        const int16_t *filter,
                        const int16_t *filterPos, int filterSize)
 {
     int i;
     int32_t *dst = (int32_t *) _dst;
     const uint16_t *src = (const uint16_t *) _src;
+    int bits = av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1;
+    int sh = (bits <= 7) ? 11 : (bits - 4);
 
     for (i = 0; i < dstW; i++) {
         int j;
@@ -1922,12 +1871,12 @@ static void hScale16_c(int16_t *_dst, int dstW, const uint8_t *_src,
             val += src[srcPos + j] * filter[filterSize * i + j];
         }
         // filter=14 bit, input=16 bit, output=30 bit, >> 11 makes 19 bit
-        dst[i] = FFMIN(val >> 11, (1 << 19) - 1);
+        dst[i] = FFMIN(val >> sh, (1 << 19) - 1);
     }
 }
 
 // bilinear / bicubic scaling
-static void hScale_c(int16_t *dst, int dstW, const uint8_t *src,
+static void hScale_c(SwsContext *c, int16_t *dst, int dstW, const uint8_t *src,
                      const int16_t *filter, const int16_t *filterPos,
                      int filterSize)
 {
@@ -2063,7 +2012,7 @@ static av_always_inline void hyscale(SwsContext *c, int16_t *dst, int dstWidth,
     }
 
     if (!c->hyscale_fast) {
-        c->hScale(dst, dstWidth, src, hLumFilter, hLumFilterPos, hLumFilterSize);
+        c->hScale(c, dst, dstWidth, src, hLumFilter, hLumFilterPos, hLumFilterSize);
     } else { // fast bilinear upscale / crap downscale
         c->hyscale_fast(c, dst, dstWidth, src, srcW, xInc);
     }
@@ -2113,8 +2062,8 @@ static av_always_inline void hcscale(SwsContext *c, int16_t *dst1, int16_t *dst2
     }
 
     if (!c->hcscale_fast) {
-        c->hScale(dst1, dstWidth, src1, hChrFilter, hChrFilterPos, hChrFilterSize);
-        c->hScale(dst2, dstWidth, src2, hChrFilter, hChrFilterPos, hChrFilterSize);
+        c->hScale(c, dst1, dstWidth, src1, hChrFilter, hChrFilterPos, hChrFilterSize);
+        c->hScale(c, dst2, dstWidth, src2, hChrFilter, hChrFilterPos, hChrFilterSize);
     } else { // fast bilinear upscale / crap downscale
         c->hcscale_fast(c, dst1, dst2, dstWidth, src1, src2, srcW, xInc);
     }
@@ -2645,21 +2594,21 @@ static av_cold void sws_init_swScale_c(SwsContext *c)
         case PIX_FMT_PAL8     :
         case PIX_FMT_BGR4_BYTE:
         case PIX_FMT_RGB4_BYTE: c->chrToYV12 = palToUV_c; break;
+#if HAVE_BIGENDIAN
         case PIX_FMT_YUV444P9LE:
-        case PIX_FMT_YUV420P9LE:  c->chrToYV12 = LE9ToUV_c; break;
+        case PIX_FMT_YUV420P9LE:
         case PIX_FMT_YUV422P10LE:
         case PIX_FMT_YUV444P10LE:
-        case PIX_FMT_YUV420P10LE: c->chrToYV12 = LE10ToUV_c; break;
-        case PIX_FMT_YUV444P9BE:
-        case PIX_FMT_YUV420P9BE:  c->chrToYV12 = BE9ToUV_c; break;
-        case PIX_FMT_YUV444P10BE:
-        case PIX_FMT_YUV422P10BE:
-        case PIX_FMT_YUV420P10BE: c->chrToYV12 = BE10ToUV_c; break;
-#if HAVE_BIGENDIAN
+        case PIX_FMT_YUV420P10LE:
         case PIX_FMT_YUV420P16LE:
         case PIX_FMT_YUV422P16LE:
         case PIX_FMT_YUV444P16LE: c->chrToYV12 = bswap16UV_c; break;
 #else
+        case PIX_FMT_YUV444P9BE:
+        case PIX_FMT_YUV420P9BE:
+        case PIX_FMT_YUV444P10BE:
+        case PIX_FMT_YUV422P10BE:
+        case PIX_FMT_YUV420P10BE:
         case PIX_FMT_YUV420P16BE:
         case PIX_FMT_YUV422P16BE:
         case PIX_FMT_YUV444P16BE: c->chrToYV12 = bswap16UV_c; break;
@@ -2712,22 +2661,22 @@ static av_cold void sws_init_swScale_c(SwsContext *c)
     c->lumToYV12 = NULL;
     c->alpToYV12 = NULL;
     switch (srcFormat) {
+#if HAVE_BIGENDIAN
     case PIX_FMT_YUV444P9LE:
-    case PIX_FMT_YUV420P9LE:  c->lumToYV12 = LE9ToY_c; break;
+    case PIX_FMT_YUV420P9LE:
     case PIX_FMT_YUV444P10LE:
     case PIX_FMT_YUV422P10LE:
-    case PIX_FMT_YUV420P10LE: c->lumToYV12 = LE10ToY_c; break;
-    case PIX_FMT_YUV444P9BE:
-    case PIX_FMT_YUV420P9BE:  c->lumToYV12 = BE9ToY_c; break;
-    case PIX_FMT_YUV444P10BE:
-    case PIX_FMT_YUV422P10BE:
-    case PIX_FMT_YUV420P10BE: c->lumToYV12 = BE10ToY_c; break;
-#if HAVE_BIGENDIAN
+    case PIX_FMT_YUV420P10LE:
     case PIX_FMT_YUV420P16LE:
     case PIX_FMT_YUV422P16LE:
     case PIX_FMT_YUV444P16LE:
     case PIX_FMT_GRAY16LE: c->lumToYV12 = bswap16Y_c; break;
 #else
+    case PIX_FMT_YUV444P9BE:
+    case PIX_FMT_YUV420P9BE:
+    case PIX_FMT_YUV444P10BE:
+    case PIX_FMT_YUV422P10BE:
+    case PIX_FMT_YUV420P10BE:
     case PIX_FMT_YUV420P16BE:
     case PIX_FMT_YUV422P16BE:
     case PIX_FMT_YUV444P16BE:
diff --git a/libswscale/swscale_internal.h b/libswscale/swscale_internal.h
index cfb19d9c13..e4b93c595b 100644
--- a/libswscale/swscale_internal.h
+++ b/libswscale/swscale_internal.h
@@ -440,7 +440,7 @@ typedef struct SwsContext {
      *                   (and input coefficients thus padded with zeroes)
      *                   to simplify creating SIMD code.
      */
-    void (*hScale)(int16_t *dst, int dstW, const uint8_t *src,
+    void (*hScale)(struct SwsContext *c, int16_t *dst, int dstW, const uint8_t *src,
                    const int16_t *filter, const int16_t *filterPos,
                    int filterSize);
 
diff --git a/libswscale/utils.c b/libswscale/utils.c
index b7ccac94ba..eea32a130a 100644
--- a/libswscale/utils.c
+++ b/libswscale/utils.c
@@ -877,7 +877,6 @@ int sws_init_context(SwsContext *c, SwsFilter *srcFilter, SwsFilter *dstFilter)
         }
     }
 
-    // FIXME it's even nicer if bpp isn't 16, but max({src,dst}formatbpp)
     c->scalingBpp = FFMAX(av_pix_fmt_descriptors[srcFormat].comp[0].depth_minus1,
                           av_pix_fmt_descriptors[dstFormat].comp[0].depth_minus1) >= 8 ? 16 : 8;
     if (c->scalingBpp == 16)
diff --git a/libswscale/x86/swscale_template.c b/libswscale/x86/swscale_template.c
index 54c7edc25e..f58ac520e1 100644
--- a/libswscale/x86/swscale_template.c
+++ b/libswscale/x86/swscale_template.c
@@ -1859,7 +1859,7 @@ static void RENAME(rgb24ToUV)(uint8_t *dstU, uint8_t *dstV,
 
 #if !COMPILE_TEMPLATE_MMX2
 // bilinear / bicubic scaling
-static void RENAME(hScale)(int16_t *dst, int dstW,
+static void RENAME(hScale)(SwsContext *c, int16_t *dst, int dstW,
                            const uint8_t *src, const int16_t *filter,
                            const int16_t *filterPos, int filterSize)
 {
diff --git a/tests/ref/lavfi/pixfmts_scale b/tests/ref/lavfi/pixfmts_scale
index de41952a21..8901fe9132 100644
--- a/tests/ref/lavfi/pixfmts_scale
+++ b/tests/ref/lavfi/pixfmts_scale
@@ -31,12 +31,12 @@ uyvy422             314bd486277111a95d9369b944fa0400
 yuv410p             7df8f6d69b56a8dcb6c7ee908e5018b5
 yuv411p             1143e7c5cc28fe0922b051b17733bc4c
 yuv420p             fdad2d8df8985e3d17e73c71f713cb14
-yuv420p10be         8b5ad855229840a8fa87786cab83f856
-yuv420p10le         5264e87921d47b17411578d9c92672b1
+yuv420p10be         2343beaf83fccc2ab23a590b2049d38b
+yuv420p10le         94d511d783d175f573e7be5cce75ba4d
 yuv420p16be         f6ef3ba90f238b467c7e72ade927083d
 yuv420p16le         faf6aab3b1c16e8afbe160686dd360e0
-yuv420p9be          c14bf746d161face61e1f39b491bf7ef
-yuv420p9le          59457f9a51768bf1d4342238d50a9be3
+yuv420p9be          fdafb9ad473a559246c4cb0a1f416cd8
+yuv420p9le          fccfd3c3941da635b13739f579819b5a
 yuv422p             918e37701ee7377d16a8a6c119c56a40
 yuv422p16be         837945d3a771366a5a72a4ed095a4f53
 yuv422p16le         b8292ae9b52eb7afc3d8b93e8fd895b4

From 6054cd25b4d7dce97c4fa3cc6e4757ba1e59ab86 Mon Sep 17 00:00:00 2001
From: Justin Ruggles <justin.ruggles@gmail.com>
Date: Thu, 2 Jun 2011 14:00:50 -0400
Subject: [PATCH 05/12] ac3enc: add int32_t array clipping function to DSPUtil,
 including x86 versions.

---
 libavcodec/dsputil.c            |  17 +++++
 libavcodec/dsputil.h            |  16 +++++
 libavcodec/x86/dsputil_mmx.c    |  23 +++++++
 libavcodec/x86/dsputil_yasm.asm | 115 ++++++++++++++++++++++++++++++++
 4 files changed, 171 insertions(+)

diff --git a/libavcodec/dsputil.c b/libavcodec/dsputil.c
index 4389289d82..4f17b435d1 100644
--- a/libavcodec/dsputil.c
+++ b/libavcodec/dsputil.c
@@ -2676,6 +2676,22 @@ static void apply_window_int16_c(int16_t *output, const int16_t *input,
     }
 }
 
+static void vector_clip_int32_c(int32_t *dst, const int32_t *src, int32_t min,
+                                int32_t max, unsigned int len)
+{
+    do {
+        *dst++ = av_clip(*src++, min, max);
+        *dst++ = av_clip(*src++, min, max);
+        *dst++ = av_clip(*src++, min, max);
+        *dst++ = av_clip(*src++, min, max);
+        *dst++ = av_clip(*src++, min, max);
+        *dst++ = av_clip(*src++, min, max);
+        *dst++ = av_clip(*src++, min, max);
+        *dst++ = av_clip(*src++, min, max);
+        len -= 8;
+    } while (len > 0);
+}
+
 #define W0 2048
 #define W1 2841 /* 2048*sqrt (2)*cos (1*pi/16) */
 #define W2 2676 /* 2048*sqrt (2)*cos (2*pi/16) */
@@ -3122,6 +3138,7 @@ av_cold void dsputil_init(DSPContext* c, AVCodecContext *avctx)
     c->scalarproduct_int16 = scalarproduct_int16_c;
     c->scalarproduct_and_madd_int16 = scalarproduct_and_madd_int16_c;
     c->apply_window_int16 = apply_window_int16_c;
+    c->vector_clip_int32 = vector_clip_int32_c;
     c->scalarproduct_float = scalarproduct_float_c;
     c->butterflies_float = butterflies_float_c;
     c->vector_fmul_scalar = vector_fmul_scalar_c;
diff --git a/libavcodec/dsputil.h b/libavcodec/dsputil.h
index ea135ca1ba..ef2956eecb 100644
--- a/libavcodec/dsputil.h
+++ b/libavcodec/dsputil.h
@@ -555,6 +555,22 @@ typedef struct DSPContext {
     void (*apply_window_int16)(int16_t *output, const int16_t *input,
                                const int16_t *window, unsigned int len);
 
+    /**
+     * Clip each element in an array of int32_t to a given minimum and maximum value.
+     * @param dst  destination array
+     *             constraints: 16-byte aligned
+     * @param src  source array
+     *             constraints: 16-byte aligned
+     * @param min  minimum value
+     *             constraints: must be in the range [-(1<<24), 1<<24]
+     * @param max  maximum value
+     *             constraints: must be in the range [-(1<<24), 1<<24]
+     * @param len  number of elements in the array
+     *             constraints: multiple of 32 greater than zero
+     */
+    void (*vector_clip_int32)(int32_t *dst, const int32_t *src, int32_t min,
+                              int32_t max, unsigned int len);
+
     /* rv30 functions */
     qpel_mc_func put_rv30_tpel_pixels_tab[4][16];
     qpel_mc_func avg_rv30_tpel_pixels_tab[4][16];
diff --git a/libavcodec/x86/dsputil_mmx.c b/libavcodec/x86/dsputil_mmx.c
index 4d4bbc5b6f..03c094533f 100644
--- a/libavcodec/x86/dsputil_mmx.c
+++ b/libavcodec/x86/dsputil_mmx.c
@@ -2429,6 +2429,15 @@ int  ff_add_hfyu_left_prediction_sse4(uint8_t *dst, const uint8_t *src, int w, i
 
 float ff_scalarproduct_float_sse(const float *v1, const float *v2, int order);
 
+void ff_vector_clip_int32_mmx     (int32_t *dst, const int32_t *src, int32_t min,
+                                   int32_t max, unsigned int len);
+void ff_vector_clip_int32_sse2    (int32_t *dst, const int32_t *src, int32_t min,
+                                   int32_t max, unsigned int len);
+void ff_vector_clip_int32_sse2_int(int32_t *dst, const int32_t *src, int32_t min,
+                                   int32_t max, unsigned int len);
+void ff_vector_clip_int32_sse41   (int32_t *dst, const int32_t *src, int32_t min,
+                                   int32_t max, unsigned int len);
+
 void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
 {
     int mm_flags = av_get_cpu_flags();
@@ -2570,6 +2579,8 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
 
         c->put_rv40_chroma_pixels_tab[0]= ff_put_rv40_chroma_mc8_mmx;
         c->put_rv40_chroma_pixels_tab[1]= ff_put_rv40_chroma_mc4_mmx;
+
+        c->vector_clip_int32 = ff_vector_clip_int32_mmx;
 #endif
 
         if (mm_flags & AV_CPU_FLAG_MMX2) {
@@ -2855,6 +2866,11 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
 #if HAVE_YASM
             c->scalarproduct_int16 = ff_scalarproduct_int16_sse2;
             c->scalarproduct_and_madd_int16 = ff_scalarproduct_and_madd_int16_sse2;
+            if (mm_flags & AV_CPU_FLAG_ATOM) {
+                c->vector_clip_int32 = ff_vector_clip_int32_sse2_int;
+            } else {
+                c->vector_clip_int32 = ff_vector_clip_int32_sse2;
+            }
             if (avctx->flags & CODEC_FLAG_BITEXACT) {
                 c->apply_window_int16 = ff_apply_window_int16_sse2_ba;
             } else {
@@ -2880,6 +2896,13 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
             }
 #endif
         }
+
+        if (mm_flags & AV_CPU_FLAG_SSE4 && HAVE_SSE) {
+#if HAVE_YASM
+            c->vector_clip_int32 = ff_vector_clip_int32_sse41;
+#endif
+        }
+
 #if HAVE_AVX && HAVE_YASM
         if (mm_flags & AV_CPU_FLAG_AVX) {
             if (bit_depth == 10) {
diff --git a/libavcodec/x86/dsputil_yasm.asm b/libavcodec/x86/dsputil_yasm.asm
index 8b19cc1441..4e1ec24a7a 100644
--- a/libavcodec/x86/dsputil_yasm.asm
+++ b/libavcodec/x86/dsputil_yasm.asm
@@ -1048,3 +1048,118 @@ emu_edge sse
 %ifdef ARCH_X86_32
 emu_edge mmx
 %endif
+
+;-----------------------------------------------------------------------------
+; void ff_vector_clip_int32(int32_t *dst, const int32_t *src, int32_t min,
+;                           int32_t max, unsigned int len)
+;-----------------------------------------------------------------------------
+
+%macro PMINSD_MMX 3 ; dst, src, tmp
+    mova      %3, %2
+    pcmpgtd   %3, %1
+    pxor      %1, %2
+    pand      %1, %3
+    pxor      %1, %2
+%endmacro
+
+%macro PMAXSD_MMX 3 ; dst, src, tmp
+    mova      %3, %1
+    pcmpgtd   %3, %2
+    pand      %1, %3
+    pandn     %3, %2
+    por       %1, %3
+%endmacro
+
+%macro CLIPD_MMX 3-4 ; src/dst, min, max, tmp
+    PMINSD_MMX %1, %3, %4
+    PMAXSD_MMX %1, %2, %4
+%endmacro
+
+%macro CLIPD_SSE2 3-4 ; src/dst, min (float), max (float), unused
+    cvtdq2ps  %1, %1
+    minps     %1, %3
+    maxps     %1, %2
+    cvtps2dq  %1, %1
+%endmacro
+
+%macro CLIPD_SSE41 3-4 ;  src/dst, min, max, unused
+    pminsd  %1, %3
+    pmaxsd  %1, %2
+%endmacro
+
+%macro SPLATD_MMX 1
+    punpckldq  %1, %1
+%endmacro
+
+%macro SPLATD_SSE2 1
+    pshufd  %1, %1, 0
+%endmacro
+
+%macro VECTOR_CLIP_INT32 4
+cglobal vector_clip_int32_%1, 5,5,%2, dst, src, min, max, len
+%ifidn %1, sse2
+    cvtsi2ss  m4, minm
+    cvtsi2ss  m5, maxm
+%else
+    movd      m4, minm
+    movd      m5, maxm
+%endif
+    SPLATD    m4
+    SPLATD    m5
+.loop:
+%assign %%i 1
+%rep %3
+    mova      m0,  [srcq+mmsize*0*%%i]
+    mova      m1,  [srcq+mmsize*1*%%i]
+    mova      m2,  [srcq+mmsize*2*%%i]
+    mova      m3,  [srcq+mmsize*3*%%i]
+%if %4
+    mova      m7,  [srcq+mmsize*4*%%i]
+    mova      m8,  [srcq+mmsize*5*%%i]
+    mova      m9,  [srcq+mmsize*6*%%i]
+    mova      m10, [srcq+mmsize*7*%%i]
+%endif
+    CLIPD  m0,  m4, m5, m6
+    CLIPD  m1,  m4, m5, m6
+    CLIPD  m2,  m4, m5, m6
+    CLIPD  m3,  m4, m5, m6
+%if %4
+    CLIPD  m7,  m4, m5, m6
+    CLIPD  m8,  m4, m5, m6
+    CLIPD  m9,  m4, m5, m6
+    CLIPD  m10, m4, m5, m6
+%endif
+    mova  [dstq+mmsize*0*%%i], m0
+    mova  [dstq+mmsize*1*%%i], m1
+    mova  [dstq+mmsize*2*%%i], m2
+    mova  [dstq+mmsize*3*%%i], m3
+%if %4
+    mova  [dstq+mmsize*4*%%i], m7
+    mova  [dstq+mmsize*5*%%i], m8
+    mova  [dstq+mmsize*6*%%i], m9
+    mova  [dstq+mmsize*7*%%i], m10
+%endif
+%assign %%i %%i+1
+%endrep
+    add     srcq, mmsize*4*(%3+%4)
+    add     dstq, mmsize*4*(%3+%4)
+    sub     lend, mmsize*(%3+%4)
+    jg .loop
+    REP_RET
+%endmacro
+
+INIT_MMX
+%define SPLATD SPLATD_MMX
+%define CLIPD CLIPD_MMX
+VECTOR_CLIP_INT32 mmx, 0, 1, 0
+INIT_XMM
+%define SPLATD SPLATD_SSE2
+VECTOR_CLIP_INT32 sse2_int, 6, 1, 0
+%define CLIPD CLIPD_SSE2
+VECTOR_CLIP_INT32 sse2, 6, 2, 0
+%define CLIPD CLIPD_SSE41
+%ifdef m8
+VECTOR_CLIP_INT32 sse41, 11, 1, 1
+%else
+VECTOR_CLIP_INT32 sse41, 6, 1, 0
+%endif

From 523b7eba19590652b7ba19c5bdd85dd257bfe4f7 Mon Sep 17 00:00:00 2001
From: Justin Ruggles <justin.ruggles@gmail.com>
Date: Mon, 27 Jun 2011 14:29:33 -0400
Subject: [PATCH 06/12] ac3enc: clip coefficients after MDCT.

This ensures that any processing between the MDCT and exponent extraction will
be using clipped coefficients.
---
 libavcodec/ac3enc.h          |  4 ++++
 libavcodec/ac3enc_fixed.c    |  9 +++++++++
 libavcodec/ac3enc_float.c    |  9 +++++++++
 libavcodec/ac3enc_template.c | 16 +++++++++++++---
 4 files changed, 35 insertions(+), 3 deletions(-)

diff --git a/libavcodec/ac3enc.h b/libavcodec/ac3enc.h
index be62656650..54f427a523 100644
--- a/libavcodec/ac3enc.h
+++ b/libavcodec/ac3enc.h
@@ -50,12 +50,16 @@
 #if CONFIG_AC3ENC_FLOAT
 #define AC3_NAME(x) ff_ac3_float_ ## x
 #define MAC_COEF(d,a,b) ((d)+=(a)*(b))
+#define COEF_MIN (-16777215.0/16777216.0)
+#define COEF_MAX ( 16777215.0/16777216.0)
 typedef float SampleType;
 typedef float CoefType;
 typedef float CoefSumType;
 #else
 #define AC3_NAME(x) ff_ac3_fixed_ ## x
 #define MAC_COEF(d,a,b) MAC64(d,a,b)
+#define COEF_MIN -16777215
+#define COEF_MAX  16777215
 typedef int16_t SampleType;
 typedef int32_t CoefType;
 typedef int64_t CoefSumType;
diff --git a/libavcodec/ac3enc_fixed.c b/libavcodec/ac3enc_fixed.c
index d55720eb80..ea3a46cdfa 100644
--- a/libavcodec/ac3enc_fixed.c
+++ b/libavcodec/ac3enc_fixed.c
@@ -104,6 +104,15 @@ static void scale_coefficients(AC3EncodeContext *s)
 }
 
 
+/**
+ * Clip MDCT coefficients to allowable range.
+ */
+static void clip_coefficients(DSPContext *dsp, int32_t *coef, unsigned int len)
+{
+    dsp->vector_clip_int32(coef, coef, COEF_MIN, COEF_MAX, len);
+}
+
+
 static av_cold int ac3_fixed_encode_init(AVCodecContext *avctx)
 {
     AC3EncodeContext *s = avctx->priv_data;
diff --git a/libavcodec/ac3enc_float.c b/libavcodec/ac3enc_float.c
index 12d6b19241..718cc1f2b2 100644
--- a/libavcodec/ac3enc_float.c
+++ b/libavcodec/ac3enc_float.c
@@ -111,6 +111,15 @@ static void scale_coefficients(AC3EncodeContext *s)
 }
 
 
+/**
+ * Clip MDCT coefficients to allowable range.
+ */
+static void clip_coefficients(DSPContext *dsp, float *coef, unsigned int len)
+{
+    dsp->vector_clipf(coef, coef, COEF_MIN, COEF_MAX, len);
+}
+
+
 #if CONFIG_AC3_ENCODER
 AVCodec ff_ac3_encoder = {
     "ac3",
diff --git a/libavcodec/ac3enc_template.c b/libavcodec/ac3enc_template.c
index 85eea54a4a..c7243c7644 100644
--- a/libavcodec/ac3enc_template.c
+++ b/libavcodec/ac3enc_template.c
@@ -41,6 +41,8 @@ static void apply_window(DSPContext *dsp, SampleType *output,
 
 static int normalize_samples(AC3EncodeContext *s);
 
+static void clip_coefficients(DSPContext *dsp, CoefType *coef, unsigned int len);
+
 
 int AC3_NAME(allocate_sample_buffers)(AC3EncodeContext *s)
 {
@@ -171,8 +173,8 @@ static void apply_channel_coupling(AC3EncodeContext *s)
                 cpl_coef[i] += ch_coef[i];
         }
 
-        /* coefficients must be clipped to +/- 1.0 in order to be encoded */
-        s->dsp.vector_clipf(cpl_coef, cpl_coef, -1.0f, 1.0f, num_cpl_coefs);
+        /* coefficients must be clipped in order to be encoded */
+        clip_coefficients(&s->dsp, cpl_coef, num_cpl_coefs);
 
         /* scale coupling coefficients from float to 24-bit fixed-point */
         s->ac3dsp.float_to_fixed24(&block->fixed_coef[CPL_CH][cpl_start],
@@ -300,6 +302,7 @@ static void apply_channel_coupling(AC3EncodeContext *s)
         if (!block->cpl_in_use || !block->new_cpl_coords)
             continue;
 
+        clip_coefficients(&s->dsp, cpl_coords[blk][1], s->fbw_channels * 16);
         s->ac3dsp.float_to_fixed24(fixed_cpl_coords[blk][1],
                                    cpl_coords[blk][1],
                                    s->fbw_channels * 16);
@@ -433,7 +436,11 @@ int AC3_NAME(encode_frame)(AVCodecContext *avctx, unsigned char *frame,
 
     apply_mdct(s);
 
-    scale_coefficients(s);
+    if (s->fixed_point)
+        scale_coefficients(s);
+
+    clip_coefficients(&s->dsp, s->blocks[0].mdct_coef[1],
+                      AC3_MAX_COEFS * AC3_MAX_BLOCKS * s->channels);
 
     s->cpl_on = s->cpl_enabled;
     ff_ac3_compute_coupling_strategy(s);
@@ -443,6 +450,9 @@ int AC3_NAME(encode_frame)(AVCodecContext *avctx, unsigned char *frame,
 
     compute_rematrixing_strategy(s);
 
+    if (!s->fixed_point)
+        scale_coefficients(s);
+
     ff_ac3_apply_rematrixing(s);
 
     ff_ac3_process_exponents(s);

From 8b7b2d6aaee8cef0051beb3a4cf3e1c5a87cf40f Mon Sep 17 00:00:00 2001
From: Justin Ruggles <justin.ruggles@gmail.com>
Date: Thu, 16 Jun 2011 16:55:33 -0400
Subject: [PATCH 07/12] ac3dsp: simplify extract_exponents() now that it does
 not need to do clipping.

---
 libavcodec/ac3dsp.c | 15 +--------------
 1 file changed, 1 insertion(+), 14 deletions(-)

diff --git a/libavcodec/ac3dsp.c b/libavcodec/ac3dsp.c
index 8ce5f8d2c5..98c73573cb 100644
--- a/libavcodec/ac3dsp.c
+++ b/libavcodec/ac3dsp.c
@@ -164,21 +164,8 @@ static void ac3_extract_exponents_c(uint8_t *exp, int32_t *coef, int nb_coefs)
     int i;
 
     for (i = 0; i < nb_coefs; i++) {
-        int e;
         int v = abs(coef[i]);
-        if (v == 0)
-            e = 24;
-        else {
-            e = 23 - av_log2(v);
-            if (e >= 24) {
-                e = 24;
-                coef[i] = 0;
-            } else if (e < 0) {
-                e = 0;
-                coef[i] = av_clip(coef[i], -16777215, 16777215);
-            }
-        }
-        exp[i] = e;
+        exp[i] = v ? 23 - av_log2(v) : 24;
     }
 }
 

From f99a5ef92e5aba87a2d861822274147c994041d5 Mon Sep 17 00:00:00 2001
From: Justin Ruggles <justin.ruggles@gmail.com>
Date: Thu, 30 Jun 2011 17:48:44 -0400
Subject: [PATCH 08/12] ac3dsp: add x86-optimized versions of
 ac3dsp.extract_exponents().

---
 libavcodec/x86/ac3dsp.asm   | 102 ++++++++++++++++++++++++++++++++++++
 libavcodec/x86/ac3dsp_mmx.c |   9 ++++
 2 files changed, 111 insertions(+)

diff --git a/libavcodec/x86/ac3dsp.asm b/libavcodec/x86/ac3dsp.asm
index 6892ec2765..c1b0906a85 100644
--- a/libavcodec/x86/ac3dsp.asm
+++ b/libavcodec/x86/ac3dsp.asm
@@ -32,6 +32,11 @@ cextern ac3_bap_bits
 pw_bap_mul1: dw 21846, 21846, 0, 32768, 21846, 21846, 0, 32768
 pw_bap_mul2: dw 5, 7, 0, 7, 5, 7, 0, 7
 
+; used in ff_ac3_extract_exponents()
+pd_1:   times 4 dd 1
+pd_151: times 4 dd 151
+pb_shuf_4dwb: db 0, 4, 8, 12
+
 SECTION .text
 
 ;-----------------------------------------------------------------------------
@@ -346,3 +351,100 @@ cglobal ac3_compute_mantissa_size_sse2, 1,2,4, mant_cnt, sum
     movd       eax, m0
     add        eax, sumd
     RET
+
+;------------------------------------------------------------------------------
+; void ff_ac3_extract_exponents(uint8_t *exp, int32_t *coef, int nb_coefs)
+;------------------------------------------------------------------------------
+
+%macro PABSD_MMX 2 ; src/dst, tmp
+    pxor     %2, %2
+    pcmpgtd  %2, %1
+    pxor     %1, %2
+    psubd    %1, %2
+%endmacro
+
+%macro PABSD_SSSE3 1-2 ; src/dst, unused
+    pabsd    %1, %1
+%endmacro
+
+%ifdef HAVE_AMD3DNOW
+INIT_MMX
+cglobal ac3_extract_exponents_3dnow, 3,3,0, exp, coef, len
+    add      expq, lenq
+    lea     coefq, [coefq+4*lenq]
+    neg      lenq
+    movq       m3, [pd_1]
+    movq       m4, [pd_151]
+.loop:
+    movq       m0, [coefq+4*lenq  ]
+    movq       m1, [coefq+4*lenq+8]
+    PABSD_MMX  m0, m2
+    PABSD_MMX  m1, m2
+    pslld      m0, 1
+    por        m0, m3
+    pi2fd      m2, m0
+    psrld      m2, 23
+    movq       m0, m4
+    psubd      m0, m2
+    pslld      m1, 1
+    por        m1, m3
+    pi2fd      m2, m1
+    psrld      m2, 23
+    movq       m1, m4
+    psubd      m1, m2
+    packssdw   m0, m0
+    packuswb   m0, m0
+    packssdw   m1, m1
+    packuswb   m1, m1
+    punpcklwd  m0, m1
+    movd  [expq+lenq], m0
+    add      lenq, 4
+    jl .loop
+    REP_RET
+%endif
+
+%macro AC3_EXTRACT_EXPONENTS 1
+cglobal ac3_extract_exponents_%1, 3,3,5, exp, coef, len
+    add     expq, lenq
+    lea    coefq, [coefq+4*lenq]
+    neg     lenq
+    mova      m2, [pd_1]
+    mova      m3, [pd_151]
+%ifidn %1, ssse3 ;
+    movd      m4, [pb_shuf_4dwb]
+%endif
+.loop:
+    ; move 4 32-bit coefs to xmm0
+    mova      m0, [coefq+4*lenq]
+    ; absolute value
+    PABSD     m0, m1
+    ; convert to float and extract exponents
+    pslld     m0, 1
+    por       m0, m2
+    cvtdq2ps  m1, m0
+    psrld     m1, 23
+    mova      m0, m3
+    psubd     m0, m1
+    ; move the lowest byte in each of 4 dwords to the low dword
+%ifidn %1, ssse3
+    pshufb    m0, m4
+%else
+    packssdw  m0, m0
+    packuswb  m0, m0
+%endif
+    movd  [expq+lenq], m0
+
+    add     lenq, 4
+    jl .loop
+    REP_RET
+%endmacro
+
+%ifdef HAVE_SSE
+INIT_XMM
+%define PABSD PABSD_MMX
+AC3_EXTRACT_EXPONENTS sse2
+%ifdef HAVE_SSSE3
+%define PABSD PABSD_SSSE3
+AC3_EXTRACT_EXPONENTS ssse3
+%endif
+%endif
diff --git a/libavcodec/x86/ac3dsp_mmx.c b/libavcodec/x86/ac3dsp_mmx.c
index 2664736bb6..692d240d4c 100644
--- a/libavcodec/x86/ac3dsp_mmx.c
+++ b/libavcodec/x86/ac3dsp_mmx.c
@@ -44,6 +44,10 @@ extern void ff_float_to_fixed24_sse2 (int32_t *dst, const float *src, unsigned i
 
 extern int ff_ac3_compute_mantissa_size_sse2(uint16_t mant_cnt[6][16]);
 
+extern void ff_ac3_extract_exponents_3dnow(uint8_t *exp, int32_t *coef, int nb_coefs);
+extern void ff_ac3_extract_exponents_sse2 (uint8_t *exp, int32_t *coef, int nb_coefs);
+extern void ff_ac3_extract_exponents_ssse3(uint8_t *exp, int32_t *coef, int nb_coefs);
+
 av_cold void ff_ac3dsp_init_x86(AC3DSPContext *c, int bit_exact)
 {
     int mm_flags = av_get_cpu_flags();
@@ -56,6 +60,7 @@ av_cold void ff_ac3dsp_init_x86(AC3DSPContext *c, int bit_exact)
         c->ac3_rshift_int32 = ff_ac3_rshift_int32_mmx;
     }
     if (mm_flags & AV_CPU_FLAG_3DNOW && HAVE_AMD3DNOW) {
+        c->extract_exponents = ff_ac3_extract_exponents_3dnow;
         if (!bit_exact) {
             c->float_to_fixed24 = ff_float_to_fixed24_3dnow;
         }
@@ -72,6 +77,7 @@ av_cold void ff_ac3dsp_init_x86(AC3DSPContext *c, int bit_exact)
         c->ac3_max_msb_abs_int16 = ff_ac3_max_msb_abs_int16_sse2;
         c->float_to_fixed24 = ff_float_to_fixed24_sse2;
         c->compute_mantissa_size = ff_ac3_compute_mantissa_size_sse2;
+        c->extract_exponents = ff_ac3_extract_exponents_sse2;
         if (!(mm_flags & AV_CPU_FLAG_SSE2SLOW)) {
             c->ac3_lshift_int16 = ff_ac3_lshift_int16_sse2;
             c->ac3_rshift_int32 = ff_ac3_rshift_int32_sse2;
@@ -79,6 +85,9 @@ av_cold void ff_ac3dsp_init_x86(AC3DSPContext *c, int bit_exact)
     }
     if (mm_flags & AV_CPU_FLAG_SSSE3 && HAVE_SSSE3) {
         c->ac3_max_msb_abs_int16 = ff_ac3_max_msb_abs_int16_ssse3;
+        if (!(mm_flags & AV_CPU_FLAG_ATOM)) {
+            c->extract_exponents = ff_ac3_extract_exponents_ssse3;
+        }
     }
 #endif
 }

From 29651e1d44f7e702a43133752a31398f4e73fe09 Mon Sep 17 00:00:00 2001
From: Alex Converse <alex.converse@gmail.com>
Date: Mon, 27 Jun 2011 23:12:55 -0700
Subject: [PATCH 09/12] fate-aac: Expand coverage.

Add al05_44, al06_44, al17_44.
---
 tests/fate/aac.mak | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/tests/fate/aac.mak b/tests/fate/aac.mak
index 6701e149d2..8c805575f3 100644
--- a/tests/fate/aac.mak
+++ b/tests/fate/aac.mak
@@ -2,10 +2,22 @@ FATE_AAC += fate-aac-al04_44
 fate-aac-al04_44: CMD = pcm -i $(SAMPLES)/aac/al04_44.mp4
 fate-aac-al04_44: REF = $(SAMPLES)/aac/al04_44.s16
 
+FATE_AAC += fate-aac-al05_44
+fate-aac-al05_44: CMD = pcm -i $(SAMPLES)/aac/al05_44.mp4
+fate-aac-al05_44: REF = $(SAMPLES)/aac/al05_44.s16
+
+FATE_AAC += fate-aac-al06_44
+fate-aac-al06_44: CMD = pcm -i $(SAMPLES)/aac/al06_44.mp4
+fate-aac-al06_44: REF = $(SAMPLES)/aac/al06_44.s16
+
 FATE_AAC += fate-aac-al07_96
 fate-aac-al07_96: CMD = pcm -i $(SAMPLES)/aac/al07_96.mp4
 fate-aac-al07_96: REF = $(SAMPLES)/aac/al07_96.s16
 
+FATE_AAC += fate-aac-al17_44
+fate-aac-al17_44: CMD = pcm -i $(SAMPLES)/aac/al17_44.mp4
+fate-aac-al17_44: REF = $(SAMPLES)/aac/al17_44.s16
+
 FATE_AAC += fate-aac-am00_88
 fate-aac-am00_88: CMD = pcm -i $(SAMPLES)/aac/am00_88.mp4
 fate-aac-am00_88: REF = $(SAMPLES)/aac/am00_88.s16

From 8b84af748865105a6520a1e486792e8a5a82e2a6 Mon Sep 17 00:00:00 2001
From: Alex Converse <alex.converse@gmail.com>
Date: Wed, 29 Jun 2011 13:30:09 -0700
Subject: [PATCH 10/12] avidec: Factor out the sync functionality.

---
 libavformat/avidec.c | 244 +++++++++++++++++++++++--------------------
 1 file changed, 128 insertions(+), 116 deletions(-)

diff --git a/libavformat/avidec.c b/libavformat/avidec.c
index 85fc794362..7e509ce8ce 100644
--- a/libavformat/avidec.c
+++ b/libavformat/avidec.c
@@ -836,13 +836,137 @@ static int get_stream_idx(int *d){
     }
 }
 
-static int avi_read_packet(AVFormatContext *s, AVPacket *pkt)
+static int avi_sync(AVFormatContext *s) // scan for the next usable chunk header; 0 if found, AVERROR_EOF otherwise
 {
     AVIContext *avi = s->priv_data;
     AVIOContext *pb = s->pb;
     int n, d[8];
     unsigned int size;
     int64_t i, sync;
+
+start_sync: // restart point after a chunk has been skipped
+    memset(d, -1, sizeof(int)*8); // every window slot starts as -1, i.e. "no byte read yet"
+    for(i=sync=avio_tell(pb); !pb->eof_reached; i++) { // sync = where this scan began, i advances one byte per pass
+        int j;
+
+        for(j=0; j<7; j++) // slide the 8-byte window left by one ...
+            d[j]= d[j+1];
+        d[7]= avio_r8(pb); // ... and append the next byte from the stream
+
+        size= d[4] + (d[5]<<8) + (d[6]<<16) + (d[7]<<24); // d[4..7] combined as a little-endian 32-bit size
+
+        n= get_stream_idx(d+2); // stream index taken from d[2],d[3] (used by the ix## test below)
+//av_log(s, AV_LOG_DEBUG, "%X %X %X %X %X %X %X %X %"PRId64" %d %d\n", d[0], d[1], d[2], d[3], d[4], d[5], d[6], d[7], i, size, n);
+        if(i + (uint64_t)size > avi->fsize || d[0]<0) // size would run past avi->fsize, or window not yet full (d[0] still -1)
+            continue;
+
+        //parse ix##
+        if(  (d[0] == 'i' && d[1] == 'x' && n < s->nb_streams)
+        //parse JUNK
+           ||(d[0] == 'J' && d[1] == 'U' && d[2] == 'N' && d[3] == 'K')
+           ||(d[0] == 'i' && d[1] == 'd' && d[2] == 'x' && d[3] == '1')){
+            avio_skip(pb, size); // skip size bytes, then rescan from there
+//av_log(s, AV_LOG_DEBUG, "SKIP\n");
+            goto start_sync;
+        }
+
+        //parse stray LIST
+        if(d[0] == 'L' && d[1] == 'I' && d[2] == 'S' && d[3] == 'T'){
+            avio_skip(pb, 4); // skip only 4 bytes (presumably the list type) and rescan
+            goto start_sync;
+        }
+
+        n= get_stream_idx(d); // stream index taken from d[0],d[1] for the ##xx tag forms below
+
+        if(!((i-avi->last_pkt_pos)&1) && get_stream_idx(d+1) < s->nb_streams) // (i - last_pkt_pos) is even and the tag shifted by one byte also gives a valid stream
+            continue;
+
+        //detect ##ix chunk and skip
+        if(d[2] == 'i' && d[3] == 'x' && n < s->nb_streams){
+            avio_skip(pb, size);
+            goto start_sync;
+        }
+
+        //parse ##dc/##wb
+        if(n < s->nb_streams){
+            AVStream *st;
+            AVIStream *ast;
+            st = s->streams[n];
+            ast = st->priv_data;
+
+            if(s->nb_streams>=2){
+                AVStream *st1  = s->streams[1];
+                AVIStream *ast1= st1->priv_data;
+                //workaround for broken small-file-bug402.avi
+                if(   d[2] == 'w' && d[3] == 'b'
+                   && n==0
+                   && st ->codec->codec_type == AVMEDIA_TYPE_VIDEO
+                   && st1->codec->codec_type == AVMEDIA_TYPE_AUDIO
+                   && ast->prefix == 'd'*256+'c'
+                   && (d[2]*256+d[3] == ast1->prefix || !ast1->prefix_count)
+                  ){
+                    n=1; // redirect this "wb" chunk from stream 0 to stream 1
+                    st = st1;
+                    ast = ast1;
+                    av_log(s, AV_LOG_WARNING, "Invalid stream + prefix combination, assuming audio.\n");
+                }
+            }
+
+
+            if(   (st->discard >= AVDISCARD_DEFAULT && size==0)
+               /*|| (st->discard >= AVDISCARD_NONKEY && !(pkt->flags & AV_PKT_FLAG_KEY))*/ //FIXME needs a little reordering
+               || st->discard >= AVDISCARD_ALL){
+                ast->frame_offset += get_duration(ast, size); // still advance frame_offset for the chunk being dropped
+                avio_skip(pb, size);
+                goto start_sync;
+            }
+
+            if (d[2] == 'p' && d[3] == 'c' && size<=4*256+4) { // ##pc chunk that fits 256 four-byte palette entries plus 4 header bytes
+                int k = avio_r8(pb); // first palette index to write
+                int last = (k + avio_r8(pb) - 1) & 0xFF; // last index = first + second header byte - 1, wrapped to 8 bits
+
+                avio_rl16(pb); //flags
+
+                for (; k <= last; k++)
+                    ast->pal[k] = avio_rb32(pb)>>8;// b + (g << 8) + (r << 16);
+                ast->has_pal= 1;
+                goto start_sync;
+            } else if(   ((ast->prefix_count<5 || sync+9 > i) && d[2]<128 && d[3]<128) || // 7-bit tag bytes accepted while prefix_count<5 or within 9 bytes of the scan start
+                         d[2]*256+d[3] == ast->prefix /*||
+                         (d[2] == 'd' && d[3] == 'c') ||
+                         (d[2] == 'w' && d[3] == 'b')*/) {
+
+//av_log(s, AV_LOG_DEBUG, "OK\n");
+                if(d[2]*256+d[3] == ast->prefix) // count consecutive chunks carrying the same two-byte tag suffix
+                    ast->prefix_count++;
+                else{
+                    ast->prefix= d[2]*256+d[3];
+                    ast->prefix_count= 0;
+                }
+
+                avi->stream_index= n; // record which stream the chunk belongs to
+                ast->packet_size= size + 8; // size field value plus the 8 bytes of tag and size
+                ast->remaining= size;
+
+                if(size || !ast->sample_size){
+                    uint64_t pos= avio_tell(pb) - 8; // offset of the 8-byte header just consumed
+                    if(!st->index_entries || !st->nb_index_entries || st->index_entries[st->nb_index_entries - 1].pos < pos){ // only add entries beyond the last indexed position
+                        av_add_index_entry(st, pos, ast->frame_offset, size, 0, AVINDEX_KEYFRAME);
+                    }
+                }
+                return 0; // header consumed; the read position is right after the size field
+            }
+        }
+    }
+
+    return AVERROR_EOF; // loop ended on pb->eof_reached without accepting a chunk
+}
+
+static int avi_read_packet(AVFormatContext *s, AVPacket *pkt)
+{
+    AVIContext *avi = s->priv_data;
+    AVIOContext *pb = s->pb;
+    int err;
     void* dstr;
 
     if (CONFIG_DV_DEMUXER && avi->dv_demux) {
@@ -993,121 +1117,9 @@ resync:
         return size;
     }
 
-    memset(d, -1, sizeof(int)*8);
-    for(i=sync=avio_tell(pb); !pb->eof_reached; i++) {
-        int j;
-
-        for(j=0; j<7; j++)
-            d[j]= d[j+1];
-        d[7]= avio_r8(pb);
-
-        size= d[4] + (d[5]<<8) + (d[6]<<16) + (d[7]<<24);
-
-        n= get_stream_idx(d+2);
-//av_log(s, AV_LOG_DEBUG, "%X %X %X %X %X %X %X %X %"PRId64" %d %d\n", d[0], d[1], d[2], d[3], d[4], d[5], d[6], d[7], i, size, n);
-        if(i + (uint64_t)size > avi->fsize || d[0]<0)
-            continue;
-
-        //parse ix##
-        if(  (d[0] == 'i' && d[1] == 'x' && n < s->nb_streams)
-        //parse JUNK
-           ||(d[0] == 'J' && d[1] == 'U' && d[2] == 'N' && d[3] == 'K')
-           ||(d[0] == 'i' && d[1] == 'd' && d[2] == 'x' && d[3] == '1')){
-            avio_skip(pb, size);
-//av_log(s, AV_LOG_DEBUG, "SKIP\n");
-            goto resync;
-        }
-
-        //parse stray LIST
-        if(d[0] == 'L' && d[1] == 'I' && d[2] == 'S' && d[3] == 'T'){
-            avio_skip(pb, 4);
-            goto resync;
-        }
-
-        n= get_stream_idx(d);
-
-        if(!((i-avi->last_pkt_pos)&1) && get_stream_idx(d+1) < s->nb_streams)
-            continue;
-
-        //detect ##ix chunk and skip
-        if(d[2] == 'i' && d[3] == 'x' && n < s->nb_streams){
-            avio_skip(pb, size);
-            goto resync;
-        }
-
-        //parse ##dc/##wb
-        if(n < s->nb_streams){
-            AVStream *st;
-            AVIStream *ast;
-            st = s->streams[n];
-            ast = st->priv_data;
-
-            if(s->nb_streams>=2){
-                AVStream *st1  = s->streams[1];
-                AVIStream *ast1= st1->priv_data;
-                //workaround for broken small-file-bug402.avi
-                if(   d[2] == 'w' && d[3] == 'b'
-                   && n==0
-                   && st ->codec->codec_type == AVMEDIA_TYPE_VIDEO
-                   && st1->codec->codec_type == AVMEDIA_TYPE_AUDIO
-                   && ast->prefix == 'd'*256+'c'
-                   && (d[2]*256+d[3] == ast1->prefix || !ast1->prefix_count)
-                  ){
-                    n=1;
-                    st = st1;
-                    ast = ast1;
-                    av_log(s, AV_LOG_WARNING, "Invalid stream + prefix combination, assuming audio.\n");
-                }
-            }
-
-
-            if(   (st->discard >= AVDISCARD_DEFAULT && size==0)
-               /*|| (st->discard >= AVDISCARD_NONKEY && !(pkt->flags & AV_PKT_FLAG_KEY))*/ //FIXME needs a little reordering
-               || st->discard >= AVDISCARD_ALL){
-                ast->frame_offset += get_duration(ast, size);
-                avio_skip(pb, size);
-                goto resync;
-            }
-
-            if (d[2] == 'p' && d[3] == 'c' && size<=4*256+4) {
-                int k = avio_r8(pb);
-                int last = (k + avio_r8(pb) - 1) & 0xFF;
-
-                avio_rl16(pb); //flags
-
-                for (; k <= last; k++)
-                    ast->pal[k] = avio_rb32(pb)>>8;// b + (g << 8) + (r << 16);
-                ast->has_pal= 1;
-                goto resync;
-            } else if(   ((ast->prefix_count<5 || sync+9 > i) && d[2]<128 && d[3]<128) ||
-                         d[2]*256+d[3] == ast->prefix /*||
-                         (d[2] == 'd' && d[3] == 'c') ||
-                         (d[2] == 'w' && d[3] == 'b')*/) {
-
-//av_log(s, AV_LOG_DEBUG, "OK\n");
-                if(d[2]*256+d[3] == ast->prefix)
-                    ast->prefix_count++;
-                else{
-                    ast->prefix= d[2]*256+d[3];
-                    ast->prefix_count= 0;
-                }
-
-                avi->stream_index= n;
-                ast->packet_size= size + 8;
-                ast->remaining= size;
-
-                if(size || !ast->sample_size){
-                    uint64_t pos= avio_tell(pb) - 8;
-                    if(!st->index_entries || !st->nb_index_entries || st->index_entries[st->nb_index_entries - 1].pos < pos){
-                        av_add_index_entry(st, pos, ast->frame_offset, size, 0, AVINDEX_KEYFRAME);
-                    }
-                }
-                goto resync;
-            }
-        }
-    }
-
-    return AVERROR_EOF;
+    if ((err = avi_sync(s)) < 0)
+        return err;
+    goto resync;
 }
 
 /* XXX: We make the implicit supposition that the positions are sorted

From fccab0180748c7e796f7e5911cb5c63cf8a97697 Mon Sep 17 00:00:00 2001
From: Reinhard Tartler <siretart@tauware.de>
Date: Fri, 1 Jul 2011 15:49:53 +0200
Subject: [PATCH 11/12] doc: Remove outdated information about our issue
 tracker

We have now switched to http://bugzilla.libav.org.
---
 doc/issue_tracker.txt | 228 ------------------------------------------
 1 file changed, 228 deletions(-)
 delete mode 100644 doc/issue_tracker.txt

diff --git a/doc/issue_tracker.txt b/doc/issue_tracker.txt
deleted file mode 100644
index e5733ec9d5..0000000000
--- a/doc/issue_tracker.txt
+++ /dev/null
@@ -1,228 +0,0 @@
-Libav's bug/patch/feature request tracker manual
-================================================
-
-NOTE: This is a draft.
-
-Overview:
----------
-Libav uses Roundup for tracking issues, new issues and changes to
-existing issues can be done through a web interface and through email.
-It is possible to subscribe to individual issues by adding yourself to the
-nosy list or to subscribe to the ffmpeg-issues mailing list which receives
-a mail for every change to every issue. Replies to such mails will also
-be properly added to the respective issue.
-(the above does all work already after light testing)
-The subscription URL for the ffmpeg-issues list is:
-http://live.polito/mailman/listinfo/ffmpeg-issues
-The URL of the webinterface of the tracker is:
-http(s)://roundup.libav.org/
-Note the URLs in this document are obfuscated, you must append the top level
-domain for non-profit organizations to the tracker, and of Italy to the
-mailing list.
-
-Email Interface:
-----------------
-There is a mailing list to which all new issues and changes to existing issues
-are sent. You can subscribe through
-http://live.polito/mailman/listinfo/ffmpeg-issues
-Replies to messages there will have their text added to the specific issues.
-Attachments will be added as if they had been uploaded via the web interface.
-You can change the status, substatus, topic, ... by changing the subject in
-your reply like:
-Re: [issue94] register_avcodec and allcodecs.h [type=patch;status=open;substatus=approved]
-Roundup will then change things as you requested and remove the [...] from
-the subject before forwarding the mail to the mailing list.
-
-
-NOTE: issue = (bug report || patch || feature request)
-
-Type:
------
-bug
-    An error, flaw, mistake, failure, or fault in ffmpeg or libav* that
-    prevents it from behaving as intended.
-
-feature request
-    Request of support for encoding or decoding of a new codec, container
-    or variant.
-    Request of support for more, less or plain different output or behavior
-    where the current implementation cannot be considered wrong.
-
-patch
-    A patch as generated by diff which conforms to the patch submission and
-    development policy.
-
-
-Priority:
----------
-critical
-    Bugs and patches which deal with data loss and security issues.
-    No feature request can be critical.
-
-important
-    Bugs which make Libav unusable for a significant number of users, and
-    patches fixing them.
-    Examples here might be completely broken MPEG-4 decoding or a build issue
-    on Linux.
-    While broken 4xm decoding or a broken OS/2 build would not be important,
-    the separation to normal is somewhat fuzzy.
-    For feature requests this priority would be used for things many people
-    want.
-
-normal
-
-
-minor
-    Bugs and patches about things like spelling errors, "mp2" instead of
-    "mp3" being shown and such.
-    Feature requests about things few people want or which do not make a big
-    difference.
-
-wish
-    Something that is desirable to have but that there is no urgency at
-    all to implement, e.g. something completely cosmetic like a website
-    restyle or a personalized doxy template or the Libav logo.
-    This priority is not valid for bugs.
-
-
-Status:
--------
-new
-    initial state
-
-open
-    intermediate states
-
-closed
-    final state
-
-
-Type/Status/Substatus:
-----------
-*/new/new
-    Initial state of new bugs, patches and feature requests submitted by
-    users.
-
-*/open/open
-    Issues which have been briefly looked at and which did not look outright
-    invalid.
-    This implicates that no real more detailed state applies yet. Conversely,
-    the more detailed states below implicate that the issue has been briefly
-    looked at.
-
-*/closed/duplicate
-    Bugs, patches or feature requests which are duplicates.
-    Note that patches dealing with the same thing in a different way are not
-    duplicates.
-    Note, if you mark something as duplicate, do not forget setting the
-    superseder so bug reports are properly linked.
-
-*/closed/invalid
-    Bugs caused by user errors, random ineligible or otherwise nonsense stuff.
-
-*/closed/needs_more_info
-    Issues for which some information has been requested by the developers,
-    but which has not been provided by anyone within reasonable time.
-
-bug/open/reproduced
-    Bugs which have been reproduced.
-
-bug/open/analyzed
-    Bugs which have been analyzed and where it is understood what causes them
-    and which exact chain of events triggers them. This analysis should be
-    available as a message in the bug report.
-    Note, do not change the status to analyzed without also providing a clear
-    and understandable analysis.
-    This state implicates that the bug either has been reproduced or that
-    reproduction is not needed as the bug is already understood.
-
-bug/open/needs_more_info
-    Bug reports which are incomplete and or where more information is needed
-    from the submitter or another person who can provide it.
-    This state implicates that the bug has not been analyzed or reproduced.
-    Note, the idea behind needs_more_info is to offload work from the
-    developers to the users whenever possible.
-
-bug/closed/fixed
-    Bugs which have to the best of our knowledge been fixed.
-
-bug/closed/wont_fix
-    Bugs which we will not fix. Possible reasons include legality, high
-    complexity for the sake of supporting obscure corner cases, speed loss
-    for similarly esoteric purposes, et cetera.
-    This also means that we would reject a patch.
-    If we are just too lazy to fix a bug then the correct state is open
-    and unassigned. Closed means that the case is closed which is not
-    the case if we are just waiting for a patch.
-
-bug/closed/works_for_me
-    Bugs for which sufficient information was provided to reproduce but
-    reproduction failed - that is the code seems to work correctly to the
-    best of our knowledge.
-
-patch/open/approved
-    Patches which have been reviewed and approved by a developer.
-    Such patches can be applied anytime by any other developer after some
-    reasonable testing (compile + regression tests + does the patch do
-    what the author claimed).
-
-patch/open/needs_changes
-    Patches which have been reviewed and need changes to be accepted.
-
-patch/closed/applied
-    Patches which have been applied.
-
-patch/closed/rejected
-    Patches which have been rejected.
-
-feature_request/open/needs_more_info
-    Feature requests where it is not clear what exactly is wanted
-    (these also could be closed as invalid ...).
-
-feature_request/closed/implemented
-    Feature requests which have been implemented.
-
-feature_request/closed/wont_implement
-    Feature requests which will not be implemented. The reasons here could
-    be legal, philosophical or others.
-
-Note, please do not use type-status-substatus combinations other than the
-above without asking on libav-devel first!
-
-Note2, if you provide the requested info do not forget to remove the
-needs_more_info substate.
-
-Topic:
-------
-A topic is a tag you should add to your issue in order to make grouping them
-easier.
-
-avcodec
-    issues in libavcodec/*
-
-avformat
-    issues in libavformat/*
-
-avutil
-    issues in libavutil/*
-
-regression test
-    issues in tests/*
-
-ffmpeg
-    issues in or related to ffmpeg.c
-
-ffplay
-    issues in or related to ffplay.c
-
-ffserver
-    issues in or related to ffserver.c
-
-build system
-    issues in or related to configure/Makefile
-
-regression
-    bugs which were working in a past revision
-
-roundup
-    issues related to our issue tracker

From 23ce6e72123a40895baaeefeb27c7c18748bd67e Mon Sep 17 00:00:00 2001
From: Mans Rullgard <mans@mansr.com>
Date: Fri, 1 Jul 2011 12:47:36 +0100
Subject: [PATCH 12/12] get_bits: remove x86 inline asm in A32 bitstream reader

x86 does not use this variant so having inline asm there
is pointless.

Signed-off-by: Mans Rullgard <mans@mansr.com>
---
 libavcodec/get_bits.h | 8 --------
 1 file changed, 8 deletions(-)

diff --git a/libavcodec/get_bits.h b/libavcodec/get_bits.h
index b592b9a9a5..d2ae345315 100644
--- a/libavcodec/get_bits.h
+++ b/libavcodec/get_bits.h
@@ -201,19 +201,11 @@ static inline void skip_bits_long(GetBitContext *s, int n){
         }                                                               \
     } while (0)
 
-#if ARCH_X86
-#   define SKIP_CACHE(name, gb, num)                            \
-    __asm__("shldl %2, %1, %0          \n\t"                    \
-            "shll  %2, %1              \n\t"                    \
-            : "+r" (name##_cache0), "+r" (name##_cache1)        \
-            : "Ic" ((uint8_t)(num)))
-#else
 #   define SKIP_CACHE(name, gb, num) do {               \
         name##_cache0 <<= (num);                        \
         name##_cache0 |= NEG_USR32(name##_cache1,num);  \
         name##_cache1 <<= (num);                        \
     } while (0)
-#endif
 
 #   define SKIP_COUNTER(name, gb, num) name##_bit_count += (num)